xenbits.xensource.com Git - people/aperard/xen-unstable.git/commitdiff
bitkeeper revision 1.2 (3ddb79c9KusG02eh7i-uXkgY0IksKA)
author    smh22@boulderdash.cl.cam.ac.uk <smh22@boulderdash.cl.cam.ac.uk>
          Wed, 20 Nov 2002 12:02:17 +0000 (12:02 +0000)
committer smh22@boulderdash.cl.cam.ac.uk <smh22@boulderdash.cl.cam.ac.uk>
          Wed, 20 Nov 2002 12:02:17 +0000 (12:02 +0000)
Import changeset

401 files changed:
.rootkeys
BitKeeper/etc/logging_ok [new file with mode: 0644]
xen-2.4.16/Makefile [new file with mode: 0644]
xen-2.4.16/README [new file with mode: 0644]
xen-2.4.16/Rules.mk [new file with mode: 0644]
xen-2.4.16/arch/i386/Makefile [new file with mode: 0644]
xen-2.4.16/arch/i386/Rules.mk [new file with mode: 0644]
xen-2.4.16/arch/i386/apic.c [new file with mode: 0644]
xen-2.4.16/arch/i386/boot/boot.S [new file with mode: 0644]
xen-2.4.16/arch/i386/delay.c [new file with mode: 0644]
xen-2.4.16/arch/i386/entry.S [new file with mode: 0644]
xen-2.4.16/arch/i386/extable.c [new file with mode: 0644]
xen-2.4.16/arch/i386/i387.c [new file with mode: 0644]
xen-2.4.16/arch/i386/i8259.c [new file with mode: 0644]
xen-2.4.16/arch/i386/idle0_task.c [new file with mode: 0644]
xen-2.4.16/arch/i386/io_apic.c [new file with mode: 0644]
xen-2.4.16/arch/i386/ioremap.c [new file with mode: 0644]
xen-2.4.16/arch/i386/irq.c [new file with mode: 0644]
xen-2.4.16/arch/i386/mm.c [new file with mode: 0644]
xen-2.4.16/arch/i386/mpparse.c [new file with mode: 0644]
xen-2.4.16/arch/i386/pci-dma.c [new file with mode: 0644]
xen-2.4.16/arch/i386/pci-i386.c [new file with mode: 0644]
xen-2.4.16/arch/i386/pci-i386.h [new file with mode: 0644]
xen-2.4.16/arch/i386/pci-irq.c [new file with mode: 0644]
xen-2.4.16/arch/i386/pci-pc.c [new file with mode: 0644]
xen-2.4.16/arch/i386/process.c [new file with mode: 0644]
xen-2.4.16/arch/i386/rwlock.c [new file with mode: 0644]
xen-2.4.16/arch/i386/setup.c [new file with mode: 0644]
xen-2.4.16/arch/i386/smp.c [new file with mode: 0644]
xen-2.4.16/arch/i386/smpboot.c [new file with mode: 0644]
xen-2.4.16/arch/i386/time.c [new file with mode: 0644]
xen-2.4.16/arch/i386/trampoline.S [new file with mode: 0644]
xen-2.4.16/arch/i386/traps.c [new file with mode: 0644]
xen-2.4.16/arch/i386/usercopy.c [new file with mode: 0644]
xen-2.4.16/arch/i386/xeno.lds [new file with mode: 0644]
xen-2.4.16/common/Makefile [new file with mode: 0644]
xen-2.4.16/common/block.c [new file with mode: 0644]
xen-2.4.16/common/brlock.c [new file with mode: 0644]
xen-2.4.16/common/dom0_ops.c [new file with mode: 0644]
xen-2.4.16/common/domain.c [new file with mode: 0644]
xen-2.4.16/common/event.c [new file with mode: 0644]
xen-2.4.16/common/kernel.c [new file with mode: 0644]
xen-2.4.16/common/lib.c [new file with mode: 0644]
xen-2.4.16/common/memory.c [new file with mode: 0644]
xen-2.4.16/common/network.c [new file with mode: 0644]
xen-2.4.16/common/page_alloc.c [new file with mode: 0644]
xen-2.4.16/common/resource.c [new file with mode: 0644]
xen-2.4.16/common/slab.c [new file with mode: 0644]
xen-2.4.16/common/softirq.c [new file with mode: 0644]
xen-2.4.16/common/timer.c [new file with mode: 0644]
xen-2.4.16/common/vsprintf.c [new file with mode: 0644]
xen-2.4.16/drivers/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/block/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/block/blkpg.c [new file with mode: 0644]
xen-2.4.16/drivers/block/elevator.c [new file with mode: 0644]
xen-2.4.16/drivers/block/genhd.c [new file with mode: 0644]
xen-2.4.16/drivers/block/ll_rw_blk.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-disk.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-dma.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-features.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-geometry.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-pci.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-probe.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide-taskfile.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide.c [new file with mode: 0644]
xen-2.4.16/drivers/ide/ide_modes.h [new file with mode: 0644]
xen-2.4.16/drivers/net/3c509.c [new file with mode: 0644]
xen-2.4.16/drivers/net/3c59x.c [new file with mode: 0644]
xen-2.4.16/drivers/net/8139cp.c [new file with mode: 0644]
xen-2.4.16/drivers/net/8139too.c [new file with mode: 0644]
xen-2.4.16/drivers/net/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/net/Space.c [new file with mode: 0644]
xen-2.4.16/drivers/net/eepro100.c [new file with mode: 0644]
xen-2.4.16/drivers/net/net_init.c [new file with mode: 0644]
xen-2.4.16/drivers/net/pcnet32.c [new file with mode: 0644]
xen-2.4.16/drivers/net/setup.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/.depend [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/21142.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/ChangeLog [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/eeprom.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/interrupt.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/media.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/pnic.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/pnic2.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/timer.c [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/tulip.h [new file with mode: 0644]
xen-2.4.16/drivers/net/tulip/tulip_core.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/pci/compat.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/gen-devlist.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/names.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/pci.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/pci.ids [new file with mode: 0644]
xen-2.4.16/drivers/pci/proc.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/quirks.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/setup-bus.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/setup-irq.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/setup-res.c [new file with mode: 0644]
xen-2.4.16/drivers/pci/syscall.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/Makefile [new file with mode: 0644]
xen-2.4.16/drivers/scsi/constants.h [new file with mode: 0644]
xen-2.4.16/drivers/scsi/hosts.h [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi.h [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_dma.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_error.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_ioctl.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_lib.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_merge.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_module.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_obsolete.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_obsolete.h [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_proc.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_queue.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_scan.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsi_syms.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/scsicam.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/sd.c [new file with mode: 0644]
xen-2.4.16/drivers/scsi/sd.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/apic.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/apicdef.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/atomic.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/bitops.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/byteorder.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/cache.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/cpufeature.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/current.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/debugreg.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/delay.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/desc.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/elf.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/fixmap.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/hardirq.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/hdreg.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/i387.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/ide.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/io.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/io_apic.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/ioctl.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/irq.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/mc146818rtc.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/mpspec.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/msr.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/page.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/pci.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/pgalloc.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/processor.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/ptrace.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/rwlock.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/scatterlist.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/smp.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/smpboot.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/softirq.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/spinlock.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/system.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/types.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/uaccess.h [new file with mode: 0644]
xen-2.4.16/include/asm-i386/unaligned.h [new file with mode: 0644]
xen-2.4.16/include/hypervisor-ifs/block.h [new file with mode: 0644]
xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h [new file with mode: 0644]
xen-2.4.16/include/hypervisor-ifs/network.h [new file with mode: 0644]
xen-2.4.16/include/scsi/scsi.h [new file with mode: 0644]
xen-2.4.16/include/scsi/scsi_ioctl.h [new file with mode: 0644]
xen-2.4.16/include/scsi/scsicam.h [new file with mode: 0644]
xen-2.4.16/include/scsi/sg.h [new file with mode: 0644]
xen-2.4.16/include/xeno/blk.h [new file with mode: 0644]
xen-2.4.16/include/xeno/blkdev.h [new file with mode: 0644]
xen-2.4.16/include/xeno/blkpg.h [new file with mode: 0644]
xen-2.4.16/include/xeno/block.h [new file with mode: 0644]
xen-2.4.16/include/xeno/bootmem.h [new file with mode: 0644]
xen-2.4.16/include/xeno/brlock.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/big_endian.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/generic.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/little_endian.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/pdp_endian.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/swab.h [new file with mode: 0644]
xen-2.4.16/include/xeno/byteorder/swabb.h [new file with mode: 0644]
xen-2.4.16/include/xeno/cache.h [new file with mode: 0644]
xen-2.4.16/include/xeno/config.h [new file with mode: 0644]
xen-2.4.16/include/xeno/ctype.h [new file with mode: 0644]
xen-2.4.16/include/xeno/delay.h [new file with mode: 0644]
xen-2.4.16/include/xeno/dom0_ops.h [new file with mode: 0644]
xen-2.4.16/include/xeno/elevator.h [new file with mode: 0644]
xen-2.4.16/include/xeno/errno.h [new file with mode: 0644]
xen-2.4.16/include/xeno/etherdevice.h [new file with mode: 0644]
xen-2.4.16/include/xeno/ethtool.h [new file with mode: 0644]
xen-2.4.16/include/xeno/event.h [new file with mode: 0644]
xen-2.4.16/include/xeno/genhd.h [new file with mode: 0644]
xen-2.4.16/include/xeno/hdreg.h [new file with mode: 0644]
xen-2.4.16/include/xeno/hdsmart.h [new file with mode: 0644]
xen-2.4.16/include/xeno/ide.h [new file with mode: 0644]
xen-2.4.16/include/xeno/if.h [new file with mode: 0644]
xen-2.4.16/include/xeno/if_ether.h [new file with mode: 0644]
xen-2.4.16/include/xeno/if_packet.h [new file with mode: 0644]
xen-2.4.16/include/xeno/init.h [new file with mode: 0644]
xen-2.4.16/include/xeno/interrupt.h [new file with mode: 0644]
xen-2.4.16/include/xeno/ioctl.h [new file with mode: 0644]
xen-2.4.16/include/xeno/ioport.h [new file with mode: 0644]
xen-2.4.16/include/xeno/irq.h [new file with mode: 0644]
xen-2.4.16/include/xeno/irq_cpustat.h [new file with mode: 0644]
xen-2.4.16/include/xeno/kdev_t.h [new file with mode: 0644]
xen-2.4.16/include/xeno/lib.h [new file with mode: 0644]
xen-2.4.16/include/xeno/list.h [new file with mode: 0644]
xen-2.4.16/include/xeno/major.h [new file with mode: 0644]
xen-2.4.16/include/xeno/mii.h [new file with mode: 0644]
xen-2.4.16/include/xeno/mm.h [new file with mode: 0644]
xen-2.4.16/include/xeno/module.h [new file with mode: 0644]
xen-2.4.16/include/xeno/multiboot.h [new file with mode: 0644]
xen-2.4.16/include/xeno/netdevice.h [new file with mode: 0644]
xen-2.4.16/include/xeno/pci.h [new file with mode: 0644]
xen-2.4.16/include/xeno/pci_ids.h [new file with mode: 0644]
xen-2.4.16/include/xeno/pkt_sched.h [new file with mode: 0644]
xen-2.4.16/include/xeno/prefetch.h [new file with mode: 0644]
xen-2.4.16/include/xeno/sched.h [new file with mode: 0644]
xen-2.4.16/include/xeno/skbuff.h [new file with mode: 0644]
xen-2.4.16/include/xeno/slab.h [new file with mode: 0644]
xen-2.4.16/include/xeno/smp.h [new file with mode: 0644]
xen-2.4.16/include/xeno/socket.h [new file with mode: 0644]
xen-2.4.16/include/xeno/sockios.h [new file with mode: 0644]
xen-2.4.16/include/xeno/spinlock.h [new file with mode: 0644]
xen-2.4.16/include/xeno/time.h [new file with mode: 0644]
xen-2.4.16/include/xeno/timer.h [new file with mode: 0644]
xen-2.4.16/include/xeno/timex.h [new file with mode: 0644]
xen-2.4.16/include/xeno/tqueue.h [new file with mode: 0644]
xen-2.4.16/include/xeno/types.h [new file with mode: 0644]
xen-2.4.16/include/xeno/vif.h [new file with mode: 0644]
xen-2.4.16/net/Makefile [new file with mode: 0644]
xen-2.4.16/net/dev.c [new file with mode: 0644]
xen-2.4.16/net/dev_mcast.c [new file with mode: 0644]
xen-2.4.16/net/eth.c [new file with mode: 0644]
xen-2.4.16/net/sch_generic.c [new file with mode: 0644]
xen-2.4.16/net/skbuff.c [new file with mode: 0644]
xen-2.4.16/net/utils.c [new file with mode: 0644]
xen-2.4.16/tools/Makefile [new file with mode: 0644]
xen-2.4.16/tools/elf-reloc.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/config.in [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/defconfig [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/mm/init.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds [new file with mode: 0644]
xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/drivers/block/rd.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/drivers/char/tty_io.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/apic.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/boot.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/cache.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/current.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/delay.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/desc.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/div64.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/dma.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/elf.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/errno.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/i387.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ide.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/init.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/io.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/irq.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/locks.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mman.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/module.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/msr.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/namei.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/page.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/param.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/parport.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/poll.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/processor.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/resource.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/segment.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/serial.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/setup.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/signal.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/smp.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/socket.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/stat.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/string.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/system.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/termios.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/timex.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/types.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/user.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/vga.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/asm-xeno/xor.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h [new file with mode: 0644]
xenolinux-2.4.16-sparse/kernel/panic.c [new file with mode: 0644]
xenolinux-2.4.16-sparse/mk [new file with mode: 0755]

diff --git a/.rootkeys b/.rootkeys
index f93931bc9c993a43fe072e0f2688839df1c781dd..1544186af7ab34fe54663916ca4f017f04b5dad1 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -1,2 +1,402 @@
 3ddb6b0bKlMz_dz-M59a1mkUa1lASw BitKeeper/etc/config
 3ddb6b0buTaC5zg1_a8FoAR9FWi_mw BitKeeper/etc/ignore
+3ddb79c9_hgSp-gsQm8HqWM_9W3B_A BitKeeper/etc/logging_ok
+3ddb79bcbOVHh38VJzc97-JEGD4dJQ xen-2.4.16/Makefile
+3ddb79bcCa2VbsMp7mWKlhgwLQUQGA xen-2.4.16/README
+3ddb79bcWnTwYsQRWl_PaneJfa6p0w xen-2.4.16/Rules.mk
+3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen-2.4.16/arch/i386/Makefile
+3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen-2.4.16/arch/i386/Rules.mk
+3ddb79bcsjinG9k1KcvbVBuas1R2dA xen-2.4.16/arch/i386/apic.c
+3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen-2.4.16/arch/i386/boot/boot.S
+3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen-2.4.16/arch/i386/delay.c
+3ddb79bcecupHj56ZbTa3B0FxDowMg xen-2.4.16/arch/i386/entry.S
+3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen-2.4.16/arch/i386/extable.c
+3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen-2.4.16/arch/i386/i387.c
+3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen-2.4.16/arch/i386/i8259.c
+3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen-2.4.16/arch/i386/idle0_task.c
+3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen-2.4.16/arch/i386/io_apic.c
+3ddb79bc1uNlAtc-84Ioq4qfcnI_CQ xen-2.4.16/arch/i386/ioremap.c
+3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen-2.4.16/arch/i386/irq.c
+3ddb79bcHwuCQDjBICDTSis52hWguw xen-2.4.16/arch/i386/mm.c
+3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen-2.4.16/arch/i386/mpparse.c
+3ddb79bcnL-_Dtsbtjgxl7vJU3vBiQ xen-2.4.16/arch/i386/pci-dma.c
+3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen-2.4.16/arch/i386/pci-i386.c
+3ddb79bdIKgipvGoqExEQ7jawfVowA xen-2.4.16/arch/i386/pci-i386.h
+3ddb79bdHe6_Uij4-glW91vInNtBYQ xen-2.4.16/arch/i386/pci-irq.c
+3ddb79bcZ_2FxINljqNSkqa17ISyJw xen-2.4.16/arch/i386/pci-pc.c
+3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen-2.4.16/arch/i386/process.c
+3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen-2.4.16/arch/i386/rwlock.c
+3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen-2.4.16/arch/i386/setup.c
+3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen-2.4.16/arch/i386/smp.c
+3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen-2.4.16/arch/i386/smpboot.c
+3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen-2.4.16/arch/i386/time.c
+3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen-2.4.16/arch/i386/trampoline.S
+3ddb79bcOftONV9h4QCxXOfiT0h91w xen-2.4.16/arch/i386/traps.c
+3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen-2.4.16/arch/i386/usercopy.c
+3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen-2.4.16/arch/i386/xeno.lds
+3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen-2.4.16/common/Makefile
+3ddb79bddEYJbcURvqqcx99Yl2iAhQ xen-2.4.16/common/block.c
+3ddb79bdrqnW93GR9gZk1OJe1qK-iQ xen-2.4.16/common/brlock.c
+3ddb79bdLX_P6iB7ILiblRLWvebapg xen-2.4.16/common/dom0_ops.c
+3ddb79bdYO5D8Av12NHqPeSviav7cg xen-2.4.16/common/domain.c
+3ddb79bdeyutmaXEfpQvvxj7eQ0fCw xen-2.4.16/common/event.c
+3ddb79bd9drcFPVxd4w2GPOIjLlXpA xen-2.4.16/common/kernel.c
+3ddb79bduhSEZI8xa7IbGQCpap5y2A xen-2.4.16/common/lib.c
+3ddb79bdS39UXxUtZnaScie83-7VTQ xen-2.4.16/common/memory.c
+3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen-2.4.16/common/network.c
+3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen-2.4.16/common/page_alloc.c
+3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen-2.4.16/common/resource.c
+3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen-2.4.16/common/slab.c
+3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen-2.4.16/common/softirq.c
+3ddb79bdQqFHtHRGEO2dsxGgo6eAhw xen-2.4.16/common/timer.c
+3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen-2.4.16/common/vsprintf.c
+3ddb79c0ppNeJtjC4va8j41ADCnchA xen-2.4.16/drivers/Makefile
+3ddb79beWzgPS8ozf2BL2g3ZkiWhhQ xen-2.4.16/drivers/block/Makefile
+3ddb79be04dyXzyXqDbMRS_1funwXQ xen-2.4.16/drivers/block/blkpg.c
+3ddb79beME_0abStePF6fU8XLuQnWw xen-2.4.16/drivers/block/elevator.c
+3ddb79beNQVrdGyoI4njXhgAjD6a4A xen-2.4.16/drivers/block/genhd.c
+3ddb79beyWwLRP_BiM2t1JKgr_plEw xen-2.4.16/drivers/block/ll_rw_blk.c
+3ddb79bdhcqD9ebrslr0O0oHqTiiXg xen-2.4.16/drivers/ide/Makefile
+3ddb79bdErDn_WC3G-fWxKNR3viLnA xen-2.4.16/drivers/ide/ide-disk.c
+3ddb79bdIPNW36FrlId94jTXaW8HoA xen-2.4.16/drivers/ide/ide-dma.c
+3ddb79be5Ysvhn4se_Z-LQY_hI6UPw xen-2.4.16/drivers/ide/ide-features.c
+3ddb79bdh1ohsWYRH_KdaXr7cqs12w xen-2.4.16/drivers/ide/ide-geometry.c
+3ddb79bdYcxXT-2UEaDcG0Ic4MIK1g xen-2.4.16/drivers/ide/ide-pci.c
+3ddb79bdOXTbcImJo8DwmlNX88k78Q xen-2.4.16/drivers/ide/ide-probe.c
+3ddb79bdDWFwINnKn29RlFDwGJhjYg xen-2.4.16/drivers/ide/ide-taskfile.c
+3ddb79bdkDY1bSOYkToP1Cc49VdBxg xen-2.4.16/drivers/ide/ide.c
+3ddb79bdPyAvT_WZTAFhaX0jp-yXSw xen-2.4.16/drivers/ide/ide_modes.h
+3ddb79bfogeJNHTIepPjd8fy1TyoTw xen-2.4.16/drivers/net/3c509.c
+3ddb79bfMlOcWUwjtg6oMYhGySHDDw xen-2.4.16/drivers/net/3c59x.c
+3ddb79bfl_DWxZQFKiJ2BXrSedV4lg xen-2.4.16/drivers/net/8139cp.c
+3ddb79bfLVGtyXNJS4NQg-lP21rndA xen-2.4.16/drivers/net/8139too.c
+3ddb79c0tWiE8xIFHszxipeVCGKTSA xen-2.4.16/drivers/net/Makefile
+3ddb79bfU-H1Hms4BuJEPPydjXUEaQ xen-2.4.16/drivers/net/Space.c
+3ddb79c0GejJrp1U6W4G6dYi-RiH4A xen-2.4.16/drivers/net/eepro100.c
+3ddb79bfKvn9mt0kofpkw0QaWjxO6A xen-2.4.16/drivers/net/net_init.c
+3ddb79c0fQgORkFlqWZdP-6cDHyFIQ xen-2.4.16/drivers/net/pcnet32.c
+3ddb79bf_CBcu3QWYwq4bNAOnM2RqQ xen-2.4.16/drivers/net/setup.c
+3ddb79bfh8ucmq_HqRSaURalpeAmPg xen-2.4.16/drivers/net/tulip/.depend
+3ddb79bfsJ-hdQ17EXTFiUOHisjNgQ xen-2.4.16/drivers/net/tulip/21142.c
+3ddb79bf0lzTL-ywAdOO7vctTYAmJA xen-2.4.16/drivers/net/tulip/ChangeLog
+3ddb79bfRbGBTu5mznHtxpFPtnQYSQ xen-2.4.16/drivers/net/tulip/Makefile
+3ddb79bfLLamkCaJZDJ7i6qrCUhwBw xen-2.4.16/drivers/net/tulip/eeprom.c
+3ddb79bf-zt39-zIUgWC9Kb4As--Ew xen-2.4.16/drivers/net/tulip/interrupt.c
+3ddb79bfdr1I4DtFnXCzpaHkEkHE2Q xen-2.4.16/drivers/net/tulip/media.c
+3ddb79bftPnJDLCAo0Do4KPr5raERA xen-2.4.16/drivers/net/tulip/pnic.c
+3ddb79bf5lr4NIjy1oZOM_jhpxGidw xen-2.4.16/drivers/net/tulip/pnic2.c
+3ddb79bfBhawkz-2DT9lakMPbqzljQ xen-2.4.16/drivers/net/tulip/timer.c
+3ddb79bf7yw7hGBMM60aMgMgxY_G2g xen-2.4.16/drivers/net/tulip/tulip.h
+3ddb79bfhUr3baP8Lf4oZjhBX7i5kw xen-2.4.16/drivers/net/tulip/tulip_core.c
+3ddb79beUWngyIhMHgyPtuTem4o4JA xen-2.4.16/drivers/pci/Makefile
+3ddb79beU9td0Mnm0VUMklerBa37qQ xen-2.4.16/drivers/pci/compat.c
+3ddb79beHkGQE58z5t5gyUCYiwOxvw xen-2.4.16/drivers/pci/gen-devlist.c
+3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen-2.4.16/drivers/pci/names.c
+3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen-2.4.16/drivers/pci/pci.c
+3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen-2.4.16/drivers/pci/pci.ids
+3ddb79bfGf5-CZSdzn0DGBYWjQiDjw xen-2.4.16/drivers/pci/proc.c
+3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen-2.4.16/drivers/pci/quirks.c
+3ddb79bfkVLMq5CWjZLACPDivqxq_w xen-2.4.16/drivers/pci/setup-bus.c
+3ddb79bfl1H1arbB0pzAEC2uPmY_3g xen-2.4.16/drivers/pci/setup-irq.c
+3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen-2.4.16/drivers/pci/setup-res.c
+3ddb79bfIcCWJsBDNcQQE3ok2Azn-Q xen-2.4.16/drivers/pci/syscall.c
+3ddb79be3kwzyKagpMHGoXZFdan7dg xen-2.4.16/drivers/scsi/Makefile
+3ddb79beXZxwKh7cGyPfr40bhDyRrA xen-2.4.16/drivers/scsi/constants.h
+3ddb79beGiGljlTNq_kRnCBZECgC9Q xen-2.4.16/drivers/scsi/hosts.h
+3ddb79bexarQo1tQ541PPUyK9HXNDA xen-2.4.16/drivers/scsi/scsi.c
+3ddb79beBOiYxQUiWTHosepRlJyuGA xen-2.4.16/drivers/scsi/scsi.h
+3ddb79beVTYJj6_KMxYLJmCP7p9MuQ xen-2.4.16/drivers/scsi/scsi_dma.c
+3ddb79beDrImFCFGgB_GLgUbeuHjog xen-2.4.16/drivers/scsi/scsi_error.c
+3ddb79bepDvUltYDsInaUsH9lII9Sw xen-2.4.16/drivers/scsi/scsi_ioctl.c
+3ddb79berPStE_-ILQHgcl1BLDLywA xen-2.4.16/drivers/scsi/scsi_lib.c
+3ddb79beRXjB7_nNUbJMIRyjDmeByQ xen-2.4.16/drivers/scsi/scsi_merge.c
+3ddb79beGNb7Es1bATZAGsPZEu5F2Q xen-2.4.16/drivers/scsi/scsi_module.c
+3ddb79beZ--AZB0twliIm3qmQJO8Zg xen-2.4.16/drivers/scsi/scsi_obsolete.c
+3ddb79beQgG_st0eBZUX8AQI7kBkHA xen-2.4.16/drivers/scsi/scsi_obsolete.h
+3ddb79beK65cNRldY0CFGXjZ3-A74Q xen-2.4.16/drivers/scsi/scsi_proc.c
+3ddb79beeIuwGDE0Ldl8wy6mt86Bag xen-2.4.16/drivers/scsi/scsi_queue.c
+3ddb79beQVxjXLLSY896cqce3j6Ehg xen-2.4.16/drivers/scsi/scsi_scan.c
+3ddb79beVrSvakLg_9MSo22vJ_TGrA xen-2.4.16/drivers/scsi/scsi_syms.c
+3ddb79beC6PIqDEaxAfO3bLKcmMLeA xen-2.4.16/drivers/scsi/scsicam.c
+3ddb79bedAG8DPsr3S1N4IASxUuBug xen-2.4.16/drivers/scsi/sd.c
+3ddb79beA27dAK0xtNh4k6SJniKnlA xen-2.4.16/drivers/scsi/sd.h
+3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen-2.4.16/include/asm-i386/apic.h
+3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen-2.4.16/include/asm-i386/apicdef.h
+3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen-2.4.16/include/asm-i386/atomic.h
+3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen-2.4.16/include/asm-i386/bitops.h
+3ddb79c3pXaTAGGWSIZF9EnRV5PlRw xen-2.4.16/include/asm-i386/byteorder.h
+3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen-2.4.16/include/asm-i386/cache.h
+3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen-2.4.16/include/asm-i386/cpufeature.h
+3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen-2.4.16/include/asm-i386/current.h
+3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen-2.4.16/include/asm-i386/debugreg.h
+3ddb79c3r9-31dIsewPV3P3i8HALsQ xen-2.4.16/include/asm-i386/delay.h
+3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen-2.4.16/include/asm-i386/desc.h
+3ddb79c2O729EttZTYu1c8LcsUO_GQ xen-2.4.16/include/asm-i386/elf.h
+3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen-2.4.16/include/asm-i386/fixmap.h
+3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen-2.4.16/include/asm-i386/hardirq.h
+3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen-2.4.16/include/asm-i386/hdreg.h
+3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen-2.4.16/include/asm-i386/i387.h
+3ddb79c3otbjpnqFDSzSeD0J-0xcwg xen-2.4.16/include/asm-i386/ide.h
+3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen-2.4.16/include/asm-i386/io.h
+3ddb79c2TKeScYHQZreTdHqYNLbehQ xen-2.4.16/include/asm-i386/io_apic.h
+3ddb79c3S9Tga4XZRPrD4-aN3XIV6w xen-2.4.16/include/asm-i386/ioctl.h
+3ddb79c2L7rTlFzazOLW1XuSZefpFw xen-2.4.16/include/asm-i386/irq.h
+3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen-2.4.16/include/asm-i386/mc146818rtc.h
+3ddb79c3n_UbPuxlkNxvvLycClIkxA xen-2.4.16/include/asm-i386/mpspec.h
+3ddb79c2wa0dA_LGigxOelSGbJ284Q xen-2.4.16/include/asm-i386/msr.h
+3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen-2.4.16/include/asm-i386/page.h
+3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen-2.4.16/include/asm-i386/pci.h
+3ddb79c3nm2zdzeO6Mj8g7ex3txgGw xen-2.4.16/include/asm-i386/pgalloc.h
+3ddb79c2QF5-pZGzuX4QukPCDAl59A xen-2.4.16/include/asm-i386/processor.h
+3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen-2.4.16/include/asm-i386/ptrace.h
+3ddb79c2plf7ciNgoNjU-RsbUzawsw xen-2.4.16/include/asm-i386/rwlock.h
+3ddb79c2mJI9YuGMScjofPlD8EdtgA xen-2.4.16/include/asm-i386/scatterlist.h
+3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen-2.4.16/include/asm-i386/smp.h
+3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen-2.4.16/include/asm-i386/smpboot.h
+3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen-2.4.16/include/asm-i386/softirq.h
+3ddb79c3NiyQE2vQnyGiaBnNjBO1rA xen-2.4.16/include/asm-i386/spinlock.h
+3ddb79c3ezddh34MdelJpa5tNR00Dw xen-2.4.16/include/asm-i386/system.h
+3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen-2.4.16/include/asm-i386/types.h
+3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen-2.4.16/include/asm-i386/uaccess.h
+3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen-2.4.16/include/asm-i386/unaligned.h
+3ddb79c2YTaZwOqWin9-QNgHge5RVw xen-2.4.16/include/hypervisor-ifs/block.h
+3ddb79c25UE59iu4JJcbRalx95mvcg xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
+3ddb79c2oRPrzClk3zbTkRHlpumzKA xen-2.4.16/include/hypervisor-ifs/network.h
+3ddb79c4qbCoOFHrv9sCGshbWzBVlQ xen-2.4.16/include/scsi/scsi.h
+3ddb79c4R4iVwqIIeychVQYmIH4FUg xen-2.4.16/include/scsi/scsi_ioctl.h
+3ddb79c4yw_mfd4Uikn3v_IOPRpa1Q xen-2.4.16/include/scsi/scsicam.h
+3ddb79c4HKPMLvDBP9LxzPi_szVxGA xen-2.4.16/include/scsi/sg.h
+3ddb79c0nTsjSpVK4ZVTI9WwN24xtQ xen-2.4.16/include/xeno/blk.h
+3ddb79c0dVhTHLsv6CPTf4baKix4mA xen-2.4.16/include/xeno/blkdev.h
+3ddb79c18ePBgitnOs7GiOCFilODVw xen-2.4.16/include/xeno/blkpg.h
+3ddb79c2SisDOHDyTeK5-MV3m7pNbA xen-2.4.16/include/xeno/block.h
+3ddb79c2JOriBs0mWh-Tlolq78tg3w xen-2.4.16/include/xeno/bootmem.h
+3ddb79c1oOjpQbp68MW7yiUpoi-S-w xen-2.4.16/include/xeno/brlock.h
+3ddb79c1x7Ie3kifu7dQRx8y7HVyvA xen-2.4.16/include/xeno/byteorder/big_endian.h
+3ddb79c1qFXOEX1eD0yXJ_gsGkUt8w xen-2.4.16/include/xeno/byteorder/generic.h
+3ddb79c1VbwFALNpgx6uC_iZKFHD-A xen-2.4.16/include/xeno/byteorder/little_endian.h
+3ddb79c1VvNRMM35bpdZMekirCXP-A xen-2.4.16/include/xeno/byteorder/pdp_endian.h
+3ddb79c116WbJV8bwGZXFFJy_GNNvw xen-2.4.16/include/xeno/byteorder/swab.h
+3ddb79c1pwmlw8VXW8aaSKAVGVmjDA xen-2.4.16/include/xeno/byteorder/swabb.h
+3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen-2.4.16/include/xeno/cache.h
+3ddb79c259jh8hE7vre_8NuE7nwNSA xen-2.4.16/include/xeno/config.h
+3ddb79c1V44RD26YqCUm-kqIupM37A xen-2.4.16/include/xeno/ctype.h
+3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen-2.4.16/include/xeno/delay.h
+3ddb79c2PMeWTK86y4C3F4MzHw4A1g xen-2.4.16/include/xeno/dom0_ops.h
+3ddb79c1uaWQZj551j1O0B5z8AnHOg xen-2.4.16/include/xeno/elevator.h
+3ddb79c0HIghfBF8zFUdmXhOU8i6hA xen-2.4.16/include/xeno/errno.h
+3ddb79c0rMjudDKkJku_mkm0J-BZgw xen-2.4.16/include/xeno/etherdevice.h
+3ddb79c0T3X07lFnM9OSE-W5bqIDSQ xen-2.4.16/include/xeno/ethtool.h
+3ddb79c1W0lQca8gRV7sN6j3iY4Luw xen-2.4.16/include/xeno/event.h
+3ddb79c1J4I_AjNflZL-1c1jOIlSyg xen-2.4.16/include/xeno/genhd.h
+3ddb79c1i-chIoeniqgYwMM3EgaR5w xen-2.4.16/include/xeno/hdreg.h
+3ddb79c12GuUuaxBKiMuwf-Qvuwpng xen-2.4.16/include/xeno/hdsmart.h
+3ddb79c0MM575N4YvMSiw9EqKH4JDA xen-2.4.16/include/xeno/ide.h
+3ddb79c1yHLp08JhgPxIMcZ8DwN9hg xen-2.4.16/include/xeno/if.h
+3ddb79c1RCWOkWPQRzbYVTX_e-E7CA xen-2.4.16/include/xeno/if_ether.h
+3ddb79c2IYah7z7hkzPyOiG8szKkyw xen-2.4.16/include/xeno/if_packet.h
+3ddb79c0GurNF9tDWqQbAwJFH8ugfA xen-2.4.16/include/xeno/init.h
+3ddb79c1Vi5VleJAOKHAlY0G2zAsgw xen-2.4.16/include/xeno/interrupt.h
+3ddb79c2J6EnruiygRhBCgftzMzTeQ xen-2.4.16/include/xeno/ioctl.h
+3ddb79c1nzaWu8NoF4xCCMSFJR4MlA xen-2.4.16/include/xeno/ioport.h
+3ddb79c2qAxCOABlkKtD8Txohe-qEw xen-2.4.16/include/xeno/irq.h
+3ddb79c2b3qe-6Ann09FqZBF4IrJaQ xen-2.4.16/include/xeno/irq_cpustat.h
+3ddb79c11w_O7z7YZJnzuDSxaK5LlA xen-2.4.16/include/xeno/kdev_t.h
+3ddb79c1NfYlOrWNqgZkj9EwtFfJow xen-2.4.16/include/xeno/lib.h
+3ddb79c18Ajy7micDGQQfJ0zWgEHtA xen-2.4.16/include/xeno/list.h
+3ddb79c0_s2_wgV0cA6tztEaeyy1NA xen-2.4.16/include/xeno/major.h
+3ddb79c1fsWuKI2sGlW5bqoG2lPVNA xen-2.4.16/include/xeno/mii.h
+3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen-2.4.16/include/xeno/mm.h
+3ddb79c13p9iHn1XAp0IS1qvj4yDsg xen-2.4.16/include/xeno/module.h
+3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen-2.4.16/include/xeno/multiboot.h
+3ddb79c0CLfAlJLg1ohdPD-Jjn-jxg xen-2.4.16/include/xeno/netdevice.h
+3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen-2.4.16/include/xeno/pci.h
+3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen-2.4.16/include/xeno/pci_ids.h
+3ddb79c2byJwwNNkiES__A9H4Cvc4g xen-2.4.16/include/xeno/pkt_sched.h
+3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen-2.4.16/include/xeno/prefetch.h
+3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen-2.4.16/include/xeno/sched.h
+3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen-2.4.16/include/xeno/skbuff.h
+3ddb79c14dXIhP7C2ahnoD08K90G_w xen-2.4.16/include/xeno/slab.h
+3ddb79c09xbS-xxfKxuV3JETIhBzmg xen-2.4.16/include/xeno/smp.h
+3ddb79c1-yIt89RT02wIPp2xDR8YjQ xen-2.4.16/include/xeno/socket.h
+3ddb79c2V2P9F2xMCzDJ9vbUofSg_Q xen-2.4.16/include/xeno/sockios.h
+3ddb79c2iIcESrDAB8samy_yAh6olQ xen-2.4.16/include/xeno/spinlock.h
+3ddb79c0BnA20PbgmuMPSGIBljNRQw xen-2.4.16/include/xeno/time.h
+3ddb79c2HFkXuRxi1CriJtSFmY6Ybw xen-2.4.16/include/xeno/timer.h
+3ddb79c2_m8lT9jDKse_tePj7zcnNQ xen-2.4.16/include/xeno/timex.h
+3ddb79c2e2C14HkndNEJlYwXaPrF5A xen-2.4.16/include/xeno/tqueue.h
+3ddb79c1-kVvF8cVa0k3ZHDdBMj01Q xen-2.4.16/include/xeno/types.h
+3ddb79c2Ae5KpzhC9LCYG7mP_Vi4Aw xen-2.4.16/include/xeno/vif.h
+3ddb79c4YQCQ6r0xNLLu0jfbM7pVmA xen-2.4.16/net/Makefile
+3ddb79c4AkfDkTCw0comx4L8wsUOMg xen-2.4.16/net/dev.c
+3ddb79c4x1L_soh8b-r_1jQW_37Icw xen-2.4.16/net/dev_mcast.c
+3ddb79c4NSDwiQ-AmrYdxcRAwLPzwQ xen-2.4.16/net/eth.c
+3ddb79c4KZhNxUuYJ7lul8cc-wRkyg xen-2.4.16/net/sch_generic.c
+3ddb79c4TZj1wXPKQt36O72SddtBNQ xen-2.4.16/net/skbuff.c
+3ddb79c4ARyIHqv3Y6YFckIUbyA8Tw xen-2.4.16/net/utils.c
+3ddb79c4x8dvwPtzclghWAKFWpEBFA xen-2.4.16/tools/Makefile
+3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen-2.4.16/tools/elf-reloc.c
+3ddb79bbYMXGmQTsr5BeGS_RuZ5f_w xenolinux-2.4.16-sparse/Makefile
+3ddb79b7e0ssyz3Q1GoqjDds-x1PLQ xenolinux-2.4.16-sparse/arch/xeno/Makefile
+3ddb79b7_rLvYZU3tOY6Wwuw_Sg3_w xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile
+3ddb79b8L4xnwrcvWk6nAbgKVbNkSA xenolinux-2.4.16-sparse/arch/xeno/config.in
+3ddb79b7v_Be34as7_mlzFlw65hOjQ xenolinux-2.4.16-sparse/arch/xeno/defconfig
+3ddb79b7KUvtx0knQJoRaBDZQeNidg xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
+3ddb79b6Rc0uAOGFthIFxq1KGWZ_Iw xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c
+3ddb79b7LLVJBGynxHSOh9A9l97sug xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile
+3ddb79b7UG2QiRAU-Wvc1Y_BLigu1Q xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c
+3ddb79b75eo4PRXkT6Th9popt_SJhg xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile
+3ddb79b7Xyaoep6U0kLvx6Kx7OauDw xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c
+3ddb79b7PulSkF9m3c7K5MkxHRf4hA xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h
+3ddb79b7s7yYBioHidSkIoHtQxYmOw xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile
+3ddb79b7CpLL98ScdpbKkVBktlbCtQ xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
+3ddb79b7hqi9krq6h98lnpONHGzvEA xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile
+3ddb79b7eyEv5bsN8EQkjIG0y11Q1A xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S
+3ddb79b70XAg9bJwp0-DWHe0LtzlBw xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S
+3ddb79b7dDsKjU22VxQ-C5BMFaMUmw xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c
+3ddb79b7xzwEc8-lo1vu3BxB-gBURQ xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
+3ddb79b7i7pfPEcy_zjDfW9JHD305g xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c
+3ddb79b7Ti2i5ztQzM-w67zN-cJD8A xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c
+3ddb79b7MEQGMZrsF94atNJZ4-OGzA xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c
+3ddb79b7DOz-Mz5jsjRd5W8jN0XbPw xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c
+3ddb79b7bOAPk_YAUUsruhVGO2GOOg xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c
+3ddb79b7qf2WK6vMKcmOLIeKN5GSjg xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
+3ddb79b7HdaBR7yk_u51auihbr31aQ xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c
+3ddb79b7yJunvnrgWD2VTy6yot0PMg xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c
+3ddb79b7BIitpVygiksiMBQYvh5Z2A xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c
+3ddb79b7DTevmwhNla67jZxjBSIKFg xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c
+3ddb79b76mCf-gZPR4KLjL8ktZ37GA xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c
+3ddb79b7xDsEKErRFeqcSm6eRrTEIg xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
+3ddb79b7V4dv_KAQu4Msa2Ebhd0aKw xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c
+3ddb79b8oUnwncDaZuRWF3-n3jPsIA xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile
+3ddb79b8BMxi8qW3_NT44SSd1uMD3Q xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S
+3ddb79b8WcWel2g9zU9pBJb-yA8jBw xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c
+3ddb79b8oxeiaIW6Au95OM0GlsMrMw xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c
+3ddb79b8XPasnRhvK-_6xYksf3S6qA xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S
+3ddb79b8HFLUh8mwcl4X44ta-ny1KA xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c
+3ddb79b879qeoLlarHXvNIDEva6ssA xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c
+3ddb79b869CY_yr4HymV6k98pfpMgg xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c
+3ddb79b8S77yf1--Qa4C0ZYmqKXCww xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c
+3ddb79b8ffZ79cU2ZzfA2ekeo6pqeg xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c
+3ddb79b82kQ5oIXpxq3TUmlgxsLzLg xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c
+3ddb79b8qdD_svLCCAja_oP2w4Tn8Q xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile
+3ddb79b8ukY8dsPYmR8eNk-aCzFPsQ xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c
+3ddb79b856Zta9b3s0bgUCGbG1blvQ xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c
+3ddb79b85fpsKT8A9WYnuJg03b715g xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c
+3ddb79b83Zj7Xn2QVhU4HeMuAC9FjA xenolinux-2.4.16-sparse/arch/xeno/mm/init.c
+3ddb79b7aKdTkbr3u6aze8tVwGh_TQ xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds
+3ddb79bbx682YH6vR2zbVOXwg73ULg xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
+3ddb79bcJfHdwrPsjqgI33_OsGdVCg xenolinux-2.4.16-sparse/drivers/block/rd.c
+3ddb79bcpVu-IbnqwQqpRqsEbLpsuw xenolinux-2.4.16-sparse/drivers/char/tty_io.c
+3ddb79bba_zKpuurHVeWfgDkyPoq8A xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c
+3ddb79b8VFtfWSCrXKPN2K21zd_vtw xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h
+3ddb79b8Zzi13p3OAPV25QgiC3THAQ xenolinux-2.4.16-sparse/include/asm-xeno/apic.h
+3ddb79baZDlsdV_m6C5CXnWMl15p1g xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h
+3ddb79baZM88u4CnriVA8ZXBdnMNvg xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h
+3ddb79baYHyZsDCiXiq8Y8_XxHE-jQ xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h
+3ddb79b8vFGtGb6pg3GZFXSiwOZfcg xenolinux-2.4.16-sparse/include/asm-xeno/boot.h
+3ddb79baW8tf6PiBQUF50QQM5nY9sw xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h
+3ddb79b80msOlzTZRoVudYdemzgOlA xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h
+3ddb79b8brNSUEujnq8f_zr8kA-cUg xenolinux-2.4.16-sparse/include/asm-xeno/cache.h
+3ddb79bayhr6C6prVhAYlFRChhf3wg xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h
+3ddb79b8RNUaDbpPjdVVwKAsbiTBKQ xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h
+3ddb79b8pJe4aNsUKkfHEoBT9Y-UMA xenolinux-2.4.16-sparse/include/asm-xeno/current.h
+3ddb79b8KL7icUfxKRoWDIkHkLQ1kQ xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h
+3ddb79baDUP_cRdFgqaH0rXUvMxx4A xenolinux-2.4.16-sparse/include/asm-xeno/delay.h
+3ddb79b89CgBTFsS3joEJ1ZniSHEgA xenolinux-2.4.16-sparse/include/asm-xeno/desc.h
+3ddb79ba6xyT4mJOYSp1Fg2l0ta93A xenolinux-2.4.16-sparse/include/asm-xeno/div64.h
+3ddb79b80Z4ZUIqbD1Xu_t4OCuEHeQ xenolinux-2.4.16-sparse/include/asm-xeno/dma.h
+3ddb79bac26NkKcPIEsfxETc5Snyag xenolinux-2.4.16-sparse/include/asm-xeno/elf.h
+3ddb79ba722pCJ_g_xI8ebsE31IK-Q xenolinux-2.4.16-sparse/include/asm-xeno/errno.h
+3ddb79b8vIpUpgaSNEneFkg5hYSvNg xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h
+3ddb79b8c_oKu2_BGNJctM4DBET31Q xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h
+3ddb79b8780YvqvK1g5KPIWzQ6P15w xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h
+3ddb79bas-nFywnmilbUeT34PEAA0g xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h
+3ddb79batzR40ZFY9dvgs5f1aM9I6g xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h
+3ddb79b90xBgbeYgCcImS2ZxJakxBA xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h
+3ddb79baXLZV3dUKQI2gIYpAy67RuA xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h
+3ddb79b82xfEY3yBet-2FXY4p8b7yg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h
+3ddb79b8KUSolAgH19qEzo1Ey0f1Ng xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h
+3ddb79b8J0Y2UA8NKoN5Ng71WFQRIg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h
+3ddb79bapQ9Z9ewa5O1pqAVaNBTazg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
+3ddb79baL-pjPI8hg5xjPgd4__SlOA xenolinux-2.4.16-sparse/include/asm-xeno/i387.h
+3ddb79ba66TwvG7HpbBo04fRhmj3KQ xenolinux-2.4.16-sparse/include/asm-xeno/ide.h
+3ddb79bahFfCuRcmd9kBYA-CuGtCSg xenolinux-2.4.16-sparse/include/asm-xeno/init.h
+3ddb79ba3wnwflaNW6QheYvxoj5S8Q xenolinux-2.4.16-sparse/include/asm-xeno/io.h
+3ddb79ba1MhHpElCXFlijej2zWVk4g xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h
+3ddb79baiyfcZN9rJwhq0UeFjI6GkQ xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h
+3ddb79badReKYSok3yHShb4jg0vA-A xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h
+3ddb79b9iuIxteTXg8_myIsrWF5uxg xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h
+3ddb79baw5Dxo78880UMSanDV70WdA xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h
+3ddb79ban9FUBuEaznRZvPfry3xnHQ xenolinux-2.4.16-sparse/include/asm-xeno/irq.h
+3ddb79banJ5r-mqE7LZ8nPGTADaGBA xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h
+3ddb79baoaoHGTW0oiBXFPUg4KWydw xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h
+3ddb79bam2K6c7tS2HX6jis2Bqx71w xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h
+3ddb79baQ8gGrZm2Jlo3I1ntD0H5HA xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h
+3ddb79b9MeQEYrafy-Mx9OoeVFM_uw xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h
+3ddb79baAsy5W-cJ9ML_w9chqqUh4A xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h
+3ddb79b9uhsumPVE0wGgarfIkYDSFA xenolinux-2.4.16-sparse/include/asm-xeno/locks.h
+3ddb79b954ISbF9e68hB0WTulCJRgg xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h
+3ddb79b9EZDlC6RGn_y0OYr0nyQWlw xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h
+3ddb79b9M6pTF4maDgh8TYbg_HHUbw xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h
+3ddb79baA7dlps8FkicOUEXKEQuQsA xenolinux-2.4.16-sparse/include/asm-xeno/mman.h
+3ddb79baKfF36-eRvkxnEvMRQRai-w xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h
+3ddb79baQyKbT5U4EmZNePY9Txp-tA xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h
+3ddb79bberC3Ghs4vy-06Pu-LSiWtw xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h
+3ddb79bbsJLF10xQcKDoV8f_7gcOXg xenolinux-2.4.16-sparse/include/asm-xeno/module.h
+3ddb79bbY5RffJ8_F1oC4VI7q3Eejg xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h
+3ddb79b90vB4Vzzy_wL6SYXQMy9N9Q xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h
+3ddb79bbZ5a_vTk0xcgOHuPokaqwMw xenolinux-2.4.16-sparse/include/asm-xeno/msr.h
+3ddb79b9y1xAKaPiBd79MBcCopNNYw xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h
+3ddb79b90hX0QBJlWQN_VsHtX5Ijkw xenolinux-2.4.16-sparse/include/asm-xeno/namei.h
+3ddb79bbG2p9MNq7tuISz8md1Oj2lg xenolinux-2.4.16-sparse/include/asm-xeno/page.h
+3ddb79bb_iUa2piFSwaB8YPw-rB5SQ xenolinux-2.4.16-sparse/include/asm-xeno/param.h
+3ddb79b9Y8UU0S9AoDznoqqcYxg9-A xenolinux-2.4.16-sparse/include/asm-xeno/parport.h
+3ddb79b9K9_edWxBHS7TdCpyDmfp6g xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h
+3ddb79bahaS_P3UYp9VEU6kHxXbajA xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h
+3ddb79b9MjXUB_rk29GJgaNY24feCw xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h
+3ddb79bbPF2ENpNHBru8K3hyYVPmkQ xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h
+3ddb79b9t9xKrOf8aP3X5jEit9tj-w xenolinux-2.4.16-sparse/include/asm-xeno/poll.h
+3ddb79b9xHtTDWFaI9ncAtxyUth0Yg xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h
+3ddb79b9VwZ9YsSpA7CkJmYXVadrCA xenolinux-2.4.16-sparse/include/asm-xeno/processor.h
+3ddb79bbKaOkHrGG9j05AGlii-voaQ xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h
+3ddb79bbVpJCVTXmc2yNf1rsC00YEg xenolinux-2.4.16-sparse/include/asm-xeno/resource.h
+3ddb79bbtU0Kh27NbNpqKAIhshJvXQ xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h
+3ddb79b97LhdmW6hYmybQOPDkK6plg xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h
+3ddb79bbgRzM1NoXYbLoOCbZt8s5NA xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h
+3ddb79b9xCi3F80Z6xxx6nzkCAo8vQ xenolinux-2.4.16-sparse/include/asm-xeno/segment.h
+3ddb79b9WxySbCKDoMgTgPtn1G3BFw xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h
+3ddb79b9gNK3dtgXONloSBaNgZPjAg xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h
+3ddb79bb2UTDgfwju2J3weDXemw3LA xenolinux-2.4.16-sparse/include/asm-xeno/serial.h
+3ddb79bbAPFyA_n4dNVCRoee8obOKA xenolinux-2.4.16-sparse/include/asm-xeno/setup.h
+3ddb79b9XTOB5DwWBGzPLLR4rNEkDQ xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h
+3ddb79b9-_jDlAj3qVZe4opi3zectQ xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h
+3ddb79b9AW75ErwlTRX4McxO15sEaQ xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h
+3ddb79b9NBJW-KAI3mgveUCr7sIOwA xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h
+3ddb79bbj0i8tUVNMKtZVrLJqv3Nsw xenolinux-2.4.16-sparse/include/asm-xeno/signal.h
+3ddb79bbfAmpotdy-No2dwGez2fnIg xenolinux-2.4.16-sparse/include/asm-xeno/smp.h
+3ddb79b9pDERXiqSumFWMTFJ1X9xIw xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h
+3ddb79bbuCOIWTlWEHgOTexEBbdDow xenolinux-2.4.16-sparse/include/asm-xeno/socket.h
+3ddb79b9ExeUznVBlSn1e2nvOCrJ4A xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h
+3ddb79b9kL3xvucBb-Gmg4_vo-99vw xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h
+3ddb79b9rJ8AfSzGzA0arI8mazYLlQ xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h
+3ddb79bbXaA_zUHNPkAKRNz1h0gIJw xenolinux-2.4.16-sparse/include/asm-xeno/stat.h
+3ddb79b9G004IlCplrjWgF1aXbp8dA xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h
+3ddb79bbRsy3GlCFrQEbVMVp--xlwQ xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h
+3ddb79bb4xug4cDph6ODLQFQIan_sg xenolinux-2.4.16-sparse/include/asm-xeno/string.h
+3ddb79b9JhjJtJUO3g5LmrHPkdxgKg xenolinux-2.4.16-sparse/include/asm-xeno/system.h
+3ddb79b9tbjCU9zSbqKbbv4m8tijlg xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h
+3ddb79bbi0mW10tH4xX1_KHXKM_xPg xenolinux-2.4.16-sparse/include/asm-xeno/termios.h
+3ddb79b9JuR1VvNzlkyMlA-Dnlmy9Q xenolinux-2.4.16-sparse/include/asm-xeno/timex.h
+3ddb79b9Bofq-p3sCTF0ELVuf_iBYA xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h
+3ddb79b9tpBUqS8-S6euSqyk2hFkKg xenolinux-2.4.16-sparse/include/asm-xeno/types.h
+3ddb79bb5bkAaEzD7pdqQZdWyA_0eQ xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h
+3ddb79bbiDIz1dxgFixHKyGuqRqfDQ xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h
+3ddb79ba_Smn-GiYtr5ZTMaZXn-AHg xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h
+3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h
+3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
+3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
+3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
+3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
+3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
+3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok
new file mode 100644
index 0000000..d8aa55c
--- /dev/null
+++ b/BitKeeper/etc/logging_ok
@@ -0,0 +1 @@
+smh22@boulderdash.cl.cam.ac.uk
diff --git a/xen-2.4.16/Makefile b/xen-2.4.16/Makefile
new file mode 100644
index 0000000..7ede454
--- /dev/null
+++ b/xen-2.4.16/Makefile
@@ -0,0 +1,39 @@
+
+export BASEDIR := $(shell pwd)
+
+include Rules.mk
+
+default: $(TARGET)
+
+install: $(TARGET)
+       gzip -f -9 < $(TARGET) > $(TARGET).gz
+       cp $(TARGET).gz ../../install/images/image
+
+clean: delete-links
+       $(MAKE) -C tools clean
+       $(MAKE) -C common clean
+       $(MAKE) -C net clean
+       $(MAKE) -C drivers clean
+       $(MAKE) -C arch/$(ARCH) clean
+       rm -f *.o $(TARGET)* *~ core
+
+$(TARGET): make-links
+       $(MAKE) -C tools
+       $(MAKE) -C common
+       $(MAKE) -C net
+       $(MAKE) -C drivers
+       $(MAKE) -C arch/$(ARCH)
+
+make-links:
+       ln -sf xeno include/linux
+       ln -sf asm-$(ARCH) include/asm
+
+delete-links:
+       rm -f include/linux include/asm
+
+SUBDIRS         = arch common drivers net
+TAGS: 
+       etags `find include/asm-$(ARCH) -name '*.h'`
+       find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a
+       find $(SUBDIRS) -name '*.[ch]' | xargs etags -a
+
diff --git a/xen-2.4.16/README b/xen-2.4.16/README
new file mode 100644 (file)
index 0000000..3518b82
--- /dev/null
@@ -0,0 +1,145 @@
+
+*****************************************************
+   Xeno Hypervisor (18/7/02)
+
+1) Tree layout
+Looks rather like a simplified Linux :-)
+Headers are in include/xeno and include/asm-<arch>.
+At build time we create symlinks:
+ include/linux -> include/xeno
+ include/asm   -> include/asm-<arch>
+In this way, Linux device drivers should need less tweaking of
+their #include lines.
+
+For source files, the mapping between the hypervisor and Linux is:
+ Linux                 Hypervisor
+ -----                 ----------
+ kernel/init/mm/lib -> common
+ net/*              -> net/*
+ drivers/*          -> drivers/*
+ arch/*             -> arch/*
+
+Note that the use of #include <asm/...> and #include <linux/...> can
+lead to confusion, as such files will often exist on the system include
+path, even if a version doesn't exist within the hypervisor tree.
+Unfortunately '-nostdinc' cannot be specified to the compiler, as that
+prevents us using stdarg.h in the compiler's own header directory.
+
+We try to modify things in drivers/* as little as possible, so we can
+easily take updates from Linux. arch/* is basically straight from
+Linux, with fingers in Linux-specific pies hacked off. common/* has
+a lot of Linux code in it, but certain subsystems (task maintenance,
+low-level memory handling) have been replaced. net/* contains enough
+Linux-like gloop to get network drivers to work with little/no
+modification.
+
+2) Building
+'make': Builds an ELF executable called 'image' in the base directory
+'make install': gzip-compresses 'image' and copies it to the TFTP server
+'make clean': removes *all* build and target files
+
+
+*****************************************************
+Random thoughts and stuff from here down...
+
+Todo list
+---------
+* Hypervisor need only directly map its own memory pool
+  (maybe 128MB, tops). That would need 0x08000000....
+  This would allow 512MB Linux with plenty of room for vmalloc'ed areas.
+* Network device -- port drivers to hypervisor, implement virtual
+  driver for xeno-linux. Looks like Ethernet.
+  -- Hypervisor needs to do (at a minimum):
+       - packet filtering on tx (unicast IP only)
+       - packet demux on rx     (unicast IP only)
+       - provide DHCP [maybe do something simpler?]
+         and ARP [at least for hypervisor IP address]
+
+
+Segment descriptor tables
+-------------------------
+We want to allow guest OSes to specify GDT and LDT tables using their
+own pages of memory (just like with page tables). So allow the following:
+ * new_table_entry(ptr, val)
+   [Allows insertion of a code, data, or LDT descriptor into given
+    location. Can simply be checked then poked, with no need to look at
+    page type.]
+ * new_GDT() -- relevant virtual pages are resolved to frames. Either
+    (i) page not present; or (ii) page is only mapped read-only and checks
+    out okay (then marked as special page). Old table is resolved first,
+    and the pages are unmarked (no longer special type).
+ * new_LDT() -- same as for new_GDT(), with same special page type.
+
+Page table updates must be hooked, so we look for updates to virtual page
+addresses in the GDT/LDT range. If mapping to not-present, the old physpage
+has its type_count decremented. If mapping to present, we ensure the page
+is read-only, check it, and set the special type.
+
+Merge set_{LDT,GDT} into update_baseptrs, by passing four args:
+ update_baseptrs(mask, ptab, gdttab, ldttab);
+Update of ptab requires update of gdttab (or set to internal default).
+Update of gdttab requires update of ldttab (or set to internal default).
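+
+A rough C sketch of the interface above (purely illustrative: argument
+types and exact semantics are not settled):
+
+ /* Insert one (code/data/LDT) descriptor at a checked location. */
+ int new_table_entry(unsigned long *ptr, unsigned long long val);
+
+ /* Register guest pages as GDT/LDT frames; pages are checked and
+    pinned read-only, as described above. */
+ int new_GDT(unsigned long *frames, int nr_frames);
+ int new_LDT(unsigned long *frames, int nr_frames);
+
+ /* The proposed merged update; 'mask' selects which bases to change. */
+ #define UPD_PTAB 1
+ #define UPD_GDT  2
+ #define UPD_LDT  4
+ int update_baseptrs(int mask, unsigned long ptab,
+                     unsigned long gdttab, unsigned long ldttab);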
+
+
+The hypervisor page cache
+-------------------------
+This will allow guest OSes to make use of spare pages in the system, but
+allow them to be immediately used for any new domains or memory requests.
+The idea is that, when a page is laundered and falls off Linux's clean_LRU
+list, rather than freeing it, it becomes a candidate for passing down into
+the hypervisor. In return, xeno-linux may ask for one of its previously-
+cached pages back:
+ (page, new_id) = cache_query(page, old_id);
+If the requested page couldn't be kept, a blank page is returned.
+When would Linux make the query? Whenever it wants a page back without
+the delay of going to disc. Also, whenever a page would otherwise be
+flushed to disc.
+
+To try and add to the cache: (blank_page, new_id) = cache_query(page, NULL);
+ [NULL means "give me a blank page"].
+To try and retrieve from the cache: (page, new_id) = cache_query(x_page, id)
+ [we may request that x_page just be discarded, and therefore not impinge
+  on this domain's cache quota].
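+
+A rough C sketch of the query above (illustrative only; since C cannot
+return a pair, new_id comes back via a pointer in this sketch):
+
+ extern void *cache_query(void *page, int old_id, int *new_id);
+
+ void cache_example(void *clean_page, void *x_page, int old_id)
+ {
+     int new_id;
+     /* Offer a clean page to the cache; id 0 plays the role of NULL,
+        i.e. "give me a blank page in exchange". */
+     void *blank = cache_query(clean_page, 0, &new_id);
+     /* Try to retrieve a previously-cached page by its old id; a
+        blank page comes back if it couldn't be kept. */
+     void *page = cache_query(x_page, old_id, &new_id);
+     (void)blank; (void)page;
+ }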
+
+
+Booting secondary processors
+----------------------------
+
+start_of_day (i386/setup.c)
+smp_boot_cpus (i386/smpboot.c)
+ * initialises boot CPU data
+ * parses APIC tables
+ * for each cpu:
+   do_boot_cpu (i386/smpboot.c)
+    * forks a new idle process
+    * points initial stack inside new task struct
+    * points initial EIP at a trampoline in very low memory
+    * frobs remote APIC....
+
+On other processor:
+ * trampoline sets GDT and IDT
+ * jumps at main boot address with magic register value
+ * after setting proper page and descriptor tables, jumps at...
+   initialize_secondary (i386/smpboot.c)
+    * simply reads ESP/EIP out of the (new) idle task
+    * this causes a jump to...
+      start_secondary (i386/smpboot.c)
+       * resets all processor state
+       * waits at a barrier, then writes bitmasks to signal the boot cpu
+       * then barrels into...
+         cpu_idle (i386/process.c)
+         [THIS IS PROBABLY REASONABLE -- BOOT CPU SHOULD KICK
+          SECONDARIES TO GET WORK DONE]
+
+
+SMP capabilities
+----------------
+
+Current intention is to allow hypervisor to schedule on all processors in
+SMP boxen, but to tie each domain to a single processor. This simplifies
+many SMP intricacies both in terms of correctness and efficiency (eg.
+TLB flushing, network packet delivery, ...).
+
+Clients can still make use of SMP by installing multiple domains on a single
+machine, and treating it as a fast cluster (at the very least, the
+hypervisor will have fast routing of locally-destined packets).
diff --git a/xen-2.4.16/Rules.mk b/xen-2.4.16/Rules.mk
new file mode 100644 (file)
index 0000000..25c6e47
--- /dev/null
@@ -0,0 +1,31 @@
+
+ARCH    := i386
+
+TARGET  := $(BASEDIR)/image
+HDRS    := $(wildcard $(BASEDIR)/include/xeno/*.h)
+HDRS    += $(wildcard $(BASEDIR)/include/scsi/*.h)
+HDRS    += $(wildcard $(BASEDIR)/include/hypervisor-ifs/*.h)
+HDRS    += $(wildcard $(BASEDIR)/include/asm-$(ARCH)/*.h)
+
+C_SRCS  := $(wildcard *.c)
+S_SRCS  := $(wildcard *.S)
+OBJS    := $(patsubst %.S,%.o,$(S_SRCS))
+OBJS    += $(patsubst %.c,%.o,$(C_SRCS))
+
+# Note that link order matters!
+ALL_OBJS := $(BASEDIR)/common/common.o
+ALL_OBJS += $(BASEDIR)/net/network.o
+ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/net/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/block/driver.o
+ALL_OBJS += $(BASEDIR)/drivers/ide/driver.o
+ALL_OBJS += $(BASEDIR)/arch/$(ARCH)/arch.o
+
+include $(BASEDIR)/arch/$(ARCH)/Rules.mk
+
+%.o: %.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) -c $< -o $@
+
+%.o: %.S $(HDRS) Makefile
+       $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@
+
diff --git a/xen-2.4.16/arch/i386/Makefile b/xen-2.4.16/arch/i386/Makefile
new file mode 100644 (file)
index 0000000..6778324
--- /dev/null
@@ -0,0 +1,17 @@
+
+include $(BASEDIR)/Rules.mk
+
+# What happens here? We link monitor object files together, starting
+# at MONITOR_BASE (a very high address). But bootloader cannot put
+# things there, so we initially load at LOAD_BASE. A hacky little
+# tool called `elf-reloc' is used to modify segment offsets from
+# MONITOR_BASE-relative to LOAD_BASE-relative.
+# (NB. Linux gets round this by turning its image into raw binary, then 
+# wrapping that with a low-memory bootstrapper.)
+default: boot/boot.o $(OBJS)
+       $(LD) -r -o arch.o $(OBJS)
+       $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET)
+       $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET)
+
+clean:
+       rm -f *.o *~ core boot/*.o boot/*~ boot/core
diff --git a/xen-2.4.16/arch/i386/Rules.mk b/xen-2.4.16/arch/i386/Rules.mk
new file mode 100644 (file)
index 0000000..1727771
--- /dev/null
@@ -0,0 +1,14 @@
+########################################
+# x86-specific definitions
+
+CC := gcc
+LD := ld
+# Linker should relocate monitor to this address
+MONITOR_BASE := 0xE0100000
+# Bootloader should load monitor to this real address
+LOAD_BASE    := 0x00100000
+CFLAGS  := -fno-builtin -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE) 
+CFLAGS  += -I$(BASEDIR)/include -D__KERNEL__
+LDFLAGS := -T xeno.lds -N
+
+
diff --git a/xen-2.4.16/arch/i386/apic.c b/xen-2.4.16/arch/i386/apic.c
new file mode 100644 (file)
index 0000000..a09613b
--- /dev/null
@@ -0,0 +1,836 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/sched.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <asm/mc146818rtc.h>
+#include <asm/msr.h>
+#include <xeno/errno.h>
+#include <asm/atomic.h>
+#include <xeno/smp.h>
+#include <xeno/interrupt.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+int get_maxlvt(void)
+{
+    unsigned int v, ver, maxlvt;
+
+    v = apic_read(APIC_LVR);
+    ver = GET_APIC_VERSION(v);
+    /* 82489DXs do not report # of LVT entries. */
+    maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+    return maxlvt;
+}
+
+void clear_local_APIC(void)
+{
+    int maxlvt;
+    unsigned long v;
+
+    maxlvt = get_maxlvt();
+
+    /*
+     * Careful: we have to set masks only first to deassert
+     * any level-triggered sources.
+     */
+    v = apic_read(APIC_LVTT);
+    apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+    v = apic_read(APIC_LVT0);
+    apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+    v = apic_read(APIC_LVT1);
+    apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+    if (maxlvt >= 3) {
+        v = apic_read(APIC_LVTERR);
+        apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+    }
+    if (maxlvt >= 4) {
+        v = apic_read(APIC_LVTPC);
+        apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+    }
+
+    /*
+     * Clean APIC state for other OSs:
+     */
+    apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+    apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+    apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+    if (maxlvt >= 3)
+        apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+    if (maxlvt >= 4)
+        apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+}
+
+void __init connect_bsp_APIC(void)
+{
+    if (pic_mode) {
+        /*
+         * Do not trust the local APIC being empty at bootup.
+         */
+        clear_local_APIC();
+        /*
+         * PIC mode, enable APIC mode in the IMCR, i.e.
+         * connect BSP's local APIC to INT and NMI lines.
+         */
+        printk("leaving PIC mode, enabling APIC mode.\n");
+        outb(0x70, 0x22);
+        outb(0x01, 0x23);
+    }
+}
+
+void disconnect_bsp_APIC(void)
+{
+    if (pic_mode) {
+        /*
+         * Put the board back into PIC mode (has an effect
+         * only on certain older boards).  Note that APIC
+         * interrupts, including IPIs, won't work beyond
+         * this point!  The only exception are INIT IPIs.
+         */
+        printk("disabling APIC mode, entering PIC mode.\n");
+        outb(0x70, 0x22);
+        outb(0x00, 0x23);
+    }
+}
+
+void disable_local_APIC(void)
+{
+    unsigned long value;
+
+    clear_local_APIC();
+
+    /*
+     * Disable APIC (implies clearing of registers
+     * for 82489DX!).
+     */
+    value = apic_read(APIC_SPIV);
+    value &= ~APIC_SPIV_APIC_ENABLED;
+    apic_write_around(APIC_SPIV, value);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+    unsigned int reg0, reg1;
+
+    /*
+     * The version register is read-only in a real APIC.
+     */
+    reg0 = apic_read(APIC_LVR);
+    Dprintk("Getting VERSION: %x\n", reg0);
+    apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+    reg1 = apic_read(APIC_LVR);
+    Dprintk("Getting VERSION: %x\n", reg1);
+
+    /*
+     * The two version reads above should print the same
+     * numbers.  If the second one is different, then we
+     * poke at a non-APIC.
+     */
+    if (reg1 != reg0)
+        return 0;
+
+    /*
+     * Check if the version looks reasonable.
+     */
+    reg1 = GET_APIC_VERSION(reg0);
+    if (reg1 == 0x00 || reg1 == 0xff)
+        return 0;
+    reg1 = get_maxlvt();
+    if (reg1 < 0x02 || reg1 == 0xff)
+        return 0;
+
+    /*
+     * The ID register is read/write in a real APIC.
+     */
+    reg0 = apic_read(APIC_ID);
+    Dprintk("Getting ID: %x\n", reg0);
+    apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+    reg1 = apic_read(APIC_ID);
+    Dprintk("Getting ID: %x\n", reg1);
+    apic_write(APIC_ID, reg0);
+    if (reg1 != (reg0 ^ APIC_ID_MASK))
+        return 0;
+
+    /*
+     * The next two are just to see if we have sane values.
+     * They're only really relevant if we're in Virtual Wire
+     * compatibility mode, but most boxes are these days.
+     */
+    reg0 = apic_read(APIC_LVT0);
+    Dprintk("Getting LVT0: %x\n", reg0);
+    reg1 = apic_read(APIC_LVT1);
+    Dprintk("Getting LVT1: %x\n", reg1);
+
+    return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+    /*
+     * Wait for idle.
+     */
+    apic_wait_icr_idle();
+
+    Dprintk("Synchronizing Arb IDs.\n");
+    apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+                      | APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+    unsigned long value, ver;
+
+    /*
+     * Don't do the setup now if we have a SMP BIOS as the
+     * through-I/O-APIC virtual wire mode might be active.
+     */
+    if (smp_found_config || !cpu_has_apic)
+        return;
+
+    value = apic_read(APIC_LVR);
+    ver = GET_APIC_VERSION(value);
+
+    /*
+     * Do not trust the local APIC being empty at bootup.
+     */
+    clear_local_APIC();
+
+    /*
+     * Enable APIC.
+     */
+    value = apic_read(APIC_SPIV);
+    value &= ~APIC_VECTOR_MASK;
+    value |= APIC_SPIV_APIC_ENABLED;
+    value |= APIC_SPIV_FOCUS_DISABLED;
+    value |= SPURIOUS_APIC_VECTOR;
+    apic_write_around(APIC_SPIV, value);
+
+    /*
+     * Set up the virtual wire mode.
+     */
+    apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+    value = APIC_DM_NMI;
+    if (!APIC_INTEGRATED(ver))         /* 82489DX */
+        value |= APIC_LVT_LEVEL_TRIGGER;
+    apic_write_around(APIC_LVT1, value);
+}
+
+void __init setup_local_APIC (void)
+{
+    unsigned long value, ver, maxlvt;
+
+    value = apic_read(APIC_LVR);
+    ver = GET_APIC_VERSION(value);
+
+    if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+        __error_in_apic_c();
+
+    /* Double-check whether this APIC is really registered. */
+    if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+        BUG();
+
+    /*
+     * Intel recommends setting DFR, LDR and TPR before enabling
+     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+     * document number 292116).  So here it goes...
+     */
+
+    /*
+     * In clustered apic mode, the firmware does this for us.
+     * Put the APIC into flat delivery mode.
+     * Must be "all ones" explicitly for 82489DX.
+     */
+    apic_write_around(APIC_DFR, 0xffffffff);
+
+    /*
+     * Set up the logical destination ID.
+     */
+    value = apic_read(APIC_LDR);
+    value &= ~APIC_LDR_MASK;
+    value |= (1<<(smp_processor_id()+24));
+    apic_write_around(APIC_LDR, value);
+
+    /*
+     * Set Task Priority to 'accept all'. We never change this
+     * later on.
+     */
+    value = apic_read(APIC_TASKPRI);
+    value &= ~APIC_TPRI_MASK;
+    apic_write_around(APIC_TASKPRI, value);
+
+    /*
+     * Now that we are all set up, enable the APIC
+     */
+    value = apic_read(APIC_SPIV);
+    value &= ~APIC_VECTOR_MASK;
+    /*
+     * Enable APIC
+     */
+    value |= APIC_SPIV_APIC_ENABLED;
+
+    /* Enable focus processor (bit==0) */
+    value &= ~APIC_SPIV_FOCUS_DISABLED;
+
+    /* Set spurious IRQ vector */
+    value |= SPURIOUS_APIC_VECTOR;
+    apic_write_around(APIC_SPIV, value);
+
+    /*
+     * Set up LVT0, LVT1:
+     *
+     * set up through-local-APIC on the BP's LINT0. This is not
+     * strictly necessary in pure symmetric-IO mode, but sometimes
+     * we delegate interrupts to the 8259A.
+     */
+    /*
+     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+     */
+    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+    if (!smp_processor_id()) { 
+/* && (pic_mode || !value)) { */
+        value = APIC_DM_EXTINT;
+        printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+    } else {
+        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+        printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+    }
+    apic_write_around(APIC_LVT0, value);
+
+    /*
+     * only the BP should see the LINT1 NMI signal, obviously.
+     */
+    if (!smp_processor_id())
+        value = APIC_DM_NMI;
+    else
+        value = APIC_DM_NMI | APIC_LVT_MASKED;
+    if (!APIC_INTEGRATED(ver))         /* 82489DX */
+        value |= APIC_LVT_LEVEL_TRIGGER;
+    apic_write_around(APIC_LVT1, value);
+
+    if (APIC_INTEGRATED(ver)) {                /* !82489DX */
+        maxlvt = get_maxlvt();
+        if (maxlvt > 3)                /* Due to the Pentium erratum 3AP. */
+            apic_write(APIC_ESR, 0);
+        value = apic_read(APIC_ESR);
+        printk("ESR value before enabling vector: %08lx\n", value);
+
+        value = ERROR_APIC_VECTOR;      /* enables sending errors */
+        apic_write_around(APIC_LVTERR, value);
+        /*
+         * spec says clear errors after enabling vector.
+         */
+        if (maxlvt > 3)
+            apic_write(APIC_ESR, 0);
+        value = apic_read(APIC_ESR);
+        printk("ESR value after enabling vector: %08lx\n", value);
+    } else {
+        printk("No ESR for 82489DX.\n");
+    }
+}
+
+
+static inline void apic_pm_init1(void) { }
+static inline void apic_pm_init2(void) { }
+
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+static int __init detect_init_APIC (void)
+{
+    u32 h, l, features;
+    extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+    /* Workaround for us being called before identify_cpu(). */
+    get_cpu_vendor(&boot_cpu_data);
+
+    switch (boot_cpu_data.x86_vendor) {
+    case X86_VENDOR_AMD:
+        if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
+            break;
+        goto no_apic;
+    case X86_VENDOR_INTEL:
+        if (boot_cpu_data.x86 == 6 ||
+            (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+            (boot_cpu_data.x86 == 5 && cpu_has_apic))
+            break;
+        goto no_apic;
+    default:
+        goto no_apic;
+    }
+
+    if (!cpu_has_apic) {
+        /*
+         * Some BIOSes disable the local APIC in the
+         * APIC_BASE MSR. This can only be done in
+         * software for Intel P6 and AMD K7 (Model > 1).
+         */
+        rdmsr(MSR_IA32_APICBASE, l, h);
+        if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+            printk("Local APIC disabled by BIOS -- reenabling.\n");
+            l &= ~MSR_IA32_APICBASE_BASE;
+            l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+            wrmsr(MSR_IA32_APICBASE, l, h);
+        }
+    }
+    /*
+     * The APIC feature bit should now be enabled
+     * in `cpuid'
+     */
+    features = cpuid_edx(1);
+    if (!(features & (1 << X86_FEATURE_APIC))) {
+        printk("Could not enable APIC!\n");
+        return -1;
+    }
+
+    set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+    boot_cpu_physical_apicid = 0;
+
+    printk("Found and enabled local APIC!\n");
+
+    apic_pm_init1();
+
+    return 0;
+
+ no_apic:
+    printk("No local APIC present or hardware disabled\n");
+    return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+    unsigned long apic_phys = 0;
+
+    /*
+     * If no local APIC can be found then set up a fake all zeroes page to 
+     * simulate the local APIC and another one for the IO-APIC.
+     */
+    if (!smp_found_config && detect_init_APIC()) {
+        apic_phys = get_free_page(GFP_KERNEL);
+        apic_phys = __pa(apic_phys);
+    } else
+        apic_phys = mp_lapic_addr;
+
+    set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+    Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+
+    /*
+     * Fetch the APIC ID of the BSP in case we have a
+     * default configuration (or the MP table is broken).
+     */
+    if (boot_cpu_physical_apicid == -1U)
+        boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+    {
+        unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0;
+        int i;
+
+        for (i = 0; i < nr_ioapics; i++) {
+            if (smp_found_config)
+                ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+            set_fixmap_nocache(idx, ioapic_phys);
+            Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
+                    __fix_to_virt(idx), ioapic_phys);
+            idx++;
+        }
+    }
+#endif
+}
+
+/*
+ * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
+ * per second. We assume that the caller has already set up the local
+ * APIC.
+ *
+ * The APIC timer is not exactly in sync with the external timer chip;
+ * it closely follows bus clocks.
+ */
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+    /*extern spinlock_t i8253_lock;*/
+    /*unsigned long flags;*/
+
+    unsigned int count;
+
+    /*spin_lock_irqsave(&i8253_lock, flags);*/
+
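+    /* Writing 0x00 to the 8254 control port (0x43) latches counter 0;
+       the frozen count is then read back LSB-first from port 0x40. */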
+    outb_p(0x00, 0x43);
+    count = inb_p(0x40);
+    count |= inb_p(0x40) << 8;
+
+    /*spin_unlock_irqrestore(&i8253_lock, flags);*/
+
+    return count;
+}
+
+void __init wait_8254_wraparound(void)
+{
+    unsigned int curr_count, prev_count=~0;
+    int delta;
+
+    curr_count = get_8254_timer_count();
+
+    do {
+        prev_count = curr_count;
+        curr_count = get_8254_timer_count();
+        delta = curr_count-prev_count;
+
+        /*
+         * This limit for delta seems arbitrary, but it isn't; it's
+         * slightly above the level of error a buggy Mercury/Neptune
+         * chipset timer can cause.
+         */
+
+    } while (delta < 300);
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clocks. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+
+#define APIC_DIVISOR 16
+
+void __setup_APIC_LVTT(unsigned int clocks)
+{
+    unsigned int lvtt1_value, tmp_value;
+
+    lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) |
+        APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+    apic_write_around(APIC_LVTT, lvtt1_value);
+
+    /*
+     * Divide PICLK by 16
+     */
+    tmp_value = apic_read(APIC_TDCR);
+    apic_write_around(APIC_TDCR, (tmp_value
+                                  & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+                      | APIC_TDR_DIV_16);
+
+    apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+void setup_APIC_timer(void * data)
+{
+    unsigned int clocks = (unsigned int) data, slice, t0, t1;
+    unsigned long flags;
+    int delta;
+
+    __save_flags(flags);
+    __sti();
+    /*
+     * ok, Intel has some smart code in their APIC that knows
+     * if a CPU was in 'hlt' lowpower mode, and this increases
+     * its APIC arbitration priority. To avoid the external timer
+     * IRQ APIC event being in sync with the APIC clock, we
+     * introduce an interrupt skew to spread out timer events.
+     *
+     * The number of slices within a 'big' timeslice is smp_num_cpus+1
+     */
+
+    slice = clocks / (smp_num_cpus+1);
+    printk("cpu: %d, clocks: %d, slice: %d\n",
+           smp_processor_id(), clocks, slice);
+
+    /*
+     * Wait for IRQ0's slice:
+     */
+    wait_8254_wraparound();
+
+    __setup_APIC_LVTT(clocks);
+
+    t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
+    /* Wait till TMCCT gets reloaded from TMICT... */
+    do {
+        t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
+        delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
+    } while (delta >= 0);
+    /* Now wait for our slice for real. */
+    do {
+        t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
+        delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
+    } while (delta < 0);
+
+    __setup_APIC_LVTT(clocks);
+
+    printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n",
+           smp_processor_id(), t0, t1, delta, slice, clocks);
+
+    __restore_flags(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs in sync. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyway: no accidental
+ * APIC irq that way.
+ */
+
+int __init calibrate_APIC_clock(void)
+{
+    unsigned long long t1 = 0, t2 = 0;
+    long tt1, tt2;
+    long result;
+    int i;
+    const int LOOPS = HZ/10;
+
+    printk("calibrating APIC timer ...\n");
+
+    /*
+     * Put whatever arbitrary (but long enough) timeout
+     * value into the APIC clock, we just want to get the
+     * counter running for calibration.
+     */
+    __setup_APIC_LVTT(1000000000);
+
+    /*
+     * The timer chip counts down to zero. Let's wait
+     * for a wraparound to start exact measurement:
+     * (the current tick might have been already half done)
+     */
+
+    wait_8254_wraparound();
+
+    /*
+     * We wrapped around just now. Let's start:
+     */
+    rdtscll(t1);
+    tt1 = apic_read(APIC_TMCCT);
+
+    /*
+     * Let's wait LOOPS wraparounds:
+     */
+    for (i = 0; i < LOOPS; i++)
+        wait_8254_wraparound();
+
+    tt2 = apic_read(APIC_TMCCT);
+    rdtscll(t2);
+
+    /*
+     * The APIC bus clock counter is 32 bits only; it
+     * might have overflowed, but note that we use signed
+     * longs, thus no extra care is needed.
+     *
+     * (Underflowed, to be exact, as the timer counts down ;)
+     */
+
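+    /* (tt1-tt2) raw APIC ticks over LOOPS jiffies, scaled back up by
+       the divisor, gives APIC bus clocks per jiffy (timer period). */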
+    result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+
+    printk("..... CPU clock speed is %ld.%04ld MHz.\n",
+           ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+           ((long)(t2-t1)/LOOPS)%(1000000/HZ));
+
+    printk("..... host bus clock speed is %ld.%04ld MHz.\n",
+           result/(1000000/HZ),
+           result%(1000000/HZ));
+
+    return result;
+}
+
+static unsigned int calibration_result;
+
+void __init setup_APIC_clocks (void)
+{
+    printk("Using local APIC timer interrupts.\n");
+    using_apic_timer = 1;
+
+    __cli();
+
+    calibration_result = calibrate_APIC_clock();
+    /*
+     * Now set up the timer for real.
+     */
+    setup_APIC_timer((void *)calibration_result);
+
+    __sti();
+
+    /* and update all other cpus */
+    smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1);
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * Local timer interrupt handler. It does both profiling and
+ * process statistics/rescheduling.
+ *
+ * We do profiling in every local tick, statistics/rescheduling
+ * happen only every 'profiling multiplier' ticks. The default
+ * multiplier is 1 and it can be changed by writing the new multiplier
+ * value into /proc/profile.
+ */
+
+inline void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+    update_process_times(user_mode(regs));
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+unsigned int apic_timer_irqs [NR_CPUS];
+
+void smp_apic_timer_interrupt(struct pt_regs * regs)
+{
+    int cpu = smp_processor_id();
+
+    /*
+     * the NMI deadlock-detector uses this.
+     */
+    apic_timer_irqs[cpu]++;
+
+    /*
+     * NOTE! We'd better ACK the irq immediately,
+     * because timer handling can be slow.
+     */
+    ack_APIC_irq();
+    /*
+     * update_process_times() expects us to have done irq_enter().
+     * Besides, if we don't, timer interrupts ignore the global
+     * interrupt lock, which is the WrongThing (tm) to do.
+     */
+    irq_enter(cpu, 0);
+    smp_local_timer_interrupt(regs);
+    irq_exit(cpu, 0);
+
+    if (softirq_pending(cpu))
+        do_softirq();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+    unsigned long v;
+
+    /*
+     * Check if this really is a spurious interrupt and ACK it
+     * if it is a vectored one.  Just in case...
+     * Spurious interrupts should not be ACKed.
+     */
+    v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+    if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+        ack_APIC_irq();
+
+    /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+    printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
+           smp_processor_id());
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+    unsigned long v, v1;
+
+    /* First tickle the hardware, only then report what went on. -- REW */
+    v = apic_read(APIC_ESR);
+    apic_write(APIC_ESR, 0);
+    v1 = apic_read(APIC_ESR);
+    ack_APIC_irq();
+    atomic_inc(&irq_err_count);
+
+    /* Here is what the APIC error bits mean:
+       0: Send CS error
+       1: Receive CS error
+       2: Send accept error
+       3: Receive accept error
+       4: Reserved
+       5: Send illegal vector
+       6: Received illegal vector
+       7: Illegal register address
+    */
+    printk("APIC error on CPU%d: %02lx(%02lx)\n",
+           smp_processor_id(), v, v1);
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+    if (!smp_found_config && !cpu_has_apic)
+        return -1;
+
+    /*
+     * Complain if the BIOS pretends there is one.
+     */
+    if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+        printk("BIOS bug, local APIC #%d not detected!...\n",
+               boot_cpu_physical_apicid);
+        return -1;
+    }
+
+    verify_local_APIC();
+
+    connect_bsp_APIC();
+
+    phys_cpu_present_map = 1;
+    apic_write_around(APIC_ID, boot_cpu_physical_apicid);
+
+    apic_pm_init2();
+
+    setup_local_APIC();
+
+#ifdef CONFIG_X86_IO_APIC
+    if (smp_found_config && nr_ioapics)
+        setup_IO_APIC();
+#endif
+    setup_APIC_clocks();
+
+    return 0;
+}
diff --git a/xen-2.4.16/arch/i386/boot/boot.S b/xen-2.4.16/arch/i386/boot/boot.S
new file mode 100644 (file)
index 0000000..1b7323e
--- /dev/null
@@ -0,0 +1,241 @@
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define  SECONDARY_CPU_FLAG 0xA5A5A5A5
+                
+               .text
+
+ENTRY(start)
+        jmp hal_entry
+
+        .align 4
+
+/*** MULTIBOOT HEADER ***/
+        /* Magic number indicating a Multiboot header. */
+       .long   0x1BADB002
+       /* Flags to bootloader (see Multiboot spec). */
+       .long   0x00000006
+       /* Checksum: must be the negated sum of the first two fields. */
+       .long   -0x1BADB008
+        /* Unused loader addresses (ELF header has all this already).*/
+        .long   0,0,0,0,0
+        /* EGA text mode. */
+        .long   1,0,0,0
+        
+hal_entry:
+        /* Set up a few descriptors: on entry only CS is guaranteed good. */
+        lgdt    %cs:nopaging_gdt_descr-__PAGE_OFFSET
+        mov     $(__HYPERVISOR_DS),%ecx
+        mov     %ecx,%ds
+        mov     %ecx,%es
+        ljmp    $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET
+1:      lss     stack_start-__PAGE_OFFSET,%esp
+
+        /* Reset EFLAGS (subsumes CLI and CLD). */
+       pushl   $0
+       popf
+
+        /* CPU type checks. We need P6+. */
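+        /* Toggle the ID flag (bit 21) of EFLAGS: if it sticks both */
+        /* ways, the CPUID instruction is supported.                */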
+        mov     $0x200000,%edx
+        pushfl
+        pop     %ecx
+        and     %edx,%ecx
+        jne     bad_cpu            # ID bit should be clear
+        pushl   %edx
+        popfl
+        pushfl
+        pop     %ecx
+        and     %edx,%ecx
+        je      bad_cpu            # ID bit should be set
+
+        /* Set up CR0. */
+        mov     %cr0,%ecx
+        and     $0x00000011,%ecx   # save ET and PE
+        or      $0x00050022,%ecx   # set AM, WP, NE and MP
+        mov     %ecx,%cr0
+
+        /* Set up FPU. */
+        fninit
+        
+        /* Set up CR4, except global flag which Intel requires should be     */
+        /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */
+        mov     %cr4,%ecx
+        or      mmu_cr4_features-__PAGE_OFFSET,%ecx
+        mov     %ecx,mmu_cr4_features-__PAGE_OFFSET
+        and     $0x7f,%ecx /* disable GLOBAL bit */
+        mov     %ecx,%cr4
+                
+        /* Is this a non-boot processor? */
+        cmp     $(SECONDARY_CPU_FLAG),%ebx
+        jne     continue_boot_cpu
+        
+        call    start_paging
+        lidt    idt_descr                        
+        jmp     initialize_secondary
+        
+continue_boot_cpu:
+        add     $__PAGE_OFFSET,%ebx
+       push    %ebx /* Multiboot info struct */
+       push    %eax /* Multiboot magic value */
+
+        /* Initialize BSS (no nasty surprises!) */
+        mov     $__bss_start-__PAGE_OFFSET,%edi
+        mov     $_end-__PAGE_OFFSET,%ecx
+        sub     %edi,%ecx
+        xor     %eax,%eax
+        rep     stosb
+
+        /* Initialize low and high mappings of all memory with 4MB pages */
+        mov     $idle0_pg_table-__PAGE_OFFSET,%edi
+        mov     $0x1e3,%eax                  /* PRESENT+RW+A+D+4MB+GLOBAL */
+1:      mov     %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */
+        stosl                                /* low mapping */
+        add     $(1<<L2_PAGETABLE_SHIFT),%eax
+        cmp     $MAX_USABLE_ADDRESS+0x1e3,%eax
+        jne     1b
+
+        call    start_paging        
+        call    setup_idt
+        lidt    idt_descr
+                
+        /* Call into main C routine. This should never return.*/
+               call    cmain
+        ud2     /* Force a panic (invalid opcode). */
+
+start_paging:
+        mov     $idle0_pg_table-__PAGE_OFFSET,%eax
+        mov     %eax,%cr3
+        mov     %cr0,%eax
+        or      $0x80010000,%eax /* set PG and WP bits */
+        mov     %eax,%cr0
+        jmp     1f
+1:      /* Install relocated selectors (FS/GS unused). */
+        lgdt    gdt_descr
+        mov     $(__HYPERVISOR_DS),%ecx
+        mov     %ecx,%ds
+        mov     %ecx,%es
+        mov     %ecx,%ss
+        ljmp    $(__HYPERVISOR_CS),$1f
+1:      /* Paging enabled, so we can now enable GLOBAL mappings in CR4. */
+        movl    mmu_cr4_features,%ecx
+        movl    %ecx,%cr4
+        /* Relocate ESP */
+        add     $__PAGE_OFFSET,%esp
+        /* Relocate EIP via return jump */
+        pop     %ecx
+        add     $__PAGE_OFFSET,%ecx
+        jmp     *%ecx
+    
+            
+/*** INTERRUPT INITIALISATION ***/
+        
+setup_idt:
+        lea     ignore_int,%edx
+        mov     $(__HYPERVISOR_CS << 16),%eax
+        mov     %dx,%ax            /* selector = 0x0010 = cs */
+        mov     $0x8E00,%dx        /* interrupt gate - dpl=0, present */
+
+        lea     SYMBOL_NAME(idt_table),%edi
+        mov     $256,%ecx
+1:      mov     %eax,(%edi)
+        mov     %edx,4(%edi)
+        add     $8,%edi
+        loop    1b
+        ret
+
+/* This is the default interrupt handler. */
+int_msg:
+        .asciz "Unknown interrupt\n"
+        ALIGN
+ignore_int:
+        cld
+        push    %eax
+        push    %ecx
+        push    %edx
+        pushl   %es
+        pushl   %ds
+        mov     $(__HYPERVISOR_DS),%eax
+        mov     %eax,%ds
+        mov     %eax,%es
+        pushl   $int_msg
+        call    SYMBOL_NAME(printf)
+1:      jmp     1b
+        pop     %eax
+        popl    %ds
+        popl    %es
+        pop     %edx
+        pop     %ecx
+        pop     %eax
+        iret
+
+
+bad_cpu_msg:
+        .asciz  "Bad CPU type. Need P6+."
+        ALIGN
+bad_cpu: 
+        call    init_serial
+        mov     $bad_cpu_msg,%esi
+1:      lodsb
+        test    %al,%al
+        je      1f
+        push    %eax
+        call    putchar_serial
+        add     $4,%esp
+        jmp     1b
+1:      jmp     1b
+                   
+        
+/*** STACK LOCATION ***/
+        
+ENTRY(stack_start)
+        .long SYMBOL_NAME(idle0_task_union)+8192-__PAGE_OFFSET
+        .long __HYPERVISOR_DS
+        
+/*** DESCRIPTOR TABLES ***/
+
+.globl SYMBOL_NAME(idt)
+.globl SYMBOL_NAME(gdt)        
+
+        ALIGN
+        
+        .word   0    
+idt_descr:
+       .word   256*8-1
+SYMBOL_NAME(idt):
+        .long  SYMBOL_NAME(idt_table)
+
+        .word   0
+gdt_descr:
+       .word   256*8-1
+SYMBOL_NAME(gdt):       
+        .long   SYMBOL_NAME(gdt_table) /* gdt base */
+
+        .word   0
+nopaging_gdt_descr:
+        .word   256*8-1
+        .long   SYMBOL_NAME(gdt_table)-__PAGE_OFFSET
+        
+        ALIGN
+ENTRY(gdt_table)
+        .quad 0x0000000000000000     /* NULL descriptor */
+        .quad 0x0000000000000000     /* not used */
+        .quad 0x00ceba000000ffff     /* 0x11 ring 1 3.5GB code at 0x00000000 */
+        .quad 0x00ceb2000000ffff     /* 0x19 ring 1 3.5GB data at 0x00000000 */
+        .quad 0x00cefa000000ffff     /* 0x23 ring 3 3.5GB code at 0x00000000 */
+        .quad 0x00cef2000000ffff     /* 0x2b ring 3 3.5GB data at 0x00000000 */
+        .quad 0x00cf9a000000ffff     /* 0x30 ring 0 4.0GB code at 0x00000000 */
+        .quad 0x00cf92000000ffff     /* 0x38 ring 0 4.0GB data at 0x00000000 */
+        .quad 0x0000000000000000
+        .quad 0x0000000000000000
+        .quad 0x0000000000000000
+        .quad 0x0000000000000000
+        .fill NR_CPUS*4,8,0             /* space for TSS's and LDT's */
+
+# The following adds 12kB to the kernel file size.
+        .org 0x1000
+ENTRY(idle0_pg_table)
+        .org 0x2000
+ENTRY(idle0_task_union)
+        .org 0x4000
+ENTRY(stext)
+ENTRY(_stext)
diff --git a/xen-2.4.16/arch/i386/delay.c b/xen-2.4.16/arch/i386/delay.c
new file mode 100644 (file)
index 0000000..078ff77
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ *     Precise Delay Loops for i386
+ *
+ *     Copyright (C) 1993 Linus Torvalds
+ *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *     The __delay function must _NOT_ be inlined as its execution time
+ *     depends wildly on alignment on many x86 processors. The additional
+ *     jump magic is needed to get the timing stable on all the CPU's
+ *     we have to worry about.
+ */
+
+#include <xeno/config.h>
+#include <xeno/delay.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+void __udelay(unsigned long usecs)
+{
+    unsigned long ticks = usecs * ticks_per_usec;
+    unsigned long s, e;
+
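+    /* Spin on the TSC until the requested number of cycles has elapsed;
+       rep_nop() is the PAUSE hint, to be polite to a sibling thread. */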
+    rdtscl(s);
+    do
+    {
+        rep_nop();
+        rdtscl(e);
+    } while ((e-s) < ticks);
+}
diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S
new file mode 100644 (file)
index 0000000..996c1eb
--- /dev/null
@@ -0,0 +1,526 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * Stack layout in 'ret_from_system_call':
+ *      0(%esp) - %ebx
+ *      4(%esp) - %ecx
+ *      8(%esp) - %edx
+ *       C(%esp) - %esi
+ *     10(%esp) - %edi
+ *     14(%esp) - %ebp
+ *     18(%esp) - %eax
+ *     1C(%esp) - %ds
+ *     20(%esp) - %es
+ *     24(%esp) - orig_eax
+ *     28(%esp) - %eip
+ *     2C(%esp) - %cs
+ *     30(%esp) - %eflags
+ *     34(%esp) - %oldesp
+ *     38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+/* The idea for callbacks from monitor -> guest OS.
+ * 
+ * First, we require that all callbacks (either via a supplied
+ * interrupt-descriptor-table, or via the special event or failsafe callbacks
+ * in the shared-info-structure) are to ring 1. This just makes life easier,
+ * in that it means we don't have to do messy GDT/LDT lookups to find
+ * out the privilege level of the return code-selector. That code
+ * would just be a hassle to write, and would need to account for running
+ * off the end of the GDT/LDT, for example. The event callback has quite
+ * a constrained callback method: the guest OS provides a linear address
+ * which we call back to using the hard-coded __GUEST_CS descriptor (which
+ * is a ring 1 descriptor). For IDT callbacks, we check that the provided
+ * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe, as
+ * we don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
+ * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
+ * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
+ * than the correct ring) and bad things are bound to ensue -- IRET is
+ * likely to fault, and we may end up killing the domain (no harm can
+ * come to the hypervisor itself, though).
+ *      
+ * When doing a callback, we check if the return CS is in ring 0. If so,
+ * callback is delayed until next return to ring != 0.
+ * If return CS is in ring 1, then we create a callback frame
+ * starting at return SS/ESP. The base of the frame does an intra-privilege
+ * interrupt-return.
+ * If return CS is in ring > 1, we create a callback frame starting
+ * at SS/ESP taken from appropriate section of the current TSS. The base
+ * of the frame does an inter-privilege interrupt-return.
+ * 
+ * Note that the "failsafe callback" uses a special stackframe:
+ * { return_DS, return_ES, return_EIP, return_CS, return_EFLAGS, ... }
+ * That is, original values for DS/ES are placed on stack rather than
+ * in DS/ES themselves. Why? It saves us loading them, only to have them
+ * saved/restored in guest OS. Furthermore, if we load them we may cause
+ * a fault if they are invalid, which is a hassle to deal with. We avoid
+ * that problem if we don't load them :-) This property allows us to use
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES
+ * on return to ring != 0, we can simply package it up as a return via
+ * the failsafe callback, and let the guest OS sort it out (perhaps by
+ * killing an application process). Note that we also do this for any
+ * faulting IRET -- just let the guest OS handle it via the event
+ * callback.
+ *
+ * We terminate a domain in the following cases:
+ *  - creating a callback stack frame (due to bad ring-1 stack).
+ *  - faulting IRET on entry to failsafe callback handler.
+ * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
+ * handler in good order (absolutely no faults allowed!).
+ */
+
+#include <xeno/config.h>
+#include <asm/smp.h>
+
+EBX            = 0x00
+ECX            = 0x04
+EDX            = 0x08
+ESI            = 0x0C
+EDI            = 0x10
+EBP            = 0x14
+EAX            = 0x18
+DS             = 0x1C
+ES             = 0x20
+ORIG_EAX       = 0x24
+EIP            = 0x28
+CS             = 0x2C
+EFLAGS         = 0x30
+OLDESP         = 0x34
+OLDSS          = 0x38
+
+/* Offsets in task_struct */
+PROCESSOR       =  0
+STATE           =  4
+HYP_EVENTS      =  8
+DOMAIN          = 12        
+SHARED_INFO     = 16
+
+/* Offsets in shared_info_t */
+EVENTS          =  0
+EVENTS_ENABLE   =  4
+EVENT_ADDR      =  8
+FAILSAFE_ADDR   = 12
+
+/* Offsets in guest_trap_bounce */
+GTB_ERROR_CODE  =  0
+GTB_CR2         =  4
+GTB_FLAGS       =  8
+GTB_CS          = 10
+GTB_EIP         = 12
+GTBF_TRAP       =  1
+GTBF_TRAP_NOCODE = 2
+GTBF_TRAP_CR2   = 4
+                        
+CF_MASK                = 0x00000001
+IF_MASK                = 0x00000200
+NT_MASK                = 0x00004000
+
+#define SAVE_ALL \
+       cld; \
+       pushl %es; \
+       pushl %ds; \
+       pushl %eax; \
+       pushl %ebp; \
+       pushl %edi; \
+       pushl %esi; \
+       pushl %edx; \
+       pushl %ecx; \
+       pushl %ebx; \
+       movl $(__HYPERVISOR_DS),%edx; \
+       movl %edx,%ds; \
+       movl %edx,%es;
+
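+/* If popping %ds/%es or the final iret faults (labels 1/2/3 below), the */
+/* exception table sends us to the matching fixup (labels 4/5/6), which  */
+/* rebuilds a full frame and bails out via the guest failsafe callback.  */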
+#define RESTORE_ALL    \
+       popl %ebx;      \
+       popl %ecx;      \
+       popl %edx;      \
+       popl %esi;      \
+       popl %edi;      \
+       popl %ebp;      \
+       popl %eax;      \
+1:     popl %ds;       \
+2:     popl %es;       \
+        addl $4,%esp;  \
+3:      iret;          \
+.section .fixup,"ax";  \
+6:      subl $4,%esp;   \
+        pushl %es;      \
+5:      pushl %ds;      \
+4:      pushl %eax;     \
+       pushl %ebp;     \
+       pushl %edi;     \
+       pushl %esi;     \
+       pushl %edx;     \
+       pushl %ecx;     \
+       pushl %ebx;     \
+       pushl %ss;           \
+       popl  %ds;           \
+       pushl %ss;           \
+       popl  %es;           \
+       jmp  failsafe_callback;      \
+.previous;                           \
+.section __ex_table,"a";             \
+       .align 4;                    \
+       .long 1b,4b;                 \
+       .long 2b,5b;                 \
+       .long 3b,6b;                 \
+.previous
+
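+/* The task_struct and its kernel stack share one 8kB-aligned block, with */
+/* the task_struct at the bottom; masking ESP with -8192 yields 'current'. */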
+#define GET_CURRENT(reg)  \
+       movl $-8192, reg; \
+       andl %esp, reg
+
+ENTRY(ret_from_newdomain)
+       GET_CURRENT(%ebx)
+       jmp test_all_events
+
+        ALIGN
+restore_all:
+       RESTORE_ALL
+
+        ALIGN
+ENTRY(hypervisor_call)
+        pushl %eax                     # save orig_eax
+       SAVE_ALL
+       GET_CURRENT(%ebx)
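+        # The hypercall number arrives in %eax; clamp it to one byte and
+        # use it to index the 4-byte-per-entry hypervisor_call_table.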
+       andl $255,%eax
+       call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4)
+       movl %eax,EAX(%esp)             # save the return value
+
+test_all_events:
+        mov  PROCESSOR(%ebx),%eax
+        shl  $4,%eax                    # sizeof(irq_cpustat) == 16
+        lea  guest_trap_bounce(%eax),%edx
+        cli                             # tests must not race interrupts
+        xorl %ecx,%ecx
+        notl %ecx
+test_softirqs:  
+        mov  PROCESSOR(%ebx),%eax
+        shl  $4,%eax                    # sizeof(irq_cpustat) == 16
+        test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+        jnz  process_softirqs
+test_hyp_events:        
+        test %ecx, HYP_EVENTS(%ebx)
+        jnz  process_hyp_events
+test_guest_events:      
+        movl SHARED_INFO(%ebx),%eax
+        test %ecx,EVENTS(%eax)
+        jz   restore_all
+        test %ecx,EVENTS_ENABLE(%eax)
+        jz   restore_all
+        /* Prevent unnecessary reentry of event callback (stack overflow!) */
+        xorl %ecx,%ecx
+        movl %ecx,EVENTS_ENABLE(%eax)      
+/* %eax == shared_info, %ebx == task_struct, %edx == guest_trap_bounce */
+process_guest_events:   
+        movl EVENT_ADDR(%eax),%eax
+        movl %eax,GTB_EIP(%edx)
+        movw $__GUEST_CS,GTB_CS(%edx)
+        call create_bounce_frame
+        jmp  restore_all
+
+        ALIGN
+process_softirqs:       
+        push %edx
+        call SYMBOL_NAME(do_softirq)
+        pop  %edx
+        jmp  test_hyp_events
+        
+        ALIGN
+process_hyp_events:
+        sti
+        call SYMBOL_NAME(do_hyp_events)
+        jmp  test_all_events
+
+/* No special register assumptions */
+failsafe_callback:
+        GET_CURRENT(%ebx)
+        mov  PROCESSOR(%ebx),%eax
+        shl  $4,%eax
+        lea  guest_trap_bounce(%eax),%edx
+        movl SHARED_INFO(%ebx),%eax
+        movl FAILSAFE_ADDR(%eax),%eax
+        movl %eax,GTB_EIP(%edx)
+        movw $__GUEST_CS,GTB_CS(%edx)
+        call create_bounce_frame
+        subl $8,%esi                 # add DS/ES to failsafe stack frame
+        movl DS(%esp),%eax
+FAULT3: movl %eax,(%esi) 
+        movl ES(%esp),%eax
+FAULT4: movl %eax,4(%esi)
+        movl %esi,OLDESP(%esp)
+        popl %ebx
+        popl %ecx
+        popl %edx
+        popl %esi
+        popl %edi
+        popl %ebp
+        popl %eax
+        addl $12,%esp
+FAULT5: iret 
+
+        
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:         */
+/*   {EIP, CS, EFLAGS, [ESP, SS]}                                     */
+/* %edx == guest_trap_bounce, %ebx == task_struct                     */
+/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP.        */
+create_bounce_frame:        
+        mov  CS+4(%esp),%cl
+        test $2,%cl
+        jz   1f /* jump if returning to an existing ring-1 activation */
+        /* obtain ss/esp from TSS -- no current ring-1 activations */
+        movl PROCESSOR(%ebx),%eax
+        shll $8,%eax /* multiply by 256 */
+        addl $init_tss + 12,%eax
+        movl (%eax),%esi /* tss->esp1 */
+FAULT6: movl 4(%eax),%ds /* tss->ss1  */
+        /* base of stack frame must contain ss/esp (inter-priv iret) */
+        subl $8,%esi
+        movl OLDESP+4(%esp),%eax
+FAULT7: movl %eax,(%esi) 
+        movl OLDSS+4(%esp),%eax
+FAULT8: movl %eax,4(%esi) 
+        jmp 2f
+1:      /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
+        movl OLDESP+4(%esp),%esi
+FAULT9: movl OLDSS+4(%esp),%ds 
+2:      /* Construct a stack frame: EFLAGS, CS/EIP */
+        subl $12,%esi
+        movl EIP+4(%esp),%eax
+FAULT10:movl %eax,(%esi) 
+        movl CS+4(%esp),%eax
+FAULT11:movl %eax,4(%esi) 
+        movl EFLAGS+4(%esp),%eax
+FAULT12:movl %eax,8(%esi)
+        /* Rewrite our stack frame and return to ring 1. */
+        movl %ds,OLDSS+4(%esp)
+        movl %esi,OLDESP+4(%esp)
+        movzwl %es:GTB_CS(%edx),%eax
+        movl %eax,CS+4(%esp)
+        movl %es:GTB_EIP(%edx),%eax
+        movl %eax,EIP+4(%esp)
+        ret
+        
+                              
+.section __ex_table,"a"
+        .align 4
+        .long FAULT1, kill_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT2, kill_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT3, kill_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT4, kill_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT5, kill_domain_fixup1 # Fault executing failsafe iret
+        .long FAULT6, kill_domain_fixup2 # Fault loading ring-1 stack selector
+        .long FAULT7, kill_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT8, kill_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT9, kill_domain_fixup2 # Fault loading ring-1 stack selector
+        .long FAULT10,kill_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT11,kill_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT12,kill_domain_fixup2 # Fault writing to ring-1 stack
+.previous
+               
+# This handler kills domains which experience unrecoverable faults.
+.section .fixup,"ax"
+kill_domain_fixup1:
+        subl  $4,%esp
+        SAVE_ALL
+        jmp   kill_domain
+kill_domain_fixup2:
+        addl  $4,%esp                     
+kill_domain_fixup3:
+        pushl %ss
+        popl  %ds
+        jmp   kill_domain
+.previous
+
+        ALIGN
+process_guest_exception_and_events:        
+        mov  PROCESSOR(%ebx),%eax
+        shl  $4,%eax                    # sizeof(irq_cpustat) == 16
+        lea  guest_trap_bounce(%eax),%edx
+        testb $~0,GTB_FLAGS(%edx)
+        jz   test_all_events
+        call create_bounce_frame        # just the basic frame
+        mov  %es:GTB_FLAGS(%edx),%cl
+        test $GTBF_TRAP_NOCODE,%cl
+        jnz  2f
+        subl $4,%esi                    # push error_code onto guest frame
+        movl %es:GTB_ERROR_CODE(%edx),%eax
+FAULT1: movl %eax,(%esi)
+        test $GTBF_TRAP_CR2,%cl
+        jz   1f
+        subl $4,%esi                    # push %cr2 onto guest frame
+        movl %es:GTB_CR2(%edx),%eax
+FAULT2: movl %eax,(%esi)
+1:      movl %esi,OLDESP(%esp)        
+2:      push %es                        # unclobber %ds
+        pop  %ds 
+        movb $0,GTB_FLAGS(%edx)
+        jmp  test_all_events
+
+        ALIGN
+ENTRY(ret_from_intr)
+       GET_CURRENT(%ebx)
+        movb CS(%esp),%al
+       testb $3,%al    # return to non-supervisor?
+       jne test_all_events
+       jmp restore_all
+
+        ALIGN
+ret_from_exception:
+        movb CS(%esp),%al
+       testb $3,%al    # return to non-supervisor?
+       jne process_guest_exception_and_events
+        jmp restore_all
+
+       ALIGN
+
+ENTRY(divide_error)
+       pushl $0                # no error code
+       pushl $ SYMBOL_NAME(do_divide_error)
+       ALIGN
+error_code:
+       pushl %ds
+       pushl %eax
+       xorl %eax,%eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl %eax                       # eax = -1
+       pushl %ecx
+       pushl %ebx
+       cld
+       movl %es,%ecx
+       movl ORIG_EAX(%esp), %esi       # get the error code
+       movl ES(%esp), %edi             # get the function address
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       movl %esp,%edx
+       pushl %esi                      # push the error code
+       pushl %edx                      # push the pt_regs pointer
+       movl $(__HYPERVISOR_DS),%edx
+       movl %edx,%ds
+       movl %edx,%es
+       GET_CURRENT(%ebx)
+       call *%edi
+       addl $8,%esp
+       jmp ret_from_exception
+
+ENTRY(coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_error)
+       jmp error_code
+
+ENTRY(simd_coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+       jmp error_code
+
+ENTRY(device_not_available)
+       pushl $0
+        pushl $SYMBOL_NAME(math_state_restore)
+        jmp   error_code
+
+ENTRY(debug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_debug)
+       jmp error_code
+
+ENTRY(nmi)
+       pushl %eax
+       SAVE_ALL
+       movl %esp,%edx
+       pushl $0
+       pushl %edx
+       call SYMBOL_NAME(do_nmi)
+       addl $8,%esp
+       RESTORE_ALL
+
+ENTRY(int3)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_int3)
+       jmp error_code
+
+ENTRY(overflow)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_overflow)
+       jmp error_code
+
+ENTRY(bounds)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_bounds)
+       jmp error_code
+
+ENTRY(invalid_op)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_invalid_op)
+       jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+       jmp error_code
+
+ENTRY(double_fault)
+       pushl $ SYMBOL_NAME(do_double_fault)
+       jmp error_code
+
+ENTRY(invalid_TSS)
+       pushl $ SYMBOL_NAME(do_invalid_TSS)
+       jmp error_code
+
+ENTRY(segment_not_present)
+       pushl $ SYMBOL_NAME(do_segment_not_present)
+       jmp error_code
+
+ENTRY(stack_segment)
+       pushl $ SYMBOL_NAME(do_stack_segment)
+       jmp error_code
+
+ENTRY(general_protection)
+       pushl $ SYMBOL_NAME(do_general_protection)
+       jmp error_code
+
+ENTRY(alignment_check)
+       pushl $ SYMBOL_NAME(do_alignment_check)
+       jmp error_code
+
+ENTRY(page_fault)
+       pushl $ SYMBOL_NAME(do_page_fault)
+       jmp error_code
+
+ENTRY(machine_check)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_machine_check)
+       jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+       jmp error_code
+
+.data
+ENTRY(hypervisor_call_table)
+        .long SYMBOL_NAME(do_set_trap_table)
+        .long SYMBOL_NAME(do_process_page_updates)
+        .long SYMBOL_NAME(do_console_write)
+        .long SYMBOL_NAME(do_set_pagetable)
+        .long SYMBOL_NAME(do_set_guest_stack)
+        .long SYMBOL_NAME(do_net_update)
+        .long SYMBOL_NAME(do_fpu_taskswitch)
+        .long SYMBOL_NAME(do_yield)
+        .long SYMBOL_NAME(kill_domain)
+        .long SYMBOL_NAME(do_dom0_op)
+        .long SYMBOL_NAME(do_network_op)
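+        # Pad the table out to NR_syscalls entries with sys_ni_syscall.
+        # (.-hypervisor_call_table)/4 is the number of 4-byte entries
+        # emitted so far.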
+        .rept NR_syscalls-(.-hypervisor_call_table)/4
+        .long SYMBOL_NAME(sys_ni_syscall)
+       .endr
diff --git a/xen-2.4.16/arch/i386/extable.c b/xen-2.4.16/arch/i386/extable.c
new file mode 100644 (file)
index 0000000..4cd9f06
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
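+/*
+ * The exception table is sorted by faulting-instruction address, so a
+ * simple binary search suffices to find the matching fixup.
+ */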
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+                const struct exception_table_entry *last,
+                unsigned long value)
+{
+        while (first <= last) {
+               const struct exception_table_entry *mid;
+               long diff;
+
+               mid = (last - first) / 2 + first;
+               diff = mid->insn - value;
+                if (diff == 0)
+                        return mid->fixup;
+                else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+        return 0;
+}
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+       unsigned long ret = 0;
+       
+#ifndef CONFIG_MODULES
+       /* There is only the kernel to search.  */
+       ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+       return ret;
+#else
+       unsigned long flags;
+       /* The kernel is the last "module" -- no need to treat it special.  */
+       struct module *mp;
+
+       spin_lock_irqsave(&modlist_lock, flags);
+       for (mp = module_list; mp != NULL; mp = mp->next) {
+               if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+                       continue;
+               ret = search_one_table(mp->ex_table_start,
+                                      mp->ex_table_end - 1, addr);
+               if (ret)
+                       break;
+       }
+       spin_unlock_irqrestore(&modlist_lock, flags);
+       return ret;
+#endif
+}
diff --git a/xen-2.4.16/arch/i386/i387.c b/xen-2.4.16/arch/i386/i387.c
new file mode 100644 (file)
index 0000000..dc94cc1
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ *  linux/arch/i386/kernel/i387.c
+ *
+ *  Copyright (C) 1994 Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *  General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+void init_fpu(void)
+{
+    __asm__("fninit");
+    if ( cpu_has_xmm ) load_mxcsr(0x1f80);
+    current->flags |= PF_DONEFPUINIT;
+}
+
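+/*
+ * fxsave/fxrstor cover the extended SSE state on FXSR-capable CPUs;
+ * fnsave/frstor are the legacy i387 forms. fxsave leaves exception
+ * flags pending, hence the trailing fnclex; fnsave reinitialises the
+ * FPU after saving, and the fwait ensures the store has completed.
+ */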
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               asm volatile( "fxsave %0 ; fnclex"
+                             : "=m" (tsk->thread.i387.fxsave) );
+       } else {
+               asm volatile( "fnsave %0 ; fwait"
+                             : "=m" (tsk->thread.i387.fsave) );
+       }
+       tsk->flags &= ~PF_USEDFPU;
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+       __save_init_fpu(tsk);
+       stts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+    if ( cpu_has_fxsr ) {
+        asm volatile( "fxrstor %0"
+                      : : "m" (tsk->thread.i387.fxsave) );
+    } else {
+        asm volatile( "frstor %0"
+                      : : "m" (tsk->thread.i387.fsave) );
+    }
+}
diff --git a/xen-2.4.16/arch/i386/i8259.c b/xen-2.4.16/arch/i386/i8259.c
new file mode 100644 (file)
index 0000000..c9d4dfe
--- /dev/null
@@ -0,0 +1,469 @@
+/******************************************************************************
+ * i8259.c
+ * 
+ * Well, this is required for SMP systems as well, as it builds interrupt
+ * tables for IO-APICs as well as for uniprocessor 8259-alikes.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <asm/ptrace.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/bitops.h>
+#include <xeno/delay.h>
+#include <asm/apic.h>
+
+
+/*
+ * Common place to define all x86 IRQ vectors
+ *
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+       BUILD_IRQ(x##y)
+
+#define BUILD_16_IRQS(x) \
+       BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+       BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+       BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+       BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x20-0x2f)
+ */
+    BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these 
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+    BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+    BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+    BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+    BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+    BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR)
+    BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+    BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+#endif
+
+/*
+ * Every Pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to each: one is a timer
+ * interrupt, the other is error-counter overflow. Linux uses the
+ * local APIC timer interrupt to get a much simpler SMP time
+ * architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+    BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+    BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
+    BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+#endif
+
+#define IRQ(x,y) \
+       IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+       IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+       IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+       IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+       IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+    void (*interrupt[NR_IRQS])(void) = {
+       IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+        IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+       IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+       IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+       IRQLIST_16(0xc), IRQLIST_16(0xd)
+#endif
+    };
+
+#undef IRQ
+#undef IRQLIST_16
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ * Plus some generic x86-specific things, if generic specifics make
+ * any sense at all.
+ * This file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch-independent land.
+ */
+
+spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+    if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+        enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq     disable_8259A_irq
+
+void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{ 
+    enable_8259A_irq(irq);
+    return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+    "XT-PIC",
+    startup_8259A_irq,
+    shutdown_8259A_irq,
+    enable_8259A_irq,
+    disable_8259A_irq,
+    mask_and_ack_8259A,
+    end_8259A_irq,
+    NULL
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the IRQ mask for both 8259A interrupt controllers.
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y)    (((unsigned char *)&(y))[x])
+#define cached_21      (__byte(0,cached_irq_mask))
+#define cached_A1      (__byte(1,cached_irq_mask))
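+/*
+ * cached_21 aliases the low byte of the mask (master PIC, port 0x21)
+ * and cached_A1 the high byte (slave PIC, port 0xA1).
+ */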
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, e.g. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * This 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+    unsigned int mask = 1 << irq;
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    cached_irq_mask |= mask;
+    if (irq & 8)
+        outb(cached_A1,0xA1);
+    else
+        outb(cached_21,0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+    unsigned int mask = ~(1 << irq);
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    cached_irq_mask &= mask;
+    if (irq & 8)
+        outb(cached_A1,0xA1);
+    else
+        outb(cached_21,0x21);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+    unsigned int mask = 1<<irq;
+    unsigned long flags;
+    int ret;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    if (irq < 8)
+        ret = inb(0x20) & mask;
+    else
+        ret = inb(0xA0) & (mask >> 8);
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+
+    return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+    disable_irq_nosync(irq);
+    io_apic_irqs &= ~(1<<irq);
+    irq_desc[irq].handler = &i8259A_irq_type;
+    enable_irq(irq);
+}
+
+/*
+ * This function is expected to be called rarely. Switching between
+ * 8259A registers is slow.
+ * Callers must hold the irq controller spinlock.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+    int value;
+    int irqmask = 1<<irq;
+
+    if (irq < 8) {
+        outb(0x0B,0x20);               /* ISR register */
+        value = inb(0x20) & irqmask;
+        outb(0x0A,0x20);               /* back to the IRR register */
+        return value;
+    }
+    outb(0x0B,0xA0);           /* ISR register */
+    value = inb(0xA0) & (irqmask >> 8);
+    outb(0x0A,0xA0);           /* back to the IRR register */
+    return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI), and the order of EOI
+ * to the two 8259s is important!
+ */
+void mask_and_ack_8259A(unsigned int irq)
+{
+    unsigned int irqmask = 1 << irq;
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+    /*
+     * Lightweight spurious IRQ detection. We do not want
+     * to overdo spurious IRQ handling - it's usually a sign
+     * of hardware problems, so we only do the checks we can
+     * do without slowing down good hardware unnecessarily.
+     *
+     * Note that IRQ7 and IRQ15 (the two spurious IRQs
+     * usually resulting from the 8259A-1|2 PICs) occur
+     * even if the IRQ is masked in the 8259A. Thus we
+     * can check spurious 8259A IRQs without doing the
+     * quite slow i8259A_irq_real() call for every IRQ.
+     * This does not cover 100% of spurious interrupts,
+     * but should be enough to warn the user that there
+     * is something bad going on ...
+     */
+    if (cached_irq_mask & irqmask)
+        goto spurious_8259A_irq;
+    cached_irq_mask |= irqmask;
+
+ handle_real_irq:
+    if (irq & 8) {
+        inb(0xA1);             /* DUMMY - (do we need this?) */
+        outb(cached_A1,0xA1);
+        outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
+        outb(0x62,0x20);       /* 'Specific EOI' to master-IRQ2 */
+    } else {
+        inb(0x21);             /* DUMMY - (do we need this?) */
+        outb(cached_21,0x21);
+        outb(0x60+irq,0x20);   /* 'Specific EOI' to master */
+    }
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+    return;
+
+ spurious_8259A_irq:
+    /*
+     * this is the slow path - should happen rarely.
+     */
+    if (i8259A_irq_real(irq))
+        /*
+         * oops, the IRQ _is_ in service according to the
+         * 8259A - not spurious, go handle it.
+         */
+        goto handle_real_irq;
+
+    {
+        static int spurious_irq_mask;
+        /*
+         * At this point we can be sure the IRQ is spurious,
+         * let's ACK and report it. [once per IRQ]
+         */
+        if (!(spurious_irq_mask & irqmask)) {
+            printk("spurious 8259A interrupt: IRQ%d.\n", irq);
+            spurious_irq_mask |= irqmask;
+        }
+        atomic_inc(&irq_err_count);
+        /*
+         * Theoretically we do not have to handle this IRQ,
+         * but in Linux this does not cause problems and is
+         * simpler for us.
+         */
+        goto handle_real_irq;
+    }
+}
+
+void __init init_8259A(int auto_eoi)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&i8259A_lock, flags);
+
+    outb(0xff, 0x21);  /* mask all of 8259A-1 */
+    outb(0xff, 0xA1);  /* mask all of 8259A-2 */
+
+    /*
+     * outb_p - this has to work on a wide range of PC hardware.
+     */
+    outb_p(0x11, 0x20);        /* ICW1: select 8259A-1 init */
+    outb_p(0x20 + 0, 0x21);    /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+    outb_p(0x04, 0x21);        /* 8259A-1 (the master) has a slave on IR2 */
+    if (auto_eoi)
+        outb_p(0x03, 0x21);    /* master does Auto EOI */
+    else
+        outb_p(0x01, 0x21);    /* master expects normal EOI */
+
+    outb_p(0x11, 0xA0);        /* ICW1: select 8259A-2 init */
+    outb_p(0x20 + 8, 0xA1);    /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+    outb_p(0x02, 0xA1);        /* 8259A-2 is a slave on master's IR2 */
+    outb_p(0x01, 0xA1);        /* (slave's support for AEOI in flat mode
+                           is to be investigated) */
+
+    if (auto_eoi)
+        /*
+         * in AEOI mode we just have to mask the interrupt
+         * when acking.
+         */
+        i8259A_irq_type.ack = disable_8259A_irq;
+    else
+        i8259A_irq_type.ack = mask_and_ack_8259A;
+
+    udelay(100);               /* wait for 8259A to initialize */
+
+    outb(cached_21, 0x21);     /* restore master IRQ mask */
+    outb(cached_A1, 0xA1);     /* restore slave IRQ mask */
+
+    spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+
+void __init init_ISA_irqs (void)
+{
+    int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+    init_bsp_APIC();
+#endif
+    init_8259A(0);
+
+    for (i = 0; i < NR_IRQS; i++) {
+        irq_desc[i].status = IRQ_DISABLED;
+        irq_desc[i].action = 0;
+        irq_desc[i].depth = 1;
+
+        if (i < 16) {
+            /*
+             * 16 old-style INTA-cycle interrupts:
+             */
+            irq_desc[i].handler = &i8259A_irq_type;
+        } else {
+            /*
+             * 'high' PCI IRQs filled in on demand
+             */
+            irq_desc[i].handler = &no_irq_type;
+        }
+    }
+}
+
+void __init init_IRQ(void)
+{
+    int i;
+
+    init_ISA_irqs();
+
+    /*
+     * Cover the whole vector space, no vector can escape
+     * us. (some of these will be overridden and become
+     * 'special' SMP interrupts)
+     */
+    for (i = 0; i < NR_IRQS; i++) {
+        int vector = FIRST_EXTERNAL_VECTOR + i;
+        if (vector != HYPERVISOR_CALL_VECTOR) 
+            set_intr_gate(vector, interrupt[i]);
+    }
+
+#ifdef CONFIG_SMP
+    /*
+     * IRQ0 must be given a fixed assignment and initialized,
+     * because it's used before the IO-APIC is set up.
+     */
+    set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+    /*
+     * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+     * IPI, driven by wakeup.
+     */
+    set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt);
+
+    /* IPI for invalidation */
+    set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+    /* IPI for generic function call */
+    set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+#endif 
+
+#ifdef CONFIG_X86_LOCAL_APIC
+    /* self generated IPI for local APIC timer */
+    set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+    /* IPI vectors for APIC spurious and error interrupts */
+    set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+    set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+    /*
+     * Set the clock to HZ Hz, we already have a valid
+     * vector now:
+     */
+#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */
+#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
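+    /* LATCH rounds CLOCK_TICK_RATE/HZ to the nearest integer: the PIT
+       divisor that makes channel 0 fire HZ times per second. */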
+    outb_p(0x34,0x43);         /* binary, mode 2, LSB/MSB, ch 0 */
+    outb_p(LATCH & 0xff , 0x40);       /* LSB */
+    outb(LATCH >> 8 , 0x40);   /* MSB */
+
+    setup_irq(2, &irq2);
+}
diff --git a/xen-2.4.16/arch/i386/idle0_task.c b/xen-2.4.16/arch/i386/idle0_task.c
new file mode 100644 (file)
index 0000000..0d2b9e4
--- /dev/null
@@ -0,0 +1,20 @@
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/desc.h>
+
+/*
+ * Initial task structure. XXX KAF: To get this 8192-byte aligned without
+ * linker tricks I copy it into an aligned BSS area at boot time.
+ * The actual name, idle0_task_union, is now declared in boot.S.
+ */
+struct task_struct first_task_struct = IDLE0_TASK(idle0_task_union.task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */ 
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
diff --git a/xen-2.4.16/arch/i386/io_apic.c b/xen-2.4.16/arch/i386/io_apic.c
new file mode 100644 (file)
index 0000000..190e6be
--- /dev/null
@@ -0,0 +1,1487 @@
+/*
+ *     Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *     Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *     Many thanks to Stig Venaas for trying out countless experimental
+ *     patches and reporting/debugging problems patiently!
+ *
+ *     (c) 1999, Multiple IO-APIC support, developed by
+ *     Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *     further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *     and Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/delay.h>
+#include <xeno/sched.h>
+#include <xeno/config.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+#include <asm/desc.h>
+#include <asm/smp.h>
+
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * A rough estimate of how many shared IRQs there are; this can
+ * be changed at any time.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it in O(1).
+ * The indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+       int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+       static int first_free_entry = NR_IRQS;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (entry->next)
+               entry = irq_2_pin + entry->next;
+
+       if (entry->pin != -1) {
+               entry->next = first_free_entry;
+               entry = irq_2_pin + entry->next;
+               if (++first_free_entry >= PIN_MAP_SIZE)
+                       panic("io_apic.c: whoops");
+       }
+       entry->apic = apic;
+       entry->pin = pin;
+}
+
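+/*
+ * __DO_ACTION walks an IRQ's pin chain, read-modify-writing the low
+ * dword of each pin's redirection entry (register 0x10 + pin*2); bit
+ * 16 is the mask bit and bit 15 the trigger-mode bit, as the
+ * DO_ACTION instances below spell out.
+ */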
+#define __DO_ACTION(R, ACTION, FINAL)                                  \
+                                                                       \
+{                                                                      \
+       int pin;                                                        \
+       struct irq_pin_list *entry = irq_2_pin + irq;                   \
+                                                                       \
+       for (;;) {                                                      \
+               unsigned int reg;                                       \
+               pin = entry->pin;                                       \
+               if (pin == -1)                                          \
+                       break;                                          \
+               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
+               reg ACTION;                                             \
+               io_apic_modify(entry->apic, reg);                       \
+               if (!entry->next)                                       \
+                       break;                                          \
+               entry = irq_2_pin + entry->next;                        \
+       }                                                               \
+       FINAL;                                                          \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL)                                        \
+                                                                       \
+       static void name##_IO_APIC_irq (unsigned int irq)               \
+       __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
+                                               /* mask = 1 */
+DO_ACTION( __unmask,           0, &= 0xfffeffff, )
+                                               /* mask = 0 */
+DO_ACTION( __mask_and_edge,    0, = (reg & 0xffff7fff) | 0x00010000, )
+                                               /* mask = 1, trigger = 0 */
+DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
+                                               /* mask = 0, trigger = 1 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __mask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+       memset(&entry, 0, sizeof(entry));
+       entry.mask = 1;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       clear_IO_APIC_pin(apic, pin);
+}
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int __init find_irq_entry(int apic, int pin, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].mpc_irqtype == type &&
+                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mpc_dstirq == pin)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+
+                       return mp_irqs[i].mpc_dstirq;
+       }
+       return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+       int apic, i, best_guess = -1;
+
+       Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+               bus, slot, pin);
+       if (mp_bus_id_to_pci_bus[bus] == -1) {
+               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                               break;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+                   !mp_irqs[i].mpc_irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                               return irq;
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0)
+                               best_guess = irq;
+               }
+       }
+       return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int __init EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR. */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx)     (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)       (0)
+#define default_ISA_polarity(idx)      (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)       (1)
+#define default_PCI_polarity(idx)      (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int polarity;
+
+       /*
+        * Determine IRQ line polarity (high active or low active):
+        */
+       switch (mp_irqs[idx].mpc_irqflag & 3)
+       {
+               case 0: /* conforms, i.e. bus-type dependent polarity */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       polarity = default_ISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       polarity = default_EISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       polarity = default_PCI_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       polarity = default_MCA_polarity(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       polarity = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* high active */
+               {
+                       polarity = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+               case 3: /* low active */
+               {
+                       polarity = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+       }
+       return polarity;
+}
+
+static int __init MPBIOS_trigger(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int trigger;
+
+       /*
+        * Determine IRQ trigger mode (edge or level sensitive):
+        */
+       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       {
+               case 0: /* conforms, i.e. bus-type dependent */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       trigger = default_ISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       trigger = default_PCI_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* edge */
+               {
+                       trigger = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 1;
+                       break;
+               }
+               case 3: /* level */
+               {
+                       trigger = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 0;
+                       break;
+               }
+       }
+       return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+       return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+       return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+       int irq, i;
+       int bus = mp_irqs[idx].mpc_srcbus;
+
+       /*
+        * Debugging check, we are in big trouble if this message pops up!
+        */
+       if (mp_irqs[idx].mpc_dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+       switch (mp_bus_id_to_type[bus])
+       {
+               case MP_BUS_ISA: /* ISA pin */
+               case MP_BUS_EISA:
+               case MP_BUS_MCA:
+               {
+                       irq = mp_irqs[idx].mpc_srcbusirq;
+                       break;
+               }
+               case MP_BUS_PCI: /* PCI pin */
+               {
+                       /*
+                        * PCI IRQs are mapped in order
+                        */
+                       i = irq = 0;
+                       while (i < apic)
+                               irq += nr_ioapic_registers[i++];
+                       irq += pin;
+                       break;
+               }
+               default:
+               {
+                       printk(KERN_ERR "unknown bus type %d.\n",bus); 
+                       irq = 0;
+                       break;
+               }
+       }
+
+       return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic,pin,mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+        * nonexistent IRQs are edge default
+        */
+       return 0;
+}
+
+int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
+
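+/*
+ * Vectors are handed out in strides of 8, skipping the hypervisor-call
+ * vector, so that successive IRQs are spread across APIC priority
+ * classes; when the top of the range is reached the scan restarts at
+ * FIRST_DEVICE_VECTOR with a new offset, and we panic once the vector
+ * space is exhausted.
+ */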
+static int __init assign_irq_vector(int irq)
+{
+       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       if (IO_APIC_VECTOR(irq) > 0)
+               return IO_APIC_VECTOR(irq);
+next:
+       current_vector += 8;
+       if (current_vector == HYPERVISOR_CALL_VECTOR)
+               goto next;
+
+       if (current_vector > FIRST_SYSTEM_VECTOR) {
+               offset++;
+               current_vector = FIRST_DEVICE_VECTOR + offset;
+       }
+
+       if (current_vector == FIRST_SYSTEM_VECTOR)
+               panic("ran out of interrupt sources!");
+
+       IO_APIC_VECTOR(irq) = current_vector;
+       return current_vector;
+}
+
+extern void (*interrupt[NR_IRQS])(void);
+static struct hw_interrupt_type ioapic_level_irq_type;
+static struct hw_interrupt_type ioapic_edge_irq_type;
+
+void __init setup_IO_APIC_irqs(void)
+{
+       struct IO_APIC_route_entry entry;
+       int apic, pin, idx, irq, first_notcon = 1, vector;
+       unsigned long flags;
+
+       printk(KERN_DEBUG "init IO_APIC IRQs\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+               /*
+                * add it to the IO-APIC irq-routing table:
+                */
+               memset(&entry,0,sizeof(entry));
+
+               entry.delivery_mode = dest_LowestPrio;
+               entry.dest_mode = INT_DELIVERY_MODE;
+               entry.mask = 0;                         /* enable IRQ */
+               entry.dest.logical.logical_dest = TARGET_CPUS;
+
+               idx = find_irq_entry(apic,pin,mp_INT);
+               if (idx == -1) {
+                       if (first_notcon) {
+                               printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               first_notcon = 0;
+                       } else
+                               printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                       continue;
+               }
+
+               entry.trigger = irq_trigger(idx);
+               entry.polarity = irq_polarity(idx);
+
+               if (irq_trigger(idx)) {
+                       entry.trigger = 1;
+                       entry.mask = 1;
+                       entry.dest.logical.logical_dest = TARGET_CPUS;
+               }
+
+               irq = pin_2_irq(idx, apic, pin);
+               add_pin_to_irq(irq, apic, pin);
+
+               if (!apic && !IO_APIC_IRQ(irq))
+                       continue;
+
+               if (IO_APIC_IRQ(irq)) {
+                       vector = assign_irq_vector(irq);
+                       entry.vector = vector;
+
+                       if (IO_APIC_irq_trigger(irq))
+                               irq_desc[irq].handler = &ioapic_level_irq_type;
+                       else
+                               irq_desc[irq].handler = &ioapic_edge_irq_type;
+
+                       set_intr_gate(vector, interrupt[irq]);
+               
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       }
+
+       if (!first_notcon)
+               printk(" not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       memset(&entry,0,sizeof(entry));
+
+       disable_8259A_irq(0);
+
+       /* mask LVT0 */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+       /*
+        * We use logical delivery to get the timer IRQ
+        * to the first CPU.
+        */
+       entry.dest_mode = INT_DELIVERY_MODE;
+       entry.mask = 0;                                 /* unmask IRQ now */
+       entry.dest.logical.logical_dest = TARGET_CPUS;
+       entry.delivery_mode = dest_LowestPrio;
+       entry.polarity = 0;
+       entry.trigger = 0;
+       entry.vector = vector;
+
+       /*
+        * The timer IRQ doesn't have to know that behind the
+        * scenes we have an 8259A master in AEOI mode ...
+        */
+       irq_desc[0].handler = &ioapic_edge_irq_type;
+
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       enable_8259A_irq(0);
+}
+
+void __init UNEXPECTED_IO_APIC(void)
+{
+       printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n");
+       printk(KERN_WARNING "          to linux-smp@vger.kernel.org\n");
+}
+
+void __init print_IO_APIC(void)
+{
+       int apic, i;
+       struct IO_APIC_reg_00 reg_00;
+       struct IO_APIC_reg_01 reg_01;
+       struct IO_APIC_reg_02 reg_02;
+       unsigned long flags;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       for (i = 0; i < nr_ioapics; i++)
+               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+       /*
+        * We are a bit conservative about what we expect.  We have to
+        * know about every hardware change ASAP.
+        */
+       printk(KERN_INFO "testing the IO APIC.......................\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(int *)&reg_00 = io_apic_read(apic, 0);
+       *(int *)&reg_01 = io_apic_read(apic, 1);
+       if (reg_01.version >= 0x10)
+               *(int *)&reg_02 = io_apic_read(apic, 2);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       printk("\n");
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
+       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.ID);
+       if (reg_00.__reserved_1 || reg_00.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.entries);
+       if (    (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+               (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+               (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+               (reg_01.entries != 0x1f) && /* dual Xeon boards */
+               (reg_01.entries != 0x22) && /* bigger Xeon boards */
+               (reg_01.entries != 0x2E) &&
+               (reg_01.entries != 0x3F)
+       )
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.PRQ);
+       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.version);
+       if (    (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+               (reg_01.version != 0x10) && /* oldest IO-APICs */
+               (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+               (reg_01.version != 0x13) && /* Xeon IO-APICs */
+               (reg_01.version != 0x20)    /* Intel P64H (82806 AA) */
+       )
+               UNEXPECTED_IO_APIC();
+       if (reg_01.__reserved_1 || reg_01.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       if (reg_01.version >= 0x10) {
+               printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
+               printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.arbitration);
+               if (reg_02.__reserved_1 || reg_02.__reserved_2)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+                         " Stat Dest Deli Vect:   \n");
+
+       for (i = 0; i <= reg_01.entries; i++) {
+               struct IO_APIC_route_entry entry;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+                       entry.dest.logical.logical_dest,
+                       entry.dest.physical.physical_dest
+               );
+
+               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+                       entry.mask,
+                       entry.trigger,
+                       entry.irr,
+                       entry.polarity,
+                       entry.delivery_status,
+                       entry.dest_mode,
+                       entry.delivery_mode,
+                       entry.vector
+               );
+       }
+       }
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+               printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+               printk("\n");
+       }
+
+       printk(KERN_INFO ".................................... done.\n");
+
+       return;
+}
+
+static void print_APIC_bitfield (int base)
+{
+       unsigned int v;
+       int i, j;
+
+       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+       for (i = 0; i < 8; i++) {
+               v = apic_read(base + i*0x10);
+               for (j = 0; j < 32; j++) {
+                       if (v & (1<<j))
+                               printk("1");
+                       else
+                               printk("0");
+               }
+               printk("\n");
+       }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+       unsigned int v, ver, maxlvt;
+
+       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+               smp_processor_id(), hard_smp_processor_id());
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+       v = apic_read(APIC_LVR);
+       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+       ver = GET_APIC_VERSION(v);
+       maxlvt = get_maxlvt();
+
+       v = apic_read(APIC_TASKPRI);
+       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               v = apic_read(APIC_ARBPRI);
+               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                       v & APIC_ARBPRI_MASK);
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       v = apic_read(APIC_EOI);
+       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+       v = apic_read(APIC_RRR);
+       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       v = apic_read(APIC_LDR);
+       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+       v = apic_read(APIC_DFR);
+       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       v = apic_read(APIC_SPIV);
+       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+       printk(KERN_DEBUG "... APIC ISR field:\n");
+       print_APIC_bitfield(APIC_ISR);
+       printk(KERN_DEBUG "... APIC TMR field:\n");
+       print_APIC_bitfield(APIC_TMR);
+       printk(KERN_DEBUG "... APIC IRR field:\n");
+       print_APIC_bitfield(APIC_IRR);
+
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_ICR);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+       v = apic_read(APIC_ICR2);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+       v = apic_read(APIC_LVTT);
+       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+       if (maxlvt > 3) {                       /* PC is LVT#4. */
+               v = apic_read(APIC_LVTPC);
+               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+       }
+       v = apic_read(APIC_LVT0);
+       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+       v = apic_read(APIC_LVT1);
+       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+       if (maxlvt > 2) {                       /* ERR is LVT#3. */
+               v = apic_read(APIC_LVTERR);
+               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_TMICT);
+       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+       v = apic_read(APIC_TMCCT);
+       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+       v = apic_read(APIC_TDCR);
+       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+       printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+       smp_call_function(print_local_APIC, NULL, 1, 1);
+       print_local_APIC(NULL);
+}
+
+void /*__init*/ print_PIC(void)
+{
+       extern spinlock_t i8259A_lock;
+       unsigned int v;
+       unsigned long flags;    /* spin_lock_irqsave() expects a long */
+
+       printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       v = inb(0xa1) << 8 | inb(0x21);
+       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+       v = inb(0xa0) << 8 | inb(0x20);
+       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
+       outb(0x0b,0xa0);
+       outb(0x0b,0x20);
+       v = inb(0xa0) << 8 | inb(0x20);
+       outb(0x0a,0xa0);
+       outb(0x0a,0x20);
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+       v = inb(0x4d1) << 8 | inb(0x4d0);
+       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+static void __init enable_IO_APIC(void)
+{
+       struct IO_APIC_reg_01 reg_01;
+       int i;
+       unsigned long flags;
+
+       for (i = 0; i < PIN_MAP_SIZE; i++) {
+               irq_2_pin[i].pin = -1;
+               irq_2_pin[i].next = 0;
+       }
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (i = 0; i < nr_ioapics; i++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_01 = io_apic_read(i, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[i] = reg_01.entries+1;
+       }
+
+       /*
+        * Do not trust the IO-APIC being empty at bootup
+        */
+       clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+       /*
+        * Clear the IO-APIC before rebooting:
+        */
+       clear_IO_APIC();
+
+       disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+       struct IO_APIC_reg_00 reg_00;
+       unsigned long phys_id_present_map = phys_cpu_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_00 = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               
+               old_id = mp_ioapics[apic].mpc_apicid;
+
+               if (mp_ioapics[apic].mpc_apicid >= 0xf) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.ID);
+                       mp_ioapics[apic].mpc_apicid = reg_00.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       for (i = 0; i < 0xf; i++)
+                               if (!(phys_id_present_map & (1 << i)))
+                                       break;
+                       if (i >= 0xf)
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       phys_id_present_map |= 1 << i;
+                       mp_ioapics[apic].mpc_apicid = i;
+               } else {
+                       printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+                       phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mpc_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mpc_dstapic == old_id)
+                                       mp_irqs[i].mpc_dstapic
+                                               = mp_ioapics[apic].mpc_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+                                       mp_ioapics[apic].mpc_apicid);
+
+               reg_00.ID = mp_ioapics[apic].mpc_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, *(int *)&reg_00);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_00 = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
+                       panic("could not set ID!\n");
+               else
+                       printk(" ok.\n");
+       }
+}
+
+/*
+ * There is a nasty bug in some older SMP boards: their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *     - timer IRQ defaults to IO-APIC IRQ
+ *     - if this function detects that timer IRQs are defunct, then we fall
+ *       back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+       unsigned int t1 = jiffies;
+
+       sti();
+       /* Let ten ticks pass... */
+       mdelay((10 * 1000) / HZ);
+
+       /*
+        * Expect a few ticks at least, to be sure some possible
+        * glue logic does not lock up after the first one or two
+        * ticks in a non-ExtINT mode.  Also the local APIC
+        * might have cached one ExtINT interrupt.  Finally, at
+        * least one tick may be lost due to delays.
+        */
+       if (jiffies - t1 > 4)
+               return 1;
+
+       return 0;
+}
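+
+/*
+ * A worked example of the timing above, assuming the usual HZ of 100:
+ * mdelay((10 * 1000) / HZ) waits 100ms, nominally 10 ticks, so
+ * accepting jiffies - t1 > 4 tolerates roughly half of those ticks
+ * going missing.
+ */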
+
+/*
+ * In the SMP+IOAPIC case it might happen that an unspecified number
+ * of pending IRQ events are left unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way because then we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge-triggered handlers need to resend any interrupt
+ * that was delayed, but this is now handled in the
+ * device-independent code.
+ */
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
+
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < 16) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+#define shutdown_edge_ioapic_irq       disable_edge_ioapic_irq
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+                                       == (IRQ_PENDING | IRQ_DISABLED))
+               mask_IO_APIC_irq(irq);
+       ack_APIC_irq();
+}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
+
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * that startup needs to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+       unmask_IO_APIC_irq(irq);
+
+       return 0; /* don't check for pending */
+}
+
+#define shutdown_level_ioapic_irq      mask_IO_APIC_irq
+#define enable_level_ioapic_irq                unmask_IO_APIC_irq
+#define disable_level_ioapic_irq       mask_IO_APIC_irq
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+       unsigned long v;
+       int i;
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as an edge-triggered one, but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+       i = IO_APIC_VECTOR(irq);
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+       ack_APIC_irq();
+
+       if (!(v & (1 << (i & 0x1f)))) {
+#ifdef APIC_LOCKUP_DEBUG
+               struct irq_pin_list *entry;
+#endif
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+#ifdef APIC_LOCKUP_DEBUG
+               for (entry = irq_2_pin + irq;;) {
+                       unsigned int reg;
+
+                       if (entry->pin == -1)
+                               break;
+                       reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+                       if (reg & 0x00004000)
+                               printk(KERN_CRIT "Aieee!!!  Remote IRR"
+                                       " still set after unlock!\n");
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+#endif
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+}
+
+static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+       unsigned long flags;
+       /*
+        * Only the first 8 bits are valid.
+        */
+       mask = mask << 24;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __DO_ACTION(1, = mask, )
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
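+
+/*
+ * A worked example of the shift above: to target CPU#2 a caller would
+ * pass mask = 1 << 2 = 0x04, which the shift turns into 0x04000000,
+ * i.e. the destination field in bits 24-31 of the high dword of the
+ * routing entry.
+ */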
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static struct hw_interrupt_type ioapic_edge_irq_type = {
+       "IO-APIC-edge",
+       startup_edge_ioapic_irq,
+       shutdown_edge_ioapic_irq,
+       enable_edge_ioapic_irq,
+       disable_edge_ioapic_irq,
+       ack_edge_ioapic_irq,
+       end_edge_ioapic_irq,
+       set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_irq_type = {
+       "IO-APIC-level",
+       startup_level_ioapic_irq,
+       shutdown_level_ioapic_irq,
+       enable_level_ioapic_irq,
+       disable_level_ioapic_irq,
+       mask_and_ack_level_ioapic_irq,
+       end_level_ioapic_irq,
+       set_ioapic_affinity,
+};
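+
+/*
+ * A minimal sketch of the choice described above, as a hypothetical
+ * helper: level-triggered pins get the level descriptor, everything
+ * else the edge one.  The real assignments are made when the routing
+ * entries are set up.
+ */
+#if 0
+static void register_ioapic_handler(int irq, int level_trigger)
+{
+       if (level_trigger)
+               irq_desc[irq].handler = &ioapic_level_irq_type;
+       else
+               irq_desc[irq].handler = &ioapic_edge_irq_type;
+}
+#endif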
+
+static inline void init_IO_APIC_traps(void)
+{
+       int irq;
+
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+                        * interrupt if we can..
+                        */
+                       if (irq < 16)
+                               make_8259A_irq(irq);
+                       else
+                               /* Strange. Oh, well.. */
+                               irq_desc[irq].handler = &no_irq_type;
+               }
+       }
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+       "local-APIC-edge",
+       NULL, /* startup_irq() not used for IRQ0 */
+       NULL, /* shutdown_irq() not used for IRQ0 */
+       enable_lapic_irq,
+       disable_lapic_irq,
+       ack_lapic_irq,
+       end_lapic_irq
+};
+
+
+/*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+       int pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+       unsigned long flags;
+
+       pin = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1)
+               return;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       clear_IO_APIC_pin(0, pin);
+
+       memset(&entry1, 0, sizeof(entry1));
+
+       entry1.dest_mode = 0;                   /* physical delivery */
+       entry1.mask = 0;                        /* unmask IRQ now */
+       entry1.dest.physical.physical_dest = hard_smp_processor_id();
+       entry1.delivery_mode = dest_ExtINT;
+       entry1.polarity = entry0.polarity;
+       entry1.trigger = 0;
+       entry1.vector = 0;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+                  RTC_FREQ_SELECT);
+       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+       i = 100;
+       while (i-- > 0) {
+               mdelay(10);
+               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+                       i -= 10;
+       }
+
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(0, pin);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+       extern int timer_ack;
+       int pin1, pin2;
+       int vector;
+
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+       vector = assign_irq_vector(0);
+       set_intr_gate(vector, interrupt[0]);
+
+       /*
+        * Subtle: code in do_timer_interrupt() expects an AEOI
+        * mode for the 8259A whenever interrupts are routed
+        * through I/O APICs.  Also IRQ0 has to be enabled in
+        * the 8259A which implies the virtual wire has to be
+        * disabled in the local APIC.
+        */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       init_8259A(1);
+       timer_ack = 1;
+       enable_8259A_irq(0);
+
+       pin1 = find_isa_irq_pin(0, mp_INT);
+       pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               unmask_IO_APIC_irq(0);
+               if (timer_irq_works()) {
+                       return;
+               }
+               clear_IO_APIC_pin(0, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+       }
+
+       printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+       if (pin2 != -1) {
+               printk("\n..... (found pin %d) ...", pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
+               setup_ExtINT_IRQ0_pin(pin2, vector);
+               if (timer_irq_works()) {
+                       printk("works.\n");
+                       return;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               clear_IO_APIC_pin(0, pin2);
+       }
+       printk(" failed.\n");
+
+       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+       disable_8259A_irq(0);
+       irq_desc[0].handler = &lapic_irq_type;
+       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       enable_8259A_irq(0);
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+       printk(" failed.\n");
+
+       printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+       init_8259A(0);
+       make_8259A_irq(0);
+       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+       unlock_ExtINT_logic();
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       printk(" failed :(.\n");
+       panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+}
+
+/*
+ * IRQ's that are handled by the old PIC in all cases:
+ * - IRQ2 is the cascade IRQ, and cannot be an io-apic IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ */
+#define PIC_IRQS       (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+       enable_IO_APIC();
+
+       io_apic_irqs = ~PIC_IRQS;
+       printk("ENABLING IO-APIC IRQs\n");
+
+       /*
+        * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
+        * mptable:
+        */
+       setup_ioapic_ids_from_mpc();
+       sync_Arb_IDs();
+       setup_IO_APIC_irqs();
+       init_IO_APIC_traps();
+       check_timer();
+       print_IO_APIC();
+}
diff --git a/xen-2.4.16/arch/i386/ioremap.c b/xen-2.4.16/arch/i386/ioremap.c
new file mode 100644 (file)
index 0000000..aefe75e
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+//#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+
+static unsigned long remap_base = 0;
+
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY)
+
+#define PAGE_ALIGN(addr)    (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+static void new_l2e(l2_pgentry_t *pl2e)
+{
+    l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
+    if ( !pl1e ) BUG();
+    clear_page(pl1e);
+    *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT);
+}
+
+void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+    unsigned long vaddr;
+    unsigned long offset, cur=0, last_addr;
+    l2_pgentry_t *pl2e;
+    l1_pgentry_t *pl1e;
+
+    /* First time through, start allocating from end of real memory. */
+    if ( !remap_base ) 
+        remap_base = (unsigned long)phys_to_virt(MAX_USABLE_ADDRESS);
+
+    /* Don't allow wraparound or zero size */
+    last_addr = phys_addr + size - 1;
+    if (!size || last_addr < phys_addr)
+        return NULL;
+
+    /*
+     * Don't remap the low PCI/ISA area, it's always mapped..
+     */
+    if (phys_addr >= 0xA0000 && last_addr < 0x100000)
+        return phys_to_virt(phys_addr);
+
+#if 0
+    /*
+     * Don't allow anybody to remap normal RAM that we're using..
+     */
+    if (phys_addr < virt_to_phys(high_memory)) {
+        char *t_addr, *t_end;
+        struct pfn_info *page;
+
+        t_addr = __va(phys_addr);
+        t_end = t_addr + (size - 1);
+          
+        for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+            if(!PageReserved(page))
+                return NULL;
+    }
+#endif
+
+    /*
+     * Mappings have to be page-aligned
+     */
+    offset = phys_addr & ~PAGE_MASK;
+    phys_addr &= PAGE_MASK;
+    size = PAGE_ALIGN(last_addr) - phys_addr;
+
+    /*
+     * Ok, go for it..
+     */
+    vaddr = remap_base;
+    remap_base += size;
+    pl2e = idle0_pg_table + l2_table_offset(vaddr);
+    if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+    pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr);
+    for ( ; ; ) 
+    {
+        if ( !l1_pgentry_empty(*pl1e) ) BUG();
+        *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags);
+        cur += PAGE_SIZE;
+        if ( cur == size ) break;
+        if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) )
+        {
+            if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e);
+            pl1e = l2_pgentry_to_l1(*pl2e++);        
+        }
+    }
+
+    flush_tlb_all();
+
+    return (void *) (offset + (char *)vaddr);
+}
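+
+/*
+ * A minimal usage sketch: map a device's registers and read the first
+ * one.  The physical address 0xfebf0000 is an arbitrary example, and
+ * readl() is assumed to come from <asm/io.h>.
+ */
+#if 0
+    volatile void *regs = __ioremap(0xfebf0000, 0x1000, 0);
+    if ( regs != NULL )
+    {
+        unsigned int status = readl(regs);
+        iounmap((void *)regs); /* currently a NOP, see below */
+    }
+#endif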
+
+void iounmap(void *addr)
+{
+    /* NOP for now. */
+}
diff --git a/xen-2.4.16/arch/i386/irq.c b/xen-2.4.16/arch/i386/irq.c
new file mode 100644 (file)
index 0000000..e58fb8f
--- /dev/null
@@ -0,0 +1,895 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setup_irqs with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <xeno/slab.h>
+
+#include <asm/msr.h>
+#include <asm/hardirq.h>
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/pgalloc.h>
+#include <xeno/delay.h>
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'What should we do if we get a hw irq event on an illegal vector?'
+ * Each architecture has to answer this for itself; it doesn't deserve
+ * a generic callback, I think.
+ */
+#if CONFIG_X86
+    printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+    /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        */
+    ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none  disable_none
+#define end_none       enable_none
+
+struct hw_interrupt_type no_irq_type = {
+    "none",
+    startup_none,
+    shutdown_none,
+    enable_none,
+    disable_none,
+    ack_none,
+    end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = 0xff;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+       
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty. Is this by design? Do we have to limit
+ * 'memory update oscillation frequency' artificially, like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques;
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes.  I suspect the delay done
+ * by SYNC_OTHER_CORES() is correlated with 'snooping latency', but
+ * I thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+    for (;;) {
+
+        /*
+         * Wait until all interrupts are gone. Wait
+         * for bottom half handlers unless we're
+         * already executing in one..
+         */
+        if (!irqs_running())
+            if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+                break;
+
+        /* Duh, we have to loop. Release the lock to avoid deadlocks */
+        clear_bit(0,&global_irq_lock);
+
+        for (;;) {
+            __sti();
+            SYNC_OTHER_CORES(cpu);
+            __cli();
+            if (irqs_running())
+                continue;
+            if (global_irq_lock)
+                continue;
+            if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+                continue;
+            if (!test_and_set_bit(0,&global_irq_lock))
+                break;
+        }
+    }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+    if (irqs_running()) {
+        /* Stupid approach */
+        cli();
+        sti();
+    }
+}
+
+static inline void get_irqlock(int cpu)
+{
+    if (test_and_set_bit(0,&global_irq_lock)) {
+        /* do we already hold the lock? */
+        if ((unsigned char) cpu == global_irq_holder)
+            return;
+        /* Uhhuh.. Somebody else got it. Wait.. */
+        do {
+            do {
+                rep_nop();
+            } while (test_bit(0,&global_irq_lock));
+        } while (test_and_set_bit(0,&global_irq_lock));                
+    }
+    /* 
+     * We also need to make sure that nobody else is running
+     * in an interrupt context. 
+     */
+    wait_on_irq(cpu);
+
+    /*
+     * Ok, finally..
+     */
+    global_irq_holder = cpu;
+}
+
+#define EFLAGS_IF_SHIFT 9
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+    unsigned int flags;
+
+    __save_flags(flags);
+    if (flags & (1 << EFLAGS_IF_SHIFT)) {
+        int cpu = smp_processor_id();
+        __cli();
+        if (!local_irq_count(cpu))
+            get_irqlock(cpu);
+    }
+}
+
+void __global_sti(void)
+{
+    int cpu = smp_processor_id();
+
+    if (!local_irq_count(cpu))
+        release_irqlock(cpu);
+    __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+    int retval;
+    int local_enabled;
+    unsigned long flags;
+    int cpu = smp_processor_id();
+
+    __save_flags(flags);
+    local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+    /* default to local */
+    retval = 2 + local_enabled;
+
+    /* check for global flags if we're not in an interrupt */
+    if (!local_irq_count(cpu)) {
+        if (local_enabled)
+            retval = 1;
+        if (global_irq_holder == cpu)
+            retval = 0;
+    }
+    return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+    switch (flags) {
+    case 0:
+        __global_cli();
+        break;
+    case 1:
+        __global_sti();
+        break;
+    case 2:
+        __cli();
+        break;
+    case 3:
+        __sti();
+        break;
+    default:
+        printk("global_restore_flags: %08lx (%08lx)\n",
+               flags, (&flags)[-1]);
+    }
+}
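+
+/*
+ * A round-trip sketch of the 0-3 encoding above: save, force a global
+ * cli, then restore whichever of the four states the caller was in.
+ */
+#if 0
+    unsigned long flags = __global_save_flags();
+    __global_cli();
+    /* ... region excluding all interrupt handlers on all CPUs ... */
+    __global_restore_flags(flags);
+#endif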
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+    int status;
+    int cpu = smp_processor_id();
+
+    irq_enter(cpu, irq);
+
+    status = 1;        /* Force the "do bottom halves" bit */
+
+    if (!(action->flags & SA_INTERRUPT))
+        __sti();
+
+    do {
+        status |= action->flags;
+        action->handler(irq, action->dev_id, regs);
+        action = action->next;
+    } while (action);
+
+    __cli();
+
+    irq_exit(cpu, irq);
+
+    return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock. 
+ */
+/**
+ *     disable_irq_nosync - disable an irq without waiting
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Disables and Enables are
+ *     nested.
+ *     Unlike disable_irq(), this function does not ensure existing
+ *     instances of the IRQ handler have completed before returning.
+ *
+ *     This function may be called from IRQ context.
+ */
+inline void disable_irq_nosync(unsigned int irq)
+{
+    irq_desc_t *desc = irq_desc + irq;
+    unsigned long flags;
+
+    spin_lock_irqsave(&desc->lock, flags);
+    if (!desc->depth++) {
+        desc->status |= IRQ_DISABLED;
+        desc->handler->disable(irq);
+    }
+    spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *     disable_irq - disable an irq and wait for completion
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Enables and Disables are
+ *     nested.
+ *     This function waits for any pending IRQ handlers for this interrupt
+ *     to complete before returning. If you use this function while
+ *     holding a resource the IRQ handler may need, you will deadlock.
+ *
+ *     This function may be called - with care - from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+    disable_irq_nosync(irq);
+
+    if (!local_irq_count(smp_processor_id())) {
+        do {
+            barrier();
+            cpu_relax();
+        } while (irq_desc[irq].status & IRQ_INPROGRESS);
+    }
+}
+
+/**
+ *     enable_irq - enable handling of an irq
+ *     @irq: Interrupt to enable
+ *
+ *     Undoes the effect of one call to disable_irq().  If this
+ *     matches the last disable, processing of interrupts on this
+ *     IRQ line is re-enabled.
+ *
+ *     This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+    irq_desc_t *desc = irq_desc + irq;
+    unsigned long flags;
+
+    spin_lock_irqsave(&desc->lock, flags);
+    switch (desc->depth) {
+    case 1: {
+        unsigned int status = desc->status & ~IRQ_DISABLED;
+        desc->status = status;
+        if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+            desc->status = status | IRQ_REPLAY;
+            hw_resend_irq(desc->handler,irq);
+        }
+        desc->handler->enable(irq);
+        /* fall-through */
+    }
+    default:
+        desc->depth--;
+        break;
+    case 0:
+        printk("enable_irq(%u) unbalanced from %p\n", irq,
+               __builtin_return_address(0));
+    }
+    spin_unlock_irqrestore(&desc->lock, flags);
+}
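+
+/*
+ * A sketch of the depth counting above: disables nest, so each
+ * disable must be matched by exactly one enable before the line is
+ * unmasked again.
+ */
+#if 0
+    disable_irq_nosync(irq); /* depth 0 -> 1: line masked     */
+    disable_irq_nosync(irq); /* depth 1 -> 2: still masked    */
+    enable_irq(irq);         /* depth 2 -> 1: still masked    */
+    enable_irq(irq);         /* depth 1 -> 0: line re-enabled */
+#endif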
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs regs)
+{      
+    /* 
+     * We ack quickly, we don't want the irq controller
+     * thinking we're snobs just because some other CPU has
+     * disabled global interrupts (we have already done the
+     * INT_ACK cycles, it's too late to try to pretend to the
+     * controller that we aren't taking the interrupt).
+     *
+     * 0 return value means that this irq is already being
+     * handled by some other CPU (or is disabled).
+     */
+    int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code  */
+    int cpu = smp_processor_id();
+    irq_desc_t *desc = irq_desc + irq;
+    struct irqaction * action;
+    unsigned int status;
+
+    spin_lock(&desc->lock);
+    desc->handler->ack(irq);
+    /*
+     * REPLAY is when Linux resends an IRQ that was dropped earlier.
+     * WAITING is used by probe to mark irqs that are being tested.
+     */
+    status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+    status |= IRQ_PENDING; /* we _want_ to handle it */
+
+       /*
+        * If the IRQ is disabled for whatever reason, we cannot
+        * use the action we have.
+        */
+    action = NULL;
+    if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+        action = desc->action;
+        status &= ~IRQ_PENDING; /* we commit to handling */
+        status |= IRQ_INPROGRESS; /* we are handling it */
+    }
+    desc->status = status;
+
+       /*
+        * If there is no IRQ handler or it was disabled, exit early.
+        * Since we set PENDING, if another processor is handling
+        * a different instance of this same irq, the other processor
+        * will take care of it.
+        */
+    if (!action)
+        goto out;
+
+       /*
+        * Edge triggered interrupts need to remember
+        * pending events.
+        * This applies to any hw interrupts that allow a second
+        * instance of the same irq to arrive while we are in do_IRQ
+        * or in the handler. But the code here only handles the _second_
+        * instance of the irq, not the third or fourth. So it is mostly
+        * useful for irq hardware that does not mask cleanly in an
+        * SMP environment.
+        */
+    for (;;) {
+        spin_unlock(&desc->lock);
+        handle_IRQ_event(irq, &regs, action);
+        spin_lock(&desc->lock);
+               
+        if (!(desc->status & IRQ_PENDING))
+            break;
+        desc->status &= ~IRQ_PENDING;
+    }
+    desc->status &= ~IRQ_INPROGRESS;
+ out:
+    /*
+     * The ->end() handler has to deal with interrupts which got
+     * disabled while the handler was running.
+     */
+    desc->handler->end(irq);
+    spin_unlock(&desc->lock);
+
+    if (softirq_pending(cpu))
+        do_softirq();
+
+    return 1;
+}
+
+/**
+ *     request_irq - allocate an interrupt line
+ *     @irq: Interrupt line to allocate
+ *     @handler: Function to be called when the IRQ occurs
+ *     @irqflags: Interrupt type flags
+ *     @devname: An ascii name for the claiming device
+ *     @dev_id: A cookie passed back to the handler function
+ *
+ *     This call allocates interrupt resources and enables the
+ *     interrupt line and IRQ handling. From the point this
+ *     call is made your handler function may be invoked. Since
+ *     your handler function must clear any interrupt the board 
+ *     raises, you must take care both to initialise your hardware
+ *     and to set up the interrupt handler in the right order.
+ *
+ *     Dev_id must be globally unique. Normally the address of the
+ *     device data structure is used as the cookie. Since the handler
+ *     receives this value it makes sense to use it.
+ *
+ *     If your interrupt is shared you must pass a non-NULL dev_id
+ *     as this is required when freeing the interrupt.
+ *
+ *     Flags:
+ *
+ *     SA_SHIRQ                Interrupt is shared
+ *
+ *     SA_INTERRUPT            Disable local interrupts while processing
+ */
+int request_irq(unsigned int irq, 
+               void (*handler)(int, void *, struct pt_regs *),
+               unsigned long irqflags, 
+               const char * devname,
+               void *dev_id)
+{
+    int retval;
+    struct irqaction * action;
+
+    if (irq >= NR_IRQS)
+        return -EINVAL;
+    if (!handler)
+        return -EINVAL;
+
+    action = (struct irqaction *)
+        kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+    if (!action)
+        return -ENOMEM;
+
+    action->handler = handler;
+    action->flags = irqflags;
+    action->mask = 0;
+    action->name = devname;
+    action->next = NULL;
+    action->dev_id = dev_id;
+
+    retval = setup_irq(irq, action);
+    if (retval)
+        kfree(action);
+
+    return retval;
+}
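+
+/*
+ * A typical-usage sketch for the call above.  'mydev' and
+ * mydev_interrupt() are hypothetical names for a driver's device
+ * structure and handler; the dev_id cookie given to request_irq()
+ * must be the one later passed to free_irq().
+ */
+#if 0
+static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    /* acknowledge the board here, then handle the event */
+}
+
+    if ( request_irq(irq, mydev_interrupt, SA_SHIRQ, "mydev", &mydev) )
+        return -EBUSY; /* line busy and not shareable */
+    /* ... the handler may be invoked from this point on ... */
+    free_irq(irq, &mydev);
+#endif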
+
+/**
+ *     free_irq - free an interrupt
+ *     @irq: Interrupt line to free
+ *     @dev_id: Device identity to free
+ *
+ *     Remove an interrupt handler. The handler is removed and if the
+ *     interrupt line is no longer in use by any driver it is disabled.
+ *     On a shared IRQ the caller must ensure the interrupt is disabled
+ *     on the card it drives before calling this function. The function
+ *     does not return until any executing interrupts for this IRQ
+ *     have completed.
+ *
+ *     This function may be called from interrupt context. 
+ *
+ *     Bugs: Attempting to free an irq in a handler for the same irq hangs
+ *           the machine.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+    irq_desc_t *desc;
+    struct irqaction **p;
+    unsigned long flags;
+
+    if (irq >= NR_IRQS)
+        return;
+
+    desc = irq_desc + irq;
+    spin_lock_irqsave(&desc->lock,flags);
+    p = &desc->action;
+    for (;;) {
+        struct irqaction * action = *p;
+        if (action) {
+            struct irqaction **pp = p;
+            p = &action->next;
+            if (action->dev_id != dev_id)
+                continue;
+
+            /* Found it - now remove it from the list of entries */
+            *pp = action->next;
+            if (!desc->action) {
+                desc->status |= IRQ_DISABLED;
+                desc->handler->shutdown(irq);
+            }
+            spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+            /* Wait to make sure it's not being used on another CPU */
+            while (desc->status & IRQ_INPROGRESS) {
+                barrier();
+                cpu_relax();
+            }
+#endif
+            kfree(action);
+            return;
+        }
+        printk("Trying to free free IRQ%d\n",irq);
+        spin_unlock_irqrestore(&desc->lock,flags);
+        return;
+    }
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED;
+
+/**
+ *     probe_irq_on    - begin an interrupt autodetect
+ *
+ *     Commence probing for an interrupt. The interrupts are scanned
+ *     and a mask of potential interrupt lines is returned.
+ *
+ */
+unsigned long probe_irq_on(void)
+{
+    unsigned int i;
+    irq_desc_t *desc;
+    unsigned long val;
+    unsigned long s=0, e=0;
+
+    spin_lock(&probe_sem);
+    /* 
+     * something may have generated an irq long ago and we want to
+     * flush such a longstanding irq before considering it as spurious. 
+     */
+    for (i = NR_IRQS-1; i > 0; i--)  {
+        desc = irq_desc + i;
+
+        spin_lock_irq(&desc->lock);
+        if (!irq_desc[i].action) 
+            irq_desc[i].handler->startup(i);
+        spin_unlock_irq(&desc->lock);
+    }
+
+    /* Wait for longstanding interrupts to trigger (20ms delay). */
+    rdtscl(s);
+    do {
+        synchronize_irq();
+        rdtscl(e);
+    } while ( ((e-s)/ticks_per_usec) < 20000 );
+
+    /*
+     * enable any unassigned irqs
+     * (we must startup again here because if a longstanding irq
+     * happened in the previous stage, it may have masked itself)
+     */
+    for (i = NR_IRQS-1; i > 0; i--) {
+        desc = irq_desc + i;
+
+        spin_lock_irq(&desc->lock);
+        if (!desc->action) {
+            desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+            if (desc->handler->startup(i))
+                desc->status |= IRQ_PENDING;
+        }
+        spin_unlock_irq(&desc->lock);
+    }
+
+    /*
+     * Wait for spurious interrupts to trigger (100ms delay). 
+     */
+    rdtscl(s);
+    do {
+        synchronize_irq();
+        rdtscl(e);
+    } while ( ((e-s)/ticks_per_usec) < 100000 );
+
+    /*
+     * Now filter out any obviously spurious interrupts
+     */
+    val = 0;
+    for (i = 0; i < NR_IRQS; i++) {
+        irq_desc_t *desc = irq_desc + i;
+        unsigned int status;
+
+        spin_lock_irq(&desc->lock);
+        status = desc->status;
+
+        if (status & IRQ_AUTODETECT) {
+            /* It triggered already - consider it spurious. */
+            if (!(status & IRQ_WAITING)) {
+                desc->status = status & ~IRQ_AUTODETECT;
+                desc->handler->shutdown(i);
+            } else
+                if (i < 32)
+                    val |= 1 << i;
+        }
+        spin_unlock_irq(&desc->lock);
+    }
+
+    return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+/**
+ *     probe_irq_mask - scan a bitmap of interrupt lines
+ *     @val:   mask of interrupts to consider
+ *
+ *     Scan the ISA bus interrupt lines and return a bitmap of
+ *     active interrupts. The interrupt probe logic state is then
+ *     returned to its previous value.
+ *
+ *     Note: we need to scan all the irq's even though we will
+ *     only return ISA irq numbers - just so that we reset them
+ *     all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+    int i;
+    unsigned int mask;
+
+    mask = 0;
+    for (i = 0; i < NR_IRQS; i++) {
+        irq_desc_t *desc = irq_desc + i;
+        unsigned int status;
+
+        spin_lock_irq(&desc->lock);
+        status = desc->status;
+
+        if (status & IRQ_AUTODETECT) {
+            if (i < 16 && !(status & IRQ_WAITING))
+                mask |= 1 << i;
+
+            desc->status = status & ~IRQ_AUTODETECT;
+            desc->handler->shutdown(i);
+        }
+        spin_unlock_irq(&desc->lock);
+    }
+    spin_unlock(&probe_sem);
+
+    return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ *     probe_irq_off   - end an interrupt autodetect
+ *     @val: mask of potential interrupts (unused)
+ *
+ *     Scans the unused interrupt lines and returns the line which
+ *     appears to have triggered the interrupt. If no interrupt was
+ *     found then zero is returned. If more than one interrupt is
+ *     found then minus the first candidate is returned to indicate
+ *     there is doubt.
+ *
+ *     The interrupt probe logic state is returned to its previous
+ *     value.
+ *
+ *     BUGS: When used in a module (which arguably shouldn't happen)
+ *     nothing prevents two IRQ probe callers from overlapping. The
+ *     results of this are non-optimal.
+ */
+int probe_irq_off(unsigned long val)
+{
+    int i, irq_found, nr_irqs;
+
+    nr_irqs = 0;
+    irq_found = 0;
+    for (i = 0; i < NR_IRQS; i++) {
+        irq_desc_t *desc = irq_desc + i;
+        unsigned int status;
+
+        spin_lock_irq(&desc->lock);
+        status = desc->status;
+
+        if (status & IRQ_AUTODETECT) {
+            if (!(status & IRQ_WAITING)) {
+                if (!nr_irqs)
+                    irq_found = i;
+                nr_irqs++;
+            }
+            desc->status = status & ~IRQ_AUTODETECT;
+            desc->handler->shutdown(i);
+        }
+        spin_unlock_irq(&desc->lock);
+    }
+    spin_unlock(&probe_sem);
+
+    if (nr_irqs > 1)
+        irq_found = -irq_found;
+    return irq_found;
+}
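+
+/*
+ * A sketch of the three-call autoprobe protocol implemented above;
+ * trigger_board_interrupt() stands in for whatever device-specific
+ * action makes the board raise its interrupt line.
+ */
+#if 0
+    unsigned long mask = probe_irq_on();
+    int irq;
+
+    trigger_board_interrupt(); /* hypothetical device poke */
+    mdelay(20);
+    irq = probe_irq_off(mask); /* >0 found, 0 none, <0 ambiguous */
+#endif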
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+    int shared = 0;
+    unsigned long flags;
+    struct irqaction *old, **p;
+    irq_desc_t *desc = irq_desc + irq;
+
+    /*
+     * The following block of code has to be executed atomically
+     */
+    spin_lock_irqsave(&desc->lock,flags);
+    p = &desc->action;
+    if ((old = *p) != NULL) {
+        /* Can't share interrupts unless both agree to */
+        if (!(old->flags & new->flags & SA_SHIRQ)) {
+            spin_unlock_irqrestore(&desc->lock,flags);
+            return -EBUSY;
+        }
+
+        /* add new interrupt at end of irq queue */
+        do {
+            p = &old->next;
+            old = *p;
+        } while (old);
+        shared = 1;
+    }
+
+    *p = new;
+
+    if (!shared) {
+        desc->depth = 0;
+        desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+        desc->handler->startup(irq);
+    }
+    spin_unlock_irqrestore(&desc->lock,flags);
+
+    return 0;
+}
diff --git a/xen-2.4.16/arch/i386/mm.c b/xen-2.4.16/arch/i386/mm.c
new file mode 100644 (file)
index 0000000..967816b
--- /dev/null
@@ -0,0 +1,96 @@
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/init.h>
+#include <xeno/mm.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+
+static inline void set_pte_phys (unsigned long vaddr,
+                                 l1_pgentry_t entry)
+{
+    l2_pgentry_t *l2ent;
+    l1_pgentry_t *l1ent;
+
+    l2ent = idle0_pg_table + l2_table_offset(vaddr);
+    l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
+    *l1ent = entry;
+
+    /* It's enough to flush this one mapping. */
+    __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx, 
+                   l1_pgentry_t entry)
+{
+    unsigned long address = __fix_to_virt(idx);
+
+    if (idx >= __end_of_fixed_addresses) {
+        printk("Invalid __set_fixmap\n");
+        return;
+    }
+    set_pte_phys(address, entry);
+}
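+
+/*
+ * A minimal usage sketch: establish one fixmap mapping.  FIX_APIC_BASE
+ * and apic_phys are assumed names here; any enum fixed_addresses index
+ * and page-aligned physical address would do.
+ */
+#if 0
+    __set_fixmap(FIX_APIC_BASE, mk_l1_pgentry(apic_phys | PAGE_HYPERVISOR));
+#endif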
+
+static void __init fixrange_init (unsigned long start, 
+                                  unsigned long end, l2_pgentry_t *pg_base)
+{
+    l2_pgentry_t *l2e;
+    int i;
+    unsigned long vaddr, page;
+
+    vaddr = start;
+    i = l2_table_offset(vaddr);
+    l2e = pg_base + i;
+
+    for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) 
+    {
+        if ( !l2_pgentry_empty(*l2e) ) continue;
+        page = (unsigned long)get_free_page(GFP_KERNEL);
+        clear_page(page);
+        *l2e = mk_l2_pgentry(__pa(page) | PAGE_HYPERVISOR);
+        vaddr += 1 << L2_PAGETABLE_SHIFT;
+    }
+}
+
+void __init paging_init(void)
+{
+    unsigned long addr;
+
+    /* XXX initialised in boot.S */
+    /*if ( cpu_has_pge ) set_in_cr4(X86_CR4_PGE);*/
+    /*if ( cpu_has_pse ) set_in_cr4(X86_CR4_PSE);*/
+    /*if ( cpu_has_pae ) set_in_cr4(X86_CR4_PAE);*/
+
+    /*
+     * Fixed mappings, only the page table structure has to be
+     * created - mappings will be set by set_fixmap():
+     */
+    addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
+    fixrange_init(addr, 0, idle0_pg_table);
+}
+
+void __init zap_low_mappings (void)
+{
+    int i;
+    for (i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+        idle0_pg_table[i] = mk_l2_pgentry(0);
+    flush_tlb_all();
+}
+
+
+long do_set_guest_stack(unsigned long ss, unsigned long esp)
+{
+    int nr = smp_processor_id();
+    struct tss_struct *t = &init_tss[nr];
+
+    if ( (ss == __HYPERVISOR_CS) || (ss == __HYPERVISOR_DS) )
+        return -1;
+
+    current->thread.ss1  = ss;
+    current->thread.esp1 = esp;
+    t->ss1  = ss;
+    t->esp1 = esp;
+
+    return 0;
+}
diff --git a/xen-2.4.16/arch/i386/mpparse.c b/xen-2.4.16/arch/i386/mpparse.c
new file mode 100644 (file)
index 0000000..c5cf58a
--- /dev/null
@@ -0,0 +1,630 @@
+/*
+ *     Intel Multiprocessor Specification 1.1 and 1.4
+ *     compliant MP-table parsing routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *             Alan Cox        :       Added EBDA scanning
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *     Maciej W. Rozycki       :       Bits for default MP configurations
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <asm/io.h>
+#include <xeno/irq.h>
+#include <xeno/smp.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+
+/* Have we found an MP table */
+int smp_found_config;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES];
+int mp_bus_id_to_node [MAX_MP_BUSSES];
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_current_pci_id;
+
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+
+int pic_mode;
+unsigned long mp_lapic_addr;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_logical_apicid = -1U;
+/* Internal processor count */
+static unsigned int num_processors;
+
+/* Bitmask of physically existing CPUs */
+unsigned long phys_cpu_present_map;
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+    int sum = 0;
+
+    while (len--)
+        sum += *mp++;
+
+    return sum & 0xFF;
+}
+
+/*
+ * Processor encoding in an MP configuration block
+ */
+
+static char __init *mpc_family(int family,int model)
+{
+    static char n[32];
+    static char *model_defs[]=
+    {
+        "80486DX","80486DX",
+        "80486SX","80486DX/2 or 80487",
+        "80486SL","80486SX/2",
+        "Unknown","80486DX/2-WB",
+        "80486DX/4","80486DX/4-WB"
+    };
+
+    switch (family) {
+    case 0x04:
+        if (model < 10)
+            return model_defs[model];
+        break;
+
+    case 0x05:
+        return("Pentium(tm)");
+
+    case 0x06:
+        return("Pentium(tm) Pro");
+
+    case 0x0F:
+        if (model == 0x00)
+            return("Pentium 4(tm)");
+        if (model == 0x0F)
+            return("Special controller");
+    }
+    sprintf(n,"Unknown CPU [%d:%d]",family, model);
+    return n;
+}
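+
+/*
+ * A worked decode for the caller below, assuming the usual
+ * CPU_FAMILY_MASK of 0x0f00 and CPU_MODEL_MASK of 0x00f0: an
+ * mpc_cpufeature of 0x0651 gives family (0x0651 & 0x0f00) >> 8 == 6
+ * and model (0x0651 & 0x00f0) >> 4 == 5, i.e. "Pentium(tm) Pro".
+ */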
+
+/* 
+ * Have to match translation table entries to main table entries by counter,
+ * hence the mpc_record variable .... can't see a less disgusting way of
+ * doing this ....
+ */
+
+static int mpc_record; 
+
+void __init MP_processor_info (struct mpc_config_processor *m)
+{
+    int ver, logical_apicid;
+       
+    if (!(m->mpc_cpuflag & CPU_ENABLED))
+        return;
+
+    logical_apicid = m->mpc_apicid;
+    printk("Processor #%d %s APIC version %d\n",
+           m->mpc_apicid,
+           mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+                      (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+           m->mpc_apicver);
+
+    if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+        Dprintk("    Bootup CPU\n");
+        boot_cpu_physical_apicid = m->mpc_apicid;
+        boot_cpu_logical_apicid = logical_apicid;
+    }
+
+    num_processors++;
+
+    if (m->mpc_apicid >= MAX_APICS) {
+        printk("Processor #%d INVALID. (Max ID: %d).\n",
+               m->mpc_apicid, MAX_APICS);
+        return;
+    }
+    ver = m->mpc_apicver;
+
+    phys_cpu_present_map |= 1 << m->mpc_apicid;
+
+    /*
+     * Validate version
+     */
+    if (ver == 0x0) {
+        printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+        ver = 0x10;
+    }
+    apic_version[m->mpc_apicid] = ver;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+    char str[7];
+
+    memcpy(str, m->mpc_bustype, 6);
+    str[6] = 0;
+       
+    Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+
+    if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+        mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+    } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+        mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+    } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+        mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+        mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+        mp_current_pci_id++;
+    } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+        mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+    } else {
+        printk("Unknown bustype %s - ignoring\n", str);
+    }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+    if (!(m->mpc_flags & MPC_APIC_USABLE))
+        return;
+
+    printk("I/O APIC #%d Version %d at 0x%lX.\n",
+           m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+    if (nr_ioapics >= MAX_IO_APICS) {
+        printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+               MAX_IO_APICS, nr_ioapics);
+        panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+    }
+    if (!m->mpc_apicaddr) {
+        printk("WARNING: bogus zero I/O APIC address"
+               " found in MP table, skipping!\n");
+        return;
+    }
+    mp_ioapics[nr_ioapics] = *m;
+    nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+    mp_irqs [mp_irq_entries] = *m;
+    Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+            " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+            m->mpc_irqtype, m->mpc_irqflag & 3,
+            (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+            m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+    if (++mp_irq_entries == MAX_IRQ_SOURCES)
+        panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+    Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+            " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+            m->mpc_irqtype, m->mpc_irqflag & 3,
+            (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+            m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+    /*
+     * Well it seems all SMP boards in existence
+     * use ExtINT/LVT1 == LINT0 and
+     * NMI/LVT2 == LINT1 - the following check
+     * will show us if this assumption is false.
+     * Until then we do not have to add baggage.
+     */
+    if ((m->mpc_irqtype == mp_ExtINT) &&
+        (m->mpc_destapiclint != 0))
+        BUG();
+    if ((m->mpc_irqtype == mp_NMI) &&
+        (m->mpc_destapiclint != 1))
+        BUG();
+}
+
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+    char str[16];
+    int count=sizeof(*mpc);
+    unsigned char *mpt=((unsigned char *)mpc)+count;
+
+    if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+        panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+              mpc->mpc_signature[0],
+              mpc->mpc_signature[1],
+              mpc->mpc_signature[2],
+              mpc->mpc_signature[3]);
+        return 0;
+    }
+    if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+        panic("SMP mptable: checksum error!\n");
+        return 0;
+    }
+    if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+        printk("SMP mptable: bad table version (%d)!!\n",
+               mpc->mpc_spec);
+        return 0;
+    }
+    if (!mpc->mpc_lapic) {
+        printk("SMP mptable: null local APIC address!\n");
+        return 0;
+    }
+    memcpy(str,mpc->mpc_oem,8);
+    str[8]=0;
+    printk("OEM ID: %s ",str);
+
+    memcpy(str,mpc->mpc_productid,12);
+    str[12]=0;
+    printk("Product ID: %s ",str);
+
+    printk("APIC at: 0x%lX\n", mpc->mpc_lapic);
+
+    /* save the local APIC address, it might be non-default. */
+    mp_lapic_addr = mpc->mpc_lapic;
+
+    /*
+     * Now process the configuration blocks.
+     */
+    while (count < mpc->mpc_length) {
+        switch(*mpt) {
+        case MP_PROCESSOR:
+        {
+            struct mpc_config_processor *m=
+                (struct mpc_config_processor *)mpt;
+
+            MP_processor_info(m);
+            mpt += sizeof(*m);
+            count += sizeof(*m);
+            break;
+        }
+        case MP_BUS:
+        {
+            struct mpc_config_bus *m=
+                (struct mpc_config_bus *)mpt;
+            MP_bus_info(m);
+            mpt += sizeof(*m);
+            count += sizeof(*m);
+            break;
+        }
+        case MP_IOAPIC:
+        {
+            struct mpc_config_ioapic *m=
+                (struct mpc_config_ioapic *)mpt;
+            MP_ioapic_info(m);
+            mpt+=sizeof(*m);
+            count+=sizeof(*m);
+            break;
+        }
+        case MP_INTSRC:
+        {
+            struct mpc_config_intsrc *m=
+                (struct mpc_config_intsrc *)mpt;
+
+            MP_intsrc_info(m);
+            mpt+=sizeof(*m);
+            count+=sizeof(*m);
+            break;
+        }
+        case MP_LINTSRC:
+        {
+            struct mpc_config_lintsrc *m=
+                (struct mpc_config_lintsrc *)mpt;
+            MP_lintsrc_info(m);
+            mpt+=sizeof(*m);
+            count+=sizeof(*m);
+            break;
+        }
+        default:
+        {
+            count = mpc->mpc_length;
+            break;
+        }
+        }
+        ++mpc_record;
+    }
+
+    if (!num_processors)
+        printk("SMP mptable: no processors registered!\n");
+    return num_processors;
+}
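+
+/*
+ * Note on the record layout parsed above: each MP configuration record
+ * begins with a one-byte type code; processor records are 20 bytes and
+ * the bus, I/O APIC, interrupt-source and local-interrupt records are
+ * 8 bytes each, which is why the loop advances by sizeof(*m) per record.
+ */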
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+    unsigned int port;
+
+    port = 0x4d0 + (irq >> 3);
+    return (inb(port) >> (irq & 7)) & 1;
+}
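+
+/*
+ * Worked example: for IRQ 9, port = 0x4d0 + (9 >> 3) = 0x4d1 and the
+ * result is (inb(0x4d1) >> 1) & 1; a set ELCR bit means the line is
+ * level triggered, a clear bit means edge triggered.
+ */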
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+    struct mpc_config_intsrc intsrc;
+    int i;
+    int ELCR_fallback = 0;
+
+    intsrc.mpc_type = MP_INTSRC;
+    intsrc.mpc_irqflag = 0;                    /* conforming */
+    intsrc.mpc_srcbus = 0;
+    intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+    intsrc.mpc_irqtype = mp_INT;
+
+    /*
+     *  If true, we have an ISA/PCI system with no IRQ entries
+     *  in the MP table. To prevent the PCI interrupts from being set up
+     *  incorrectly, we try to use the ELCR. The sanity check to see if
+     *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+     *  never be level sensitive, so we simply see if the ELCR agrees.
+     *  If it does, we assume it's valid.
+     */
+    if (mpc_default_type == 5) {
+        printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+        if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+            printk("ELCR contains invalid data... not using ELCR\n");
+        else {
+            printk("Using ELCR to identify PCI interrupts\n");
+            ELCR_fallback = 1;
+        }
+    }
+
+    for (i = 0; i < 16; i++) {
+        switch (mpc_default_type) {
+        case 2:
+            if (i == 0 || i == 13)
+                continue;      /* IRQ0 & IRQ13 not connected */
+            /* fall through */
+        default:
+            if (i == 2)
+                continue;      /* IRQ2 is never connected */
+        }
+
+        if (ELCR_fallback) {
+            /*
+             *  If the ELCR indicates a level-sensitive interrupt, we
+             *  copy that information over to the MP table in the
+             *  irqflag field (level sensitive, active high polarity).
+             */
+            if (ELCR_trigger(i))
+                intsrc.mpc_irqflag = 13;
+            else
+                intsrc.mpc_irqflag = 0;
+        }
+
+        intsrc.mpc_srcbusirq = i;
+        intsrc.mpc_dstirq = i ? i : 2;         /* IRQ0 to INTIN2 */
+        MP_intsrc_info(&intsrc);
+    }
+
+    intsrc.mpc_irqtype = mp_ExtINT;
+    intsrc.mpc_srcbusirq = 0;
+    intsrc.mpc_dstirq = 0;                             /* 8259A to INTIN0 */
+    MP_intsrc_info(&intsrc);
+}
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+    struct mpc_config_processor processor;
+    struct mpc_config_bus bus;
+    struct mpc_config_ioapic ioapic;
+    struct mpc_config_lintsrc lintsrc;
+    int linttypes[2] = { mp_ExtINT, mp_NMI };
+    int i;
+
+    /*
+     * local APIC has default address
+     */
+    mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+    /*
+     * 2 CPUs, numbered 0 & 1.
+     */
+    processor.mpc_type = MP_PROCESSOR;
+    /* Either an integrated APIC or a discrete 82489DX. */
+    processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+    processor.mpc_cpuflag = CPU_ENABLED;
+    processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+        (boot_cpu_data.x86_model << 4) |
+        boot_cpu_data.x86_mask;
+    processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+    processor.mpc_reserved[0] = 0;
+    processor.mpc_reserved[1] = 0;
+    for (i = 0; i < 2; i++) {
+        processor.mpc_apicid = i;
+        MP_processor_info(&processor);
+    }
+
+    bus.mpc_type = MP_BUS;
+    bus.mpc_busid = 0;
+    switch (mpc_default_type) {
+    default:
+        printk("???\nUnknown standard configuration %d\n",
+               mpc_default_type);
+        /* fall through */
+    case 1:
+    case 5:
+        memcpy(bus.mpc_bustype, "ISA   ", 6);
+        break;
+    case 2:
+    case 6:
+    case 3:
+        memcpy(bus.mpc_bustype, "EISA  ", 6);
+        break;
+    case 4:
+    case 7:
+        memcpy(bus.mpc_bustype, "MCA   ", 6);
+    }
+    MP_bus_info(&bus);
+    if (mpc_default_type > 4) {
+        bus.mpc_busid = 1;
+        memcpy(bus.mpc_bustype, "PCI   ", 6);
+        MP_bus_info(&bus);
+    }
+
+    ioapic.mpc_type = MP_IOAPIC;
+    ioapic.mpc_apicid = 2;
+    ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+    ioapic.mpc_flags = MPC_APIC_USABLE;
+    ioapic.mpc_apicaddr = 0xFEC00000;
+    MP_ioapic_info(&ioapic);
+
+    /*
+     * We set up most of the low 16 IO-APIC pins according to MPS rules.
+     */
+    construct_default_ioirq_mptable(mpc_default_type);
+
+    lintsrc.mpc_type = MP_LINTSRC;
+    lintsrc.mpc_irqflag = 0;           /* conforming */
+    lintsrc.mpc_srcbusid = 0;
+    lintsrc.mpc_srcbusirq = 0;
+    lintsrc.mpc_destapic = MP_APIC_ALL;
+    for (i = 0; i < 2; i++) {
+        lintsrc.mpc_irqtype = linttypes[i];
+        lintsrc.mpc_destapiclint = i;
+        MP_lintsrc_info(&lintsrc);
+    }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+    struct intel_mp_floating *mpf = mpf_found;
+       
+    printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+    if (mpf->mpf_feature2 & (1<<7)) {
+        printk("    IMCR and PIC compatibility mode.\n");
+        pic_mode = 1;
+    } else {
+        printk("    Virtual Wire compatibility mode.\n");
+        pic_mode = 0;
+    }
+
+    /*
+     * Now see if we need to read further.
+     */
+    if (mpf->mpf_feature1 != 0) {
+
+        printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+        construct_default_ISA_mptable(mpf->mpf_feature1);
+
+    } else if (mpf->mpf_physptr) {
+
+        /*
+         * Read the physical hardware table.  Anything here will
+         * override the defaults.
+         */
+        if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+            smp_found_config = 0;
+            printk("BIOS bug, MP table errors detected!...\n");
+            printk("... disabling SMP support. (tell your hw vendor)\n");
+            return;
+        }
+        /*
+         * If there are no explicit MP IRQ entries, then we are
+         * broken.  We set up most of the low 16 IO-APIC pins to
+         * ISA defaults and hope it will work.
+         */
+        if (!mp_irq_entries) {
+            struct mpc_config_bus bus;
+
+            printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+            bus.mpc_type = MP_BUS;
+            bus.mpc_busid = 0;
+            memcpy(bus.mpc_bustype, "ISA   ", 6);
+            MP_bus_info(&bus);
+
+            construct_default_ioirq_mptable(0);
+        }
+
+    } else
+        BUG();
+
+    printk("Processors: %d\n", num_processors);
+    /*
+     * Only use the first configuration found.
+     */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+    unsigned long *bp = phys_to_virt(base);
+    struct intel_mp_floating *mpf;
+
+    Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+    if (sizeof(*mpf) != 16)
+        printk("Error: MPF size\n");
+
+    while (length > 0) {
+        mpf = (struct intel_mp_floating *)bp;
+        if ((*bp == SMP_MAGIC_IDENT) &&
+            (mpf->mpf_length == 1) &&
+            !mpf_checksum((unsigned char *)bp, 16) &&
+            ((mpf->mpf_specification == 1)
+             || (mpf->mpf_specification == 4)) ) {
+
+            smp_found_config = 1;
+            printk("found SMP MP-table at %08lx\n",
+                   virt_to_phys(mpf));
+            reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+            if (mpf->mpf_physptr)
+                reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+            mpf_found = mpf;
+            return 1;
+        }
+        bp += 4;
+        length -= 16;
+    }
+    return 0;
+}
+
+void __init find_intel_smp (void)
+{
+    /*
+     * 1) Scan the bottom 1K for a signature
+     * 2) Scan the top 1K of base RAM
+     * 3) Scan the 64K of BIOS
+     */
+    if (smp_scan_config(0x0,0x400) ||
+        smp_scan_config(639*0x400,0x400) ||
+        smp_scan_config(0xF0000,0x10000))
+        return;
+}
+
+/*
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ */
+void __init find_smp_config (void)
+{
+    find_intel_smp();
+}
+
diff --git a/xen-2.4.16/arch/i386/pci-dma.c b/xen-2.4.16/arch/i386/pci-dma.c
new file mode 100644 (file)
index 0000000..7cf3a4e
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * On i386 there is no hardware dynamic DMA address translation,
+ * so consistent alloc/free are merely page allocation/freeing.
+ * The rest of the dynamic DMA mapping interface is implemented
+ * in asm/pci.h.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/lib.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+                          dma_addr_t *dma_handle)
+{
+       void *ret;
+       int gfp = GFP_ATOMIC;
+
+       if (hwdev == NULL || hwdev->dma_mask != 0xffffffff)
+               gfp |= GFP_DMA;
+       ret = (void *)__get_free_pages(gfp, get_order(size));
+
+       if (ret != NULL) {
+               memset(ret, 0, size);
+               *dma_handle = virt_to_bus(ret);
+       }
+       return ret;
+}
+
+void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+                        void *vaddr, dma_addr_t dma_handle)
+{
+       free_pages((unsigned long)vaddr, get_order(size));
+}
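+
+/*
+ * Minimal usage sketch (hypothetical driver code, for illustration
+ * only): allocate a consistent buffer, hand the bus address to the
+ * device, and free the buffer when finished.
+ *
+ *     dma_addr_t bus_addr;
+ *     void *buf = pci_alloc_consistent(pdev, 4096, &bus_addr);
+ *     if (buf != NULL) {
+ *             ... program the device with bus_addr ...
+ *             pci_free_consistent(pdev, 4096, buf, bus_addr);
+ *     }
+ */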
diff --git a/xen-2.4.16/arch/i386/pci-i386.c b/xen-2.4.16/arch/i386/pci-i386.c
new file mode 100644 (file)
index 0000000..f2e9c43
--- /dev/null
@@ -0,0 +1,354 @@
+/*
+ *     Low-Level PCI Access for i386 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ *      Visionary Computing
+ *      (Unix and Linux consulting and custom programming)
+ *      Drew@Colorado.EDU
+ *      +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ *     iX Multiuser Multitasking Magazine
+ *     Hannover, Germany
+ *     hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ *     Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5,  1995 : Modified to probe PCI hardware at boot time by Frederic
+ *     Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ *      devices into a list, which can be accessed via /proc/pci by
+ *      Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ *     Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ *     moved to drivers/pci/pci.c.
+ *
+ * Dec 7, 1996  : Added support for direct configuration access of boards
+ *      with Intel compatible access schemes (tsbogend@alpha.franken.de)
+ *
+ * Feb 3,  1997  : Set internal functions to static, save/restore flags
+ *     to avoid deadlocks when reading a broken PCI BIOS, werner@suse.de
+ *
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ *     (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7,  1997 : Added some missing cli()'s. [mj]
+ * 
+ * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
+ *      (paubert@iram.es)
+ *
+ * Aug 2,  1997 : Split to PCI BIOS handling and direct PCI access parts
+ *     and cleaned it up...     Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6,  1998 : No longer using BIOS to find devices and device classes. [mj]
+ *
+ * May 1,  1998 : Support for peer host bridges. [mj]
+ *
+ * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
+ *     can be accessed from interrupts even on SMP systems. [mj]
+ *
+ * August  1998 : Better support for peer host bridges and more paranoid
+ *     checks for direct hardware access. Ugh, this file starts to look like
+ *     a large gallery of common hardware bug workarounds (watch the comments)
+ *     -- the PCI specs themselves are sane, but most implementors should be
+ *     hit hard with \hammer scaled \magstep5. [mj]
+ *
+ * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
+ *
+ * Feb 8,  1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August  1999 : New resource management and configuration access stuff. [mj]
+ *
+ * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
+ *               Based on ideas by Chris Frantz and David Hinds. [mj]
+ *
+ * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
+ *               for a lot of patience during testing. [mj]
+ *
+ * Oct  8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
+ */
+
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+
+#include "pci-i386.h"
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+                       struct resource *res, int resource)
+{
+       u32 new, check;
+       int reg;
+
+       new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+       if (resource < 6) {
+               reg = PCI_BASE_ADDRESS_0 + 4*resource;
+       } else if (resource == PCI_ROM_RESOURCE) {
+               res->flags |= PCI_ROM_ADDRESS_ENABLE;
+               new |= PCI_ROM_ADDRESS_ENABLE;
+               reg = dev->rom_base_reg;
+       } else {
+               /* Somebody might have requested allocation of a non-standard resource */
+               return;
+       }
+       
+       pci_write_config_dword(dev, reg, new);
+       pci_read_config_dword(dev, reg, &check);
+       if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+               printk(KERN_ERR "PCI: Error while updating region "
+                      "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
+                      new, check);
+       }
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
+void
+pcibios_align_resource(void *data, struct resource *res, unsigned long size)
+{
+       if (res->flags & IORESOURCE_IO) {
+               unsigned long start = res->start;
+
+               if (start & 0x300) {
+                       start = (start + 0x3ff) & ~0x3ff;
+                       res->start = start;
+               }
+       }
+}
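+
+/*
+ * Worked example: an I/O region starting at 0x2900 has bit 8 set
+ * (0x2900 & 0x300 == 0x100), so it may alias the motherboard range;
+ * it is rounded up to (0x2900 + 0x3ff) & ~0x3ff == 0x2c00, which lies
+ * in the safe 0x000-0x0ff window modulo 0x400.
+ */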
+
+
+/*
+ *  Handle resources of PCI devices.  If the world were perfect, we could
+ *  just allocate all the resource regions and do nothing more.  It isn't.
+ *  On the other hand, we cannot just re-allocate all devices, as it would
+ *  require us to know lots of host bridge internals.  So we attempt to
+ *  keep as much of the original configuration as possible, but tweak it
+ *  when it's found to be wrong.
+ *
+ *  Known BIOS problems we have to work around:
+ *     - I/O or memory regions not configured
+ *     - regions configured, but not enabled in the command register
+ *     - bogus I/O addresses above 64K used
+ *     - expansion ROMs left enabled (this may sound harmless, but given
+ *       the fact the PCI specs explicitly allow address decoders to be
+ *       shared between expansion ROMs and other resource regions, it's
+ *       at least dangerous)
+ *
+ *  Our solution:
+ *     (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ *         This gives us fixed barriers on where we can allocate.
+ *     (2) Allocate resources for all enabled devices.  If there is
+ *         a collision, just mark the resource as unallocated. Also
+ *         disable expansion ROMs during this step.
+ *     (3) Try to allocate resources for disabled devices.  If the
+ *         resources were assigned correctly, everything goes well,
+ *         if they weren't, they won't disturb allocation of other
+ *         resources.
+ *     (4) Assign new addresses to resources which were either
+ *         not configured at all or misconfigured.  If explicitly
+ *         requested by the user, configure expansion ROM address
+ *         as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+       struct list_head *ln;
+       struct pci_bus *bus;
+       struct pci_dev *dev;
+       int idx;
+       struct resource *r, *pr;
+
+       /* Depth-First Search on bus tree */
+       for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+               bus = pci_bus_b(ln);
+               if ((dev = bus->self)) {
+                       for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+                               r = &dev->resource[idx];
+                               if (!r->start)
+                                       continue;
+                               pr = pci_find_parent_resource(dev, r);
+                               if (!pr || request_resource(pr, r) < 0)
+                                       printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
+                       }
+               }
+               pcibios_allocate_bus_resources(&bus->children);
+       }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+       struct pci_dev *dev;
+       int idx, disabled;
+       u16 command;
+       struct resource *r, *pr;
+
+       pci_for_each_dev(dev) {
+               pci_read_config_word(dev, PCI_COMMAND, &command);
+               for(idx = 0; idx < 6; idx++) {
+                       r = &dev->resource[idx];
+                       if (r->parent)          /* Already allocated */
+                               continue;
+                       if (!r->start)          /* Address not assigned at all */
+                               continue;
+                       if (r->flags & IORESOURCE_IO)
+                               disabled = !(command & PCI_COMMAND_IO);
+                       else
+                               disabled = !(command & PCI_COMMAND_MEMORY);
+                       if (pass == disabled) {
+                               DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+                                   r->start, r->end, r->flags, disabled, pass);
+                               pr = pci_find_parent_resource(dev, r);
+                               if (!pr || request_resource(pr, r) < 0) {
+                                       printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
+                                       /* We'll assign a new address later */
+                                       r->end -= r->start;
+                                       r->start = 0;
+                               }
+                       }
+               }
+               if (!pass) {
+                       r = &dev->resource[PCI_ROM_RESOURCE];
+                       if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
+                               /* Turn the ROM off, leave the resource region, but keep it unregistered. */
+                               u32 reg;
+                               DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
+                               r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
+                               pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+                               pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+                       }
+               }
+       }
+}
+
+static void __init pcibios_assign_resources(void)
+{
+       struct pci_dev *dev;
+       int idx;
+       struct resource *r;
+
+       pci_for_each_dev(dev) {
+               int class = dev->class >> 8;
+
+               /* Don't touch classless devices and host bridges */
+               if (!class || class == PCI_CLASS_BRIDGE_HOST)
+                       continue;
+
+               for(idx=0; idx<6; idx++) {
+                       r = &dev->resource[idx];
+
+                       /*
+                        *  Don't touch IDE controllers and I/O ports of video cards!
+                        */
+                       if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+                           (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+                               continue;
+
+                       /*
+                        *  We shall assign a new address to this resource, either because
+                        *  the BIOS forgot to do so or because we have decided the old
+                        *  address was unusable for some reason.
+                        */
+                       if (!r->start && r->end)
+                               pci_assign_resource(dev, idx);
+               }
+
+               if (pci_probe & PCI_ASSIGN_ROMS) {
+                       r = &dev->resource[PCI_ROM_RESOURCE];
+                       r->end -= r->start;
+                       r->start = 0;
+                       if (r->end)
+                               pci_assign_resource(dev, PCI_ROM_RESOURCE);
+               }
+       }
+}
+
+void __init pcibios_resource_survey(void)
+{
+       DBG("PCI: Allocating resources\n");
+       pcibios_allocate_bus_resources(&pci_root_buses);
+       pcibios_allocate_resources(0);
+       pcibios_allocate_resources(1);
+       pcibios_assign_resources();
+}
+
+int pcibios_enable_resources(struct pci_dev *dev)
+{
+       u16 cmd, old_cmd;
+       int idx;
+       struct resource *r;
+
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+       old_cmd = cmd;
+       for(idx=0; idx<6; idx++) {
+               r = &dev->resource[idx];
+               if (!r->start && r->end) {
+                       printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
+                       return -EINVAL;
+               }
+               if (r->flags & IORESOURCE_IO)
+                       cmd |= PCI_COMMAND_IO;
+               if (r->flags & IORESOURCE_MEM)
+                       cmd |= PCI_COMMAND_MEMORY;
+       }
+       if (dev->resource[PCI_ROM_RESOURCE].start)
+               cmd |= PCI_COMMAND_MEMORY;
+       if (cmd != old_cmd) {
+               printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
+               pci_write_config_word(dev, PCI_COMMAND, cmd);
+       }
+
+       return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check the latency
+ *  timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+       u8 lat;
+       pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+       if (lat < 16)
+               lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+       else if (lat > pcibios_max_latency)
+               lat = pcibios_max_latency;
+       else
+               return;
+       printk("PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
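+
+/*
+ * Worked examples: a BIOS-programmed latency of 8 is raised to 64 (or
+ * to pcibios_max_latency if that is lower); a value of 250 with the
+ * default maximum of 255 already lies in [16, 255] and is left alone.
+ */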
+
diff --git a/xen-2.4.16/arch/i386/pci-i386.h b/xen-2.4.16/arch/i386/pci-i386.h
new file mode 100644 (file)
index 0000000..2c821af
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ *     Low-Level PCI Access for i386 machines.
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS         0x0001
+#define PCI_PROBE_CONF1                0x0002
+#define PCI_PROBE_CONF2                0x0004
+#define PCI_NO_SORT            0x0100
+#define PCI_BIOS_SORT          0x0200
+#define PCI_NO_CHECKS          0x0400
+#define PCI_ASSIGN_ROMS                0x1000
+#define PCI_BIOS_IRQ_SCAN      0x2000
+#define PCI_ASSIGN_ALL_BUSSES  0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+int pcibios_enable_resources(struct pci_dev *);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+/* pci-irq.c */
+
+struct irq_info {
+       u8 bus, devfn;                  /* Bus, device and function */
+       struct {
+               u8 link;                /* IRQ line ID, chipset dependent, 0=not routed */
+               u16 bitmap;             /* Available IRQs */
+       } __attribute__((packed)) irq[4];
+       u8 slot;                        /* Slot number, 0=onboard */
+       u8 rfu;
+} __attribute__((packed));
+
+struct irq_routing_table {
+       u32 signature;                  /* PIRQ_SIGNATURE should be here */
+       u16 version;                    /* PIRQ_VERSION */
+       u16 size;                       /* Table size in bytes */
+       u8 rtr_bus, rtr_devfn;          /* Where the interrupt router lies */
+       u16 exclusive_irqs;             /* IRQs devoted exclusively to PCI usage */
+       u16 rtr_vendor, rtr_device;     /* Vendor and device ID of interrupt router */
+       u32 miniport_data;              /* Miniport driver private data */
+       u8 rfu[11];
+       u8 checksum;                    /* Modulo 256 checksum must give zero */
+       struct irq_info slots[0];
+} __attribute__((packed));
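+
+/*
+ * The number of slot entries is implied by the table header; a sketch
+ * of the size arithmetic used by pirq_peer_trick() and pirq_get_info()
+ * in pci-irq.c (the helper name is illustrative only):
+ *
+ *     static inline int pirq_table_entries(struct irq_routing_table *rt)
+ *     {
+ *             return (rt->size - sizeof(struct irq_routing_table)) /
+ *                     sizeof(struct irq_info);
+ *     }
+ */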
+
+extern unsigned int pcibios_irq_mask;
+
+void pcibios_irq_init(void);
+void pcibios_fixup_irqs(void);
+void pcibios_enable_irq(struct pci_dev *dev);
diff --git a/xen-2.4.16/arch/i386/pci-irq.c b/xen-2.4.16/arch/i386/pci-irq.c
new file mode 100644 (file)
index 0000000..f08f379
--- /dev/null
@@ -0,0 +1,753 @@
+/*
+ *     Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+
+#include "pci-i386.h"
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
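+
+/*
+ * PIRQ_SIGNATURE is the little-endian encoding of the ASCII string
+ * "$PIR" that marks a PCI IRQ routing table in the BIOS area.
+ */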
+
+static struct irq_routing_table *pirq_table;
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FPU error and the two IDE channels).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+       1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+       0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
+
+struct irq_router {
+       char *name;
+       u16 vendor, device;
+       int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+       int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+/*
+ *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+       u8 *addr;
+       struct irq_routing_table *rt;
+       int i;
+       u8 sum;
+
+       for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+               rt = (struct irq_routing_table *) addr;
+               if (rt->signature != PIRQ_SIGNATURE ||
+                   rt->version != PIRQ_VERSION ||
+                   rt->size % 16 ||
+                   rt->size < sizeof(struct irq_routing_table))
+                       continue;
+               sum = 0;
+               for(i=0; i<rt->size; i++)
+                       sum += addr[i];
+               if (!sum) {
+                       DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+                       return rt;
+               }
+       }
+       return NULL;
+}
+
+/*
+ *  If we have a IRQ routing table, use it to search for peer host
+ *  bridges.  It's a gross hack, but since there are no other known
+ *  ways how to get a list of buses, we have to go this way.
+ */
+
+static void __init pirq_peer_trick(void)
+{
+       struct irq_routing_table *rt = pirq_table;
+       u8 busmap[256];
+       int i;
+       struct irq_info *e;
+
+       memset(busmap, 0, sizeof(busmap));
+       for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+               e = &rt->slots[i];
+#ifdef DEBUG
+               {
+                       int j;
+                       DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+                       for(j=0; j<4; j++)
+                               DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+                       DBG("\n");
+               }
+#endif
+               busmap[e->bus] = 1;
+       }
+       for(i=1; i<256; i++)
+               /*
+                *  It might be a secondary bus, but in this case its parent is already
+                *  known (ascending bus order) and therefore pci_scan_bus returns immediately.
+                */
+               if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
+                       printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+       pcibios_last_bus = -1;
+}
+
+/*
+ *  Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+static void eisa_set_level_irq(unsigned int irq)
+{
+       unsigned char mask = 1 << (irq & 7);
+       unsigned int port = 0x4d0 + (irq >> 3);
+       unsigned char val = inb(port);
+
+       if (!(val & mask)) {
+               DBG(" -> edge");
+               outb(val | mask, port);
+       }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+       u8 x;
+       unsigned reg = offset + (nr >> 1);
+
+       pci_read_config_byte(router, reg, &x);
+       return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+       u8 x;
+       unsigned reg = offset + (nr >> 1);
+
+       pci_read_config_byte(router, reg, &x);
+       x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+       pci_write_config_byte(router, reg, x);
+}
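+
+/*
+ * Worked example: read_config_nybble(router, 0x55, 3) reads config
+ * register 0x55 + (3 >> 1) == 0x56 and, since nr is odd, returns its
+ * high nibble; nr == 2 would select the low nibble of the same register.
+ */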
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+       return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+       unsigned int val = irqmap[irq];
+               
+       if (val) {
+               write_config_nybble(router, 0x48, pirq-1, val);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 x;
+
+       pci_read_config_byte(router, pirq, &x);
+       return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       pci_write_config_byte(router, pirq, irq);
+       return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0x55, pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0x55, pirq, irq);
+       return 1;
+}
+
+/*
+ * OPTI: high four bits are nibble pointer..
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0xb8, pirq >> 4, irq);
+       return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0x5C, pirq-1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0x5C, pirq-1, irq);
+       return 1;
+}
+
+/*
+ *     PIRQ routing for SiS 85C503 router used in several SiS chipsets
+ *     According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997)
+ *     the related registers work as follows:
+ *     
+ *     general: one byte per re-routable IRQ,
+ *              bit 7      IRQ mapping enabled (0) or disabled (1)
+ *              bits [6:4] reserved
+ *              bits [3:0] IRQ to map to
+ *                  allowed: 3-7, 9-12, 14-15
+ *                  reserved: 0, 1, 2, 8, 13
+ *
+ *     individual registers in device config space:
+ *
+ *     0x41/0x42/0x43/0x44:    PCI INT A/B/C/D - bits as in general case
+ *
+ *     0x61:                   IDEIRQ: bits as in general case - but:
+ *                             bits [6:5] must be written 01
+ *                             bit 4 channel-select primary (0), secondary (1)
+ *
+ *     0x62:                   USBIRQ: bits as in general case - but:
+ *                             bit 4 OHCI function disabled (0), enabled (1)
+ *     
+ *     0x6a:                   ACPI/SCI IRQ - bits as in general case
+ *
+ *     0x7e:                   Data Acq. Module IRQ - bits as in general case
+ *
+ *     Apparently some systems implement the PCI routing table using both
+ *     link values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, but also register
+ *     offsets such as 0x62 as link values for e.g. USBIRQ, so there is no
+ *     simple "register = offset + pirq" relation.
+ *     Currently we support PCI INTA..D and USBIRQ and try our best to handle
+ *     both link mappings.
+ *     IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by BIOS).
+ */
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 x;
+       int reg = pirq;
+
+       switch(pirq) {
+               case 0x01:
+               case 0x02:
+               case 0x03:
+               case 0x04:
+                       reg += 0x40;
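+                       /* fall through */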
+               case 0x41:
+               case 0x42:
+               case 0x43:
+               case 0x44:
+               case 0x62:
+                       pci_read_config_byte(router, reg, &x);
+                       if (reg != 0x62)
+                               break;
+                       if (!(x & 0x40))
+                               return 0;
+                       break;
+               case 0x61:
+               case 0x6a:
+               case 0x7e:
+                       printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n");
+                       return 0;
+               default:                        
+                       printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+                       return 0;
+       }
+       return (x & 0x80) ? 0 : (x & 0x0f);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       u8 x;
+       int reg = pirq;
+
+       switch(pirq) {
+               case 0x01:
+               case 0x02:
+               case 0x03:
+               case 0x04:
+                       reg += 0x40;
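+                       /* fall through */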
+               case 0x41:
+               case 0x42:
+               case 0x43:
+               case 0x44:
+               case 0x62:
+                       x = (irq&0x0f) ? (irq&0x0f) : 0x80;
+                       if (reg != 0x62)
+                               break;
+                       /* always mark OHCI enabled, as nothing else knows about this */
+                       x |= 0x40;
+                       break;
+               case 0x61:
+               case 0x6a:
+               case 0x7e:
+                       printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n");
+                       return 0;
+               default:                        
+                       printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+                       return 0;
+       }
+       pci_write_config_byte(router, reg, x);
+
+       return 1;
+}
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ *       config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ *       Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ *       devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ *       for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       if (pirq > 8) {
+               printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+               return 0;
+       }
+       return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       if (pirq > 8) {
+               printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+               return 0;
+       }
+       write_config_nybble(router, 0x74, pirq-1, irq);
+       return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01).  The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a.  The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register.  There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       outb_p(pirq, 0xc00);
+       return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       outb_p(pirq, 0xc00);
+       outb_p(irq, 0xc01);
+       return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based:
+ *   offset 0x56: bits 0-3 PIRQA, bits 4-7 PIRQB
+ *   offset 0x57: bits 0-3 PIRQC, bits 4-7 PIRQD
+ */
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 irq;
+       irq = 0;
+       if (pirq <= 4)
+       {
+               irq = read_config_nybble(router, 0x56, pirq - 1);
+       }
+       printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+               dev->vendor, dev->device, pirq, irq);
+       return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", 
+               dev->vendor, dev->device, pirq, irq);
+       if (pirq <= 4)
+       {
+               write_config_nybble(router, 0x56, pirq - 1, irq);
+       }
+       return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       struct pci_dev *bridge;
+       int pin = pci_get_interrupt_pin(dev, &bridge);
+       return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+static struct irq_router pirq_bios_router =
+       { "BIOS", 0, 0, NULL, pirq_bios_set };
+
+#endif
+
+static struct irq_router pirq_routers[] = {
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX,   pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, pirq_piix_get, pirq_piix_set },
+
+       { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set },
+
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set },
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, pirq_via_get, pirq_via_set },
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, pirq_via_get, pirq_via_set },
+
+       { "OPTI", PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700, pirq_opti_get, pirq_opti_set },
+
+       { "NatSemi", PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, pirq_cyrix_get, pirq_cyrix_set },
+       { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set },
+       { "VLSI 82C534", PCI_VENDOR_ID_VLSI, PCI_DEVICE_ID_VLSI_82C534, pirq_vlsi_get, pirq_vlsi_set },
+       { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4,
+         pirq_serverworks_get, pirq_serverworks_set },
+       { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5,
+         pirq_serverworks_get, pirq_serverworks_set },
+       { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B,
+               pirq_amd756_get, pirq_amd756_set },
+
+       { "default", 0, 0, NULL, NULL }
+};
+
+static struct irq_router *pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+static void __init pirq_find_router(void)
+{
+       struct irq_routing_table *rt = pirq_table;
+       struct irq_router *r;
+
+#ifdef CONFIG_PCI_BIOS
+       if (!rt->signature) {
+               printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+               pirq_router = &pirq_bios_router;
+               return;
+       }
+#endif
+
+       DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+           rt->rtr_vendor, rt->rtr_device);
+
+       /* fall back to default router if nothing else found */
+       pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1];
+
+       pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+       if (!pirq_router_dev) {
+               DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+               return;
+       }
+
+       for(r=pirq_routers; r->vendor; r++) {
+               /* Exact match against router table entry? Use it! */
+               if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) {
+                       pirq_router = r;
+                       break;
+               }
+               /* Match against router device entry? Use it as a fallback */
+               if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) {
+                       pirq_router = r;
+               }
+       }
+       printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+               pirq_router->name,
+               pirq_router_dev->vendor,
+               pirq_router_dev->device,
+               pirq_router_dev->slot_name);
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+       struct irq_routing_table *rt = pirq_table;
+       int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+       struct irq_info *info;
+
+       for (info = rt->slots; entries--; info++)
+               if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+                       return info;
+       return NULL;
+}
+
+static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+       u8 pin;
+       struct irq_info *info;
+       int i, pirq, newirq;
+       int irq = 0;
+       u32 mask;
+       struct irq_router *r = pirq_router;
+       struct pci_dev *dev2;
+       char *msg = NULL;
+
+       if (!pirq_table)
+               return 0;
+
+       /* Find IRQ routing entry */
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       if (!pin) {
+               DBG(" -> no interrupt pin\n");
+               return 0;
+       }
+       pin = pin - 1;
+       
+       DBG("IRQ for %s:%d", dev->slot_name, pin);
+       info = pirq_get_info(dev);
+       if (!info) {
+               DBG(" -> not found in routing table\n");
+               return 0;
+       }
+       pirq = info->irq[pin].link;
+       mask = info->irq[pin].bitmap;
+       if (!pirq) {
+               DBG(" -> not routed\n");
+               return 0;
+       }
+       DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+       mask &= pcibios_irq_mask;
+
+       /*
+        * Find the best IRQ to assign: use the one
+        * reported by the device if possible.
+        */
+       newirq = dev->irq;
+       if (!newirq && assign) {
+               for (i = 0; i < 16; i++) {
+                       if (!(mask & (1 << i)))
+                               continue;
+                       if (pirq_penalty[i] < pirq_penalty[newirq] &&
+                           !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
+                               free_irq(i, dev);
+                               newirq = i;
+                       }
+               }
+       }
+       DBG(" -> newirq=%d", newirq);
+
+       /* Check if it is hardcoded */
+       if ((pirq & 0xf0) == 0xf0) {
+               irq = pirq & 0xf;
+               DBG(" -> hardcoded IRQ %d\n", irq);
+               msg = "Hardcoded";
+       } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
+               DBG(" -> got IRQ %d\n", irq);
+               msg = "Found";
+       } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+               DBG(" -> assigning IRQ %d", newirq);
+               if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+                       eisa_set_level_irq(newirq);
+                       DBG(" ... OK\n");
+                       msg = "Assigned";
+                       irq = newirq;
+               }
+       }
+
+       if (!irq) {
+               DBG(" ... failed\n");
+               if (newirq && mask == (1 << newirq)) {
+                       msg = "Guessed";
+                       irq = newirq;
+               } else
+                       return 0;
+       }
+       printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
+
+       /* Update IRQ for all devices with the same pirq value */
+       pci_for_each_dev(dev2) {
+               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+               if (!pin)
+                       continue;
+               pin--;
+               info = pirq_get_info(dev2);
+               if (!info)
+                       continue;
+               if (info->irq[pin].link == pirq) {
+                       /* We refuse to override the dev->irq information. Give a warning! */
+                       if (dev2->irq && dev2->irq != irq) {
+                               printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+                                      dev2->slot_name, dev2->irq, irq);
+                               continue;
+                       }
+                       dev2->irq = irq;
+                       pirq_penalty[irq]++;
+                       if (dev != dev2)
+                               printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
+               }
+       }
+       return 1;
+}
+
+void __init pcibios_irq_init(void)
+{
+       DBG("PCI: IRQ init\n");
+       pirq_table = pirq_find_routing_table();
+#ifdef CONFIG_PCI_BIOS
+       if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+               pirq_table = pcibios_get_irq_routing_table();
+#endif
+       if (pirq_table) {
+               pirq_peer_trick();
+               pirq_find_router();
+               if (pirq_table->exclusive_irqs) {
+                       int i;
+                       for (i=0; i<16; i++)
+                               if (!(pirq_table->exclusive_irqs & (1 << i)))
+                                       pirq_penalty[i] += 100;
+               }
+               /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+               if (io_apic_assign_pci_irqs)
+                       pirq_table = NULL;
+       }
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+       struct pci_dev *dev;
+       u8 pin;
+
+       DBG("PCI: IRQ fixup\n");
+       pci_for_each_dev(dev) {
+               /*
+                * If the BIOS has set an out-of-range IRQ number, just ignore it.
+                * Also keep track of which IRQs are already in use.
+                */
+               if (dev->irq >= 16) {
+                       DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
+                       dev->irq = 0;
+               }
+               /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+               if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+                       pirq_penalty[dev->irq] = 0;
+               pirq_penalty[dev->irq]++;
+       }
+
+       pci_for_each_dev(dev) {
+               pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+               /*
+                * Recalculate IRQ numbers if we use the I/O APIC.
+                */
+               if (io_apic_assign_pci_irqs)
+               {
+                       int irq;
+
+                       if (pin) {
+                               pin--;          /* interrupt pins are numbered starting from 1 */
+                               irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+       /*
+        * Busses behind bridges are typically not listed in the MP-table.
+        * In this case we have to look up the IRQ based on the parent bus,
+        * parent slot, and pin number. The SMP code detects such bridged
+        * busses itself so we should get into this branch reliably.
+        */
+                               if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+                                       struct pci_dev * bridge = dev->bus->self;
+
+                                       pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+                                       irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
+                                                       PCI_SLOT(bridge->devfn), pin);
+                                       if (irq >= 0)
+                                               printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", 
+                                                       bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
+                               }
+                               if (irq >= 0) {
+                                       printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+                                               dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+                                       dev->irq = irq;
+                               }
+                       }
+               }
+#endif
+               /*
+                * Still no IRQ? Try to lookup one...
+                */
+               if (pin && !dev->irq)
+                       pcibios_lookup_irq(dev, 0);
+       }
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+       /*
+        *  If any ISAPnP device reports an IRQ in its list of possible
+        *  IRQs, we try to avoid assigning it to PCI devices.
+        */
+       pirq_penalty[irq] += 100;
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+       u8 pin;
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+               char *msg;
+               if (io_apic_assign_pci_irqs)
+                       msg = " Probably buggy MP table.";
+               else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+                       msg = "";
+               else
+                       msg = " Please try using pci=biosirq.";
+               printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+                      'A' + pin - 1, dev->slot_name, msg);
+       }
+}
diff --git a/xen-2.4.16/arch/i386/pci-pc.c b/xen-2.4.16/arch/i386/pci-pc.c
new file mode 100644 (file)
index 0000000..7b02d3f
--- /dev/null
@@ -0,0 +1,1276 @@
+/*
+ *     Low-Level PCI Support for PC
+ *
+ *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+
+//#include <asm/segment.h>
+#include <asm/io.h>
+
+#include "pci-i386.h"
+
+#define __KERNEL_CS __HYPERVISOR_CS
+#define __KERNEL_DS __HYPERVISOR_DS
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus = NULL;
+struct pci_ops *pci_root_ops = NULL;
+
+int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL;
+int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL;
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+
+#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \
+       (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3))
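+
+/*
+ * Worked example: bus 0, device 3, function 0, register 0x10 (BAR0)
+ * encodes as 0x80000000 | (3 << 11) | 0x10 == 0x80001810; the low two
+ * register bits are masked off here and reapplied as an offset on the
+ * 0xCFC data port for byte and word accesses.
+ */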
+
+static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+       unsigned long flags;
+
+       if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255))
+               return -EINVAL;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+       switch (len) {
+       case 1:
+               *value = inb(0xCFC + (reg & 3));
+               break;
+       case 2:
+               *value = inw(0xCFC + (reg & 2));
+               break;
+       case 4:
+               *value = inl(0xCFC);
+               break;
+       }
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return 0;
+}
+
+static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+       unsigned long flags;
+
+       if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) 
+               return -EINVAL;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8);
+
+       switch (len) {
+       case 1:
+               outb((u8)value, 0xCFC + (reg & 3));
+               break;
+       case 2:
+               outw((u16)value, 0xCFC + (reg & 2));
+               break;
+       case 4:
+               outl((u32)value, 0xCFC);
+               break;
+       }
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return 0;
+}
+
+#undef PCI_CONF1_ADDRESS
+
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+       int result; 
+       u32 data;
+
+       if (!value) 
+               return -EINVAL;
+
+       result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, &data);
+
+       *value = (u8)data;
+
+       return result;
+}
+
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+       int result; 
+       u32 data;
+
+       if (!value) 
+               return -EINVAL;
+
+       result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, &data);
+
+       *value = (u16)data;
+
+       return result;
+}
+
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+       if (!value) 
+               return -EINVAL;
+
+       return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+       return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+       return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+       return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf1 = {
+       pci_conf1_read_config_byte,
+       pci_conf1_read_config_word,
+       pci_conf1_read_config_dword,
+       pci_conf1_write_config_byte,
+       pci_conf1_write_config_word,
+       pci_conf1_write_config_dword
+};
+
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
+#define PCI_CONF2_ADDRESS(dev, reg)    (u16)(0xC000 | (dev << 8) | reg)
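+
+/*
+ * For illustration: with the function and bus latched via ports
+ * 0xCF8/0xCFA below, dev 3, reg 0x10 is reached at I/O port
+ * 0xC000 | (3 << 8) | 0x10 = 0xC310. Only four address bits carry
+ * the device number, hence the (dev & 0x10) "not found" checks.
+ */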
+
+static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+       unsigned long flags;
+
+       if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255))
+               return -EINVAL;
+
+       if (dev & 0x10) 
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+       outb((u8)bus, 0xCFA);
+
+       switch (len) {
+       case 1:
+               *value = inb(PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       case 2:
+               *value = inw(PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       case 4:
+               *value = inl(PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       }
+
+       outb (0, 0xCF8);
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return 0;
+}
+
+static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+       unsigned long flags;
+
+       if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) 
+               return -EINVAL;
+
+       if (dev & 0x10) 
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       outb((u8)(0xF0 | (fn << 1)), 0xCF8);
+       outb((u8)bus, 0xCFA);
+
+       switch (len) {
+       case 1:
+               outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       case 2:
+               outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       case 4:
+               outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg));
+               break;
+       }
+
+       outb (0, 0xCF8);    
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return 0;
+}
+
+#undef PCI_CONF2_ADDRESS
+
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+       int result; 
+       u32 data;
+       result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, &data);
+       *value = (u8)data;
+       return result;
+}
+
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+       int result; 
+       u32 data;
+       result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, &data);
+       *value = (u16)data;
+       return result;
+}
+
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+       return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+       return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+       return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+       return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static struct pci_ops pci_direct_conf2 = {
+       pci_conf2_read_config_byte,
+       pci_conf2_read_config_word,
+       pci_conf2_read_config_dword,
+       pci_conf2_write_config_byte,
+       pci_conf2_write_config_word,
+       pci_conf2_write_config_dword
+};
+
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __devinit pci_sanity_check(struct pci_ops *o)
+{
+       u16 x;
+       struct pci_bus bus;             /* Fake bus and device */
+       struct pci_dev dev;
+
+       if (pci_probe & PCI_NO_CHECKS)
+               return 1;
+       bus.number = 0;
+       dev.bus = &bus;
+       for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+               if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
+                    (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+                   (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
+                    (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+                       return 1;
+       DBG("PCI: Sanity check failed\n");
+       return 0;
+}
+
+static struct pci_ops * __devinit pci_check_direct(void)
+{
+       unsigned int tmp;
+       unsigned long flags;
+
+       __save_flags(flags); __cli();
+
+       /*
+        * Check if configuration type 1 works.
+        */
+       if (pci_probe & PCI_PROBE_CONF1) {
+               outb (0x01, 0xCFB);
+               tmp = inl (0xCF8);
+               outl (0x80000000, 0xCF8);
+               if (inl (0xCF8) == 0x80000000 &&
+                   pci_sanity_check(&pci_direct_conf1)) {
+                       outl (tmp, 0xCF8);
+                       __restore_flags(flags);
+                       printk("PCI: Using configuration type 1\n");
+                       request_region(0xCF8, 8, "PCI conf1");
+                       return &pci_direct_conf1;
+               }
+               outl (tmp, 0xCF8);
+       }
+
+       /*
+        * Check if configuration type 2 works.
+        */
+       if (pci_probe & PCI_PROBE_CONF2) {
+               outb (0x00, 0xCFB);
+               outb (0x00, 0xCF8);
+               outb (0x00, 0xCFA);
+               if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
+                   pci_sanity_check(&pci_direct_conf2)) {
+                       __restore_flags(flags);
+                       printk("PCI: Using configuration type 2\n");
+                       request_region(0xCF8, 4, "PCI conf2");
+                       return &pci_direct_conf2;
+               }
+       }
+
+       __restore_flags(flags);
+       return NULL;
+}
+
+#endif
+
+/*
+ * BIOS32 and PCI BIOS handling.
+ */
+
+#ifdef CONFIG_PCI_BIOS
+
+#define PCIBIOS_PCI_FUNCTION_ID        0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT       0xb101
+#define PCIBIOS_FIND_PCI_DEVICE                0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE    0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE       0xb108
+#define PCIBIOS_READ_CONFIG_WORD       0xb109
+#define PCIBIOS_READ_CONFIG_DWORD      0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE      0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD      0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD     0xb10d
+#define PCIBIOS_GET_ROUTING_OPTIONS    0xb10e
+#define PCIBIOS_SET_PCI_HW_INT         0xb10f
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE       (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE          (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE            (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
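+
+/*
+ * Each signature macro composes its ASCII string as a little-endian
+ * dword so it can be compared against memory in a single 32-bit load:
+ * e.g. "_32_" becomes 0x5f32335f and "PCI " becomes 0x20494350.
+ */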
+
+/* PCI BIOS hardware mechanism flags */
+#define PCIBIOS_HW_TYPE1               0x01
+#define PCIBIOS_HW_TYPE2               0x02
+#define PCIBIOS_HW_TYPE1_SPEC          0x10
+#define PCIBIOS_HW_TYPE2_SPEC          0x20
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ *     Standard BIOS 32-bit Service Directory Proposal
+ *     Revision 0.4 May 24, 1993
+ *     Phoenix Technologies Ltd.
+ *     Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+       struct {
+               unsigned long signature;        /* _32_ */
+               unsigned long entry;            /* 32 bit physical address */
+               unsigned char revision;         /* Revision level, 0 */
+               unsigned char length;           /* Length in paragraphs should be 01 */
+               unsigned char checksum;         /* All bytes must add up to zero */
+               unsigned char reserved[5];      /* Must be zero */
+       } fields;
+       char chars[16];
+};
+
+/*
+ * Physical address of the service directory.  I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static struct {
+       unsigned long address;
+       unsigned short segment;
+} bios32_indirect = { 0, __KERNEL_CS };
+
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
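+/*
+ * BIOS32 calling convention: %eax carries the service signature and
+ * %ebx the function selector (0) on entry; on return %al holds the
+ * status, %ebx the service base, %ecx its length and %edx the entry
+ * offset within the base, hence the address + entry sum below.
+ */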
+
+static unsigned long bios32_service(unsigned long service)
+{
+       unsigned char return_code;      /* %al */
+       unsigned long address;          /* %ebx */
+       unsigned long length;           /* %ecx */
+       unsigned long entry;            /* %edx */
+       unsigned long flags;
+
+       __save_flags(flags); __cli();
+       __asm__("lcall *(%%edi); cld"
+               : "=a" (return_code),
+                 "=b" (address),
+                 "=c" (length),
+                 "=d" (entry)
+               : "0" (service),
+                 "1" (0),
+                 "D" (&bios32_indirect));
+       __restore_flags(flags);
+
+       switch (return_code) {
+               case 0:
+                       return address + entry;
+               case 0x80:      /* Not present */
+                       printk("bios32_service(0x%lx): not present\n", service);
+                       return 0;
+               default: /* Shouldn't happen */
+                       printk("bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n",
+                               service, return_code);
+                       return 0;
+       }
+}
+
+static struct {
+       unsigned long address;
+       unsigned short segment;
+} pci_indirect = { 0, __KERNEL_CS };
+
+static int pci_bios_present;
+
+static int __devinit check_pcibios(void)
+{
+       u32 signature, eax, ebx, ecx;
+       u8 status, major_ver, minor_ver, hw_mech;
+       unsigned long flags, pcibios_entry;
+
+       if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+               pci_indirect.address = pcibios_entry + PAGE_OFFSET;
+
+               __save_flags(flags); __cli();
+               __asm__(
+                       "lcall *(%%edi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=d" (signature),
+                         "=a" (eax),
+                         "=b" (ebx),
+                         "=c" (ecx)
+                       : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+                         "D" (&pci_indirect)
+                       : "memory");
+               __restore_flags(flags);
+
+               status = (eax >> 8) & 0xff;
+               hw_mech = eax & 0xff;
+               major_ver = (ebx >> 8) & 0xff;
+               minor_ver = ebx & 0xff;
+               if (pcibios_last_bus < 0)
+                       pcibios_last_bus = ecx & 0xff;
+               DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n",
+                       status, hw_mech, major_ver, minor_ver, pcibios_last_bus);
+               if (status || signature != PCI_SIGNATURE) {
+                       printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n",
+                               status, signature);
+                       return 0;
+               }
+               printk("PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n",
+                       major_ver, minor_ver, pcibios_entry, pcibios_last_bus);
+#ifdef CONFIG_PCI_DIRECT
+               if (!(hw_mech & PCIBIOS_HW_TYPE1))
+                       pci_probe &= ~PCI_PROBE_CONF1;
+               if (!(hw_mech & PCIBIOS_HW_TYPE2))
+                       pci_probe &= ~PCI_PROBE_CONF2;
+#endif
+               return 1;
+       }
+       return 0;
+}
+
+static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+                                       unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+       unsigned short bx;
+       unsigned short ret;
+
+       __asm__("lcall *(%%edi); cld\n\t"
+               "jc 1f\n\t"
+               "xor %%ah, %%ah\n"
+               "1:"
+               : "=b" (bx),
+                 "=a" (ret)
+               : "1" (PCIBIOS_FIND_PCI_DEVICE),
+                 "c" (device_id),
+                 "d" (vendor),
+                 "S" ((int) index),
+                 "D" (&pci_indirect));
+       *bus = (bx >> 8) & 0xff;
+       *device_fn = bx & 0xff;
+       return (int) (ret & 0xff00) >> 8;
+}
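+
+/*
+ * PCI BIOS config calls take the target in %bx: the bus number in the
+ * high byte and devfn (device << 3 | function) in the low byte, e.g.
+ * bus 1, dev 4, fn 2 encodes as 0x0122.
+ */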
+
+static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value)
+{
+       unsigned long result = 0;
+       unsigned long flags;
+       unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+       if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255))
+               return -EINVAL;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       switch (len) {
+       case 1:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=c" (*value),
+                         "=a" (result)
+                       : "1" (PCIBIOS_READ_CONFIG_BYTE),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       case 2:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=c" (*value),
+                         "=a" (result)
+                       : "1" (PCIBIOS_READ_CONFIG_WORD),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       case 4:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=c" (*value),
+                         "=a" (result)
+                       : "1" (PCIBIOS_READ_CONFIG_DWORD),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       }
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value)
+{
+       unsigned long result = 0;
+       unsigned long flags;
+       unsigned long bx = ((bus << 8) | (dev << 3) | fn);
+
+       if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) 
+               return -EINVAL;
+
+       spin_lock_irqsave(&pci_config_lock, flags);
+
+       switch (len) {
+       case 1:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=a" (result)
+                       : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+                         "c" (value),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       case 2:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=a" (result)
+                       : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+                         "c" (value),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       case 4:
+               __asm__("lcall *(%%esi); cld\n\t"
+                       "jc 1f\n\t"
+                       "xor %%ah, %%ah\n"
+                       "1:"
+                       : "=a" (result)
+                       : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+                         "c" (value),
+                         "b" (bx),
+                         "D" ((long)reg),
+                         "S" (&pci_indirect));
+               break;
+       }
+
+       spin_unlock_irqrestore(&pci_config_lock, flags);
+
+       return (int)((result & 0xff00) >> 8);
+}
+
+static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+       int result; 
+       u32 data;
+
+       if (!value) 
+               return -EINVAL;
+
+       result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, &data);
+
+       *value = (u8)data;
+
+       return result;
+}
+
+static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+       int result; 
+       u32 data;
+
+       if (!value) 
+               return -EINVAL;
+
+       result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, &data);
+
+       *value = (u16)data;
+
+       return result;
+}
+
+static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+       if (!value) 
+               return -EINVAL;
+       
+       return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+       return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 1, value);
+}
+
+static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+       return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 2, value);
+}
+
+static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+       return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), 
+               PCI_FUNC(dev->devfn), where, 4, value);
+}
+
+
+/*
+ * Function table for BIOS32 access
+ */
+
+static struct pci_ops pci_bios_access = {
+      pci_bios_read_config_byte,
+      pci_bios_read_config_word,
+      pci_bios_read_config_dword,
+      pci_bios_write_config_byte,
+      pci_bios_write_config_word,
+      pci_bios_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS.
+ */
+
+static struct pci_ops * __devinit pci_find_bios(void)
+{
+       union bios32 *check;
+       unsigned char sum;
+       int i, length;
+
+       /*
+        * Follow the standard procedure for locating the BIOS32 Service
+        * directory by scanning the permissible address range from
+        * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+        */
+
+       for (check = (union bios32 *) __va(0xe0000);
+            check <= (union bios32 *) __va(0xffff0);
+            ++check) {
+               if (check->fields.signature != BIOS32_SIGNATURE)
+                       continue;
+               length = check->fields.length * 16;
+               if (!length)
+                       continue;
+               sum = 0;
+               for (i = 0; i < length ; ++i)
+                       sum += check->chars[i];
+               if (sum != 0)
+                       continue;
+               if (check->fields.revision != 0) {
+                       printk("PCI: unsupported BIOS32 revision %d at 0x%p\n",
+                               check->fields.revision, check);
+                       continue;
+               }
+               DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+               if (check->fields.entry >= 0x100000) {
+                       printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+                       return NULL;
+               } else {
+                       unsigned long bios32_entry = check->fields.entry;
+                       DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+                       bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+                       if (check_pcibios())
+                               return &pci_bios_access;
+               }
+               break;  /* Hopefully more than one BIOS32 cannot happen... */
+       }
+
+       return NULL;
+}
+
+/*
+ * Sort the device list according to PCI BIOS. Nasty hack, but since some
+ * fool forgot to define the `correct' device order in the PCI BIOS specs
+ * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels
+ * which used BIOS ordering, we are bound to do this...
+ */
+
+static void __devinit pcibios_sort(void)
+{
+       LIST_HEAD(sorted_devices);
+       struct list_head *ln;
+       struct pci_dev *dev, *d;
+       int idx, found;
+       unsigned char bus, devfn;
+
+       DBG("PCI: Sorting device list...\n");
+       while (!list_empty(&pci_devices)) {
+               ln = pci_devices.next;
+               dev = pci_dev_g(ln);
+               idx = found = 0;
+               while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+                       idx++;
+                       for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) {
+                               d = pci_dev_g(ln);
+                               if (d->bus->number == bus && d->devfn == devfn) {
+                                       list_del(&d->global_list);
+                                       list_add_tail(&d->global_list, &sorted_devices);
+                                       if (d == dev)
+                                               found = 1;
+                                       break;
+                               }
+                       }
+                       if (ln == &pci_devices) {
+                               printk("PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+                               /*
+                                * We must not continue scanning as several buggy BIOSes
+                                * return garbage after the last device. Grr.
+                                */
+                               break;
+                       }
+               }
+               if (!found) {
+                       printk("PCI: Device %02x:%02x not found by BIOS\n",
+                               dev->bus->number, dev->devfn);
+                       list_del(&dev->global_list);
+                       list_add_tail(&dev->global_list, &sorted_devices);
+               }
+       }
+       list_splice(&sorted_devices, &pci_devices);
+}
+
+/*
+ *  BIOS Functions for IRQ Routing
+ */
+
+struct irq_routing_options {
+       u16 size;
+       struct irq_info *table;
+       u16 segment;
+} __attribute__((packed));
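+
+/*
+ * PCI BIOS function 0xb10e expects %es:%edi to point at this
+ * descriptor. The BIOS fills the caller-supplied table, rewrites
+ * 'size' with the number of bytes actually used, and returns the
+ * bitmap of exclusively PCI-usable IRQs in %bx.
+ */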
+
+struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void)
+{
+       struct irq_routing_options opt;
+       struct irq_routing_table *rt = NULL;
+       int ret, map;
+       unsigned long page;
+
+       if (!pci_bios_present)
+               return NULL;
+       page = __get_free_page(GFP_KERNEL);
+       if (!page)
+               return NULL;
+       opt.table = (struct irq_info *) page;
+       opt.size = PAGE_SIZE;
+       opt.segment = __KERNEL_DS;
+
+       DBG("PCI: Fetching IRQ routing table... ");
+       __asm__("push %%es\n\t"
+               "push %%ds\n\t"
+               "pop  %%es\n\t"
+               "lcall *(%%esi); cld\n\t"
+               "pop %%es\n\t"
+               "jc 1f\n\t"
+               "xor %%ah, %%ah\n"
+               "1:"
+               : "=a" (ret),
+                 "=b" (map)
+               : "0" (PCIBIOS_GET_ROUTING_OPTIONS),
+                 "1" (0),
+                 "D" ((long) &opt),
+                 "S" (&pci_indirect));
+       DBG("OK  ret=%d, size=%d, map=%x\n", ret, opt.size, map);
+       if (ret & 0xff00)
+               printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff);
+       else if (opt.size) {
+               rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL);
+               if (rt) {
+                       memset(rt, 0, sizeof(struct irq_routing_table));
+                       rt->size = opt.size + sizeof(struct irq_routing_table);
+                       rt->exclusive_irqs = map;
+                       memcpy(rt->slots, (void *) page, opt.size);
+                       printk("PCI: Using BIOS Interrupt Routing Table\n");
+               }
+       }
+       free_page(page);
+       return rt;
+}
+
+
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq)
+{
+       int ret;
+
+       __asm__("lcall *(%%esi); cld\n\t"
+               "jc 1f\n\t"
+               "xor %%ah, %%ah\n"
+               "1:"
+               : "=a" (ret)
+               : "0" (PCIBIOS_SET_PCI_HW_INT),
+                 "b" ((dev->bus->number << 8) | dev->devfn),
+                 "c" ((irq << 8) | (pin + 10)),
+                 "S" (&pci_indirect));
+       return !(ret & 0xff00);
+}
+
+#endif
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to the next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
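+/*
+ * Example: with only four decoded device-select lines, devfn 0x00 and
+ * devfn 0x80 (PCI_DEVFN(16,0)) address the same device, so each real
+ * device shows up a second time 16 slots higher with identical IDs
+ * and resources; those duplicates are what get removed below.
+ */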
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+       struct list_head *ln, *mn;
+       struct pci_dev *d, *e;
+       int mirror = PCI_DEVFN(16,0);
+       int seen_host_bridge = 0;
+       int i;
+
+       DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+       for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
+               d = pci_dev_b(ln);
+               if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+                       seen_host_bridge++;
+               for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+                       e = pci_dev_b(mn);
+                       if (e->devfn != d->devfn + mirror ||
+                           e->vendor != d->vendor ||
+                           e->device != d->device ||
+                           e->class != d->class)
+                               continue;
+                       for(i=0; i<PCI_NUM_RESOURCES; i++)
+                               if (e->resource[i].start != d->resource[i].start ||
+                                   e->resource[i].end != d->resource[i].end ||
+                                   e->resource[i].flags != d->resource[i].flags)
+                                       continue;
+                       break;
+               }
+               if (mn == &b->devices)
+                       return;
+       }
+       if (!seen_host_bridge)
+               return;
+       printk("PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+       ln = &b->devices;
+       while (ln->next != &b->devices) {
+               d = pci_dev_b(ln->next);
+               if (d->devfn >= mirror) {
+                       list_del(&d->global_list);
+                       list_del(&d->bus_list);
+                       kfree(d);
+               } else
+                       ln = ln->next;
+       }
+}
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+       int n;
+       struct pci_bus bus;
+       struct pci_dev dev;
+       u16 l;
+
+       if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+               return;
+       DBG("PCI: Peer bridge fixup\n");
+       for (n=0; n <= pcibios_last_bus; n++) {
+               if (pci_bus_exists(&pci_root_buses, n))
+                       continue;
+               bus.number = n;
+               bus.ops = pci_root_ops;
+               dev.bus = &bus;
+               for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+                       if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
+                           l != 0x0000 && l != 0xffff) {
+                               DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
+                               printk("PCI: Discovered peer bus %02x\n", n);
+                               pci_scan_bus(n, pci_root_ops, NULL);
+                               break;
+                       }
+       }
+}
+
+/*
+ * Exceptions for specific devices. Usually work-arounds for fatal design flaws.
+ */
+
+static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+{
+       /*
+        * i450NX -- Find and scan all secondary buses on all PXB's.
+        */
+       int pxb, reg;
+       u8 busno, suba, subb;
+       printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name);
+       reg = 0xd0;
+       for(pxb=0; pxb<2; pxb++) {
+               pci_read_config_byte(d, reg++, &busno);
+               pci_read_config_byte(d, reg++, &suba);
+               pci_read_config_byte(d, reg++, &subb);
+               DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
+               if (busno)
+                       pci_scan_bus(busno, pci_root_ops, NULL);        /* Bus A */
+               if (suba < subb)
+                       pci_scan_bus(suba+1, pci_root_ops, NULL);       /* Bus B */
+       }
+       pcibios_last_bus = -1;
+}
+
+static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+{
+       /*
+        * i450GX and i450KX -- Find and scan all secondary buses.
+        * (called separately for each PCI bridge found)
+        */
+       u8 busno;
+       pci_read_config_byte(d, 0x4a, &busno);
+       printk("PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno);
+       pci_scan_bus(busno, pci_root_ops, NULL);
+       pcibios_last_bus = -1;
+}
+
+static void __devinit  pci_fixup_umc_ide(struct pci_dev *d)
+{
+       /*
+        * UM8886BF IDE controller sets region type bits incorrectly,
+        * therefore they look like memory despite being I/O.
+        */
+       int i;
+
+       printk("PCI: Fixing base address flags for device %s\n", d->slot_name);
+       for(i=0; i<4; i++)
+               d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
+}
+
+static void __devinit pci_fixup_ide_bases(struct pci_dev *d)
+{
+       int i;
+
+       /*
+        * PCI IDE controllers use non-standard I/O port decoding, respect it.
+        */
+       if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE)
+               return;
+       DBG("PCI: IDE base address fixup for %s\n", d->slot_name);
+       for(i=0; i<4; i++) {
+               struct resource *r = &d->resource[i];
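+               /*
+                * A legacy-mode control-block BAR reports a 4-byte
+                * range at 0x3f4 or 0x374; only the device-control
+                * port at 0x3f6/0x376 is really decoded, so narrow
+                * the resource to that single port.
+                */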
+               if ((r->start & ~0x80) == 0x374) {
+                       r->start |= 2;
+                       r->end = r->start;
+               }
+       }
+}
+
+static void __devinit  pci_fixup_ide_trash(struct pci_dev *d)
+{
+       int i;
+
+       /*
+        * There exist PCI IDE controllers which have utter garbage
+        * in the first four base registers. Ignore that.
+        */
+       DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name);
+       for(i=0; i<4; i++)
+               d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0;
+}
+
+static void __devinit  pci_fixup_latency(struct pci_dev *d)
+{
+       /*
+        *  SiS 5597 and 5598 chipsets require latency timer set to
+        *  at most 32 to avoid lockups.
+        */
+       DBG("PCI: Setting max latency to 32\n");
+       pcibios_max_latency = 32;
+}
+
+static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+{
+       /*
+        * PIIX4 ACPI device: hardwired IRQ9
+        */
+       d->irq = 9;
+}
+
+/*
+ * Nobody seems to know what this does. Damn.
+ *
+ * But it does seem to fix some unspecified problem
+ * with 'movntq' copies on Athlons.
+ *
+ * VIA 8363 chipset:
+ *  - bit 7 at offset 0x55: Debug (RW)
+ */
+static void __init pci_fixup_via_athlon_bug(struct pci_dev *d)
+{
+       u8 v;
+       pci_read_config_byte(d, 0x55, &v);
+       if (v & 0x80) {
+               printk("Trying to stomp on Athlon bug...\n");
+               v &= 0x7f; /* clear bit 55.7 */
+               pci_write_config_byte(d, 0x55, v);
+       }
+}
+
+struct pci_fixup pcibios_fixups[] = {
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82451NX,    pci_fixup_i450nx },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82454GX,    pci_fixup_i450gx },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_UMC,      PCI_DEVICE_ID_UMC_UM8886BF,     pci_fixup_umc_ide },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_5513,          pci_fixup_ide_trash },
+       { PCI_FIXUP_HEADER,     PCI_ANY_ID,             PCI_ANY_ID,                     pci_fixup_ide_bases },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_5597,          pci_fixup_latency },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_5598,          pci_fixup_latency },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371AB_3,  pci_fixup_piix4_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_8363_0,       pci_fixup_via_athlon_bug },
+       { 0 }
+};
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+
+void __devinit  pcibios_fixup_bus(struct pci_bus *b)
+{
+       pcibios_fixup_ghosts(b);
+       pci_read_bridge_bases(b);
+}
+
+
+void __devinit pcibios_config_init(void)
+{
+       /*
+        * Try all known PCI access methods. Note that we support using 
+        * both PCI BIOS and direct access, with a preference for direct.
+        */
+
+#ifdef CONFIG_PCI_BIOS
+       if ((pci_probe & PCI_PROBE_BIOS) 
+               && ((pci_root_ops = pci_find_bios()))) {
+               pci_probe |= PCI_BIOS_SORT;
+               pci_bios_present = 1;
+               pci_config_read = pci_bios_read;
+               pci_config_write = pci_bios_write;
+       }
+#endif
+
+#ifdef CONFIG_PCI_DIRECT
+       if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2)) 
+               && (pci_root_ops = pci_check_direct())) {
+               if (pci_root_ops == &pci_direct_conf1) {
+                       pci_config_read = pci_conf1_read;
+                       pci_config_write = pci_conf1_write;
+               }
+               else {
+                       pci_config_read = pci_conf2_read;
+                       pci_config_write = pci_conf2_write;
+               }
+       }
+#endif
+
+       return;
+}
+
+void __init pcibios_init(void)
+{
+       if (!pci_root_ops)
+               pcibios_config_init();
+       if (!pci_root_ops) {
+               printk("PCI: System does not support PCI\n");
+               return;
+       }
+
+       printk("PCI: Probing PCI hardware\n");
+       pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL);
+
+       pcibios_irq_init();
+       pcibios_fixup_peer_bridges();
+       pcibios_fixup_irqs();
+       pcibios_resource_survey();
+
+#ifdef CONFIG_PCI_BIOS
+       if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+               pcibios_sort();
+#endif
+}
+
+char * __devinit  pcibios_setup(char *str)
+{
+       if (!strcmp(str, "off")) {
+               pci_probe = 0;
+               return NULL;
+       }
+#ifdef CONFIG_PCI_BIOS
+       else if (!strcmp(str, "bios")) {
+               pci_probe = PCI_PROBE_BIOS;
+               return NULL;
+       } else if (!strcmp(str, "nobios")) {
+               pci_probe &= ~PCI_PROBE_BIOS;
+               return NULL;
+       } else if (!strcmp(str, "nosort")) {
+               pci_probe |= PCI_NO_SORT;
+               return NULL;
+       } else if (!strcmp(str, "biosirq")) {
+               pci_probe |= PCI_BIOS_IRQ_SCAN;
+               return NULL;
+       }
+#endif
+#ifdef CONFIG_PCI_DIRECT
+       else if (!strcmp(str, "conf1")) {
+               pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+               return NULL;
+       }
+       else if (!strcmp(str, "conf2")) {
+               pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+               return NULL;
+       }
+#endif
+       else if (!strcmp(str, "rom")) {
+               pci_probe |= PCI_ASSIGN_ROMS;
+               return NULL;
+       } else if (!strcmp(str, "assign-busses")) {
+               pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+               return NULL;
+       } else if (!strncmp(str, "irqmask=", 8)) {
+               pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+               return NULL;
+       } else if (!strncmp(str, "lastbus=", 8)) {
+               pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+               return NULL;
+       }
+       return str;
+}
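+
+/*
+ * Example: booting with "pci=nobios,lastbus=1" hands "nobios" and then
+ * "lastbus=1" to pcibios_setup(), clearing PCI_PROBE_BIOS and capping
+ * the bus scan at bus 1; unrecognised tokens are returned to the
+ * caller unchanged.
+ */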
+
+unsigned int pcibios_assign_all_busses(void)
+{
+       return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev)
+{
+       int err;
+
+       if ((err = pcibios_enable_resources(dev)) < 0)
+               return err;
+       pcibios_enable_irq(dev);
+       return 0;
+}
diff --git a/xen-2.4.16/arch/i386/process.c b/xen-2.4.16/arch/i386/process.c
new file mode 100644
index 0000000..11cd5a1
--- /dev/null
@@ -0,0 +1,402 @@
+/*
+ *  linux/arch/i386/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <stdarg.h>
+
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/smp.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <asm/mc146818rtc.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <xeno/irq.h>
+#include <xeno/event.h>
+
+asmlinkage void ret_from_newdomain(void) __asm__("ret_from_newdomain");
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+    hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+    hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+    if (!hlt_counter) {
+        __cli();
+        if (!current->hyp_events)
+            safe_halt();
+        else
+            __sti();
+    }
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+    ASSERT(current->domain == IDLE_DOMAIN_ID);
+    current->has_cpu = 1;
+
+    /*
+     * Declare CPU setup done to the boot processor;
+     * a memory barrier ensures the state is visible to it first.
+     */
+    smp_mb();
+    init_idle();
+
+    for ( ; ; )
+    {
+        while (!current->hyp_events)
+            default_idle();
+        do_hyp_events();
+    }
+}
+
+static long no_idt[2];
+static int reboot_mode;
+int reboot_thru_bios = 0;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c)    ((c) >= '0' && (c) <= '9')
+#endif
+
+
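+/*
+ * Poll the keyboard controller's status port (0x64) until bit 1
+ * (input buffer full) clears, i.e. until it can accept a command.
+ */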
+static inline void kb_wait(void)
+{
+    int i;
+
+    for (i=0; i<0x10000; i++)
+        if ((inb_p(0x64) & 0x02) == 0)
+            break;
+}
+
+
+void machine_restart(char * __unused)
+{
+#if CONFIG_SMP
+    int cpuid;
+       
+    cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+    if (reboot_smp) {
+
+        /* check to see if reboot_cpu is valid;
+           if it's not, default to the BSP */
+        if ((reboot_cpu == -1) ||  
+            (reboot_cpu > (NR_CPUS -1))  || 
+            !(phys_cpu_present_map & (1<<cpuid))) 
+            reboot_cpu = boot_cpu_physical_apicid;
+
+        reboot_smp = 0;  /* use this as a flag to only go through this once */
+        /* re-run this function on the other CPUs: they will fall
+           through this section since we have cleared reboot_smp,
+           doing the reboot if on the correct CPU and halting
+           otherwise. */
+        if (reboot_cpu != cpuid)
+            smp_call_function((void *)machine_restart , NULL, 1, 0);
+    }
+
+    /* if reboot_cpu is still -1, then we want a traditional reboot,
+       and if we are not running on the reboot_cpu, halt */
+    if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+        for (;;)
+            __asm__ __volatile__ ("hlt");
+    }
+    /*
+     * Stop all CPUs and turn off local APICs and the IO-APIC, so
+     * other OSs see a clean IRQ state.
+     */
+    smp_send_stop();
+    disable_IO_APIC();
+#endif
+
+    if(!reboot_thru_bios) {
+        /* rebooting needs to touch the page at absolute addr 0 */
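+        /* 0x472 is the BIOS reset-flag word: 0x1234 there requests a
+           warm boot (memory test skipped), 0 a cold one. */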
+        *((unsigned short *)__va(0x472)) = reboot_mode;
+        for (;;) {
+            int i;
+            for (i=0; i<100; i++) {
+                kb_wait();
+                udelay(50);
+                outb(0xfe,0x64);         /* pulse reset low */
+                udelay(50);
+            }
+            /* That didn't work - force a triple fault.. */
+            __asm__ __volatile__("lidt %0": :"m" (no_idt));
+            __asm__ __volatile__("int3");
+        }
+    }
+
+    panic("Need to reinclude BIOS reboot code\n");
+}
+
+void machine_halt(void)
+{
+    machine_restart(0);
+}
+
+void machine_power_off(void)
+{
+    machine_restart(0);
+}
+
+extern void show_trace(unsigned long* esp);
+
+void show_regs(struct pt_regs * regs)
+{
+    unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
+    printk("\n");
+    printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
+    if (regs->xcs & 3)
+        printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+    printk(" EFLAGS: %08lx\n",regs->eflags);
+    printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+           regs->eax,regs->ebx,regs->ecx,regs->edx);
+    printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+           regs->esi, regs->edi, regs->ebp);
+    printk(" DS: %04x ES: %04x\n",
+           0xffff & regs->xds,0xffff & regs->xes);
+
+    __asm__("movl %%cr0, %0": "=r" (cr0));
+    __asm__("movl %%cr2, %0": "=r" (cr2));
+    __asm__("movl %%cr3, %0": "=r" (cr3));
+    /* This could fault if %cr4 does not exist */
+    __asm__("1: movl %%cr4, %0         \n"
+            "2:                                \n"
+            ".section __ex_table,\"a\" \n"
+            ".long 1b,2b                       \n"
+            ".previous                 \n"
+            : "=r" (cr4): "0" (0));
+    printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+    show_trace(&regs->esp);
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+#if 0
+    void * ldt = mm->context.segments;
+
+    /*
+     * free the LDT
+     */
+    if (ldt) {
+        mm->context.segments = NULL;
+        clear_LDT();
+        vfree(ldt);
+    }
+#endif
+}
+
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+    /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+    struct task_struct *tsk = current;
+
+    memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+    /*
+     * Forget coprocessor state..
+     */
+    clear_fpu(tsk);
+    tsk->flags &= ~PF_DONEFPUINIT;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+#if 0
+    if (dead_task->mm) {
+        void * ldt = dead_task->mm->context.segments;
+
+        // temporary debugging check
+        if (ldt) {
+            printk("WARNING: dead process %8s still has LDT? <%p>\n",
+                   dead_task->comm, ldt);
+            BUG();
+        }
+    }
+#endif
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+#if 0
+    struct mm_struct * old_mm;
+    void *old_ldt, *ldt;
+
+    ldt = NULL;
+    old_mm = current->mm;
+    if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
+        /*
+         * Completely new LDT, we initialize it from the parent:
+         */
+        ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+        if (!ldt)
+            printk(KERN_WARNING "ldt allocation failed\n");
+        else
+            memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+    }
+    new_mm->context.segments = ldt;
+    new_mm->context.cpuvalid = ~0UL;   /* valid on all CPUs - they can't have stale data */
+#endif
+}
+
+
+void new_thread(struct task_struct *p,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info)
+{
+    struct pt_regs * regs;
+
+    regs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+    memset(regs, 0, sizeof(*regs));
+
+    /*
+     * Initial register values:
+     *  DS,ES,FS,GS = __GUEST_DS
+     *       CS:EIP = __GUEST_CS:start_pc
+     *       SS:ESP = __GUEST_DS:start_stack
+     *          ESI = start_info
+     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+     */
+    p->thread.fs = p->thread.gs = __GUEST_DS;
+    regs->xds = regs->xes = regs->xss = __GUEST_DS;
+    regs->xcs = __GUEST_CS;
+    regs->eip = start_pc;
+    regs->esp = start_stack;
+    regs->esi = start_info;
+
+    p->thread.esp = (unsigned long) regs;
+    p->thread.esp0 = (unsigned long) (regs+1);
+
+    p->thread.eip = (unsigned long) ret_from_newdomain;
+
+    __save_flags(regs->eflags);
+    regs->eflags |= X86_EFLAGS_IF;
+}
+
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+               __asm__("movl %0,%%db" #register  \
+                       : /* no output */ \
+                       :"r" (thread->debugreg[register]))
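+
+/*
+ * The # stringizing operator pastes the register number into the
+ * mnemonic, so loaddebug(next, 7) expands to "movl %0,%%db7" with
+ * next->debugreg[7] as the input operand.
+ */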
+
+/*
+ *     switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPUs, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is a fairly red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ */
+/* NB. prev_p passed in %eax, next_p passed in %edx */
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+    struct thread_struct *prev = &prev_p->thread,
+        *next = &next_p->thread;
+    struct tss_struct *tss = init_tss + smp_processor_id();
+
+    unlazy_fpu(prev_p);
+
+    tss->esp0 = next->esp0;
+    tss->esp1 = next->esp1;
+    tss->ss1  = next->ss1;
+
+    /*
+     * Save away %fs and %gs. No need to save %es and %ds, as
+     * those are always kernel segments while inside the kernel.
+     */
+    asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+    asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+    /*
+     * Restore %fs and %gs.
+     */
+    loadsegment(fs, next->fs);
+    loadsegment(gs, next->gs);
+
+    /*
+     * Now maybe reload the debug registers
+     */
+    if (next->debugreg[7]){
+        loaddebug(next, 0);
+        loaddebug(next, 1);
+        loaddebug(next, 2);
+        loaddebug(next, 3);
+        /* no 4 and 5 */
+        loaddebug(next, 6);
+        loaddebug(next, 7);
+    }
+
+}
diff --git a/xen-2.4.16/arch/i386/rwlock.c b/xen-2.4.16/arch/i386/rwlock.c
new file mode 100644
index 0000000..3b9b689
--- /dev/null
@@ -0,0 +1,33 @@
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+
+#if defined(CONFIG_SMP)
+asm(
+"
+.align  4
+.globl  __write_lock_failed
+__write_lock_failed:
+        " LOCK "addl    $" RW_LOCK_BIAS_STR ",(%eax)
+1:      rep; nop
+        cmpl    $" RW_LOCK_BIAS_STR ",(%eax)
+        jne     1b
+
+        " LOCK "subl    $" RW_LOCK_BIAS_STR ",(%eax)
+        jnz     __write_lock_failed
+        ret
+
+
+.align  4
+.globl  __read_lock_failed
+__read_lock_failed:
+        lock ; incl     (%eax)
+1:      rep; nop
+        cmpl    $1,(%eax)
+        js      1b
+
+        lock ; decl     (%eax)
+        js      __read_lock_failed
+        ret
+"
+);
+#endif
diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c
new file mode 100644
index 0000000..5b5c73f
--- /dev/null
@@ -0,0 +1,333 @@
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+#include <xeno/bootmem.h>
+#include <xeno/pci.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
+
+struct cpuinfo_x86 boot_cpu_data = { 0 };
+/* Lots of nice things, since we only target PPro+. */
+unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
+unsigned long wait_init_idle;
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+    u32 f1, f2;
+
+    asm("pushfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "movl %0,%1\n\t"
+        "xorl %2,%0\n\t"
+        "pushl %0\n\t"
+        "popfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "popfl\n\t"
+        : "=&r" (f1), "=&r" (f2)
+        : "ir" (flag));
+
+    return ((f1^f2) & flag) != 0;
+}
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+    return flag_is_changeable_p(X86_EFLAGS_ID);
+}
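+
+/* X86_EFLAGS_ID is bit 21 (0x00200000) of EFLAGS; a processor on which
+   software can toggle it is guaranteed to implement CPUID. */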
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+        char *v = c->x86_vendor_id;
+
+        if (!strcmp(v, "GenuineIntel"))
+                c->x86_vendor = X86_VENDOR_INTEL;
+        else if (!strcmp(v, "AuthenticAMD"))
+                c->x86_vendor = X86_VENDOR_AMD;
+        else if (!strcmp(v, "CyrixInstead"))
+                c->x86_vendor = X86_VENDOR_CYRIX;
+        else if (!strcmp(v, "UMC UMC UMC "))
+                c->x86_vendor = X86_VENDOR_UMC;
+        else if (!strcmp(v, "CentaurHauls"))
+                c->x86_vendor = X86_VENDOR_CENTAUR;
+        else if (!strcmp(v, "NexGenDriven"))
+                c->x86_vendor = X86_VENDOR_NEXGEN;
+        else if (!strcmp(v, "RiseRiseRise"))
+                c->x86_vendor = X86_VENDOR_RISE;
+        else if (!strcmp(v, "GenuineTMx86") ||
+                 !strcmp(v, "TransmetaCPU"))
+                c->x86_vendor = X86_VENDOR_TRANSMETA;
+        else
+                c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+    /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+    if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+        clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+}
+
+static void __init init_amd(struct cpuinfo_x86 *c)
+{
+    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+       3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+    clear_bit(0*32+31, &c->x86_capability);
+       
+    switch(c->x86)
+    {
+    case 5:
+        panic("AMD K6 is not supported.\n");
+    case 6:    /* An Athlon/Duron. We can probably trust the BIOS. */
+        break;         
+    }
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+    int junk, i;
+    u32 xlvl, tfms;
+
+    c->x86_vendor = X86_VENDOR_UNKNOWN;
+    c->cpuid_level = -1;       /* CPUID not detected */
+    c->x86_model = c->x86_mask = 0;    /* So far unknown... */
+    c->x86_vendor_id[0] = '\0'; /* Unset */
+    memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+    if ( !have_cpuid_p() )
+        panic("Ancient processors not supported\n");
+
+    /* Get vendor name */
+    cpuid(0x00000000, &c->cpuid_level,
+          (int *)&c->x86_vendor_id[0],
+          (int *)&c->x86_vendor_id[8],
+          (int *)&c->x86_vendor_id[4]);
+
+    get_cpu_vendor(c);
+               
+    if ( c->cpuid_level == 0 )
+        panic("Decrepit CPUID not supported\n");
+
+    cpuid(0x00000001, &tfms, &junk, &junk,
+          &c->x86_capability[0]);
+    c->x86 = (tfms >> 8) & 15;
+    c->x86_model = (tfms >> 4) & 15;
+    c->x86_mask = tfms & 15;
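+    /* e.g. a CPUID signature of 0x0673 decodes as family 6, model 7,
+       stepping 3: a Katmai Pentium III. */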
+
+    /* AMD-defined flags: level 0x80000001 */
+    xlvl = cpuid_eax(0x80000000);
+    if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+        if ( xlvl >= 0x80000001 )
+            c->x86_capability[1] = cpuid_edx(0x80000001);
+    }
+
+    /* Transmeta-defined flags: level 0x80860001 */
+    xlvl = cpuid_eax(0x80860000);
+    if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+        if (  xlvl >= 0x80860001 )
+            c->x86_capability[2] = cpuid_edx(0x80860001);
+    }
+
+    printk("CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_vendor);
+
+    switch ( c->x86_vendor ) {
+    case X86_VENDOR_INTEL:
+        init_intel(c);
+        break;
+    case X86_VENDOR_AMD:
+        init_amd(c);
+        break;
+    default:
+        panic("Only support Intel processors (P6+)\n");
+    }
+       
+    printk("CPU caps: %08x %08x %08x %08x\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_capability[3]);
+
+    /*
+     * On SMP, boot_cpu_data holds the common feature set between
+     * all CPUs; so make sure that we indicate which features are
+     * common between the CPUs.  The first time this routine gets
+     * executed, c == &boot_cpu_data.
+     */
+    if ( c != &boot_cpu_data ) {
+        /* AND the already accumulated flags with these */
+        for ( i = 0 ; i < NCAPINTS ; i++ )
+            boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+    }
+}
+
+
+unsigned long cpu_initialized;
+void __init cpu_init(void)
+{
+    int nr = smp_processor_id();
+    struct tss_struct * t = &init_tss[nr];
+    
+    if ( test_and_set_bit(nr, &cpu_initialized) )
+        panic("CPU#%d already initialized!!!\n", nr);
+    printk("Initializing CPU#%d\n", nr);
+
+    __asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
+    __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+    /* No nested task. */
+    __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+    /* Set up and load the per-CPU TSS and LDT. */
+    t->ss0  = __HYPERVISOR_DS;
+    t->esp0 = current->thread.esp0;
+    set_tss_desc(nr,t);
+    load_TR(nr);
+    __asm__ __volatile__("lldt %%ax"::"a" (0));
+
+    /* Clear all 6 debug registers. */
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+    CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+#undef CD
+
+    /* Stick the idle task on the run queue. */
+    (void)wake_up(current);
+}
+
+static void __init do_initcalls(void)
+{
+        initcall_t *call;
+
+        call = &__initcall_start;
+        do {
+                (*call)();
+                call++;
+        } while (call < &__initcall_end);
+}
+
+/*
+ * IBM-compatible BIOSes place drive info tables at initial interrupt
+ * vectors 0x41 and 0x46. These are in the form of 16-bit-mode far pointers.
+ */
+struct drive_info_struct { unsigned char dummy[32]; } drive_info;
+void get_bios_driveinfo(void)
+{
+    unsigned long seg, off, tab1, tab2;
+
+    off  = (unsigned long)*(unsigned short *)(4*0x41+0);
+    seg  = (unsigned long)*(unsigned short *)(4*0x41+2);
+    tab1 = (seg<<4) + off;
+    
+    off  = (unsigned long)*(unsigned short *)(4*0x46+0);
+    seg  = (unsigned long)*(unsigned short *)(4*0x46+2);
+    tab2 = (seg<<4) + off;
+
+    printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n", 
+           tab1, tab2);
+
+    memcpy(drive_info.dummy+ 0, (char *)tab1, 16);
+    memcpy(drive_info.dummy+16, (char *)tab2, 16);
+}
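+/*
+ * Worked example (editorial, assumed values): if the BIOS left the far
+ * pointer F000:E401 at vector 0x41, then off = 0xE401, seg = 0xF000, and
+ * tab1 = (0xF000 << 4) + 0xE401 = 0xFE401 -- inside the lowest 1MB, as
+ * the caller in start_of_day() expects.
+ */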
+
+
+unsigned long pci_mem_start = 0x10000000;
+
+void __init start_of_day(void)
+{
+    extern void trap_init(void);
+    extern void init_IRQ(void);
+    extern void time_init(void);
+    extern void softirq_init(void);
+    extern void timer_bh(void);
+    extern void tqueue_bh(void);
+    extern void immediate_bh(void);
+    extern void init_timervecs(void);
+    extern int  setup_network_devices(void);
+    extern void net_init(void);
+
+    unsigned long low_mem_size;
+    
+    /*
+     * We do this early, but tables are in the lowest 1MB (usually
+     * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered.
+     */
+    get_bios_driveinfo();
+
+    /* Tell the PCI layer not to allocate too close to the RAM area.. */
+    low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+    if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size;
+    
+    identify_cpu(&boot_cpu_data); /* get CPU type info */
+    if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR);
+    if ( cpu_has_xmm )  set_in_cr4(X86_CR4_OSXMMEXCPT);
+    find_smp_config();            /* find ACPI tables */
+    smp_alloc_memory();           /* trampoline which other CPUs jump at */
+    paging_init();                /* not much here now, but sets up fixmap */
+    if ( smp_found_config ) get_smp_config();
+    domain_init();
+    trap_init(); /*
+                  * installs trap (s/w exception) wrappers.
+                  * Most route via entry.S and thence back into traps.c
+                  * where a really simple handler does a panic.
+                  * Instead, we'll want to pass most back to a domain.
+                  */
+    init_IRQ();  /* installs simple interrupt wrappers. Starts HZ clock. */
+    time_init(); /* installs software handler for HZ clock. */
+    softirq_init();
+    init_timervecs();
+    init_bh(TIMER_BH, timer_bh);
+    init_bh(TQUEUE_BH, tqueue_bh);
+    init_bh(IMMEDIATE_BH, immediate_bh);
+    init_apic_mappings(); /* make APICs addressable in our pagetables. */
+
+    sti();
+
+#ifndef CONFIG_SMP    
+    APIC_init_uniprocessor();
+#else
+    smp_boot_cpus(); /*
+                      * Does loads of stuff, including kicking the local
+                      * APIC, and the IO APIC after other CPUs are booted.
+                      * Each IRQ is preferably handled by the IO-APIC, but
+                      * we fall through to the 8259A if we have to (slower).
+                      */
+#endif
+
+    zap_low_mappings();
+    kmem_cache_init();
+    kmem_cache_sizes_init(max_page);
+#ifdef CONFIG_PCI
+    pci_init();
+#endif
+    do_initcalls();
+    if ( !setup_network_devices() )
+        panic("Must have a network device!\n");
+    net_init(); /* initializes virtual network system. */
+
+#ifdef CONFIG_SMP
+    wait_init_idle = cpu_online_map;
+    clear_bit(smp_processor_id(), &wait_init_idle);
+    smp_threads_ready = 1;
+    smp_commence(); /* Tell other CPUs that state of the world is stable. */
+    while (wait_init_idle) 
+    {
+        cpu_relax();
+        barrier();
+    }
+#endif
+}
diff --git a/xen-2.4.16/arch/i386/smp.c b/xen-2.4.16/arch/i386/smp.c
new file mode 100644 (file)
index 0000000..008d1aa
--- /dev/null
@@ -0,0 +1,578 @@
+/*
+ *     Intel SMP support routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ */
+
+#include <xeno/irq.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <asm/smp.h>
+#include <asm/mc146818rtc.h>
+#include <asm/pgalloc.h>
+#include <asm/smpboot.h>
+
+/*
+ *     Some notes on x86 processor bugs affecting SMP operation:
+ *
+ *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ *     The Linux implications for SMP are handled as follows:
+ *
+ *     Pentium III / [Xeon]
+ *             None of the E1AP-E3AP errata are visible to the user.
+ *
+ *     E1AP.   see PII A1AP
+ *     E2AP.   see PII A2AP
+ *     E3AP.   see PII A3AP
+ *
+ *     Pentium II / [Xeon]
+ *             None of the A1AP-A3AP errata are visible to the user.
+ *
+ *     A1AP.   see PPro 1AP
+ *     A2AP.   see PPro 2AP
+ *     A3AP.   see PPro 7AP
+ *
+ *     Pentium Pro
+ *             None of 1AP-9AP errata are visible to the normal user,
+ *     except occasional delivery of 'spurious interrupt' as trap #15.
+ *     This is very rare and a non-problem.
+ *
+ *     1AP.    Linux maps APIC as non-cacheable
+ *     2AP.    worked around in hardware
+ *     3AP.    fixed in C0 and above steppings microcode update.
+ *             Linux does not use excessive STARTUP_IPIs.
+ *     4AP.    worked around in hardware
+ *     5AP.    symmetric IO mode (normal Linux operation) not affected.
+ *             'noapic' mode has vector 0xf filled out properly.
+ *     6AP.    'noapic' mode might be affected - fixed in later steppings
+ *     7AP.    We do not assume writes to the LVT deassert IRQs
+ *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
+ *     9AP.    We do not use mixed mode
+ *
+ *     Pentium
+ *             There is a marginal case where REP MOVS on 100MHz SMP
+ *     machines with B stepping processors can fail. XXX should provide
+ *     an L1cache=Writethrough or L1cache=off option.
+ *
+ *             B stepping CPUs may hang. There are hardware work arounds
+ *     for this. We warn about it in case your board doesn't have the work
+ *     arounds. Basically that's so I can tell anyone with a B stepping
+ *     CPU and SMP problems "tough".
+ *
+ *     Specific items [From Pentium Processor Specification Update]
+ *
+ *     1AP.    Linux doesn't use remote read
+ *     2AP.    Linux doesn't trust APIC errors
+ *     3AP.    We work around this
+ *     4AP.    Linux never generates 3 interrupts of the same priority
+ *             to cause a lost local interrupt.
+ *     5AP.    Remote read is never used
+ *     6AP.    not affected - worked around in hardware
+ *     7AP.    not affected - worked around in hardware
+ *     8AP.    worked around in hardware - we get explicit CS errors if not
+ *     9AP.    only 'noapic' mode affected. Might generate spurious
+ *             interrupts, we log only the first one and count the
+ *             rest silently.
+ *     10AP.   not affected - worked around in hardware
+ *     11AP.   Linux reads the APIC between writes to avoid this, as per
+ *             the documentation. Make sure you preserve this as it affects
+ *             the C stepping chips too.
+ *     12AP.   not affected - worked around in hardware
+ *     13AP.   not affected - worked around in hardware
+ *     14AP.   we always deassert INIT during bootup
+ *     15AP.   not affected - worked around in hardware
+ *     16AP.   not affected - worked around in hardware
+ *     17AP.   not affected - worked around in hardware
+ *     18AP.   not affected - worked around in hardware
+ *     19AP.   not affected - worked around in BIOS
+ *
+ *     If this sounds worrying, believe me these bugs are either ___RARE___
+ *     or are signal timing bugs worked around in hardware; there's
+ *     nothing of note from C stepping upwards.
+ */
+
+/* The 'big kernel lock' */
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { 0 }};
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+    return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+    return SET_APIC_DEST_FIELD(mask);
+}
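+/*
+ * (Editorial note: an IPI is programmed as two register writes -- ICR2
+ * first, to select the destination, then ICR to set the delivery mode,
+ * shortcut and vector, which also triggers the send. The two helpers
+ * above build those two register values.)
+ */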
+
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+    /*
+     * Subtle. In the case of the 'never do double writes' workaround
+     * we have to lock out interrupts to be safe.  As we don't care
+     * about the value read we use an atomic rmw access to avoid costly
+     * cli/sti.  Otherwise we use an even cheaper single atomic write
+     * to the APIC.
+     */
+    unsigned int cfg;
+
+    /*
+     * Wait for idle.
+     */
+    apic_wait_icr_idle();
+
+    /*
+     * No need to touch the target chip field.
+     */
+    cfg = __prepare_ICR(shortcut, vector);
+
+    /*
+     * Send the IPI. The write to APIC_ICR fires this off.
+     */
+    apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self(int vector)
+{
+    __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_mask_bitmask(int mask, int vector)
+{
+    unsigned long cfg;
+    unsigned long flags;
+
+    __save_flags(flags);
+    __cli();
+
+    /*
+     * Wait for idle.
+     */
+    apic_wait_icr_idle();
+
+    /*
+     * Prepare the target chip field.
+     */
+    cfg = __prepare_ICR2(mask);
+    apic_write_around(APIC_ICR2, cfg);
+
+    /*
+     * Program the ICR.
+     */
+    cfg = __prepare_ICR(0, vector);
+
+    /*
+     * Send the IPI. The write to APIC_ICR fires this off.
+     */
+    apic_write_around(APIC_ICR, cfg);
+
+    __restore_flags(flags);
+}
+
+static inline void send_IPI_mask_sequence(int mask, int vector)
+{
+    unsigned long cfg, flags;
+    unsigned int query_cpu, query_mask;
+
+    __save_flags(flags);
+    __cli();
+
+    for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
+        query_mask = 1 << query_cpu;
+        if (query_mask & mask) {
+
+            /*
+             * Wait for idle.
+             */
+            apic_wait_icr_idle();
+
+            /*
+             * Prepare the target chip field.
+             */
+            cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
+            apic_write_around(APIC_ICR2, cfg);
+
+            /*
+             * Program the ICR.
+             */
+            cfg = __prepare_ICR(0, vector);
+
+            /*
+             * Send the IPI. The write to APIC_ICR fires this off.
+             */
+            apic_write_around(APIC_ICR, cfg);
+        }
+    }
+    __restore_flags(flags);
+}
+
+static inline void send_IPI_mask(int mask, int vector)
+{
+    send_IPI_mask_bitmask(mask, vector);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+    /*
+     * If there are no other CPUs in the system then we get an APIC send
+     * error if we try to broadcast; thus we have to avoid sending IPIs
+     * in this case.
+     */
+    if (!(smp_num_cpus > 1))
+        return;
+
+    __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+    __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+/*
+ *     Smarter SMP flushing macros. 
+ *             c/o Linus Torvalds.
+ *
+ *     These mean you can really definitely utterly forget about
+ *     writing to user space from interrupts. (It's not allowed anyway.)
+ *
+ *     Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static volatile unsigned long flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
+#define FLUSH_ALL      0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context;
+ * instead update mm.cpu_vm_mask.
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+    if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
+        BUG();
+    clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
+ *     Stop ipi delivery for the old mm. This is not synchronized with
+ *     the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *     for the wrong mm, and in the worst case we perform a superfluous
+ *     tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *     was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ *     Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
+ *     Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *     flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *     Atomically set the bit [other cpus will start sending flush ipis],
+ *     and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+asmlinkage void smp_invalidate_interrupt (void)
+{
+    unsigned long cpu = smp_processor_id();
+
+    if (!test_bit(cpu, &flush_cpumask))
+        return;
+    /* 
+     * This was a BUG() but until someone can quote me the
+     * line from the intel manual that guarantees an IPI to
+     * multiple CPUs is retried _only_ on the erroring CPUs
+     * it's staying as a return.
+     *
+     * BUG();
+     */
+                
+    if (flush_mm == cpu_tlbstate[cpu].active_mm) {
+        if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+            if (flush_va == FLUSH_ALL)
+                local_flush_tlb();
+            else
+                __flush_tlb_one(flush_va);
+        } else
+            leave_mm(cpu);
+    }
+    ack_APIC_irq();
+    clear_bit(cpu, &flush_cpumask);
+}
+
+static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+                              unsigned long va)
+{
+    /*
+     * A couple of (to be removed) sanity checks:
+     *
+     * - we do not send IPIs to not-yet-booted CPUs.
+     * - current CPU must not be in the mask.
+     * - mask must exist :)
+     */
+    if (!cpumask)
+        BUG();
+    if ((cpumask & cpu_online_map) != cpumask)
+        BUG();
+    if (cpumask & (1 << smp_processor_id()))
+        BUG();
+    if (!mm)
+        BUG();
+
+    /*
+     * I'm not happy about this global shared spinlock in the
+     * MM hot path, but we'll see how contended it is.
+     * Temporarily this turns IRQs off, so that lockups are
+     * detected by the NMI watchdog.
+     */
+    spin_lock(&tlbstate_lock);
+       
+    flush_mm = mm;
+    flush_va = va;
+    atomic_set_mask(cpumask, &flush_cpumask);
+    /*
+     * We have to send the IPI only to the CPUs affected.
+     */
+    send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+    while (flush_cpumask)
+        /* nothing. lockup detection does not belong here */;
+
+    flush_mm = NULL;
+    flush_va = 0;
+    spin_unlock(&tlbstate_lock);
+}
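+/*
+ * (Protocol summary, editorial: the initiator publishes flush_mm and
+ * flush_va, atomically sets the target bits in flush_cpumask, sends
+ * INVALIDATE_TLB_VECTOR, then spins until every target has cleared its
+ * own bit in smp_invalidate_interrupt() above.)
+ */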
+
+void flush_tlb_current_task(void)
+{
+    struct mm_struct *mm = &current->mm;
+    unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+    local_flush_tlb();
+    if (cpu_mask)
+        flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+    unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+    if (current->active_mm == mm)
+        local_flush_tlb();
+    if (cpu_mask)
+        flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+#if 0
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+    struct mm_struct *mm = vma->vm_mm;
+    unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+    if (current->active_mm == mm) {
+        if(current->mm)
+            __flush_tlb_one(va);
+        else
+            leave_mm(smp_processor_id());
+    }
+
+    if (cpu_mask)
+        flush_tlb_others(cpu_mask, mm, va);
+}
+#endif
+
+static inline void do_flush_tlb_all_local(void)
+{
+    unsigned long cpu = smp_processor_id();
+
+    __flush_tlb_all();
+    if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
+        leave_mm(cpu);
+}
+
+static void flush_tlb_all_ipi(void* info)
+{
+    do_flush_tlb_all_local();
+}
+
+void flush_tlb_all(void)
+{
+    smp_call_function (flush_tlb_all_ipi,0,1,1);
+
+    do_flush_tlb_all_local();
+}
+
+void smp_send_event_check_mask(unsigned long cpu_mask)
+{
+    send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct {
+    void (*func) (void *info);
+    void *info;
+    atomic_t started;
+    atomic_t finished;
+    int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+                       int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>>, or have already executed it.
+ *
+ * You must not call this function with interrupts disabled, from a
+ * hardware interrupt handler, or from a bottom half.
+ */
+{
+    struct call_data_struct data;
+    int cpus = smp_num_cpus-1;
+
+    if (!cpus)
+        return 0;
+
+    data.func = func;
+    data.info = info;
+    atomic_set(&data.started, 0);
+    data.wait = wait;
+    if (wait)
+        atomic_set(&data.finished, 0);
+
+    spin_lock(&call_lock);
+    call_data = &data;
+    wmb();
+    /* Send a message to all other CPUs and wait for them to respond */
+    send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+    /* Wait for response */
+    while (atomic_read(&data.started) != cpus)
+        barrier();
+
+    if (wait)
+        while (atomic_read(&data.finished) != cpus)
+            barrier();
+
+    spin_unlock(&call_lock);
+
+    return 0;
+}
+
+static void stop_this_cpu (void * dummy)
+{
+    /*
+     * Remove this CPU:
+     */
+    clear_bit(smp_processor_id(), &cpu_online_map);
+    __cli();
+    disable_local_APIC();
+    for(;;) __asm__("hlt");
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+    smp_call_function(stop_this_cpu, NULL, 1, 0);
+    smp_num_cpus = 1;
+
+    __cli();
+    disable_local_APIC();
+    __sti();
+}
+
+/*
+ * Nothing to do, as all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_event_check_interrupt(void)
+{
+    ack_APIC_irq();
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+    void (*func) (void *info) = call_data->func;
+    void *info = call_data->info;
+    int wait = call_data->wait;
+
+    ack_APIC_irq();
+    /*
+     * Notify initiating CPU that I've grabbed the data and am
+     * about to execute the function
+     */
+    mb();
+    atomic_inc(&call_data->started);
+    /*
+     * At this point the info structure may be out of scope unless wait==1
+     */
+    (*func)(info);
+    if (wait) {
+        mb();
+        atomic_inc(&call_data->finished);
+    }
+}
+
diff --git a/xen-2.4.16/arch/i386/smpboot.c b/xen-2.4.16/arch/i386/smpboot.c
new file mode 100644 (file)
index 0000000..ce4d3da
--- /dev/null
@@ -0,0 +1,942 @@
+/*
+ *     x86 SMP booting functions
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Much of the core SMP work is based on previous work by Thomas Radke, to
+ *     whom a great many thanks are extended.
+ *
+ *     Thanks to Intel for making available several different Pentium,
+ *     Pentium Pro and Pentium-II/Xeon MP machines.
+ *     Original development of Linux SMP code supported by Caldera.
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ *
+ *     Fixes
+ *             Felix Koop      :       NR_CPUS used properly
+ *             Jose Renau      :       Handle single CPU case.
+ *             Alan Cox        :       By repeated request 8) - Total BogoMIP report.
+ *             Greg Wright     :       Fix for kernel stacks panic.
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *     Matthias Sattler        :       Changes for 2.1 kernel map.
+ *     Michel Lespinasse       :       Changes for 2.1 kernel map.
+ *     Michael Chastain        :       Change trampoline.S to gnu as.
+ *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
+ *             Ingo Molnar     :       Added APIC timers, based on code
+ *                                     from Jose Renau
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
+ *             Martin J. Bligh :       Added support for multi-quad systems
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/mc146818rtc.h>
+#include <asm/smpboot.h>
+#include <xeno/smp.h>
+#include <asm/msr.h>
+#include <asm/system.h>
+#include <xeno/sched.h>
+#include <xeno/delay.h>
+#include <xeno/lib.h>
+
+/* Set if we find a B stepping CPU                     */
+static int smp_b_stepping;
+
+/* Setup configured maximum number of CPUs to activate */
+static int max_cpus = -1;
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* Bitmask of currently online CPUs */
+unsigned long cpu_online_map;
+
+static volatile unsigned long cpu_callin_map;
+static volatile unsigned long cpu_callout_map;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end  [];
+static unsigned char *trampoline_base;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+    memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+    return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+    /*
+     * Has to be in very low memory so we can execute
+     * real-mode AP code.
+     */
+    trampoline_base = __va(0x90000);
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+void __init smp_store_cpu_info(int id)
+{
+    struct cpuinfo_x86 *c = cpu_data + id;
+
+    *c = boot_cpu_data;
+    c->pte_quick = 0;
+    c->pmd_quick = 0;
+    c->pgd_quick = 0;
+    c->pgtable_cache_sz = 0;
+    identify_cpu(c);
+    /*
+     * Mask B, Pentium, but not Pentium MMX
+     */
+    if (c->x86_vendor == X86_VENDOR_INTEL &&
+        c->x86 == 5 &&
+        c->x86_mask >= 1 && c->x86_mask <= 4 &&
+        c->x86_model <= 3)
+        /*
+         * Remember we have B step Pentia with bugs
+         */
+        smp_b_stepping = 1;
+}
+
+/*
+ * Architecture specific routine called by the kernel just before init is
+ * fired off. This allows the BP to have everything in order [we hope].
+ * At the end of this all the APs will hit the system scheduling and off
+ * we go. Each AP will load the system gdt's and jump through the kernel
+ * init into idle(). At this point the scheduler will one day take over
+ * and give them jobs to do. smp_callin is a standard routine
+ * we use to track CPUs as they power up.
+ */
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
+void __init smp_commence(void)
+{
+    /*
+     * Lets the callins below out of their loop.
+     */
+    Dprintk("Setting commenced=1, go go go\n");
+
+    wmb();
+    atomic_set(&smp_commenced,1);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSC's synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+/*
+ * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
+ * multiplication. Not terribly optimized but we need it at boot time only
+ * anyway.
+ *
+ * result == a / b
+ *     == (a1 + a2*(2^32)) / b
+ *     == a1/b + a2*(2^32/b)
+ *     == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
+ *                 ^---- (this multiplication can overflow)
+ */
+
+static unsigned long long div64 (unsigned long long a, unsigned long b0)
+{
+    unsigned int a1, a2;
+    unsigned long long res;
+
+    a1 = ((unsigned int*)&a)[0];
+    a2 = ((unsigned int*)&a)[1];
+
+    res = a1/b0 +
+        (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+        a2 / b0 +
+        (a2 * (0xffffffff % b0)) / b0;
+
+    return res;
+}
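+/*
+ * Worked example (editorial): for a = 10^10 and b0 = 4 we have
+ * a1 = 0x540BE400 and a2 = 2, and the four terms sum to
+ * 352516352 + 2147483646 + 0 + 1 = 2499999999 -- one short of the
+ * exact 2500000000, since each term truncates. Close enough for
+ * boot-time TSC averaging.
+ */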
+
+static void __init synchronize_tsc_bp (void)
+{
+    int i;
+    unsigned long long t0;
+    unsigned long long sum, avg;
+    long long delta;
+    int buggy = 0;
+
+    printk("checking TSC synchronization across CPUs: ");
+
+    atomic_set(&tsc_start_flag, 1);
+    wmb();
+
+    /*
+     * We loop a few times to get a primed instruction cache,
+     * then the last pass is more or less synchronized and
+     * the BP and APs set their cycle counters to zero all at
+     * once. This reduces the chance of having random offsets
+     * between the processors, and guarantees that the maximum
+     * delay between the cycle counters is never bigger than
+     * the latency of information-passing (cachelines) between
+     * two CPUs.
+     */
+    for (i = 0; i < NR_LOOPS; i++) {
+        /*
+         * all APs synchronize but they loop on '== num_cpus'
+         */
+        while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+        atomic_set(&tsc_count_stop, 0);
+        wmb();
+        /*
+         * this lets the APs save their current TSC:
+         */
+        atomic_inc(&tsc_count_start);
+
+        rdtscll(tsc_values[smp_processor_id()]);
+        /*
+         * We clear the TSC in the last loop:
+         */
+        if (i == NR_LOOPS-1)
+            write_tsc(0, 0);
+
+        /*
+         * Wait for all APs to leave the synchronization point:
+         */
+        while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+        atomic_set(&tsc_count_start, 0);
+        wmb();
+        atomic_inc(&tsc_count_stop);
+    }
+
+    sum = 0;
+    for (i = 0; i < smp_num_cpus; i++) {
+        t0 = tsc_values[i];
+        sum += t0;
+    }
+    avg = div64(sum, smp_num_cpus);
+
+    sum = 0;
+    for (i = 0; i < smp_num_cpus; i++) {
+        delta = tsc_values[i] - avg;
+        if (delta < 0)
+            delta = -delta;
+        /*
+         * We report clock differences bigger than 2 microseconds.
+         */
+        if (delta > 2*ticks_per_usec) {
+            long realdelta;
+            if (!buggy) {
+                buggy = 1;
+                printk("\n");
+            }
+            realdelta = div64(delta, ticks_per_usec);
+            if (tsc_values[i] < avg)
+                realdelta = -realdelta;
+
+            printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
+                   i, realdelta);
+        }
+
+        sum += delta;
+    }
+    if (!buggy)
+        printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+    int i;
+
+    /*
+     * smp_num_cpus is not necessarily known at the time
+     * this gets called, so we first wait for the BP to
+     * finish SMP initialization:
+     */
+    while (!atomic_read(&tsc_start_flag)) mb();
+
+    for (i = 0; i < NR_LOOPS; i++) {
+        atomic_inc(&tsc_count_start);
+        while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+        rdtscll(tsc_values[smp_processor_id()]);
+        if (i == NR_LOOPS-1)
+            write_tsc(0, 0);
+
+        atomic_inc(&tsc_count_stop);
+        while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+    }
+}
+#undef NR_LOOPS
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+    int cpuid, phys_id, i;
+
+    /*
+     * If woken up by an INIT in an 82489DX configuration
+     * we may get here before an INIT-deassert IPI reaches
+     * our local APIC.  We have to wait for the IPI or we'll
+     * lock up on an APIC access.
+     */
+    while (!atomic_read(&init_deasserted));
+
+    /*
+     * (This works even if the APIC is not enabled.)
+     */
+    phys_id = GET_APIC_ID(apic_read(APIC_ID));
+    cpuid = smp_processor_id();
+    if (test_and_set_bit(cpuid, &cpu_online_map)) {
+        printk("huh, phys CPU#%d, CPU#%d already present??\n",
+               phys_id, cpuid);
+        BUG();
+    }
+    Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+    /*
+     * STARTUP IPIs are fragile beasts as they might sometimes
+     * trigger some glue motherboard logic. Complete APIC bus
+     * silence for 1 second; this overestimates the time the
+     * boot CPU is spending to send the up to 2 STARTUP IPIs
+     * by a factor of two. This should be enough.
+     */
+
+    for ( i = 0; i < 200; i++ )
+    {
+        if ( test_bit(cpuid, &cpu_callout_map) ) break;
+        mdelay(10);
+    }
+
+    if (!test_bit(cpuid, &cpu_callout_map)) {
+        printk("BUG: CPU%d started up but did not get a callout!\n",
+               cpuid);
+        BUG();
+    }
+
+    /*
+     * The boot CPU has finished the init stage and is spinning on
+     * callin_map until we finish. We are free to set up this CPU:
+     * first the APIC. (This is probably redundant on most boards.)
+     */
+
+    Dprintk("CALLIN, before setup_local_APIC().\n");
+
+    setup_local_APIC();
+
+    __sti();
+
+#ifdef CONFIG_MTRR
+    /*
+     * Must be done before calibration delay is computed
+     */
+    mtrr_init_secondary_cpu ();
+#endif
+
+    Dprintk("Stack at about %p\n",&cpuid);
+
+    /*
+     * Save our processor parameters
+     */
+    smp_store_cpu_info(cpuid);
+
+    /*
+     * Allow the master to continue.
+     */
+    set_bit(cpuid, &cpu_callin_map);
+
+    /*
+     *      Synchronize the TSC with the BP
+     */
+    synchronize_tsc_ap();
+}
+
+int cpucount;
+
+/*
+ * Activate a secondary processor.
+ */
+int __init start_secondary(void *unused)
+{
+    extern void cpu_init(void);
+
+    /*
+     * Don't put anything before smp_callin(); SMP
+     * booting is so fragile that we want to limit the
+     * things done here to the bare necessities.
+     */
+    cpu_init();
+    smp_callin();
+
+    while (!atomic_read(&smp_commenced))
+        rep_nop();
+
+    /*
+     * low-memory mappings have been cleared, flush them from the local TLBs 
+     * too.
+     */
+    local_flush_tlb();
+
+    cpu_idle();
+    BUG();
+
+    return 0;
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure.
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+    /*
+     * We don't actually need to load the full TSS,
+     * basically just the stack pointer and the eip.
+     */
+    asm volatile(
+        "movl %0,%%esp\n\t"
+        "jmp *%1"
+        :
+        :"r" (current->thread.esp),"r" (current->thread.eip));
+}
+
+extern struct {
+    void * esp;
+    unsigned short ss;
+} stack_start;
+
+/* which physical APIC ID maps to which logical CPU number */
+volatile int physical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which physical APIC ID */
+volatile int cpu_2_physical_apicid[NR_CPUS];
+
+/* which logical APIC ID maps to which logical CPU number */
+volatile int logical_apicid_2_cpu[MAX_APICID];
+/* which logical CPU number maps to which logical APIC ID */
+volatile int cpu_2_logical_apicid[NR_CPUS];
+
+static inline void init_cpu_to_apicid(void)
+/* Initialize all maps between cpu number and apicids */
+{
+    int apicid, cpu;
+
+    for (apicid = 0; apicid < MAX_APICID; apicid++) {
+        physical_apicid_2_cpu[apicid] = -1;
+        logical_apicid_2_cpu[apicid] = -1;
+    }
+    for (cpu = 0; cpu < NR_CPUS; cpu++) {
+        cpu_2_physical_apicid[cpu] = -1;
+        cpu_2_logical_apicid[cpu] = -1;
+    }
+}
+
+static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
+/* 
+ * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+    physical_apicid_2_cpu[apicid] = cpu;       
+    cpu_2_physical_apicid[cpu] = apicid;
+}
+
+static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
+/* 
+ * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
+ * else physical apic ids
+ */
+{
+    physical_apicid_2_cpu[apicid] = -1;        
+    cpu_2_physical_apicid[cpu] = -1;
+}
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+    int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+    char *names[] = { "ID", "VERSION", "SPIV" };
+    int timeout, status;
+
+    printk("Inquiring remote APIC #%d...\n", apicid);
+
+    for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+        printk("... APIC #%d %s: ", apicid, names[i]);
+
+        /*
+         * Wait for idle.
+         */
+        apic_wait_icr_idle();
+
+        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+        apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+        timeout = 0;
+        do {
+            udelay(100);
+            status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+        } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+        switch (status) {
+        case APIC_ICR_RR_VALID:
+            status = apic_read(APIC_RRR);
+            printk("%08x\n", status);
+            break;
+        default:
+            printk("failed\n");
+        }
+    }
+}
+#endif
+
+
+static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
+{
+    unsigned long send_status = 0, accept_status = 0;
+    int maxlvt, timeout, num_starts, j;
+
+    Dprintk("Asserting INIT.\n");
+
+    /*
+     * Turn INIT on target chip
+     */
+    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+    /*
+     * Send IPI
+     */
+    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+                      | APIC_DM_INIT);
+
+    Dprintk("Waiting for send to finish...\n");
+    timeout = 0;
+    do {
+        Dprintk("+");
+        udelay(100);
+        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+    } while (send_status && (timeout++ < 1000));
+
+    mdelay(10);
+
+    Dprintk("Deasserting INIT.\n");
+
+    /* Target chip */
+    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+    /* Send IPI */
+    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+    Dprintk("Waiting for send to finish...\n");
+    timeout = 0;
+    do {
+        Dprintk("+");
+        udelay(100);
+        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+    } while (send_status && (timeout++ < 1000));
+
+    atomic_set(&init_deasserted, 1);
+
+    /*
+     * Should we send STARTUP IPIs?
+     *
+     * Determine this based on the APIC version.
+     * If we don't have an integrated APIC, don't send the STARTUP IPIs.
+     */
+    if (APIC_INTEGRATED(apic_version[phys_apicid]))
+        num_starts = 2;
+    else
+        num_starts = 0;
+
+    /*
+     * Run STARTUP IPI loop.
+     */
+    Dprintk("#startup loops: %d.\n", num_starts);
+
+    maxlvt = get_maxlvt();
+
+    for (j = 1; j <= num_starts; j++) {
+        Dprintk("Sending STARTUP #%d.\n",j);
+
+        apic_read_around(APIC_SPIV);
+        apic_write(APIC_ESR, 0);
+        apic_read(APIC_ESR);
+        Dprintk("After apic_write.\n");
+
+        /*
+         * STARTUP IPI
+         */
+
+        /* Target chip */
+        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+
+        /* Boot on the stack */
+        /* Kick the second */
+        apic_write_around(APIC_ICR, APIC_DM_STARTUP
+                          | (start_eip >> 12));
+
+        /*
+         * Give the other CPU some time to accept the IPI.
+         */
+        udelay(300);
+
+        Dprintk("Startup point 1.\n");
+
+        Dprintk("Waiting for send to finish...\n");
+        timeout = 0;
+        do {
+            Dprintk("+");
+            udelay(100);
+            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+        } while (send_status && (timeout++ < 1000));
+
+        /*
+         * Give the other CPU some time to accept the IPI.
+         */
+        udelay(200);
+        /*
+         * Due to the Pentium erratum 3AP.
+         */
+        if (maxlvt > 3) {
+            apic_read_around(APIC_SPIV);
+            apic_write(APIC_ESR, 0);
+        }
+        accept_status = (apic_read(APIC_ESR) & 0xEF);
+        if (send_status || accept_status)
+            break;
+    }
+    Dprintk("After Startup.\n");
+
+    if (send_status)
+        printk("APIC never delivered???\n");
+    if (accept_status)
+        printk("APIC delivery error (%lx).\n", accept_status);
+
+    return (send_status | accept_status);
+}
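+/*
+ * (Sequence summary, editorial: assert INIT level-triggered, wait 10ms,
+ * deassert INIT, then -- on integrated APICs only -- send up to two
+ * STARTUP IPIs whose vector field (start_eip >> 12) names the 4KB page
+ * at which the AP begins real-mode execution.)
+ */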
+
+extern unsigned long cpu_initialized;
+
+static void __init do_boot_cpu (int apicid) 
+/*
+ * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
+ * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
+ */
+{
+    struct task_struct *idle;
+    unsigned long boot_error = 0;
+    int timeout, cpu;
+    unsigned long start_eip;
+
+    cpu = ++cpucount;
+    /*
+     * We can't use kernel_thread(), since we must avoid rescheduling the child.
+     */
+    if ( (idle = do_newdomain()) == NULL )
+        panic("failed 'newdomain' for CPU %d", cpu);
+    idle->processor    = cpu;
+    idle->domain       = IDLE_DOMAIN_ID;
+    idle->mm.pagetable = mk_pagetable((unsigned long)idle0_pg_table);
+
+    map_cpu_to_boot_apicid(cpu, apicid);
+
+    idle->thread.esp = idle->thread.esp0 = (unsigned long)idle + THREAD_SIZE;
+    idle->thread.eip = (unsigned long) start_secondary;
+
+    /* start_eip had better be page-aligned! */
+    start_eip = setup_trampoline();
+
+    /* So we see what's up   */
+    printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+    stack_start.esp = (void *) (1024+PAGE_SIZE+(char *)idle-__PAGE_OFFSET);
+
+    /*
+     * This grunge runs the startup process for
+     * the targeted processor.
+     */
+
+    atomic_set(&init_deasserted, 0);
+
+    Dprintk("Setting warm reset code and vector.\n");
+
+    CMOS_WRITE(0xa, 0xf);
+    local_flush_tlb();
+    Dprintk("1.\n");
+    *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+    Dprintk("2.\n");
+    *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
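+    /*
+     * (Illustrative, assuming the trampoline page at physical 0x90000
+     * from smp_alloc_memory() above: this stores segment 0x9000 and
+     * offset 0x0 -- presumably the 40:67 warm-reset vector the BIOS
+     * jumps through after the CMOS shutdown code 0xa written above.)
+     */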
+    Dprintk("3.\n");
+
+    /*
+     * Be paranoid about clearing APIC errors.
+     */
+    if (APIC_INTEGRATED(apic_version[apicid])) {
+        apic_read_around(APIC_SPIV);
+        apic_write(APIC_ESR, 0);
+        apic_read(APIC_ESR);
+    }
+
+    /*
+     * Status is now clean
+     */
+    boot_error = 0;
+
+    /*
+     * Starting actual IPI sequence...
+     */
+
+    boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
+
+    if (!boot_error) {
+        /*
+         * allow APs to start initializing.
+         */
+        Dprintk("Before Callout %d.\n", cpu);
+        set_bit(cpu, &cpu_callout_map);
+        Dprintk("After Callout %d.\n", cpu);
+
+        /*
+         * Wait 5s total for a response
+         */
+        for (timeout = 0; timeout < 50000; timeout++) {
+            if (test_bit(cpu, &cpu_callin_map))
+                break; /* It has booted */
+            udelay(100);
+        }
+
+        if (test_bit(cpu, &cpu_callin_map)) {
+            /* number CPUs logically, starting from 1 (BSP is 0) */
+            printk("CPU%d has booted.\n", cpu);
+        } else {
+            boot_error = 1;
+            if (*((volatile unsigned char *)phys_to_virt(8192)) == 0xA5)
+                /* trampoline started but...? */
+                printk("Stuck ??\n");
+            else
+                /* trampoline code not run */
+                printk("Not responding.\n");
+#if APIC_DEBUG
+            inquire_remote_apic(apicid);
+#endif
+        }
+    }
+    if (boot_error) {
+        /* Try to put things back the way they were before ... */
+        unmap_cpu_to_boot_apicid(cpu, apicid);
+        clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
+        clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
+        clear_bit(cpu, &cpu_online_map);  /* was set in smp_callin() */
+        cpucount--;
+    }
+
+    /* mark "stuck" area as not stuck */
+    *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+}
+
+
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+static int boot_cpu_logical_apicid;
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio = NULL;
+
+void __init smp_boot_cpus(void)
+{
+    int apicid, bit;
+
+#ifdef CONFIG_MTRR
+    /*  Must be done before other processors booted  */
+    mtrr_init_boot_cpu ();
+#endif
+    /* Initialize the logical to physical CPU number mapping */
+    init_cpu_to_apicid();
+
+    /*
+     * Setup boot CPU information
+     */
+    smp_store_cpu_info(0); /* Final full version of the data */
+    printk("CPU%d booted\n", 0);
+
+    /*
+     * We have the boot CPU online for sure.
+     */
+    set_bit(0, &cpu_online_map);
+    boot_cpu_logical_apicid = logical_smp_processor_id();
+    map_cpu_to_boot_apicid(0, boot_cpu_apicid);
+
+    /*
+     * If we couldn't find an SMP configuration at boot time,
+     * get out of here now!
+     */
+    if (!smp_found_config) {
+        printk("SMP motherboard not detected.\n");
+        io_apic_irqs = 0;
+        cpu_online_map = phys_cpu_present_map = 1;
+        smp_num_cpus = 1;
+        if (APIC_init_uniprocessor())
+            printk("Local APIC not detected."
+                   " Using dummy APIC emulation.\n");
+        goto smp_done;
+    }
+
+    /*
+     * Should not be necessary because the MP table should list the boot
+     * CPU too, but we do it for the sake of robustness anyway.
+     */
+    if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
+        printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+               boot_cpu_physical_apicid);
+        phys_cpu_present_map |= (1 << hard_smp_processor_id());
+    }
+
+    /*
+     * If we couldn't find a local APIC, then get out of here now!
+     */
+    if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+        !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
+        printk("BIOS bug, local APIC #%d not detected!...\n",
+               boot_cpu_physical_apicid);
+        printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+        io_apic_irqs = 0;
+        cpu_online_map = phys_cpu_present_map = 1;
+        smp_num_cpus = 1;
+        goto smp_done;
+    }
+
+    verify_local_APIC();
+
+    /*
+     * If SMP should be disabled, then really disable it!
+     */
+    if (!max_cpus) {
+        smp_found_config = 0;
+        printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+        io_apic_irqs = 0;
+        cpu_online_map = phys_cpu_present_map = 1;
+        smp_num_cpus = 1;
+        goto smp_done;
+    }
+
+    connect_bsp_APIC();
+    setup_local_APIC();
+
+    if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
+        BUG();
+
+    /*
+     * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
+     *
+     * In clustered apic mode, phys_cpu_present_map is constructed thus:
+     * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
+     * clustered apic ID.
+     */
+    Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
+
+    for (bit = 0; bit < NR_CPUS; bit++) {
+        apicid = cpu_present_to_apicid(bit);
+        /*
+         * Don't even attempt to start the boot CPU!
+         */
+        if (apicid == boot_cpu_apicid)
+            continue;
+
+        if (!(phys_cpu_present_map & (1 << bit)))
+            continue;
+        if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+            continue;
+
+        do_boot_cpu(apicid);
+
+        /*
+         * Make sure we unmap all failed CPUs
+         */
+        if ((boot_apicid_to_cpu(apicid) == -1) &&
+            (phys_cpu_present_map & (1 << bit)))
+            printk("CPU #%d not responding - cannot use it.\n",
+                   apicid);
+    }
+
+    /*
+     * Cleanup possible dangling ends...
+     *
+     * Install writable page 0 entry to set BIOS data area.
+     */
+    local_flush_tlb();
+
+    /*
+     * Paranoid:  Set warm reset code and vector here back
+     * to default values.
+     */
+    CMOS_WRITE(0, 0xf);
+
+    *((volatile long *) phys_to_virt(0x467)) = 0;
+
+    if (!cpucount) {
+        printk("Error: only one processor found.\n");
+    } else {
+        printk("Total of %d processors activated.\n", cpucount+1);
+    }
+    smp_num_cpus = cpucount + 1;
+
+    if (smp_b_stepping)
+        printk("WARNING: SMP operation may"
+               " be unreliable with B stepping processors.\n");
+    Dprintk("Boot done.\n");
+
+    /*
+     * Here we can be sure that there is an IO-APIC in the system. Let's
+     * go and set it up:
+     */
+    if ( nr_ioapics ) setup_IO_APIC();
+
+    /* Set up all local APIC timers in the system. */
+    setup_APIC_clocks();
+
+    /* Synchronize the TSC with the AP(s). */
+    if ( cpucount ) synchronize_tsc_bp();
+
+ smp_done:
+    ;
+}
diff --git a/xen-2.4.16/arch/i386/time.c b/xen-2.4.16/arch/i386/time.c
new file mode 100644 (file)
index 0000000..0b7d3ea
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ *  linux/arch/i386/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ */
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/time.h>
+
+#include <asm/io.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+#include <asm/mpspec.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+int timer_ack=0;
+extern spinlock_t i8259A_lock;
+static inline void do_timer_interrupt(int irq, 
+                                      void *dev_id, struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_IO_APIC
+    if (timer_ack) {
+        /*
+         * Subtle: when I/O APICs are used we have to ack the timer IRQ
+         * manually to reset the IRR bit for do_slow_gettimeoffset().
+         * This will also deassert NMI lines for the watchdog if run
+         * on an 82489DX-based system.
+         */
+        spin_lock(&i8259A_lock);
+        outb(0x0c, 0x20);
+        /* Ack the IRQ; AEOI will end it automatically. */
+        inb(0x20);
+        spin_unlock(&i8259A_lock);
+    }
+#endif
+    do_timer(regs);
+}
+
+/*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    do_timer_interrupt(irq, NULL, regs);
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+
+/* ------ Calibrate the TSC ------- 
+ * Return processor ticks per second / CALIBRATE_FRAC.
+ */
+
+#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */
+#define CALIBRATE_FRAC  20     /* calibrate over 50ms */
+#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)
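+/*
+ * (Editorial arithmetic: CALIBRATE_LATCH = (1193180 + 10) / 20 = 59659
+ * PIT counts, and 59659 / 1193180Hz is almost exactly 50ms -- the window
+ * over which the TSC is sampled below.)
+ */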
+
+static unsigned long __init calibrate_tsc(void)
+{
+    /* Set the Gate high, disable speaker */
+    outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+    /*
+     * Now let's take care of CTC channel 2:
+     *
+     * Set the Gate high, program CTC channel 2 for mode 0
+     * (interrupt on terminal count mode), binary count, and
+     * load the CALIBRATE_LATCH count (LSB then MSB) to begin the countdown.
+     */
+    outb(0xb0, 0x43);                  /* binary, mode 0, LSB/MSB, Ch 2 */
+    outb(CALIBRATE_LATCH & 0xff, 0x42);        /* LSB of count */
+    outb(CALIBRATE_LATCH >> 8, 0x42);  /* MSB of count */
+
+    {
+        unsigned long startlow, starthigh;
+        unsigned long endlow, endhigh;
+        unsigned long count;
+
+        rdtsc(startlow,starthigh);
+        count = 0;
+        do {
+            count++;
+        } while ((inb(0x61) & 0x20) == 0);
+        rdtsc(endlow,endhigh);
+
+        /* Error: ECTCNEVERSET */
+        if (count <= 1)
+            goto bad_ctc;
+
+        /* 64-bit subtract - gcc just messes up with long longs */
+        __asm__("subl %2,%0\n\t"
+                "sbbl %3,%1"
+                :"=a" (endlow), "=d" (endhigh)
+                :"g" (startlow), "g" (starthigh),
+                "0" (endlow), "1" (endhigh));
+
+        /* Error: ECPUTOOFAST */
+        if (endhigh)
+            goto bad_ctc;
+
+        return endlow;
+    }
+
+    /*
+     * The CTC wasn't reliable: we got a hit on the very first read, or the
+     * CPU was so fast/slow that the quotient wouldn't fit in 32 bits.
+     */
+ bad_ctc:
+    return 0;
+}
+
+void __init time_init(void)
+{
+    unsigned long ticks_per_frac = calibrate_tsc();
+
+    if ( !ticks_per_frac )
+        panic("Error calibrating TSC\n");
+
+    ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC);
+    cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC);
+
+    printk("Detected %lu.%03lu MHz processor.\n", 
+           cpu_khz / 1000, cpu_khz % 1000);
+
+    setup_irq(0, &irq0);
+}
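+/*
+ * Worked example (editorial, hypothetical numbers): a 500MHz CPU counts
+ * about 25000000 TSC ticks in the 50ms window, so ticks_per_usec =
+ * 25000000 / (1000000/20) = 500 and cpu_khz = 25000000 / (1000/20)
+ * = 500000, printed as "Detected 500.000 MHz processor."
+ */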
diff --git a/xen-2.4.16/arch/i386/trampoline.S b/xen-2.4.16/arch/i386/trampoline.S
new file mode 100644 (file)
index 0000000..f0beef7
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ *
+ *     Trampoline.S    Derived from Setup.S by Linus Torvalds
+ *
+ *     4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ *     Entry: CS:IP point to the start of our code; we are
+ *     in real mode with no stack but the rest of the
+ *     trampoline page from which to make our stack.
+ *     Everything else is a mystery.
+ *
+ *     On entry to trampoline_data, the processor is in real mode
+ *     with 16-bit addressing and 16-bit data.  CS has some value
+ *     and IP is zero.  Thus, data addresses need to be absolute
+ *     (no relocation) and are taken with regard to r_base.
+ */
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+        mov    %cs, %ax        # Code and data in the same place
+       mov     %ax, %ds
+
+       movl    $0xA5A5A5A5, %ebx # Flag an SMP trampoline
+       cli                     # We should be safe anyway
+
+       movl    $0xA5A5A5A5, trampoline_data - r_base
+
+       lidt    idt_48 - r_base # load idt with 0, 0
+       lgdt    gdt_48 - r_base # load gdt with whatever is appropriate
+
+       xor     %ax, %ax
+       inc     %ax             # protected mode (PE) bit
+       lmsw    %ax             # into protected mode
+       jmp     flush_instr
+flush_instr:
+       ljmpl   $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET
+
+idt_48:
+       .word   0                       # idt limit = 0
+       .word   0, 0                    # idt base = 0L
+
+gdt_48:
+       .word   0x0800                  # gdt limit = 2048, 256 GDT entries
+       .long   gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU)
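+
+# (Editorial note: limit 0x0800 = 2048 bytes = 256 eight-byte descriptors,
+# matching the "256 GDT entries" remark above.)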
+
+.globl SYMBOL_NAME(trampoline_end)
+SYMBOL_NAME_LABEL(trampoline_end)
diff --git a/xen-2.4.16/arch/i386/traps.c b/xen-2.4.16/arch/i386/traps.c
new file mode 100644 (file)
index 0000000..e84690a
--- /dev/null
@@ -0,0 +1,595 @@
+/*
+ *  linux/arch/i386/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/interrupt.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <asm/ptrace.h>
+#include <xeno/delay.h>
+#include <xeno/spinlock.h>
+#include <xeno/irq.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/desc.h>
+#include <asm/debugreg.h>
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define GTBF_TRAP        1
+#define GTBF_TRAP_NOCODE 2
+#define GTBF_TRAP_CR2    4
+struct guest_trap_bounce {
+    unsigned long  error_code;        /*   0 */
+    unsigned long  cr2;               /*   4 */
+    unsigned short flags;             /*   8 */
+    unsigned short cs;                /*  10 */
+    unsigned long  eip;               /*  12 */
+} guest_trap_bounce[NR_CPUS] = { { 0 } };
+
+asmlinkage int hypervisor_call(void);
+asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
+
+/*
+ * The IDT has to be page-aligned to simplify the Pentium
+ * F0 0F bug workaround.. We have a special link segment
+ * for this.
+ */
+struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 24;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+    return ( 1 );
+}
+
+void show_trace(unsigned long * stack)
+{
+    int i;
+    unsigned long addr;
+
+    if (!stack)
+        stack = (unsigned long*)&stack;
+
+    printk("Call Trace: ");
+    i = 1;
+    while (((long) stack & (THREAD_SIZE-1)) != 0) {
+        addr = *stack++;
+        if (kernel_text_address(addr)) {
+            if (i && ((i % 6) == 0))
+                printk("\n   ");
+            printk("[<%08lx>] ", addr);
+            i++;
+        }
+    }
+    printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+    unsigned long esp = tsk->thread.esp;
+
+    /* User space on another CPU? */
+    if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+        return;
+    show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+    unsigned long *stack;
+    int i;
+
+    /* Debugging aid: "show_stack(NULL);" prints the
+     * back trace for this cpu. */
+
+    if(esp==NULL)
+        esp=(unsigned long*)&esp;
+
+    stack = esp;
+    for(i=0; i < kstack_depth_to_print; i++) {
+        if (((long) stack & (THREAD_SIZE-1)) == 0)
+            break;
+        if (i && ((i % 8) == 0))
+            printk("\n       ");
+        printk("%08lx ", *stack++);
+    }
+    printk("\n");
+    show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+    unsigned long esp;
+    unsigned short ss;
+
+    esp = (unsigned long) (&regs->esp);
+    ss  = __HYPERVISOR_DS;
+    if ( regs->xcs & 3 )
+    {
+        esp = regs->esp;
+        ss  = regs->xss & 0xffff;
+    }
+
+    printk("CPU:    %d\nEIP:    %04x:[<%08lx>]      \nEFLAGS: %08lx\n",
+           smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
+    printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+           regs->eax, regs->ebx, regs->ecx, regs->edx);
+    printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+           regs->esi, regs->edi, regs->ebp, esp);
+    printk("ds: %04x   es: %04x   ss: %04x\n",
+           regs->xds & 0xffff, regs->xes & 0xffff, ss);
+}      
+
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+    spin_lock_irq(&die_lock);
+    printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
+    show_registers(regs);
+    spin_unlock_irq(&die_lock);
+    panic("HYPERVISOR DEATH!!\n");
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+    if (!(3 & regs->xcs)) die(str, regs, err);
+}
+
+static inline void do_trap(int trapnr, char *str,
+                           struct pt_regs *regs,
+                           long error_code, int use_error_code)
+{
+    struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+    trap_info_t *ti;
+    unsigned long addr, fixup;
+
+    if (!(regs->xcs & 3))
+        goto fault_in_hypervisor;
+
+    ti = current->thread.traps + trapnr;
+    if ( trapnr == 14 )
+    {
+        /* page fault pushes %cr2 */
+        gtb->flags = GTBF_TRAP_CR2;
+        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (gtb->cr2) : );
+    }
+    else
+    {
+        gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE;
+    }
+    gtb->error_code = error_code;
+    gtb->cs         = ti->cs;
+    gtb->eip        = ti->address;
+    return; 
+
+ fault_in_hypervisor:
+
+    if ( (fixup = search_exception_table(regs->eip)) != 0 )
+    {
+        regs->eip = fixup;
+        return;
+    }
+
+    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
+
+    if ( trapnr == 14 )
+    {
+        unsigned long page;
+        __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (page) : );
+        printk(" pde = %08lx\n", page);
+        page = ((unsigned long *) __va(page))[addr >> 22];
+        printk("*pde = %08lx\n", page);
+        if ( page & _PAGE_PRESENT )
+        {
+            page &= PAGE_MASK;
+            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
+            printk(" *pte = %08lx\n", page);
+        }
+    }
+
+    show_registers(regs);
+    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
+          "[error_code=%08x]\n"
+          "Faulting linear address might be %08lx\n",
+          smp_processor_id(), trapnr, str,
+          error_code, addr);
+}
+
+#define DO_ERROR_NOCODE(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 0); \
+}
+
+#define DO_ERROR(trapnr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+do_trap(trapnr, str, regs, error_code, 1); \
+}
+
+DO_ERROR_NOCODE( 0, "divide error", divide_error)
+DO_ERROR_NOCODE( 3, "int3", int3)
+DO_ERROR_NOCODE( 4, "overflow", overflow)
+DO_ERROR_NOCODE( 5, "bounds", bounds)
+DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
+DO_ERROR_NOCODE( 7, "device not available", device_not_available)
+DO_ERROR( 8, "double fault", double_fault)
+DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, "invalid TSS", invalid_TSS)
+DO_ERROR(11, "segment not present", segment_not_present)
+DO_ERROR(12, "stack segment", stack_segment)
+DO_ERROR(14, "page fault", page_fault)
+/* Vector 15 reserved by Intel */
+DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
+DO_ERROR(17, "alignment check", alignment_check)
+DO_ERROR_NOCODE(18, "machine check", machine_check)
+DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
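
Each DO_ERROR* line above expands to a complete trap handler. For example,
DO_ERROR(10, "invalid TSS", invalid_TSS) expands to:

    asmlinkage void do_invalid_TSS(struct pt_regs * regs, long error_code)
    {
        do_trap(10, "invalid TSS", regs, error_code, 1);
    }
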
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+    struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+    trap_info_t *ti;
+    unsigned long fixup;
+
+    /* Bad news if the fault is in ring 0, or is the result of an interrupt. */
+    if (!(regs->xcs & 3) || (error_code & 1))
+        goto gp_in_kernel;
+
+    if ( (error_code & 2) )
+    {
+        /* This fault must be due to <INT n> instruction. */
+        ti = current->thread.traps + (error_code>>3);
+        if ( ti->dpl >= (regs->xcs & 3) )
+        {
+            gtb->flags = GTBF_TRAP_NOCODE;
+            gtb->cs    = ti->cs;
+            gtb->eip   = ti->address;
+            regs->eip += 2;
+            return;
+        }
+    }
+
+    /* Pass on GPF as is. */
+    ti = current->thread.traps + 13;
+    gtb->flags      = GTBF_TRAP;
+    gtb->error_code = error_code;
+    gtb->cs         = ti->cs;
+    gtb->eip        = ti->address;
+    return;
+
+ gp_in_kernel:
+    if ( (fixup = search_exception_table(regs->eip)) != 0 )
+    {
+        regs->eip = fixup;
+        return;
+    }
+
+    die("general protection fault", regs, error_code);
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+    printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+    printk("You probably have a hardware problem with your RAM chips\n");
+
+    /* Clear and disable the memory parity error line. */
+    reason = (reason & 0xf) | 4;
+    outb(reason, 0x61);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+    unsigned long i;
+
+    printk("NMI: IOCK error (debug interrupt?)\n");
+    show_registers(regs);
+
+    /* Re-enable the IOCK line, wait for a few seconds */
+    reason = (reason & 0xf) | 8;
+    outb(reason, 0x61);
+    i = 2000;
+    while (--i) udelay(1000);
+    reason &= ~8;
+    outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+    printk("Dazed and confused, but trying to continue\n");
+    printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+    unsigned char reason = inb(0x61);
+
+    if (!(reason & 0xc0)) {
+        unknown_nmi_error(reason, regs);
+        return;
+    }
+    if (reason & 0x80)
+        mem_parity_error(reason, regs);
+    if (reason & 0x40)
+        io_check_error(reason, regs);
+    /*
+     * Reassert NMI in case it became active meanwhile
+     * as it's edge-triggered.
+     */
+    outb(0x8f, 0x70);
+    inb(0x71);         /* dummy */
+    outb(0x0f, 0x70);
+    inb(0x71);         /* dummy */
+}
+
+asmlinkage void math_state_restore(struct pt_regs *regs, long error_code)
+{
+    __asm__ __volatile__("clts");
+
+    if ( !(current->flags & PF_USEDFPU) )
+    {
+        if ( current->flags & PF_DONEFPUINIT )
+            restore_fpu(current);
+        else
+            init_fpu();
+        current->flags |= PF_USEDFPU;   /* So we fnsave on switch_to() */    
+    }
+
+    if ( current->flags & PF_GUEST_STTS )
+    {
+        struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
+        gtb->flags      = GTBF_TRAP_NOCODE;
+        gtb->cs         = current->thread.traps[7].cs;
+        gtb->eip        = current->thread.traps[7].address;
+        current->flags &= ~PF_GUEST_STTS;
+    }
+}
+
+
+/*
+ * Our handling of the processor debug registers is non-trivial.
+ * We do not clear them on entry and exit from the kernel. Therefore
+ * it is possible to get a watchpoint trap here from inside the kernel.
+ * However, the code in ./ptrace.c has ensured that the user can
+ * only set watchpoints on userspace addresses. Therefore the in-kernel
+ * watchpoint trap can only occur in code which is reading/writing
+ * from user space. Such code must not hold kernel locks (since it
+ * can equally take a page fault), therefore it is safe to call
+ * force_sig_info even though that claims and releases locks.
+ * 
+ * Code in ./signal.c ensures that the debug control register
+ * is restored before we deliver any signal, and therefore that
+ * user code runs with the correct debug control register even though
+ * we clear it here.
+ *
+ * Being careful here means that we don't have to be as careful in a
+ * lot of more complicated places (task switching can be a bit lazy
+ * about restoring all the debug state, and ptrace doesn't have to
+ * find every occurrence of the TF bit that could be saved away even
+ * by user code)
+ */
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+    unsigned int condition;
+    struct task_struct *tsk = current;
+
+    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+    /* Mask out spurious debug traps due to lazy DR7 setting */
+    if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+        if (!tsk->thread.debugreg[7])
+            goto clear_dr7;
+    }
+
+    /* Save debug status register where ptrace can see it */
+    tsk->thread.debugreg[6] = condition;
+
+    panic("trap up to OS here, pehaps\n");
+
+    /* Disable additional traps. They'll be re-enabled when
+     * the signal is delivered.
+     */
+ clear_dr7:
+    __asm__("movl %0,%%db7"
+            : /* no output */
+            : "r" (0));
+}
+
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+                                         long error_code)
+{ /* nothing */ }
+
+
+#define _set_gate(gate_addr,type,dpl,addr) \
+do { \
+  int __d0, __d1; \
+  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+       "movw %4,%%dx\n\t" \
+       "movl %%eax,%0\n\t" \
+       "movl %%edx,%1" \
+       :"=m" (*((long *) (gate_addr))), \
+        "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
+       :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
+        "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
+} while (0)
+
+
+/*
+ * This needs to use 'idt_table' rather than 'idt', and
+ * thus use the _nonmapped_ version of the IDT, as the
+ * Pentium F0 0F bugfix can have resulted in the mapped
+ * IDT being write-protected.
+ */
+void set_intr_gate(unsigned int n, void *addr)
+{
+    _set_gate(idt_table+n,14,0,addr);
+}
+
+static void __init set_trap_gate(unsigned int n, void *addr)
+{
+    _set_gate(idt_table+n,15,0,addr);
+}
+
+static void __init set_system_gate(unsigned int n, void *addr)
+{
+    _set_gate(idt_table+n,15,3,addr);
+}
+
+static void __init set_call_gate(void *a, void *addr)
+{
+    _set_gate(a,12,3,addr);
+}
+
+#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
+       *((gate_addr)+1) = ((base) & 0xff000000) | \
+               (((base) & 0x00ff0000)>>16) | \
+               ((limit) & 0xf0000) | \
+               ((dpl)<<13) | \
+               (0x00408000) | \
+               ((type)<<8); \
+       *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
+               ((limit) & 0x0ffff); }
+
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+       "movw %%ax,2(%2)\n\t" \
+       "rorl $16,%%eax\n\t" \
+       "movb %%al,4(%2)\n\t" \
+       "movb %4,5(%2)\n\t" \
+       "movb $0,6(%2)\n\t" \
+       "movb %%ah,7(%2)\n\t" \
+       "rorl $16,%%eax" \
+       : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))
+
+void set_tss_desc(unsigned int n, void *addr)
+{
+    _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
+}
+
+void set_ldt_desc(unsigned int n, void *addr, unsigned int size)
+{
+    _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82);
+}
+
+void __init trap_init(void)
+{
+    set_trap_gate(0,&divide_error);
+    set_trap_gate(1,&debug);
+    set_intr_gate(2,&nmi);
+    set_system_gate(3,&int3);  /* int3-5 can be called from all */
+    set_system_gate(4,&overflow);
+    set_system_gate(5,&bounds);
+    set_trap_gate(6,&invalid_op);
+    set_trap_gate(7,&device_not_available);
+    set_trap_gate(8,&double_fault);
+    set_trap_gate(9,&coprocessor_segment_overrun);
+    set_trap_gate(10,&invalid_TSS);
+    set_trap_gate(11,&segment_not_present);
+    set_trap_gate(12,&stack_segment);
+    set_trap_gate(13,&general_protection);
+    set_intr_gate(14,&page_fault);
+    set_trap_gate(15,&spurious_interrupt_bug);
+    set_trap_gate(16,&coprocessor_error);
+    set_trap_gate(17,&alignment_check);
+    set_trap_gate(18,&machine_check);
+    set_trap_gate(19,&simd_coprocessor_error);
+
+    /*
+     * Cunning trick to allow arbitrary "INT n" handling.
+     * 
+     * 1. 3 <= N <= 5 is trivial, as these are intended to be explicit.
+     * 
+     * 2. For all others, we set gate DPL == 0. Any use of "INT n" will
+     *    thus cause a GPF with CS:EIP pointing at the faulting
+     *    instruction. We can then peek at the instruction and check
+     *    whether it is of the form "0xCD <imm8>". If so, we fake an
+     *    exception to the guest OS (see the sketch after trap_init()
+     *    below). If the protected read page faults, we patch that up as
+     *    a page fault to the guest OS.
+     *    [NB. Of course we consult the "soft DPL" to check that the
+     *     guest OS wants to handle a particular 'n'; if not, we pass
+     *     the GPF to the guest OS untouched.]
+     * 
+     * 3. For efficiency, we may want to allow direct traps by the guest
+     *    OS for certain critical vectors (eg. 0x80 in Linux). These must
+     *    therefore not be mapped by hardware interrupts, and so we'd need
+     *    a static list of them, which we add to on demand.
+     */
+
+    /* Only ring 1 can access monitor services. */
+    _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
+
+    /*
+     * Should be a barrier for any external CPU state.
+     */
+    {
+        extern void cpu_init(void);
+        cpu_init();
+    }
+}
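
The "INT n" decoding described in the comment above is not implemented in
this changeset. A minimal sketch of how the GPF path could decode the
faulting instruction might look as follows, assuming the Linux-derived
get_user() from <asm/uaccess.h> (which faults safely via the exception
table); the helper name is hypothetical:

    /* Sketch: decode an "INT n" (0xCD <imm8>) instruction at the faulting
     * EIP. Returns the vector, or -1 if the instruction does not match. */
    static int decode_soft_int(struct pt_regs *regs)
    {
        unsigned char opcode, vector;

        if ( get_user(opcode, (unsigned char *)regs->eip) )
            return -1;  /* unreadable: bounce to the guest as a page fault */
        if ( opcode != 0xCD )
            return -1;  /* GPF has some other cause: pass it on untouched */
        if ( get_user(vector, (unsigned char *)(regs->eip + 1)) )
            return -1;
        return vector;  /* caller then checks the "soft DPL" for this vector */
    }
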
+
+
+long do_set_trap_table(trap_info_t *traps)
+{
+    trap_info_t cur;
+    trap_info_t *dst = current->thread.traps;
+
+    memset(dst, 0, sizeof(*dst) * 256);
+
+    for ( ; ; )
+    {
+        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
+        if ( (cur.cs & 3) == 0 ) return -EPERM;
+        if ( cur.address == 0 ) break;
+        memcpy(dst+cur.vector, &cur, sizeof(cur));
+        traps++;
+    }
+
+    return(0);
+}
+
+
+long do_fpu_taskswitch(void)
+{
+    current->flags |= PF_GUEST_STTS;
+    stts();
+    return 0;
+}
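
A guest OS registers its virtual trap table through do_set_trap_table().
A minimal sketch of the guest side, assuming a HYPERVISOR_set_trap_table()
hypercall wrapper and a guest code-segment selector GUEST_CS (both names
hypothetical):

    /* Hypothetical guest-side usage. The table is terminated by an entry
     * with address == 0, matching the loop in do_set_trap_table() above;
     * each cs must have a non-zero RPL or the call fails with -EPERM. */
    static trap_info_t bootstrap_traps[] = {
        { .vector =  0, .dpl = 1, .cs = GUEST_CS,
          .address = (unsigned long)guest_divide_error },
        { .vector = 14, .dpl = 1, .cs = GUEST_CS,
          .address = (unsigned long)guest_page_fault },
        { .address = 0 }                        /* terminator */
    };

    HYPERVISOR_set_trap_table(bootstrap_traps);
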
diff --git a/xen-2.4.16/arch/i386/usercopy.c b/xen-2.4.16/arch/i386/usercopy.c
new file mode 100644 (file)
index 0000000..56322f1
--- /dev/null
@@ -0,0 +1,190 @@
+/* 
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+//#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+       {
+               if(n<512)
+                       __copy_user(to,from,n);
+               else
+                       mmx_copy_user(to,from,n);
+       }
+       return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+       {
+               if(n<512)
+                       __copy_user_zeroing(to,from,n);
+               else
+                       mmx_copy_user_zeroing(to, from, n);
+       }
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#else
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __copy_user(to,from,n);
+       return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       prefetchw(to);
+       if (access_ok(VERIFY_READ, from, n))
+               __copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res)                         \
+do {                                                                      \
+       int __d0, __d1, __d2;                                              \
+       __asm__ __volatile__(                                              \
+               "       testl %1,%1\n"                                     \
+               "       jz 2f\n"                                           \
+               "0:     lodsb\n"                                           \
+               "       stosb\n"                                           \
+               "       testb %%al,%%al\n"                                 \
+               "       jz 1f\n"                                           \
+               "       decl %1\n"                                         \
+               "       jnz 0b\n"                                          \
+               "1:     subl %1,%0\n"                                      \
+               "2:\n"                                                     \
+               ".section .fixup,\"ax\"\n"                                 \
+               "3:     movl %5,%0\n"                                      \
+               "       jmp 2b\n"                                          \
+               ".previous\n"                                              \
+               ".section __ex_table,\"a\"\n"                              \
+               "       .align 4\n"                                        \
+               "       .long 0b,3b\n"                                     \
+               ".previous"                                                \
+               : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
+                 "=&D" (__d2)                                             \
+               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+               : "memory");                                               \
+} while (0)
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res;
+       __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res = -EFAULT;
+       if (access_ok(VERIFY_READ, src, 1))
+               __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
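
The .fixup/__ex_table pattern used above pairs the address of each
potentially-faulting instruction with a recovery address;
search_exception_table(), called from do_trap() in traps.c, maps the
former to the latter when a fault hits. A minimal self-contained sketch of
the same pattern (not part of this changeset):

    /* Sketch: read one byte from guest space, returning -EFAULT rather
     * than crashing if the access faults. Label "0:" may fault; the
     * __ex_table entry redirects such a fault to the fixup at "3:". */
    static inline int get_guest_byte(unsigned char *val,
                                     const unsigned char *addr)
    {
        int err;
        unsigned char c = 0;
        __asm__ __volatile__(
            "0:     movb (%2),%b1\n"
            "1:\n"
            ".section .fixup,\"ax\"\n"
            "3:     movl %3,%0\n"               /* err = -EFAULT */
            "       jmp 1b\n"
            ".previous\n"
            ".section __ex_table,\"a\"\n"
            "       .align 4\n"
            "       .long 0b,3b\n"              /* fault at 0b -> fixup 3b */
            ".previous"
            : "=r" (err), "=q" (c)
            : "r" (addr), "i" (-EFAULT), "0" (0));
        *val = c;
        return err;
    }
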
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size)                                     \
+do {                                                                   \
+       int __d0;                                                       \
+       __asm__ __volatile__(                                           \
+               "0:     rep; stosl\n"                                   \
+               "       movl %2,%0\n"                                   \
+               "1:     rep; stosb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%2,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0)                             \
+               : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
+} while (0)
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               __do_clear_user(to, n);
+       return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+       __do_clear_user(to, n);
+       return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+       unsigned long mask = -__addr_ok(s);
+       unsigned long res, tmp;
+
+       __asm__ __volatile__(
+               "       testl %0, %0\n"
+               "       jz 3f\n"
+               "       andl %0,%%ecx\n"
+               "0:     repne; scasb\n"
+               "       setne %%al\n"
+               "       subl %%ecx,%0\n"
+               "       addl %0,%%eax\n"
+               "1:\n"
+               ".section .fixup,\"ax\"\n"
+               "2:     xorl %%eax,%%eax\n"
+               "       jmp 1b\n"
+               "3:     movb $1,%%al\n"
+               "       jmp 1b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 0b,2b\n"
+               ".previous"
+               :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+               :"0" (n), "1" (s), "2" (0), "3" (mask)
+               :"cc");
+       return res & mask;
+}
diff --git a/xen-2.4.16/arch/i386/xeno.lds b/xen-2.4.16/arch/i386/xeno.lds
new file mode 100644 (file)
index 0000000..3f784a7
--- /dev/null
@@ -0,0 +1,87 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+SECTIONS
+{
+  . = 0xE0000000 + 0x100000;   /* link 1MB above the 0xE0000000 virtual base */
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } = 0x9090
+  .text.lock : { *(.text.lock) }       /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;       /* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  __start___kallsyms = .;      /* All kernel symbols */
+  __kallsyms : { *(__kallsyms) }
+  __stop___kallsyms = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       }
+
+  _edata = .;                  /* End of data section */
+
+  . = ALIGN(8192);             /* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss)
+       }
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/xen-2.4.16/common/Makefile b/xen-2.4.16/common/Makefile
new file mode 100644 (file)
index 0000000..12f1f7d
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o common.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/common/block.c b/xen-2.4.16/common/block.c
new file mode 100644 (file)
index 0000000..6ccdb10
--- /dev/null
@@ -0,0 +1,21 @@
+/* block.c
+ *
+ * ring data structures for buffering messages between hypervisor and
+ * guestos's. 
+ *
+ */
+
+#include <hypervisor-ifs/block.h>
+#include <xeno/lib.h>
+
+/*
+ * create_block_ring
+ *
+ * domain:
+ *
+ * allocates space for a particular domain's block io ring.
+ */
+blk_ring_t *create_block_ring(int domain)
+{
+    printk("XEN create block ring\n");
+    return NULL; /* stub: ring allocation is not yet implemented */
+}
diff --git a/xen-2.4.16/common/brlock.c b/xen-2.4.16/common/brlock.c
new file mode 100644 (file)
index 0000000..e2bccec
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ *
+ * linux/lib/brlock.c
+ *
+ * 'Big Reader' read-write spinlocks.  See linux/brlock.h for details.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ * Copyright 2000, David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/sched.h>
+#include <linux/brlock.h>
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+   { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+       int i;
+
+       for (i = 0; i < smp_num_cpus; i++)
+               write_lock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+       int i;
+
+       for (i = 0; i < smp_num_cpus; i++)
+               write_unlock(&__brlock_array[cpu_logical_map(i)][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+
+brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] =
+   { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } };
+
+struct br_wrlock __br_write_locks[__BR_IDX_MAX] =
+   { [0 ... __BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } };
+
+void __br_write_lock (enum brlock_indices idx)
+{
+       int i;
+
+again:
+       spin_lock(&__br_write_locks[idx].lock);
+       for (i = 0; i < smp_num_cpus; i++)
+               if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
+                       spin_unlock(&__br_write_locks[idx].lock);
+                       barrier();
+                       cpu_relax();
+                       goto again;
+               }
+}
+
+void __br_write_unlock (enum brlock_indices idx)
+{
+       spin_unlock(&__br_write_locks[idx].lock);
+}
+
+#endif /* __BRLOCK_USE_ATOMICS */
+
+#endif /* CONFIG_SMP */
diff --git a/xen-2.4.16/common/dom0_ops.c b/xen-2.4.16/common/dom0_ops.c
new file mode 100644 (file)
index 0000000..ee0984a
--- /dev/null
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * dom0_ops.c
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/dom0_ops.h>
+#include <xeno/sched.h>
+#include <xeno/event.h>
+
+
+static unsigned int get_domnr(void)
+{
+    struct task_struct *p = &idle0_task;
+    unsigned long dom_mask = 0;
+    read_lock_irq(&tasklist_lock);
+    do {
+        if ( is_idle_task(p) ) continue;
+        set_bit(p->domain, &dom_mask); 
+    }
+    while ( (p = p->next_task) != &idle0_task );   
+    read_unlock_irq(&tasklist_lock);
+    return (dom_mask == ~0UL) ? 0 : ffz(dom_mask);
+}
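
For example, with domains 0, 1 and 3 alive, dom_mask ends up as 0b1011 and
ffz() returns 2, the lowest free domain number. If all 32 bits are taken
the function returns 0; since domain 0 always exists, 0 is unambiguous as
a failure value, and the DOM0_NEWDOMAIN handler below maps it to -ENOMEM.
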
+
+long do_dom0_op(dom0_op_t *u_dom0_op)
+{
+    long ret = 0;
+    dom0_op_t op;
+
+    if ( current->domain != 0 )
+        return -EPERM;
+
+    if ( copy_from_user(&op, u_dom0_op, sizeof(op)) )
+        return -EFAULT;
+
+    switch ( op.cmd )
+    {
+
+    case DOM0_NEWDOMAIN:
+    {
+        struct task_struct *p;
+        static unsigned int pro = 0;
+        unsigned int dom = get_domnr();
+        ret = -ENOMEM;
+        if ( !dom ) break;
+        p = do_newdomain();
+        if ( !p ) break;
+        p->domain = dom;
+        pro = (pro+1) % smp_num_cpus;
+        p->processor = pro;
+        ret = setup_guestos(p, &op.u.newdomain);    /* Load guest OS into @p */
+        if ( ret != 0 ) 
+        {
+            p->state = TASK_DYING;
+            release_task(p);
+            break;
+        }
+        wake_up(p);          /* Put @p on runqueue */
+        reschedule(p);       /* Force a scheduling decision on @p's CPU */
+        ret = p->domain;
+    }
+    break;
+
+    case DOM0_KILLDOMAIN:
+    {
+        unsigned int dom = op.u.killdomain.domain;
+        if ( dom == IDLE_DOMAIN_ID )
+        {
+            ret = -EPERM;
+        }
+        else
+        {
+            ret = kill_other_domain(dom);
+        }
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+
+    }
+
+    return ret;
+}
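
Domain 0 drives this interface from its guest kernel. A sketch of the
calling side, assuming a HYPERVISOR_dom0_op() hypercall wrapper (the
wrapper name is an assumption) and the dom0_op_t fields used above:

    /* Hypothetical dom0-side usage: create a 32MB domain with one VIF. */
    dom0_op_t op;
    long ret;

    op.cmd = DOM0_NEWDOMAIN;
    op.u.newdomain.memory_kb = 32 * 1024;
    op.u.newdomain.num_vifs  = 1;
    ret = HYPERVISOR_dom0_op(&op);   /* new domain id, or a negative error */
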
diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c
new file mode 100644 (file)
index 0000000..784b4e4
--- /dev/null
@@ -0,0 +1,654 @@
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/event.h>
+#include <xeno/dom0_ops.h>
+#include <asm/io.h>
+
+rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
+
+schedule_data_t schedule_data[NR_CPUS];
+
+int wake_up(struct task_struct *p)
+{
+    unsigned long flags;
+    int ret = 0;
+    spin_lock_irqsave(&schedule_data[p->processor].lock, flags);
+    if ( __task_on_runqueue(p) ) goto out;
+    p->state = TASK_RUNNING;
+    __add_to_runqueue(p);
+    ret = 1;
+
+ out:
+    spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags);
+    return ret;
+}
+
+
+struct task_struct *do_newdomain(void)
+{
+    int retval;
+    struct task_struct *p = NULL;
+
+    retval = -ENOMEM;
+    p = alloc_task_struct();
+    if (!p) goto newdomain_out;
+    memset(p, 0, sizeof(*p));
+    p->shared_info = (void *)get_free_page(GFP_KERNEL);
+    memset(p->shared_info, 0, sizeof(shared_info_t));
+
+    p->addr_limit = USER_DS;
+    p->state      = TASK_UNINTERRUPTIBLE;
+    p->active_mm  = &p->mm;
+    p->num_net_vifs = 0;
+
+    /*
+     * KAF: Passing in newdomain struct to this function is gross!
+     * Therefore, for now we just allocate the single blk_ring
+     * before the multiple net_rings :-)
+     */
+    p->blk_ring_base = (blk_ring_t *)(p->shared_info + 1);
+    p->net_ring_base = (net_ring_t *)(p->blk_ring_base + 1);
+    p->pg_head = p->pg_tail = p->tot_pages = 0;
+    write_lock_irq(&tasklist_lock);
+    SET_LINKS(p);
+    write_unlock_irq(&tasklist_lock);
+
+ newdomain_out:
+    return(p);
+}
+
+
+void reschedule(struct task_struct *p)
+{
+    int cpu = p->processor;
+    struct task_struct *curr;
+    unsigned long flags;
+
+    if ( p->has_cpu ) return;
+
+    spin_lock_irqsave(&schedule_data[cpu].lock, flags);
+    curr = schedule_data[cpu].curr;
+    if ( is_idle_task(curr) ) 
+    {
+        set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events);
+        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+#ifdef CONFIG_SMP
+        if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu);
+#endif
+    }
+    else
+    {
+        spin_unlock_irqrestore(&schedule_data[cpu].lock, flags);
+    }
+}
+
+
+static void process_timeout(unsigned long __data)
+{
+    struct task_struct * p = (struct task_struct *) __data;
+    wake_up(p);
+}
+
+long schedule_timeout(long timeout)
+{
+    struct timer_list timer;
+    unsigned long expire;
+    
+    switch (timeout)
+    {
+    case MAX_SCHEDULE_TIMEOUT:
+        /*
+         * This special case is purely a convenience for the caller.
+         * Nothing more. We could have made MAX_SCHEDULE_TIMEOUT a
+         * negative value, but I'd like to return a valid offset (>=0)
+         * to allow the caller to do anything it wants with the retval.
+         */
+        schedule();
+        goto out;
+    default:
+        /*
+         * Another bit of PARANOIA. Note that the retval will be 0, since
+         * no piece of kernel is supposed to check for a negative retval
+         * from schedule_timeout() (it should never happen anyway). The
+         * printk() below will tell you if something has gone wrong, and
+         * where.
+         */
+        if (timeout < 0)
+        {
+            printk(KERN_ERR "schedule_timeout: wrong timeout "
+                   "value %lx from %p\n", timeout,
+                   __builtin_return_address(0));
+            current->state = TASK_RUNNING;
+            goto out;
+        }
+    }
+    
+    expire = timeout + jiffies;
+    
+    init_timer(&timer);
+    timer.expires = expire;
+    timer.data = (unsigned long) current;
+    timer.function = process_timeout;
+    
+    add_timer(&timer);
+    schedule();
+    del_timer_sync(&timer);
+    
+    timeout = expire - jiffies;
+    
+ out:
+    return timeout < 0 ? 0 : timeout;
+}
+
+
+long do_yield(void)
+{
+    current->state = TASK_INTERRUPTIBLE;
+    schedule();
+    return 0;
+}
+
+/* Get a pointer to the specified domain.  Consider replacing this
+ * with a hash lookup later.
+ *
+ * Also, kill_other_domain should call this instead of scanning on its own.
+ */
+struct task_struct *find_domain_by_id(unsigned int dom)
+{
+    struct task_struct *p = &idle0_task;
+
+    read_lock_irq(&tasklist_lock);
+    do {
+        if ( (p->domain == dom) ) {
+            read_unlock_irq(&tasklist_lock);
+            return (p);
+        }
+    } while ( (p = p->next_task) != &idle0_task );
+    read_unlock_irq(&tasklist_lock);
+
+    return NULL;
+}
+
+
+void kill_domain_with_errmsg(const char *err)
+{
+    printk("DOM%d FATAL ERROR: %s\n", 
+           current->domain, err);
+    kill_domain();
+}
+
+
+/* Kill the currently executing domain. */
+void kill_domain(void)
+{
+    if ( current->domain == 0 )
+    {
+        extern void machine_restart(char *);
+        printk("Domain 0 killed: rebooting machine!\n");
+        machine_restart(0);
+    }
+
+    printk("Killing domain %d\n", current->domain);
+    current->state = TASK_DYING;
+    schedule();
+    BUG(); /* never get here */
+}
+
+
+long kill_other_domain(unsigned int dom)
+{
+    struct task_struct *p = &idle0_task;
+    unsigned long cpu_mask = 0;
+    long ret = -ESRCH;
+
+    read_lock_irq(&tasklist_lock);
+    do {
+        if ( p->domain == dom )
+        {
+            cpu_mask = mark_guest_event(p, _EVENT_DIE);
+            ret = 0;
+            break;
+        }
+    }
+    while ( (p = p->next_task) != &idle0_task );
+    read_unlock_irq(&tasklist_lock);
+
+    hyp_event_notify(cpu_mask);
+
+    return ret;
+}
+
+
+/* Release resources belonging to task @p. */
+void release_task(struct task_struct *p)
+{
+    ASSERT(!__task_on_runqueue(p));
+    ASSERT(p->state == TASK_DYING);
+    ASSERT(!p->has_cpu);
+    write_lock_irq(&tasklist_lock);
+    REMOVE_LINKS(p);
+    write_unlock_irq(&tasklist_lock);
+
+    /*
+     * Safe! Only queue skbuffs with tasklist_lock held.
+     * Only access shared_info with tasklist_lock held.
+     * And free_task_struct() only releases if refcnt == 0.
+     */
+    while ( p->num_net_vifs )
+    {
+        destroy_net_vif(p);
+    }
+    free_page((unsigned long)p->shared_info);
+    free_task_struct(p);
+}
+
+
+asmlinkage void schedule(void)
+{
+    struct task_struct *prev, *next, *p;
+    struct list_head *tmp;
+    int this_cpu;
+
+ need_resched_back:
+    prev = current;
+    this_cpu = prev->processor;
+
+    spin_lock_irq(&schedule_data[this_cpu].lock);
+
+    ASSERT(!in_interrupt());
+    ASSERT(__task_on_runqueue(prev));
+
+    if ( !prev->counter )
+    {
+        prev->counter = 2;
+        __move_last_runqueue(prev);
+    }
+
+    switch ( prev->state )
+    {
+    case TASK_INTERRUPTIBLE:
+        if ( signal_pending(prev) )
+        {
+            prev->state = TASK_RUNNING;
+            break;
+        }
+    default:
+        __del_from_runqueue(prev);
+    case TASK_RUNNING:;
+    }
+    clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events);
+
+    next = NULL;
+    list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) {
+        p = list_entry(tmp, struct task_struct, run_list);
+        next = p;
+        break;
+    }
+
+    prev->has_cpu = 0;
+    next->has_cpu = 1;
+
+    schedule_data[this_cpu].prev = prev;
+    schedule_data[this_cpu].curr = next;
+
+    spin_unlock_irq(&schedule_data[this_cpu].lock);
+
+    if ( unlikely(prev == next) )
+    {
+        /* We won't go through the normal tail, so do this by hand */
+        prev->policy &= ~SCHED_YIELD;
+        goto same_process;
+    }
+
+    prepare_to_switch();
+    switch_to(prev, next);
+    prev = schedule_data[this_cpu].prev;
+    
+    prev->policy &= ~SCHED_YIELD;
+    if ( prev->state == TASK_DYING ) release_task(prev);
+
+ same_process:
+    if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) )
+        goto need_resched_back;
+    return;
+}
+
+
+static int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
+{
+    struct list_head *temp;
+    struct pfn_info *pf;
+    unsigned int alloc_pfns;
+    unsigned int req_pages;
+
+    /* how many pages do we need to alloc? */
+    req_pages = kbytes >> (PAGE_SHIFT - 10);
+
+    /* is there enough mem to serve the request? */   
+    if(req_pages > free_pfns)
+        return -1;
+    
+    /* allocate pages and chain them together through the frame_table */
+    temp = free_list.next;
+    printk("bd240 debug: DOM%d requesting %d pages\n", p->domain, req_pages);
+    for(alloc_pfns = req_pages; alloc_pfns; alloc_pfns--){
+        pf = list_entry(temp, struct pfn_info, list);
+        pf->flags |= p->domain;
+        temp = temp->next;
+        list_del(&pf->list);
+
+        if(p->pg_tail){
+            pf->next = p->pg_tail;
+            (frame_table + pf->next)->prev = p->pg_tail = (pf - frame_table); 
+        } else {
+            p->pg_head = (pf - frame_table);
+            p->pg_tail = p->pg_head;
+            pf->next = 0;
+            pf->prev = 0;
+        }
+        pf->prev = 0;
+
+        free_pfns--;
+    }
+    
+    p->tot_pages = req_pages;
+
+    return 0;
+}
+
+/*
+ * Initial load map:
+ *  start_address:
+ *     OS image
+ *      ....
+ *  stack_start:
+ *  start_info:
+ *      <one page>
+ *  page tables:
+ *      <enough pages>
+ *  end_address:
+ *  shared_info:
+ *      <one page>
+ */
+#define MB_PER_DOMAIN 16
+#include <asm/msr.h>
+#include <xeno/multiboot.h>
+extern int nr_mods;
+extern module_t *mod;
+extern unsigned char *cmdline;
+int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
+{
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY)
+#define ALLOC_PAGE_FROM_DOMAIN() \
+  ({ alloc_address -= PAGE_SIZE; __va(alloc_address); })
+    char *src, *dst;
+    int i, dom = p->domain;
+    unsigned long start_address = MAX_MONITOR_ADDRESS;
+    unsigned long cur_address, end_address, alloc_address, vaddr;
+    unsigned long virt_load_address, virt_stack_address, virt_shinfo_address;
+    unsigned long virt_ftable_start_addr = 0, virt_ftable_end_addr;
+    unsigned int ft_size = 0;
+    start_info_t  *virt_startinfo_address;
+    unsigned long long time;
+    l2_pgentry_t *l2tab;
+    l1_pgentry_t *l1tab = NULL;
+    struct pfn_info *page = NULL;
+    net_ring_t *net_ring;
+    blk_ring_t *blk_ring;
+
+    if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) )
+    {
+        printk("DOM%d: Invalid guest OS image\n", dom);
+        return -1;
+    }
+
+    virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8);
+    if ( (virt_load_address & (PAGE_SIZE-1)) )
+    {
+        printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n",
+               dom, virt_load_address);
+        return -1;
+    }
+
+    if ( alloc_new_dom_mem(p, params->memory_kb) ) return -ENOMEM;
+
+    /* Temporary: the *_address variables will have to be reimplemented in
+     * another way, as we can no longer expect a contiguous address space.
+     */
+    start_address = p->pg_head << PAGE_SHIFT; 
+    alloc_address = end_address = start_address + (p->tot_pages << PAGE_SHIFT);
+
+    /* start_address += (dom * MB_PER_DOMAIN) << 20; */ /* MB -> bytes */
+    /* alloc_address = end_address = start_address + (MB_PER_DOMAIN << 20); */
+
+    if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) > 
+         ((end_address-start_address)>>1) )
+    {
+        printk("DOM%d: Guest OS image is too large\n"
+               "       (%luMB is greater than %luMB limit for a\n"
+               "        %luMB address space)\n",
+               dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20,
+               (end_address-start_address)>>21,
+               (end_address-start_address)>>20);
+        /* XXX Should release memory from alloc_new_dom_mem here XXX */
+        return -1;
+    }
+
+    /* Set up initial mappings. */
+    printk("DOM%d: Mapping physmem %08lx -> %08lx (%luMB)\n", dom,
+           start_address, end_address, (end_address-start_address)>>20);
+    printk("DOM%d: Guest OS virtual load address is %08lx\n", dom,
+           virt_load_address);
+    
+    l2tab = (l2_pgentry_t *)ALLOC_PAGE_FROM_DOMAIN();
+    memcpy(l2tab, idle0_pg_table, sizeof(idle0_pg_table));
+    memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t));
+    p->mm.pagetable = mk_pagetable((unsigned long)l2tab);
+
+    /*
+     * NB. The upper limit on this loop does one extra page. This is to
+     * make sure a pte exists when we want to map the shared_info struct.
+     */
+
+    /* bd240: not just one extra page, but one plus the number of pages
+     * required for the frame_table when domain 0 is being built. This
+     * ugly loop condition will change once domain building is moved out
+     * of the hypervisor.
+     */
+
+    if(dom == 0)
+        ft_size = frame_table_size; 
+
+    l2tab = pagetable_ptr(p->mm.pagetable) +
+        l2_table_offset(virt_load_address);    
+    for ( cur_address  = start_address;
+          cur_address != (end_address + PAGE_SIZE + ft_size);
+          cur_address += PAGE_SIZE )
+    {
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+        {
+            l1tab = (l1_pgentry_t *)ALLOC_PAGE_FROM_DOMAIN();
+            clear_page(l1tab);
+            l1tab += l1_table_offset(
+                virt_load_address + cur_address - start_address);
+            *l2tab++ = mk_l2_pgentry(__pa(l1tab)|L2_PROT);
+        }
+        *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT);
+        
+        /* New domain doesn't own shared_info page, or frame_table. */
+        if ( cur_address < end_address )
+        {
+            page = frame_table + (cur_address >> PAGE_SHIFT);
+            page->flags = dom | PGT_writeable_page;
+            page->type_count = page->tot_count = 1;
+        }
+    }
+    
+    /* Pages that are part of page tables must be read-only. */
+    vaddr = virt_load_address + alloc_address - start_address;
+    l2tab = pagetable_ptr(p->mm.pagetable) + l2_table_offset(vaddr);
+    l1tab = l2_pgentry_to_l1(*l2tab++) + l1_table_offset(vaddr);
+    for ( cur_address  = alloc_address;
+          cur_address != end_address;
+          cur_address += PAGE_SIZE )
+    {
+        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+        l1tab++;
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+            l1tab = l2_pgentry_to_l1(*l2tab++);
+        page = frame_table + (cur_address >> PAGE_SHIFT);
+        page->flags = dom | PGT_l1_page_table;
+        page->tot_count++;
+    }
+    page->flags = dom | PGT_l2_page_table;
+
+    /* Map in the shared_info structure. */
+    virt_shinfo_address = end_address - start_address + virt_load_address;
+    l2tab = pagetable_ptr(p->mm.pagetable) +
+        l2_table_offset(virt_shinfo_address);
+    l1tab = l2_pgentry_to_l1(*l2tab) +
+        l1_table_offset(virt_shinfo_address);
+    *l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT);
+
+    /* Set up shared info area. */
+    rdtscll(time);
+    p->shared_info->wall_time    = time;
+    p->shared_info->domain_time  = time;
+    p->shared_info->ticks_per_ms = ticks_per_usec * 1000;
+
+    /* for DOM0, setup mapping of frame table */
+    if ( dom == 0 )
+    {
+        virt_ftable_start_addr = virt_load_address + virt_shinfo_address + PAGE_SIZE;
+        virt_ftable_end_addr = virt_ftable_start_addr + frame_table_size;
+        for(cur_address = virt_ftable_start_addr;
+            cur_address < virt_ftable_end_addr;
+            cur_address += PAGE_SIZE)
+        {
+            l2tab = pagetable_ptr(p->mm.pagetable) + l2_table_offset(cur_address);
+            l1tab = l2_pgentry_to_l1(*l2tab) + l1_table_offset(cur_address); 
+            *l1tab = mk_l1_pgentry(__pa(cur_address)|L1_PROT);
+        }
+    }
+
+    virt_startinfo_address = (start_info_t *)
+        (alloc_address - start_address - PAGE_SIZE + virt_load_address);
+    virt_stack_address  = (unsigned long)virt_startinfo_address;
+
+    /* Install the new page tables. */
+    __cli();
+    __asm__ __volatile__ (
+        "mov %%eax,%%cr3"
+        : : "a" (__pa(pagetable_ptr(p->mm.pagetable))));
+
+    /* Copy the guest OS image. */
+    src = (char *)__va(mod[0].mod_start + 12);
+    dst = (char *)virt_load_address;
+    while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++;
+
+    /* Set up start info area. */
+    memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
+    virt_startinfo_address->nr_pages = (end_address-start_address)>>PAGE_SHIFT;
+    virt_startinfo_address->shared_info = 
+        (shared_info_t *)virt_shinfo_address;
+    virt_startinfo_address->pt_base = 
+        end_address - PAGE_SIZE - start_address + virt_load_address;
+    virt_startinfo_address->phys_base = start_address;
+    /* NB. Next field will be NULL if dom != 0. */
+    virt_startinfo_address->frame_table = virt_ftable_start_addr;
+
+    /* Add virtual network interfaces and point to them in startinfo. */
+    while (params->num_vifs-- > 0) {
+        net_ring = create_net_vif(dom);
+        if (!net_ring) panic("no network ring!\n");
+    }
+    virt_startinfo_address->net_rings = p->net_ring_base;
+    virt_startinfo_address->num_net_rings = p->num_net_vifs;
+
+    /* Add block io interface */
+    virt_startinfo_address->blk_ring = p->blk_ring_base;
+
+    /* We tell OS about any modules we were given. */
+    if ( nr_mods > 1 )
+    {
+        virt_startinfo_address->mod_start = 
+            (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address;
+        virt_startinfo_address->mod_len = 
+            mod[nr_mods-1].mod_end - mod[1].mod_start;
+    }
+
+    dst = virt_startinfo_address->cmd_line;
+    if ( mod[0].string )
+    {
+        char *modline = (char *)__va(mod[0].string);
+        for ( i = 0; i < 255; i++ )
+        {
+            if ( modline[i] == '\0' ) break;
+            *dst++ = modline[i];
+        }
+    }
+    *dst = '\0';
+
+    if ( opt_nfsroot )
+    {
+        unsigned char boot[150];
+        unsigned char ipbase[20], nfsserv[20], gateway[20], netmask[20];
+        unsigned char nfsroot[70];
+        snprintf(nfsroot, 70, opt_nfsroot, dom); 
+        snprintf(boot, sizeof(boot),
+                " root=/dev/nfs ip=%s:%s:%s:%s::eth0:off nfsroot=%s",
+                 quad_to_str(opt_ipbase + dom, ipbase),
+                 quad_to_str(opt_nfsserv, nfsserv),
+                 quad_to_str(opt_gateway, gateway),
+                 quad_to_str(opt_netmask, netmask),
+                 nfsroot);
+        strcpy(dst, boot);
+    }
+
+    /* Reinstate the caller's page tables. */
+    __asm__ __volatile__ (
+        "mov %%eax,%%cr3"
+        : : "a" (__pa(pagetable_ptr(current->mm.pagetable))));    
+    __sti();
+
+    new_thread(p, 
+               (unsigned long)virt_load_address, 
+               (unsigned long)virt_stack_address, 
+               (unsigned long)virt_startinfo_address);
+
+    return 0;
+}
+
+
+void __init domain_init(void)
+{
+    int i;
+    for ( i = 0; i < NR_CPUS; i++ )
+    {
+        INIT_LIST_HEAD(&schedule_data[i].runqueue);
+        spin_lock_init(&schedule_data[i].lock);
+        schedule_data[i].prev = &idle0_task;
+        schedule_data[i].curr = &idle0_task;
+    }
+}
+
+
+
+#if 0
+    unsigned long s = (mod[        0].mod_start + (PAGE_SIZE-1)) & PAGE_MASK;
+    unsigned long e = (mod[nr_mods-1].mod_end   + (PAGE_SIZE-1)) & PAGE_MASK;
+    while ( s != e ) 
+    { 
+        free_pages((unsigned long)__va(s), 0); 
+        s += PAGE_SIZE;
+    }
+#endif
+
diff --git a/xen-2.4.16/common/event.c b/xen-2.4.16/common/event.c
new file mode 100644 (file)
index 0000000..6a81c63
--- /dev/null
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * event.c
+ * 
+ * A nice interface for passing per-domain asynchronous events. 
+ * These events are handled in the hypervisor, prior to return
+ * to the guest OS.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/event.h>
+
+typedef void (*hyp_event_callback_fn_t)(void);
+
+extern void schedule(void);
+extern void flush_rx_queue(void);
+extern void kill_domain(void);
+
+/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
+static hyp_event_callback_fn_t event_call_fn[] = 
+{
+    schedule,
+    flush_rx_queue,
+    kill_domain
+};
+
+/* Handle outstanding events for the currently-executing domain. */
+void do_hyp_events(void)
+{
+    int nr;
+    while ( (nr = ffs(current->hyp_events)) != 0 )
+        (event_call_fn[nr-1])();
+}
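
Raising an event is just setting the corresponding bit and, when the
target domain is running on another CPU, kicking that CPU; reschedule() in
domain.c above does exactly this for _HYP_EVENT_NEED_RESCHED. A sketch
using the helpers seen in this changeset (the name _HYP_EVENT_DIE is an
assumption, inferred from the table ordering above):

    /* Sketch: ask domain 'p' to die on its next return to guest context. */
    set_bit(_HYP_EVENT_DIE, &p->hyp_events);
    if ( p->processor != smp_processor_id() )
        smp_send_event_check_cpu(p->processor);
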
diff --git a/xen-2.4.16/common/kernel.c b/xen-2.4.16/common/kernel.c
new file mode 100644 (file)
index 0000000..3e860e8
--- /dev/null
@@ -0,0 +1,358 @@
+#include <stdarg.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/multiboot.h>
+#include <xeno/spinlock.h>
+#include <xeno/sched.h>
+#include <xeno/mm.h>
+#include <xeno/delay.h>
+#include <xeno/skbuff.h>
+#include <xeno/interrupt.h>
+#include <asm/io.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <xeno/dom0_ops.h>
+
+/* VGA text definitions. */
+#define COLUMNS     80
+#define LINES       24
+#define ATTRIBUTE   7
+#define VIDEO       __va(0xB8000)
+
+static int xpos, ypos;
+static volatile unsigned char *video;
+
+spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+
+struct e820entry {
+    unsigned long addr_lo, addr_hi;        /* start of memory segment */
+    unsigned long size_lo, size_hi;        /* size of memory segment */
+    unsigned long type;                    /* type of memory segment */
+};
+
+/* Used by domain.c:setup_guestos */
+int nr_mods;
+module_t *mod;
+
+void init_serial(void);
+void start_of_day(void);
+
+/* Command line options and variables. */
+unsigned long opt_ipbase=0, opt_nfsserv=0, opt_gateway=0, opt_netmask=0;
+unsigned char opt_nfsroot[50]="";
+unsigned int opt_dom0_mem = 16000; /* default kbytes for DOM0 */
+enum { OPT_IP, OPT_STR, OPT_UINT };
+static struct {
+    unsigned char *name;
+    int type;
+    void *var;
+} opts[] = {
+    { "ipbase",   OPT_IP,   &opt_ipbase  },
+    { "nfsserv",  OPT_IP,   &opt_nfsserv },
+    { "gateway",  OPT_IP,   &opt_gateway },
+    { "netmask",  OPT_IP,   &opt_netmask },
+    { "nfsroot",  OPT_STR,  &opt_nfsroot },
+    { "dom0_mem", OPT_UINT, &opt_dom0_mem }, 
+    { NULL,       0,        NULL     }
+};
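
As an illustration (all values hypothetical), a boot command line
exercising these options might read:

    xen.gz ipbase=128.232.32.0 nfsserv=128.232.32.1 gateway=128.232.32.1 netmask=255.255.240.0 nfsroot=/usr/xeno/dom%d dom0_mem=32000

Note that nfsroot is itself a format string: setup_guestos() substitutes
the domain number for the %d. The parser below skips the first
whitespace-delimited token (the image name) before looking for name=value
pairs.
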
+
+void cmain (unsigned long magic, multiboot_info_t *mbi)
+{
+    struct task_struct *new_dom;
+    dom0_newdomain_t dom0_params;
+    unsigned long max_page, remaining_hypervisor_memory;
+    unsigned char *cmdline;
+    int i;
+
+    init_serial();
+    cls();
+
+    if ( magic != MULTIBOOT_BOOTLOADER_MAGIC )
+    {
+        printf("Invalid magic number: 0x%x\n", (unsigned)magic);
+        return;
+    }
+
+    /*
+     * We require some kind of memory and module information.
+     * The rest we can fake!
+     */
+    if ( (mbi->flags & 9) != 9 )
+    {
+        printf("Bad flags passed by bootloader: 0x%x\n", (unsigned)mbi->flags);
+        return;
+    }
+
+    if ( mbi->mods_count == 0 )
+    {
+        printf("Require at least one module!\n");
+        return;
+    }
+
+    /* Are mmap_* valid?  */
+#if 0
+    if ( (mbi->flags & (1<<6)) )
+    {
+        memory_map_t *mmap = (memory_map_t *)mbi->mmap_addr;
+        struct e820entry *e820 = E820_MAP;
+
+        while ( (unsigned long)mmap < (mbi->mmap_addr + mbi->mmap_length) )
+        {
+            e820->addr_lo = mmap->base_addr_low;
+            e820->addr_hi = mmap->base_addr_high;
+            e820->size_lo = mmap->length_low;
+            e820->size_hi = mmap->length_high;
+            e820->type    = mmap->type;
+            e820++;
+            mmap = (memory_map_t *) 
+                ((unsigned long)mmap + mmap->size + sizeof (mmap->size));
+        }
+    }
+#endif
+
+    nr_mods = mbi->mods_count;
+    mod     = (module_t *)__va(mbi->mods_addr);
+
+    /* Parse the command line. */
+    cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
+    if ( cmdline != NULL )
+    {
+        unsigned char *opt_end, *opt;
+        while ( *cmdline == ' ' ) cmdline++;
+        cmdline = strchr(cmdline, ' ');
+        while ( cmdline != NULL )
+        {
+            while ( *cmdline == ' ' ) cmdline++;
+            if ( (opt = strchr(cmdline, '=')) == NULL ) break;
+            *opt++ = '\0';
+            opt_end = strchr(opt, ' ');
+            if ( opt_end != NULL ) *opt_end++ = '\0';
+            for ( i = 0; opts[i].name != NULL; i++ )
+            {
+                if ( strcmp(opts[i].name, cmdline ) == 0 )
+                {
+                    if ( opts[i].type == OPT_IP )
+                    {
+                        *(unsigned long *)opts[i].var = str_to_quad(opt);
+                    }
+                    else if(opts[i].type == OPT_STR)
+                    {
+                        strcpy(opts[i].var, opt);
+                    }
+                    else /* opts[i].type == OPT_UINT */
+                    {
+                        *(unsigned int *)opts[i].var = simple_strtol(opt, (char **)&opt, 10);
+                    }
+                    break;
+                }
+            }
+            cmdline = opt_end;
+        }
+    }
+
+    memcpy(&idle0_task_union, &first_task_struct, sizeof(first_task_struct));
+
+    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+    if ( max_page > (MAX_USABLE_ADDRESS >> PAGE_SHIFT) )
+        max_page = MAX_USABLE_ADDRESS >> PAGE_SHIFT;
+    /* mem_upper is address of first memory hole in high memory, minus 1MB. */
+    /* PS. mem_upper is in kB. */
+    remaining_hypervisor_memory = init_frametable(max_page);
+    printk("Initialised %luMB of memory on a %luMB machine\n",
+           max_page >> (20-PAGE_SHIFT), (mbi->mem_upper>>10)+1);
+
+    init_page_allocator(mod[nr_mods-1].mod_end, remaining_hypervisor_memory);
+    /* These things will get done by do_newdomain() for all other tasks. */
+    current->shared_info = (void *)get_free_page(GFP_KERNEL);
+    memset(current->shared_info, 0, sizeof(shared_info_t));
+    set_fs(USER_DS);
+    current->num_net_vifs = 0;
+
+    start_of_day();
+
+    /* Create initial domain 0. */
+    dom0_params.num_vifs  = 1;
+    dom0_params.memory_kb = opt_dom0_mem;
+
+    new_dom = do_newdomain();
+    if ( new_dom == NULL ) panic("Error creating domain 0\n");
+    new_dom->processor = 0;
+    new_dom->domain    = 0;
+    if ( setup_guestos(new_dom, &dom0_params) != 0 )
+    {
+        panic("Could not set up DOM0 guest OS\n");
+    }
+    wake_up(new_dom);
+
+    cpu_idle();
+}    
+
+
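+/* Register offsets for a 16550-style UART; SERIAL_BASE is COM1 (0x3f8). */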
+#define SERIAL_BASE 0x3f8
+#define RX_BUF      0
+#define TX_HOLD     0
+#define INT_ENABLE  1
+#define INT_IDENT   2
+#define DATA_FORMAT 3
+#define LINE_CTL    4
+#define LINE_STATUS 5
+#define LINE_IN     6
+#define DIVISOR_LO  0
+#define DIVISOR_HI  1
+
+void init_serial(void)
+{
+    /* 9600 baud, no parity, 1 stop bit, 8 data bits. */
+    outb(0x83, SERIAL_BASE+DATA_FORMAT); /* 8n1, DLAB set (divisor access) */
+    outb(12, SERIAL_BASE+DIVISOR_LO);    /* 115200 / 12 = 9600 baud */
+    outb(0, SERIAL_BASE+DIVISOR_HI);
+    outb(0x03, SERIAL_BASE+DATA_FORMAT); /* 8n1, DLAB cleared */
+
+    /* No interrupts. */
+    outb(0x00, SERIAL_BASE+INT_ENABLE);
+}
+
+
+void putchar_serial(unsigned char c)
+{
+    if ( c == '\n' ) putchar_serial('\r');
+    if ( (c != '\n') && (c != '\r') && ((c < 32) || (c > 126)) ) return;
+    /* Wait for the transmit-holding register to empty (LSR bit 5). */
+    while ( !(inb(SERIAL_BASE+LINE_STATUS)&(1<<5)) ) barrier();
+    outb(c, SERIAL_BASE+TX_HOLD);
+}
+
+
+/* Clear the screen and initialise video, xpos and ypos. */
+void cls (void)
+{
+    int i;
+
+    video = (unsigned char *) VIDEO;
+  
+    for (i = 0; i < COLUMNS * LINES * 2; i++)
+        *(video + i) = 0;
+
+    xpos = 0;
+    ypos = 0;
+
+    outw(10+(1<<(5+8)), 0x3d4); /* CRTC reg 10, bit 5 set: cursor off */
+}
+
+
+/* Put the character c on the screen, echoing it to the serial line too. */
+static void putchar (int c)
+{
+    static char zeroarr[2*COLUMNS] = { 0 };
+
+    putchar_serial(c);
+
+    if (c == '\n' || c == '\r')
+    {
+    newline:
+        xpos = 0;
+        ypos++;
+        if (ypos >= LINES)
+        {
+            ypos = LINES-1;
+            memcpy((char*)video, 
+                   (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS);
+            memcpy((char*)video + (LINES-1)*2*COLUMNS, 
+                   zeroarr, 2*COLUMNS);
+        }
+        return;
+    }
+
+    *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF;
+    *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE;
+
+    xpos++;
+    if (xpos >= COLUMNS)
+        goto newline;
+}
+
+static inline void __putstr(const char *str)
+{
+    while ( *str ) putchar(*str++);
+}
+
+void printf (const char *fmt, ...)
+{
+    va_list args;
+    char buf[1024], *p;
+    unsigned long flags;
+
+    va_start(args, fmt);
+    (void)vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+  
+    p = buf; 
+    spin_lock_irqsave(&console_lock, flags);
+    while ( *p ) putchar(*p++);
+    spin_unlock_irqrestore(&console_lock, flags);
+}
+
+void panic(const char *fmt, ...)
+{
+    va_list args;
+    char buf[1024], *p;
+    unsigned long flags;
+    extern void machine_restart(char *);
+
+    va_start(args, fmt);
+    (void)vsnprintf(buf, sizeof(buf), fmt, args);
+    va_end(args);
+  
+    /* Spit out multiline message in one go. */
+    spin_lock_irqsave(&console_lock, flags);
+    __putstr("\n****************************************\n");
+    p = buf;
+    while ( *p ) putchar(*p++);
+    __putstr("Aieee! CPU");
+    putchar((char)smp_processor_id() + '0');
+    __putstr(" is toast...\n");
+    __putstr("****************************************\n\n");
+    __putstr("Reboot in five seconds...\n");
+    spin_unlock_irqrestore(&console_lock, flags);
+
+    mdelay(5000);
+    machine_restart(0);
+}
+
+/* No-op syscall. */
+asmlinkage long sys_ni_syscall(void)
+{
+    return -ENOSYS;
+}
+
+
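+/* Guest-OS console write: prefixes each output line with "DOM<n>: ". */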
+long do_console_write(char *str, int count)
+{
+#define SIZEOF_BUF 256
+    unsigned char safe_str[SIZEOF_BUF];
+    unsigned long flags;
+    int i;
+    unsigned char prev = '\n';
+
+    if ( count > SIZEOF_BUF ) count = SIZEOF_BUF;
+
+    if ( copy_from_user(safe_str, str, count) )
+        return -EFAULT;
+        
+    spin_lock_irqsave(&console_lock, flags);
+    for ( i = 0; i < count; i++ )
+    {
+        if ( prev == '\n' )
+        {
+            __putstr("DOM"); 
+            putchar(current->domain+'0'); 
+            __putstr(": ");
+        }
+        if ( !safe_str[i] ) break;
+        putchar(prev = safe_str[i]);
+    }
+    if ( prev != '\n' ) putchar('\n');
+    spin_unlock_irqrestore(&console_lock, flags);
+
+    return(0);
+}
diff --git a/xen-2.4.16/common/lib.c b/xen-2.4.16/common/lib.c
new file mode 100644 (file)
index 0000000..2a55b99
--- /dev/null
@@ -0,0 +1,171 @@
+
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+int memcmp(const void * cs,const void * ct,size_t count)
+{
+       const unsigned char *su1, *su2;
+       signed char res = 0;
+
+       for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+               if ((res = *su1 - *su2) != 0)
+                       break;
+       return res;
+}
+
+void * memcpy(void * dest,const void *src,size_t count)
+{
+       char *tmp = (char *) dest, *s = (char *) src;
+
+       while (count--)
+               *tmp++ = *s++;
+
+       return dest;
+}
+
+int strncmp(const char * cs,const char * ct,size_t count)
+{
+       register signed char __res = 0;
+
+       while (count) {
+               if ((__res = *cs - *ct++) != 0 || !*cs++)
+                       break;
+               count--;
+       }
+
+       return __res;
+}
+
+int strcmp(const char * cs,const char * ct)
+{
+        register signed char __res;
+
+        while (1) {
+                if ((__res = *cs - *ct++) != 0 || !*cs++)
+                        break;
+        }
+
+        return __res;
+}
+
+char * strcpy(char * dest,const char *src)
+{
+        char *tmp = dest;
+
+        while ((*dest++ = *src++) != '\0')
+                /* nothing */;
+        return tmp;
+}
+
+char * strncpy(char * dest,const char *src,size_t count)
+{
+        char *tmp = dest;
+
+        while (count-- && (*dest++ = *src++) != '\0')
+                /* nothing */;
+
+        return tmp;
+}
+
+void * memset(void * s,int c,size_t count)
+{
+        char *xs = (char *) s;
+
+        while (count--)
+                *xs++ = c;
+
+        return s;
+}
+
+size_t strnlen(const char * s, size_t count)
+{
+        const char *sc;
+
+        for (sc = s; count-- && *sc != '\0'; ++sc)
+                /* nothing */;
+        return sc - s;
+}
+
+size_t strlen(const char * s)
+{
+       const char *sc;
+
+       for (sc = s; *sc != '\0'; ++sc)
+               /* nothing */;
+       return sc - s;
+}
+
+char * strchr(const char * s, int c)
+{
+        for(; *s != (char) c; ++s)
+                if (*s == '\0')
+                        return NULL;
+        return (char *) s;
+}
+
+char * strstr(const char * s1,const char * s2)
+{
+        int l1, l2;
+
+        l2 = strlen(s2);
+        if (!l2)
+                return (char *) s1;
+        l1 = strlen(s1);
+        while (l1 >= l2) {
+                l1--;
+                if (!memcmp(s1,s2,l2))
+                        return (char *) s1;
+                s1++;
+        }
+        return NULL;
+}
+
+
+/* for inc/ctype.h */
+unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,         /* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C,                        /* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,                    /* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P,                        /* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D,                        /* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P,                        /* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,      /* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U,                        /* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U,                        /* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P,                        /* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,      /* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L,                        /* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L,                        /* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C,                        /* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
+
+
+unsigned long str_to_quad(unsigned char *s)
+{
+    unsigned long quad = 0;
+    do {
+        quad <<= 8;
+        quad  |= simple_strtol(s, (char **)&s, 10);
+    }  
+    while ( *s++ == '.' );
+    return quad;
+}
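+/* Example: str_to_quad("128.232.0.1") yields 0x80e80001; quad_to_str(),
+ * below, is the inverse, formatting 0x80e80001 as "128.232.0.1". */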
+
+
+unsigned char *quad_to_str(unsigned long q, unsigned char *s)
+{
+    sprintf(s, "%ld.%ld.%ld.%ld", 
+            (q>>24)&255, (q>>16)&255, (q>>8)&255, (q>>0)&255);
+    return s;
+}
+   
+
diff --git a/xen-2.4.16/common/memory.c b/xen-2.4.16/common/memory.c
new file mode 100644 (file)
index 0000000..2d60698
--- /dev/null
@@ -0,0 +1,654 @@
+/******************************************************************************
+ * memory.c
+ * 
+ * Copyright (c) 2002 K A Fraser
+ * 
+ * A description of the page table API:
+ * 
+ * Domains trap to process_page_updates with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val. The exceptions are when ptr is PGREQ_ADD_BASEPTR, or
+ * PGREQ_REMOVE_BASEPTR.
+ * 
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ * 
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the hypervisor's free pool when
+ * it is still referencing it!
+ * 
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writeable by the domain [of course, a
+ * frame may not be used in any of these three ways!].
+ * So, type_count is a count of the number of times a frame is being 
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ * 
+ * A further note on writeable page mappings:
+ * ------------------------------------------
+ * For simplicity, the count of writeable mappings for a page may not
+ * correspond to reality. The 'writeable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the 
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ * 
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writeable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the 
+ * bit is clear then any mapped page is writeable.
+ * 
+ * We get round this by always setting the WP bit and disallowing 
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
+
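+/*
+ * Illustrative sketch of the request format described above (the trap
+ * wrapper name 'pt_update_trap' is hypothetical -- guests reach
+ * process_page_updates via their own trap mechanism):
+ *
+ *   page_update_request_t req[2];
+ *   req[0].ptr = pte_phys;           // physical address of a PTE
+ *   req[0].val = new_pte;            // requested effect: *ptr = val
+ *   req[1].ptr = PGREQ_ADD_BASEPTR;  // special ptr value: pin a new
+ *   req[1].val = new_l2_phys;        //  top-level page table
+ *   pt_update_trap(req, 2);
+ */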
+
+/*
+ * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
+ * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
+ * -----------------------------------------------------------------------
+ * 
+ * *********
+ * UPDATE 15/7/02: Interface has changed -- updates now specify the physical
+ * address of the page-table entry, rather than a virtual address, so the
+ * hypervisor no longer "walks" the page tables. Therefore the 
+ * solution below cannot work. Another possibility is to add a new entry
+ * to our "struct page" which says to which top-level page table each
+ * lower-level page table or writeable mapping belongs. If it belongs to more
+ * than one, we'd probably just flush on all processors running the domain.
+ * *********
+ * 
+ * ** 1 **
+ * The problem involves creating new page tables which might be mapped 
+ * writeable in the TLB of another processor. As an example, a domain might be 
+ * running in two contexts (ie. on two processors) simultaneously, using the 
+ * same top-level page table in both contexts. Now, if context 1 sends an 
+ * update request [make page P read-only, add a reference to page P as a page 
+ * table], that will succeed if there was only one writeable mapping of P. 
+ * However, that mapping may persist in the TLB of context 2.
+ * 
+ * Solution: when installing a new page table, we must flush foreign TLBs as
+ * necessary. Naive solution is to flush on any processor running our domain.
+ * Cleverer solution is to flush on any processor running same top-level page
+ * table, but this will sometimes fail (consider two different top-level page
+ * tables which have a shared lower-level page table).
+ * 
+ * A better solution: when squashing a write reference, check how many times
+ * that lowest-level table entry is referenced by ORing refcounts of tables
+ * down the page-table hierarchy. If the result is != 1, we require flushing
+ * instances of current domain if a new table is installed (because the
+ * lowest-level entry may be referenced by many top-level page tables).
+ * However, common case will be that result == 1, so we only need to flush
+ * processors with the same top-level page table. Make choice at
+ * table-installation time based on a `flush_level' flag, which is
+ * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
+ * to FLUSH_NONE, while squashed write mappings can only promote up
+ * to more aggressive flush types.
+ * 
+ * ** 2 **
+ * Same problem occurs when removing a page table, at level 1 say, then
+ * making it writeable. Need a TLB flush between otherwise another processor
+ * might write an illegal mapping into the old table, while yet another
+ * processor can use the illegal mapping because of a stale level-2 TLB
+ * entry. So, removal of a table reference sets 'flush_level' appropriately,
+ * and a flush occurs on next addition of a fresh write mapping.
+ * 
+ * BETTER SOLUTION FOR BOTH 1 AND 2:
+ * When type_refcnt goes to zero, leave old type in place (don't set to
+ * PGT_none). Then, only flush if making a page table of a page with
+ * (cnt=0,type=PGT_writeable), or when adding a write mapping for a page
+ * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
+ * with refcnt==0 to be reset to PGT_none. Need an array for the purpose,
+ * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
+ * Either have per-domain table, or force TLB flush at end of each
+ * call to 'process_page_updates'.
+ * Most OSes will always keep a writeable reference hanging around, and
+ * page table structure is fairly static, so this mechanism should be
+ * fairly cheap.
+ * 
+ * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]:
+ * If we need to force an intermediate flush, those other processors
+ * spin until we complete, then do a single TLB flush. They can spin on
+ * the lock protecting 'process_page_updates', and continue when that
+ * is freed. Saves cost of setting up and servicing an IPI: later
+ * communication is synchronous. Processors trying to install the domain
+ * or domain&pagetable would also enter the spin.
+ * 
+ * ** 3 **
+ * Indeed, this problem generalises to reusing page tables at different
+ * levels of the hierarchy (conceptually, the guest OS can use the
+ * hypervisor to introduce illegal table entries by proxy). Consider
+ * unlinking a level-1 page table and reintroducing at level 2 with no
+ * TLB flush. Hypervisor can add a reference to some other level-1 table
+ * with the RW bit set. This is fine in the level-2 context, but some
+ * other processor may still be using that table in level-1 context
+ * (due to a stale TLB entry). At level 1 it may look like the
+ * processor has write access to the other level-1 page table! Therefore
+ * can add illegal values there with impunity :-(
+ * 
+ * Fortunately, the solution above generalises to this extended problem.
+ */
+
+/*
+ * UPDATE 12.11.02: We no longer have struct page and mem_map. These
+ * have been replaced by struct pfn_info and frame_table respectively.
+ * 
+ * free_list is a list_head linking all system-owned free pages.
+ * It is initialised in init_frametable.
+ *
+ * Boris Dragovic.
+ */
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/lib.h>
+#include <xeno/mm.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#if 1
+#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
+#else
+#define MEM_LOG(_f, _a...) ((void)0)
+#endif
+
+/* 'get' checks parameter for validity before inc'ing refcnt. */
+static int get_l2_table(unsigned long page_nr);
+static int get_l1_table(unsigned long page_nr);
+static int get_page(unsigned long page_nr, int writeable);
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
+/* 'put' does no checking because if refcnt not zero, entity must be valid. */
+static int  put_l2_table(unsigned long page_nr);
+static void put_l1_table(unsigned long page_nr);
+static void put_page(unsigned long page_nr, int writeable);
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type);
+
+static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t);
+static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
+
+/* frame table size and its size in pages */
+frame_table_t * frame_table;
+unsigned long frame_table_size;
+unsigned long max_page;
+
+struct list_head free_list;
+unsigned int free_pfns;
+
+
+/*
+ * init_frametable:
+ * Initialise per-frame memory information. The return value
+ * is the amount of memory available for use by the rest of Xen.
+ * The very highest frames are reserved for the per-frame info.
+ * This function should be called before initialising the
+ * page allocator!
+ */
+unsigned long __init init_frametable(unsigned long nr_pages)
+{
+    struct pfn_info *pf;
+    unsigned long page_index;
+
+    max_page = nr_pages;
+    frame_table_size = nr_pages * sizeof(struct pfn_info);
+    frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
+    free_pfns = nr_pages - (MAX_MONITOR_ADDRESS >> PAGE_SHIFT);
+
+    frame_table = phys_to_virt(MAX_MONITOR_ADDRESS - frame_table_size);
+    memset(frame_table, 0, frame_table_size);
+
+    /* Put all domain-allocatable memory on a free list. */
+    INIT_LIST_HEAD(&free_list);
+    for( page_index = MAX_MONITOR_ADDRESS >> PAGE_SHIFT; 
+         page_index < nr_pages; 
+         page_index++ )      
+    {
+        pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
+        list_add_tail(&pf->list, &free_list);
+    }
+
+    /* Return the remaining Xen-allocatable memory. */
+    return(MAX_MONITOR_ADDRESS - frame_table_size);
+}
+
+
+/* Return original refcnt, or -1 on error. */
+static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+    struct pfn_info *page;
+    unsigned long flags;
+
+    if ( page_nr >= max_page )
+    {
+        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+        return(-1);
+    }
+    page = frame_table + page_nr;
+    flags = page->flags;
+    if ( (flags & PG_domain_mask) != current->domain )
+    {
+        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+        return(-1);
+    }
+    if ( (flags & PG_type_mask) != type )
+    {
+        if ( page_type_count(page) != 0 )
+        {
+            MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
+                    page_nr << PAGE_SHIFT,
+                    flags & PG_type_mask, type, page_type_count(page));
+            return(-1);
+        }
+        page->flags |= type;
+    }
+
+    get_page_tot(page);
+    return(get_page_type(page));
+}
+
+/* Return new refcnt, or -1 on error. */
+static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
+{
+    struct pfn_info *page;
+    int ret;
+
+    if ( page_nr >= max_page )
+    {
+        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+        return(-1);
+    }
+    page = frame_table + page_nr;
+    if ( (page->flags & (PG_type_mask | PG_domain_mask)) != 
+         (type | current->domain) ) 
+    {
+        MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
+                page->flags & PG_domain_mask, page->flags & PG_type_mask,
+                type);
+        return(-1);
+    }
+    ASSERT(page_type_count(page) != 0);
+    if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
+    put_page_tot(page);
+    return(ret);
+}
+
+
+static int get_l2_table(unsigned long page_nr)
+{
+    l2_pgentry_t *p_l2_entry, l2_entry;
+    int i, ret=0;
+    
+    ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
+    if ( ret != 0 ) return((ret < 0) ? ret : 0);
+    
+    /* NEW level-2 page table! Deal with every PDE in the table. */
+    p_l2_entry = (l2_pgentry_t *)__va(page_nr << PAGE_SHIFT);
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+    {
+        l2_entry = *p_l2_entry++;
+        if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
+        if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+        {
+            MEM_LOG("Bad L2 page type settings %04lx",
+                    l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
+            return(-1);
+        }
+        ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
+        if ( ret ) return(ret);
+    }
+
+    /* Now we simply slap in our high mapping. */
+    memcpy(p_l2_entry, idle0_pg_table + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
+           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+
+    return(ret);
+}
+
+static int get_l1_table(unsigned long page_nr)
+{
+    l1_pgentry_t *p_l1_entry, l1_entry;
+    int i, ret;
+
+    /* Update ref count for page pointed at by PDE. */
+    ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
+    if ( ret != 0 ) return((ret < 0) ? ret : 0);
+
+    /* NEW level-1 page table! Deal with every PTE in the table. */
+    p_l1_entry = (l1_pgentry_t *)__va(page_nr << PAGE_SHIFT);
+    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+    {
+        l1_entry = *p_l1_entry++;
+        if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
+        if ( (l1_pgentry_val(l1_entry) &
+              (_PAGE_GLOBAL|_PAGE_PAT)) )
+        {
+            MEM_LOG("Bad L1 page type settings %04lx",
+                    l1_pgentry_val(l1_entry) &
+                    (_PAGE_GLOBAL|_PAGE_PAT));
+            return(-1);
+        }
+        ret = get_page(l1_pgentry_to_pagenr(l1_entry),
+                       l1_pgentry_val(l1_entry) & _PAGE_RW);
+        if ( ret ) return(ret);
+    }
+
+    return(ret);
+}
+
+static int get_page(unsigned long page_nr, int writeable)
+{
+    struct pfn_info *page;
+    unsigned long flags;
+
+    /* Update ref count for page pointed at by PTE. */
+    if ( page_nr >= max_page )
+    {
+        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
+        return(-1);
+    }
+    page = frame_table + page_nr;
+    flags = page->flags;
+    if ( (flags & PG_domain_mask) != current->domain )
+    {
+        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
+        return(-1);
+    }
+
+    if ( writeable )
+    {
+        if ( (flags & PG_type_mask) != PGT_writeable_page )
+        {
+            if ( page_type_count(page) != 0 )
+            {
+                MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
+                        flags & PG_type_mask, PGT_writeable_page,
+                        page_type_count(page));
+                return(-1);
+            }
+            page->flags |= PGT_writeable_page;
+        }
+        get_page_type(page);
+    }
+
+    get_page_tot(page);
+    
+    return(0);
+}
+
+static int put_l2_table(unsigned long page_nr)
+{
+    l2_pgentry_t *p_l2_entry, l2_entry;
+    int i, ret;
+
+    ret = dec_page_refcnt(page_nr, PGT_l2_page_table);
+    if ( ret != 0 ) return((ret < 0) ? ret : 0);
+
+    /* We had last reference to level-2 page table. Free the PDEs. */
+    p_l2_entry = (l2_pgentry_t *)__va(page_nr << PAGE_SHIFT);
+    for ( i = 0; i < HYPERVISOR_ENTRIES_PER_L2_PAGETABLE; i++ )
+    {
+        l2_entry = *p_l2_entry++;
+        if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) 
+            put_l1_table(l2_pgentry_to_pagenr(l2_entry));
+    }
+
+    return(0);
+}
+
+static void put_l1_table(unsigned long page_nr)
+{
+    l1_pgentry_t *p_l1_entry, l1_entry;
+    int i;
+
+    if ( dec_page_refcnt(page_nr, PGT_l1_page_table) != 0 ) return;
+
+    /* We had last reference to level-1 page table. Free the PTEs. */
+    p_l1_entry = (l1_pgentry_t *)__va(page_nr << PAGE_SHIFT);
+    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+    {
+        l1_entry = *p_l1_entry++;
+        if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) 
+        {
+            put_page(l1_pgentry_to_pagenr(l1_entry), 
+                     l1_pgentry_val(l1_entry) & _PAGE_RW);
+        }
+    }
+}
+
+static void put_page(unsigned long page_nr, int writeable)
+{
+    struct pfn_info *page;
+    ASSERT(page_nr < max_page);
+    page = frame_table + page_nr;
+    ASSERT((page->flags & PG_domain_mask) == current->domain);
+    ASSERT((((page->flags & PG_type_mask) == PGT_writeable_page) &&
+            (page_type_count(page) != 0)) ||
+           (((page->flags & PG_type_mask) == PGT_none) &&
+            (page_type_count(page) == 0)));
+    ASSERT((!writeable) || (page_type_count(page) != 0));
+    if ( writeable && (put_page_type(page) == 0) )
+        page->flags &= ~PG_type_mask;
+    put_page_tot(page);
+}
+
+
+static int mod_l2_entry(l2_pgentry_t *p_l2_entry, l2_pgentry_t new_l2_entry)
+{
+    l2_pgentry_t old_l2_entry = *p_l2_entry;
+
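+    /* Only PDE slots below DOMAIN_ENTRIES_PER_L2_PAGETABLE are under guest
+     * control; the remainder map the hypervisor (see get_l2_table()). */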
+    if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
+         DOMAIN_ENTRIES_PER_L2_PAGETABLE )
+    {
+        MEM_LOG("Illegal L2 update attempt in hypervisor area %p\n",
+                p_l2_entry);
+        goto fail;
+    }
+
+    if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
+    {
+        if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
+        {
+            MEM_LOG("Bad L2 entry val %04lx",
+                    l2_pgentry_val(new_l2_entry) & 
+                    (_PAGE_GLOBAL|_PAGE_PSE));
+            goto fail;
+        }
+        /* Differ in mapping (bits 12-31) or presence (bit 0)? */
+        if ( ((l2_pgentry_val(old_l2_entry) ^ 
+               l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
+        {
+            if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) 
+            {
+                put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+            }
+            
+            if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) )
+                goto fail;
+        } 
+    }
+    else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
+    {
+        put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
+    }
+    
+    *p_l2_entry++ = new_l2_entry;
+    
+    return(0);
+ fail:
+    return(-1);
+}
+
+
+static int mod_l1_entry(l1_pgentry_t *p_l1_entry, l1_pgentry_t new_l1_entry)
+{
+    l1_pgentry_t old_l1_entry = *p_l1_entry;
+
+    if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
+    {
+        if ( (l1_pgentry_val(new_l1_entry) &
+              (_PAGE_GLOBAL|_PAGE_PAT)) ) 
+        {
+            MEM_LOG("Bad L1 entry val %04lx",
+                    l1_pgentry_val(new_l1_entry) & 
+                    (_PAGE_GLOBAL|_PAGE_PAT));
+            goto fail;
+        }
+        /*
+         * Differ in mapping (bits 12-31), writeable (bit 1), or
+         * presence (bit 0)?
+         */
+        if ( ((l1_pgentry_val(old_l1_entry) ^
+               l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
+        {
+            if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) 
+            {
+                put_page(l1_pgentry_to_pagenr(old_l1_entry),
+                         l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+            }
+            
+            if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
+                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
+                goto fail;
+        } 
+    }
+    else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
+    {
+        put_page(l1_pgentry_to_pagenr(old_l1_entry),
+                 l1_pgentry_val(old_l1_entry) & _PAGE_RW);
+    }
+
+    *p_l1_entry++ = new_l1_entry;
+
+    return(0);
+ fail:
+    return(-1);
+}
+
+
+/* Apply updates to page table @pagetable_id within the current domain. */
+int do_process_page_updates(page_update_request_t *updates, int count)
+{
+    page_update_request_t cur;
+    unsigned long flags, pfn;
+    struct pfn_info *page;
+    int err = 0, i;
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( copy_from_user(&cur, updates, sizeof(cur)) )
+        {
+            kill_domain_with_errmsg("Cannot read page update request");
+        }
+
+        err = 1;
+
+        pfn = cur.ptr >> PAGE_SHIFT;
+        if ( !pfn )
+        {
+            switch ( cur.ptr )
+            {
+            case PGREQ_ADD_BASEPTR:
+                err = get_l2_table(cur.val >> PAGE_SHIFT);
+                break;
+            case PGREQ_REMOVE_BASEPTR:
+                if ( cur.val == __pa(pagetable_ptr(current->mm.pagetable)) )
+                {
+                    MEM_LOG("Attempt to remove current baseptr! %08lx",
+                            cur.val);
+                }
+                else
+                {
+                    err = put_l2_table(cur.val >> PAGE_SHIFT);
+                }
+                break;
+            }
+        }
+        else if ( (cur.ptr & (sizeof(l1_pgentry_t)-1)) || (pfn >= max_page) )
+        {
+            MEM_LOG("Page out of range (%08lx>%08lx) or misalign %08lx",
+                    pfn, max_page, cur.ptr);
+        }
+        else
+        {
+            page = frame_table + pfn;
+            flags = page->flags;
+            if ( (flags & PG_domain_mask) == current->domain )
+            {
+                switch ( (flags & PG_type_mask) )
+                {
+                case PGT_l1_page_table: 
+                    err = mod_l1_entry((l1_pgentry_t *)__va(cur.ptr), 
+                                       mk_l1_pgentry(cur.val)); 
+                    break;
+                case PGT_l2_page_table: 
+                    err = mod_l2_entry((l2_pgentry_t *)__va(cur.ptr),
+                                       mk_l2_pgentry(cur.val)); 
+                    break;
+                }
+            }
+        }
+
+        if ( err )
+        {
+            kill_domain_with_errmsg("Illegal page update request");
+        }
+
+        updates++;
+    }
+
+    /* Flush the TLB by reloading CR3 with the current domain's base table. */
+    __asm__ __volatile__ ("movl %%eax,%%cr3" : : 
+                          "a" (__pa(pagetable_ptr(current->mm.pagetable))));
+    return(0);
+}
+
+
+int do_set_pagetable(unsigned long ptr)
+{
+    struct pfn_info *page;
+    unsigned long pfn, flags;
+
+    if ( (ptr & ~PAGE_MASK) ) 
+    {
+        MEM_LOG("Misaligned new baseptr %08lx", ptr);
+        return -1;
+    }
+    pfn = ptr >> PAGE_SHIFT;
+    if ( pfn >= max_page )
+    {
+        MEM_LOG("Page out of range (%08lx>%08lx)", pfn, max_page);
+        return -1;
+    }
+    page = frame_table + (ptr >> PAGE_SHIFT);
+    flags = page->flags;
+    if ( (flags & (PG_domain_mask|PG_type_mask)) != 
+         (current->domain|PGT_l2_page_table) )
+    {
+        MEM_LOG("Page %08lx bad type/domain (dom=%ld) "
+                "(type %08lx != expected %08x)",
+                ptr, flags & PG_domain_mask, flags & PG_type_mask,
+                PGT_l2_page_table);
+        return -1;
+    }
+    current->mm.pagetable = mk_pagetable((unsigned long)__va(ptr));
+    __asm__ __volatile__ ("movl %%eax,%%cr3" : : "a" (ptr));
+    return 0;
+}
diff --git a/xen-2.4.16/common/network.c b/xen-2.4.16/common/network.c
new file mode 100644 (file)
index 0000000..7fc7b99
--- /dev/null
@@ -0,0 +1,109 @@
+/* network.c
+ *
+ * Ring data structures for buffering messages between the hypervisor and
+ * guest OSes. As it stands this is only used for network buffer exchange.
+ *
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/sched.h>
+#include <xeno/errno.h>
+#include <xeno/init.h>
+#include <xeno/slab.h>
+
+/* vif globals */
+int sys_vif_count;
+kmem_cache_t *net_vif_cache;
+
+net_ring_t *create_net_vif(int domain)
+{
+    net_vif_t *new_vif;
+    net_ring_t *new_ring;
+    struct task_struct *dom_task;
+    int i;
+
+    if ( !(dom_task = find_domain_by_id(domain)) )
+        return NULL;
+
+    if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+        return NULL;
+
+    i = dom_task->num_net_vifs;
+    dom_task->net_vif_list[i] = new_vif;
+
+    new_ring = dom_task->net_ring_base + i;
+    memset(new_ring, 0, sizeof(net_ring_t));
+
+    new_vif->net_ring = new_ring;
+    skb_queue_head_init(&new_vif->skb_list);
+    new_vif->id = sys_vif_count++;
+    dom_task->num_net_vifs++;
+
+    return new_ring;
+}
+
+/* Delete the last vif in the given domain. There doesn't seem to be any
+ * reason (yet) to be able to axe an arbitrary vif by id. 
+ */
+void destroy_net_vif(struct task_struct *p)
+{
+    struct sk_buff *skb;
+    int i;
+
+    if ( p->num_net_vifs <= 0 ) return; /* Nothing to do. */
+    
+    i = --p->num_net_vifs;
+    while ( (skb = skb_dequeue(&p->net_vif_list[i]->skb_list)) != NULL )
+    {
+        kfree_skb(skb);
+    }
+    kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
+}
+
+
+/* 
+ * This is the hook function to handle guest-invoked traps requesting 
+ * changes to the network system.
+ */
+
+long do_network_op(network_op_t *u_network_op)
+{
+    long ret=0;
+    network_op_t op;
+    
+    if ( current->domain != 0 )
+        return -EPERM;
+
+    if ( copy_from_user(&op, u_network_op, sizeof(op)) )
+        return -EFAULT;
+
+    switch ( op.cmd )
+    {
+
+    case NETWORK_OP_ADDRULE:
+    {
+        printk("received addrule request from guestos!\n");
+    }
+    break;
+
+    case NETWORK_OP_DELETERULE:
+    {
+        printk("received deleterule request from guestos!\n");
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+    }
+
+    return ret;
+}
+
+void __init net_init (void)
+{
+    sys_vif_count = 0;
+    net_vif_cache = kmem_cache_create("net_vif_cache", sizeof(net_vif_t),
+                                      0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+}
diff --git a/xen-2.4.16/common/page_alloc.c b/xen-2.4.16/common/page_alloc.c
new file mode 100644 (file)
index 0000000..b7cd2c1
--- /dev/null
@@ -0,0 +1,272 @@
+/******************************************************************************
+ * page_alloc.c
+ * 
+ * Simple buddy allocator for Xenoserver hypervisor.
+ * 
+ * Copyright (c) 2002 K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <asm/page.h>
+#include <xeno/spinlock.h>
+
+
+static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*********************
+ * ALLOCATION BITMAP
+ *  One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ *  -(1<<n)  sets all bits >= n. 
+ *  (1<<n)-1 sets all bits <  n.
+ * Variable names in map_{alloc,free}:
+ *  *_idx == Index into `alloc_bitmap' array.
+ *  *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
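+/*
+ * Worked example (single-word case): start_off = 3, nr_pages = 3, so
+ * end_off = 6:
+ *   -(1<<3)   = ...11111000  (bits >= 3)
+ *   (1<<6)-1  = ...00111111  (bits <  6)
+ *   AND       = ...00111000  => bits 3,4,5 set: three pages marked.
+ */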
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+    }
+    else 
+    {
+        alloc_bitmap[curr_idx] |= -(1<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
+        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+    }
+}
+
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+    }
+    else 
+    {
+        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+        alloc_bitmap[curr_idx] &= -(1<<end_off);
+    }
+}
+
+
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+    chunk_head_t  *next;
+    chunk_head_t **pprev;
+    int            level;
+};
+
+struct chunk_tail_st {
+    int level;
+};
+
+/* Linked lists of free chunks of different powers-of-two in size.
+ * FREELIST_SIZE is the pointer width in bits minus PAGE_SHIFT (20 on i386). */
+#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
+static chunk_head_t *free_list[FREELIST_SIZE];
+static chunk_head_t  free_tail[FREELIST_SIZE];
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)
+
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+
+/* Initialise allocator, placing addresses [@min,@max] in free pool. */
+void __init init_page_allocator(unsigned long min, unsigned long max)
+{
+    int i;
+    unsigned long range, bitmap_size;
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+
+    for ( i = 0; i < FREELIST_SIZE; i++ )
+    {
+        free_list[i]       = &free_tail[i];
+        free_tail[i].pprev = &free_list[i];
+        free_tail[i].next  = NULL;
+    }
+
+    min = round_pgup  (min);
+    max = round_pgdown(max);
+
+    /* Allocate space for the allocation bitmap. */
+    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
+    bitmap_size  = round_pgup(bitmap_size);
+    alloc_bitmap = (unsigned long *)__va(min);
+    min         += bitmap_size;
+    range        = max - min;
+
+    /* All allocated by default. */
+    memset(alloc_bitmap, ~0, bitmap_size);
+    /* Free up the memory we've been given to play with. */
+    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
+    
+    /* The buddy lists are addressed in high memory. */
+    min += PAGE_OFFSET;
+    max += PAGE_OFFSET;
+
+    while ( range != 0 )
+    {
+        /*
+         * Next chunk is limited by alignment of min, but also
+         * must not be bigger than remaining range.
+         */
+        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
+            if ( min & (1<<i) ) break;
+
+        ch = (chunk_head_t *)min;
+        min   += (1<<i);
+        range -= (1<<i);
+        ct = (chunk_tail_t *)min-1;
+        i -= PAGE_SHIFT;
+        ch->level       = i;
+        ch->next        = free_list[i];
+        ch->pprev       = &free_list[i];
+        ch->next->pprev = &ch->next;
+        free_list[i]    = ch;
+        ct->level       = i;
+    }
+}
+
+
+/* Allocate 2^@order contiguous pages. */
+unsigned long __get_free_pages(int mask, int order)
+{
+    int i;
+    chunk_head_t *alloc_ch, *spare_ch;
+    chunk_tail_t            *spare_ct;
+    unsigned long           flags;
+
+    spin_lock_irqsave(&alloc_lock, flags);
+
+    /* Find the smallest order which can satisfy the request. */
+    for ( i = order; ; i++ ) 
+    {
+        if ( i == FREELIST_SIZE ) 
+            panic("Out of memory!\n");
+        if ( !FREELIST_EMPTY(free_list[i]) )
+            break;
+    }
+
+    /* Unlink a chunk. */
+    alloc_ch = free_list[i];
+    free_list[i] = alloc_ch->next;
+    alloc_ch->next->pprev = alloc_ch->pprev;
+
+    /* We may have to break the chunk a number of times. */
+    while ( i != order )
+    {
+        /* Split into two equal parts. */
+        i--;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+
+        /* Create new header for spare chunk. */
+        spare_ch->level = i;
+        spare_ch->next  = free_list[i];
+        spare_ch->pprev = &free_list[i];
+        spare_ct->level = i;
+
+        /* Link in the spare chunk. */
+        spare_ch->next->pprev = &spare_ch->next;
+        free_list[i] = spare_ch;
+    }
+    
+    map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<<order);
+
+    spin_unlock_irqrestore(&alloc_lock, flags);
+
+    return((unsigned long)alloc_ch);
+}
+
+
+/* Free 2^@order pages at location @p. */
+void __free_pages(unsigned long p, int order)
+{
+    unsigned long size = 1 << (order + PAGE_SHIFT);
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+    unsigned long flags;
+    unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
+
+    spin_lock_irqsave(&alloc_lock, flags);
+
+    map_free(pagenr, 1<<order);
+    
+    /* Merge chunks as far as possible. */
+    for ( ; ; )
+    {
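+        /*
+         * If bit (order+PAGE_SHIFT) of p is set, this chunk is the upper
+         * half of a (2*size)-aligned pair, so its buddy immediately
+         * precedes it; otherwise the buddy immediately follows it.
+         */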
+        if ( (p & size) )
+        {
+            /* Merge with predecessor block? */
+            if ( allocated_in_map(pagenr-1) ) break;
+            ct = (chunk_tail_t *)p - 1;
+            if ( ct->level != order ) break;
+            ch = (chunk_head_t *)(p - size);
+            p -= size;
+        }
+        else
+        {
+            /* Merge with successor block? */
+            if ( allocated_in_map(pagenr+(1<<order)) ) break;
+            ch = (chunk_head_t *)(p + size);
+            if ( ch->level != order ) break;
+        }
+        
+        /* Okay, unlink the neighbour. */
+        *ch->pprev = ch->next;
+        ch->next->pprev = ch->pprev;
+
+        order++;
+        size <<= 1;
+    }
+
+    /* Okay, add the final chunk to the appropriate free list. */
+    ch = (chunk_head_t *)p;
+    ct = (chunk_tail_t *)(p+size)-1;
+    ct->level = order;
+    ch->level = order;
+    ch->pprev = &free_list[order];
+    ch->next  = free_list[order];
+    ch->next->pprev = &ch->next;
+    free_list[order] = ch;
+
+    spin_unlock_irqrestore(&alloc_lock, flags);
+}
diff --git a/xen-2.4.16/common/resource.c b/xen-2.4.16/common/resource.c
new file mode 100644 (file)
index 0000000..15f2999
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ *     linux/kernel/resource.c
+ *
+ * Copyright (C) 1999  Linus Torvalds
+ * Copyright (C) 1999  Martin Mares <mj@ucw.cz>
+ *
+ * Arbitrary resource management.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+
+struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO };
+struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM };
+
+static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
+
+/*
+ * This generates reports for /proc/ioports and /proc/iomem
+ */
+static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
+{
+       if (offset < 0)
+               offset = 0;
+
+       while (entry) {
+               const char *name = entry->name;
+               unsigned long from, to;
+
+               if ((int) (end-buf) < 80)
+                       return buf;
+
+               from = entry->start;
+               to = entry->end;
+               if (!name)
+                       name = "<BAD>";
+
+               buf += sprintf(buf, fmt + offset, from, to, name);
+               if (entry->child)
+                       buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
+               entry = entry->sibling;
+       }
+
+       return buf;
+}
+
+int get_resource_list(struct resource *root, char *buf, int size)
+{
+       char *fmt;
+       int retval;
+
+       fmt = "        %08lx-%08lx : %s\n";
+       if (root->end < 0x10000)
+               fmt = "        %04lx-%04lx : %s\n";
+       read_lock(&resource_lock);
+       retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
+       read_unlock(&resource_lock);
+       return retval;
+}      
+
+/* Return the conflict entry if you can't request it */
+static struct resource * __request_resource(struct resource *root, struct resource *new)
+{
+       unsigned long start = new->start;
+       unsigned long end = new->end;
+       struct resource *tmp, **p;
+
+       if (end < start)
+               return root;
+       if (start < root->start)
+               return root;
+       if (end > root->end)
+               return root;
+       p = &root->child;
+       for (;;) {
+               tmp = *p;
+               if (!tmp || tmp->start > end) {
+                       new->sibling = tmp;
+                       *p = new;
+                       new->parent = root;
+                       return NULL;
+               }
+               p = &tmp->sibling;
+               if (tmp->end < start)
+                       continue;
+               return tmp;
+       }
+}
+
+static int __release_resource(struct resource *old)
+{
+       struct resource *tmp, **p;
+
+       p = &old->parent->child;
+       for (;;) {
+               tmp = *p;
+               if (!tmp)
+                       break;
+               if (tmp == old) {
+                       *p = tmp->sibling;
+                       old->parent = NULL;
+                       return 0;
+               }
+               p = &tmp->sibling;
+       }
+       return -EINVAL;
+}
+
+int request_resource(struct resource *root, struct resource *new)
+{
+       struct resource *conflict;
+
+       write_lock(&resource_lock);
+       conflict = __request_resource(root, new);
+       write_unlock(&resource_lock);
+       return conflict ? -EBUSY : 0;
+}
+
+int release_resource(struct resource *old)
+{
+       int retval;
+
+       write_lock(&resource_lock);
+       retval = __release_resource(old);
+       write_unlock(&resource_lock);
+       return retval;
+}
+
+int check_resource(struct resource *root, unsigned long start, unsigned long len)
+{
+       struct resource *conflict, tmp;
+
+       tmp.start = start;
+       tmp.end = start + len - 1;
+       write_lock(&resource_lock);
+       conflict = __request_resource(root, &tmp);
+       if (!conflict)
+               __release_resource(&tmp);
+       write_unlock(&resource_lock);
+       return conflict ? -EBUSY : 0;
+}
+
+/*
+ * Find empty slot in the resource tree given range and alignment.
+ */
+static int find_resource(struct resource *root, struct resource *new,
+                        unsigned long size,
+                        unsigned long min, unsigned long max,
+                        unsigned long align,
+                        void (*alignf)(void *, struct resource *, unsigned long),
+                        void *alignf_data)
+{
+       struct resource *this = root->child;
+
+       new->start = root->start;
+       for(;;) {
+               if (this)
+                       new->end = this->start;
+               else
+                       new->end = root->end;
+               if (new->start < min)
+                       new->start = min;
+               if (new->end > max)
+                       new->end = max;
+               new->start = (new->start + align - 1) & ~(align - 1);
+               if (alignf)
+                       alignf(alignf_data, new, size);
+               if (new->start < new->end && new->end - new->start + 1 >= size) {
+                       new->end = new->start + size - 1;
+                       return 0;
+               }
+               if (!this)
+                       break;
+               new->start = this->end + 1;
+               this = this->sibling;
+       }
+       return -EBUSY;
+}
+
+/*
+ * Allocate empty slot in the resource tree given range and alignment.
+ */
+int allocate_resource(struct resource *root, struct resource *new,
+                     unsigned long size,
+                     unsigned long min, unsigned long max,
+                     unsigned long align,
+                     void (*alignf)(void *, struct resource *, unsigned long),
+                     void *alignf_data)
+{
+       int err;
+
+       write_lock(&resource_lock);
+       err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
+       if (err >= 0 && __request_resource(root, new))
+               err = -EBUSY;
+       write_unlock(&resource_lock);
+       return err;
+}
+
+/*
+ * This is compatibility stuff for IO resources.
+ *
+ * Note how this, unlike the above, knows about
+ * the IO flag meanings (busy etc).
+ *
+ * Request-region creates a new busy region.
+ *
+ * Check-region returns non-zero if the area is already busy
+ *
+ * Release-region releases a matching busy region.
+ */
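+/*
+ * For example (assuming the usual request_region()/release_region()
+ * wrappers from ioport.h around the __-prefixed functions below):
+ *
+ *   if ( !request_region(0x3f8, 8, "serial") )
+ *       return -EBUSY;
+ *   ...probe and use the ports...
+ *   release_region(0x3f8, 8);
+ */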
+struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
+{
+       struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+
+       if (res) {
+               memset(res, 0, sizeof(*res));
+               res->name = name;
+               res->start = start;
+               res->end = start + n - 1;
+               res->flags = IORESOURCE_BUSY;
+
+               write_lock(&resource_lock);
+
+               for (;;) {
+                       struct resource *conflict;
+
+                       conflict = __request_resource(parent, res);
+                       if (!conflict)
+                               break;
+                       if (conflict != parent) {
+                               parent = conflict;
+                               if (!(conflict->flags & IORESOURCE_BUSY))
+                                       continue;
+                       }
+
+                       /* Uhhuh, that didn't work out.. */
+                       kfree(res);
+                       res = NULL;
+                       break;
+               }
+               write_unlock(&resource_lock);
+       }
+       return res;
+}
+
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+       struct resource * res;
+
+       res = __request_region(parent, start, n, "check-region");
+       if (!res)
+               return -EBUSY;
+
+       release_resource(res);
+       kfree(res);
+       return 0;
+}
+
+void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+       struct resource **p;
+       unsigned long end;
+
+       p = &parent->child;
+       end = start + n - 1;
+
+       for (;;) {
+               struct resource *res = *p;
+
+               if (!res)
+                       break;
+               if (res->start <= start && res->end >= end) {
+                       if (!(res->flags & IORESOURCE_BUSY)) {
+                               p = &res->child;
+                               continue;
+                       }
+                       if (res->start != start || res->end != end)
+                               break;
+                       *p = res->sibling;
+                       kfree(res);
+                       return;
+               }
+               p = &res->sibling;
+       }
+       printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
+}
+
+#if 0
+/*
+ * Called from init/main.c to reserve IO ports.
+ */
+#define MAXRESERVE 4
+static int __init reserve_setup(char *str)
+{
+       static int reserved = 0;
+       static struct resource reserve[MAXRESERVE];
+
+       for (;;) {
+               int io_start, io_num;
+               int x = reserved;
+
+               if (get_option (&str, &io_start) != 2)
+                       break;
+               if (get_option (&str, &io_num)   == 0)
+                       break;
+               if (x < MAXRESERVE) {
+                       struct resource *res = reserve + x;
+                       res->name = "reserved";
+                       res->start = io_start;
+                       res->end = io_start + io_num - 1;
+                       res->flags = IORESOURCE_BUSY;
+                       res->child = NULL;
+                       if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
+                               reserved = x+1;
+               }
+       }
+       return 1;
+}
+
+__setup("reserve=", reserve_setup);
+#endif
diff --git a/xen-2.4.16/common/slab.c b/xen-2.4.16/common/slab.c
new file mode 100644 (file)
index 0000000..98a79bb
--- /dev/null
@@ -0,0 +1,1839 @@
+/*
+ * linux/mm/slab.c
+ * Written by Mark Hemment, 1996/97.
+ * (markhe@nextd.demon.co.uk)
+ *
+ * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
+ *
+ * Major cleanup, different bufctl logic, per-cpu arrays
+ *     (c) 2000 Manfred Spraul
+ *
+ * An implementation of the Slab Allocator as described in outline in;
+ *     UNIX Internals: The New Frontiers by Uresh Vahalia
+ *     Pub: Prentice Hall      ISBN 0-13-101908-2
+ * or with a little more detail in;
+ *     The Slab Allocator: An Object-Caching Kernel Memory Allocator
+ *     Jeff Bonwick (Sun Microsystems).
+ *     Presented at: USENIX Summer 1994 Technical Conference
+ *
+ *
+ * The memory is organized in caches, one cache for each object type.
+ * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
+ * Each cache consists of many slabs (they are small (usually one
+ * page long) and always contiguous), and each slab contains multiple
+ * initialized objects.
+ *
+ * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
+ * normal). If you need a special memory type, then you must create a new
+ * cache for that memory type.
+ *
+ * In order to reduce fragmentation, the slabs are sorted in 3 groups:
+ *   full slabs with 0 free objects
+ *   partial slabs
+ *   empty slabs with no allocated objects
+ *
+ * If partial slabs exist, then new allocations come from these slabs,
+ * otherwise from empty slabs or new slabs are allocated.
+ *
+ * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
+ * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
+ *
+ * On SMP systems, each cache has a short per-cpu head array, most allocs
+ * and frees go into that array, and if that array overflows, then 1/2
+ * of the entries in the array are given back into the global cache.
+ * This reduces the number of spinlock operations.
+ *
+ * The c_cpuarray may not be read with enabled local interrupts.
+ *
+ * SMP synchronization:
+ *  constructors and destructors are called without any locking.
+ *  Several members in kmem_cache_t and slab_t never change, they
+ *     are accessed without any locking.
+ *  The per-cpu arrays are never accessed from the wrong cpu, no locking.
+ *  The non-constant members are protected with a per-cache irq spinlock.
+ *
+ * Further notes from the original documentation:
+ *
+ * 11 April '97.  Started multi-threading - markhe
+ *     The global cache-chain is protected by the semaphore 'cache_chain_sem'.
+ *     The sem is only needed when accessing/extending the cache-chain, which
+ *     can never happen inside an interrupt (kmem_cache_create(),
+ *     kmem_cache_shrink() and kmem_cache_reap()).
+ *
+ *     To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which
+ *     may be sleeping and therefore not holding the semaphore/lock), the
+ *     growing field is used.  This also prevents reaping from a cache.
+ *
+ *     At present, each engine (CPU) can be growing a cache.  This should be blocked.
+ *
+ */
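+
+/*
+ * A minimal usage sketch (illustration only - the foo_* names are
+ * invented and the block is compiled out):
+ */
+#if 0
+typedef struct foo { int refcnt; } foo_t;
+static kmem_cache_t *foo_cachep;
+
+static void foo_example(void)
+{
+       foo_t *f;
+
+       /* One cache per object type. */
+       foo_cachep = kmem_cache_create("foo", sizeof(foo_t), 0,
+                                      SLAB_HWCACHE_ALIGN, NULL, NULL);
+       /* Served from a partial slab if one exists, else the cache grows. */
+       f = kmem_cache_alloc(foo_cachep, SLAB_KERNEL);
+       kmem_cache_free(foo_cachep, f);
+       kmem_cache_destroy(foo_cachep);
+}
+#endif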
+
+/*
+ * DEBUG       - 1 for kmem_cache_create() to honour: SLAB_DEBUG_INITIAL,
+ *               SLAB_RED_ZONE & SLAB_POISON.
+ *               0 for faster, smaller code (especially in the critical paths).
+ *
+ * STATS       - 1 to collect stats for /proc/slabinfo.
+ *               0 for faster, smaller code (especially in the critical paths).
+ *
+ * FORCED_DEBUG        - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/list.h>
+#include <xeno/spinlock.h>
+#include <xeno/errno.h>
+#include <xeno/smp.h>
+#include <xeno/sched.h>
+
+
+#ifdef CONFIG_DEBUG_SLAB
+#define        DEBUG           1
+#define        STATS           1
+#define        FORCED_DEBUG    1
+#else
+#define        DEBUG           0
+#define        STATS           0
+#define        FORCED_DEBUG    0
+#endif
+
+/*
+ * Parameters for kmem_cache_reap
+ */
+#define REAP_SCANLEN   10
+#define REAP_PERFECT   10
+
+/* Shouldn't this be in a header file somewhere? */
+#define        BYTES_PER_WORD          sizeof(void *)
+
+/* Legal flag mask for kmem_cache_create(). */
+#if DEBUG
+# define CREATE_MASK   (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
+                        SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+                        SLAB_NO_REAP | SLAB_CACHE_DMA)
+#else
+# define CREATE_MASK   (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA)
+#endif
+
+/*
+ * kmem_bufctl_t:
+ *
+ * Bufctl's are used for linking objs within a slab, as a chain of
+ * linked offsets.
+ *
+ * This implementation relies on "struct page" for locating the cache &
+ * slab an object belongs to.
+ * This allows the bufctl structure to be small (one int), but limits
+ * the number of objects a slab (not a cache) can contain when off-slab
+ * bufctls are used. The limit is the size of the largest general cache
+ * that does not use off-slab slabs.
+ * For 32bit archs with 4 kB pages, this is 56.
+ * This is not serious, as it is only for large objects, when it is unwise
+ * to have too many per slab.
+ * Note: This limit can be raised by introducing a general cache whose size
+ * is less than 512 (PAGE_SIZE>>3), but greater than 256.
+ */
+
+#define BUFCTL_END 0xffffFFFF
+#define        SLAB_LIMIT 0xffffFFFE
+typedef unsigned int kmem_bufctl_t;
+
+/* Max number of objs-per-slab for caches which use off-slab slabs.
+ * Needed to avoid a possible looping condition in kmem_cache_grow().
+ */
+static unsigned long offslab_limit;
+
+/*
+ * slab_t
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from a general cache.
+ * Slabs are chained into three lists: fully used, partial, fully free slabs.
+ */
+typedef struct slab_s {
+       struct list_head        list;
+       unsigned long           colouroff;
+       void                    *s_mem;         /* including colour offset */
+       unsigned int            inuse;          /* num of objs active in slab */
+       kmem_bufctl_t           free;
+} slab_t;
+
+#define slab_bufctl(slabp) \
+       ((kmem_bufctl_t *)(((slab_t*)slabp)+1))
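+
+/*
+ * On-slab layout implied by the above and by kmem_cache_slabmgmt() below
+ * (colour padding first, then the management structures, then the objs):
+ *
+ *   | colour pad | slab_t | bufctl[0..num-1] | align pad | obj 0 | obj 1 | ...
+ *                                                          ^ slabp->s_mem
+ *
+ * Each bufctl entry holds the index of the next free object, so the free
+ * list is threaded through the array and terminated by BUFCTL_END.
+ */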
+
+/*
+ * cpucache_t
+ *
+ * Per cpu structures
+ * The limit is stored in the per-cpu structure to reduce the data cache
+ * footprint.
+ */
+typedef struct cpucache_s {
+       unsigned int avail;
+       unsigned int limit;
+} cpucache_t;
+
+#define cc_entry(cpucache) \
+       ((void **)(((cpucache_t*)(cpucache))+1))
+#define cc_data(cachep) \
+       ((cachep)->cpudata[smp_processor_id()])
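+
+/*
+ * Layout implied by cc_entry(): a cpucache_t header followed directly by
+ * an array of object pointers, used as a stack of which 'avail' slots are
+ * currently filled:
+ *
+ *   | avail | limit | objp[0] | objp[1] | ... | objp[limit-1] |
+ */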
+/*
+ * kmem_cache_t
+ *
+ * manages a cache.
+ */
+
+#define CACHE_NAMELEN  20      /* max name length for a slab cache */
+
+struct kmem_cache_s {
+/* 1) each alloc & free */
+       /* full, partial first, then free */
+       struct list_head        slabs_full;
+       struct list_head        slabs_partial;
+       struct list_head        slabs_free;
+       unsigned int            objsize;
+       unsigned int            flags;  /* constant flags */
+       unsigned int            num;    /* # of objs per slab */
+       spinlock_t              spinlock;
+#ifdef CONFIG_SMP
+       unsigned int            batchcount;
+#endif
+
+/* 2) slab additions/removals */
+       /* order of pgs per slab (2^n) */
+       unsigned int            gfporder;
+
+       /* force GFP flags, e.g. GFP_DMA */
+       unsigned int            gfpflags;
+
+       size_t                  colour;         /* cache colouring range */
+       unsigned int            colour_off;     /* colour offset */
+       unsigned int            colour_next;    /* cache colouring */
+       kmem_cache_t            *slabp_cache;
+       unsigned int            growing;
+       unsigned int            dflags;         /* dynamic flags */
+
+       /* constructor func */
+       void (*ctor)(void *, kmem_cache_t *, unsigned long);
+
+       /* destructor func */
+       void (*dtor)(void *, kmem_cache_t *, unsigned long);
+
+       unsigned long           failures;
+
+/* 3) cache creation/removal */
+       char                    name[CACHE_NAMELEN];
+       struct list_head        next;
+#ifdef CONFIG_SMP
+/* 4) per-cpu data */
+       cpucache_t              *cpudata[NR_CPUS];
+#endif
+#if STATS
+       unsigned long           num_active;
+       unsigned long           num_allocations;
+       unsigned long           high_mark;
+       unsigned long           grown;
+       unsigned long           reaped;
+       unsigned long           errors;
+#ifdef CONFIG_SMP
+       atomic_t                allochit;
+       atomic_t                allocmiss;
+       atomic_t                freehit;
+       atomic_t                freemiss;
+#endif
+#endif
+};
+
+/* internal c_flags */
+#define        CFLGS_OFF_SLAB  0x010000UL      /* slab management in own cache */
+#define        CFLGS_OPTIMIZE  0x020000UL      /* optimized slab lookup */
+
+/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
+#define        DFLGS_GROWN     0x000001UL      /* don't reap a recently grown */
+
+#define        OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
+#define        OPTIMIZE(x)     ((x)->flags & CFLGS_OPTIMIZE)
+#define        GROWN(x)        ((x)->dflags & DFLGS_GROWN)
+
+#if STATS
+#define        STATS_INC_ACTIVE(x)     ((x)->num_active++)
+#define        STATS_DEC_ACTIVE(x)     ((x)->num_active--)
+#define        STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
+#define        STATS_INC_GROWN(x)      ((x)->grown++)
+#define        STATS_INC_REAPED(x)     ((x)->reaped++)
+#define        STATS_SET_HIGH(x)       do { if ((x)->num_active > (x)->high_mark) \
+                                       (x)->high_mark = (x)->num_active; \
+                               } while (0)
+#define        STATS_INC_ERR(x)        ((x)->errors++)
+#else
+#define        STATS_INC_ACTIVE(x)     do { } while (0)
+#define        STATS_DEC_ACTIVE(x)     do { } while (0)
+#define        STATS_INC_ALLOCED(x)    do { } while (0)
+#define        STATS_INC_GROWN(x)      do { } while (0)
+#define        STATS_INC_REAPED(x)     do { } while (0)
+#define        STATS_SET_HIGH(x)       do { } while (0)
+#define        STATS_INC_ERR(x)        do { } while (0)
+#endif
+
+#if STATS && defined(CONFIG_SMP)
+#define STATS_INC_ALLOCHIT(x)  atomic_inc(&(x)->allochit)
+#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
+#define STATS_INC_FREEHIT(x)   atomic_inc(&(x)->freehit)
+#define STATS_INC_FREEMISS(x)  atomic_inc(&(x)->freemiss)
+#else
+#define STATS_INC_ALLOCHIT(x)  do { } while (0)
+#define STATS_INC_ALLOCMISS(x) do { } while (0)
+#define STATS_INC_FREEHIT(x)   do { } while (0)
+#define STATS_INC_FREEMISS(x)  do { } while (0)
+#endif
+
+#if DEBUG
+/* Magic nums for obj red zoning.
+ * Placed in the first word before and the first word after an obj.
+ */
+#define        RED_MAGIC1      0x5A2CF071UL    /* when obj is active */
+#define        RED_MAGIC2      0x170FC2A5UL    /* when obj is inactive */
+
+/* ...and for poisoning */
+#define        POISON_BYTE     0x5a            /* byte value for poisoning */
+#define        POISON_END      0xa5            /* end-byte of poisoning */
+
+#endif
+
+/* maximum size of an obj (in 2^order pages) */
+#define        MAX_OBJ_ORDER   5       /* 32 pages */
+
+/*
+ * Do not go above this order unless 0 objects fit into the slab.
+ */
+#define        BREAK_GFP_ORDER_HI      2
+#define        BREAK_GFP_ORDER_LO      1
+static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+/*
+ * Absolute limit for the gfp order
+ */
+#define        MAX_GFP_ORDER   5       /* 32 pages */
+
+
+/* Macros for storing/retrieving the cachep and/or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache which an obj belongs to.
+ */
+#define        SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
+#define        GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
+#define        SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
+#define        GET_PAGE_SLAB(pg)     ((slab_t *)(pg)->list.prev)
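+
+/*
+ * e.g. the lookup kfree() performs with these macros, given only an
+ * object pointer:
+ *
+ *     page   = virt_to_page(objp);
+ *     cachep = GET_PAGE_CACHE(page);    (stored by kmem_cache_grow())
+ *     slabp  = GET_PAGE_SLAB(page);
+ */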
+
+/* Size description struct for general caches. */
+typedef struct cache_sizes {
+       size_t           cs_size;
+       kmem_cache_t    *cs_cachep;
+       kmem_cache_t    *cs_dmacachep;
+} cache_sizes_t;
+
+static cache_sizes_t cache_sizes[] = {
+#if PAGE_SIZE == 4096
+       {    32,        NULL, NULL},
+#endif
+       {    64,        NULL, NULL},
+       {   128,        NULL, NULL},
+       {   256,        NULL, NULL},
+       {   512,        NULL, NULL},
+       {  1024,        NULL, NULL},
+       {  2048,        NULL, NULL},
+       {  4096,        NULL, NULL},
+       {  8192,        NULL, NULL},
+       { 16384,        NULL, NULL},
+       { 32768,        NULL, NULL},
+       { 65536,        NULL, NULL},
+       {131072,        NULL, NULL},
+       {     0,        NULL, NULL}
+};
+
+/* internal cache of cache description objs */
+static kmem_cache_t cache_cache = {
+       slabs_full:     LIST_HEAD_INIT(cache_cache.slabs_full),
+       slabs_partial:  LIST_HEAD_INIT(cache_cache.slabs_partial),
+       slabs_free:     LIST_HEAD_INIT(cache_cache.slabs_free),
+       objsize:        sizeof(kmem_cache_t),
+       flags:          SLAB_NO_REAP,
+       spinlock:       SPIN_LOCK_UNLOCKED,
+       colour_off:     L1_CACHE_BYTES,
+       name:           "kmem_cache",
+};
+
+/* Guard access to the cache-chain. */
+/* KAF: No semaphores, as we'll never wait around for I/O. */
+static spinlock_t cache_chain_sem;
+#define init_MUTEX(_m)   spin_lock_init(_m)
+#define down(_m)         spin_lock_irqsave(_m,spin_flags)
+#define up(_m)           spin_unlock_irqrestore(_m,spin_flags)
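+
+/*
+ * Note: these macros expand to irq-safe spinlock operations and rely on
+ * a local 'unsigned long spin_flags' being declared in every function
+ * that uses down()/up() - see e.g. kmem_cache_create() below.
+ */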
+
+/* Position marker ('clock hand') for cache reaping. */
+static kmem_cache_t *clock_searchp = &cache_cache;
+
+#define cache_chain (cache_cache.next)
+
+#ifdef CONFIG_SMP
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static int g_cpucache_up;
+
+static void enable_cpucache (kmem_cache_t *cachep);
+static void enable_all_cpucaches (void);
+#endif
+
+/* Calculate the num objs, wastage, and bytes left over for a given slab size. */
+static void kmem_cache_estimate (unsigned long gfporder, size_t size,
+                int flags, size_t *left_over, unsigned int *num)
+{
+       int i;
+       size_t wastage = PAGE_SIZE<<gfporder;
+       size_t extra = 0;
+       size_t base = 0;
+
+       if (!(flags & CFLGS_OFF_SLAB)) {
+               base = sizeof(slab_t);
+               extra = sizeof(kmem_bufctl_t);
+       }
+       i = 0;
+       while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
+               i++;
+       if (i > 0)
+               i--;
+
+       if (i > SLAB_LIMIT)
+               i = SLAB_LIMIT;
+
+       *num = i;
+       wastage -= i*size;
+       wastage -= L1_CACHE_ALIGN(base+i*extra);
+       *left_over = wastage;
+}
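+
+/*
+ * Worked example, assuming a 32-bit build with 4 kB pages and 32-byte
+ * cache lines (so sizeof(slab_t) == 24, sizeof(kmem_bufctl_t) == 4):
+ * for gfporder 0, 32-byte objects and on-slab management, the loop
+ * stops at i = 113, since
+ *     113*32 + L1_CACHE_ALIGN(24 + 113*4) = 3616 + 480 = 4096
+ * still fits, while i = 114 would need 4128 bytes.  Result: *num = 113,
+ * *left_over = 0.
+ */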
+
+/* Initialisation - setup the `cache' cache. */
+void __init kmem_cache_init(void)
+{
+       size_t left_over;
+
+       init_MUTEX(&cache_chain_sem);
+       INIT_LIST_HEAD(&cache_chain);
+
+       kmem_cache_estimate(0, cache_cache.objsize, 0,
+                       &left_over, &cache_cache.num);
+       if (!cache_cache.num)
+               BUG();
+
+       cache_cache.colour = left_over/cache_cache.colour_off;
+       cache_cache.colour_next = 0;
+}
+
+
+/* Initialisation - setup remaining internal and general caches.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+ */
+void __init kmem_cache_sizes_init(unsigned long num_physpages)
+{
+       cache_sizes_t *sizes = cache_sizes;
+       char name[20];
+       /*
+        * Fragmentation resistance on low memory - only use bigger
+        * page orders on machines with more than 32MB of memory.
+        */
+       if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+               slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+       do {
+               /* For performance, all the general caches are L1 aligned.
+                * This should be particularly beneficial on SMP boxes, as it
+                * eliminates "false sharing".
+                * Note for systems short on memory removing the alignment will
+                * allow tighter packing of the smaller caches. */
+               sprintf(name,"size-%Zd",sizes->cs_size);
+               if (!(sizes->cs_cachep =
+                       kmem_cache_create(name, sizes->cs_size,
+                                       0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
+                       BUG();
+               }
+
+               /* Inc off-slab bufctl limit until the ceiling is hit. */
+               if (!(OFF_SLAB(sizes->cs_cachep))) {
+                       offslab_limit = sizes->cs_size-sizeof(slab_t);
+                       offslab_limit /= 2;
+               }
+               sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
+               sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
+                             SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
+               if (!sizes->cs_dmacachep)
+                       BUG();
+               sizes++;
+       } while (sizes->cs_size);
+}
+
+int __init kmem_cpucache_init(void)
+{
+#ifdef CONFIG_SMP
+       g_cpucache_up = 1;
+       enable_all_cpucaches();
+#endif
+       return 0;
+}
+
+/*__initcall(kmem_cpucache_init);*/
+
+/* Interface to system's page allocator. No need to hold the cache-lock.
+ */
+static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
+{
+       void    *addr;
+
+       /*
+        * If we requested dmaable memory, we will get it. Even if we
+        * did not request dmaable memory, we might get it, but that
+        * would be relatively rare and ignorable.
+        */
+       flags |= cachep->gfpflags;
+       addr = (void*) __get_free_pages(flags, cachep->gfporder);
+       /* Assume that now we have the pages no one else can legally
+        * mess with the 'struct page's.
+        * However vm_scan() might try to test the structure to see if
+        * it is a named-page or buffer-page.  The members it tests are
+        * of no interest here.....
+        */
+       return addr;
+}
+
+/* Interface to system's page release. */
+static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
+{
+       unsigned long i = (1<<cachep->gfporder);
+       struct pfn_info *page = virt_to_page(addr);
+
+       /* free_pages() does not clear the type bit - we do that.
+        * The pages have been unlinked from their cache-slab,
+        * but their 'struct page's might be accessed in
+        * vm_scan(). Shouldn't be a worry.
+        */
+       while (i--) {
+               PageClearSlab(page);
+               page++;
+       }
+
+       free_pages((unsigned long)addr, cachep->gfporder);
+}
+
+#if DEBUG
+static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+       int size = cachep->objsize;
+       if (cachep->flags & SLAB_RED_ZONE) {
+               addr += BYTES_PER_WORD;
+               size -= 2*BYTES_PER_WORD;
+       }
+       memset(addr, POISON_BYTE, size);
+       *(unsigned char *)(addr+size-1) = POISON_END;
+}
+
+static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr)
+{
+       int size = cachep->objsize;
+       void *end;
+       if (cachep->flags & SLAB_RED_ZONE) {
+               addr += BYTES_PER_WORD;
+               size -= 2*BYTES_PER_WORD;
+       }
+       end = memchr(addr, POISON_END, size);
+       if (end != (addr+size-1))
+               return 1;
+       return 0;
+}
+#endif
+
+/* Destroy all the objs in a slab, and release the mem back to the system.
+ * Before calling, the slab must have been unlinked from the cache.
+ * The cache-lock is not held/needed.
+ */
+static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp)
+{
+       if (cachep->dtor
+#if DEBUG
+               || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
+#endif
+       ) {
+               int i;
+               for (i = 0; i < cachep->num; i++) {
+                       void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+                       if (cachep->flags & SLAB_RED_ZONE) {
+                               if (*((unsigned long*)(objp)) != RED_MAGIC1)
+                                       BUG();
+                               if (*((unsigned long*)(objp + cachep->objsize
+                                               -BYTES_PER_WORD)) != RED_MAGIC1)
+                                       BUG();
+                               objp += BYTES_PER_WORD;
+                       }
+#endif
+                       if (cachep->dtor)
+                               (cachep->dtor)(objp, cachep, 0);
+#if DEBUG
+                       if (cachep->flags & SLAB_RED_ZONE) {
+                               objp -= BYTES_PER_WORD;
+                       }       
+                       if ((cachep->flags & SLAB_POISON)  &&
+                               kmem_check_poison_obj(cachep, objp))
+                               BUG();
+#endif
+               }
+       }
+
+       kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
+       if (OFF_SLAB(cachep))
+               kmem_cache_free(cachep->slabp_cache, slabp);
+}
+
+/**
+ * kmem_cache_create - Create a cache.
+ * @name: A string which is used in /proc/slabinfo to identify this cache.
+ * @size: The size of objects to be created in this cache.
+ * @offset: The offset to use within the page.
+ * @flags: SLAB flags
+ * @ctor: A constructor for the objects.
+ * @dtor: A destructor for the objects.
+ *
+ * Returns a ptr to the cache on success, NULL on failure.
+ * Cannot be called within an interrupt, but can be interrupted.
+ * The @ctor is run when new pages are allocated by the cache
+ * and the @dtor is run before the pages are handed back.
+ * The flags are
+ *
+ * %SLAB_POISON - Poison the slab with a known test pattern (0x5a bytes,
+ * terminated by POISON_END) to catch references to uninitialised memory.
+ *
+ * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
+ * for buffer overruns.
+ *
+ * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
+ * memory pressure.
+ *
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
+ * cacheline.  This can be beneficial if you're counting cycles as closely
+ * as davem.
+ */
+kmem_cache_t *
+kmem_cache_create (const char *name, size_t size, size_t offset,
+       unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
+       void (*dtor)(void*, kmem_cache_t *, unsigned long))
+{
+       const char *func_nm = KERN_ERR "kmem_create: ";
+       size_t left_over, align, slab_size;
+       kmem_cache_t *cachep = NULL;
+        unsigned long spin_flags;
+
+       /*
+        * Sanity checks... these are all serious usage bugs.
+        */
+       if ((!name) ||
+               ((strlen(name) >= CACHE_NAMELEN - 1)) ||
+               (size < BYTES_PER_WORD) ||
+               (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
+               (dtor && !ctor) ||
+               (offset < 0 || offset > size))
+                       BUG();
+
+#if DEBUG
+       if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
+               /* No constructor, but initial state check requested */
+               printk("%sNo con, but init state check requested - %s\n", func_nm, name);
+               flags &= ~SLAB_DEBUG_INITIAL;
+       }
+
+       if ((flags & SLAB_POISON) && ctor) {
+               /* request for poisoning, but we can't do that with a constructor */
+               printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
+               flags &= ~SLAB_POISON;
+       }
+#if FORCED_DEBUG
+       if (size < (PAGE_SIZE>>3))
+               /*
+                * only red-zone small objects; red-zoning large
+                * objects causes severe fragmentation.
+                */
+               flags |= SLAB_RED_ZONE;
+       if (!ctor)
+               flags |= SLAB_POISON;
+#endif
+#endif
+
+       /*
+        * Always check flags; a caller might be expecting debug
+        * support which isn't available.
+        */
+       if (flags & ~CREATE_MASK)
+               BUG();
+
+       /* Get cache's description obj. */
+       cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+       if (!cachep)
+               goto opps;
+       memset(cachep, 0, sizeof(kmem_cache_t));
+
+       /* Check that size is in terms of words.  This is needed to avoid
+        * unaligned accesses for some archs when redzoning is used, and makes
+        * sure any on-slab bufctl's are also correctly aligned.
+        */
+       if (size & (BYTES_PER_WORD-1)) {
+               size += (BYTES_PER_WORD-1);
+               size &= ~(BYTES_PER_WORD-1);
+               printk("%sForcing size word alignment - %s\n", func_nm, name);
+       }
+       
+#if DEBUG
+       if (flags & SLAB_RED_ZONE) {
+               /*
+                * There is no point trying to honour cache alignment
+                * when redzoning.
+                */
+               flags &= ~SLAB_HWCACHE_ALIGN;
+               size += 2*BYTES_PER_WORD;       /* words for redzone */
+       }
+#endif
+       align = BYTES_PER_WORD;
+       if (flags & SLAB_HWCACHE_ALIGN)
+               align = L1_CACHE_BYTES;
+
+       /* Determine if the slab management is 'on' or 'off' slab. */
+       if (size >= (PAGE_SIZE>>3))
+               /*
+                * Size is large, assume best to place the slab management obj
+                * off-slab (should allow better packing of objs).
+                */
+               flags |= CFLGS_OFF_SLAB;
+
+       if (flags & SLAB_HWCACHE_ALIGN) {
+               /* Need to adjust size so that objs are cache aligned. */
+               /* Small obj size, can get at least two per cache line. */
+               /* FIXME: only power of 2 supported, was better */
+               while (size < align/2)
+                       align /= 2;
+               size = (size+align-1)&(~(align-1));
+       }
+
+       /* Calculate size (in pages) of slabs, and the num of objs per slab.
+        * This could be made much more intelligent.  For now, try to avoid
+        * using high page-orders for slabs.  When the gfp() funcs are more
+        * friendly towards high-order requests, this should be changed.
+        */
+       do {
+               unsigned int break_flag = 0;
+cal_wastage:
+               kmem_cache_estimate(cachep->gfporder, size, flags,
+                                               &left_over, &cachep->num);
+               if (break_flag)
+                       break;
+               if (cachep->gfporder >= MAX_GFP_ORDER)
+                       break;
+               if (!cachep->num)
+                       goto next;
+               if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
+                       /* Oops, this num of objs will cause problems. */
+                       cachep->gfporder--;
+                       break_flag++;
+                       goto cal_wastage;
+               }
+
+               /*
+                * Large num of objs is good, but v. large slabs are currently
+                * bad for the gfp()s.
+                */
+               if (cachep->gfporder >= slab_break_gfp_order)
+                       break;
+
+               if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
+                       break;  /* Acceptable internal fragmentation. */
+next:
+               cachep->gfporder++;
+       } while (1);
+
+       if (!cachep->num) {
+               printk("kmem_cache_create: couldn't create cache %s.\n", name);
+               kmem_cache_free(&cache_cache, cachep);
+               cachep = NULL;
+               goto opps;
+       }
+       slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
+
+       /*
+        * If the slab has been placed off-slab, and we have enough space, then
+        * move it on-slab. This is at the expense of any extra colouring.
+        */
+       if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+               flags &= ~CFLGS_OFF_SLAB;
+               left_over -= slab_size;
+       }
+
+       /* Offset must be a multiple of the alignment. */
+       offset += (align-1);
+       offset &= ~(align-1);
+       if (!offset)
+               offset = L1_CACHE_BYTES;
+       cachep->colour_off = offset;
+       cachep->colour = left_over/offset;
+
+       /* init remaining fields */
+       if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
+               flags |= CFLGS_OPTIMIZE;
+
+       cachep->flags = flags;
+       cachep->gfpflags = 0;
+       if (flags & SLAB_CACHE_DMA)
+               cachep->gfpflags |= GFP_DMA;
+       spin_lock_init(&cachep->spinlock);
+       cachep->objsize = size;
+       INIT_LIST_HEAD(&cachep->slabs_full);
+       INIT_LIST_HEAD(&cachep->slabs_partial);
+       INIT_LIST_HEAD(&cachep->slabs_free);
+
+       if (flags & CFLGS_OFF_SLAB)
+               cachep->slabp_cache = kmem_find_general_cachep(slab_size,0);
+       cachep->ctor = ctor;
+       cachep->dtor = dtor;
+       /* Copy name over so we don't have problems with unloaded modules */
+       strcpy(cachep->name, name);
+
+#ifdef CONFIG_SMP
+       if (g_cpucache_up)
+               enable_cpucache(cachep);
+#endif
+       /* Need the semaphore to access the chain. */
+       down(&cache_chain_sem);
+       {
+               struct list_head *p;
+
+               list_for_each(p, &cache_chain) {
+                       kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
+
+                       /* The name field is constant - no lock needed. */
+                       if (!strcmp(pc->name, name))
+                               BUG();
+               }
+       }
+
+       /* There is no reason to lock our new cache before we
+        * link it in - no one knows about it yet...
+        */
+       list_add(&cachep->next, &cache_chain);
+       up(&cache_chain_sem);
+opps:
+       return cachep;
+}
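+
+/*
+ * Shape of a constructor suitable for the @ctor argument above
+ * (illustration only - the foo_* names are invented and the block is
+ * compiled out).  A constructor may run with SLAB_CTOR_ATOMIC set and
+ * must never allocate from the cache it constructs for (deadlock).
+ */
+#if 0
+struct foo { int refcnt; };
+
+static void foo_ctor(void *objp, kmem_cache_t *cachep, unsigned long flags)
+{
+       struct foo *f = objp;
+
+       /* Establish the constructed state objects must be freed in. */
+       f->refcnt = 0;
+}
+#endif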
+
+
+#if DEBUG
+/*
+ * This checks if the kmem_cache_t pointer is chained in the cache_chain
+ * list. -arca
+ */
+static int is_chained_kmem_cache(kmem_cache_t * cachep)
+{
+       struct list_head *p;
+       int ret = 0;
+        unsigned long spin_flags;
+
+       /* Find the cache in the chain of caches. */
+       down(&cache_chain_sem);
+       list_for_each(p, &cache_chain) {
+               if (p == &cachep->next) {
+                       ret = 1;
+                       break;
+               }
+       }
+       up(&cache_chain_sem);
+
+       return ret;
+}
+#else
+#define is_chained_kmem_cache(x) 1
+#endif
+
+#ifdef CONFIG_SMP
+/*
+ * Waits for all CPUs to execute func().
+ */
+static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
+{
+       local_irq_disable();
+       func(arg);
+       local_irq_enable();
+
+       if (smp_call_function(func, arg, 1, 1))
+               BUG();
+}
+
+typedef struct ccupdate_struct_s
+{
+       kmem_cache_t *cachep;
+       cpucache_t *new[NR_CPUS];
+} ccupdate_struct_t;
+
+static void do_ccupdate_local(void *info)
+{
+       ccupdate_struct_t *new = (ccupdate_struct_t *)info;
+       cpucache_t *old = cc_data(new->cachep);
+       
+       cc_data(new->cachep) = new->new[smp_processor_id()];
+       new->new[smp_processor_id()] = old;
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len);
+
+static void drain_cpu_caches(kmem_cache_t *cachep)
+{
+       ccupdate_struct_t new;
+       int i;
+        unsigned long spin_flags;
+
+       memset(&new.new,0,sizeof(new.new));
+
+       new.cachep = cachep;
+
+       down(&cache_chain_sem);
+       smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+       for (i = 0; i < smp_num_cpus; i++) {
+               cpucache_t* ccold = new.new[cpu_logical_map(i)];
+               if (!ccold || (ccold->avail == 0))
+                       continue;
+               local_irq_disable();
+               free_block(cachep, cc_entry(ccold), ccold->avail);
+               local_irq_enable();
+               ccold->avail = 0;
+       }
+       smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+       up(&cache_chain_sem);
+}
+
+#else
+#define drain_cpu_caches(cachep)       do { } while (0)
+#endif
+
+static int __kmem_cache_shrink(kmem_cache_t *cachep)
+{
+       slab_t *slabp;
+       int ret;
+
+       drain_cpu_caches(cachep);
+
+       spin_lock_irq(&cachep->spinlock);
+
+       /* If the cache is growing, stop shrinking. */
+       while (!cachep->growing) {
+               struct list_head *p;
+
+               p = cachep->slabs_free.prev;
+               if (p == &cachep->slabs_free)
+                       break;
+
+               slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
+#if DEBUG
+               if (slabp->inuse)
+                       BUG();
+#endif
+               list_del(&slabp->list);
+
+               spin_unlock_irq(&cachep->spinlock);
+               kmem_slab_destroy(cachep, slabp);
+               spin_lock_irq(&cachep->spinlock);
+       }
+       ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial);
+       spin_unlock_irq(&cachep->spinlock);
+       return ret;
+}
+
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(kmem_cache_t *cachep)
+{
+       if (!cachep || !is_chained_kmem_cache(cachep))
+               BUG();
+
+       return __kmem_cache_shrink(cachep);
+}
+
+/**
+ * kmem_cache_destroy - delete a cache
+ * @cachep: the cache to destroy
+ *
+ * Remove a kmem_cache_t object from the slab cache.
+ * Returns 0 on success.
+ *
+ * It is expected this function will be called by a module when it is
+ * unloaded.  This will remove the cache completely, and avoid a duplicate
+ * cache being allocated each time a module is loaded and unloaded, if the
+ * module doesn't have persistent in-kernel storage across loads and unloads.
+ *
+ * The caller must guarantee that no one will allocate memory from the cache
+ * during the kmem_cache_destroy().
+ */
+int kmem_cache_destroy (kmem_cache_t * cachep)
+{
+        unsigned long spin_flags;
+
+       if (!cachep || cachep->growing)
+               BUG();
+
+       /* Find the cache in the chain of caches. */
+       down(&cache_chain_sem);
+       /* the chain is never empty, cache_cache is never destroyed */
+       if (clock_searchp == cachep)
+               clock_searchp = list_entry(cachep->next.next,
+                                               kmem_cache_t, next);
+       list_del(&cachep->next);
+       up(&cache_chain_sem);
+
+       if (__kmem_cache_shrink(cachep)) {
+               printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
+                      cachep);
+               down(&cache_chain_sem);
+               list_add(&cachep->next,&cache_chain);
+               up(&cache_chain_sem);
+               return 1;
+       }
+#ifdef CONFIG_SMP
+       {
+               int i;
+               for (i = 0; i < NR_CPUS; i++)
+                       kfree(cachep->cpudata[i]);
+       }
+#endif
+       kmem_cache_free(&cache_cache, cachep);
+
+       return 0;
+}
+
+/* Get the memory for a slab management obj. */
+static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep,
+                       void *objp, int colour_off, int local_flags)
+{
+       slab_t *slabp;
+       
+       if (OFF_SLAB(cachep)) {
+               /* Slab management obj is off-slab. */
+               slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
+               if (!slabp)
+                       return NULL;
+       } else {
+               /* FIXME: change to
+                       slabp = objp
+                * if you enable OPTIMIZE
+                */
+               slabp = objp+colour_off;
+               colour_off += L1_CACHE_ALIGN(cachep->num *
+                               sizeof(kmem_bufctl_t) + sizeof(slab_t));
+       }
+       slabp->inuse = 0;
+       slabp->colouroff = colour_off;
+       slabp->s_mem = objp+colour_off;
+
+       return slabp;
+}
+
+static inline void kmem_cache_init_objs (kmem_cache_t * cachep,
+                       slab_t * slabp, unsigned long ctor_flags)
+{
+       int i;
+
+       for (i = 0; i < cachep->num; i++) {
+               void* objp = slabp->s_mem+cachep->objsize*i;
+#if DEBUG
+               if (cachep->flags & SLAB_RED_ZONE) {
+                       *((unsigned long*)(objp)) = RED_MAGIC1;
+                       *((unsigned long*)(objp + cachep->objsize -
+                                       BYTES_PER_WORD)) = RED_MAGIC1;
+                       objp += BYTES_PER_WORD;
+               }
+#endif
+
+               /*
+                * Constructors are not allowed to allocate memory from
+                * the same cache which they are a constructor for.
+                * Otherwise, deadlock. They must also be threaded.
+                */
+               if (cachep->ctor)
+                       cachep->ctor(objp, cachep, ctor_flags);
+#if DEBUG
+               if (cachep->flags & SLAB_RED_ZONE)
+                       objp -= BYTES_PER_WORD;
+               if (cachep->flags & SLAB_POISON)
+                       /* need to poison the objs */
+                       kmem_poison_obj(cachep, objp);
+               if (cachep->flags & SLAB_RED_ZONE) {
+                       if (*((unsigned long*)(objp)) != RED_MAGIC1)
+                               BUG();
+                       if (*((unsigned long*)(objp + cachep->objsize -
+                                       BYTES_PER_WORD)) != RED_MAGIC1)
+                               BUG();
+               }
+#endif
+               slab_bufctl(slabp)[i] = i+1;
+       }
+       slab_bufctl(slabp)[i-1] = BUFCTL_END;
+       slabp->free = 0;
+}
+
+/*
+ * Grow (by 1) the number of slabs within a cache.  This is called by
+ * kmem_cache_alloc() when there are no active objs left in a cache.
+ */
+static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
+{
+       slab_t  *slabp;
+       struct pfn_info *page;
+       unsigned int     i;
+       void            *objp;
+       size_t           offset;
+       unsigned int     local_flags;
+       unsigned long    ctor_flags;
+       unsigned long    save_flags;
+
+       /* Be lazy and only check for valid flags here,
+        * keeping it out of the critical path in kmem_cache_alloc().
+        */
+       if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
+               BUG();
+       if (flags & SLAB_NO_GROW)
+               return 0;
+
+#if 0
+       if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
+               BUG();
+#endif
+
+       ctor_flags = SLAB_CTOR_CONSTRUCTOR;
+       local_flags = (flags & SLAB_LEVEL_MASK);
+       if (local_flags == SLAB_ATOMIC)
+               /*
+                * Not allowed to sleep.  Need to tell a constructor about
+                * this - it might need to know...
+                */
+               ctor_flags |= SLAB_CTOR_ATOMIC;
+
+       /* About to mess with non-constant members - lock. */
+       spin_lock_irqsave(&cachep->spinlock, save_flags);
+
+       /* Get colour for the slab, and calculate the next value. */
+       offset = cachep->colour_next;
+       cachep->colour_next++;
+       if (cachep->colour_next >= cachep->colour)
+               cachep->colour_next = 0;
+       offset *= cachep->colour_off;
+       cachep->dflags |= DFLGS_GROWN;
+
+       cachep->growing++;
+       spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+
+       /* A series of memory allocations for a new slab.
+        * Neither the cache-chain semaphore nor the cache-lock is
+        * held, but the incrementing c_growing prevents this
+        * cache from being reaped or shrunk.
+        * Note: The cache could be selected for reaping in
+        * kmem_cache_reap(), but when the final test is made the
+        * growing value will be seen.
+        */
+
+       /* Get mem for the objs. */
+       if (!(objp = kmem_getpages(cachep, flags)))
+               goto failed;
+
+       /* Get slab management. */
+       if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags)))
+               goto opps1;
+
+       /* Nasty!!!!!! I hope this is OK. */
+       i = 1 << cachep->gfporder;
+       page = virt_to_page(objp);
+       do {
+               SET_PAGE_CACHE(page, cachep);
+               SET_PAGE_SLAB(page, slabp);
+               PageSetSlab(page);
+               page++;
+       } while (--i);
+
+       kmem_cache_init_objs(cachep, slabp, ctor_flags);
+
+       spin_lock_irqsave(&cachep->spinlock, save_flags);
+       cachep->growing--;
+
+       /* Make slab active. */
+       list_add_tail(&slabp->list, &cachep->slabs_free);
+       STATS_INC_GROWN(cachep);
+       cachep->failures = 0;
+
+       spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+       return 1;
+opps1:
+       kmem_freepages(cachep, objp);
+failed:
+       spin_lock_irqsave(&cachep->spinlock, save_flags);
+       cachep->growing--;
+       spin_unlock_irqrestore(&cachep->spinlock, save_flags);
+       return 0;
+}
+
+/*
+ * Perform extra freeing checks:
+ * - detect double free
+ * - detect bad pointers.
+ * Called with the cache-lock held.
+ */
+
+#if DEBUG
+static int kmem_extra_free_checks (kmem_cache_t * cachep,
+                       slab_t *slabp, void * objp)
+{
+       int i;
+       unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+       if (objnr >= cachep->num)
+               BUG();
+       if (objp != slabp->s_mem + objnr*cachep->objsize)
+               BUG();
+
+       /* Check slab's freelist to see if this obj is there. */
+       for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
+               if (i == objnr)
+                       BUG();
+       }
+       return 0;
+}
+#endif
+
+static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags)
+{
+       if (flags & SLAB_DMA) {
+               if (!(cachep->gfpflags & GFP_DMA))
+                       BUG();
+       } else {
+               if (cachep->gfpflags & GFP_DMA)
+                       BUG();
+       }
+}
+
+static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
+                                               slab_t *slabp)
+{
+       void *objp;
+
+       STATS_INC_ALLOCED(cachep);
+       STATS_INC_ACTIVE(cachep);
+       STATS_SET_HIGH(cachep);
+
+       /* get obj pointer */
+       slabp->inuse++;
+       objp = slabp->s_mem + slabp->free*cachep->objsize;
+       slabp->free=slab_bufctl(slabp)[slabp->free];
+
+       if (unlikely(slabp->free == BUFCTL_END)) {
+               list_del(&slabp->list);
+               list_add(&slabp->list, &cachep->slabs_full);
+       }
+#if DEBUG
+       if (cachep->flags & SLAB_POISON)
+               if (kmem_check_poison_obj(cachep, objp))
+                       BUG();
+       if (cachep->flags & SLAB_RED_ZONE) {
+               /* Set alloc red-zone, and check old one. */
+               if (xchg((unsigned long *)objp, RED_MAGIC2) !=
+                                                        RED_MAGIC1)
+                       BUG();
+               if (xchg((unsigned long *)(objp+cachep->objsize -
+                         BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
+                       BUG();
+               objp += BYTES_PER_WORD;
+       }
+#endif
+       return objp;
+}
+
+/*
+ * Returns a ptr to an obj in the given cache.
+ * The caller must guarantee synchronization.
+ * #define for the goto optimization 8-)
+ */
+#define kmem_cache_alloc_one(cachep)                           \
+({                                                             \
+       struct list_head * slabs_partial, * entry;              \
+       slab_t *slabp;                                          \
+                                                               \
+       slabs_partial = &(cachep)->slabs_partial;               \
+       entry = slabs_partial->next;                            \
+       if (unlikely(entry == slabs_partial)) {                 \
+               struct list_head * slabs_free;                  \
+               slabs_free = &(cachep)->slabs_free;             \
+               entry = slabs_free->next;                       \
+               if (unlikely(entry == slabs_free))              \
+                       goto alloc_new_slab;                    \
+               list_del(entry);                                \
+               list_add(entry, slabs_partial);                 \
+       }                                                       \
+                                                               \
+       slabp = list_entry(entry, slab_t, list);                \
+       kmem_cache_alloc_one_tail(cachep, slabp);               \
+})
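+
+/*
+ * Note: being a macro (a GCC statement expression), the body expands
+ * inside its caller, so the 'goto alloc_new_slab' above jumps to the
+ * label of that name in the enclosing function, __kmem_cache_alloc().
+ */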
+
+#ifdef CONFIG_SMP
+void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags)
+{
+       int batchcount = cachep->batchcount;
+       cpucache_t* cc = cc_data(cachep);
+
+       spin_lock(&cachep->spinlock);
+       while (batchcount--) {
+               struct list_head * slabs_partial, * entry;
+               slab_t *slabp;
+               /* Get the slab the alloc is to come from. */
+               slabs_partial = &(cachep)->slabs_partial;
+               entry = slabs_partial->next;
+               if (unlikely(entry == slabs_partial)) {
+                       struct list_head * slabs_free;
+                       slabs_free = &(cachep)->slabs_free;
+                       entry = slabs_free->next;
+                       if (unlikely(entry == slabs_free))
+                               break;
+                       list_del(entry);
+                       list_add(entry, slabs_partial);
+               }
+
+               slabp = list_entry(entry, slab_t, list);
+               cc_entry(cc)[cc->avail++] =
+                               kmem_cache_alloc_one_tail(cachep, slabp);
+       }
+       spin_unlock(&cachep->spinlock);
+
+       if (cc->avail)
+               return cc_entry(cc)[--cc->avail];
+       return NULL;
+}
+#endif
+
+static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+       unsigned long save_flags;
+       void* objp;
+
+       kmem_cache_alloc_head(cachep, flags);
+try_again:
+       local_irq_save(save_flags);
+#ifdef CONFIG_SMP
+       {
+               cpucache_t *cc = cc_data(cachep);
+
+               if (cc) {
+                       if (cc->avail) {
+                               STATS_INC_ALLOCHIT(cachep);
+                               objp = cc_entry(cc)[--cc->avail];
+                       } else {
+                               STATS_INC_ALLOCMISS(cachep);
+                               objp = kmem_cache_alloc_batch(cachep,flags);
+                               if (!objp)
+                                       goto alloc_new_slab_nolock;
+                       }
+               } else {
+                       spin_lock(&cachep->spinlock);
+                       objp = kmem_cache_alloc_one(cachep);
+                       spin_unlock(&cachep->spinlock);
+               }
+       }
+#else
+       objp = kmem_cache_alloc_one(cachep);
+#endif
+       local_irq_restore(save_flags);
+       return objp;
+alloc_new_slab:
+#ifdef CONFIG_SMP
+       spin_unlock(&cachep->spinlock);
+alloc_new_slab_nolock:
+#endif
+       local_irq_restore(save_flags);
+       if (kmem_cache_grow(cachep, flags))
+               /* Someone may have stolen our objs.  Doesn't matter, we'll
+                * just come back here again.
+                */
+               goto try_again;
+       return NULL;
+}
+
+/*
+ * Release an obj back to its cache. If the obj has a constructed
+ * state, it should be in this state _before_ it is released.
+ * - caller is responsible for the synchronization
+ */
+
+#if DEBUG
+# define CHECK_NR(pg)                                          \
+       do {                                                    \
+               if (!VALID_PAGE(pg)) {                          \
+                       printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \
+                               (unsigned long)objp);           \
+                       BUG();                                  \
+               } \
+       } while (0)
+# define CHECK_PAGE(page)                                      \
+       do {                                                    \
+               CHECK_NR(page);                                 \
+               if (!PageSlab(page)) {                          \
+                       printk(KERN_ERR "kfree: bad ptr %lxh.\n", \
+                               (unsigned long)objp);           \
+                       BUG();                                  \
+               }                                               \
+       } while (0)
+
+#else
+# define CHECK_PAGE(pg)        do { } while (0)
+#endif
+
+static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp)
+{
+       slab_t* slabp;
+
+       CHECK_PAGE(virt_to_page(objp));
+       /* reduces memory footprint
+        *
+       if (OPTIMIZE(cachep))
+               slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
+        else
+        */
+       slabp = GET_PAGE_SLAB(virt_to_page(objp));
+
+#if DEBUG
+       if (cachep->flags & SLAB_DEBUG_INITIAL)
+               /* Need to call the slab's constructor so the
+                * caller can perform a verify of its state (debugging).
+                * Called without the cache-lock held.
+                */
+               cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+
+       if (cachep->flags & SLAB_RED_ZONE) {
+               objp -= BYTES_PER_WORD;
+               if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
+                       /* Either write before start, or a double free. */
+                       BUG();
+               if (xchg((unsigned long *)(objp+cachep->objsize -
+                               BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
+                       /* Either write past end, or a double free. */
+                       BUG();
+       }
+       if (cachep->flags & SLAB_POISON)
+               kmem_poison_obj(cachep, objp);
+       if (kmem_extra_free_checks(cachep, slabp, objp))
+               return;
+#endif
+       {
+               unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
+
+               slab_bufctl(slabp)[objnr] = slabp->free;
+               slabp->free = objnr;
+       }
+       STATS_DEC_ACTIVE(cachep);
+       
+       /* fixup slab chains */
+       {
+               int inuse = slabp->inuse;
+               if (unlikely(!--slabp->inuse)) {
+                       /* Was partial or full, now empty. */
+                       list_del(&slabp->list);
+                       list_add(&slabp->list, &cachep->slabs_free);
+               } else if (unlikely(inuse == cachep->num)) {
+                       /* Was full. */
+                       list_del(&slabp->list);
+                       list_add(&slabp->list, &cachep->slabs_partial);
+               }
+       }
+}
+
+#ifdef CONFIG_SMP
+static inline void __free_block (kmem_cache_t* cachep,
+                                                       void** objpp, int len)
+{
+       for ( ; len > 0; len--, objpp++)
+               kmem_cache_free_one(cachep, *objpp);
+}
+
+static void free_block (kmem_cache_t* cachep, void** objpp, int len)
+{
+       spin_lock(&cachep->spinlock);
+       __free_block(cachep, objpp, len);
+       spin_unlock(&cachep->spinlock);
+}
+#endif
+
+/*
+ * __kmem_cache_free
+ * called with interrupts disabled
+ */
+static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp)
+{
+#ifdef CONFIG_SMP
+       cpucache_t *cc = cc_data(cachep);
+
+       CHECK_PAGE(virt_to_page(objp));
+       if (cc) {
+               int batchcount;
+               if (cc->avail < cc->limit) {
+                       STATS_INC_FREEHIT(cachep);
+                       cc_entry(cc)[cc->avail++] = objp;
+                       return;
+               }
+               STATS_INC_FREEMISS(cachep);
+               batchcount = cachep->batchcount;
+               cc->avail -= batchcount;
+               free_block(cachep,
+                                       &cc_entry(cc)[cc->avail],batchcount);
+               cc_entry(cc)[cc->avail++] = objp;
+               return;
+       } else {
+               free_block(cachep, &objp, 1);
+       }
+#else
+       kmem_cache_free_one(cachep, objp);
+#endif
+}
+
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache.  The flags are only relevant
+ * if the cache has no available objects.
+ */
+void * kmem_cache_alloc (kmem_cache_t *cachep, int flags)
+{
+       return __kmem_cache_alloc(cachep, flags);
+}
+
+/**
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ *
+ * The @flags argument may be one of:
+ *
+ * %GFP_USER - Allocate memory on behalf of user.  May sleep.
+ *
+ * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
+ *
+ * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
+ *
+ * Additionally, the %GFP_DMA flag may be set to indicate the memory
+ * must be suitable for DMA.  This can mean different things on different
+ * platforms.  For example, on i386, it means that the memory must come
+ * from the first 16MB.
+ */
+void * kmalloc (size_t size, int flags)
+{
+       cache_sizes_t *csizep = cache_sizes;
+
+       for (; csizep->cs_size; csizep++) {
+               if (size > csizep->cs_size)
+                       continue;
+               return __kmem_cache_alloc(flags & GFP_DMA ?
+                        csizep->cs_dmacachep : csizep->cs_cachep, flags);
+       }
+       return NULL;
+}
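+
+/*
+ * e.g. kmalloc(100, GFP_KERNEL) is served by the "size-128" general
+ * cache: the loop above skips size-32 and size-64 and stops at the
+ * first cs_size >= 100.  Requests beyond the largest cs_size (128 kB
+ * here) return NULL.
+ */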
+
+/**
+ * kmem_cache_free - Deallocate an object
+ * @cachep: The cache the allocation was from.
+ * @objp: The previously allocated object.
+ *
+ * Free an object which was previously allocated from this
+ * cache.
+ */
+void kmem_cache_free (kmem_cache_t *cachep, void *objp)
+{
+       unsigned long flags;
+#if DEBUG
+       CHECK_PAGE(virt_to_page(objp));
+       if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
+               BUG();
+#endif
+
+       local_irq_save(flags);
+       __kmem_cache_free(cachep, objp);
+       local_irq_restore(flags);
+}
+
+/**
+ * kfree - free previously allocated memory
+ * @objp: pointer returned by kmalloc.
+ *
+ * Don't free memory not originally allocated by kmalloc()
+ * or you will run into trouble.
+ */
+void kfree (const void *objp)
+{
+       kmem_cache_t *c;
+       unsigned long flags;
+
+       if (!objp)
+               return;
+       local_irq_save(flags);
+       CHECK_PAGE(virt_to_page(objp));
+       c = GET_PAGE_CACHE(virt_to_page(objp));
+       __kmem_cache_free(c, (void*)objp);
+       local_irq_restore(flags);
+}
+
+kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+{
+       cache_sizes_t *csizep = cache_sizes;
+
+       /* This function could be moved to the header file, and
+        * made inline so consumers can quickly determine what
+        * cache pointer they require.
+        */
+       for ( ; csizep->cs_size; csizep++) {
+               if (size > csizep->cs_size)
+                       continue;
+               break;
+       }
+       return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+}
+
+#ifdef CONFIG_SMP
+
+/* called with cache_chain_sem acquired.  */
+static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount)
+{
+       ccupdate_struct_t new;
+       int i;
+
+       /*
+        * These are admin-provided, so we are more graceful.
+        */
+       if (limit < 0)
+               return -EINVAL;
+       if (batchcount < 0)
+               return -EINVAL;
+       if (batchcount > limit)
+               return -EINVAL;
+       if (limit != 0 && !batchcount)
+               return -EINVAL;
+
+       memset(&new.new,0,sizeof(new.new));
+       if (limit) {
+               for (i = 0; i < smp_num_cpus; i++) {
+                       cpucache_t* ccnew;
+
+                       ccnew = kmalloc(sizeof(void*)*limit+
+                                       sizeof(cpucache_t), GFP_KERNEL);
+                       if (!ccnew)
+                               goto oom;
+                       ccnew->limit = limit;
+                       ccnew->avail = 0;
+                       new.new[cpu_logical_map(i)] = ccnew;
+               }
+       }
+       new.cachep = cachep;
+       spin_lock_irq(&cachep->spinlock);
+       cachep->batchcount = batchcount;
+       spin_unlock_irq(&cachep->spinlock);
+
+       smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+
+       for (i = 0; i < smp_num_cpus; i++) {
+               cpucache_t* ccold = new.new[cpu_logical_map(i)];
+               if (!ccold)
+                       continue;
+               local_irq_disable();
+               free_block(cachep, cc_entry(ccold), ccold->avail);
+               local_irq_enable();
+               kfree(ccold);
+       }
+       return 0;
+oom:
+       for (i--; i >= 0; i--)
+               kfree(new.new[cpu_logical_map(i)]);
+       return -ENOMEM;
+}
+
+static void enable_cpucache (kmem_cache_t *cachep)
+{
+       int err;
+       int limit;
+
+       /* FIXME: optimize */
+       if (cachep->objsize > PAGE_SIZE)
+               return;
+       if (cachep->objsize > 1024)
+               limit = 60;
+       else if (cachep->objsize > 256)
+               limit = 124;
+       else
+               limit = 252;
+
+       err = kmem_tune_cpucache(cachep, limit, limit/2);
+       if (err)
+               printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
+                                       cachep->name, -err);
+}
+
+static void enable_all_cpucaches (void)
+{
+       struct list_head* p;
+        unsigned long spin_flags;
+
+       down(&cache_chain_sem);
+
+       p = &cache_cache.next;
+       do {
+               kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next);
+
+               enable_cpucache(cachep);
+               p = cachep->next.next;
+       } while (p != &cache_cache.next);
+
+       up(&cache_chain_sem);
+}
+#endif
+
+/**
+ * kmem_cache_reap - Reclaim memory from caches.
+ * @gfp_mask: the type of memory required.
+ *
+ * Called from do_try_to_free_pages() and __alloc_pages()
+ */
+int kmem_cache_reap (int gfp_mask)
+{
+       slab_t *slabp;
+       kmem_cache_t *searchp;
+       kmem_cache_t *best_cachep;
+       unsigned int best_pages;
+       unsigned int best_len;
+       unsigned int scan;
+       int ret = 0;
+        unsigned long spin_flags;
+
+        down(&cache_chain_sem);
+
+       scan = REAP_SCANLEN;
+       best_len = 0;
+       best_pages = 0;
+       best_cachep = NULL;
+       searchp = clock_searchp;
+       do {
+               unsigned int pages;
+               struct list_head* p;
+               unsigned int full_free;
+
+               /* It's safe to test this without holding the cache-lock. */
+               if (searchp->flags & SLAB_NO_REAP)
+                       goto next;
+               spin_lock_irq(&searchp->spinlock);
+               if (searchp->growing)
+                       goto next_unlock;
+               if (searchp->dflags & DFLGS_GROWN) {
+                       searchp->dflags &= ~DFLGS_GROWN;
+                       goto next_unlock;
+               }
+#ifdef CONFIG_SMP
+               {
+                       cpucache_t *cc = cc_data(searchp);
+                       if (cc && cc->avail) {
+                               __free_block(searchp, cc_entry(cc), cc->avail);
+                               cc->avail = 0;
+                       }
+               }
+#endif
+
+               full_free = 0;
+               p = searchp->slabs_free.next;
+               while (p != &searchp->slabs_free) {
+                       slabp = list_entry(p, slab_t, list);
+#if DEBUG
+                       if (slabp->inuse)
+                               BUG();
+#endif
+                       full_free++;
+                       p = p->next;
+               }
+
+               /*
+                * Try to avoid slabs with constructors and/or
+                * more than one page per slab (as it can be difficult
+                * to get high orders from gfp()).
+                */
+               pages = full_free * (1<<searchp->gfporder);
+               if (searchp->ctor)
+                       pages = (pages*4+1)/5;
+               if (searchp->gfporder)
+                       pages = (pages*4+1)/5;
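+
+               /*
+                * Worked example: 10 free slabs of order 1 give
+                * pages = 10 * 2 = 20; a constructor scales that to
+                * (20*4+1)/5 = 16, and gfporder > 0 again to
+                * (16*4+1)/5 = 13, biasing the reaper towards caches
+                * whose pages are cheap to give back.
+                */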
+               if (pages > best_pages) {
+                       best_cachep = searchp;
+                       best_len = full_free;
+                       best_pages = pages;
+                       if (pages >= REAP_PERFECT) {
+                               clock_searchp = list_entry(searchp->next.next,
+                                                       kmem_cache_t,next);
+                               goto perfect;
+                       }
+               }
+next_unlock:
+               spin_unlock_irq(&searchp->spinlock);
+next:
+               searchp = list_entry(searchp->next.next,kmem_cache_t,next);
+       } while (--scan && searchp != clock_searchp);
+
+       clock_searchp = searchp;
+
+       if (!best_cachep)
+               /* couldn't find anything to reap */
+               goto out;
+
+       spin_lock_irq(&best_cachep->spinlock);
+perfect:
+       /* free only 50% of the free slabs */
+       best_len = (best_len + 1)/2;
+       for (scan = 0; scan < best_len; scan++) {
+               struct list_head *p;
+
+               if (best_cachep->growing)
+                       break;
+               p = best_cachep->slabs_free.prev;
+               if (p == &best_cachep->slabs_free)
+                       break;
+               slabp = list_entry(p,slab_t,list);
+#if DEBUG
+               if (slabp->inuse)
+                       BUG();
+#endif
+               list_del(&slabp->list);
+               STATS_INC_REAPED(best_cachep);
+
+               /* Safe to drop the lock. The slab is no longer linked to the
+                * cache.
+                */
+               spin_unlock_irq(&best_cachep->spinlock);
+               kmem_slab_destroy(best_cachep, slabp);
+               spin_lock_irq(&best_cachep->spinlock);
+       }
+       spin_unlock_irq(&best_cachep->spinlock);
+       ret = scan * (1 << best_cachep->gfporder);
+out:
+       up(&cache_chain_sem);
+       return ret;
+}
+
diff --git a/xen-2.4.16/common/softirq.c b/xen-2.4.16/common/softirq.c
new file mode 100644 (file)
index 0000000..3593271
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ *     linux/kernel/softirq.c
+ *
+ *     Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due to non-atomic handling of bh_mask_count. Copyright (C) 1998  Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+//#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+//#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/tqueue.h>
+
+/*
+   - No shared variables, all the data are CPU local.
+   - If a softirq needs serialization, let it serialize itself
+     by its own spinlocks.
+   - Even if a softirq is serialized, only the local cpu is marked for
+     execution, so we get a sort of weak cpu binding. Whether this
+     actually results in better locality is still unclear.
+   - These softirqs are not masked by global cli() and start_bh_atomic()
+     (for obvious reasons). Hence, old parts of the code that still use
+     global locks MUST NOT use softirqs directly, but must go through
+     interfacing routines that acquire the global locks. E.g. look at
+     the BHs implementation.
+
+   Examples:
+   - NET RX softirq. It is multithreaded and does not require
+     any global serialization.
+   - NET TX softirq. It kicks software netdevice queues, hence
+     it is logically serialized per device, but this serialization
+     is invisible to common code.
+   - Tasklets: each tasklet is serialized with respect to itself.
+   - Bottom halves: globally serialized, grr...
+ */
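+
+/*
+ * Illustrative usage sketch (MY_SOFTIRQ and my_action are hypothetical,
+ * not part of this file):
+ *
+ *     static void my_action(struct softirq_action *h)
+ *     {
+ *             ... runs with irqs enabled, at most once at a time per cpu ...
+ *     }
+ *
+ *     open_softirq(MY_SOFTIRQ, my_action, NULL);   installs the handler
+ *     raise_softirq(MY_SOFTIRQ);                   marks it pending; it runs
+ *                                                  from the next do_softirq()
+ */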
+
+irq_cpustat_t irq_stat[NR_CPUS];
+
+static struct softirq_action softirq_vec[32] __cacheline_aligned;
+
+
+asmlinkage void do_softirq()
+{
+       int cpu = smp_processor_id();
+       __u32 pending;
+       long flags;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       pending = softirq_pending(cpu);
+
+       while (pending) {
+               struct softirq_action *h;
+
+               local_bh_disable();
+restart:
+               /* Reset the pending bitmask before enabling irqs */
+               softirq_pending(cpu) = 0;
+
+               local_irq_enable();
+
+               h = softirq_vec;
+
+               do {
+                       if (pending & 1)
+                               h->action(h);
+                       h++;
+                       pending >>= 1;
+               } while (pending);
+
+               local_irq_disable();
+
+               pending = softirq_pending(cpu);
+               if (pending) goto restart;
+               __local_bh_enable();
+       }
+
+       local_irq_restore(flags);
+}
+
+/*
+ * This function must run with irq disabled!
+ */
+inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr)
+{
+       __cpu_raise_softirq(cpu, nr);
+
+#ifdef CONFIG_SMP
+        if ( cpu != smp_processor_id() )
+            smp_send_event_check_cpu(cpu);
+#endif
+}
+
+void raise_softirq(unsigned int nr)
+{
+       long flags;
+
+       local_irq_save(flags);
+       cpu_raise_softirq(smp_processor_id(), nr);
+       local_irq_restore(flags);
+}
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+       softirq_vec[nr].data = data;
+       softirq_vec[nr].action = action;
+}
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+       int cpu = smp_processor_id();
+       unsigned long flags;
+
+       local_irq_save(flags);
+       t->next = tasklet_vec[cpu].list;
+       tasklet_vec[cpu].list = t;
+       cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+void __tasklet_hi_schedule(struct tasklet_struct *t)
+{
+       int cpu = smp_processor_id();
+       unsigned long flags;
+
+       local_irq_save(flags);
+       t->next = tasklet_hi_vec[cpu].list;
+       tasklet_hi_vec[cpu].list = t;
+       cpu_raise_softirq(cpu, HI_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+static void tasklet_action(struct softirq_action *a)
+{
+       int cpu = smp_processor_id();
+       struct tasklet_struct *list;
+
+       local_irq_disable();
+       list = tasklet_vec[cpu].list;
+       tasklet_vec[cpu].list = NULL;
+       local_irq_enable();
+
+       while (list) {
+               struct tasklet_struct *t = list;
+
+               list = list->next;
+
+               if (tasklet_trylock(t)) {
+                       if (!atomic_read(&t->count)) {
+                               if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+                                       BUG();
+                               t->func(t->data);
+                               tasklet_unlock(t);
+                               continue;
+                       }
+                       tasklet_unlock(t);
+               }
+
+               local_irq_disable();
+               t->next = tasklet_vec[cpu].list;
+               tasklet_vec[cpu].list = t;
+               __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+               local_irq_enable();
+       }
+}
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+       int cpu = smp_processor_id();
+       struct tasklet_struct *list;
+
+       local_irq_disable();
+       list = tasklet_hi_vec[cpu].list;
+       tasklet_hi_vec[cpu].list = NULL;
+       local_irq_enable();
+
+       while (list) {
+               struct tasklet_struct *t = list;
+
+               list = list->next;
+
+               if (tasklet_trylock(t)) {
+                       if (!atomic_read(&t->count)) {
+                               if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+                                       BUG();
+                               t->func(t->data);
+                               tasklet_unlock(t);
+                               continue;
+                       }
+                       tasklet_unlock(t);
+               }
+
+               local_irq_disable();
+               t->next = tasklet_hi_vec[cpu].list;
+               tasklet_hi_vec[cpu].list = t;
+               __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+               local_irq_enable();
+       }
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+                 void (*func)(unsigned long), unsigned long data)
+{
+       t->next = NULL;
+       t->state = 0;
+       atomic_set(&t->count, 0);
+       t->func = func;
+       t->data = data;
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+       if (in_interrupt())
+               printk("Attempt to kill tasklet from interrupt\n");
+
+       while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+               set_current_state(TASK_RUNNING);
+               do {
+                       current->policy |= SCHED_YIELD;
+                       schedule();
+               } while (test_bit(TASKLET_STATE_SCHED, &t->state));
+       }
+       tasklet_unlock_wait(t);
+       clear_bit(TASKLET_STATE_SCHED, &t->state);
+}
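+
+/*
+ * Illustrative usage sketch (my_func and my_data are hypothetical, and
+ * tasklet_schedule() is assumed to be the usual header wrapper around
+ * __tasklet_schedule()):
+ *
+ *     struct tasklet_struct my_tasklet;
+ *
+ *     tasklet_init(&my_tasklet, my_func, my_data);
+ *     tasklet_schedule(&my_tasklet);   queue it; raises TASKLET_SOFTIRQ
+ *     tasklet_kill(&my_tasklet);       on teardown: wait until it is idle
+ */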
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by spinlock global_bh_lock.
+
+   It is still possible to make synchronize_bh() as
+   spin_unlock_wait(&global_bh_lock). This operation is not used
+   by kernel now, so that this lock is not made private only
+   due to wait_on_irq().
+
+   It can be removed only after auditing all the BHs.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+       int cpu = smp_processor_id();
+
+       if (!spin_trylock(&global_bh_lock))
+               goto resched;
+
+       if (!hardirq_trylock(cpu))
+               goto resched_unlock;
+
+       if (bh_base[nr])
+               bh_base[nr]();
+
+       hardirq_endlock(cpu);
+       spin_unlock(&global_bh_lock);
+       return;
+
+resched_unlock:
+       spin_unlock(&global_bh_lock);
+resched:
+       mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+       bh_base[nr] = routine;
+       mb();
+}
+
+void remove_bh(int nr)
+{
+       tasklet_kill(bh_task_vec+nr);
+       bh_base[nr] = NULL;
+}
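+
+/*
+ * Example from this changeset: timer.c defines timer_bh() and triggers
+ * it from do_timer() via mark_bh(TIMER_BH); the matching
+ * init_bh(TIMER_BH, timer_bh) registration happens at init time
+ * elsewhere in the tree. mark_bh() itself is presumably the usual
+ * header wrapper that schedules bh_task_vec[nr] as a high-priority
+ * tasklet, which then funnels into bh_action() above.
+ */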
+
+void __init softirq_init()
+{
+       int i;
+
+       for (i=0; i<32; i++)
+               tasklet_init(bh_task_vec+i, bh_action, i);
+
+       open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+       open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+void __run_task_queue(task_queue *list)
+{
+       struct list_head head, *next;
+       unsigned long flags;
+
+       spin_lock_irqsave(&tqueue_lock, flags);
+       list_add(&head, list);
+       list_del_init(list);
+       spin_unlock_irqrestore(&tqueue_lock, flags);
+
+       next = head.next;
+       while (next != &head) {
+               void (*f) (void *);
+               struct tq_struct *p;
+               void *data;
+
+               p = list_entry(next, struct tq_struct, list);
+               next = next->next;
+               f = p->routine;
+               data = p->data;
+               wmb();
+               p->sync = 0;
+               if (f)
+                       f(data);
+       }
+}
+
diff --git a/xen-2.4.16/common/timer.c b/xen-2.4.16/common/timer.c
new file mode 100644 (file)
index 0000000..3882753
--- /dev/null
@@ -0,0 +1,621 @@
+/*
+ *  linux/kernel/timer.c
+ *
+ *  Kernel internal timers, kernel timekeeping, basic process system calls
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
+ *
+ *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *              "A Kernel Model for Precision Timekeeping" by Dave Mills
+ *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *              serialize accesses to xtime/lost_ticks).
+ *                              Copyright (C) 1998  Andrea Arcangeli
+ *  1999-03-10  Improved NTP compatibility by Ulrich Windl
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/tqueue.h>
+#include <linux/delay.h>
+//#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+//#include <linux/kernel_stat.h>
+
+#include <xeno/event.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Timekeeping variables
+ */
+
+long tick = (1000000 + HZ/2) / HZ;     /* timer interrupt period */
+
+/* The current time */
+struct timeval xtime __attribute__ ((aligned (16)));
+
+/* Don't completely fail for HZ > 500.  */
+int tickadj = 500/HZ ? : 1;            /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+/* TIME_ERROR prevents overwriting the CMOS clock */
+int time_state = TIME_OK;              /* clock synchronization status */
+int time_status = STA_UNSYNC;          /* clock status bits            */
+long time_offset;                      /* time adjustment (us)         */
+long time_constant = 2;                        /* pll time constant            */
+long time_tolerance = MAXFREQ;         /* frequency tolerance (ppm)    */
+long time_precision = 1;               /* clock precision (us)         */
+long time_maxerror = NTP_PHASE_LIMIT;  /* maximum error (us)           */
+long time_esterror = NTP_PHASE_LIMIT;  /* estimated error (us)         */
+long time_phase;                       /* phase offset (scaled us)     */
+long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
+                                       /* frequency offset (scaled ppm)*/
+long time_adj;                         /* tick adjust (scaled 1 / HZ)  */
+long time_reftime;                     /* time at last adjustment (s)  */
+
+long time_adjust;
+long time_adjust_step;
+
+unsigned long event;
+
+unsigned long volatile jiffies;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+       int index;
+       struct list_head vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+       int index;
+       struct list_head vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5;
+static struct timer_vec tv4;
+static struct timer_vec tv3;
+static struct timer_vec tv2;
+static struct timer_vec_root tv1;
+
+static struct timer_vec * const tvecs[] = {
+       (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
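+
+/*
+ * With TVR_BITS = 8 and TVN_BITS = 6 this is the classic timer wheel:
+ * tv1 covers timers expiring up to 2^8 = 256 ticks ahead, tv2 up to
+ * 2^14, tv3 up to 2^20, tv4 up to 2^26, and tv5 the rest of the 32-bit
+ * range. internal_add_timer() below picks the vector from
+ * (expires - timer_jiffies) and indexes it with the corresponding
+ * bit-field of expires.
+ */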
+
+void init_timervecs (void)
+{
+       int i;
+
+       for (i = 0; i < TVN_SIZE; i++) {
+               INIT_LIST_HEAD(tv5.vec + i);
+               INIT_LIST_HEAD(tv4.vec + i);
+               INIT_LIST_HEAD(tv3.vec + i);
+               INIT_LIST_HEAD(tv2.vec + i);
+       }
+       for (i = 0; i < TVR_SIZE; i++)
+               INIT_LIST_HEAD(tv1.vec + i);
+}
+
+static unsigned long timer_jiffies;
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+       /*
+        * must be called with interrupts disabled (cli-ed)
+        */
+       unsigned long expires = timer->expires;
+       unsigned long idx = expires - timer_jiffies;
+       struct list_head * vec;
+
+       if (idx < TVR_SIZE) {
+               int i = expires & TVR_MASK;
+               vec = tv1.vec + i;
+       } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+               int i = (expires >> TVR_BITS) & TVN_MASK;
+               vec = tv2.vec + i;
+       } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+               int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+               vec =  tv3.vec + i;
+       } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+               int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+               vec = tv4.vec + i;
+       } else if ((signed long) idx < 0) {
+               /* can happen if you add a timer with expires == jiffies,
+                * or you set a timer to go off in the past
+                */
+               vec = tv1.vec + tv1.index;
+       } else if (idx <= 0xffffffffUL) {
+               int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+               vec = tv5.vec + i;
+       } else {
+               /* Can only get here on architectures with 64-bit jiffies */
+               INIT_LIST_HEAD(&timer->list);
+               return;
+       }
+       /*
+        * Timers are FIFO!
+        */
+       list_add(&timer->list, vec->prev);
+}
+
+/* Initialize both explicitly - let's try to have them in the same cache line */
+spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_SMP
+volatile struct timer_list * volatile running_timer;
+#define timer_enter(t) do { running_timer = t; mb(); } while (0)
+#define timer_exit() do { running_timer = NULL; } while (0)
+#define timer_is_running(t) (running_timer == t)
+#define timer_synchronize(t) while (timer_is_running(t)) barrier()
+#else
+#define timer_enter(t)         do { } while (0)
+#define timer_exit()           do { } while (0)
+#endif
+
+void add_timer(struct timer_list *timer)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&timerlist_lock, flags);
+       if (timer_pending(timer))
+               goto bug;
+       internal_add_timer(timer);
+       spin_unlock_irqrestore(&timerlist_lock, flags);
+       return;
+bug:
+       spin_unlock_irqrestore(&timerlist_lock, flags);
+       printk("bug: kernel timer added twice at %p.\n",
+                       __builtin_return_address(0));
+}
+
+static inline int detach_timer (struct timer_list *timer)
+{
+       if (!timer_pending(timer))
+               return 0;
+       list_del(&timer->list);
+       return 1;
+}
+
+int mod_timer(struct timer_list *timer, unsigned long expires)
+{
+       int ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&timerlist_lock, flags);
+       timer->expires = expires;
+       ret = detach_timer(timer);
+       internal_add_timer(timer);
+       spin_unlock_irqrestore(&timerlist_lock, flags);
+       return ret;
+}
+
+int del_timer(struct timer_list * timer)
+{
+       int ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&timerlist_lock, flags);
+       ret = detach_timer(timer);
+       timer->list.next = timer->list.prev = NULL;
+       spin_unlock_irqrestore(&timerlist_lock, flags);
+       return ret;
+}
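+
+/*
+ * Illustrative usage sketch (my_handler and the 10-tick delay are made
+ * up; the field names match what run_timer_list() below dereferences):
+ *
+ *     struct timer_list t;
+ *
+ *     t.list.next = t.list.prev = NULL;    not pending (init_timer()
+ *                                          normally does this)
+ *     t.function = my_handler;             void my_handler(unsigned long)
+ *     t.data = 0;
+ *     t.expires = jiffies + 10;
+ *     add_timer(&t);                       fires ~10 ticks from now
+ *     mod_timer(&t, jiffies + 20);         re-arm, whether pending or not
+ *     del_timer(&t);                       cancel if still queued
+ */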
+
+#ifdef CONFIG_SMP
+void sync_timers(void)
+{
+       spin_unlock_wait(&global_bh_lock);
+}
+
+/*
+ * SMP-specific function to delete a periodic timer.
+ * The caller must ensure, by some other means, that the timer cannot
+ * be re-armed. On exit the timer is not queued and its handler is not
+ * running on any CPU. Returns the number of times the timer was
+ * detached (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+       int ret = 0;
+
+       for (;;) {
+               unsigned long flags;
+               int running;
+
+               spin_lock_irqsave(&timerlist_lock, flags);
+               ret += detach_timer(timer);
+               timer->list.next = timer->list.prev = NULL;
+               running = timer_is_running(timer);
+               spin_unlock_irqrestore(&timerlist_lock, flags);
+
+               if (!running)
+                       break;
+
+               timer_synchronize(timer);
+       }
+
+       return ret;
+}
+#endif
+
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+       /* cascade all the timers from tv up one level */
+       struct list_head *head, *curr, *next;
+
+       head = tv->vec + tv->index;
+       curr = head->next;
+       /*
+        * We are removing _all_ timers from the list, so we don't have to
+        * detach them individually, just clear the list afterwards.
+        */
+       while (curr != head) {
+               struct timer_list *tmp;
+
+               tmp = list_entry(curr, struct timer_list, list);
+               next = curr->next;
+               list_del(curr); /* not needed */
+               internal_add_timer(tmp);
+               curr = next;
+       }
+       INIT_LIST_HEAD(head);
+       tv->index = (tv->index + 1) & TVN_MASK;
+}
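+
+/*
+ * Example: tv1 wraps every 256 ticks. Each time tv1.index returns to 0,
+ * run_timer_list() below cascades tv2.vec[tv2.index] down into tv1
+ * (and, when tv2 in turn wraps, tv3 into tv2, and so on), so every
+ * timer migrates towards tv1 and eventually expires from there.
+ */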
+
+static inline void run_timer_list(void)
+{
+       spin_lock_irq(&timerlist_lock);
+       while ((long)(jiffies - timer_jiffies) >= 0) {
+               struct list_head *head, *curr;
+               if (!tv1.index) {
+                       int n = 1;
+                       do {
+                               cascade_timers(tvecs[n]);
+                       } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+               }
+repeat:
+               head = tv1.vec + tv1.index;
+               curr = head->next;
+               if (curr != head) {
+                       struct timer_list *timer;
+                       void (*fn)(unsigned long);
+                       unsigned long data;
+
+                       timer = list_entry(curr, struct timer_list, list);
+                       fn = timer->function;
+                       data = timer->data;
+
+                       detach_timer(timer);
+                       timer->list.next = timer->list.prev = NULL;
+                       timer_enter(timer);
+                       spin_unlock_irq(&timerlist_lock);
+                       fn(data);
+                       spin_lock_irq(&timerlist_lock);
+                       timer_exit();
+                       goto repeat;
+               }
+               ++timer_jiffies; 
+               tv1.index = (tv1.index + 1) & TVR_MASK;
+       }
+       spin_unlock_irq(&timerlist_lock);
+}
+
+spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;
+
+void tqueue_bh(void)
+{
+       run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void)
+{
+       run_task_queue(&tq_immediate);
+}
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ */
+static void second_overflow(void)
+{
+    long ltemp;
+
+    /* Bump the maxerror field */
+    time_maxerror += time_tolerance >> SHIFT_USEC;
+    if ( time_maxerror > NTP_PHASE_LIMIT ) {
+       time_maxerror = NTP_PHASE_LIMIT;
+       time_status |= STA_UNSYNC;
+    }
+
+    /*
+     * Leap second processing. If in leap-insert state at
+     * the end of the day, the system clock is set back one
+     * second; if in leap-delete state, the system clock is
+     * set ahead one second. The microtime() routine or
+     * external clock driver will insure that reported time
+     * is always monotonic. The ugly divides should be
+     * replaced.
+     */
+    switch (time_state) {
+
+    case TIME_OK:
+       if (time_status & STA_INS)
+           time_state = TIME_INS;
+       else if (time_status & STA_DEL)
+           time_state = TIME_DEL;
+       break;
+
+    case TIME_INS:
+       if (xtime.tv_sec % 86400 == 0) {
+           xtime.tv_sec--;
+           time_state = TIME_OOP;
+           printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+       }
+       break;
+
+    case TIME_DEL:
+       if ((xtime.tv_sec + 1) % 86400 == 0) {
+           xtime.tv_sec++;
+           time_state = TIME_WAIT;
+           printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+       }
+       break;
+
+    case TIME_OOP:
+       time_state = TIME_WAIT;
+       break;
+
+    case TIME_WAIT:
+       if (!(time_status & (STA_INS | STA_DEL)))
+           time_state = TIME_OK;
+    }
+
+    /*
+     * Compute the phase adjustment for the next second. In
+     * PLL mode, the offset is reduced by a fixed factor
+     * times the time constant. In FLL mode the offset is
+     * used directly. In either mode, the maximum phase
+     * adjustment for each second is clamped so as to spread
+     * the adjustment over not more than the number of
+     * seconds between updates.
+     */
+    if (time_offset < 0) {
+       ltemp = -time_offset;
+       if (!(time_status & STA_FLL))
+           ltemp >>= SHIFT_KG + time_constant;
+       if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+           ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+       time_offset += ltemp;
+       time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+    } else {
+       ltemp = time_offset;
+       if (!(time_status & STA_FLL))
+           ltemp >>= SHIFT_KG + time_constant;
+       if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
+           ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
+       time_offset -= ltemp;
+       time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+    }
+
+    if (ltemp < 0)
+       time_adj -= -ltemp >>
+           (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+    else
+       time_adj += ltemp >>
+           (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+
+#if HZ == 100
+    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
+     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
+     */
+    if (time_adj < 0)
+       time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
+    else
+       time_adj += (time_adj >> 2) + (time_adj >> 5);
+#endif
+}
+
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+       if ( (time_adjust_step = time_adjust) != 0 ) {
+           /* We are doing an adjtime thing. 
+            *
+            * Prepare time_adjust_step to be within bounds.
+            * Note that a positive time_adjust means we want the clock
+            * to run faster.
+            *
+            * Limit the amount of the step to be in the range
+            * -tickadj .. +tickadj
+            */
+            if (time_adjust > tickadj)
+               time_adjust_step = tickadj;
+            else if (time_adjust < -tickadj)
+               time_adjust_step = -tickadj;
+            
+           /* Reduce by this step the amount of time left  */
+           time_adjust -= time_adjust_step;
+       }
+       xtime.tv_usec += tick + time_adjust_step;
+       /*
+        * Advance the phase, once it gets to one microsecond, then
+        * advance the tick more.
+        */
+       time_phase += time_adj;
+       if (time_phase <= -FINEUSEC) {
+               long ltemp = -time_phase >> SHIFT_SCALE;
+               time_phase += ltemp << SHIFT_SCALE;
+               xtime.tv_usec -= ltemp;
+       }
+       else if (time_phase >= FINEUSEC) {
+               long ltemp = time_phase >> SHIFT_SCALE;
+               time_phase -= ltemp << SHIFT_SCALE;
+               xtime.tv_usec += ltemp;
+       }
+}
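+
+/*
+ * Worked example (assuming HZ = 100, so tick = 10000us and tickadj = 5):
+ * if adjtime() has left time_adjust = +1000us, each tick above adds
+ * tick + 5us to xtime.tv_usec and knocks 5us off time_adjust, smearing
+ * the full adjustment over 200 ticks (2 seconds) instead of stepping
+ * the clock.
+ */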
+
+/*
+ * Using a loop looks inefficient, but "ticks" is
+ * usually just one (we shouldn't be losing ticks;
+ * we do it this way mainly for interrupt latency
+ * reasons, not because we expect lots of lost
+ * timer ticks).
+ */
+static void update_wall_time(unsigned long ticks)
+{
+       do {
+               ticks--;
+               update_wall_time_one_tick();
+       } while (ticks);
+
+       if (xtime.tv_usec >= 1000000) {
+           xtime.tv_usec -= 1000000;
+           xtime.tv_sec++;
+           second_overflow();
+       }
+}
+
+static inline void do_process_times(struct task_struct *p,
+       unsigned long user, unsigned long system)
+{
+    //unsigned long psecs;
+
+//     psecs = (p->times.tms_utime += user);
+       //psecs += (p->times.tms_stime += system);
+}
+
+
+void update_one_process(struct task_struct *p, unsigned long user,
+                       unsigned long system, int cpu)
+{
+//     p->per_cpu_utime[cpu] += user;
+//     p->per_cpu_stime[cpu] += system;
+       do_process_times(p, user, system);
+}      
+
+/*
+ * Called from the timer interrupt handler to charge one tick to the current 
+ * process.  user_tick is 1 if the tick is user time, 0 for system.
+ */
+void update_process_times(int user_tick)
+{
+    struct task_struct *p = current;
+    int cpu = smp_processor_id(), system = user_tick ^ 1;
+    
+    update_one_process(p, user_tick, system, cpu);
+    
+    if ( --p->counter <= 0 )
+    {
+        p->counter = 0;
+        set_bit(_HYP_EVENT_NEED_RESCHED, &p->hyp_events);
+    }
+}
+
+
+/* jiffies at the most recent update of wall time */
+unsigned long wall_jiffies;
+
+/*
+ * This spinlock protects us from races in SMP while playing with xtime. -arca
+ */
+rwlock_t xtime_lock = RW_LOCK_UNLOCKED;
+
+static inline void update_times(void)
+{
+       unsigned long ticks;
+
+       /*
+        * update_times() is run from the raw timer_bh handler so we
+        * just know that the irqs are locally enabled and so we don't
+        * need to save/restore the flags of the local CPU here. -arca
+        */
+       write_lock_irq(&xtime_lock);
+
+       ticks = jiffies - wall_jiffies;
+       if (ticks) {
+               wall_jiffies += ticks;
+               update_wall_time(ticks);
+       }
+       write_unlock_irq(&xtime_lock);
+}
+
+void timer_bh(void)
+{
+       update_times();
+       run_timer_list();
+}
+
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/lib.h>
+#include <xeno/config.h>
+#include <xeno/smp.h>
+#include <xeno/irq.h>
+#include <asm/msr.h>
+
+void do_timer(struct pt_regs *regs)
+{
+    struct task_struct *p;
+    shared_info_t *s;
+    unsigned long long wall;
+    unsigned long cpu_mask = 0;
+
+    (*(unsigned long *)&jiffies)++;
+
+    if ( !using_apic_timer )
+        update_process_times(user_mode(regs));
+
+    rdtscll(wall);
+
+    read_lock(&tasklist_lock);
+    p = &idle0_task;
+    do {
+        s = p->shared_info;
+        s->wall_time = s->domain_time = wall;
+        cpu_mask |= mark_guest_event(p, _EVENT_TIMER);
+    }
+    while ( (p = p->next_task) != &idle0_task );
+    read_unlock(&tasklist_lock);
+
+    guest_event_notify(cpu_mask);
+
+    mark_bh(TIMER_BH);
+    if (TQ_ACTIVE(tq_timer))
+        mark_bh(TQUEUE_BH);
+}
+
+void get_fast_time(struct timeval * tm)
+{
+        *tm = xtime;
+}
diff --git a/xen-2.4.16/common/vsprintf.c b/xen-2.4.16/common/vsprintf.c
new file mode 100644 (file)
index 0000000..fe17225
--- /dev/null
@@ -0,0 +1,713 @@
+/*
+ *  linux/lib/vsprintf.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
+/*
+ * Wirzenius wrote this portably, Torvalds fucked it up :-)
+ */
+
+/* 
+ * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+ * - changed to provide snprintf and vsnprintf functions
+ */
+
+#include <stdarg.h>
+#include <xeno/ctype.h>
+#include <xeno/lib.h>
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+    unsigned long result = 0,value;
+
+    if (!base) {
+        base = 10;
+        if (*cp == '0') {
+            base = 8;
+            cp++;
+            if ((*cp == 'x') && isxdigit(cp[1])) {
+                cp++;
+                base = 16;
+            }
+        }
+    }
+    while (isxdigit(*cp) &&
+           (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+        result = result*base + value;
+        cp++;
+    }
+    if (endp)
+        *endp = (char *)cp;
+    return result;
+}
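+
+/*
+ * Examples (base == 0 auto-detects the radix from the prefix):
+ *
+ *     simple_strtoul("42",   NULL, 0) == 42     decimal
+ *     simple_strtoul("0755", NULL, 0) == 493    leading 0: octal
+ *     simple_strtoul("0x1f", NULL, 0) == 31     leading 0x: hex
+ */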
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp,char **endp,unsigned int base)
+{
+    if(*cp=='-')
+        return -simple_strtoul(cp+1,endp,base);
+    return simple_strtoul(cp,endp,base);
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base)
+{
+    unsigned long long result = 0,value;
+
+    if (!base) {
+        base = 10;
+        if (*cp == '0') {
+            base = 8;
+            cp++;
+            if ((*cp == 'x') && isxdigit(cp[1])) {
+                cp++;
+                base = 16;
+            }
+        }
+    }
+    while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
+                                                               ? toupper(*cp) : *cp)-'A'+10) < base) {
+        result = result*base + value;
+        cp++;
+    }
+    if (endp)
+        *endp = (char *)cp;
+    return result;
+}
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp,char **endp,unsigned int base)
+{
+    if(*cp=='-')
+        return -simple_strtoull(cp+1,endp,base);
+    return simple_strtoull(cp,endp,base);
+}
+
+static int skip_atoi(const char **s)
+{
+    int i=0;
+
+    while (isdigit(**s))
+        i = i*10 + *((*s)++) - '0';
+    return i;
+}
+
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SPECIAL        32              /* 0x */
+#define LARGE  64              /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * buf, char * end, long num, int base, int size, int precision, int type)
+{
+    char c,sign,tmp[66];
+    const char *digits;
+    const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+    const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    int i;
+
+    digits = (type & LARGE) ? large_digits : small_digits;
+    if (type & LEFT)
+        type &= ~ZEROPAD;
+    if (base < 2 || base > 36)
+        return buf;
+    c = (type & ZEROPAD) ? '0' : ' ';
+    sign = 0;
+    if (type & SIGN) {
+        if (num < 0) {
+            sign = '-';
+            num = -num;
+            size--;
+        } else if (type & PLUS) {
+            sign = '+';
+            size--;
+        } else if (type & SPACE) {
+            sign = ' ';
+            size--;
+        }
+    }
+    if (type & SPECIAL) {
+        if (base == 16)
+            size -= 2;
+        else if (base == 8)
+            size--;
+    }
+    i = 0;
+    if (num == 0)
+        tmp[i++]='0';
+    else 
+    {
+        /* XXX KAF: force unsigned mod and div. */
+        unsigned long num2=(unsigned long)num;
+        unsigned int base2=(unsigned int)base;
+        while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; }
+    }
+    if (i > precision)
+        precision = i;
+    size -= precision;
+    if (!(type&(ZEROPAD+LEFT))) {
+        while(size-->0) {
+            if (buf <= end)
+                *buf = ' ';
+            ++buf;
+        }
+    }
+    if (sign) {
+        if (buf <= end)
+            *buf = sign;
+        ++buf;
+    }
+    if (type & SPECIAL) {
+        if (base==8) {
+            if (buf <= end)
+                *buf = '0';
+            ++buf;
+        } else if (base==16) {
+            if (buf <= end)
+                *buf = '0';
+            ++buf;
+            if (buf <= end)
+                *buf = digits[33];
+            ++buf;
+        }
+    }
+    if (!(type & LEFT)) {
+        while (size-- > 0) {
+            if (buf <= end)
+                *buf = c;
+            ++buf;
+        }
+    }
+    while (i < precision--) {
+        if (buf <= end)
+            *buf = '0';
+        ++buf;
+    }
+    while (i-- > 0) {
+        if (buf <= end)
+            *buf = tmp[i];
+        ++buf;
+    }
+    while (size-- > 0) {
+        if (buf <= end)
+            *buf = ' ';
+        ++buf;
+    }
+    return buf;
+}
+
+/**
+* vsnprintf - Format a string and place it in a buffer
+* @buf: The buffer to place the result into
+* @size: The size of the buffer, including the trailing null space
+* @fmt: The format string to use
+* @args: Arguments for the format string
+*
+* Call this function if you are already dealing with a va_list.
+* You probably want snprintf instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+    int len;
+    unsigned long long num;
+    int i, base;
+    char *str, *end, c;
+    const char *s;
+
+    int flags;         /* flags to number() */
+
+    int field_width;   /* width of output field */
+    int precision;             /* min. # of digits for integers; max
+                                  number of chars for a string */
+    int qualifier;             /* 'h', 'l', or 'L' for integer fields */
+                               /* 'z' support added 23/7/1999 S.H.    */
+                               /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+    str = buf;
+    end = buf + size - 1;
+
+    if (end < buf - 1) {
+        end = ((void *) -1);
+        size = end - buf + 1;
+    }
+
+    for (; *fmt ; ++fmt) {
+        if (*fmt != '%') {
+            if (str <= end)
+                *str = *fmt;
+            ++str;
+            continue;
+        }
+
+        /* process flags */
+        flags = 0;
+    repeat:
+        ++fmt;         /* this also skips first '%' */
+        switch (*fmt) {
+        case '-': flags |= LEFT; goto repeat;
+        case '+': flags |= PLUS; goto repeat;
+        case ' ': flags |= SPACE; goto repeat;
+        case '#': flags |= SPECIAL; goto repeat;
+        case '0': flags |= ZEROPAD; goto repeat;
+        }
+
+        /* get field width */
+        field_width = -1;
+        if (isdigit(*fmt))
+            field_width = skip_atoi(&fmt);
+        else if (*fmt == '*') {
+            ++fmt;
+            /* it's the next argument */
+            field_width = va_arg(args, int);
+            if (field_width < 0) {
+                field_width = -field_width;
+                flags |= LEFT;
+            }
+        }
+
+        /* get the precision */
+        precision = -1;
+        if (*fmt == '.') {
+            ++fmt;     
+            if (isdigit(*fmt))
+                precision = skip_atoi(&fmt);
+            else if (*fmt == '*') {
+                ++fmt;
+                /* it's the next argument */
+                precision = va_arg(args, int);
+            }
+            if (precision < 0)
+                precision = 0;
+        }
+
+        /* get the conversion qualifier */
+        qualifier = -1;
+        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') {
+            qualifier = *fmt;
+            ++fmt;
+            if (qualifier == 'l' && *fmt == 'l') {
+                qualifier = 'L';
+                ++fmt;
+            }
+        }
+
+        /* default base */
+        base = 10;
+
+        switch (*fmt) {
+        case 'c':
+            if (!(flags & LEFT)) {
+                while (--field_width > 0) {
+                    if (str <= end)
+                        *str = ' ';
+                    ++str;
+                }
+            }
+            c = (unsigned char) va_arg(args, int);
+            if (str <= end)
+                *str = c;
+            ++str;
+            while (--field_width > 0) {
+                if (str <= end)
+                    *str = ' ';
+                ++str;
+            }
+            continue;
+
+        case 's':
+            s = va_arg(args, char *);
+            if (!s)
+                s = "<NULL>";
+
+            len = strnlen(s, precision);
+
+            if (!(flags & LEFT)) {
+                while (len < field_width--) {
+                    if (str <= end)
+                        *str = ' ';
+                    ++str;
+                }
+            }
+            for (i = 0; i < len; ++i) {
+                if (str <= end)
+                    *str = *s;
+                ++str; ++s;
+            }
+            while (len < field_width--) {
+                if (str <= end)
+                    *str = ' ';
+                ++str;
+            }
+            continue;
+
+        case 'p':
+            if (field_width == -1) {
+                field_width = 2*sizeof(void *);
+                flags |= ZEROPAD;
+            }
+            str = number(str, end,
+                         (unsigned long) va_arg(args, void *),
+                         16, field_width, precision, flags);
+            continue;
+
+
+        case 'n':
+            /* FIXME: what does C99 say about the overflow case here? */
+            if (qualifier == 'l') {
+                long * ip = va_arg(args, long *);
+                *ip = (str - buf);
+            } else if (qualifier == 'Z') {
+                size_t * ip = va_arg(args, size_t *);
+                *ip = (str - buf);
+            } else {
+                int * ip = va_arg(args, int *);
+                *ip = (str - buf);
+            }
+            continue;
+
+        case '%':
+            if (str <= end)
+                *str = '%';
+            ++str;
+            continue;
+
+        /* integer number formats - set up the flags and "break" */
+        case 'o':
+            base = 8;
+            break;
+
+        case 'X':
+            flags |= LARGE;
+        case 'x':
+            base = 16;
+            break;
+
+        case 'd':
+        case 'i':
+            flags |= SIGN;
+        case 'u':
+            break;
+
+        default:
+            if (str <= end)
+                *str = '%';
+            ++str;
+            if (*fmt) {
+                if (str <= end)
+                    *str = *fmt;
+                ++str;
+            } else {
+                --fmt;
+            }
+            continue;
+        }
+        if (qualifier == 'L')
+            num = va_arg(args, long long);
+        else if (qualifier == 'l') {
+            num = va_arg(args, unsigned long);
+            if (flags & SIGN)
+                num = (signed long) num;
+        } else if (qualifier == 'Z') {
+            num = va_arg(args, size_t);
+        } else if (qualifier == 'h') {
+            num = (unsigned short) va_arg(args, int);
+            if (flags & SIGN)
+                num = (signed short) num;
+        } else {
+            num = va_arg(args, unsigned int);
+            if (flags & SIGN)
+                num = (signed int) num;
+        }
+
+        str = number(str, end, num, base,
+                     field_width, precision, flags);
+    }
+    if (str <= end)
+        *str = '\0';
+    else if (size > 0)
+        /* don't write out a null byte if the buf size is zero */
+        *end = '\0';
+    /* the trailing null byte doesn't count towards the total
+     * ++str;
+     */
+    return str-buf;
+}
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int snprintf(char * buf, size_t size, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i=vsnprintf(buf,size,fmt,args);
+    va_end(args);
+    return i;
+}
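+
+/*
+ * Example: the return value is the length the fully formatted string
+ * would need, so callers can detect truncation:
+ *
+ *     char buf[4];
+ *     int n = snprintf(buf, sizeof(buf), "%d", 12345);
+ *     n == 5, buf == "123" (NUL-terminated at buf[3])
+ */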
+
+/**
+ * vsprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want sprintf instead.
+ */
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+    return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args);
+}
+
+
+/**
+ * sprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int sprintf(char * buf, const char *fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args, fmt);
+    i=vsprintf(buf,fmt,args);
+    va_end(args);
+    return i;
+}
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf:       input buffer
+ * @fmt:       format of buffer
+ * @args:      arguments
+ */
+int vsscanf(const char * buf, const char * fmt, va_list args)
+{
+    const char *str = buf;
+    char *next;
+    int num = 0;
+    int qualifier;
+    int base;
+    int field_width = -1;
+    int is_sign = 0;
+
+    while(*fmt && *str) {
+        /* skip any white space in format */
+        /* white space in format matches any amount of
+         * white space, including none, in the input.
+         */
+        if (isspace(*fmt)) {
+            while (isspace(*fmt))
+                ++fmt;
+            while (isspace(*str))
+                ++str;
+        }
+
+        /* anything that is not a conversion must match exactly */
+        if (*fmt != '%' && *fmt) {
+            if (*fmt++ != *str++)
+                break;
+            continue;
+        }
+
+        if (!*fmt)
+            break;
+        ++fmt;
+               
+        /* skip this conversion.
+         * advance both strings to next white space
+         */
+        if (*fmt == '*') {
+            while (!isspace(*fmt) && *fmt)
+                fmt++;
+            while (!isspace(*str) && *str)
+                str++;
+            continue;
+        }
+
+        /* get field width (reset for each conversion; the 'c' and 's'
+         * cases below consume it) */
+        field_width = -1;
+        if (isdigit(*fmt))
+            field_width = skip_atoi(&fmt);
+
+        /* get conversion qualifier */
+        qualifier = -1;
+        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') {
+            qualifier = *fmt;
+            fmt++;
+        }
+        base = 10;
+        is_sign = 0;
+
+        if (!*fmt || !*str)
+            break;
+
+        switch(*fmt++) {
+        case 'c':
+        {
+            char *s = (char *) va_arg(args,char*);
+            if (field_width == -1)
+                field_width = 1;
+            do {
+                *s++ = *str++;
+            } while (--field_width > 0 && *str);
+            num++;
+        }
+        continue;
+        case 's':
+        {
+            char *s = (char *) va_arg(args, char *);
+            if(field_width == -1)
+                field_width = INT_MAX;
+            /* first, skip leading white space in buffer */
+            while (isspace(*str))
+                str++;
+
+            /* now copy until next white space */
+            while (*str && !isspace(*str) && field_width--) {
+                *s++ = *str++;
+            }
+            *s = '\0';
+            num++;
+        }
+        continue;
+        case 'n':
+            /* return number of characters read so far */
+        {
+            int *i = (int *)va_arg(args,int*);
+            *i = str - buf;
+        }
+        continue;
+        case 'o':
+            base = 8;
+            break;
+        case 'x':
+        case 'X':
+            base = 16;
+            break;
+        case 'd':
+        case 'i':
+            is_sign = 1;
+        case 'u':
+            break;
+        case '%':
+            /* looking for '%' in str */
+            if (*str++ != '%') 
+                return num;
+            continue;
+        default:
+            /* invalid format; stop here */
+            return num;
+        }
+
+        /* have some sort of integer conversion.
+         * first, skip white space in buffer.
+         */
+        while (isspace(*str))
+            str++;
+
+        if (!*str || !isdigit(*str))
+            break;
+
+        switch(qualifier) {
+        case 'h':
+            if (is_sign) {
+                short *s = (short *) va_arg(args,short *);
+                *s = (short) simple_strtol(str,&next,base);
+            } else {
+                unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
+                *s = (unsigned short) simple_strtoul(str, &next, base);
+            }
+            break;
+        case 'l':
+            if (is_sign) {
+                long *l = (long *) va_arg(args,long *);
+                *l = simple_strtol(str,&next,base);
+            } else {
+                unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
+                *l = simple_strtoul(str,&next,base);
+            }
+            break;
+        case 'L':
+            if (is_sign) {
+                long long *l = (long long*) va_arg(args,long long *);
+                *l = simple_strtoll(str,&next,base);
+            } else {
+                unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
+                *l = simple_strtoull(str,&next,base);
+            }
+            break;
+        case 'Z':
+        {
+            size_t *s = (size_t*) va_arg(args,size_t*);
+            *s = (size_t) simple_strtoul(str,&next,base);
+        }
+        break;
+        default:
+            if (is_sign) {
+                int *i = (int *) va_arg(args, int*);
+                *i = (int) simple_strtol(str,&next,base);
+            } else {
+                unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
+                *i = (unsigned int) simple_strtoul(str,&next,base);
+            }
+            break;
+        }
+        num++;
+
+        if (!next)
+            break;
+        str = next;
+    }
+    return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf:       input buffer
+ * @fmt:       formatting of buffer
+ * @...:       resulting arguments
+ */
+int sscanf(const char * buf, const char * fmt, ...)
+{
+    va_list args;
+    int i;
+
+    va_start(args,fmt);
+    i = vsscanf(buf,fmt,args);
+    va_end(args);
+    return i;
+}
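+
+/*
+ * Example: the return value counts successful conversions, so partial
+ * matches can be detected:
+ *
+ *     int maj, min;
+ *     if (sscanf("3:1", "%d:%d", &maj, &min) == 2)
+ *             ...   maj == 3, min == 1
+ */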
diff --git a/xen-2.4.16/drivers/Makefile b/xen-2.4.16/drivers/Makefile
new file mode 100644 (file)
index 0000000..5aa320f
--- /dev/null
@@ -0,0 +1,12 @@
+
+default:
+       $(MAKE) -C pci
+       $(MAKE) -C net
+       $(MAKE) -C block
+       $(MAKE) -C ide
+
+clean:
+       $(MAKE) -C pci clean
+       $(MAKE) -C net clean
+       $(MAKE) -C block clean
+       $(MAKE) -C ide clean
diff --git a/xen-2.4.16/drivers/block/Makefile b/xen-2.4.16/drivers/block/Makefile
new file mode 100644 (file)
index 0000000..574b7d2
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o driver.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/block/blkpg.c b/xen-2.4.16/drivers/block/blkpg.c
new file mode 100644 (file)
index 0000000..2e27a1a
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ * Partition table and disk geometry handling
+ *
+ * This obsoletes the partition-handling code in genhd.c:
+ * Userspace can look at a disk in arbitrary format and tell
+ * the kernel what partitions there are on the disk, and how
+ * these should be numbered.
+ * It also allows one to repartition a disk that is being used.
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ *    get_whole_disk()          (given the device number of a partition, find
+ *                               the device number of the encompassing disk)
+ *    get_all_partitions()      (given the device number of a disk, return the
+ *                               device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ *    add_partition()
+ *    delete_partition()
+ *    test_partition_in_use()   (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ *    get_geometry()
+ *    set_geometry()
+ *    get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
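+
+/*
+ * Illustrative use (a sketch; the userspace ioctl plumbing lives in the
+ * headers, not in this file): to add partition 2, starting 1 MiB into
+ * the disk and 100 MiB long, one would fill in
+ *
+ *     struct blkpg_partition p;
+ *     p.start = 1 << 20;  p.length = 100 << 20;  p.pno = 2;
+ *
+ *     struct blkpg_ioctl_arg a;
+ *     a.op = BLKPG_ADD_PARTITION;  a.datalen = sizeof(p);  a.data = &p;
+ *
+ * blkpg_ioctl() below dispatches this to add_partition(), which
+ * converts start/length from bytes to 512-byte sectors.
+ */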
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/errno.h>
+/*#include <xeno/fs.h> */              /* for BLKRASET, ... */
+#include <xeno/sched.h>                /* for capable() */
+#include <xeno/blk.h>                  /* for set_device_ro() */
+#include <xeno/blkpg.h>
+#include <xeno/genhd.h>
+/*#include <xeno/swap.h>*/                     /* for is_swap_partition() */
+#include <xeno/module.h>               /* for EXPORT_SYMBOL */
+
+#include <asm/uaccess.h>
+
+#define is_mounted(_dev)         (0)
+#define is_swap_partition(_dev)  (0)
+
+#define fsync_dev(_dev) (panic("fsync_dev???"))
+#define invalidate_buffers(_dev) (panic("invalidate_buffers???"))
+
+/*
+ * What is the data describing a partition?
+ *
+ * 1. a device number (kdev_t)
+ * 2. a starting sector and number of sectors (hd_struct)
+ *    given in the part[] array of the gendisk structure for the drive.
+ *
+ * The number of sectors is replicated in the sizes[] array of
+ * the gendisk structure for the major, which again is copied to
+ * the blk_size[][] array.
+ * (However, hd_struct has the number of 512-byte sectors,
+ *  g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
+ * Note that several drives may have the same major.
+ */
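+
+/*
+ * Example of the unit mismatch: a 100 MiB partition has
+ * nr_sects = 204800 512-byte sectors, but g->sizes[] and blk_size[][]
+ * record it as 102400 1024-byte blocks -- hence the
+ * ">> (BLOCK_SIZE_BITS - 9)" conversions below.
+ */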
+
+/*
+ * Add a partition.
+ *
+ * returns: EINVAL: bad parameters
+ *          ENXIO: cannot find drive
+ *          EBUSY: proposed partition overlaps an existing one
+ *                 or has the same number as an existing one
+ *          0: all OK.
+ */
+int add_partition(kdev_t dev, struct blkpg_partition *p) {
+       struct gendisk *g;
+       long long ppstart, pplength;
+       long pstart, plength;
+       int i, drive, first_minor, end_minor, minor;
+
+       /* convert bytes to sectors, check for fit in a hd_struct */
+       ppstart = (p->start >> 9);
+       pplength = (p->length >> 9);
+       pstart = ppstart;
+       plength = pplength;
+       if (pstart != ppstart || plength != pplength
+           || pstart < 0 || plength < 0)
+               return -EINVAL;
+
+       /* find the drive major */
+       g = get_gendisk(dev);
+       if (!g)
+               return -ENXIO;
+
+       /* existing drive? */
+       drive = (MINOR(dev) >> g->minor_shift);
+       first_minor = (drive << g->minor_shift);
+       end_minor   = first_minor + g->max_p;
+       if (drive >= g->nr_real)
+               return -ENXIO;
+
+       /* drive and partition number OK? */
+       if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+               return -EINVAL;
+
+       /* partition number in use? */
+       minor = first_minor + p->pno;
+       if (g->part[minor].nr_sects != 0)
+               return -EBUSY;
+
+       /* overlap? */
+       for (i=first_minor+1; i<end_minor; i++)
+               if (!(pstart+plength <= g->part[i].start_sect ||
+                     pstart >= g->part[i].start_sect + g->part[i].nr_sects))
+                       return -EBUSY;
+
+       /* all seems OK */
+       g->part[minor].start_sect = pstart;
+       g->part[minor].nr_sects = plength;
+       if (g->sizes)
+               g->sizes[minor] = (plength >> (BLOCK_SIZE_BITS - 9));
+#ifdef DEVFS_MUST_DIE
+       devfs_register_partitions (g, first_minor, 0);
+#endif
+       return 0;
+}
+
+/*
+ * Delete a partition given by partition number
+ *
+ * returns: EINVAL: bad parameters
+ *          ENXIO: cannot find partition
+ *          EBUSY: partition is busy
+ *          0: all OK.
+ *
+ * Note that the dev argument refers to the entire disk, not the partition.
+ */
+int del_partition(kdev_t dev, struct blkpg_partition *p) {
+       struct gendisk *g;
+       kdev_t devp;
+       int drive, first_minor, minor;
+
+       /* find the drive major */
+       g = get_gendisk(dev);
+       if (!g)
+               return -ENXIO;
+
+       /* drive and partition number OK? */
+       drive = (MINOR(dev) >> g->minor_shift);
+       first_minor = (drive << g->minor_shift);
+       if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+               return -EINVAL;
+
+       /* existing drive and partition? */
+       minor = first_minor + p->pno;
+       if (drive >= g->nr_real || g->part[minor].nr_sects == 0)
+               return -ENXIO;
+
+       /* partition in use? Incomplete check for now. */
+       devp = MKDEV(MAJOR(dev), minor);
+       if (is_mounted(devp) || is_swap_partition(devp))
+               return -EBUSY;
+
+       /* all seems OK */
+       fsync_dev(devp);
+       invalidate_buffers(devp);
+
+       g->part[minor].start_sect = 0;
+       g->part[minor].nr_sects = 0;
+       if (g->sizes)
+               g->sizes[minor] = 0;
+#ifdef DEVFS_MUST_DIE
+       devfs_register_partitions (g, first_minor, 0);
+#endif
+
+       return 0;
+}
+
+int blkpg_ioctl(kdev_t dev, struct blkpg_ioctl_arg *arg)
+{
+       struct blkpg_ioctl_arg a;
+       struct blkpg_partition p;
+       int len;
+
+       if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
+               return -EFAULT;
+
+       switch (a.op) {
+               case BLKPG_ADD_PARTITION:
+               case BLKPG_DEL_PARTITION:
+                       len = a.datalen;
+                       if (len < sizeof(struct blkpg_partition))
+                               return -EINVAL;
+                       if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+                               return -EFAULT;
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if (a.op == BLKPG_ADD_PARTITION)
+                               return add_partition(dev, &p);
+                       else
+                               return del_partition(dev, &p);
+               default:
+                       return -EINVAL;
+       }
+}
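+
+/*
+ * Usage sketch (editor's illustration, user-space view; all values
+ * hypothetical): how a partitioning tool reaches add_partition()
+ * through the BLKPG ioctl on the whole-disk device node.
+ */
+#if 0  /* user-space fragment */
+static int example_add_partition(int whole_disk_fd)
+{
+       struct blkpg_partition p = { 0 };
+       struct blkpg_ioctl_arg a = { 0 };
+
+       p.pno    = 1;                   /* partition number            */
+       p.start  = 63ULL * 512;         /* byte offset of first sector */
+       p.length = 40960ULL * 512;      /* length in bytes             */
+
+       a.op      = BLKPG_ADD_PARTITION;
+       a.datalen = sizeof(p);
+       a.data    = &p;
+
+       return ioctl(whole_disk_fd, BLKPG, &a); /* -> blkpg_ioctl() */
+}
+#endif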
+
+/*
+ * Common ioctls for block devices
+ */
+
+int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg)
+{
+#if 1
+    printk("May want to check out blk_ioctl...\n");
+    return -EINVAL;
+#else
+       struct gendisk *g;
+       u64 ullval = 0;
+       int intval;
+
+       if (!dev)
+               return -EINVAL;
+
+       switch (cmd) {
+               case BLKROSET:
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if (get_user(intval, (int *)(arg)))
+                               return -EFAULT;
+                       set_device_ro(dev, intval);
+                       return 0;
+               case BLKROGET:
+                       intval = (is_read_only(dev) != 0);
+                       return put_user(intval, (int *)(arg));
+
+               case BLKRASET:
+                       if(!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if(arg > 0xff)
+                               return -EINVAL;
+                       read_ahead[MAJOR(dev)] = arg;
+                       return 0;
+               case BLKRAGET:
+                       if (!arg)
+                               return -EINVAL;
+                       return put_user(read_ahead[MAJOR(dev)], (long *) arg);
+
+               case BLKFLSBUF:
+                       if(!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       fsync_dev(dev);
+                       invalidate_buffers(dev);
+                       return 0;
+
+               case BLKSSZGET:
+                       /* get block device sector size as needed e.g. by fdisk */
+                       intval = get_hardsect_size(dev);
+                       return put_user(intval, (int *) arg);
+
+               case BLKGETSIZE:
+               case BLKGETSIZE64:
+                       g = get_gendisk(dev);
+                       if (g)
+                               ullval = g->part[MINOR(dev)].nr_sects;
+
+                       if (cmd == BLKGETSIZE)
+                               return put_user((unsigned long)ullval, (unsigned long *)arg);
+                       else
+                               return put_user(ullval << 9, (u64 *)arg);
+#if 0
+               case BLKRRPART: /* Re-read partition tables */
+                       if (!capable(CAP_SYS_ADMIN)) 
+                               return -EACCES;
+                       return reread_partitions(dev, 1);
+#endif
+
+               case BLKPG:
+                       return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg);
+                       
+               case BLKELVGET:
+                       return blkelvget_ioctl(&blk_get_queue(dev)->elevator,
+                                              (blkelv_ioctl_arg_t *) arg);
+               case BLKELVSET:
+                       return blkelvset_ioctl(&blk_get_queue(dev)->elevator,
+                                              (blkelv_ioctl_arg_t *) arg);
+
+               case BLKBSZGET:
+                       /* get the logical block size (cf. BLKSSZGET) */
+                       intval = BLOCK_SIZE;
+                       if (blksize_size[MAJOR(dev)])
+                               intval = blksize_size[MAJOR(dev)][MINOR(dev)];
+                       return put_user (intval, (int *) arg);
+
+               case BLKBSZSET:
+                       /* set the logical block size */
+                       if (!capable (CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if (!dev || !arg)
+                               return -EINVAL;
+                       if (get_user (intval, (int *) arg))
+                               return -EFAULT;
+                       if (intval > PAGE_SIZE || intval < 512 ||
+                           (intval & (intval - 1)))
+                               return -EINVAL;
+                       if (is_mounted (dev) || is_swap_partition (dev))
+                               return -EBUSY;
+                       set_blocksize (dev, intval);
+                       return 0;
+
+               default:
+                       return -EINVAL;
+       }
+#endif
+}
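+
+/*
+ * Worked example (editor's illustration) for the dormant code above:
+ * for a device of 2097152 sectors, BLKGETSIZE stores the sector count
+ * 2097152 through an unsigned long pointer, while BLKGETSIZE64 stores
+ * the byte count, 2097152 << 9 == 1073741824 (1 GB), through a u64
+ * pointer.
+ */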
+
+EXPORT_SYMBOL(blk_ioctl);
diff --git a/xen-2.4.16/drivers/block/elevator.c b/xen-2.4.16/drivers/block/elevator.c
new file mode 100644 (file)
index 0000000..281e8f8
--- /dev/null
@@ -0,0 +1,224 @@
+/*
+ *  linux/drivers/block/elevator.c
+ *
+ *  Block device elevator/IO-scheduler.
+ *
+ *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ * 30042000 Jens Axboe <axboe@suse.de> :
+ *
+ * Split the elevator a bit so that it is possible to choose a different
+ * one or even write a new "plug in". There are three pieces:
+ * - elevator_fn, inserts a new request in the queue list
+ * - elevator_merge_fn, decides whether a new buffer can be merged with
+ *   an existing request
+ * - elevator_dequeue_fn, called when a request is taken off the active list
+ *
+ * 20082000 Dave Jones <davej@suse.de> :
+ * Removed tests for max-bomb-segments, which was breaking elvtune
+ *  when run without -bN
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/blkdev.h>
+#include <xeno/elevator.h>
+#include <xeno/blk.h>
+#include <xeno/module.h>
+#include <asm/uaccess.h>
+
+/*
+ * This is a bit tricky. It's given that bh and rq are for the same
+ * device, but the next request might of course not be. Run through
+ * the tests below to check if we want to insert here if we can't merge
+ * bh into an existing request
+ */
+inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq,
+                           struct list_head *head)
+{
+       struct list_head *next;
+       struct request *next_rq;
+
+       next = rq->queue.next;
+       if (next == head)
+               return 0;
+
+       /*
+        * if the device is different (usually on a different partition),
+        * just check if bh is after rq
+        */
+       next_rq = blkdev_entry_to_request(next);
+       if (next_rq->rq_dev != rq->rq_dev)
+               return bh->b_rsector > rq->sector;
+
+       /*
+        * ok, rq, next_rq and bh are on the same device. if bh is in between
+        * the two, this is the sweet spot
+        */
+       if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector)
+               return 1;
+
+       /*
+        * next_rq is ordered wrt rq, but bh is not in between the two
+        */
+       if (next_rq->sector > rq->sector)
+               return 0;
+
+       /*
+        * next_rq and rq not ordered, if we happen to be either before
+        * next_rq or after rq insert here anyway
+        */
+       if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector)
+               return 1;
+
+       return 0;
+}
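+
+/*
+ * Worked example (editor's illustration): on one device, with
+ * rq->sector == 100 and next_rq->sector == 200, a bh with
+ * b_rsector == 150 hits the sweet spot and the function returns 1,
+ * while 50 or 250 returns 0.  If the list wraps (next_rq->sector ==
+ * 20 < rq->sector), any b_rsector > 100 or < 20 returns 1, since
+ * either side of the wrap point is an acceptable insertion spot.
+ */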
+
+
+int elevator_linus_merge(request_queue_t *q, struct request **req,
+                        struct list_head * head,
+                        struct buffer_head *bh, int rw,
+                        int max_sectors)
+{
+       struct list_head *entry = &q->queue_head;
+       unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE;
+
+       while ((entry = entry->prev) != head) {
+               struct request *__rq = blkdev_entry_to_request(entry);
+
+               /*
+                * simply "aging" of requests in queue
+                */
+               if (__rq->elevator_sequence-- <= 0)
+                       break;
+
+               if (__rq->waiting)
+                       continue;
+               if (__rq->rq_dev != bh->b_rdev)
+                       continue;
+               if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head))
+                       *req = __rq;
+               if (__rq->cmd != rw)
+                       continue;
+               if (__rq->nr_sectors + count > max_sectors)
+                       continue;
+               if (__rq->elevator_sequence < count)
+                       break;
+               if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+                       ret = ELEVATOR_BACK_MERGE;
+                       *req = __rq;
+                       break;
+               } else if (__rq->sector - count == bh->b_rsector) {
+                       ret = ELEVATOR_FRONT_MERGE;
+                       __rq->elevator_sequence -= count;
+                       *req = __rq;
+                       break;
+               }
+       }
+
+       return ret;
+}
+
+void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count)
+{
+       struct list_head *entry = &req->queue, *head = &q->queue_head;
+
+       /*
+        * second pass scan of requests that got passed over, if any
+        */
+       while ((entry = entry->next) != head) {
+               struct request *tmp = blkdev_entry_to_request(entry);
+               tmp->elevator_sequence -= count;
+       }
+}
+
+void elevator_linus_merge_req(struct request *req, struct request *next)
+{
+       if (next->elevator_sequence < req->elevator_sequence)
+               req->elevator_sequence = next->elevator_sequence;
+}
+
+/*
+ * See if we can find a request that this buffer can be coalesced with.
+ */
+int elevator_noop_merge(request_queue_t *q, struct request **req,
+                       struct list_head * head,
+                       struct buffer_head *bh, int rw,
+                       int max_sectors)
+{
+       struct list_head *entry;
+       unsigned int count = bh->b_size >> 9;
+
+       if (list_empty(&q->queue_head))
+               return ELEVATOR_NO_MERGE;
+
+       entry = &q->queue_head;
+       while ((entry = entry->prev) != head) {
+               struct request *__rq = blkdev_entry_to_request(entry);
+
+               if (__rq->cmd != rw)
+                       continue;
+               if (__rq->rq_dev != bh->b_rdev)
+                       continue;
+               if (__rq->nr_sectors + count > max_sectors)
+                       continue;
+               if (__rq->waiting)
+                       continue;
+               if (__rq->sector + __rq->nr_sectors == bh->b_rsector) {
+                       *req = __rq;
+                       return ELEVATOR_BACK_MERGE;
+               } else if (__rq->sector - count == bh->b_rsector) {
+                       *req = __rq;
+                       return ELEVATOR_FRONT_MERGE;
+               }
+       }
+
+       *req = blkdev_entry_to_request(q->queue_head.prev);
+       return ELEVATOR_NO_MERGE;
+}
+
+void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {}
+
+void elevator_noop_merge_req(struct request *req, struct request *next) {}
+
+int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
+{
+       blkelv_ioctl_arg_t output;
+
+       output.queue_ID                 = elevator->queue_ID;
+       output.read_latency             = elevator->read_latency;
+       output.write_latency            = elevator->write_latency;
+       output.max_bomb_segments        = 0;
+
+       if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
+               return -EFAULT;
+
+       return 0;
+}
+
+int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg)
+{
+       blkelv_ioctl_arg_t input;
+
+       if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t)))
+               return -EFAULT;
+
+       if (input.read_latency < 0)
+               return -EINVAL;
+       if (input.write_latency < 0)
+               return -EINVAL;
+
+       elevator->read_latency          = input.read_latency;
+       elevator->write_latency         = input.write_latency;
+       return 0;
+}
+
+void elevator_init(elevator_t * elevator, elevator_t type)
+{
+       static unsigned int queue_ID;
+
+       *elevator = type;
+       elevator->queue_ID = queue_ID++;
+}
diff --git a/xen-2.4.16/drivers/block/genhd.c b/xen-2.4.16/drivers/block/genhd.c
new file mode 100644 (file)
index 0000000..427c2cb
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ *  Code extracted from
+ *  linux/kernel/hd.c
+ *
+ *  Copyright (C) 1991-1998  Linus Torvalds
+ *
+ *  devfs support - jj, rgooch, 980122
+ *
+ *  Moved partition checking code to fs/partitions* - Russell King
+ *  (linux@arm.uk.linux.org)
+ */
+
+/*
+ * TODO:  rip out the remaining init crap from this file  --hch
+ */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+/*#include <xeno/fs.h>*/
+#include <xeno/genhd.h>
+#include <xeno/lib.h>
+#include <xeno/blk.h>
+#include <xeno/init.h>
+#include <xeno/spinlock.h>
+
+
+static rwlock_t gendisk_lock;
+
+/*
+ * Global kernel list of partitioning information.
+ *
+ * XXX: you should _never_ access this directly.
+ *     The only reason this is exported is source compatibility.
+ */
+/*static*/ struct gendisk *gendisk_head;
+static struct gendisk *gendisk_array[MAX_BLKDEV];
+
+EXPORT_SYMBOL(gendisk_head);
+
+
+/**
+ * add_gendisk - add partitioning information to kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function registers the partitioning information in @gp
+ * with the kernel.
+ */
+void
+add_gendisk(struct gendisk *gp)
+{
+       struct gendisk *sgp;
+
+       write_lock(&gendisk_lock);
+
+       /*
+        *      In 2.5 this will go away. Fix the drivers who rely on
+        *      old behaviour.
+        */
+
+       for (sgp = gendisk_head; sgp; sgp = sgp->next)
+       {
+               if (sgp == gp)
+               {
+//                     printk(KERN_ERR "add_gendisk: device major %d is buggy and added a live gendisk!\n",
+//                             sgp->major)
+                       goto out;
+               }
+       }
+       gendisk_array[gp->major] = gp;
+       gp->next = gendisk_head;
+       gendisk_head = gp;
+out:
+       write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(add_gendisk);
+
+
+/**
+ * del_gendisk - remove partitioning information from kernel list
+ * @gp: per-device partitioning information
+ *
+ * This function unregisters the partitioning information in @gp
+ * with the kernel.
+ */
+void
+del_gendisk(struct gendisk *gp)
+{
+       struct gendisk **gpp;
+
+       write_lock(&gendisk_lock);
+       gendisk_array[gp->major] = NULL;
+       for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next))
+               if (*gpp == gp)
+                       break;
+       if (*gpp)
+               *gpp = (*gpp)->next;
+       write_unlock(&gendisk_lock);
+}
+
+EXPORT_SYMBOL(del_gendisk);
+
+
+/**
+ * get_gendisk - get partitioning information for a given device
+ * @dev: device to get partitioning information for
+ *
+ * This function gets the structure containing partitioning
+ * information for the given device @dev.
+ */
+struct gendisk *
+get_gendisk(kdev_t dev)
+{
+       struct gendisk *gp = NULL;
+       int maj = MAJOR(dev);
+
+       read_lock(&gendisk_lock);
+       if ((gp = gendisk_array[maj]))
+               goto out;
+
+       /* This is needed for early 2.4 source compatibility.  --hch */
+       for (gp = gendisk_head; gp; gp = gp->next)
+               if (gp->major == maj)
+                       break;
+out:
+       read_unlock(&gendisk_lock);
+       return gp;
+}
+
+EXPORT_SYMBOL(get_gendisk);
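+
+/*
+ * Registration sketch (editor's illustration; the major number and
+ * array sizes are hypothetical): how a driver publishes one drive
+ * with a whole-disk minor and up to 63 partitions through this API.
+ */
+#if 0
+static struct hd_struct xd_part[64];   /* per-minor start/length, in sectors  */
+static int xd_sizes[64];               /* per-minor size, in 1024-byte blocks */
+static struct gendisk xd_gendisk = {
+       major:          XD_HYPOTHETICAL_MAJOR,
+       major_name:     "xd",
+       minor_shift:    6,              /* 64 minors per drive */
+       max_p:          64,
+       part:           xd_part,
+       sizes:          xd_sizes,
+       nr_real:        1,              /* one physical drive  */
+};
+
+static void xd_register(void)   { add_gendisk(&xd_gendisk); }
+static void xd_unregister(void) { del_gendisk(&xd_gendisk); }
+#endif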
+
+
+/**
+ * walk_gendisk - issue a command for every registered gendisk
+ * @walk: user-specified callback
+ * @data: opaque data for the callback
+ *
+ * This function walks through the gendisk chain and calls back
+ * into @walk for every element.
+ */
+int
+walk_gendisk(int (*walk)(struct gendisk *, void *), void *data)
+{
+       struct gendisk *gp;
+       int error = 0;
+
+       read_lock(&gendisk_lock);
+       for (gp = gendisk_head; gp; gp = gp->next)
+               if ((error = walk(gp, data)))
+                       break;
+       read_unlock(&gendisk_lock);
+
+       return error;
+}
+
+
+#ifdef CONFIG_PROC_FS
+int
+get_partition_list(char *page, char **start, off_t offset, int count)
+{
+       struct gendisk *gp;
+       struct hd_struct *hd;
+       char buf[64];
+       int len, n;
+
+       len = sprintf(page, "major minor  #blocks  name\n\n");
+               
+       read_lock(&gendisk_lock);
+       for (gp = gendisk_head; gp; gp = gp->next) {
+               for (n = 0; n < (gp->nr_real << gp->minor_shift); n++) {
+                       if (gp->part[n].nr_sects == 0)
+                               continue;
+
+                       hd = &gp->part[n]; disk_round_stats(hd);
+                       len += sprintf(page + len,
+                               "%4d  %4d %10d %s\n", gp->major,
+                               n, gp->sizes[n], disk_name(gp, n, buf));
+
+                       if (len < offset)
+                               offset -= len, len = 0;
+                       else if (len >= offset + count)
+                               goto out;
+               }
+       }
+
+out:
+       read_unlock(&gendisk_lock);
+       *start = page + offset;
+       len -= offset;
+       if (len < 0)
+               len = 0;
+       return len > count ? count : len;
+}
+#endif
+
+
+extern int blk_dev_init(void);
+extern int net_dev_init(void);
+extern void console_map_init(void);
+extern int atmdev_init(void);
+
+int __init device_init(void)
+{
+       rwlock_init(&gendisk_lock);
+       blk_dev_init();
+       sti();
+#ifdef CONFIG_NET
+       net_dev_init();
+#endif
+#ifdef CONFIG_ATM
+       (void) atmdev_init();
+#endif
+#ifdef CONFIG_VT
+       console_map_init();
+#endif
+       return 0;
+}
+
+__initcall(device_init);
diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c
new file mode 100644 (file)
index 0000000..0ee8477
--- /dev/null
@@ -0,0 +1,1466 @@
+/*
+ *  linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/sched.h>
+/*#include <xeno/kernel_stat.h>*/
+#include <xeno/errno.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/mm.h>
+/*#include <xeno/swap.h>*/
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <xeno/blk.h>
+/*#include <xeno/highmem.h>*/
+#include <xeno/slab.h>
+#include <xeno/module.h>
+
+/* This will die as all synchronous stuff is coming to an end */
+#define end_buffer_io_sync NULL
+#define complete(_r) panic("completion.h stuff may be needed...")
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have to protect against block device array operations, and
+ * the do_request() side is casually still unsafe. The kernel lock protects
+ * this part currently.).
+ *
+ * There is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ *     *request_fn
+ *     *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024-byte
+ * blocks:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ *             then 512 bytes is assumed.
+ * else
+ *             sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
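+
+/*
+ * Lookup sketch (editor's illustration): the fallback convention the
+ * tables above share, shown for hardsect_size; get_max_sectors()
+ * below does the same for max_sectors.
+ */
+#if 0
+static inline int example_hardsect_size(kdev_t dev)
+{
+       if (hardsect_size[MAJOR(dev)])
+               return hardsect_size[MAJOR(dev)][MINOR(dev)];
+       return 512;             /* default: 512-byte hardware sectors */
+}
+#endif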
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+static inline int get_max_sectors(kdev_t dev)
+{
+       if (!max_sectors[MAJOR(dev)])
+               return MAX_SECTORS;
+       return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+       struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+       if (bdev->queue)
+               return bdev->queue(dev);
+       else
+               return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+       struct list_head *head = &list->free;
+       struct request *rq;
+       int i = 0;
+
+       while (!list_empty(head)) {
+               rq = list_entry(head->next, struct request, queue);
+               list_del(&rq->queue);
+               kmem_cache_free(request_cachep, rq);
+               i++;
+       }
+
+       if (i != list->count)
+               printk("request list leak!\n");
+
+       list->count = 0;
+       return i;
+}
+
+/**
+ * blk_cleanup_queue - release a &request_queue_t when it is no longer needed
+ * @q:    the request queue to be released
+ *
+ * Description:
+ *     blk_cleanup_queue is the pair to blk_init_queue().  It should
+ *     be called when a request queue is being released; typically
+ *     when a block device is being de-registered.  Currently, its
+ *     primary task is to free all the &struct request structures that
+ *     were allocated to the queue.
+ * Caveat: 
+ *     Hopefully the low level driver will have finished any
+ *     outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+       int count = q->nr_requests;
+
+       count -= __blk_cleanup_queue(&q->rq[READ]);
+       count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+       if (count)
+               printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+       memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q:       The queue which this applies to.
+ * @active:  A flag indicating whether the head of the queue is active.
+ *
+ * Description:
+ *    The driver for a block device may choose to leave the currently active
+ *    request on the request queue, removing it only when it has completed.
+ *    The queue handling routines assume this by default for safety reasons
+ *    and will not involve the head of the request queue in any merging or
+ *    reordering of requests when the queue is unplugged (and thus may be
+ *    working on this particular request).
+ *
+ *    If a driver removes requests from the queue before processing them, then
+ *    it may indicate that it does so, thereby allowing the head of the queue
+ *    to be involved in merging and reordering.  This is done by calling
+ *    blk_queue_headactive() with an @active flag of %0.
+ *
+ *    If a driver processes several requests at once, it must remove them (or
+ *    at least all but one of them) from the request queue.
+ *
+ *    When a queue is plugged the head will be assumed to be inactive.
+ **/
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+       q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q:  the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ *    The normal way for &struct buffer_heads to be passed to a device
+ *    driver is for them to be collected into requests on a request
+ *    queue, and then to allow the device driver to select requests
+ *    off that queue when it is ready.  This works well for many block
+ *    devices. However some block devices (typically virtual devices
+ *    such as md or lvm) do not benefit from the processing on the
+ *    request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function
+ *    to blk_queue_make_request().
+ *
+ * Caveat:
+ *    The driver that does this *must* be able to deal appropriately
+ *    with buffers in "highmemory", either by calling bh_kmap() to get
+ *    a kernel mapping, or by calling create_bounce() to create a
+ *    buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+       q->make_request_fn = mfn;
+}
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+       if (req->nr_segments < max_segments) {
+               req->nr_segments++;
+               return 1;
+       }
+       return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
+                           struct buffer_head *bh, int max_segments)
+{
+       if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+               return 1;
+       return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
+                            struct buffer_head *bh, int max_segments)
+{
+       if (bh->b_data + bh->b_size == req->bh->b_data)
+               return 1;
+       return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+                               struct request *next, int max_segments)
+{
+       int total_segments = req->nr_segments + next->nr_segments;
+
+       if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+               total_segments--;
+    
+       if (total_segments > max_segments)
+               return 0;
+
+       req->nr_segments = total_segments;
+       return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+       /*
+        * no need to replug device
+        */
+       if (!list_empty(&q->queue_head) || q->plugged)
+               return;
+
+       q->plugged = 1;
+       queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+       if (q->plugged) {
+               q->plugged = 0;
+               if (!list_empty(&q->queue_head))
+                       q->request_fn(q);
+       }
+}
+
+void generic_unplug_device(void *data)
+{
+       request_queue_t *q = (request_queue_t *) data;
+       unsigned long flags;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       __generic_unplug_device(q);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/**
+ * blk_grow_request_list
+ *  @q: The &request_queue_t
+ *  @nr_requests: how many requests are desired
+ *
+ * More free requests are added to the queue's free lists, bringing
+ * the total number of requests to @nr_requests.
+ *
+ * The requests are added equally to the request queue's read
+ * and write freelists.
+ *
+ * This function can sleep.
+ *
+ * Returns the (new) number of requests which the queue has available.
+ */
+int blk_grow_request_list(request_queue_t *q, int nr_requests)
+{
+       unsigned long flags;
+       /* Several broken drivers assume that this function doesn't sleep;
+        * this causes system hangs during boot.
+        * As a temporary fix, make the function non-blocking.
+        */
+       spin_lock_irqsave(&io_request_lock, flags);
+       while (q->nr_requests < nr_requests) {
+               struct request *rq;
+               int rw;
+
+               rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
+               if (rq == NULL)
+                       break;
+               memset(rq, 0, sizeof(*rq));
+               rq->rq_status = RQ_INACTIVE;
+               rw = q->nr_requests & 1;
+               list_add(&rq->queue, &q->rq[rw].free);
+               q->rq[rw].count++;
+               q->nr_requests++;
+       }
+       q->batch_requests = q->nr_requests / 4;
+       if (q->batch_requests > 32)
+               q->batch_requests = 32;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+       return q->nr_requests;
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+    /*struct sysinfo si;*/
+    /*int megs;*/              /* Total memory, in megabytes */
+       int nr_requests;
+
+       INIT_LIST_HEAD(&q->rq[READ].free);
+       INIT_LIST_HEAD(&q->rq[WRITE].free);
+       q->rq[READ].count = 0;
+       q->rq[WRITE].count = 0;
+       q->nr_requests = 0;
+
+#if 0
+       si_meminfo(&si);
+       megs = si.totalram >> (20 - PAGE_SHIFT);
+       nr_requests = 128;
+       if (megs < 32)
+               nr_requests /= 2;
+#else
+        nr_requests = 128;
+#endif
+       blk_grow_request_list(q, nr_requests);
+
+#if 0
+       init_waitqueue_head(&q->wait_for_requests[0]);
+       init_waitqueue_head(&q->wait_for_requests[1]);
+#endif
+       spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue  - prepare a request queue for use with a block device
+ * @q:    The &request_queue_t to be initialised
+ * @rfn:  The function to be called to process requests that have been
+ *        placed on the queue.
+ *
+ * Description:
+ *    If a block device wishes to use the standard request handling procedures,
+ *    which sorts requests and coalesces adjacent requests, then it must
+ *    call blk_init_queue().  The function @rfn will be called when there
+ *    are requests on the queue that need to be processed.  If the device
+ *    supports plugging, then @rfn may not be called immediately when requests
+ *    are available on the queue, but may be called at some time later instead.
+ *    Plugged queues are generally unplugged when a buffer belonging to one
+ *    of the requests on the queue is needed, or due to memory pressure.
+ *
+ *    @rfn is not required, or even expected, to remove all requests off the
+ *    queue, but only as many as it can handle at a time.  If it does leave
+ *    requests on the queue, it is responsible for arranging that the requests
+ *    get dealt with eventually.
+ *
+ *    A global spin lock $io_request_lock must be held while manipulating the
+ *    requests on the request queue.
+ *
+ *    The request on the head of the queue is by default assumed to be
+ *    potentially active, and it is not considered for re-ordering or merging
+ *    whenever the given queue is unplugged. This behaviour can be changed with
+ *    blk_queue_headactive().
+ *
+ * Note:
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
+ *    when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+       INIT_LIST_HEAD(&q->queue_head);
+       elevator_init(&q->elevator, ELEVATOR_LINUS);
+       blk_init_free_list(q);
+       q->request_fn           = rfn;
+       q->back_merge_fn        = ll_back_merge_fn;
+       q->front_merge_fn       = ll_front_merge_fn;
+       q->merge_requests_fn    = ll_merge_requests_fn;
+       q->make_request_fn      = __make_request;
+       q->plug_tq.sync         = 0;
+       q->plug_tq.routine      = &generic_unplug_device;
+       q->plug_tq.data         = q;
+       q->plugged              = 0;
+       /*
+        * These booleans describe the queue properties.  We set the
+        * default (and most common) values here.  Other drivers can
+        * use the appropriate functions to alter the queue properties.
+        * as appropriate.
+        */
+       q->plug_device_fn       = generic_plug_device;
+       q->head_active          = 1;
+}
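+
+/*
+ * Usage sketch (editor's illustration; the driver name and major are
+ * hypothetical): a minimal driver adopting the standard request
+ * handling described above.
+ */
+#if 0
+static void xd_request_fn(request_queue_t *q)
+{
+       while (!list_empty(&q->queue_head)) {
+               struct request *req =
+                       blkdev_entry_to_request(q->queue_head.next);
+               /* ... service req, then dequeue and release it ... */
+       }
+}
+
+static void xd_queue_init(void)
+{
+       request_queue_t *q = BLK_DEFAULT_QUEUE(XD_HYPOTHETICAL_MAJOR);
+
+       blk_init_queue(q, xd_request_fn);
+       blk_queue_headactive(q, 0);     /* we dequeue before servicing */
+}
+#endif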
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in.  Returns NULL if there are no free requests.
+ */
+static struct request *get_request(request_queue_t *q, int rw)
+{
+       struct request *rq = NULL;
+       struct request_list *rl = q->rq + rw;
+
+       if (!list_empty(&rl->free)) {
+               rq = blkdev_free_rq(&rl->free);
+               list_del(&rq->queue);
+               rl->count--;
+               rq->rq_status = RQ_ACTIVE;
+               rq->cmd = rw;
+               rq->special = NULL;
+               rq->q = q;
+       }
+
+       return rq;
+}
+
+/*
+ * Here's the request allocation design:
+ *
+ * 1: Blocking on request exhaustion is a key part of I/O throttling.
+ * 
+ * 2: We want to be `fair' to all requesters.  We must avoid starvation, and
+ *    attempt to ensure that all requesters sleep for a similar duration.  Hence
+ *    no stealing requests when there are other processes waiting.
+ * 
+ * 3: We also wish to support `batching' of requests.  So when a process is
+ *    woken, we want to allow it to allocate a decent number of requests
+ *    before it blocks again, so they can be nicely merged (this only really
+ *    matters if the process happens to be adding requests near the head of
+ *    the queue).
+ * 
+ * 4: We want to avoid scheduling storms.  This isn't really important, because
+ *    the system will be I/O bound anyway.  But it's easy.
+ * 
+ *    There is tension between requirements 2 and 3.  Once a task has woken,
+ *    we don't want to allow it to sleep as soon as it takes its second request.
+ *    But we don't want currently-running tasks to steal all the requests
+ *    from the sleepers.  We handle this with wakeup hysteresis around
+ *    0 .. batch_requests and with the assumption that request taking is much,
+ *    much faster than request freeing.
+ * 
+ * So here's what we do:
+ * 
+ *    a) A READA requester fails if free_requests < batch_requests
+ * 
+ *       We don't want READA requests to prevent sleepers from ever
+ *       waking.  Note that READA is used extremely rarely - a few
+ *       filesystems use it for directory readahead.
+ * 
+ *  When a process wants a new request:
+ * 
+ *    b) If free_requests == 0, the requester sleeps in FIFO manner.
+ * 
+ *    c) If 0 < free_requests < batch_requests and there are waiters,
+ *       we still take a request non-blockingly.  This provides batching.
+ *
+ *    d) If free_requests >= batch_requests, the caller is immediately
+ *       granted a new request.
+ *
+ *  When a request is released:
+ *
+ *    e) If free_requests < batch_requests, do nothing.
+ *
+ *    f) If free_requests >= batch_requests, wake up a single waiter.
+ * 
+ *   The net effect is that when a process is woken at the batch_requests level,
+ *   it will be able to take approximately (batch_requests) requests before
+ *   blocking again (at the tail of the queue).
+ * 
+ *   This all assumes that the rate of taking requests is much, much higher
+ *   than the rate of releasing them.  Which is very true.
+ *
+ * -akpm, Feb 2002.
+ */
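+
+/*
+ * Decision sketch (editor's illustration): the request-taking policy
+ * above as straight-line code; the real logic is spread across
+ * __make_request() and __get_request_wait() below.
+ */
+#if 0
+static int may_take_request(request_queue_t *q, int rw, int is_reada)
+{
+       int free = q->rq[rw].count;
+
+       if (is_reada)
+               return free >= q->batch_requests;       /* rule a)        */
+       if (free == 0)
+               return 0;                               /* rule b): sleep */
+       return 1;                                       /* rules c), d)   */
+}
+#endif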
+
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+#if 0
+       register struct request *rq;
+       /*DECLARE_WAITQUEUE(wait, current);*/
+
+       generic_unplug_device(q);
+       add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait);
+       do {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (q->rq[rw].count == 0)
+                       schedule();
+               spin_lock_irq(&io_request_lock);
+               rq = get_request(q,rw);
+               spin_unlock_irq(&io_request_lock);
+       } while (rq == NULL);
+       remove_wait_queue(&q->wait_for_requests[rw], &wait);
+       current->state = TASK_RUNNING;
+       return rq;
+#else
+        panic("__get_request_wait shouldn't be depended on");
+        return 0;
+#endif
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+       int minor,major;
+
+       major = MAJOR(dev);
+       minor = MINOR(dev);
+       if (major < 0 || major >= MAX_BLKDEV) return 0;
+       return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+       int minor,major;
+
+       major = MAJOR(dev);
+       minor = MINOR(dev);
+       if (major < 0 || major >= MAX_BLKDEV) return;
+       if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+       else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+                               unsigned long nr_sectors, int new_io)
+{
+    /*unsigned int major = MAJOR(dev);*/
+       unsigned int index;
+
+       index = disk_index(dev);
+#if 0
+       if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+               return;
+#endif
+
+#if 0
+       kstat.dk_drive[major][index] += new_io;
+       if (rw == READ) {
+               kstat.dk_drive_rio[major][index] += new_io;
+               kstat.dk_drive_rblk[major][index] += nr_sectors;
+       } else if (rw == WRITE) {
+               kstat.dk_drive_wio[major][index] += new_io;
+               kstat.dk_drive_wblk[major][index] += nr_sectors;
+       } else
+               printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+#endif
+}
+
+/* Return up to two hd_structs on which to do IO accounting for a given
+ * request.  On a partitioned device, we want to account both against
+ * the partition and against the whole disk.  */
+static void locate_hd_struct(struct request *req, 
+                            struct hd_struct **hd1,
+                            struct hd_struct **hd2)
+{
+       struct gendisk *gd;
+
+       *hd1 = NULL;
+       *hd2 = NULL;
+       
+       gd = get_gendisk(req->rq_dev);
+       if (gd && gd->part) {
+               /* Mask out the partition bits: account for the entire disk */
+               int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+               int whole_minor = devnr << gd->minor_shift;
+               *hd1 = &gd->part[whole_minor];
+               if (whole_minor != MINOR(req->rq_dev))
+                       *hd2= &gd->part[MINOR(req->rq_dev)];
+       }
+}
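+
+/*
+ * Numeric example (editor's illustration): with minor_shift == 6, a
+ * request on minor 67 is accounted against part[64] (drive 1's whole
+ * disk) and part[67] (the partition); a request on minor 64 itself
+ * sets only *hd1 and leaves *hd2 NULL.
+ */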
+
+/* Round off the performance stats on an hd_struct.  The average IO
+ * queue length and utilisation statistics are maintained by observing
+ * the current state of the queue length and the amount of time it has
+ * been in this state for.  Normally, that accounting is done on IO
+ * completion, but that can result in more than a second's worth of IO
+ * being accounted for within any one second, leading to >100%
+ * utilisation.  To deal with that, we do a round-off before returning
+ * the results when reading /proc/partitions, accounting immediately for
+ * all queue usage up to the current jiffies and restarting the counters
+ * again. */
+void disk_round_stats(struct hd_struct *hd)
+{
+       unsigned long now = jiffies;
+       
+       hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
+       hd->last_queue_change = now;
+
+       if (hd->ios_in_flight)
+               hd->io_ticks += (now - hd->last_idle_time);
+       hd->last_idle_time = now;       
+}
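+
+/*
+ * Worked example (editor's illustration): if two IOs have been in
+ * flight for the last 10 jiffies, the update above adds 2 * 10 == 20
+ * to hd->aveq and 10 to hd->io_ticks, so a reader of /proc/partitions
+ * can derive an average queue depth of aveq / io_ticks == 2 for that
+ * interval.
+ */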
+
+
+static inline void down_ios(struct hd_struct *hd)
+{
+       disk_round_stats(hd);   
+       --hd->ios_in_flight;
+}
+
+static inline void up_ios(struct hd_struct *hd)
+{
+       disk_round_stats(hd);
+       ++hd->ios_in_flight;
+}
+
+static void account_io_start(struct hd_struct *hd, struct request *req,
+                            int merge, int sectors)
+{
+       switch (req->cmd) {
+       case READ:
+               if (merge)
+                       hd->rd_merges++;
+               hd->rd_sectors += sectors;
+               break;
+       case WRITE:
+               if (merge)
+                       hd->wr_merges++;
+               hd->wr_sectors += sectors;
+               break;
+       }
+       if (!merge)
+               up_ios(hd);
+}
+
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+       unsigned long duration = jiffies - req->start_time;
+       switch (req->cmd) {
+       case READ:
+               hd->rd_ticks += duration;
+               hd->rd_ios++;
+               break;
+       case WRITE:
+               hd->wr_ticks += duration;
+               hd->wr_ios++;
+               break;
+       }
+       down_ios(hd);
+}
+
+void req_new_io(struct request *req, int merge, int sectors)
+{
+       struct hd_struct *hd1, *hd2;
+       locate_hd_struct(req, &hd1, &hd2);
+       if (hd1)
+               account_io_start(hd1, req, merge, sectors);
+       if (hd2)
+               account_io_start(hd2, req, merge, sectors);
+}
+
+void req_finished_io(struct request *req)
+{
+       struct hd_struct *hd1, *hd2;
+       locate_hd_struct(req, &hd1, &hd2);
+       if (hd1)
+               account_io_end(hd1, req);
+       if (hd2)        
+               account_io_end(hd2, req);
+}
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+                              struct list_head *insert_here)
+{
+       drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+       if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+               spin_unlock_irq(&io_request_lock);
+               BUG();
+       }
+
+       /*
+        * elevator indicated where it wants this request to be
+        * inserted at elevator_merge time
+        */
+       list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+void blkdev_release_request(struct request *req)
+{
+       request_queue_t *q = req->q;
+       int rw = req->cmd;
+
+       req->rq_status = RQ_INACTIVE;
+       req->q = NULL;
+
+       /*
+        * Request may not have originated from ll_rw_blk. if not,
+        * assume it has free buffers and check waiters
+        */
+       if (q) {
+               list_add(&req->queue, &q->rq[rw].free);
+#if 0
+               if (++q->rq[rw].count >= q->batch_requests &&
+                               waitqueue_active(&q->wait_for_requests[rw]))
+                       wake_up(&q->wait_for_requests[rw]);
+#endif
+       }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+                         struct request *req,
+                         int max_sectors,
+                         int max_segments)
+{
+       struct request *next;
+       struct hd_struct *hd1, *hd2;
+  
+       next = blkdev_next_request(req);
+       if (req->sector + req->nr_sectors != next->sector)
+               return;
+       if (req->cmd != next->cmd
+           || req->rq_dev != next->rq_dev
+           || req->nr_sectors + next->nr_sectors > max_sectors
+           || next->waiting)
+               return;
+       /*
+        * If we are not allowed to merge these requests, then
+        * return.  If we are allowed to merge, then the count
+        * will have been updated to the appropriate number,
+        * and we shouldn't do it here too.
+        */
+       if (!q->merge_requests_fn(q, req, next, max_segments))
+               return;
+
+       q->elevator.elevator_merge_req_fn(req, next);
+       req->bhtail->b_reqnext = next->bh;
+       req->bhtail = next->bhtail;
+       req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+       list_del(&next->queue);
+
+       /* One last thing: we have removed a request, so we now have one
+          less expected IO to complete for accounting purposes. */
+
+       locate_hd_struct(req, &hd1, &hd2);
+       if (hd1)
+               down_ios(hd1);
+       if (hd2)        
+               down_ios(hd2);
+       blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+                                     struct request *req,
+                                     int max_sectors,
+                                     int max_segments)
+{
+       if (&req->queue == q->queue_head.prev)
+               return;
+       attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+                                      struct list_head * head,
+                                      struct request *req,
+                                      int max_sectors,
+                                      int max_segments)
+{
+       struct list_head * prev;
+
+       prev = req->queue.prev;
+       if (head == prev)
+               return;
+       attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+                                 struct buffer_head * bh)
+{
+       unsigned int sector, count;
+       int max_segments = MAX_SEGMENTS;
+       struct request * req, *freereq = NULL;
+       int rw_ahead, max_sectors, el_ret;
+       struct list_head *head, *insert_here;
+       int latency;
+       elevator_t *elevator = &q->elevator;
+
+       count = bh->b_size >> 9;
+       sector = bh->b_rsector;
+
+       rw_ahead = 0;   /* normal case; gets changed below for READA */
+       switch (rw) {
+               case READA:
+#if 0  /* bread() misinterprets failed READA attempts as IO errors on SMP */
+                       rw_ahead = 1;
+#endif
+                       rw = READ;      /* drop into READ */
+               case READ:
+               case WRITE:
+                       latency = elevator_request_latency(elevator, rw);
+                       break;
+               default:
+                       BUG();
+                       goto end_io;
+       }
+
+       /* We'd better have a real physical mapping!
+          Check this bit only if the buffer was dirty and just locked
+          down by us so at this point flushpage will block and
+          won't clear the mapped bit under us. */
+       if (!buffer_mapped(bh))
+               BUG();
+
+       /*
+        * Temporary solution - in 2.5 this will be done by the lowlevel
+        * driver. Create a bounce buffer if the buffer data points into
+        * high memory - keep the original buffer otherwise.
+        */
+#if CONFIG_HIGHMEM
+       bh = create_bounce(rw, bh);
+#endif
+
+/* look for a free request. */
+       /*
+        * Try to coalesce the new request with old requests
+        */
+       max_sectors = get_max_sectors(bh->b_rdev);
+
+again:
+       req = NULL;
+       head = &q->queue_head;
+       /*
+        * Now we acquire the request spinlock, we have to be mega careful
+        * not to schedule or do something nonatomic
+        */
+       spin_lock_irq(&io_request_lock);
+
+       insert_here = head->prev;
+       if (list_empty(head)) {
+               q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+               goto get_rq;
+       } else if (q->head_active && !q->plugged)
+               head = head->next;
+
+       el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+       switch (el_ret) {
+
+               case ELEVATOR_BACK_MERGE:
+                       if (!q->back_merge_fn(q, req, bh, max_segments)) {
+                               insert_here = &req->queue;
+                               break;
+                       }
+                       elevator->elevator_merge_cleanup_fn(q, req, count);
+                       req->bhtail->b_reqnext = bh;
+                       req->bhtail = bh;
+                       req->nr_sectors = req->hard_nr_sectors += count;
+                       blk_started_io(count);
+                       drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+                       req_new_io(req, 1, count);
+                       attempt_back_merge(q, req, max_sectors, max_segments);
+                       goto out;
+
+               case ELEVATOR_FRONT_MERGE:
+                       if (!q->front_merge_fn(q, req, bh, max_segments)) {
+                               insert_here = req->queue.prev;
+                               break;
+                       }
+                       elevator->elevator_merge_cleanup_fn(q, req, count);
+                       bh->b_reqnext = req->bh;
+                       req->bh = bh;
+                       req->buffer = bh->b_data;
+                       req->current_nr_sectors = count;
+                       req->sector = req->hard_sector = sector;
+                       req->nr_sectors = req->hard_nr_sectors += count;
+                       blk_started_io(count);
+                       drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+                       req_new_io(req, 1, count);
+                       attempt_front_merge(q, head, req, max_sectors, max_segments);
+                       goto out;
+
+               /*
+                * elevator says don't/can't merge. get new request
+                */
+               case ELEVATOR_NO_MERGE:
+                       /*
+                        * use elevator hints as to where to insert the
+                        * request. if no hints, just add it to the back
+                        * of the queue
+                        */
+                       if (req)
+                               insert_here = &req->queue;
+                       break;
+
+               default:
+                       printk("elevator returned crap (%d)\n", el_ret);
+                       BUG();
+       }
+               
+get_rq:
+       if (freereq) {
+               req = freereq;
+               freereq = NULL;
+       } else {
+               /*
+                * See description above __get_request_wait()
+                */
+               if (rw_ahead) {
+                       if (q->rq[rw].count < q->batch_requests) {
+                               spin_unlock_irq(&io_request_lock);
+                               goto end_io;
+                       }
+                       req = get_request(q, rw);
+                       if (req == NULL)
+                               BUG();
+               } else {
+                       req = get_request(q, rw);
+                       if (req == NULL) {
+                               spin_unlock_irq(&io_request_lock);
+                               freereq = __get_request_wait(q, rw);
+                               goto again;
+                       }
+               }
+       }
+
+/* fill up the request-info, and add it to the queue */
+       req->elevator_sequence = latency;
+       req->cmd = rw;
+       req->errors = 0;
+       req->hard_sector = req->sector = sector;
+       req->hard_nr_sectors = req->nr_sectors = count;
+       req->current_nr_sectors = count;
+       req->nr_segments = 1; /* Always 1 for a new request. */
+       req->nr_hw_segments = 1; /* Always 1 for a new request. */
+       req->buffer = bh->b_data;
+       req->waiting = NULL;
+       req->bh = bh;
+       req->bhtail = bh;
+       req->rq_dev = bh->b_rdev;
+       req->start_time = jiffies;
+       req_new_io(req, 0, count);
+       blk_started_io(count);
+       add_request(q, req, insert_here);
+out:
+       if (freereq)
+               blkdev_release_request(freereq);
+       spin_unlock_irq(&io_request_lock);
+       return 0;
+end_io:
+       bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+       return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to its device driver for I/O
+ * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh:  The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value.  The
+ * %READ and %WRITE options are (hopefully) obvious in meaning.  The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status.  The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) elsewhere.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled.  BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector.  So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+       int major = MAJOR(bh->b_rdev);
+       int minorsize = 0;
+       request_queue_t *q;
+
+       if (!bh->b_end_io)
+               BUG();
+
+       /* Test device size, when known. */
+       if (blk_size[major])
+               minorsize = blk_size[major][MINOR(bh->b_rdev)];
+       if (minorsize) {
+               unsigned long maxsector = (minorsize << 1) + 1;
+               unsigned long sector = bh->b_rsector;
+               unsigned int count = bh->b_size >> 9;
+
+               if (maxsector < count || maxsector - count < sector) {
+                       /* Yecch */
+                       bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
+
+                       /* This may well happen - the kernel calls bread()
+                          without checking the size of the device, e.g.,
+                          when mounting a device. */
+                       printk(KERN_INFO
+                              "attempt to access beyond end of device\n");
+                       printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+                              kdevname(bh->b_rdev), rw,
+                              (sector + count)>>1, minorsize);
+
+                       /* Yecch again */
+                       bh->b_end_io(bh, 0);
+                       return;
+               }
+       }
+
+       /*
+        * Resolve the mapping until finished. (drivers are
+        * still free to implement/resolve their own stacking
+        * by explicitly returning 0)
+        */
+       /* NOTE: we don't repeat the blk_size check for each new device.
+        * Stacking drivers are expected to know what they are doing.
+        */
+       do {
+               q = blk_get_queue(bh->b_rdev);
+               if (!q) {
+                       printk(KERN_ERR
+                              "generic_make_request: Trying to access "
+                              "nonexistent block-device %s (%ld)\n",
+                              kdevname(bh->b_rdev), bh->b_rsector);
+                       buffer_IO_error(bh);
+                       break;
+               }
+       } while (q->make_request_fn(q, rw, bh));
+}
+
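+/*
+ * A minimal sketch (compiled out, like the kstat block below) of the
+ * calling contract just described: fill in the buffer and device
+ * fields, supply a completion handler, and hand the buffer_head over.
+ * The example_* names are illustrative only; mark_buffer_uptodate()
+ * and unlock_buffer() are assumed from the stock 2.4 buffer cache.
+ */
+#if 0
+static void example_end_io(struct buffer_head *bh, int uptodate)
+{
+       mark_buffer_uptodate(bh, uptodate);     /* record the result */
+       unlock_buffer(bh);                      /* wake any waiters  */
+}
+
+static void example_read_sector(kdev_t dev, unsigned long sector, char *data)
+{
+       struct buffer_head bh;
+
+       memset(&bh, 0, sizeof(bh));
+       bh.b_rdev    = bh.b_dev = dev;          /* device address       */
+       bh.b_rsector = sector;                  /* sector on the device */
+       bh.b_size    = 512;                     /* buffer length        */
+       bh.b_data    = data;                    /* memory buffer        */
+       bh.b_end_io  = example_end_io;          /* completion callback  */
+       bh.b_state   = (1 << BH_Mapped) | (1 << BH_Lock);
+       generic_make_request(READ, &bh);        /* result via b_end_io  */
+}
+#endif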
+
+/**
+ * submit_bh: submit a buffer_head to the block device layer for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is appropriate for I/O requests that come from the buffer
+ * cache and page cache, which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+       int count = bh->b_size >> 9;
+
+       if (!test_bit(BH_Lock, &bh->b_state))
+               BUG();
+
+       set_bit(BH_Req, &bh->b_state);
+       set_bit(BH_Launder, &bh->b_state);
+
+       /*
+        * First step, 'identity mapping' - RAID or LVM might
+        * further remap this.
+        */
+       bh->b_rdev = bh->b_dev;
+       bh->b_rsector = bh->b_blocknr * count;
+
+       generic_make_request(rw, bh);
+
+#if 0
+       switch (rw) {
+               case WRITE:
+                       kstat.pgpgout += count;
+                       break;
+               default:
+                       kstat.pgpgin += count;
+                       break;
+       }
+#endif
+}
+
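+/*
+ * A sketch of how submit_bh() is typically driven (compare
+ * ll_rw_block() below): the caller must win BH_Lock, and only b_dev,
+ * b_blocknr and b_size need describe the I/O; b_rdev and b_rsector
+ * are derived above.  end_buffer_io_sync is the handler used by
+ * ll_rw_block() in this file.
+ */
+#if 0
+static void example_submit(struct buffer_head *bh)
+{
+       if (test_and_set_bit(BH_Lock, &bh->b_state))
+               return;                 /* I/O already in flight */
+       atomic_inc(&bh->b_count);
+       bh->b_end_io = end_buffer_io_sync;
+       submit_bh(READ, bh);    /* sets b_rsector = b_blocknr * (b_size >> 9) */
+}
+#endif
+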
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality, beyond
+ * generic_make_request(), that is relevant to buffers in the buffer
+ * cache or page cache.  In particular it drops any buffer that it
+ * cannot lock (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing a read request.  Further, it
+ * marks as clean any buffers that are processed for writing (the
+ * buffer cache won't assume that they are actually clean until the
+ * buffer gets unlocked).
+ *
+ * ll_rw_block sets b_end_io to a simple completion handler that marks
+ * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
+ * any waiters.  A client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ *  All of the buffers must be for the same device, and must also be
+ *  of the current approved size for the device.  */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+       unsigned int major;
+       int correct_size;
+       int i;
+
+       if (!nr)
+               return;
+
+       major = MAJOR(bhs[0]->b_dev);
+
+       /* Determine correct block size for this device. */
+       correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+       /* Verify requested block sizes. */
+       for (i = 0; i < nr; i++) {
+               struct buffer_head *bh = bhs[i];
+               if (bh->b_size % correct_size) {
+                       printk(KERN_NOTICE "ll_rw_block: device %s: "
+                              "only %d-char blocks implemented (%u)\n",
+                              kdevname(bhs[0]->b_dev),
+                              correct_size, bh->b_size);
+                       goto sorry;
+               }
+       }
+
+       if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+               printk(KERN_NOTICE "Can't write to read-only device %s\n",
+                      kdevname(bhs[0]->b_dev));
+               goto sorry;
+       }
+
+       for (i = 0; i < nr; i++) {
+               struct buffer_head *bh = bhs[i];
+
+               /* Only one thread can actually submit the I/O. */
+               if (test_and_set_bit(BH_Lock, &bh->b_state))
+                       continue;
+
+               /* We have the buffer lock */
+               atomic_inc(&bh->b_count);
+               bh->b_end_io = end_buffer_io_sync;
+
+               switch(rw) {
+               case WRITE:
+                       if (!atomic_set_buffer_clean(bh))
+                               /* Hmmph! Nothing to write */
+                               goto end_io;
+                       __mark_buffer_clean(bh);
+                       break;
+
+               case READA:
+               case READ:
+                       if (buffer_uptodate(bh))
+                               /* Hmmph! Already have it */
+                               goto end_io;
+                       break;
+               default:
+                       BUG();
+       end_io:
+                       bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+                       continue;
+               }
+
+               submit_bh(rw, bh);
+       }
+       return;
+
+sorry:
+       /* Make sure we don't get infinite dirty retries.. */
+       for (i = 0; i < nr; i++)
+               mark_buffer_clean(bhs[i]);
+}
+
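+/*
+ * A minimal synchronous read built on ll_rw_block(): submit one buffer,
+ * then sleep until the completion handler installed above unlocks it.
+ * This is essentially the bread() pattern; wait_on_buffer() is assumed
+ * from the stock 2.4 buffer cache.
+ */
+#if 0
+static int example_read_block(struct buffer_head *bh)
+{
+       ll_rw_block(READ, 1, &bh);      /* locks bh, sets b_end_io   */
+       wait_on_buffer(bh);             /* sleep until I/O completes */
+       return buffer_uptodate(bh) ? 0 : -EIO;
+}
+#endif
+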
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req:      the request being processed
+ * @uptodate: 0 for I/O error
+ * @name:     the name printed for an I/O error
+ *
+ * Description:
+ *     Ends I/O on the first buffer attached to @req, and sets it up
+ *     for the next buffer_head (if any) in the cluster.
+ *     
+ * Return:
+ *     0 - we are done with this request, call end_that_request_last()
+ *     1 - still buffers pending for this request
+ *
+ * Caveat: 
+ *     Drivers implementing their own end_request handling must call
+ *     blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+       struct buffer_head * bh;
+       int nsect;
+
+       req->errors = 0;
+       if (!uptodate)
+               printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+                       kdevname(req->rq_dev), name, req->sector);
+
+       if ((bh = req->bh) != NULL) {
+               nsect = bh->b_size >> 9;
+               blk_finished_io(nsect);
+               req->bh = bh->b_reqnext;
+               bh->b_reqnext = NULL;
+               bh->b_end_io(bh, uptodate);
+               if ((bh = req->bh) != NULL) {
+                       req->hard_sector += nsect;
+                       req->hard_nr_sectors -= nsect;
+                       req->sector = req->hard_sector;
+                       req->nr_sectors = req->hard_nr_sectors;
+
+                       req->current_nr_sectors = bh->b_size >> 9;
+                       if (req->nr_sectors < req->current_nr_sectors) {
+                               req->nr_sectors = req->current_nr_sectors;
+                               printk("end_request: buffer-list destroyed\n");
+                       }
+                       req->buffer = bh->b_data;
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+void end_that_request_last(struct request *req)
+{
+       if (req->waiting != NULL)
+               complete(req->waiting);
+       req_finished_io(req);
+
+       blkdev_release_request(req);
+}
+
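+/*
+ * How a driver typically consumes the two helpers above, with
+ * io_request_lock held: finish buffers one at a time, and only once
+ * none remain dequeue and retire the request.  "xhd" is a placeholder
+ * name; blkdev_dequeue_request() is assumed from the stock 2.4
+ * blkdev.h.
+ */
+#if 0
+static void example_end_request(struct request *req, int uptodate)
+{
+       if (!end_that_request_first(req, uptodate, "xhd")) {
+               blkdev_dequeue_request(req);    /* off the queue        */
+               end_that_request_last(req);     /* wake waiter, free it */
+       }
+}
+#endif
+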
+int __init blk_dev_init(void)
+{
+       struct blk_dev_struct *dev;
+
+       request_cachep = kmem_cache_create("blkdev_requests",
+                                          sizeof(struct request),
+                                          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+       if (!request_cachep)
+               panic("Can't create request pool slab cache\n");
+
+       for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+               dev->queue = NULL;
+
+       memset(ro_bits,0,sizeof(ro_bits));
+       memset(max_readahead, 0, sizeof(max_readahead));
+       memset(max_sectors, 0, sizeof(max_sectors));
+
+#ifdef CONFIG_AMIGA_Z2RAM
+       z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+       stram_device_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+       isp16_init();
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
+       ide_init();             /* this MUST precede hd_init */
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
+       hd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+       ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+       xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+       mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+       { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+       swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+       swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+       amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+       atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+       floppy_init();
+#else
+#if defined(__i386__)  /* Do we even need this? */
+       outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+       cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+       acsi_init();
+#endif
+#ifdef CONFIG_MCD
+       mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+       mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+       sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+       aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+       sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+       gscd_init();
+#endif
+#ifdef CONFIG_CM206
+       cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+       optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+       sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+       ap_init();
+#endif
+#ifdef CONFIG_DDV
+       ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+       mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+       dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+       tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+        xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+       jsfd_init();
+#endif
+       return 0;
+}
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_grow_request_list);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(req_finished_io);
+EXPORT_SYMBOL(generic_unplug_device);
diff --git a/xen-2.4.16/drivers/ide/Makefile b/xen-2.4.16/drivers/ide/Makefile
new file mode 100644 (file)
index 0000000..574b7d2
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o driver.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/ide/ide-disk.c b/xen-2.4.16/drivers/ide/ide-disk.c
new file mode 100644 (file)
index 0000000..984e53c
--- /dev/null
@@ -0,0 +1,1550 @@
+/*
+ *  linux/drivers/ide/ide-disk.c       Version 1.10    June 9, 2000
+ *
+ *  Copyright (C) 1994-1998  Linus Torvalds & authors (see below)
+ */
+
+/*
+ *  Mostly written by Mark Lord <mlord@pobox.com>
+ *                and Gadi Oxman <gadio@netvision.net.il>
+ *                and Andre Hedrick <andre@linux-ide.org>
+ *
+ * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00                move disk only code from ide.c to ide-disk.c
+ *                     support optional byte-swapping of all data
+ * Version 1.01                fix previous byte-swapping code
+ * Version 1.02                remove ", LBA" from drive identification msgs
+ * Version 1.03                fix display of id->buf_size for big-endian
+ * Version 1.04                add /proc configurable settings and S.M.A.R.T support
+ * Version 1.05                add capacity support for ATA3 >= 8GB
+ * Version 1.06                get boot-up messages to show full cyl count
+ * Version 1.07                disable door-locking if it fails
+ * Version 1.08                fixed CHS/LBA translations for ATA4 > 8GB,
+ *                     process of adding new ATA4 compliance.
+ *                     fixed problems in allowing fdisk to see
+ *                     the entire disk.
+ * Version 1.09                added increment of rq->sector in ide_multwrite
+ *                     added UDMA 3/4 reporting
+ * Version 1.10                request queue changes, Ultra DMA 100
+ * Version 1.11                added 48-bit lba
+ * Version 1.12                adding taskfile io access method
+ */
+
+#define IDEDISK_VERSION        "1.12"
+
+#undef REALLY_SLOW_IO          /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030)
+#else
+#define IS_PDC4030_DRIVE (0)   /* auto-NULLs out pdc4030 code */
+#endif
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+#  undef __TASKFILE__IO /* flip to "define" to enable taskfile I/O */
+#else /* CONFIG_IDE_TASKFILE_IO */
+#  undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#ifndef __TASKFILE__IO
+
+static void idedisk_bswap_data (void *buffer, int wcount)
+{
+       u16 *p = buffer;
+
+       while (wcount--) {
+               *p = *p << 8 | *p >> 8; p++;
+               *p = *p << 8 | *p >> 8; p++;
+       }
+}
+
+static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       ide_input_data(drive, buffer, wcount);
+       if (drive->bswap)
+               idedisk_bswap_data(buffer, wcount);
+}
+
+static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       if (drive->bswap) {
+               idedisk_bswap_data(buffer, wcount);
+               ide_output_data(drive, buffer, wcount);
+               idedisk_bswap_data(buffer, wcount);
+       } else
+               ide_output_data(drive, buffer, wcount);
+}
+
+#endif /* __TASKFILE__IO */
+
+/*
+ * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
+ * value for this drive (from its reported identification information).
+ *
+ * Returns:    1 if lba_capacity looks sensible
+ *             0 otherwise
+ *
+ * It is called only once for each drive.
+ */
+static int lba_capacity_is_ok (struct hd_driveid *id)
+{
+       unsigned long lba_sects, chs_sects, head, tail;
+
+       if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+               printk("48-bit Drive: %llu \n", id->lba_capacity_2);
+               return 1;
+       }
+
+       /*
+        * The ATA spec tells large drives to return
+        * C/H/S = 16383/16/63 independent of their size.
+        * Some drives can be jumpered to use 15 heads instead of 16.
+        * Some drives can be jumpered to use 4092 cyls instead of 16383.
+        */
+       if ((id->cyls == 16383
+            || (id->cyls == 4092 && id->cur_cyls == 16383)) &&
+           id->sectors == 63 &&
+           (id->heads == 15 || id->heads == 16) &&
+           id->lba_capacity >= 16383*63*id->heads)
+               return 1;
+
+       lba_sects   = id->lba_capacity;
+       chs_sects   = id->cyls * id->heads * id->sectors;
+
+       /* perform a rough sanity check on lba_sects:  within 10% is OK */
+       if ((lba_sects - chs_sects) < chs_sects/10)
+               return 1;
+
+       /* some drives have the word order reversed */
+       head = ((lba_sects >> 16) & 0xffff);
+       tail = (lba_sects & 0xffff);
+       lba_sects = (head | (tail << 16));
+       if ((lba_sects - chs_sects) < chs_sects/10) {
+               id->lba_capacity = lba_sects;
+               return 1;       /* lba_capacity is (now) good */
+       }
+
+       return 0;       /* lba_capacity value may be bad */
+}
+
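+/*
+ * Worked example for the check above: a large drive pegged at
+ * C/H/S = 16383/16/63 claims 16383*16*63 = 16514064 sectors (~8.4 GB)
+ * regardless of its real size, so any lba_capacity of at least
+ * 16383*63*heads is accepted outright.  Smaller drives instead have
+ * lba_capacity matched against C*H*S to within 10%, with one retry
+ * after undoing a possible 16-bit word swap.
+ */
+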
+#ifndef __TASKFILE__IO
+
+/*
+ * read_intr() is the handler for disk read/multread interrupts
+ */
+static ide_startstop_t read_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       unsigned int msect, nsect;
+       struct request *rq;
+
+       /* new way for dealing with premature shared PCI interrupts */
+       if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
+               if (stat & (ERR_STAT|DRQ_STAT)) {
+                       return ide_error(drive, "read_intr", stat);
+               }
+               /* no data yet, so wait for another interrupt */
+               ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+               return ide_started;
+       }
+       msect = drive->mult_count;
+       
+read_next:
+       rq = HWGROUP(drive)->rq;
+       if (msect) {
+               if ((nsect = rq->current_nr_sectors) > msect)
+                       nsect = msect;
+               msect -= nsect;
+       } else
+               nsect = 1;
+       idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
+#ifdef DEBUG
+       printk("%s:  read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
+               drive->name, rq->sector, rq->sector+nsect-1,
+               (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
+#endif
+       rq->sector += nsect;
+       rq->buffer += nsect<<9;
+       rq->errors = 0;
+       i = (rq->nr_sectors -= nsect);
+       if (((long)(rq->current_nr_sectors -= nsect)) <= 0)
+               ide_end_request(1, HWGROUP(drive));
+       if (i > 0) {
+               if (msect)
+                       goto read_next;
+               ide_set_handler (drive, &read_intr, WAIT_CMD, NULL);
+                return ide_started;
+       }
+        return ide_stopped;
+}
+
+/*
+ * write_intr() is the handler for disk write interrupts
+ */
+static ide_startstop_t write_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       struct request *rq = hwgroup->rq;
+
+       if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+               printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat);
+        } else {
+#ifdef DEBUG
+               printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
+                       drive->name, rq->sector, (unsigned long) rq->buffer,
+                       rq->nr_sectors-1);
+#endif
+               if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
+                       rq->sector++;
+                       rq->buffer += 512;
+                       rq->errors = 0;
+                       i = --rq->nr_sectors;
+                       --rq->current_nr_sectors;
+                       if (((long)rq->current_nr_sectors) <= 0)
+                               ide_end_request(1, hwgroup);
+                       if (i > 0) {
+                               idedisk_output_data (drive, rq->buffer, SECTOR_WORDS);
+                               ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+                                return ide_started;
+                       }
+                        return ide_stopped;
+               }
+               return ide_stopped;     /* the original code did this here (?) */
+       }
+       return ide_error(drive, "write_intr", stat);
+}
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multiple-sector write operation.
+ *
+ * Returns 0 on success.
+ *
+ * Note that we may be called from two contexts - the do_rw_disk context
+ * and IRQ context. The IRQ can happen any time after we've output the
+ * full "mcount" number of sectors, so we must make sure we update the
+ * state _before_ we output the final part of the data!
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+       ide_hwgroup_t   *hwgroup= HWGROUP(drive);
+       struct request  *rq = &hwgroup->wrq;
+       do {
+               char *buffer;
+               int nsect = rq->current_nr_sectors;
+               if (nsect > mcount)
+                       nsect = mcount;
+               mcount -= nsect;
+               buffer = rq->buffer;
+
+               rq->sector += nsect;
+               rq->buffer += nsect << 9;
+               rq->nr_sectors -= nsect;
+               rq->current_nr_sectors -= nsect;
+
+               /* Do we move to the next bh after this? */
+               if (!rq->current_nr_sectors) {
+                       struct buffer_head *bh = rq->bh->b_reqnext;
+
+                       /* end early if we ran out of requests */
+                       if (!bh) {
+                               mcount = 0;
+                       } else {
+                               rq->bh = bh;
+                               rq->current_nr_sectors = bh->b_size >> 9;
+                               rq->buffer             = bh->b_data;
+                       }
+               }
+
+               /*
+                * OK, we're all set up for the interrupt
+                * to re-enter us on the last transfer.
+                */
+               idedisk_output_data(drive, buffer, nsect<<7);
+       } while (mcount);
+
+        return 0;
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ */
+static ide_startstop_t multwrite_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       struct request *rq = &hwgroup->wrq;
+
+       if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+               if (stat & DRQ_STAT) {
+                       /*
+                        *      The drive wants data. Remember rq is the copy
+                        *      of the request
+                        */
+                       if (rq->nr_sectors) {
+                               if (ide_multwrite(drive, drive->mult_count))
+                                       return ide_stopped;
+                               ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
+                               return ide_started;
+                       }
+               } else {
+                       /*
+                        *      If the copy has all the blocks completed then
+                        *      we can end the original request.
+                        */
+                       if (!rq->nr_sectors) {  /* all done? */
+                               rq = hwgroup->rq;
+                               for (i = rq->nr_sectors; i > 0;){
+                                       i -= rq->current_nr_sectors;
+                                       ide_end_request(1, hwgroup);
+                               }
+                               return ide_stopped;
+                       }
+               }
+               return ide_stopped;     /* the original code did this here (?) */
+       }
+       return ide_error(drive, "multwrite_intr", stat);
+}
+#endif /* __TASKFILE__IO */
+
+#ifdef __TASKFILE__IO
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block);
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       if (rq->cmd == READ)
+               goto good_command;
+       if (rq->cmd == WRITE)
+               goto good_command;
+
+       printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+       ide_end_request(0, HWGROUP(drive));
+       return ide_stopped;
+
+good_command:
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+       if (IS_PDC4030_DRIVE) {
+               extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long);
+               return promise_rw_disk(drive, rq, block);
+       }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+       if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing))  /* 48-bit LBA */
+               return lba_48_rw_disk(drive, rq, (unsigned long long) block);
+       if (drive->select.b.lba)                /* 28-bit LBA */
+               return lba_28_rw_disk(drive, rq, (unsigned long) block);
+
+       /* 28-bit CHS : DIE DIE DIE piece of legacy crap!!! */
+       return chs_rw_disk(drive, rq, (unsigned long) block);
+}
+
+static task_ioreg_t get_command (ide_drive_t *drive, int cmd)
+{
+       int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0;
+
+#if 1
+       lba48bit = drive->addressing;
+#endif
+
+       if ((cmd == READ) && (drive->using_dma))
+               return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA;
+       else if ((cmd == READ) && (drive->mult_count))
+               return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD;
+       else if (cmd == READ)
+               return (lba48bit) ? WIN_READ_EXT : WIN_READ;
+       else if ((cmd == WRITE) && (drive->using_dma))
+               return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA;
+       else if ((cmd == WRITE) && (drive->mult_count))
+               return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE;
+       else if (cmd == WRITE)
+               return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE;
+       else
+               return WIN_NOP;
+}
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       struct hd_drive_task_hdr        taskfile;
+       struct hd_drive_hob_hdr         hobfile;
+       ide_task_t                      args;
+
+       task_ioreg_t command    = get_command(drive, rq->cmd);
+       unsigned int track      = (block / drive->sect);
+       unsigned int sect       = (block % drive->sect) + 1;
+       unsigned int head       = (track % drive->head);
+       unsigned int cyl        = (track / drive->head);
+
+       memset(&taskfile, 0, sizeof(task_struct_t));
+       memset(&hobfile, 0, sizeof(hob_struct_t));
+
+       taskfile.sector_count   = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+       taskfile.sector_number  = sect;
+       taskfile.low_cylinder   = cyl;
+       taskfile.high_cylinder  = (cyl>>8);
+       taskfile.device_head    = head;
+       taskfile.device_head    |= drive->select.all;
+       taskfile.command        = command;
+
+#ifdef DEBUG
+       /* this is the CHS path, so always print CHS (no "lba" here) */
+       printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ");
+       printk("CHS=%d/%d/%d, ", cyl, head, sect);
+       printk("sectors=%ld, ", rq->nr_sectors);
+       printk("buffer=0x%08lx\n", (unsigned long) rq->buffer);
+#endif
+
+       memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+       memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+       args.command_type       = ide_cmd_type_parser(&args);
+       args.prehandler         = ide_pre_handler_parser(&taskfile, &hobfile);
+       args.handler            = ide_handler_parser(&taskfile, &hobfile);
+       args.posthandler        = NULL;
+       args.rq                 = (struct request *) rq;
+       args.block              = block;
+       rq->special             = NULL;
+       rq->special             = (ide_task_t *)&args;
+
+       return do_rw_taskfile(drive, &args);
+}
+
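+/*
+ * Worked example of the CHS split above, assuming sect = 63 and
+ * head = 16: block 10000 gives track = 10000/63 = 158,
+ * sect = 46 + 1 = 47, head = 158%16 = 14, cyl = 158/16 = 9.
+ */
+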
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       struct hd_drive_task_hdr        taskfile;
+       struct hd_drive_hob_hdr         hobfile;
+       ide_task_t                      args;
+
+       task_ioreg_t command    = get_command(drive, rq->cmd);
+
+       memset(&taskfile, 0, sizeof(task_struct_t));
+       memset(&hobfile, 0, sizeof(hob_struct_t));
+
+       taskfile.sector_count   = (rq->nr_sectors==256)?0x00:rq->nr_sectors;
+       taskfile.sector_number  = block;
+       taskfile.low_cylinder   = (block>>=8);
+       taskfile.high_cylinder  = (block>>=8);
+       taskfile.device_head    = ((block>>8)&0x0f);
+       taskfile.device_head    |= drive->select.all;
+       taskfile.command        = command;
+
+#ifdef DEBUG
+       /* "block" has been shifted away by the register setup above */
+       printk("%s: %sing: sectors=%ld, buffer=0x%08lx\n",
+               drive->name, (rq->cmd==READ) ? "read" : "writ",
+               rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+
+       memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+       memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+       args.command_type       = ide_cmd_type_parser(&args);
+       args.prehandler         = ide_pre_handler_parser(&taskfile, &hobfile);
+       args.handler            = ide_handler_parser(&taskfile, &hobfile);
+       args.posthandler        = NULL;
+       args.rq                 = (struct request *) rq;
+       args.block              = block;
+       rq->special             = NULL;
+       rq->special             = (ide_task_t *)&args;
+
+       return do_rw_taskfile(drive, &args);
+}
+
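+/*
+ * Worked example of the 28-bit packing above: block = 0x0ABCDE12 puts
+ * 0x12 in sector_number, 0xDE in low_cylinder, 0xBC in high_cylinder,
+ * and the top nibble 0xA in the low four bits of device_head.
+ */
+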
+/*
+ * 268435455  == 137439 MB or 28bit limit
+ * 320173056  == 163929 MB or 48bit addressing
+ * 1073741822 == 549756 MB or 48bit addressing fake drive
+ */
+
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block)
+{
+       struct hd_drive_task_hdr        taskfile;
+       struct hd_drive_hob_hdr         hobfile;
+       ide_task_t                      args;
+
+       task_ioreg_t command    = get_command(drive, rq->cmd);
+
+       memset(&taskfile, 0, sizeof(task_struct_t));
+       memset(&hobfile, 0, sizeof(hob_struct_t));
+
+       taskfile.sector_count   = rq->nr_sectors;
+       hobfile.sector_count    = (rq->nr_sectors>>8);
+
+       if (rq->nr_sectors == 65536) {
+               taskfile.sector_count   = 0x00;
+               hobfile.sector_count    = 0x00;
+       }
+
+       taskfile.sector_number  = block;        /* low lba */
+       taskfile.low_cylinder   = (block>>=8);  /* mid lba */
+       taskfile.high_cylinder  = (block>>=8);  /* hi  lba */
+       hobfile.sector_number   = (block>>=8);  /* low lba */
+       hobfile.low_cylinder    = (block>>=8);  /* mid lba */
+       hobfile.high_cylinder   = (block>>=8);  /* hi  lba */
+       taskfile.device_head    = drive->select.all;
+       hobfile.device_head     = taskfile.device_head;
+       hobfile.control         = (drive->ctl|0x80);
+       taskfile.command        = command;
+
+#ifdef DEBUG
+       /* "block" has been shifted away by the register setup above */
+       printk("%s: %sing: sectors=%ld, buffer=0x%08lx\n",
+               drive->name, (rq->cmd==READ) ? "read" : "writ",
+               rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+
+       memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr));
+       memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr));
+       args.command_type       = ide_cmd_type_parser(&args);
+       args.prehandler         = ide_pre_handler_parser(&taskfile, &hobfile);
+       args.handler            = ide_handler_parser(&taskfile, &hobfile);
+       args.posthandler        = NULL;
+       args.rq                 = (struct request *) rq;
+       args.block              = block;
+       rq->special             = NULL;
+       rq->special             = (ide_task_t *)&args;
+
+       return do_rw_taskfile(drive, &args);
+}
+
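+/*
+ * In the 48-bit case the LBA is split byte-wise between the classic
+ * taskfile registers (low 24 bits) and their hob copies (high 24
+ * bits), which the non-taskfile do_rw_disk() below writes first;
+ * e.g. block = 0x123456789ABC gives taskfile bytes 0xBC/0x9A/0x78
+ * and hobfile bytes 0x56/0x34/0x12.
+ */
+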
+#else /* !__TASKFILE__IO */
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+       if (drive->select.b.lba || IS_PDC4030_DRIVE) {
+#else /* !CONFIG_BLK_DEV_PDC4030 */
+       if (drive->select.b.lba) {
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+               if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+                       task_ioreg_t tasklets[10];
+
+                       tasklets[0] = 0;
+                       tasklets[1] = 0;
+                       tasklets[2] = rq->nr_sectors;
+                       tasklets[3] = (rq->nr_sectors>>8);
+                       if (rq->nr_sectors == 65536) {
+                               tasklets[2] = 0x00;
+                               tasklets[3] = 0x00;
+                       }
+                       tasklets[4] = (task_ioreg_t) block;
+                       tasklets[5] = (task_ioreg_t) (block>>8);
+                       tasklets[6] = (task_ioreg_t) (block>>16);
+                       tasklets[7] = (task_ioreg_t) (block>>24);
+                       tasklets[8] = (task_ioreg_t) 0;
+                       tasklets[9] = (task_ioreg_t) 0;
+//                     tasklets[8] = (task_ioreg_t) (block>>32);
+//                     tasklets[9] = (task_ioreg_t) (block>>40);
+#ifdef DEBUG
+                       printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n",
+                               drive->name,
+                               (rq->cmd==READ)?"read":"writ",
+                               block,
+                               rq->nr_sectors,
+                               (unsigned long) rq->buffer,
+                               block);
+                       printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n",
+                               drive->name, tasklets[3], tasklets[2],
+                               tasklets[9], tasklets[8], tasklets[7],
+                               tasklets[6], tasklets[5], tasklets[4]);
+#endif
+                       OUT_BYTE(tasklets[1], IDE_FEATURE_REG);
+                       OUT_BYTE(tasklets[3], IDE_NSECTOR_REG);
+                       OUT_BYTE(tasklets[7], IDE_SECTOR_REG);
+                       OUT_BYTE(tasklets[8], IDE_LCYL_REG);
+                       OUT_BYTE(tasklets[9], IDE_HCYL_REG);
+
+                       OUT_BYTE(tasklets[0], IDE_FEATURE_REG);
+                       OUT_BYTE(tasklets[2], IDE_NSECTOR_REG);
+                       OUT_BYTE(tasklets[4], IDE_SECTOR_REG);
+                       OUT_BYTE(tasklets[5], IDE_LCYL_REG);
+                       OUT_BYTE(tasklets[6], IDE_HCYL_REG);
+                       OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG);
+               } else {
+#ifdef DEBUG
+                       printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
+                               drive->name, (rq->cmd==READ)?"read":"writ",
+                               block, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+                       OUT_BYTE(0x00, IDE_FEATURE_REG);
+                       OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+                       OUT_BYTE(block,IDE_SECTOR_REG);
+                       OUT_BYTE(block>>=8,IDE_LCYL_REG);
+                       OUT_BYTE(block>>=8,IDE_HCYL_REG);
+                       OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
+               }
+       } else {
+               unsigned int sect,head,cyl,track;
+               track = block / drive->sect;
+               sect  = block % drive->sect + 1;
+               OUT_BYTE(sect,IDE_SECTOR_REG);
+               head  = track % drive->head;
+               cyl   = track / drive->head;
+
+               OUT_BYTE(0x00, IDE_FEATURE_REG);
+               OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG);
+               OUT_BYTE(cyl,IDE_LCYL_REG);
+               OUT_BYTE(cyl>>8,IDE_HCYL_REG);
+               OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
+#ifdef DEBUG
+               printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
+                       drive->name, (rq->cmd==READ)?"read":"writ", cyl,
+                       head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+       }
+#ifdef CONFIG_BLK_DEV_PDC4030
+       if (IS_PDC4030_DRIVE) {
+               extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *);
+               return do_pdc4030_io (drive, rq);
+       }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+       if (rq->cmd == READ) {
+#ifdef CONFIG_BLK_DEV_IDEDMA
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
+                       return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+               ide_set_handler(drive, &read_intr, WAIT_CMD, NULL);
+               if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+                       OUT_BYTE(drive->mult_count ? WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG);
+               } else {
+                       OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
+               }
+               return ide_started;
+       }
+       if (rq->cmd == WRITE) {
+               ide_startstop_t startstop;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
+                       return ide_started;
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+               if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+                       OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG);
+               } else {
+                       OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
+               }
+               if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+                       printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
+                               drive->mult_count ? "MULTWRITE" : "WRITE");
+                       return startstop;
+               }
+               if (!drive->unmask)
+                       __cli();        /* local CPU only */
+               if (drive->mult_count) {
+                       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       /*
+        * Ugh.. this part looks ugly because we MUST set up
+        * the interrupt handler before outputting the first block
+        * of data to be written.  If we hit an error (corrupted buffer list)
+        * in ide_multwrite(), then we need to remove the handler/timer
+        * before returning.  Fortunately, this NEVER happens (right?).
+        *
+        * Except when you get an error it seems...
+        */
+                       hwgroup->wrq = *rq; /* scratchpad */
+                       ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL);
+                       if (ide_multwrite(drive, drive->mult_count)) {
+                               unsigned long flags;
+                               spin_lock_irqsave(&io_request_lock, flags);
+                               hwgroup->handler = NULL;
+                               del_timer(&hwgroup->timer);
+                               spin_unlock_irqrestore(&io_request_lock, flags);
+                               return ide_stopped;
+                       }
+               } else {
+                       ide_set_handler (drive, &write_intr, WAIT_CMD, NULL);
+                       idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
+               }
+               return ide_started;
+       }
+       printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+       ide_end_request(0, HWGROUP(drive));
+       return ide_stopped;
+}
+
+#endif /* __TASKFILE__IO */
+
+static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       MOD_INC_USE_COUNT;
+       if (drive->removable && drive->usage == 1) {
+               struct hd_drive_task_hdr taskfile;
+               struct hd_drive_hob_hdr hobfile;
+               memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+               memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+               taskfile.command = WIN_DOORLOCK;
+               check_disk_change(inode->i_rdev);
+               /*
+                * Ignore the return code from door_lock,
+                * since the open() has already succeeded,
+                * and the door_lock is irrelevant at this point.
+                */
+               if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+                       drive->doorlocking = 0;
+       }
+       return 0;
+}
+
+static int do_idedisk_flushcache(ide_drive_t *drive);
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       if (drive->removable && !drive->usage) {
+               struct hd_drive_task_hdr taskfile;
+               struct hd_drive_hob_hdr hobfile;
+               memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+               memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+               taskfile.command = WIN_DOORUNLOCK;
+               invalidate_bdev(inode->i_bdev, 0);
+               if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+                       drive->doorlocking = 0;
+       }
+       if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+               if (do_idedisk_flushcache(drive))
+                       printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+                               drive->name);
+       MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{
+       return drive->removable;        /* if removable, always assume it was changed */
+}
+
+static void idedisk_revalidate (ide_drive_t *drive)
+{
+       grok_partitions(HWIF(drive)->gd, drive->select.b.unit,
+                       1<<PARTN_BITS,
+                       current_capacity(drive));
+}
+
+/*
+ * Queries for true maximum capacity of the drive.
+ * Returns maximum LBA address (> 0) of the drive, 0 if failed.
+ */
+static unsigned long idedisk_read_native_max_address(ide_drive_t *drive)
+{
+       ide_task_t args;
+       unsigned long addr = 0;
+
+       if (!(drive->id->command_set_1 & 0x0400) &&
+           !(drive->id->cfs_enable_2 & 0x0100))
+               return addr;
+
+       /* Create IDE/ATA command request structure */
+       memset(&args, 0, sizeof(ide_task_t));
+       args.tfRegister[IDE_SELECT_OFFSET]      = 0x40;
+       args.tfRegister[IDE_COMMAND_OFFSET]     = WIN_READ_NATIVE_MAX;
+       args.handler                            = task_no_data_intr;
+
+       /* submit command request */
+       ide_raw_taskfile(drive, &args, NULL);
+
+       /* if OK, compute maximum address value */
+       if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+               addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+                    | ((args.tfRegister[  IDE_HCYL_OFFSET]       ) << 16)
+                    | ((args.tfRegister[  IDE_LCYL_OFFSET]       ) <<  8)
+                    | ((args.tfRegister[IDE_SECTOR_OFFSET]       ));
+       }
+       addr++; /* since the return value is (maxlba - 1), we add 1 */
+       return addr;
+}
+
+static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive)
+{
+       ide_task_t args;
+       unsigned long long addr = 0;
+
+       /* Create IDE/ATA command request structure */
+       memset(&args, 0, sizeof(ide_task_t));
+
+       args.tfRegister[IDE_SELECT_OFFSET]      = 0x40;
+       args.tfRegister[IDE_COMMAND_OFFSET]     = WIN_READ_NATIVE_MAX_EXT;
+       args.handler                            = task_no_data_intr;
+
+        /* submit command request */
+        ide_raw_taskfile(drive, &args, NULL);
+
+       /* if OK, compute maximum address value */
+       if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+               u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+                          ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+                           (args.hobRegister[IDE_SECTOR_OFFSET_HOB]); 
+               u32 low  = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+                          ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+                           (args.tfRegister[IDE_SECTOR_OFFSET]);
+               addr = ((__u64)high << 24) | low;
+       }
+       addr++; /* since the return value is (maxlba - 1), we add 1 */
+       return addr;
+}
+
+#ifdef CONFIG_IDEDISK_STROKE
+/*
+ * Sets maximum virtual LBA address of the drive.
+ * Returns new maximum virtual LBA address (> 0) or 0 on failure.
+ */
+static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req)
+{
+       ide_task_t args;
+       unsigned long addr_set = 0;
+       
+       addr_req--;
+       /* Create IDE/ATA command request structure */
+       memset(&args, 0, sizeof(ide_task_t));
+       args.tfRegister[IDE_SECTOR_OFFSET]      = ((addr_req >>  0) & 0xff);
+       args.tfRegister[IDE_LCYL_OFFSET]        = ((addr_req >>  8) & 0xff);
+       args.tfRegister[IDE_HCYL_OFFSET]        = ((addr_req >> 16) & 0xff);
+       args.tfRegister[IDE_SELECT_OFFSET]      = ((addr_req >> 24) & 0x0f) | 0x40;
+       args.tfRegister[IDE_COMMAND_OFFSET]     = WIN_SET_MAX;
+       args.handler                            = task_no_data_intr;
+       /* submit command request */
+       ide_raw_taskfile(drive, &args, NULL);
+       /* if OK, read new maximum address value */
+       if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+               addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+                        | ((args.tfRegister[  IDE_HCYL_OFFSET]       ) << 16)
+                        | ((args.tfRegister[  IDE_LCYL_OFFSET]       ) <<  8)
+                        | ((args.tfRegister[IDE_SECTOR_OFFSET]       ));
+       }
+       addr_set++;
+       return addr_set;
+}
+
+static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req)
+{
+       ide_task_t args;
+       unsigned long long addr_set = 0;
+
+       addr_req--;
+       /* Create IDE/ATA command request structure */
+       memset(&args, 0, sizeof(ide_task_t));
+       args.tfRegister[IDE_SECTOR_OFFSET]      = ((addr_req >>  0) & 0xff);
+       args.tfRegister[IDE_LCYL_OFFSET]        = ((addr_req >>= 8) & 0xff);
+       args.tfRegister[IDE_HCYL_OFFSET]        = ((addr_req >>= 8) & 0xff);
+       args.tfRegister[IDE_SELECT_OFFSET]      = 0x40;
+       args.tfRegister[IDE_COMMAND_OFFSET]     = WIN_SET_MAX_EXT;
+       args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff);
+       args.hobRegister[IDE_LCYL_OFFSET_HOB]   = ((addr_req >>= 8) & 0xff);
+       args.hobRegister[IDE_HCYL_OFFSET_HOB]   = ((addr_req >>= 8) & 0xff);
+       args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40;
+       args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80);
+        args.handler                           = task_no_data_intr;
+       /* submit command request */
+       ide_raw_taskfile(drive, &args, NULL);
+       /* if OK, compute maximum address value */
+       if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+               u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+                          ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+                           (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+               u32 low  = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+                          ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+                           (args.tfRegister[IDE_SECTOR_OFFSET]);
+               addr_set = ((__u64)high << 24) | low;
+       }
+       return addr_set;
+}
+
+/*
+ * Tests if the drive supports Host Protected Area feature.
+ * Returns true if supported, false otherwise.
+ */
+static inline int idedisk_supports_host_protected_area(ide_drive_t *drive)
+{
+       int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0;
+       printk("%s: host protected area => %d\n", drive->name, flag);
+       return flag;
+}
+
+#endif /* CONFIG_IDEDISK_STROKE */
+
+/*
+ * Compute drive->capacity, the full capacity of the drive
+ * Called with drive->id != NULL.
+ *
+ * To compute capacity, this uses one of
+ *
+ *    1. CHS value set by user       (whatever user sets will be trusted)
+ *    2. LBA value from target drive (requires new ATA feature)
+ *    3. LBA value from system BIOS  (new one is OK, old one may break)
+ *    4. CHS value from system BIOS  (traditional style)
+ *
+ * in the above order (i.e., if a value of higher priority is
+ * available, the rest will be ignored).
+ */
+static void init_idedisk_capacity (ide_drive_t  *drive)
+{
+       struct hd_driveid *id = drive->id;
+       unsigned long capacity = drive->cyl * drive->head * drive->sect;
+       unsigned long set_max = idedisk_read_native_max_address(drive);
+       unsigned long long capacity_2 = capacity;
+       unsigned long long set_max_ext;
+
+       drive->capacity48 = 0;
+       drive->select.b.lba = 0;
+
+       if (id->cfs_enable_2 & 0x0400) {
+               capacity_2 = id->lba_capacity_2;
+               drive->head             = drive->bios_head = 255;
+               drive->sect             = drive->bios_sect = 63;
+               drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect);
+               drive->select.b.lba     = 1;
+               set_max_ext = idedisk_read_native_max_address_ext(drive);
+               if (set_max_ext > capacity_2) {
+#ifdef CONFIG_IDEDISK_STROKE
+                       set_max_ext = idedisk_read_native_max_address_ext(drive);
+                       set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext);
+                       if (set_max_ext) {
+                               drive->capacity48 = capacity_2 = set_max_ext;
+                               drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect);
+                               drive->select.b.lba = 1;
+                               drive->id->lba_capacity_2 = capacity_2;
+                        }
+#else /* !CONFIG_IDEDISK_STROKE */
+                       printk("%s: setmax_ext LBA %llu, native  %llu\n",
+                               drive->name, set_max_ext, capacity_2);
+#endif /* CONFIG_IDEDISK_STROKE */
+               }
+               drive->bios_cyl         = drive->cyl;
+               drive->capacity48       = capacity_2;
+               drive->capacity         = (unsigned long) capacity_2;
+               return;
+       /* Determine capacity, and use LBA if the drive properly supports it */
+       } else if ((id->capability & 2) && lba_capacity_is_ok(id)) {
+               capacity = id->lba_capacity;
+               drive->cyl = capacity / (drive->head * drive->sect);
+               drive->select.b.lba = 1;
+       }
+
+       if (set_max > capacity) {
+#ifdef CONFIG_IDEDISK_STROKE
+               set_max = idedisk_read_native_max_address(drive);
+               set_max = idedisk_set_max_address(drive, set_max);
+               if (set_max) {
+                       drive->capacity = capacity = set_max;
+                       drive->cyl = set_max / (drive->head * drive->sect);
+                       drive->select.b.lba = 1;
+                       drive->id->lba_capacity = capacity;
+               }
+#else /* !CONFIG_IDEDISK_STROKE */
+               printk("%s: setmax LBA %lu, native  %lu\n",
+                       drive->name, set_max, capacity);
+#endif /* CONFIG_IDEDISK_STROKE */
+       }
+
+       drive->capacity = capacity;
+
+       if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) {
+                drive->capacity48 = id->lba_capacity_2;
+               drive->head = 255;
+               drive->sect = 63;
+               drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect);
+       }
+}
+
+static unsigned long idedisk_capacity (ide_drive_t *drive)
+{
+       if (drive->id->cfs_enable_2 & 0x0400)
+               return (drive->capacity48 - drive->sect0);
+       return (drive->capacity - drive->sect0);
+}
+
+static ide_startstop_t idedisk_special (ide_drive_t *drive)
+{
+       special_t *s = &drive->special;
+
+       if (s->b.set_geometry) {
+               struct hd_drive_task_hdr taskfile;
+               struct hd_drive_hob_hdr hobfile;
+               ide_handler_t *handler = NULL;
+
+               memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+               memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+               s->b.set_geometry       = 0;
+               taskfile.sector_number  = drive->sect;
+               taskfile.low_cylinder   = drive->cyl;
+               taskfile.high_cylinder  = drive->cyl>>8;
+               taskfile.device_head    = ((drive->head-1)|drive->select.all)&0xBF;
+               if (!IS_PDC4030_DRIVE) {
+                       taskfile.sector_count   = drive->sect;
+                       taskfile.command        = WIN_SPECIFY;
+                       handler                 = ide_handler_parser(&taskfile, &hobfile);
+               }
+               do_taskfile(drive, &taskfile, &hobfile, handler);
+       } else if (s->b.recalibrate) {
+               s->b.recalibrate = 0;
+               if (!IS_PDC4030_DRIVE) {
+                       struct hd_drive_task_hdr taskfile;
+                       struct hd_drive_hob_hdr hobfile;
+                       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+                       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+                       taskfile.sector_count   = drive->sect;
+                       taskfile.command        = WIN_RESTORE;
+                       do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+               }
+       } else if (s->b.set_multmode) {
+               s->b.set_multmode = 0;
+               if (drive->id && drive->mult_req > drive->id->max_multsect)
+                       drive->mult_req = drive->id->max_multsect;
+               if (!IS_PDC4030_DRIVE) {
+                       struct hd_drive_task_hdr taskfile;
+                       struct hd_drive_hob_hdr hobfile;
+                       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+                       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+                       taskfile.sector_count   = drive->mult_req;
+                       taskfile.command        = WIN_SETMULT;
+                       do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile));
+               }
+       } else if (s->all) {
+               int special = s->all;
+               s->all = 0;
+               printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+               return ide_stopped;
+       }
+       return IS_PDC4030_DRIVE ? ide_stopped : ide_started;
+}
+
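+/*
+ * After a reset, pre-LBA48 ("legacy") drives get their geometry
+ * re-specified and a recalibrate queued via idedisk_special() above;
+ * drives with the 48-bit feature set enabled need neither.
+ */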
+static void idedisk_pre_reset (ide_drive_t *drive)
+{
+       int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1;
+
+       drive->special.all = 0;
+       drive->special.b.set_geometry = legacy;
+       drive->special.b.recalibrate  = legacy;
+       if (OK_TO_RESET_CONTROLLER)
+               drive->mult_count = 0;
+       if (!drive->keep_settings && !drive->using_dma)
+               drive->mult_req = 0;
+       if (drive->mult_req != drive->mult_count)
+               drive->special.b.set_multmode = 1;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int smart_enable(ide_drive_t *drive)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.feature        = SMART_ENABLE;
+       taskfile.low_cylinder   = SMART_LCYL_PASS;
+       taskfile.high_cylinder  = SMART_HCYL_PASS;
+       taskfile.command        = WIN_SMART;
+       return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int get_smart_values(ide_drive_t *drive, byte *buf)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.feature        = SMART_READ_VALUES;
+       taskfile.sector_count   = 0x01;
+       taskfile.low_cylinder   = SMART_LCYL_PASS;
+       taskfile.high_cylinder  = SMART_HCYL_PASS;
+       taskfile.command        = WIN_SMART;
+       (void) smart_enable(drive);
+       return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int get_smart_thresholds(ide_drive_t *drive, byte *buf)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.feature        = SMART_READ_THRESHOLDS;
+       taskfile.sector_count   = 0x01;
+       taskfile.low_cylinder   = SMART_LCYL_PASS;
+       taskfile.high_cylinder  = SMART_HCYL_PASS;
+       taskfile.command        = WIN_SMART;
+       (void) smart_enable(drive);
+       return ide_wait_taskfile(drive, &taskfile, &hobfile, buf);
+}
+
+static int proc_idedisk_read_cache
+       (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+       ide_drive_t     *drive = (ide_drive_t *) data;
+       char            *out = page;
+       int             len;
+
+       if (drive->id)
+               len = sprintf(out,"%i\n", drive->id->buf_size / 2);
+       else
+               len = sprintf(out,"(none)\n");
+       PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_thresholds
+       (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+       ide_drive_t     *drive = (ide_drive_t *)data;
+       int             len = 0, i = 0;
+
+       if (!get_smart_thresholds(drive, page)) {
+               unsigned short *val = (unsigned short *) page;
+               char *out = ((char *)val) + (SECTOR_WORDS * 4);
+               page = out;
+               do {
+                       out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+                       val += 1;
+               } while (i < (SECTOR_WORDS * 2));
+               len = out - page;
+       }
+       PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
+
+static int proc_idedisk_read_smart_values
+       (char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+       ide_drive_t     *drive = (ide_drive_t *)data;
+       int             len = 0, i = 0;
+
+       if (!get_smart_values(drive, page)) {
+               unsigned short *val = (unsigned short *) page;
+               char *out = ((char *)val) + (SECTOR_WORDS * 4);
+               page = out;
+               do {
+                       out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n');
+                       val += 1;
+               } while (i < (SECTOR_WORDS * 2));
+               len = out - page;
+       }
+       PROC_IDE_READ_RETURN(page,start,off,count,eof,len);
+}
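+
+/*
+ * Both SMART dumps above render the returned sector as SECTOR_WORDS * 2
+ * little-endian 16-bit words in hex, eight per line, e.g. (values
+ * illustrative only):
+ *
+ *     0a00 0100 6b32 0000 7d64 0000 af23 0000
+ *
+ * The raw data lands at the start of the page and the text is written
+ * SECTOR_WORDS * 4 bytes past it, so the two regions cannot overlap.
+ */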
+
+static ide_proc_entry_t idedisk_proc[] = {
+       { "cache",              S_IFREG|S_IRUGO,        proc_idedisk_read_cache,                NULL },
+       { "geometry",           S_IFREG|S_IRUGO,        proc_ide_read_geometry,                 NULL },
+       { "smart_values",       S_IFREG|S_IRUSR,        proc_idedisk_read_smart_values,         NULL },
+       { "smart_thresholds",   S_IFREG|S_IRUSR,        proc_idedisk_read_smart_thresholds,     NULL },
+       { NULL, 0, NULL, NULL }
+};
+
+#else
+
+#define        idedisk_proc    NULL
+
+#endif /* CONFIG_PROC_FS */
+
+static int set_multcount(ide_drive_t *drive, int arg)
+{
+#ifdef __TASKFILE__IO
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+
+       if (drive->special.b.set_multmode)
+               return -EBUSY;
+
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.sector_count   = drive->mult_req;
+       taskfile.command        = WIN_SETMULT;
+       drive->mult_req         = arg;
+       drive->special.b.set_multmode = 1;
+       ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+#else /* !__TASKFILE__IO */
+       struct request rq;
+
+       if (drive->special.b.set_multmode)
+               return -EBUSY;
+       ide_init_drive_cmd (&rq);
+       rq.cmd = IDE_DRIVE_CMD;
+       drive->mult_req = arg;
+       drive->special.b.set_multmode = 1;
+       (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+#endif /* __TASKFILE__IO */
+       return (drive->mult_count == arg) ? 0 : -EIO;
+}
+
+static int set_nowerr(ide_drive_t *drive, int arg)
+{
+       if (ide_spin_wait_hwgroup(drive))
+               return -EBUSY;
+       drive->nowerr = arg;
+       drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
+       spin_unlock_irq(&io_request_lock);
+       return 0;
+}
+
+static int write_cache (ide_drive_t *drive, int arg)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.feature        = (arg) ? SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE;
+       taskfile.command        = WIN_SETFEATURES;
+
+       if (!(drive->id->cfs_enable_2 & 0x3000))
+               return 1;
+
+       (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+       drive->wcache = arg;
+       return 0;
+}
+
+static int do_idedisk_standby (ide_drive_t *drive)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       taskfile.command        = WIN_STANDBYNOW1;
+       return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int do_idedisk_flushcache (ide_drive_t *drive)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+       if (drive->id->cfs_enable_2 & 0x2400) {
+               taskfile.command        = WIN_FLUSH_CACHE_EXT;
+       } else {
+               taskfile.command        = WIN_FLUSH_CACHE;
+       }
+       return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+}
+
+static int set_acoustic (ide_drive_t *drive, int arg)
+{
+       struct hd_drive_task_hdr taskfile;
+       struct hd_drive_hob_hdr hobfile;
+       memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+       memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+
+       taskfile.feature        = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM;
+       taskfile.sector_count   = arg;
+
+       taskfile.command        = WIN_SETFEATURES;
+       (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL);
+       drive->acoustic = arg;
+       return 0;
+}
+
+static int probe_lba_addressing (ide_drive_t *drive, int arg)
+{
+       drive->addressing = 0;
+
+       if (!(drive->id->cfs_enable_2 & 0x0400))
+                return -EIO;
+
+       drive->addressing = arg;
+       return 0;
+}
+
+static int set_lba_addressing (ide_drive_t *drive, int arg)
+{
+       return (probe_lba_addressing(drive, arg));
+}
+
+static void idedisk_add_settings(ide_drive_t *drive)
+{
+       struct hd_driveid *id = drive->id;
+#if 0
+       int major = HWIF(drive)->major;
+       int minor = drive->select.b.unit << PARTN_BITS;
+#endif
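+       /*
+        * ide_add_setting() columns, for reference: name, rw flags,
+        * read ioctl, write ioctl, data type, min, max, mul_factor,
+        * div_factor, data pointer, set function (NULL = write the
+        * value directly).
+        */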
+
+       ide_add_setting(drive,  "bios_cyl",             SETTING_RW,                                     -1,                     -1,                     TYPE_INT,       0,      65535,                          1,      1,      &drive->bios_cyl,               NULL);
+       ide_add_setting(drive,  "bios_head",            SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      255,                            1,      1,      &drive->bios_head,              NULL);
+       ide_add_setting(drive,  "bios_sect",            SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      63,                             1,      1,      &drive->bios_sect,              NULL);
+       ide_add_setting(drive,  "address",              SETTING_RW,                                     HDIO_GET_ADDRESS,       HDIO_SET_ADDRESS,       TYPE_INTA,      0,      2,                              1,      1,      &drive->addressing,     set_lba_addressing);
+       ide_add_setting(drive,  "bswap",                SETTING_READ,                                   -1,                     -1,                     TYPE_BYTE,      0,      1,                              1,      1,      &drive->bswap,                  NULL);
+       ide_add_setting(drive,  "multcount",            id ? SETTING_RW : SETTING_READ,                 HDIO_GET_MULTCOUNT,     HDIO_SET_MULTCOUNT,     TYPE_BYTE,      0,      id ? id->max_multsect : 0,      1,      1,      &drive->mult_count,             set_multcount);
+       ide_add_setting(drive,  "nowerr",               SETTING_RW,                                     HDIO_GET_NOWERR,        HDIO_SET_NOWERR,        TYPE_BYTE,      0,      1,                              1,      1,      &drive->nowerr,                 set_nowerr);
+#if 0
+       ide_add_setting(drive,  "breada_readahead",     SETTING_RW,                                     BLKRAGET,               BLKRASET,               TYPE_INT,       0,      255,                            1,      1,      &read_ahead[major],             NULL);
+       ide_add_setting(drive,  "file_readahead",       SETTING_RW,                                     BLKFRAGET,              BLKFRASET,              TYPE_INTA,      0,      4096,                   PAGE_SIZE,      1024,   &max_readahead[major][minor],   NULL);
+       ide_add_setting(drive,  "max_kb_per_request",   SETTING_RW,                                     BLKSECTGET,             BLKSECTSET,             TYPE_INTA,      1,      255,                            1,      1,      &max_sectors[major][minor],     NULL);
+#endif
+       ide_add_setting(drive,  "lun",                  SETTING_RW,                                     -1,                     -1,                     TYPE_INT,       0,      7,                              1,      1,      &drive->lun,                    NULL);
+       ide_add_setting(drive,  "wcache",               SETTING_RW,                                     HDIO_GET_WCACHE,        HDIO_SET_WCACHE,        TYPE_BYTE,      0,      1,                              1,      1,      &drive->wcache,                 write_cache);
+       ide_add_setting(drive,  "acoustic",             SETTING_RW,                                     HDIO_GET_ACOUSTIC,      HDIO_SET_ACOUSTIC,      TYPE_BYTE,      0,      254,                            1,      1,      &drive->acoustic,               set_acoustic);
+       ide_add_setting(drive,  "failures",             SETTING_RW,                                     -1,                     -1,                     TYPE_INT,       0,      65535,                          1,      1,      &drive->failures,               NULL);
+       ide_add_setting(drive,  "max_failures",         SETTING_RW,                                     -1,                     -1,                     TYPE_INT,       0,      65535,                          1,      1,      &drive->max_failures,           NULL);
+}
+
+static void idedisk_setup (ide_drive_t *drive)
+{
+       int i;
+       
+       struct hd_driveid *id = drive->id;
+       unsigned long capacity;
+       
+       idedisk_add_settings(drive);
+
+       if (id == NULL)
+               return;
+
+       /*
+        * CompactFlash cards and their brethren look just like hard drives
+        * to us, but they are removable and don't have a doorlock mechanism.
+        */
+       if (drive->removable && !drive_is_flashcard(drive)) {
+               /*
+                * Removable disks (e.g. SyQuest); ignore 'WD' drives
+                */
+               if (id->model[0] != 'W' || id->model[1] != 'D') {
+                       drive->doorlocking = 1;
+               }
+       }
+       for (i = 0; i < MAX_DRIVES; ++i) {
+               ide_hwif_t *hwif = HWIF(drive);
+
+               if (drive != &hwif->drives[i]) continue;
+#ifdef DEVFS_MUST_DIE
+               hwif->gd->de_arr[i] = drive->de;
+#endif
+               if (drive->removable)
+                       hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+               break;
+       }
+
+       /* Extract geometry if we did not already have one for the drive */
+       if (!drive->cyl || !drive->head || !drive->sect) {
+               drive->cyl     = drive->bios_cyl  = id->cyls;
+               drive->head    = drive->bios_head = id->heads;
+               drive->sect    = drive->bios_sect = id->sectors;
+       }
+
+       /* Handle logical geometry translation by the drive */
+       if ((id->field_valid & 1) && id->cur_cyls &&
+           id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) {
+               drive->cyl  = id->cur_cyls;
+               drive->head = id->cur_heads;
+               drive->sect = id->cur_sectors;
+       }
+
+       /* Use physical geometry if what we have still makes no sense */
+       if (drive->head > 16 && id->heads && id->heads <= 16) {
+               drive->cyl  = id->cyls;
+               drive->head = id->heads;
+               drive->sect = id->sectors;
+       }
+
+       /* calculate drive capacity, and select LBA if possible */
+       init_idedisk_capacity (drive);
+
+       /*
+        * if possible, give fdisk access to more of the drive,
+        * by correcting bios_cyls:
+        */
+       capacity = idedisk_capacity (drive);
+       if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
+           (!drive->forced_geom) && drive->bios_sect && drive->bios_head)
+               drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
+       printk (KERN_INFO "XEN %s: %ld sectors", drive->name, capacity);
+
+       /* Give size in megabytes (MB), not mebibytes (MiB). */
+       /* We compute the exact rounded value, avoiding overflow. */
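+       /* With 512-byte sectors, MB = capacity * 512 / 1000000
+        * = capacity / 1953.125.  Since (1 - 1/625) / 1950 = 1 / 1953.125,
+        * the expression below computes just that with integer ops, and
+        * the +974 (~ 0.4995 * 1950) before the final divide rounds to
+        * the nearest MB. */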
+       printk (" (%ld MB)", (capacity - capacity/625 + 974)/1950);
+
+       /* Only print cache size when it was specified */
+       if (id->buf_size)
+               printk (" w/%dKiB Cache", id->buf_size/2);
+
+       printk(", CHS=%d/%d/%d", 
+              drive->bios_cyl, drive->bios_head, drive->bios_sect);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+       if (drive->using_dma)
+               (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+       printk("\n");
+
+       drive->mult_count = 0;
+       if (id->max_multsect) {
+#ifdef CONFIG_IDEDISK_MULTI_MODE
+               id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0;
+               id->multsect_valid = id->multsect ? 1 : 0;
+               drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT;
+               drive->special.b.set_multmode = drive->mult_req ? 1 : 0;
+#else  /* original, pre IDE-NFG, per request of AC */
+               drive->mult_req = INITIAL_MULT_COUNT;
+               if (drive->mult_req > id->max_multsect)
+                       drive->mult_req = id->max_multsect;
+               if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
+                       drive->special.b.set_multmode = 1;
+#endif /* CONFIG_IDEDISK_MULTI_MODE */
+       }
+       drive->no_io_32bit = id->dword_io ? 1 : 0;
+       if (drive->id->cfs_enable_2 & 0x3000)
+               write_cache(drive, (id->cfs_enable_2 & 0x3000));
+       (void) probe_lba_addressing(drive, 1);
+}
+
+static int idedisk_cleanup (ide_drive_t *drive)
+{
+       if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+               if (do_idedisk_flushcache(drive))
+                       printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+                               drive->name);
+       return ide_unregister_subdriver(drive);
+}
+
+int idedisk_reinit(ide_drive_t *drive);
+
+/*
+ *      IDE subdriver functions, registered with ide.c
+ */
+static ide_driver_t idedisk_driver = {
+       name:                   "ide-disk",
+       version:                IDEDISK_VERSION,
+       media:                  ide_disk,
+       busy:                   0,
+       supports_dma:           1,
+       supports_dsc_overlap:   0,
+       cleanup:                idedisk_cleanup,
+       standby:                do_idedisk_standby,
+       flushcache:             do_idedisk_flushcache,
+       do_request:             do_rw_disk,
+       end_request:            NULL,
+       ioctl:                  NULL,
+       open:                   idedisk_open,
+       release:                idedisk_release,
+       media_change:           idedisk_media_change,
+       revalidate:             idedisk_revalidate,
+       pre_reset:              idedisk_pre_reset,
+       capacity:               idedisk_capacity,
+       special:                idedisk_special,
+       /*proc:                 idedisk_proc,*/
+       reinit:                 idedisk_reinit,
+       ata_prebuilder:         NULL,
+       atapi_prebuilder:       NULL,
+};
+
+int idedisk_init (void);
+static ide_module_t idedisk_module = {
+       IDE_DRIVER_MODULE,
+       idedisk_init,
+       &idedisk_driver,
+       NULL
+};
+
+MODULE_DESCRIPTION("ATA DISK Driver");
+
+int idedisk_reinit (ide_drive_t *drive)
+{
+       int failed = 0;
+
+       MOD_INC_USE_COUNT;
+
+       if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+               printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+               return 1;
+       }
+       DRIVER(drive)->busy++;
+       idedisk_setup(drive);
+       if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+               printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+               (void) idedisk_cleanup(drive);
+               DRIVER(drive)->busy--;
+               return 1;
+       }
+       DRIVER(drive)->busy--;
+       failed--;
+
+       ide_register_module(&idedisk_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static void __exit idedisk_exit (void)
+{
+       ide_drive_t *drive;
+       int failed = 0;
+
+       while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, failed)) != NULL) {
+               if (idedisk_cleanup (drive)) {
+                       printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name);
+                       failed++;
+               }
+               /* We must remove proc entries defined in this module.
+                  Otherwise we oops while accessing these entries */
+#ifdef CONFIG_PROC_FS
+               if (drive->proc)
+                       ide_remove_proc_entries(drive->proc, idedisk_proc);
+#endif
+       }
+       ide_unregister_module(&idedisk_module);
+}
+
+int idedisk_init (void)
+{
+       ide_drive_t *drive;
+       int failed = 0;
+       
+       MOD_INC_USE_COUNT;
+       while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, failed++)) != NULL) {
+               if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+                       printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+                       continue;
+               }
+               DRIVER(drive)->busy++;
+               idedisk_setup(drive);
+               if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+                       printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+                       (void) idedisk_cleanup(drive);
+                       DRIVER(drive)->busy--;
+                       continue;
+               }
+               DRIVER(drive)->busy--;
+               failed--;
+       }
+       ide_register_module(&idedisk_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+module_init(idedisk_init);
+module_exit(idedisk_exit);
+MODULE_LICENSE("GPL");
diff --git a/xen-2.4.16/drivers/ide/ide-dma.c b/xen-2.4.16/drivers/ide/ide-dma.c
new file mode 100644 (file)
index 0000000..f0cd6cc
--- /dev/null
@@ -0,0 +1,867 @@
+/*
+ *  linux/drivers/ide/ide-dma.c                Version 4.10    June 9, 2000
+ *
+ *  Copyright (c) 1999-2000    Andre Hedrick <andre@linux-ide.org>
+ *  May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ *  Special Thanks to Mark for his Six years of work.
+ *
+ *  Copyright (c) 1995-1998  Mark Lord
+ *  May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ * This module provides support for the bus-master IDE DMA functions
+ * of various PCI chipsets, including the Intel PIIX (i82371FB for
+ * the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and 
+ * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset)
+ * ("PIIX" stands for "PCI ISA IDE Xcellerator").
+ *
+ * Pretty much the same code works for other IDE PCI bus-mastering chipsets.
+ *
+ * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies).
+ *
+ * By default, DMA support is prepared for use, but is currently enabled only
+ * for drives which already have DMA enabled (UltraDMA or mode 2 multi/single),
+ * or which are recognized as "good" (see table below).  Drives with only mode0
+ * or mode1 (multi/single) DMA should also work with this chipset/driver
+ * (e.g. MC2112A) but are not enabled by default.
+ *
+ * Use "hdparm -i" to view modes supported by a given drive.
+ *
+ * The hdparm-3.5 (or later) utility can be used for manually enabling/disabling
+ * DMA support, but must be (re-)compiled against this kernel version or later.
+ *
+ * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting.
+ * If problems arise, ide.c will disable DMA operation after a few retries.
+ * This error recovery mechanism works and has been extremely well exercised.
+ *
+ * IDE drives, depending on their vintage, may support several different modes
+ * of DMA operation.  The boot-time modes are indicated with a "*" in
+ * the "hdparm -i" listing, and can be changed with *knowledgeable* use of
+ * the "hdparm -X" feature.  There is seldom a need to do this, as drives
+ * normally power-up with their "best" PIO/DMA modes enabled.
+ *
+ * Testing has been done with a rather extensive number of drives,
+ * with Quantum & Western Digital models generally outperforming the pack,
+ * and Fujitsu & Conner (and some Seagate which are really Conner) drives
+ * showing more lackluster throughput.
+ *
+ * Keep an eye on /var/adm/messages for "DMA disabled" messages.
+ *
+ * Some people have reported trouble with Intel Zappa motherboards.
+ * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0,
+ * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe
+ * (thanks to Glen Morrell <glen@spin.Stanford.edu> for researching this).
+ *
+ * Thanks to "Christopher J. Reimer" <reimer@doe.carleton.ca> for
+ * fixing the problem with the BIOS on some Acer motherboards.
+ *
+ * Thanks to "Benoit Poulot-Cazajous" <poulot@chorus.fr> for testing
+ * "TX" chipset compatibility and for providing patches for the "TX" chipset.
+ *
+ * Thanks to Christian Brunner <chb@muc.de> for taking a good first crack
+ * at generic DMA -- his patches were referred to when preparing this code.
+ *
+ * Most importantly, thanks to Robert Bringman <rob@mars.trion.com>
+ * for supplying a Promise UDMA board & WD UDMA drive for this work!
+ *
+ * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports.
+ *
+ * check_drive_lists(ide_drive_t *drive, int good_bad)
+ *
+ * ATA-66/100 and recovery functions, I forgot the rest......
+ * SELECT_READ_WRITE(hwif,drive,func) for active tuning based on IO direction.
+ *
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/pci.h>
+#include <xeno/init.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+/*
+ * Long lost data from 2.0.34 that is now in 2.0.39
+ *
+ * This was used in ./drivers/block/triton.c to do DMA Base address setup
+ * when PnP failed.  Oh the things we forget.  I believe this was part
+ * of SFF-8038i that has been withdrawn from public access... :-((
+ */
+#define DEFAULT_BMIBA  0xe800  /* in case BIOS did not init it */
+#define DEFAULT_BMCRBA 0xcc00  /* VIA's default value */
+#define DEFAULT_BMALIBA        0xd400  /* ALI's default value */
+
+extern char *ide_dmafunc_verbose(ide_dma_action_t dmafunc);
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+
+struct drive_list_entry {
+       char * id_model;
+       char * id_firmware;
+};
+
+struct drive_list_entry drive_whitelist [] = {
+
+       { "Micropolis 2112A"    ,       "ALL"           },
+       { "CONNER CTMA 4000"    ,       "ALL"           },
+       { "CONNER CTT8000-A"    ,       "ALL"           },
+       { "ST34342A"            ,       "ALL"           },
+       { 0                     ,       0               }
+};
+
+struct drive_list_entry drive_blacklist [] = {
+
+       { "WDC AC11000H"        ,       "ALL"           },
+       { "WDC AC22100H"        ,       "ALL"           },
+       { "WDC AC31000H"        ,       "ALL"           },
+       { "WDC AC32500H"        ,       "ALL"           },
+       { "WDC AC33100H"        ,       "ALL"           },
+       { "WDC AC31600H"        ,       "ALL"           },
+       { "WDC AC32100H"        ,       "24.09P07"      },
+       { "WDC AC23200L"        ,       "21.10N21"      },
+       { "Compaq CRD-8241B"    ,       "ALL"           },
+       { "CRD-8400B"           ,       "ALL"           },
+       { "CRD-8480B",                  "ALL"           },
+       { "CRD-8480C",                  "ALL"           },
+       { "CRD-8482B",                  "ALL"           },
+       { "CRD-84"              ,       "ALL"           },
+       { "SanDisk SDP3B"       ,       "ALL"           },
+       { "SanDisk SDP3B-64"    ,       "ALL"           },
+       { "SANYO CD-ROM CRD"    ,       "ALL"           },
+       { "HITACHI CDR-8"       ,       "ALL"           },
+       { "HITACHI CDR-8335"    ,       "ALL"           },
+       { "HITACHI CDR-8435"    ,       "ALL"           },
+       { "Toshiba CD-ROM XM-6202B"     ,       "ALL"           },
+       { "CD-532E-A"           ,       "ALL"           },
+       { "E-IDE CD-ROM CR-840",        "ALL"           },
+       { "CD-ROM Drive/F5A",   "ALL"           },
+       { "RICOH CD-R/RW MP7083A",      "ALL"           },
+       { "WPI CDD-820",                "ALL"           },
+       { "SAMSUNG CD-ROM SC-148C",     "ALL"           },
+       { "SAMSUNG CD-ROM SC-148F",     "ALL"           },
+       { "SAMSUNG CD-ROM SC",  "ALL"           },
+       { "SAMSUNG CD-ROM SN-124",      "ALL"           },
+       { "PLEXTOR CD-R PX-W8432T",     "ALL"           },
+       { "ATAPI CD-ROM DRIVE 40X MAXIMUM",     "ALL"           },
+       { "_NEC DV5800A",               "ALL"           },  
+       { 0                     ,       0               }
+
+};
+
+int in_drive_list(struct hd_driveid *id, struct drive_list_entry * drive_table)
+{
+       for ( ; drive_table->id_model ; drive_table++)
+               if ((!strcmp(drive_table->id_model, id->model)) &&
+                   ((!strstr(drive_table->id_firmware, id->fw_rev)) ||
+                    (!strcmp(drive_table->id_firmware, "ALL"))))
+                       return 1;
+       return 0;
+}
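+
+/*
+ * An id_firmware of "ALL" acts as a wildcard: e.g. the
+ * { "WDC AC11000H", "ALL" } entry above blacklists that model regardless
+ * of firmware revision.  Model strings must match exactly.
+ */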
+
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * good_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which do not support mode2 DMA but which are
+ * known to work fine with this interface under Linux.
+ */
+const char *good_dma_drives[] = {"Micropolis 2112A",
+                                "CONNER CTMA 4000",
+                                "CONNER CTT8000-A",
+                                "ST34342A",    /* for Sun Ultra */
+                                NULL};
+
+/*
+ * bad_dma_drives() lists the model names (from "hdparm -i")
+ * of drives which supposedly support (U)DMA but which are
+ * known to corrupt data with this interface under Linux.
+ *
+ * This is an empirical list, generated from bug reports. That means it
+ * reflects actual problem distributions, but it doesn't establish whether
+ * the drive, the controller, the cabling, the software, or some
+ * combination thereof is at fault. If you don't happen to agree with the
+ * kernel's opinion of your drive, use hdparm to turn DMA on.
+ */
+const char *bad_dma_drives[] = {"WDC AC11000H",
+                               "WDC AC22100H",
+                               "WDC AC32100H",
+                               "WDC AC32500H",
+                               "WDC AC33100H",
+                               "WDC AC31600H",
+                               NULL};
+
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+
+/*
+ * Our Physical Region Descriptor (PRD) table should be large enough
+ * to handle the biggest I/O request we are likely to see.  Since requests
+ * can have no more than 256 sectors, and since the typical blocksize is
+ * two or more sectors, we could get by with a limit of 128 entries here for
+ * the usual worst case.  Most requests seem to include some contiguous blocks,
+ * further reducing the number of table entries required.
+ *
+ * The driver reverts to PIO mode for individual requests that exceed
+ * this limit (possible with 512-byte blocksizes, e.g. MSDOS f/s), so handling
+ * 100% of all crazy scenarios here is not necessary.
+ *
+ * As it turns out though, we must allocate a full 4KB page for this,
+ * so the two PRD tables (ide0 & ide1) will each get half of that,
+ * allowing each to have about 256 entries (8 bytes each) from this.
+ */
+#define PRD_BYTES      8
+#define PRD_ENTRIES    (PAGE_SIZE / (2 * PRD_BYTES))
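+
+#if 0  /* illustrative sketch only, not built */
+/*
+ * ide_build_dmatable() below fills the table with pairs of little-endian
+ * 32-bit words: a physical base address (never crossing a 64kB boundary),
+ * then a byte count in the low 16 bits (0x0000 = 64kB), with bit 31 of
+ * the final entry's second word marking end-of-table.
+ */
+struct prd_entry {
+       u32 addr;       /* physical base address of the block */
+       u32 flags_len;  /* byte count in bits 0-15, EOT flag in bit 31 */
+};
+#endif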
+
+/*
+ * dma_intr() is the handler for disk read/write DMA interrupts
+ */
+ide_startstop_t ide_dma_intr (ide_drive_t *drive)
+{
+       int i;
+       byte stat, dma_stat;
+
+       dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive);
+       stat = GET_STAT();                      /* get drive status */
+       if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) {
+               if (!dma_stat) {
+                       struct request *rq = HWGROUP(drive)->rq;
+                       for (i = rq->nr_sectors; i > 0;) {
+                               i -= rq->current_nr_sectors;
+                               ide_end_request(1, HWGROUP(drive));
+                       }
+                       return ide_stopped;
+               }
+               printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n", 
+                      drive->name, dma_stat);
+       }
+       return ide_error(drive, "dma_intr", stat);
+}
+
+static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+       struct buffer_head *bh;
+       struct scatterlist *sg = hwif->sg_table;
+       int nents = 0;
+
+       if (hwif->sg_dma_active)
+               BUG();
+               
+       if (rq->cmd == READ)
+               hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+       else
+               hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+       bh = rq->bh;
+       do {
+               unsigned char *virt_addr = bh->b_data;
+               unsigned int size = bh->b_size;
+
+               if (nents >= PRD_ENTRIES)
+                       return 0;
+
+               while ((bh = bh->b_reqnext) != NULL) {
+                       if ((virt_addr + size) != (unsigned char *) bh->b_data)
+                               break;
+                       size += bh->b_size;
+               }
+               memset(&sg[nents], 0, sizeof(*sg));
+               sg[nents].address = virt_addr;
+               sg[nents].length = size;
+               nents++;
+       } while (bh != NULL);
+
+       return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
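+
+/*
+ * Note that the loop above coalesces buffer_heads whose data is
+ * contiguous in memory into a single scatterlist entry before calling
+ * pci_map_sg(), keeping the PRD entry count down.
+ */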
+
+static int ide_raw_build_sglist (ide_hwif_t *hwif, struct request *rq)
+{
+       struct scatterlist *sg = hwif->sg_table;
+       int nents = 0;
+       ide_task_t *args = rq->special;
+       unsigned char *virt_addr = rq->buffer;
+       int sector_count = rq->nr_sectors;
+
+//     if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA) ||
+//         (args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA_EXT))
+       if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
+               hwif->sg_dma_direction = PCI_DMA_TODEVICE;
+       else
+               hwif->sg_dma_direction = PCI_DMA_FROMDEVICE;
+       
+       if (sector_count > 128) {
+               memset(&sg[nents], 0, sizeof(*sg));
+               sg[nents].address = virt_addr;
+               sg[nents].length = 128  * SECTOR_SIZE;
+               nents++;
+               virt_addr = virt_addr + (128 * SECTOR_SIZE);
+               sector_count -= 128;
+       }
+       memset(&sg[nents], 0, sizeof(*sg));
+       sg[nents].address = virt_addr;
+       sg[nents].length =  sector_count  * SECTOR_SIZE;
+       nents++;
+   
+       return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction);
+}
+
+/*
+ * ide_build_dmatable() prepares a dma request.
+ * Returns 0 if all went okay, returns 1 otherwise.
+ * May also be invoked from trm290.c
+ */
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func)
+{
+       unsigned int *table = HWIF(drive)->dmatable_cpu;
+#ifdef CONFIG_BLK_DEV_TRM290
+       unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290);
+#else
+       const int is_trm290_chipset = 0;
+#endif
+       unsigned int count = 0;
+       int i;
+       struct scatterlist *sg;
+
+       if (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE)
+               HWIF(drive)->sg_nents = i = ide_raw_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+       else
+               HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq);
+
+       if (!i)
+               return 0;
+
+       sg = HWIF(drive)->sg_table;
+       while (i && sg_dma_len(sg)) {
+               u32 cur_addr;
+               u32 cur_len;
+
+               cur_addr = sg_dma_address(sg);
+               cur_len = sg_dma_len(sg);
+
+               /*
+                * Fill in the dma table, without crossing any 64kB boundaries.
+                * Most hardware requires 16-bit alignment of all blocks,
+                * but the trm290 requires 32-bit alignment.
+                */
+
+               while (cur_len) {
+                       if (count++ >= PRD_ENTRIES) {
+                               printk("%s: DMA table too small\n", drive->name);
+                               goto use_pio_instead;
+                       } else {
+                               u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff);
+
+                               if (bcount > cur_len)
+                                       bcount = cur_len;
+                               *table++ = cpu_to_le32(cur_addr);
+                               xcount = bcount & 0xffff;
+                               if (is_trm290_chipset)
+                                       xcount = ((xcount >> 2) - 1) << 16;
+                               if (xcount == 0x0000) {
+                                       /* 
+                                        * Most chipsets correctly interpret a length of 0x0000 as 64KB,
+                                        * but at least one (e.g. CS5530) misinterprets it as zero (!).
+                                        * So here we break the 64KB entry into two 32KB entries instead.
+                                        */
+                                       if (count++ >= PRD_ENTRIES) {
+                                               printk("%s: DMA table too small\n", drive->name);
+                                               goto use_pio_instead;
+                                       }
+                                       *table++ = cpu_to_le32(0x8000);
+                                       *table++ = cpu_to_le32(cur_addr + 0x8000);
+                                       xcount = 0x8000;
+                               }
+                               *table++ = cpu_to_le32(xcount);
+                               cur_addr += bcount;
+                               cur_len -= bcount;
+                       }
+               }
+
+               sg++;
+               i--;
+       }
+
+       if (count) {
+               if (!is_trm290_chipset)
+                       *--table |= cpu_to_le32(0x80000000);
+               return count;
+       }
+       printk("%s: empty DMA table?\n", drive->name);
+use_pio_instead:
+       pci_unmap_sg(HWIF(drive)->pci_dev,
+                    HWIF(drive)->sg_table,
+                    HWIF(drive)->sg_nents,
+                    HWIF(drive)->sg_dma_direction);
+       HWIF(drive)->sg_dma_active = 0;
+       return 0; /* revert to PIO for this request */
+}
+
+/* Teardown mappings after DMA has completed.  */
+void ide_destroy_dmatable (ide_drive_t *drive)
+{
+       struct pci_dev *dev = HWIF(drive)->pci_dev;
+       struct scatterlist *sg = HWIF(drive)->sg_table;
+       int nents = HWIF(drive)->sg_nents;
+
+       pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction);
+       HWIF(drive)->sg_dma_active = 0;
+}
+
+/*
+ *  For both Blacklisted and Whitelisted drives.
+ *  This is setup to be called as an extern for future support
+ *  to other special driver code.
+ */
+int check_drive_lists (ide_drive_t *drive, int good_bad)
+{
+       struct hd_driveid *id = drive->id;
+
+#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS
+       if (good_bad) {
+               return in_drive_list(id, drive_whitelist);
+       } else {
+               int blacklist = in_drive_list(id, drive_blacklist);
+               if (blacklist)
+                       printk("%s: Disabling (U)DMA for %s\n", drive->name, id->model);
+               return(blacklist);
+       }
+#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+       const char **list;
+
+       if (good_bad) {
+               /* Consult the list of known "good" drives */
+               list = good_dma_drives;
+               while (*list) {
+                       if (!strcmp(*list++,id->model))
+                               return 1;
+               }
+       } else {
+               /* Consult the list of known "bad" drives */
+               list = bad_dma_drives;
+               while (*list) {
+                       if (!strcmp(*list++,id->model)) {
+                               printk("%s: Disabling (U)DMA for %s\n",
+                                       drive->name, id->model);
+                               return 1;
+                       }
+               }
+       }
+#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */
+       return 0;
+}
+
+int report_drive_dmaing (ide_drive_t *drive)
+{
+       struct hd_driveid *id = drive->id;
+
+       if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+           (id->dma_ultra & (id->dma_ultra >> 14) & 3)) {
+               if ((id->dma_ultra >> 15) & 1) {
+                       printk(", UDMA(mode 7)");       /* UDMA BIOS-enabled! */
+               } else {
+                       printk(", UDMA(133)");  /* UDMA BIOS-enabled! */
+               }
+       } else if ((id->field_valid & 4) && (eighty_ninty_three(drive)) &&
+                 (id->dma_ultra & (id->dma_ultra >> 11) & 7)) {
+               if ((id->dma_ultra >> 13) & 1) {
+                       printk(", UDMA(100)");  /* UDMA BIOS-enabled! */
+               } else if ((id->dma_ultra >> 12) & 1) {
+                       printk(", UDMA(66)");   /* UDMA BIOS-enabled! */
+               } else {
+                       printk(", UDMA(44)");   /* UDMA BIOS-enabled! */
+               }
+       } else if ((id->field_valid & 4) &&
+                  (id->dma_ultra & (id->dma_ultra >> 8) & 7)) {
+               if ((id->dma_ultra >> 10) & 1) {
+                       printk(", UDMA(33)");   /* UDMA BIOS-enabled! */
+               } else if ((id->dma_ultra >> 9) & 1) {
+                       printk(", UDMA(25)");   /* UDMA BIOS-enabled! */
+               } else {
+                       printk(", UDMA(16)");   /* UDMA BIOS-enabled! */
+               }
+       } else if (id->field_valid & 4) {
+               printk(", (U)DMA");     /* Can be BIOS-enabled! */
+       } else {
+               printk(", DMA");
+       }
+       return 1;
+}
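+
+/*
+ * The id->dma_ultra tests above and in config_drive_for_dma() below
+ * follow identify-data word 88: bits 0-7 report which UDMA modes the
+ * drive supports, bits 8-15 which one is currently selected.  So e.g.
+ * (dma_ultra & (dma_ultra >> 8) & 7) is nonzero only when one of modes
+ * 0-2 is both supported and enabled.
+ */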
+
+static int config_drive_for_dma (ide_drive_t *drive)
+{
+       int config_allows_dma = 1;
+       struct hd_driveid *id = drive->id;
+       ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef CONFIG_IDEDMA_ONLYDISK
+       if (drive->media != ide_disk)
+               config_allows_dma = 0;
+#endif
+
+       if (id && (id->capability & 1) && hwif->autodma && config_allows_dma) {
+               /* Consult the list of known "bad" drives */
+               if (ide_dmaproc(ide_dma_bad_drive, drive))
+                       return hwif->dmaproc(ide_dma_off, drive);
+
+               /* Enable DMA on any drive that has UltraDMA (mode 6/7/?) enabled */
+               if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+                       if ((id->dma_ultra & (id->dma_ultra >> 14) & 2))
+                               return hwif->dmaproc(ide_dma_on, drive);
+               /* Enable DMA on any drive that has UltraDMA (mode 3/4/5) enabled */
+               if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+                       if ((id->dma_ultra & (id->dma_ultra >> 11) & 7))
+                               return hwif->dmaproc(ide_dma_on, drive);
+               /* Enable DMA on any drive that has UltraDMA (mode 0/1/2) enabled */
+               if (id->field_valid & 4)        /* UltraDMA */
+                       if ((id->dma_ultra & (id->dma_ultra >> 8) & 7))
+                               return hwif->dmaproc(ide_dma_on, drive);
+               /* Enable DMA on any drive that has mode2 DMA (multi or single) enabled */
+               if (id->field_valid & 2)        /* regular DMA */
+                       if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404)
+                               return hwif->dmaproc(ide_dma_on, drive);
+               /* Consult the list of known "good" drives */
+               if (ide_dmaproc(ide_dma_good_drive, drive))
+                       return hwif->dmaproc(ide_dma_on, drive);
+       }
+       return hwif->dmaproc(ide_dma_off_quietly, drive);
+}
+
+#ifndef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+/*
+ * 1 dmaing, 2 error, 4 intr
+ */
+static int dma_timer_expiry (ide_drive_t *drive)
+{
+       byte dma_stat = inb(HWIF(drive)->dma_base+2);
+
+#ifdef DEBUG
+       printk("%s: dma_timer_expiry: dma status == 0x%02x\n", drive->name, dma_stat);
+#endif /* DEBUG */
+
+#if 0
+       HWGROUP(drive)->expiry = NULL;  /* one free ride for now */
+#endif
+
+       if (dma_stat & 2) {     /* ERROR */
+               byte stat = GET_STAT();
+               return ide_error(drive, "dma_timer_expiry", stat);
+       }
+       if (dma_stat & 1)       /* DMAing */
+               return WAIT_CMD;
+       return 0;
+}
+#else /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+static ide_startstop_t ide_dma_timeout_recovery (ide_drive_t *drive)
+{
+       ide_hwgroup_t *hwgroup  = HWGROUP(drive);
+       ide_hwif_t *hwif        = HWIF(drive);
+       int enable_dma          = drive->using_dma;
+       unsigned long flags;
+       ide_startstop_t startstop;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       hwgroup->handler = NULL;
+       del_timer(&hwgroup->timer);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       drive->waiting_for_dma = 0;
+
+       startstop = ide_do_reset(drive);
+
+       if ((enable_dma) && !(drive->using_dma))
+               (void) hwif->dmaproc(ide_dma_on, drive);
+
+       return startstop;
+}
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+
+/*
+ * ide_dmaproc() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA.  All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * For ATAPI devices, we just prepare for DMA and return. The caller should
+ * then issue the packet command to the drive and call us again with
+ * ide_dma_begin afterwards.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case
+ * the caller should revert to PIO for the current request.
+ * May also be invoked from trm290.c
+ */
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
+{
+//     ide_hwgroup_t *hwgroup  = HWGROUP(drive);
+       ide_hwif_t *hwif                = HWIF(drive);
+       unsigned long dma_base          = hwif->dma_base;
+       byte unit                       = (drive->select.b.unit & 0x01);
+       unsigned int count, reading     = 0;
+       byte dma_stat;
+
+       switch (func) {
+               case ide_dma_off:
+                       printk("%s: DMA disabled\n", drive->name);
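+                       /* fall through */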
+               case ide_dma_off_quietly:
+                       outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2);
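+                       /* fall through */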
+               case ide_dma_on:
+                       drive->using_dma = (func == ide_dma_on);
+                       if (drive->using_dma)
+                               outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2);
+                       return 0;
+               case ide_dma_check:
+                       return config_drive_for_dma (drive);
+               case ide_dma_read:
+                       reading = 1 << 3;
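+                       /* fall through */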
+               case ide_dma_write:
+                       SELECT_READ_WRITE(hwif,drive,func);
+                       if (!(count = ide_build_dmatable(drive, func)))
+                               return 1;       /* try PIO instead of DMA */
+                       outl(hwif->dmatable_dma, dma_base + 4); /* PRD table */
+                       outb(reading, dma_base);                        /* specify r/w */
+                       outb(inb(dma_base+2)|6, dma_base+2);            /* clear INTR & ERROR flags */
+                       drive->waiting_for_dma = 1;
+                       if (drive->media != ide_disk)
+                               return 0;
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+                       ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, NULL);        /* issue cmd to drive */
+#else /* !CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+                       ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, dma_timer_expiry);      /* issue cmd to drive */
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+                       if ((HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) &&
+                           (drive->addressing == 1)) {
+                               ide_task_t *args = HWGROUP(drive)->rq->special;
+                               OUT_BYTE(args->tfRegister[IDE_COMMAND_OFFSET], IDE_COMMAND_REG);
+                       } else if (drive->addressing) {
+                               OUT_BYTE(reading ? WIN_READDMA_EXT : WIN_WRITEDMA_EXT, IDE_COMMAND_REG);
+                       } else {
+                               OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
+                       }
+                       return HWIF(drive)->dmaproc(ide_dma_begin, drive);
+               case ide_dma_begin:
+                       /* Note that this is done *after* the cmd has
+                        * been issued to the drive, as per the BM-IDE spec.
+                        * The Promise Ultra33 doesn't work correctly when
+                        * we do this part before issuing the drive cmd.
+                        */
+                       outb(inb(dma_base)|1, dma_base);                /* start DMA */
+                       return 0;
+               case ide_dma_end: /* returns 1 on error, 0 otherwise */
+                       drive->waiting_for_dma = 0;
+                       outb(inb(dma_base)&~1, dma_base);       /* stop DMA */
+                       dma_stat = inb(dma_base+2);             /* get DMA status */
+                       outb(dma_stat|6, dma_base+2);   /* clear the INTR & ERROR bits */
+                       ide_destroy_dmatable(drive);    /* purge DMA mappings */
+                       return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0;     /* verify good DMA status */
+               case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */
+                       dma_stat = inb(dma_base+2);
+#if 0  /* do not set unless you know what you are doing */
+                       if (dma_stat & 4) {
+                               byte stat = GET_STAT();
+                               outb(dma_stat & 0xE4, dma_base+2);
+                       }
+#endif
+                       return (dma_stat & 4) == 4;     /* return 1 if INTR asserted */
+               case ide_dma_bad_drive:
+               case ide_dma_good_drive:
+                       return check_drive_lists(drive, (func == ide_dma_good_drive));
+               case ide_dma_verbose:
+                       return report_drive_dmaing(drive);
+               case ide_dma_timeout:
+                       // FIXME: Many IDE chipsets do not permit command file register access
+                       // FIXME: while the bus-master function is still active.
+                       // FIXME: To prevent deadlock with those chipsets, we must be extremely
+                       // FIXME: careful here (and in ide_intr() as well) to NOT access any
+                       // FIXME: registers from the 0x1Fx/0x17x sets before terminating the
+                       // FIXME: bus-master operation via the bus-master control reg.
+                       // FIXME: Otherwise, chipset deadlock will occur, and some systems will
+                       // FIXME: lock up completely!!
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+                       /*
+                        * Have to issue an abort and requeue the request
+                        * DMA engine got turned off by a goofy ASIC, and
+                        * we have to clean up the mess, and here is as good
+                        * as any.  Do it globally for all chipsets.
+                        */
+                       outb(0x00, dma_base);           /* stop DMA */
+                       dma_stat = inb(dma_base+2);     /* get DMA status */
+                       outb(dma_stat|6, dma_base+2);   /* clear the INTR & ERROR bits */
+                       printk("%s: %s: Let's do it again! "
+                               "stat = 0x%02x, dma_stat = 0x%02x\n",
+                               drive->name, ide_dmafunc_verbose(func),
+                               GET_STAT(), dma_stat);
+
+                       if (dma_stat & 0xF0)
+                               return ide_dma_timeout_recovery(drive);
+
+                       printk("%s: %s: (restart_request) Let's do it again! "
+                               "stat = 0x%02x, dma_stat = 0x%02x\n",
+                               drive->name, ide_dmafunc_verbose(func),
+                               GET_STAT(), dma_stat);
+
+                       return restart_request(drive);  // BUG: return types do not match!!
+//#else
+//                     return HWGROUP(drive)->handler(drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+               case ide_dma_retune:
+               case ide_dma_lostirq:
+                       printk("ide_dmaproc: chipset-supported %s func only: %d\n", ide_dmafunc_verbose(func), func);
+                       return 1;
+               default:
+                       printk("ide_dmaproc: unsupported %s func: %d\n", ide_dmafunc_verbose(func), func);
+                       return 1;
+       }
+}
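+
+/*
+ * Bus-master status register bits used above, per the usual SFF-8038i
+ * layout: bit 0 = DMA active, bit 1 = error, bit 2 = interrupt, bits
+ * 5/6 = drive 0/1 configured for DMA.  Writing (dma_stat | 6) clears
+ * the error and interrupt latches, and a clean completion shows
+ * (dma_stat & 7) == 4.
+ */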
+
+/*
+ * Needed for allowing full modular support of ide-driver
+ */
+int ide_release_dma (ide_hwif_t *hwif)
+{
+       if (hwif->dmatable_cpu) {
+               pci_free_consistent(hwif->pci_dev,
+                                   PRD_ENTRIES * PRD_BYTES,
+                                   hwif->dmatable_cpu,
+                                   hwif->dmatable_dma);
+               hwif->dmatable_cpu = NULL;
+       }
+       if (hwif->sg_table) {
+               kfree(hwif->sg_table);
+               hwif->sg_table = NULL;
+       }
+       if ((hwif->dma_extra) && (hwif->channel == 0))
+               release_region((hwif->dma_base + 16), hwif->dma_extra);
+       release_region(hwif->dma_base, 8);
+       return 1;
+}
+
+/*
+ *     This can be called for a dynamically installed interface. Don't __init it
+ */
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+{
+       printk("    %s: BM-DMA at 0x%04lx-0x%04lx", hwif->name, dma_base, dma_base + num_ports - 1);
+       if (check_region(dma_base, num_ports)) {
+               printk(" -- ERROR, PORT ADDRESSES ALREADY IN USE\n");
+               return;
+       }
+       request_region(dma_base, num_ports, hwif->name);
+       hwif->dma_base = dma_base;
+       hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev,
+                                                 PRD_ENTRIES * PRD_BYTES,
+                                                 &hwif->dmatable_dma);
+       if (hwif->dmatable_cpu == NULL)
+               goto dma_alloc_failure;
+
+       hwif->sg_table = kmalloc(sizeof(struct scatterlist) * PRD_ENTRIES,
+                                GFP_KERNEL);
+       if (hwif->sg_table == NULL) {
+               pci_free_consistent(hwif->pci_dev, PRD_ENTRIES * PRD_BYTES,
+                                   hwif->dmatable_cpu, hwif->dmatable_dma);
+               goto dma_alloc_failure;
+       }
+
+       hwif->dmaproc = &ide_dmaproc;
+
+       if (hwif->chipset != ide_trm290) {
+               byte dma_stat = inb(dma_base+2);
+               printk(", BIOS settings: %s:%s, %s:%s",
+                      hwif->drives[0].name, (dma_stat & 0x20) ? "DMA" : "pio",
+                      hwif->drives[1].name, (dma_stat & 0x40) ? "DMA" : "pio");
+       }
+       printk("\n");
+       return;
+
+dma_alloc_failure:
+       printk(" -- ERROR, UNABLE TO ALLOCATE DMA TABLES\n");
+}
+
+/*
+ * Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space:
+ */
+unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name)
+{
+       unsigned long   dma_base = 0;
+       struct pci_dev  *dev = hwif->pci_dev;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+       int second_chance = 0;
+
+second_chance_to_dma:
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+       if (hwif->mate && hwif->mate->dma_base) {
+               dma_base = hwif->mate->dma_base - (hwif->channel ? 0 : 8);
+       } else {
+               dma_base = pci_resource_start(dev, 4);
+               if (!dma_base) {
+                       printk("%s: dma_base is invalid (0x%04lx)\n", name, dma_base);
+                       dma_base = 0;
+               }
+       }
+
+#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED
+       if ((!dma_base) && (!second_chance)) {
+               unsigned long set_bmiba = 0;
+               second_chance++;
+               switch(dev->vendor) {
+                       case PCI_VENDOR_ID_AL:
+                               set_bmiba = DEFAULT_BMALIBA; break;
+                       case PCI_VENDOR_ID_VIA:
+                               set_bmiba = DEFAULT_BMCRBA; break;
+                       case PCI_VENDOR_ID_INTEL:
+                               set_bmiba = DEFAULT_BMIBA; break;
+                       default:
+                               return dma_base;
+               }
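+               /*
+                * BAR 4 (config offset 0x20) holds the bus-master I/O
+                * base; program a chipset default and retry the probe.
+                */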
+               pci_write_config_dword(dev, 0x20, set_bmiba|1);
+               goto second_chance_to_dma;
+       }
+#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */
+
+       if (dma_base) {
+               if (extra) /* PDC20246, PDC20262, HPT343, & HPT366 */
+                       request_region(dma_base+16, extra, name);
+               dma_base += hwif->channel ? 8 : 0;
+               hwif->dma_extra = extra;
+
+               switch(dev->device) {
+                       case PCI_DEVICE_ID_AL_M5219:
+                       case PCI_DEVICE_ID_AMD_VIPER_7409:
+                       case PCI_DEVICE_ID_CMD_643:
+                               outb(inb(dma_base+2) & 0x60, dma_base+2);
+                               if (inb(dma_base+2) & 0x80) {
+                                       printk("%s: simplex device: DMA forced\n", name);
+                               }
+                               break;
+                       default:
+                               /*
+                                * If the device claims "simplex" DMA,
+                                * this means only one of the two interfaces
+                                * can be trusted with DMA at any point in time.
+                                * So we should enable DMA only on one of the
+                                * two interfaces.
+                                */
+                               if ((inb(dma_base+2) & 0x80)) { /* simplex device? */
+                                       if ((!hwif->drives[0].present && !hwif->drives[1].present) ||
+                                           (hwif->mate && hwif->mate->dma_base)) {
+                                               printk("%s: simplex device: DMA disabled\n", name);
+                                               dma_base = 0;
+                                       }
+                               }
+               }
+       }
+       return dma_base;
+}
diff --git a/xen-2.4.16/drivers/ide/ide-features.c b/xen-2.4.16/drivers/ide/ide-features.c
new file mode 100644 (file)
index 0000000..a60af2d
--- /dev/null
@@ -0,0 +1,384 @@
+/*
+ * linux/drivers/block/ide-features.c  Version 0.04    June 9, 2000
+ *
+ *  Copyright (C) 1999-2000    Linus Torvalds & authors (see below)
+ *  
+ *  Copyright (C) 1999-2000    Andre Hedrick <andre@linux-ide.org>
+ *
+ *  Extracted from ide.c to address the evolving transfer rate code for
+ *  the SETFEATURES_XFER callouts.  Various parts of any given function
+ *  are credited to previous ATA-IDE maintainers.
+ *
+ *  Auto-CRC downgrade for Ultra DMA(ing)
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+/*
+ * A verbose noise maker for debugging the attempted transfer rates.
+ */
+char *ide_xfer_verbose (byte xfer_rate)
+{
+       switch(xfer_rate) {
+               case XFER_UDMA_7:       return("UDMA 7");
+               case XFER_UDMA_6:       return("UDMA 6");
+               case XFER_UDMA_5:       return("UDMA 5");
+               case XFER_UDMA_4:       return("UDMA 4");
+               case XFER_UDMA_3:       return("UDMA 3");
+               case XFER_UDMA_2:       return("UDMA 2");
+               case XFER_UDMA_1:       return("UDMA 1");
+               case XFER_UDMA_0:       return("UDMA 0");
+               case XFER_MW_DMA_2:     return("MW DMA 2");
+               case XFER_MW_DMA_1:     return("MW DMA 1");
+               case XFER_MW_DMA_0:     return("MW DMA 0");
+               case XFER_SW_DMA_2:     return("SW DMA 2");
+               case XFER_SW_DMA_1:     return("SW DMA 1");
+               case XFER_SW_DMA_0:     return("SW DMA 0");
+               case XFER_PIO_4:        return("PIO 4");
+               case XFER_PIO_3:        return("PIO 3");
+               case XFER_PIO_2:        return("PIO 2");
+               case XFER_PIO_1:        return("PIO 1");
+               case XFER_PIO_0:        return("PIO 0");
+               case XFER_PIO_SLOW:     return("PIO SLOW");
+               default:                return("XFER ERROR");
+       }
+}
+
+/*
+ * Report the drive's media type as a fixed-width string.
+ */
+char *ide_media_verbose (ide_drive_t *drive)
+{
+       switch (drive->media) {
+               case ide_scsi:          return("scsi   ");
+               case ide_disk:          return("disk   ");
+               case ide_optical:       return("optical");
+               case ide_cdrom:         return("cdrom  ");
+               case ide_tape:          return("tape   ");
+               case ide_floppy:        return("floppy ");
+               default:                return("???????");
+       }
+}
+
+/*
+ * A verbose noise maker for debugging the attempted DMA calls.
+ */
+char *ide_dmafunc_verbose (ide_dma_action_t dmafunc)
+{
+       switch (dmafunc) {
+               case ide_dma_read:              return("ide_dma_read");
+               case ide_dma_write:             return("ide_dma_write");
+               case ide_dma_begin:             return("ide_dma_begin");
+               case ide_dma_end:               return("ide_dma_end");
+               case ide_dma_check:             return("ide_dma_check");
+               case ide_dma_on:                return("ide_dma_on");
+               case ide_dma_off:               return("ide_dma_off");
+               case ide_dma_off_quietly:       return("ide_dma_off_quietly");
+               case ide_dma_test_irq:          return("ide_dma_test_irq");
+               case ide_dma_bad_drive:         return("ide_dma_bad_drive");
+               case ide_dma_good_drive:        return("ide_dma_good_drive");
+               case ide_dma_verbose:           return("ide_dma_verbose");
+               case ide_dma_retune:            return("ide_dma_retune");
+               case ide_dma_lostirq:           return("ide_dma_lostirq");
+               case ide_dma_timeout:           return("ide_dma_timeout");
+               default:                        return("unknown");
+       }
+}
+
+/*
+ * Step the drive down one UDMA mode after repeated CRC errors.
+ */
+byte ide_auto_reduce_xfer (ide_drive_t *drive)
+{
+       if (!drive->crc_count)
+               return drive->current_speed;
+       drive->crc_count = 0;
+
+       switch(drive->current_speed) {
+               case XFER_UDMA_7:       return XFER_UDMA_6;
+               case XFER_UDMA_6:       return XFER_UDMA_5;
+               case XFER_UDMA_5:       return XFER_UDMA_4;
+               case XFER_UDMA_4:       return XFER_UDMA_3;
+               case XFER_UDMA_3:       return XFER_UDMA_2;
+               case XFER_UDMA_2:       return XFER_UDMA_1;
+               case XFER_UDMA_1:       return XFER_UDMA_0;
+                       /*
+                        * We do not step down into the non-Ultra DMA
+                        * modes here: without iCRC available we force
+                        * the system to PIO and make the user invoke
+                        * the ATA-1/ATA-2 DMA modes explicitly.
+                        */
+               case XFER_UDMA_0:
+               default:                return XFER_PIO_4;
+       }
+}
+
+/*
+ * Re-read the drive's identify data so a DMA mode change is reflected in drive->id.
+ */
+int ide_driveid_update (ide_drive_t *drive)
+{
+       /*
+        * Re-read drive->id for possible DMA mode
+        * change (copied from ide-probe.c)
+        */
+       struct hd_driveid *id;
+       unsigned long timeout, flags;
+
+       SELECT_MASK(HWIF(drive), drive, 1);
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+       ide_delay_50ms();
+       OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+       timeout = jiffies + WAIT_WORSTCASE;
+       do {
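+               /* wrap-safe time-after test: true once jiffies passes timeout */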
+               if (0 < (signed long)(jiffies - timeout)) {
+                       SELECT_MASK(HWIF(drive), drive, 0);
+                       return 0;       /* drive timed-out */
+               }
+               ide_delay_50ms();       /* give drive a breather */
+       } while (IN_BYTE(IDE_ALTSTATUS_REG) & BUSY_STAT);
+       ide_delay_50ms();       /* wait for IRQ and DRQ_STAT */
+       if (!OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+               SELECT_MASK(HWIF(drive), drive, 0);
+               printk("%s: CHECK for good STATUS\n", drive->name);
+               return 0;
+       }
+       __save_flags(flags);    /* local CPU only */
+       __cli();                /* local CPU only; some systems need this */
+       SELECT_MASK(HWIF(drive), drive, 0);
+       id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
+       if (!id) {
+               __restore_flags(flags); /* local CPU only */
+               return 0;
+       }
+       ide_input_data(drive, id, SECTOR_WORDS);
+       (void) GET_STAT();      /* clear drive IRQ */
+       ide__sti();             /* local CPU only */
+       __restore_flags(flags); /* local CPU only */
+       ide_fix_driveid(id);
+       if (id) {
+               drive->id->dma_ultra = id->dma_ultra;
+               drive->id->dma_mword = id->dma_mword;
+               drive->id->dma_1word = id->dma_1word;
+               /* anything more ? */
+               kfree(id);
+       }
+
+       return 1;
+}
+
+/*
+ * Verify that we are doing an approved SETFEATURES_XFER with respect
+ * to the hardware being able to support the request.  Since some hardware
+ * can improperly report capabilities, we check to see if the host adapter
+ * in combination with the device (usually a disk) properly detect
+ * and acknowledge each end of the ribbon.
+ */
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args)
+{
+       if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+           (args->tfRegister[IDE_SECTOR_OFFSET] > XFER_UDMA_2) &&
+           (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER)) {
+               if (!HWIF(drive)->udma_four) {
+                       printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", HWIF(drive)->name);
+                       return 1;
+               }
+#ifndef CONFIG_IDEDMA_IVB
+               if ((drive->id->hw_config & 0x6000) == 0) {
+#else /* CONFIG_IDEDMA_IVB */
+               if (((drive->id->hw_config & 0x2000) == 0) ||
+                   ((drive->id->hw_config & 0x4000) == 0)) {
+#endif /* CONFIG_IDEDMA_IVB */
+                       printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", drive->name);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Backside of HDIO_DRIVE_CMD call of SETFEATURES_XFER.
+ * 1 : Safe to update drive->id DMA registers.
+ * 0 : OOPs not allowed.
+ */
+int set_transfer (ide_drive_t *drive, ide_task_t *args)
+{
+       if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+           (args->tfRegister[IDE_SECTOR_OFFSET] >= XFER_SW_DMA_0) &&
+           (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER) &&
+           (drive->id->dma_ultra ||
+            drive->id->dma_mword ||
+            drive->id->dma_1word))
+               return 1;
+
+       return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+/*
+ *  All hosts that use the 80-conductor ribbon must use this check.
+ */
+byte eighty_ninty_three (ide_drive_t *drive)
+{
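+       /*
+        * hw_config is word 93 of the identify data; bits 0x2000 and
+        * 0x4000 are the drive's cable-detect bits, checked alongside
+        * the host's own udma_four flag before allowing modes above
+        * UDMA 2.
+        */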
+#ifdef CONFIG_BLK_DEV_IDEPCI
+       if (HWIF(drive)->pci_devid.vid==0x105a)
+           return(HWIF(drive)->udma_four);
+#endif
+       /* PDC202XX: that's because some HDDs will return wrong info */
+       return ((byte) ((HWIF(drive)->udma_four) &&
+#ifndef CONFIG_IDEDMA_IVB
+                       (drive->id->hw_config & 0x4000) &&
+#endif /* CONFIG_IDEDMA_IVB */
+                       (drive->id->hw_config & 0x6000)) ? 1 : 0);
+}
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+/*
+ * Similar to ide_wait_stat(), except it never calls ide_error internally.
+ * This is a kludge to handle the new ide_config_drive_speed() function,
+ * and should not otherwise be used anywhere.  Eventually, the tuneprocs
+ * should be updated to return ide_startstop_t, in which case we can get
+ * rid of this abomination again.  :)   -ml
+ *
+ * It is gone now.
+ *
+ * const char *msg -- consider adding for verbose errors.
+ */
+int ide_config_drive_speed (ide_drive_t *drive, byte speed)
+{
+       ide_hwif_t *hwif = HWIF(drive);
+       int     i, error = 1;
+       byte stat;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+       byte unit = (drive->select.b.unit & 0x01);
+       outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+       /*
+        * Don't use ide_wait_cmd here - it will
+        * attempt to set_geometry and recalibrate,
+        * but for some reason these don't work at
+        * this point (lost interrupt).
+        */
+        /*
+         * Select the drive, and issue the SETFEATURES command
+         */
+       disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+       udelay(1);
+       SELECT_DRIVE(HWIF(drive), drive);
+       SELECT_MASK(HWIF(drive), drive, 0);
+       udelay(1);
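+       /* setting bit 1 (nIEN) of the control register masks the drive's IRQ */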
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl | 2, IDE_CONTROL_REG);
+       OUT_BYTE(speed, IDE_NSECTOR_REG);
+       OUT_BYTE(SETFEATURES_XFER, IDE_FEATURE_REG);
+       OUT_BYTE(WIN_SETFEATURES, IDE_COMMAND_REG);
+       if ((IDE_CONTROL_REG) && (drive->quirk_list == 2))
+               OUT_BYTE(drive->ctl, IDE_CONTROL_REG);
+       udelay(1);
+       /*
+        * Wait for drive to become non-BUSY
+        */
+       if ((stat = GET_STAT()) & BUSY_STAT) {
+               unsigned long flags, timeout;
+               __save_flags(flags);    /* local CPU only */
+               ide__sti();             /* local CPU only -- for jiffies */
+               timeout = jiffies + WAIT_CMD;
+               while ((stat = GET_STAT()) & BUSY_STAT) {
+                       if (0 < (signed long)(jiffies - timeout))
+                               break;
+               }
+               __restore_flags(flags); /* local CPU only */
+       }
+
+       /*
+        * Allow status to settle, then read it again.
+        * A few rare drives vastly violate the 400ns spec here,
+        * so we'll wait up to 10usec for a "good" status
+        * rather than expensively fail things immediately.
+        * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+        */
+       for (i = 0; i < 10; i++) {
+               udelay(1);
+               if (OK_STAT((stat = GET_STAT()), DRIVE_READY, BUSY_STAT|DRQ_STAT|ERR_STAT)) {
+                       error = 0;
+                       break;
+               }
+       }
+
+       SELECT_MASK(HWIF(drive), drive, 0);
+
+       enable_irq(hwif->irq);
+
+       if (error) {
+               (void) ide_dump_status(drive, "set_drive_speed_status", stat);
+               return error;
+       }
+
+       drive->id->dma_ultra &= ~0xFF00;
+       drive->id->dma_mword &= ~0x0F00;
+       drive->id->dma_1word &= ~0x0F00;
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+       if (speed > XFER_PIO_4) {
+               outb(inb(hwif->dma_base+2)|(1<<(5+unit)), hwif->dma_base+2);
+       } else {
+               outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2);
+       }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
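+       /*
+        * In each identify-data DMA word the low byte lists the modes
+        * the drive supports and the high byte flags the mode now
+        * selected, hence the mirrored masks (e.g. 0x0404) below.
+        */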
+       switch(speed) {
+               case XFER_UDMA_7:   drive->id->dma_ultra |= 0x8080; break;
+               case XFER_UDMA_6:   drive->id->dma_ultra |= 0x4040; break;
+               case XFER_UDMA_5:   drive->id->dma_ultra |= 0x2020; break;
+               case XFER_UDMA_4:   drive->id->dma_ultra |= 0x1010; break;
+               case XFER_UDMA_3:   drive->id->dma_ultra |= 0x0808; break;
+               case XFER_UDMA_2:   drive->id->dma_ultra |= 0x0404; break;
+               case XFER_UDMA_1:   drive->id->dma_ultra |= 0x0202; break;
+               case XFER_UDMA_0:   drive->id->dma_ultra |= 0x0101; break;
+               case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break;
+               case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break;
+               case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break;
+               case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break;
+               case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break;
+               case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break;
+               default: break;
+       }
+       return error;
+}
+
+EXPORT_SYMBOL(ide_auto_reduce_xfer);
+EXPORT_SYMBOL(ide_driveid_update);
+EXPORT_SYMBOL(ide_ata66_check);
+EXPORT_SYMBOL(set_transfer);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+EXPORT_SYMBOL(eighty_ninty_three);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+EXPORT_SYMBOL(ide_config_drive_speed);
+
diff --git a/xen-2.4.16/drivers/ide/ide-geometry.c b/xen-2.4.16/drivers/ide/ide-geometry.c
new file mode 100644 (file)
index 0000000..2242828
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * linux/drivers/ide/ide-geometry.c
+ */
+#include <xeno/config.h>
+#include <xeno/ide.h>
+#include <asm/mc146818rtc.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_BLK_DEV_IDE
+
+/*
+ * We query CMOS about hard disks: it could be that we have a SCSI/ESDI/etc
+ * controller that is BIOS compatible with ST-506, and thus showing up in our
+ * BIOS table, but not register compatible, and therefore not present in CMOS.
+ *
+ * Furthermore, we will assume that our ST-506 drives <if any> are the primary
+ * drives in the system -- the ones reflected as drive 1 or 2.  The first
+ * drive is stored in the high nibble of CMOS byte 0x12, the second in the low
+ * nibble.  This will be either a 4 bit drive type or 0xf, indicating that an
+ * 8 bit type is stored in CMOS byte 0x19 for drive 1 or byte 0x1a for drive
+ * 2.  A non-zero value means we have an AT controller hard disk for that drive.
+ *
+ * Of course, there is no guarantee that either drive is actually on the
+ * "primary" IDE interface, but we don't bother trying to sort that out here.
+ * If a drive is not actually on the primary interface, then these parameters
+ * will be ignored.  This results in the user having to supply the logical
+ * drive geometry as a boot parameter for each drive not on the primary i/f.
+ */
+/*
+ * The only "perfect" way to handle this would be to modify the setup.[cS] code
+ * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info
+ * for us during initialization.  I have the necessary docs -- any takers?  -ml
+ */
+/*
+ * I did this, but it doesn't work - there is no reasonable way to find the
+ * correspondence between the BIOS numbering of the disks and the Linux
+ * numbering. -aeb
+ *
+ * The code below is bad. One of the problems is that drives 1 and 2
+ * may be SCSI disks (even when IDE disks are present), so that
+ * the geometry we read here from BIOS is attributed to the wrong disks.
+ * Consequently, also the former "drive->present = 1" below was a mistake.
+ *
+ * Eventually the entire routine below should be removed.
+ *
+ * 17-OCT-2000 rjohnson@analogic.com Added spin-locks for reading CMOS
+ * chip.
+ */
+
+void probe_cmos_for_drives (ide_hwif_t *hwif)
+{
+#ifdef __i386__
+        extern struct drive_info_struct drive_info;
+       byte cmos_disks, *BIOS = (byte *) &drive_info;
+       int unit;
+       unsigned long flags;
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+       if (hwif->chipset == ide_pdc4030 && hwif->channel != 0)
+               return;
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+       spin_lock_irqsave(&rtc_lock, flags);
+       cmos_disks = CMOS_READ(0x12);
+       spin_unlock_irqrestore(&rtc_lock, flags);
+       /* Extract drive geometry from CMOS+BIOS if not already setup */
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               if ((cmos_disks & (0xf0 >> (unit*4)))
+                  && !drive->present && !drive->nobios) {
+                       unsigned short cyl = *(unsigned short *)BIOS;
+                       unsigned char head = *(BIOS+2);
+                       unsigned char sect = *(BIOS+14);
+                       if (cyl > 0 && head > 0 && sect > 0 && sect < 64) {
+                               drive->cyl   = drive->bios_cyl  = cyl;
+                               drive->head  = drive->bios_head = head;
+                               drive->sect  = drive->bios_sect = sect;
+                               drive->ctl   = *(BIOS+8);
+                       } else {
+                               printk("hd%c: C/H/S=%d/%d/%d from BIOS ignored\n",
+                                      unit+'a', cyl, head, sect);
+                       }
+               }
+               BIOS += 16;
+       }
+#endif
+}
+#endif /* CONFIG_BLK_DEV_IDE */
+
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+
+extern ide_drive_t * get_info_ptr(kdev_t);
+extern unsigned long current_capacity (ide_drive_t *);
+
+/*
+ * If heads is nonzero: find a translation with this many heads and S=63.
+ * Otherwise: find out how OnTrack Disk Manager would translate the disk.
+ */
+static void
+ontrack(ide_drive_t *drive, int heads, unsigned int *c, int *h, int *s) {
+       static const byte dm_head_vals[] = {4, 8, 16, 32, 64, 128, 255, 0};
+       const byte *headp = dm_head_vals;
+       unsigned long total;
+
+       /*
+        * The specs say: take geometry as obtained from Identify,
+        * compute total capacity C*H*S from that, and truncate to
+        * 1024*255*63. Now take S=63, H the first in the sequence
+        * 4, 8, 16, 32, 64, 128, 255 such that 63*H*1024 >= total.
+        * [Please tell aeb@cwi.nl in case this computes a
+        * geometry different from what OnTrack uses.]
+        */
+       total = DRIVER(drive)->capacity(drive);
+
+       *s = 63;
+
+       if (heads) {
+               *h = heads;
+               *c = total / (63 * heads);
+               return;
+       }
+
+       while (63 * headp[0] * 1024 < total && headp[1] != 0)
+                headp++;
+       *h = headp[0];
+       *c = total / (63 * headp[0]);
+}
+
+/*
+ * This routine is called from the partition-table code in pt/msdos.c.
+ * It has two tasks:
+ * (i) to handle Ontrack DiskManager by offsetting everything by 63 sectors,
+ *  or to handle EZdrive by remapping sector 0 to sector 1.
+ * (ii) to invent a translated geometry.
+ * Part (i) is suppressed if the user specifies the "noremap" option
+ * on the command line.
+ * Part (ii) is suppressed if the user specifies an explicit geometry.
+ *
+ * The ptheads parameter is either 0 or tells about the number of
+ * heads shown by the end of the first nonempty partition.
+ * If this is either 16, 32, 64, 128, 240 or 255 we'll believe it.
+ *
+ * The xparm parameter has the following meaning:
+ *      0 = convert to CHS with fewer than 1024 cyls
+ *          using the same method as Ontrack DiskManager.
+ *      1 = same as "0", plus offset everything by 63 sectors.
+ *     -1 = similar to "0", plus redirect sector 0 to sector 1.
+ *      2 = convert to a CHS geometry with "ptheads" heads.
+ *
+ * Returns 0 if the translation was not possible, if the device was not 
+ * an IDE disk drive, or if a geometry was "forced" on the command line.
+ * Returns 1 if the geometry translation was successful.
+ */
+int ide_xlate_1024 (kdev_t i_rdev, int xparm, int ptheads, const char *msg)
+{
+       ide_drive_t *drive;
+       const char *msg1 = "";
+       int heads = 0;
+       int c, h, s;
+       int transl = 1;         /* try translation */
+       int ret = 0;
+
+       drive = get_info_ptr(i_rdev);
+       if (!drive)
+               return 0;
+
+       /* remap? */
+       if (drive->remap_0_to_1 != 2) {
+               if (xparm == 1) {               /* DM */
+                       drive->sect0 = 63;
+                       msg1 = " [remap +63]";
+                       ret = 1;
+               } else if (xparm == -1) {       /* EZ-Drive */
+                       if (drive->remap_0_to_1 == 0) {
+                               drive->remap_0_to_1 = 1;
+                               msg1 = " [remap 0->1]";
+                               ret = 1;
+                       }
+               }
+       }
+
+       /* There used to be code here that assigned drive->id->CHS
+          to drive->CHS and that to drive->bios_CHS. However,
+          some disks have id->C/H/S = 4092/16/63 but are larger than 2.1 GB.
+          In such cases that code was wrong.  Moreover,
+          there seems to be no reason to do any of these things. */
+
+       /* translate? */
+       if (drive->forced_geom)
+               transl = 0;
+
+       /* does ptheads look reasonable? */
+       if (ptheads == 32 || ptheads == 64 || ptheads == 128 ||
+           ptheads == 240 || ptheads == 255)
+               heads = ptheads;
+
+       if (xparm == 2) {
+               if (!heads ||
+                  (drive->bios_head >= heads && drive->bios_sect == 63))
+                       transl = 0;
+       }
+       if (xparm == -1) {
+               if (drive->bios_head > 16)
+                       transl = 0;     /* we already have a translation */
+       }
+
+       if (transl) {
+               ontrack(drive, heads, &c, &h, &s);
+               drive->bios_cyl = c;
+               drive->bios_head = h;
+               drive->bios_sect = s;
+               ret = 1;
+       }
+
+       drive->part[0].nr_sects = current_capacity(drive);
+
+       if (ret)
+               printk("%s%s [%d/%d/%d]", msg, msg1,
+                      drive->bios_cyl, drive->bios_head, drive->bios_sect);
+       return ret;
+}
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
diff --git a/xen-2.4.16/drivers/ide/ide-pci.c b/xen-2.4.16/drivers/ide/ide-pci.c
new file mode 100644 (file)
index 0000000..c68ff36
--- /dev/null
@@ -0,0 +1,1003 @@
+/*
+ *  linux/drivers/ide/ide-pci.c                Version 1.05    June 9, 2000
+ *
+ *  Copyright (c) 1998-2000  Andre Hedrick <andre@linux-ide.org>
+ *
+ *  Copyright (c) 1995-1998  Mark Lord
+ *  May be copied or modified under the terms of the GNU General Public License
+ */
+
+/*
+ *  This module provides support for automatic detection and
+ *  configuration of all PCI IDE interfaces present in a system.  
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/pci.h>
+#include <xeno/init.h>
+#include <xeno/ide.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#define DEVID_PIIXa    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82371FB_0})
+#define DEVID_PIIXb    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82371FB_1})
+#define DEVID_MPIIX    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82371MX})
+#define DEVID_PIIX3    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82371SB_1})
+#define DEVID_PIIX4    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82371AB})
+#define DEVID_ICH0     ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801AB_1})
+#define DEVID_PIIX4E2  ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82443MX_1})
+#define DEVID_ICH      ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801AA_1})
+#define DEVID_PIIX4U2  ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82372FB_1})
+#define DEVID_PIIX4NX  ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82451NX})
+#define DEVID_ICH2     ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801BA_9})
+#define DEVID_ICH2M    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801BA_8})
+#define DEVID_ICH3M    ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801CA_10})
+#define DEVID_ICH3     ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801CA_11})
+#define DEVID_ICH4     ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801DB_11})
+#define DEVID_CICH     ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_82801E_11})
+#define DEVID_VIA_IDE  ((ide_pci_devid_t){PCI_VENDOR_ID_VIA,     PCI_DEVICE_ID_VIA_82C561})
+#define DEVID_MR_IDE   ((ide_pci_devid_t){PCI_VENDOR_ID_VIA,     PCI_DEVICE_ID_VIA_82C576_1})
+#define DEVID_VP_IDE   ((ide_pci_devid_t){PCI_VENDOR_ID_VIA,     PCI_DEVICE_ID_VIA_82C586_1})
+#define DEVID_PDC20246 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20246})
+#define DEVID_PDC20262 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20262})
+#define DEVID_PDC20265 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20265})
+#define DEVID_PDC20267 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267})
+#define DEVID_PDC20268  ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268})
+#define DEVID_PDC20270  ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20270})
+#define DEVID_PDC20269 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269})
+#define DEVID_PDC20275 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275})
+#define DEVID_PDC20276 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20276})
+#define DEVID_RZ1000   ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH,  PCI_DEVICE_ID_PCTECH_RZ1000})
+#define DEVID_RZ1001   ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH,  PCI_DEVICE_ID_PCTECH_RZ1001})
+#define DEVID_SAMURAI  ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH,  PCI_DEVICE_ID_PCTECH_SAMURAI_IDE})
+#define DEVID_CMD640   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_640})
+#define DEVID_CMD643   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_643})
+#define DEVID_CMD646   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_646})
+#define DEVID_CMD648   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_648})
+#define DEVID_CMD649   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_649})
+#define DEVID_CMD680   ((ide_pci_devid_t){PCI_VENDOR_ID_CMD,     PCI_DEVICE_ID_CMD_680})
+#define DEVID_SIS5513  ((ide_pci_devid_t){PCI_VENDOR_ID_SI,      PCI_DEVICE_ID_SI_5513})
+#define DEVID_OPTI621  ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI,    PCI_DEVICE_ID_OPTI_82C621})
+#define DEVID_OPTI621V ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI,    PCI_DEVICE_ID_OPTI_82C558})
+#define DEVID_OPTI621X ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI,    PCI_DEVICE_ID_OPTI_82C825})
+#define DEVID_TRM290   ((ide_pci_devid_t){PCI_VENDOR_ID_TEKRAM,  PCI_DEVICE_ID_TEKRAM_DC290})
+#define DEVID_NS87410  ((ide_pci_devid_t){PCI_VENDOR_ID_NS,      PCI_DEVICE_ID_NS_87410})
+#define DEVID_NS87415  ((ide_pci_devid_t){PCI_VENDOR_ID_NS,      PCI_DEVICE_ID_NS_87415})
+#define DEVID_HT6565   ((ide_pci_devid_t){PCI_VENDOR_ID_HOLTEK,  PCI_DEVICE_ID_HOLTEK_6565})
+#define DEVID_AEC6210  ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP,   PCI_DEVICE_ID_ARTOP_ATP850UF})
+#define DEVID_AEC6260  ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP,   PCI_DEVICE_ID_ARTOP_ATP860})
+#define DEVID_AEC6260R ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP,   PCI_DEVICE_ID_ARTOP_ATP860R})
+#define DEVID_W82C105  ((ide_pci_devid_t){PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105})
+#define DEVID_UM8673F  ((ide_pci_devid_t){PCI_VENDOR_ID_UMC,     PCI_DEVICE_ID_UMC_UM8673F})
+#define DEVID_UM8886A  ((ide_pci_devid_t){PCI_VENDOR_ID_UMC,     PCI_DEVICE_ID_UMC_UM8886A})
+#define DEVID_UM8886BF ((ide_pci_devid_t){PCI_VENDOR_ID_UMC,     PCI_DEVICE_ID_UMC_UM8886BF})
+#define DEVID_HPT34X   ((ide_pci_devid_t){PCI_VENDOR_ID_TTI,     PCI_DEVICE_ID_TTI_HPT343})
+#define DEVID_HPT366   ((ide_pci_devid_t){PCI_VENDOR_ID_TTI,     PCI_DEVICE_ID_TTI_HPT366})
+#define DEVID_ALI15X3  ((ide_pci_devid_t){PCI_VENDOR_ID_AL,      PCI_DEVICE_ID_AL_M5229})
+#define DEVID_CY82C693 ((ide_pci_devid_t){PCI_VENDOR_ID_CONTAQ,  PCI_DEVICE_ID_CONTAQ_82C693})
+#define DEVID_HINT     ((ide_pci_devid_t){0x3388,                0x8013})
+#define DEVID_CS5530   ((ide_pci_devid_t){PCI_VENDOR_ID_CYRIX,   PCI_DEVICE_ID_CYRIX_5530_IDE})
+#define DEVID_AMD7401  ((ide_pci_devid_t){PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_COBRA_7401})
+#define DEVID_AMD7409  ((ide_pci_devid_t){PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_VIPER_7409})
+#define DEVID_AMD7411  ((ide_pci_devid_t){PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_VIPER_7411})
+#define DEVID_AMD7441  ((ide_pci_devid_t){PCI_VENDOR_ID_AMD,     PCI_DEVICE_ID_AMD_VIPER_7441})
+#define DEVID_PDCADMA  ((ide_pci_devid_t){PCI_VENDOR_ID_PDC,     PCI_DEVICE_ID_PDC_1841})
+#define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR,    PCI_DEVICE_ID_EFAR_SLC90E66_1})
+#define DEVID_OSB4     ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE})
+#define DEVID_CSB5     ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE})
+#define DEVID_ITE8172G ((ide_pci_devid_t){PCI_VENDOR_ID_ITE,     PCI_DEVICE_ID_ITE_IT8172G})
+
+#define        IDE_IGNORE      ((void *)-1)
+#define IDE_NO_DRIVER  ((void *)-2)
+
+#ifdef CONFIG_BLK_DEV_AEC62XX
+extern unsigned int pci_init_aec62xx(struct pci_dev *, const char *);
+extern unsigned int ata66_aec62xx(ide_hwif_t *);
+extern void ide_init_aec62xx(ide_hwif_t *);
+extern void ide_dmacapable_aec62xx(ide_hwif_t *, unsigned long);
+#define PCI_AEC62XX    &pci_init_aec62xx
+#define ATA66_AEC62XX  &ata66_aec62xx
+#define INIT_AEC62XX   &ide_init_aec62xx
+#define DMA_AEC62XX    &ide_dmacapable_aec62xx
+#else
+#define PCI_AEC62XX    NULL
+#define ATA66_AEC62XX  NULL
+#define INIT_AEC62XX   IDE_NO_DRIVER
+#define DMA_AEC62XX    NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_ALI15X3
+extern unsigned int pci_init_ali15x3(struct pci_dev *, const char *);
+extern unsigned int ata66_ali15x3(ide_hwif_t *);
+extern void ide_init_ali15x3(ide_hwif_t *);
+extern void ide_dmacapable_ali15x3(ide_hwif_t *, unsigned long);
+#define PCI_ALI15X3    &pci_init_ali15x3
+#define ATA66_ALI15X3  &ata66_ali15x3
+#define INIT_ALI15X3   &ide_init_ali15x3
+#define DMA_ALI15X3    &ide_dmacapable_ali15x3
+#else
+#define PCI_ALI15X3    NULL
+#define ATA66_ALI15X3  NULL
+#define INIT_ALI15X3   IDE_NO_DRIVER
+#define DMA_ALI15X3    NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_AMD74XX
+extern unsigned int pci_init_amd74xx(struct pci_dev *, const char *);
+extern unsigned int ata66_amd74xx(ide_hwif_t *);
+extern void ide_init_amd74xx(ide_hwif_t *);
+extern void ide_dmacapable_amd74xx(ide_hwif_t *, unsigned long);
+#define PCI_AMD74XX    &pci_init_amd74xx
+#define ATA66_AMD74XX  &ata66_amd74xx
+#define INIT_AMD74XX   &ide_init_amd74xx
+#define DMA_AMD74XX    &ide_dmacapable_amd74xx
+#else
+#define PCI_AMD74XX    NULL
+#define ATA66_AMD74XX  NULL
+#define INIT_AMD74XX   IDE_NO_DRIVER
+#define DMA_AMD74XX    NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD64X
+extern unsigned int pci_init_cmd64x(struct pci_dev *, const char *);
+extern unsigned int ata66_cmd64x(ide_hwif_t *);
+extern void ide_init_cmd64x(ide_hwif_t *);
+extern void ide_dmacapable_cmd64x(ide_hwif_t *, unsigned long);
+#define PCI_CMD64X     &pci_init_cmd64x
+#define ATA66_CMD64X   &ata66_cmd64x
+#define INIT_CMD64X    &ide_init_cmd64x
+#else
+#define PCI_CMD64X     NULL
+#define ATA66_CMD64X   NULL
+#ifdef __sparc_v9__
+#define INIT_CMD64X    IDE_IGNORE
+#else
+#define INIT_CMD64X    IDE_NO_DRIVER
+#endif
+#endif
+
+#ifdef CONFIG_BLK_DEV_CY82C693
+extern unsigned int pci_init_cy82c693(struct pci_dev *, const char *);
+extern void ide_init_cy82c693(ide_hwif_t *);
+#define PCI_CY82C693   &pci_init_cy82c693
+#define INIT_CY82C693  &ide_init_cy82c693
+#else
+#define PCI_CY82C693   NULL
+#define INIT_CY82C693  IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_CS5530
+extern unsigned int pci_init_cs5530(struct pci_dev *, const char *);
+extern void ide_init_cs5530(ide_hwif_t *);
+#define PCI_CS5530     &pci_init_cs5530
+#define INIT_CS5530    &ide_init_cs5530
+#else
+#define PCI_CS5530     NULL
+#define INIT_CS5530    IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT34X
+extern unsigned int pci_init_hpt34x(struct pci_dev *, const char *);
+extern void ide_init_hpt34x(ide_hwif_t *);
+#define PCI_HPT34X     &pci_init_hpt34x
+#define INIT_HPT34X    &ide_init_hpt34x
+#else
+#define PCI_HPT34X     NULL
+#define INIT_HPT34X    IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_HPT366
+extern byte hpt363_shared_irq;
+extern byte hpt363_shared_pin;
+extern unsigned int pci_init_hpt366(struct pci_dev *, const char *);
+extern unsigned int ata66_hpt366(ide_hwif_t *);
+extern void ide_init_hpt366(ide_hwif_t *);
+extern void ide_dmacapable_hpt366(ide_hwif_t *, unsigned long);
+#define PCI_HPT366     &pci_init_hpt366
+#define ATA66_HPT366   &ata66_hpt366
+#define INIT_HPT366    &ide_init_hpt366
+#define DMA_HPT366     &ide_dmacapable_hpt366
+#else
+static byte hpt363_shared_irq;
+static byte hpt363_shared_pin;
+#define PCI_HPT366     NULL
+#define ATA66_HPT366   NULL
+#define INIT_HPT366    IDE_NO_DRIVER
+#define DMA_HPT366     NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_NS87415
+extern void ide_init_ns87415(ide_hwif_t *);
+#define INIT_NS87415   &ide_init_ns87415
+#else
+#define INIT_NS87415   IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_OPTI621
+extern void ide_init_opti621(ide_hwif_t *);
+#define INIT_OPTI621   &ide_init_opti621
+#else
+#define INIT_OPTI621   IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC_ADMA
+extern unsigned int pci_init_pdcadma(struct pci_dev *, const char *);
+extern unsigned int ata66_pdcadma(ide_hwif_t *);
+extern void ide_init_pdcadma(ide_hwif_t *);
+extern void ide_dmacapable_pdcadma(ide_hwif_t *, unsigned long);
+#define PCI_PDCADMA    &pci_init_pdcadma
+#define ATA66_PDCADMA  &ata66_pdcadma
+#define INIT_PDCADMA   &ide_init_pdcadma
+#define DMA_PDCADMA    &ide_dmacapable_pdcadma
+#else
+#define PCI_PDCADMA    IDE_IGNORE
+#define ATA66_PDCADMA  IDE_IGNORE
+#define INIT_PDCADMA   IDE_IGNORE
+#define DMA_PDCADMA    IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_PDC202XX
+extern unsigned int pci_init_pdc202xx(struct pci_dev *, const char *);
+extern unsigned int ata66_pdc202xx(ide_hwif_t *);
+extern void ide_init_pdc202xx(ide_hwif_t *);
+#define PCI_PDC202XX   &pci_init_pdc202xx
+#define ATA66_PDC202XX &ata66_pdc202xx
+#define INIT_PDC202XX  &ide_init_pdc202xx
+#else
+#define PCI_PDC202XX   NULL
+#define ATA66_PDC202XX NULL
+#define INIT_PDC202XX  NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_PIIX
+extern unsigned int pci_init_piix(struct pci_dev *, const char *);
+extern unsigned int ata66_piix(ide_hwif_t *);
+extern void ide_init_piix(ide_hwif_t *);
+#define PCI_PIIX       &pci_init_piix
+#define ATA66_PIIX     &ata66_piix
+#define INIT_PIIX      &ide_init_piix
+#else
+#define PCI_PIIX       NULL
+#define ATA66_PIIX     NULL
+#define INIT_PIIX      IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_IT8172
+extern unsigned int pci_init_it8172(struct pci_dev *, const char *);
+extern unsigned int ata66_it8172(ide_hwif_t *);
+extern void ide_init_it8172(ide_hwif_t *);
+#define PCI_IT8172     &pci_init_it8172
+#define INIT_IT8172    &ide_init_it8172
+#else
+#define PCI_IT8172     NULL
+#define ATA66_IT8172   NULL
+#define INIT_IT8172    NULL
+#endif
+
+#ifdef CONFIG_BLK_DEV_RZ1000
+extern void ide_init_rz1000(ide_hwif_t *);
+#define INIT_RZ1000    &ide_init_rz1000
+#else
+#define INIT_RZ1000    IDE_IGNORE
+#endif
+
+#define INIT_SAMURAI   NULL
+
+#ifdef CONFIG_BLK_DEV_SVWKS
+extern unsigned int pci_init_svwks(struct pci_dev *, const char *);
+extern unsigned int ata66_svwks(ide_hwif_t *);
+extern void ide_init_svwks(ide_hwif_t *);
+#define PCI_SVWKS      &pci_init_svwks
+#define ATA66_SVWKS    &ata66_svwks
+#define INIT_SVWKS     &ide_init_svwks
+#else
+#define PCI_SVWKS      NULL
+#define ATA66_SVWKS    NULL
+#define INIT_SVWKS     IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SIS5513
+extern unsigned int pci_init_sis5513(struct pci_dev *, const char *);
+extern unsigned int ata66_sis5513(ide_hwif_t *);
+extern void ide_init_sis5513(ide_hwif_t *);
+#define PCI_SIS5513    &pci_init_sis5513
+#define ATA66_SIS5513  &ata66_sis5513
+#define INIT_SIS5513   &ide_init_sis5513
+#else
+#define PCI_SIS5513    NULL
+#define ATA66_SIS5513  NULL
+#define INIT_SIS5513   IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SLC90E66
+extern unsigned int pci_init_slc90e66(struct pci_dev *, const char *);
+extern unsigned int ata66_slc90e66(ide_hwif_t *);
+extern void ide_init_slc90e66(ide_hwif_t *);
+#define PCI_SLC90E66   &pci_init_slc90e66
+#define ATA66_SLC90E66 &ata66_slc90e66
+#define INIT_SLC90E66  &ide_init_slc90e66
+#else
+#define PCI_SLC90E66   NULL
+#define ATA66_SLC90E66 NULL
+#define INIT_SLC90E66  IDE_NO_DRIVER
+#endif
+
+#ifdef CONFIG_BLK_DEV_SL82C105
+extern unsigned int pci_init_sl82c105(struct pci_dev *, const char *);
+extern void dma_init_sl82c105(ide_hwif_t *, unsigned long);
+extern void ide_init_sl82c105(ide_hwif_t *);
+#define PCI_W82C105    &pci_init_sl82c105
+#define DMA_W82C105    &dma_init_sl82c105
+#define INIT_W82C105   &ide_init_sl82c105
+#else
+#define PCI_W82C105    NULL
+#define DMA_W82C105    NULL
+#define INIT_W82C105   IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_TRM290
+extern void ide_init_trm290(ide_hwif_t *);
+#define INIT_TRM290    &ide_init_trm290
+#else
+#define INIT_TRM290    IDE_IGNORE
+#endif
+
+#ifdef CONFIG_BLK_DEV_VIA82CXXX
+extern unsigned int pci_init_via82cxxx(struct pci_dev *, const char *);
+extern unsigned int ata66_via82cxxx(ide_hwif_t *);
+extern void ide_init_via82cxxx(ide_hwif_t *);
+extern void ide_dmacapable_via82cxxx(ide_hwif_t *, unsigned long);
+#define PCI_VIA82CXXX  &pci_init_via82cxxx
+#define ATA66_VIA82CXXX        &ata66_via82cxxx
+#define INIT_VIA82CXXX &ide_init_via82cxxx
+#define DMA_VIA82CXXX  &ide_dmacapable_via82cxxx
+#else
+#define PCI_VIA82CXXX  NULL
+#define ATA66_VIA82CXXX        NULL
+#define INIT_VIA82CXXX IDE_NO_DRIVER
+#define DMA_VIA82CXXX  NULL
+#endif
+
+typedef struct ide_pci_enablebit_s {
+       byte    reg;    /* byte pci reg holding the enable-bit */
+       byte    mask;   /* mask to isolate the enable-bit */
+       byte    val;    /* value of masked reg when "enabled" */
+} ide_pci_enablebit_t;
+
+typedef struct ide_pci_device_s {
+       ide_pci_devid_t         devid;
+       char                    *name;
+       unsigned int            (*init_chipset)(struct pci_dev *dev, const char *name);
+       unsigned int            (*ata66_check)(ide_hwif_t *hwif);
+       void                    (*init_hwif)(ide_hwif_t *hwif);
+       void                    (*dma_init)(ide_hwif_t *hwif, unsigned long dmabase);
+       ide_pci_enablebit_t     enablebits[2];
+       byte                    bootable;
+       unsigned int            extra;
+} ide_pci_device_t;
+
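+/*
+ * One entry per known chipset: the init/ata66/hwif/dma hooks declared
+ * above, the per-channel enable bits (config reg, mask, expected value),
+ * whether the interface can be a boot device, and any extra I/O space.
+ */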
+static ide_pci_device_t ide_pci_chipsets[] __initdata = {
+       {DEVID_PIIXa,   "PIIX",         NULL,           NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIXb,   "PIIX",         NULL,           NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_MPIIX,   "MPIIX",        NULL,           NULL,           INIT_PIIX,      NULL,           {{0x6D,0x80,0x80}, {0x6F,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIX3,   "PIIX3",        PCI_PIIX,       NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIX4,   "PIIX4",        PCI_PIIX,       NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH0,    "ICH0",         PCI_PIIX,       NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIX4E2, "PIIX4",        PCI_PIIX,       NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH,     "ICH",          PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIX4U2, "PIIX4",        PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_PIIX4NX, "PIIX4",        PCI_PIIX,       NULL,           INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH2,    "ICH2",         PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH2M,   "ICH2M",        PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH3M,   "ICH3M",        PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH3,    "ICH3",         PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_ICH4,    "ICH4",         PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_CICH,    "C-ICH",        PCI_PIIX,       ATA66_PIIX,     INIT_PIIX,      NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_VIA_IDE, "VIA_IDE",      NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_MR_IDE,  "VP_IDE",       PCI_VIA82CXXX,  ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX,  {{0x40,0x02,0x02}, {0x40,0x01,0x01}},   ON_BOARD,       0 },
+       {DEVID_VP_IDE,  "VP_IDE",       PCI_VIA82CXXX,  ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX,  {{0x40,0x02,0x02}, {0x40,0x01,0x01}},   ON_BOARD,       0 },
+#ifndef CONFIG_PDC202XX_FORCE
+        {DEVID_PDC20246,"PDC20246",    PCI_PDC202XX,   NULL,           INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      16 },
+        {DEVID_PDC20262,"PDC20262",    PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      48 },
+        {DEVID_PDC20265,"PDC20265",    PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       48 },
+        {DEVID_PDC20267,"PDC20267",    PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      48 },
+#else /* !CONFIG_PDC202XX_FORCE */
+       {DEVID_PDC20246,"PDC20246",     PCI_PDC202XX,   NULL,           INIT_PDC202XX,  NULL,           {{0x50,0x02,0x02}, {0x50,0x04,0x04}},   OFF_BOARD,      16 },
+       {DEVID_PDC20262,"PDC20262",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x50,0x02,0x02}, {0x50,0x04,0x04}},   OFF_BOARD,      48 },
+       {DEVID_PDC20265,"PDC20265",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x50,0x02,0x02}, {0x50,0x04,0x04}},   OFF_BOARD,      48 },
+       {DEVID_PDC20267,"PDC20267",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x50,0x02,0x02}, {0x50,0x04,0x04}},   OFF_BOARD,      48 },
+#endif
+       {DEVID_PDC20268,"PDC20268",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       /* Promise used a different PCI ident for the raid card, apparently to try
+          to prevent Linux from detecting it and using its own raid code. We want
+          to detect it for the ataraid drivers, so we have to list both here. */
+       {DEVID_PDC20270,"PDC20270",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       {DEVID_PDC20269,"PDC20269",     PCI_PDC202XX,   ATA66_PDC202XX,  INIT_PDC202XX, NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       {DEVID_PDC20275,"PDC20275",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       {DEVID_PDC20276,"PDC20276",     PCI_PDC202XX,   ATA66_PDC202XX, INIT_PDC202XX,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       {DEVID_RZ1000,  "RZ1000",       NULL,           NULL,           INIT_RZ1000,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_RZ1001,  "RZ1001",       NULL,           NULL,           INIT_RZ1000,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_SAMURAI, "SAMURAI",      NULL,           NULL,           INIT_SAMURAI,   NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CMD640,  "CMD640",       NULL,           NULL,           IDE_IGNORE,     NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_NS87410, "NS87410",      NULL,           NULL,           NULL,           NULL,           {{0x43,0x08,0x08}, {0x47,0x08,0x08}},   ON_BOARD,       0 },
+       {DEVID_SIS5513, "SIS5513",      PCI_SIS5513,    ATA66_SIS5513,  INIT_SIS5513,   NULL,           {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},   ON_BOARD,       0 },
+       {DEVID_CMD643,  "CMD643",       PCI_CMD64X,     NULL,           INIT_CMD64X,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CMD646,  "CMD646",       PCI_CMD64X,     NULL,           INIT_CMD64X,    NULL,           {{0x00,0x00,0x00}, {0x51,0x80,0x80}},   ON_BOARD,       0 },
+       {DEVID_CMD648,  "CMD648",       PCI_CMD64X,     ATA66_CMD64X,   INIT_CMD64X,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CMD649,  "CMD649",       PCI_CMD64X,     ATA66_CMD64X,   INIT_CMD64X,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+#ifndef CONFIG_BLK_DEV_CMD680
+       {DEVID_CMD680,  "CMD680",       NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+#else /* CONFIG_BLK_DEV_CMD680 */
+       {DEVID_CMD680,  "CMD680",       PCI_CMD64X,     ATA66_CMD64X,   INIT_CMD64X,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+#endif /* !CONFIG_BLK_DEV_CMD680 */
+       {DEVID_HT6565,  "HT6565",       NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_OPTI621, "OPTI621",      NULL,           NULL,           INIT_OPTI621,   NULL,           {{0x45,0x80,0x00}, {0x40,0x08,0x00}},   ON_BOARD,       0 },
+       {DEVID_OPTI621X,"OPTI621X",     NULL,           NULL,           INIT_OPTI621,   NULL,           {{0x45,0x80,0x00}, {0x40,0x08,0x00}},   ON_BOARD,       0 },
+       {DEVID_TRM290,  "TRM290",       NULL,           NULL,           INIT_TRM290,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_NS87415, "NS87415",      NULL,           NULL,           INIT_NS87415,   NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_AEC6210, "AEC6210",      PCI_AEC62XX,    NULL,           INIT_AEC62XX,   DMA_AEC62XX,    {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},   OFF_BOARD,      0 },
+       {DEVID_AEC6260, "AEC6260",      PCI_AEC62XX,    ATA66_AEC62XX,  INIT_AEC62XX,   NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   NEVER_BOARD,    0 },
+       {DEVID_AEC6260R,"AEC6260R",     PCI_AEC62XX,    ATA66_AEC62XX,  INIT_AEC62XX,   NULL,           {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},   OFF_BOARD,      0 },
+       {DEVID_W82C105, "W82C105",      PCI_W82C105,    NULL,           INIT_W82C105,   DMA_W82C105,    {{0x40,0x01,0x01}, {0x40,0x10,0x10}},   ON_BOARD,       0 },
+       {DEVID_UM8673F, "UM8673F",      NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_UM8886A, "UM8886A",      NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_UM8886BF,"UM8886BF",     NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_HPT34X,  "HPT34X",       PCI_HPT34X,     NULL,           INIT_HPT34X,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   NEVER_BOARD,    16 },
+       {DEVID_HPT366,  "HPT366",       PCI_HPT366,     ATA66_HPT366,   INIT_HPT366,    DMA_HPT366,     {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      240 },
+       {DEVID_ALI15X3, "ALI15X3",      PCI_ALI15X3,    ATA66_ALI15X3,  INIT_ALI15X3,   DMA_ALI15X3,    {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CY82C693,"CY82C693",     PCI_CY82C693,   NULL,           INIT_CY82C693,  NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_HINT,    "HINT_IDE",     NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CS5530,  "CS5530",       PCI_CS5530,     NULL,           INIT_CS5530,    NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_AMD7401, "AMD7401",      NULL,           NULL,           NULL,           DMA_AMD74XX,    {{0x40,0x01,0x01}, {0x40,0x02,0x02}},   ON_BOARD,       0 },
+       {DEVID_AMD7409, "AMD7409",      PCI_AMD74XX,    ATA66_AMD74XX,  INIT_AMD74XX,   DMA_AMD74XX,    {{0x40,0x01,0x01}, {0x40,0x02,0x02}},   ON_BOARD,       0 },
+       {DEVID_AMD7411, "AMD7411",      PCI_AMD74XX,    ATA66_AMD74XX,  INIT_AMD74XX,   DMA_AMD74XX,    {{0x40,0x01,0x01}, {0x40,0x02,0x02}},   ON_BOARD,       0 },
+       {DEVID_AMD7441, "AMD7441",      PCI_AMD74XX,    ATA66_AMD74XX,  INIT_AMD74XX,   DMA_AMD74XX,    {{0x40,0x01,0x01}, {0x40,0x02,0x02}},   ON_BOARD,       0 },
+       {DEVID_PDCADMA, "PDCADMA",      PCI_PDCADMA,    ATA66_PDCADMA,  INIT_PDCADMA,   DMA_PDCADMA,    {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   OFF_BOARD,      0 },
+       {DEVID_SLC90E66,"SLC90E66",     PCI_SLC90E66,   ATA66_SLC90E66, INIT_SLC90E66,  NULL,           {{0x41,0x80,0x80}, {0x43,0x80,0x80}},   ON_BOARD,       0 },
+        {DEVID_OSB4,    "ServerWorks OSB4",            PCI_SVWKS,      ATA66_SVWKS,    INIT_SVWKS,     NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_CSB5,    "ServerWorks CSB5",             PCI_SVWKS,      ATA66_SVWKS,    INIT_SVWKS,     NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 },
+       {DEVID_ITE8172G,"IT8172G",      PCI_IT8172,     NULL,   INIT_IT8172,    NULL,           {{0x00,0x00,0x00}, {0x40,0x00,0x01}},   ON_BOARD,       0 },
+       {IDE_PCI_DEVID_NULL, "PCI_IDE", NULL,           NULL,           NULL,           NULL,           {{0x00,0x00,0x00}, {0x00,0x00,0x00}},   ON_BOARD,       0 }};
+
+/*
+ * This allows offboard ide-pci cards to enable a BIOS, verify interrupt
+ * settings of split-mirror pci-config space, place a chipset into init-mode,
+ * and/or preserve an interrupt if the card lacks native ide support.
+ */
+static unsigned int __init ide_special_settings (struct pci_dev *dev, const char *name)
+{
+       switch(dev->device) {
+               case PCI_DEVICE_ID_TTI_HPT366:
+               case PCI_DEVICE_ID_PROMISE_20246:
+               case PCI_DEVICE_ID_PROMISE_20262:
+               case PCI_DEVICE_ID_PROMISE_20265:
+               case PCI_DEVICE_ID_PROMISE_20267:
+               case PCI_DEVICE_ID_PROMISE_20268:
+               case PCI_DEVICE_ID_PROMISE_20270:
+               case PCI_DEVICE_ID_PROMISE_20269:
+               case PCI_DEVICE_ID_PROMISE_20275:
+               case PCI_DEVICE_ID_PROMISE_20276:
+               case PCI_DEVICE_ID_ARTOP_ATP850UF:
+               case PCI_DEVICE_ID_ARTOP_ATP860:
+               case PCI_DEVICE_ID_ARTOP_ATP860R:
+                       return dev->irq;
+               default:
+                       break;
+       }
+       return 0;
+}
+
+/*
+ * Match a PCI IDE port against an entry in ide_hwifs[],
+ * based on io_base port if possible.
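+ *
+ * The search runs in three passes: a hwif whose io_base was given
+ * explicitly via ide_setup() (chipset == ide_generic), a hwif whose
+ * default io_base matches (chipset == ide_unknown), and finally any
+ * unclaimed slot.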
+ */
+static ide_hwif_t __init *ide_match_hwif (unsigned long io_base, byte bootable, const char *name)
+{
+       int h;
+       ide_hwif_t *hwif;
+
+       /*
+        * Look for a hwif with matching io_base specified using
+        * parameters to ide_setup().
+        */
+       for (h = 0; h < MAX_HWIFS; ++h) {
+               hwif = &ide_hwifs[h];
+               if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+                       if (hwif->chipset == ide_generic)
+                               return hwif; /* a perfect match */
+               }
+       }
+       /*
+        * Look for a hwif with matching io_base default value.
+        * If chipset is "ide_unknown", then claim that hwif slot.
+        * Otherwise, some other chipset has already claimed it..  :(
+        */
+       for (h = 0; h < MAX_HWIFS; ++h) {
+               hwif = &ide_hwifs[h];
+               if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) {
+                       if (hwif->chipset == ide_unknown)
+                               return hwif; /* match */
+                       printk("%s: port 0x%04lx already claimed by %s\n", name, io_base, hwif->name);
+                       return NULL;    /* already claimed */
+               }
+       }
+       /*
+        * Okay, there is no hwif matching our io_base,
+        * so we'll just claim an unassigned slot.
+        * Give preference to claiming other slots before claiming ide0/ide1,
+        * just in case there's another interface yet-to-be-scanned
+        * which uses ports 1f0/170 (the ide0/ide1 defaults).
+        *
+        * The exception is a bootable card: the (bootable) flag below lets
+        * it claim ide0/ide1 right away instead of preferring later slots.
+        */
+       if (bootable) {
+               for (h = 0; h < MAX_HWIFS; ++h) {
+                       hwif = &ide_hwifs[h];
+                       if (hwif->chipset == ide_unknown)
+                               return hwif;    /* pick an unused entry */
+               }
+       } else {
+               for (h = 2; h < MAX_HWIFS; ++h) {
+                       hwif = ide_hwifs + h;
+                       if (hwif->chipset == ide_unknown)
+                               return hwif;    /* pick an unused entry */
+               }
+       }
+       for (h = 0; h < 2; ++h) {
+               hwif = ide_hwifs + h;
+               if (hwif->chipset == ide_unknown)
+                       return hwif;    /* pick an unused entry */
+       }
+       printk("%s: too many IDE interfaces, no room in table\n", name);
+       return NULL;
+}
+
+static int __init ide_setup_pci_baseregs (struct pci_dev *dev, const char *name)
+{
+       byte reg, progif = 0;
+
+       /*
+        * Place both IDE interfaces into PCI "native" mode:
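+        * (Per the PCI spec the IDE prog-if bits are: bit 0/2 = primary/
+        * secondary channel in native mode, bit 1/3 = that channel's mode
+        * is programmable; hence the (progif & 5) and (progif & 0xa) tests.)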
+        */
+       if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+               if ((progif & 0xa) != 0xa) {
+                       printk("%s: device not capable of full native PCI mode\n", name);
+                       return 1;
+               }
+               printk("%s: placing both ports into native PCI mode\n", name);
+               (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+               if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) {
+                       printk("%s: rewrite of PROGIF failed, wanted 0x%04x, got 0x%04x\n", name, progif|5, progif);
+                       return 1;
+               }
+       }
+       /*
+        * Setup base registers for IDE command/control spaces for each interface:
+        */
+       for (reg = 0; reg < 4; reg++) {
+               struct resource *res = dev->resource + reg;
+               if ((res->flags & IORESOURCE_IO) == 0)
+                       continue;
+               if (!res->start) {
+                       printk("%s: Missing I/O address #%d\n", name, reg);
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * ide_setup_pci_device() looks at the primary/secondary interfaces
+ * on a PCI IDE device and, if they are enabled, prepares the IDE driver
+ * for use with them.  This generic code works for most PCI chipsets.
+ *
+ * One thing that is not standardized is the location of the
+ * primary/secondary interface "enable/disable" bits.  For chipsets that
+ * we "know" about, this information is in the ide_pci_device_t struct;
+ * for all other chipsets, we just assume both interfaces are enabled.
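+ *
+ * Each enablebits entry is a {reg, mask, val} triple: a port counts as
+ * enabled when (pci_config[reg] & mask) == val, which is the test
+ * applied in the port setup loop below.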
+ */
+static void __init ide_setup_pci_device (struct pci_dev *dev, ide_pci_device_t *d)
+{
+       unsigned int port, at_least_one_hwif_enabled = 0, autodma = 0, pciirq = 0;
+       unsigned short pcicmd = 0, tried_config = 0;
+       byte tmp = 0;
+       ide_hwif_t *hwif, *mate = NULL;
+       unsigned int class_rev;
+       static int secondpdc = 0;
+
+#ifdef CONFIG_IDEDMA_AUTO
+       if (!noautodma)
+               autodma = 1;
+#endif
+
+       if (d->init_hwif == IDE_NO_DRIVER) {
+               printk(KERN_WARNING "%s: detected chipset, but driver not compiled in!\n", d->name);
+               d->init_hwif = NULL;
+       }
+
+       if (pci_enable_device(dev)) {
+               printk(KERN_WARNING "%s: (ide_setup_pci_device:) Could not enable device.\n", d->name);
+               return;
+       }
+
+check_if_enabled:
+       if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) {
+               printk("%s: error accessing PCI regs\n", d->name);
+               return;
+       }
+       if (!(pcicmd & PCI_COMMAND_IO)) {       /* is device disabled? */
+               /*
+                * PnP BIOS was *supposed* to have set this device up for us,
+                * but we can do it ourselves, so long as the BIOS has assigned an IRQ
+                *  (or possibly the device is using a "legacy header" for IRQs).
+                * Maybe the user deliberately *disabled* the device,
+                * but we'll eventually ignore it again if no drives respond.
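+                * tried_config makes sure we attempt this setup only once;
+                * a second trip through check_if_enabled gives up instead.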
+                */
+               if (tried_config++
+                || ide_setup_pci_baseregs(dev, d->name)
+                || pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) {
+                       printk("%s: device disabled (BIOS)\n", d->name);
+                       return;
+               }
+               autodma = 0;    /* default DMA off if we had to configure it here */
+               goto check_if_enabled;
+       }
+       if (tried_config)
+               printk("%s: device enabled (Linux)\n", d->name);
+
+       pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+       class_rev &= 0xff;
+
+       if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X)) {
+               /* see comments in hpt34x.c on why..... */
+               char *chipset_names[] = {"HPT343", "HPT345"};
+               strcpy(d->name, chipset_names[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0]);
+               d->bootable = (pcicmd & PCI_COMMAND_MEMORY) ? OFF_BOARD : NEVER_BOARD;
+       }
+
+       printk("%s: chipset revision %d\n", d->name, class_rev);
+
+       /*
+        * Can we trust the reported IRQ?
+        */
+       pciirq = dev->irq;
+       
+#ifdef CONFIG_PDC202XX_FORCE
+       if (dev->class >> 8 == PCI_CLASS_STORAGE_RAID) {
+               /*
+                * By rights we want to ignore the Promise FastTrak and
+                * SuperTrak series here, since those use their own driver.
+                */
+               if (dev->vendor == PCI_VENDOR_ID_PROMISE) {
+                       printk(KERN_INFO "ide: Skipping Promise RAID controller.\n");
+                       return;
+               }
+       }
+#endif /* CONFIG_PDC202XX_FORCE */
+       if ((dev->class & ~(0xfa)) != ((PCI_CLASS_STORAGE_IDE << 8) | 5)) {
+               printk("%s: not 100%% native mode: will probe irqs later\n", d->name);
+               /*
+                * This allows offboard ide-pci cards to enable a BIOS,
+                * verify interrupt settings of split-mirror pci-config
+                * space, place the chipset into init-mode, and/or
+                * preserve an interrupt if the card lacks native IDE
+                * support.
+                */
+               pciirq = (d->init_chipset) ? d->init_chipset(dev, d->name) : ide_special_settings(dev, d->name);
+       } else if (tried_config) {
+               printk("%s: will probe irqs later\n", d->name);
+               pciirq = 0;
+       } else if (!pciirq) {
+               printk("%s: bad irq (%d): will probe later\n", d->name, pciirq);
+               pciirq = 0;
+       } else {
+               if (d->init_chipset)
+                       (void) d->init_chipset(dev, d->name);
+#ifdef __sparc__
+               printk("%s: 100%% native mode on irq %s\n",
+                      d->name, __irq_itoa(pciirq));
+#else
+               printk("%s: 100%% native mode on irq %d\n", d->name, pciirq);
+#endif
+       }
+
+       /*
+        * Set up the IDE ports
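+        * (In native mode the PCI BARs supply the addresses: BARs 0/1 are
+        * the primary channel's command/control blocks and BARs 2/3 the
+        * secondary's; otherwise the legacy 0x1f0/0x3f4 and 0x170/0x374
+        * defaults below are used.)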
+        */
+       for (port = 0; port <= 1; ++port) {
+               unsigned long base = 0, ctl = 0;
+               ide_pci_enablebit_t *e = &(d->enablebits[port]);
+       
+               /*
+                * If this is a Promise FakeRaid controller, the 2nd controller
+                * will be marked as disabled while it is actually there and
+                * enabled by the BIOS for RAID purposes.
+                * Skip the normal "is it enabled" test for those.
+                */
+               if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) && (secondpdc++==1) && (port==1)  ) 
+                       goto controller_ok;
+               if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262)) && (secondpdc++==1) && (port==1)  ) 
+                       goto controller_ok;
+                       
+               if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || (tmp & e->mask) != e->val))
+                       continue;       /* port not enabled */
+controller_ok:                 
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) && (port) && (class_rev < 0x03))
+                       return;
+               if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE || (dev->class & (port ? 4 : 1)) != 0) {
+                       ctl  = dev->resource[(2*port)+1].start;
+                       base = dev->resource[2*port].start;
+                       if (!(ctl & PCI_BASE_ADDRESS_IO_MASK) ||
+                           !(base & PCI_BASE_ADDRESS_IO_MASK)) {
+                               printk("%s: IO baseregs (BIOS) are reported as MEM, report to <andre@linux-ide.org>.\n", d->name);
+#if 0
+                               /* FIXME! This really should check that it really gets the IO/MEM part right! */
+                               continue;
+#endif
+                       }
+               }
+               if ((ctl && !base) || (base && !ctl)) {
+                       printk("%s: inconsistent baseregs (BIOS) for port %d, skipping\n", d->name, port);
+                       continue;
+               }
+               if (!ctl)
+                       ctl = port ? 0x374 : 0x3f4;     /* use default value */
+               if (!base)
+                       base = port ? 0x170 : 0x1f0;    /* use default value */
+               if ((hwif = ide_match_hwif(base, d->bootable, d->name)) == NULL)
+                       continue;       /* no room in ide_hwifs[] */
+               if (hwif->io_ports[IDE_DATA_OFFSET] != base) {
+                       ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL);
+                       memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+                       hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
+               }
+               hwif->chipset = ide_pci;
+               hwif->pci_dev = dev;
+               hwif->pci_devid = d->devid;
+               hwif->channel = port;
+               if (!hwif->irq)
+                       hwif->irq = pciirq;
+               if (mate) {
+                       hwif->mate = mate;
+                       mate->mate = hwif;
+                       if (IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210)) {
+                               hwif->serialized = 1;
+                               mate->serialized = 1;
+                       }
+               }
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886BF) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8673F)) {
+                       hwif->irq = hwif->channel ? 15 : 14;
+                       goto bypass_umc_dma;
+               }
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_MPIIX))
+                       goto bypass_piix_dma;
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDCADMA))
+                       goto bypass_legacy_dma;
+               if (hwif->udma_four) {
+                       printk("%s: ATA-66/100 forced bit set (WARNING)!!\n", d->name);
+               } else {
+                       hwif->udma_four = (d->ata66_check) ? d->ata66_check(hwif) : 0;
+               }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_SIS5513) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PIIX4NX) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X)  ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_VIA_IDE) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_MR_IDE)  ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_VP_IDE))
+                       autodma = 0;
+               if (autodma)
+                       hwif->autodma = 1;
+
+               if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20246) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20267) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20269) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20275) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20276) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260R) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD646) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD648) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD649) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD680) ||
+                   IDE_PCI_DEVID_EQ(d->devid, DEVID_OSB4) ||
+                   ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 0x80))) {
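+                       /* The 'extra' field of the chipset table reserves
+                        * additional I/O space past the DMA base for the
+                        * first channel (mate == NULL) of chips that need
+                        * it, e.g. the 240 bytes for HPT366 above. */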
+                       unsigned long dma_base = ide_get_or_set_dma_base(hwif, (!mate && d->extra) ? d->extra : 0, d->name);
+                       if (dma_base && !(pcicmd & PCI_COMMAND_MASTER)) {
+                               /*
+                                * Set up BM-DMA capability (PnP BIOS should have done this)
+                                */
+                               if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530))
+                                       hwif->autodma = 0;      /* default DMA off if we had to configure it here */
+                               (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER);
+                               if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) {
+                                       printk("%s: %s error updating PCICMD\n", hwif->name, d->name);
+                                       dma_base = 0;
+                               }
+                       }
+                       if (dma_base) {
+                               if (d->dma_init) {
+                                       d->dma_init(hwif, dma_base);
+                               } else {
+                                       ide_setup_dma(hwif, dma_base, 8);
+                               }
+                       } else {
+                               printk("%s: %s Bus-Master DMA disabled (BIOS)\n", hwif->name, d->name);
+                       }
+               }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+bypass_legacy_dma:
+bypass_piix_dma:
+bypass_umc_dma:
+               if (d->init_hwif)  /* Call chipset-specific routine for each enabled hwif */
+                       d->init_hwif(hwif);
+               mate = hwif;
+               at_least_one_hwif_enabled = 1;
+       }
+       if (!at_least_one_hwif_enabled)
+               printk("%s: neither IDE port enabled (BIOS)\n", d->name);
+}
+
+static void __init pdc20270_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+       struct pci_dev *dev2 = NULL, *findev;
+       ide_pci_device_t *d2;
+
+       if ((dev->bus->self &&
+            dev->bus->self->vendor == PCI_VENDOR_ID_DEC) &&
+           (dev->bus->self->device == PCI_DEVICE_ID_DEC_21150)) {
+               if (PCI_SLOT(dev->devfn) & 2) {
+                       return;
+               }
+               d->extra = 0;
+               pci_for_each_dev(findev) {
+                       if ((findev->vendor == dev->vendor) &&
+                           (findev->device == dev->device) &&
+                           (PCI_SLOT(findev->devfn) & 2)) {
+                               byte irq = 0, irq2 = 0;
+                               dev2 = findev;
+                               pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+                               pci_read_config_byte(dev2, PCI_INTERRUPT_LINE, &irq2);
+                               if (irq != irq2) {
+                                       dev2->irq = dev->irq;
+                                       pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, irq);
+                               }
+
+                       }
+               }
+       }
+
+       printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+       ide_setup_pci_device(dev, d);
+       if (!dev2)
+               return;
+       d2 = d;
+       printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+       ide_setup_pci_device(dev2, d2);
+}
+
+static void __init hpt366_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d)
+{
+       struct pci_dev *dev2 = NULL, *findev;
+       ide_pci_device_t *d2;
+       unsigned char pin1 = 0, pin2 = 0;
+       unsigned int class_rev;
+       char *chipset_names[] = {"HPT366", "HPT366", "HPT368", "HPT370", "HPT370A", "HPT372"};
+
+       if (PCI_FUNC(dev->devfn) & 1)
+               return;
+
+       pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+       class_rev &= 0xff;
+       if (class_rev > 5)
+               class_rev = 5;
+       
+       strcpy(d->name, chipset_names[class_rev]);
+
+       switch(class_rev) {
+               case 4:
+               case 3: printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+                       ide_setup_pci_device(dev, d);
+                       return;
+               default:        break;
+       }
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin1);
+       pci_for_each_dev(findev) {
+               if ((findev->vendor == dev->vendor) &&
+                   (findev->device == dev->device) &&
+                   ((findev->devfn - dev->devfn) == 1) &&
+                   (PCI_FUNC(findev->devfn) & 1)) {
+                       dev2 = findev;
+                       pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
+                       hpt363_shared_pin = (pin1 != pin2) ? 1 : 0;
+                       hpt363_shared_irq = (dev->irq == dev2->irq) ? 1 : 0;
+                       if (hpt363_shared_pin && hpt363_shared_irq) {
+                               d->bootable = ON_BOARD;
+                               printk("%s: onboard version of chipset, pin1=%d pin2=%d\n", d->name, pin1, pin2);
+#if 0
+                               /* I forgot why I did this once, but it fixed something. */
+                               pci_write_config_byte(dev2, PCI_INTERRUPT_PIN, dev->irq);
+                               printk("PCI: %s: Fixing interrupt %d pin %d to ZERO \n", d->name, dev2->irq, pin2);
+                               pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, 0);
+#endif
+                       }
+                       break;
+               }
+       }
+       printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+       ide_setup_pci_device(dev, d);
+       if (!dev2)
+               return;
+       d2 = d;
+       printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn);
+       ide_setup_pci_device(dev2, d2);
+}
+
+/*
+ * ide_scan_pcibus() gets invoked at boot time from ide.c.
+ * It finds all PCI IDE controllers and calls ide_setup_pci_device for them.
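+ * ide_scan_pcidev() handles a single device: the table walk below stops
+ * at the first matching entry, or at the IDE_PCI_DEVID_NULL sentinel
+ * for controllers we do not know about.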
+ */
+void __init ide_scan_pcidev (struct pci_dev *dev)
+{
+       ide_pci_devid_t         devid;
+       ide_pci_device_t        *d;
+
+       devid.vid = dev->vendor;
+       devid.did = dev->device;
+       for (d = ide_pci_chipsets; d->devid.vid && !IDE_PCI_DEVID_EQ(d->devid, devid); ++d);
+       if (d->init_hwif == IDE_IGNORE)
+               printk("%s: ignored by ide_scan_pci_device() (uses own driver)\n", d->name);
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_OPTI621V) && !(PCI_FUNC(dev->devfn) & 1))
+               return;
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+               return; /* CY82C693 is more than only a IDE controller */
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_ITE8172G) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)))
+               return; /* IT8172G is also more than only an IDE controller */
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) && !(PCI_FUNC(dev->devfn) & 1))
+               return; /* UM8886A/BF pair */
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366))
+               hpt366_device_order_fixup(dev, d);
+       else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270))
+               pdc20270_device_order_fixup(dev, d);
+       else if (!IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL) || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE) {
+               if (IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL))
+                       printk("%s: unknown IDE controller on PCI bus %02x device %02x, VID=%04x, DID=%04x\n",
+                              d->name, dev->bus->number, dev->devfn, devid.vid, devid.did);
+               else
+                       printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn);
+               ide_setup_pci_device(dev, d);
+       }
+}
+
+void __init ide_scan_pcibus (int scan_direction)
+{
+       struct pci_dev *dev;
+
+       if (!scan_direction) {
+               pci_for_each_dev(dev) {
+                       ide_scan_pcidev(dev);
+               }
+       } else {
+               pci_for_each_dev_reverse(dev) {
+                       ide_scan_pcidev(dev);
+               }
+       }
+}
diff --git a/xen-2.4.16/drivers/ide/ide-probe.c b/xen-2.4.16/drivers/ide/ide-probe.c
new file mode 100644 (file)
index 0000000..e83157e
--- /dev/null
@@ -0,0 +1,1023 @@
+/*
+ *  linux/drivers/ide/ide-probe.c      Version 1.07    March 18, 2001
+ *
+ *  Copyright (C) 1994-1998  Linus Torvalds & authors (see below)
+ */
+
+/*
+ *  Mostly written by Mark Lord <mlord@pobox.com>
+ *                and Gadi Oxman <gadio@netvision.net.il>
+ *                and Andre Hedrick <andre@linux-ide.org>
+ *
+ *  See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the IDE probe module, as evolved from hd.c and ide.c.
+ *
+ * Version 1.00                move drive probing code from ide.c to ide-probe.c
+ * Version 1.01                fix compilation problem for m68k
+ * Version 1.02                increase WAIT_PIDENTIFY to avoid CD-ROM locking at boot
+ *                      by Andrea Arcangeli
+ * Version 1.03                fix for (hwif->chipset == ide_4drives)
+ * Version 1.04                fixed buggy treatments of known flash memory cards
+ *
+ * Version 1.05                fix for (hwif->chipset == ide_pdc4030)
+ *                     added ide6/7/8/9
+ *                     allowed for secondary flash card to be detectable
+ *                      with new flag : drive->ata_flash : 1;
+ * Version 1.06		streamline request queue and prep for cascade project.
+ * Version 1.07                max_sect <= 255; slower disks would get behind and
+ *                     then fall over when they get to 256.    Paul G.
+ */
+
+#undef REALLY_SLOW_IO          /* most systems can safely undef this */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/slab.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+#include <xeno/spinlock.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+static inline void do_identify (ide_drive_t *drive, byte cmd)
+{
+       int bswap = 1;
+       struct hd_driveid *id;
+
+       id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC);  /* called with interrupts disabled! */
+       if (!id) {
+               printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n");
+               goto err_kmalloc;
+       }
+
+       ide_input_data(drive, id, SECTOR_WORDS);                /* read 512 bytes of id info */
+       ide__sti();     /* local CPU only */
+       ide_fix_driveid(id);
+
+       if (id->word156 == 0x4d42) {
+               printk("%s: drive->id->word156 == 0x%04x \n", drive->name, drive->id->word156);
+       }
+
+       if (!drive->forced_lun)
+               drive->last_lun = id->last_lun & 0x7;
+#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA)
+       /*
+        * EATA SCSI controllers do a hardware ATA emulation:
+        * Ignore them if there is a driver for them available.
+        */
+       if ((id->model[0] == 'P' && id->model[1] == 'M')
+        || (id->model[0] == 'S' && id->model[1] == 'K')) {
+               printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
+               goto err_misc;
+       }
+#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */
+
+       /*
+        *  WIN_IDENTIFY returns little-endian info,
+        *  WIN_PIDENTIFY *usually* returns little-endian info.
+        */
+       if (cmd == WIN_PIDENTIFY) {
+               if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */
+                || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */
+                || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */
+                       bswap ^= 1;     /* Vertos drives may still be weird */
+       }
+       ide_fixstring (id->model,     sizeof(id->model),     bswap);
+       ide_fixstring (id->fw_rev,    sizeof(id->fw_rev),    bswap);
+       ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap);
+
+       if (strstr(id->model, "E X A B Y T E N E S T"))
+               goto err_misc;
+
+       id->model[sizeof(id->model)-1] = '\0';  /* we depend on this a lot! */
+       printk("%s: %s, ", drive->name, id->model);
+       drive->present = 1;
+
+       /*
+        * Check for an ATAPI device
+        */
+       if (cmd == WIN_PIDENTIFY) {
+               byte type = (id->config >> 8) & 0x1f;
+               printk("ATAPI ");
+#ifdef CONFIG_BLK_DEV_PDC4030
+               if (HWIF(drive)->channel == 1 && HWIF(drive)->chipset == ide_pdc4030) {
+                       printk(" -- not supported on 2nd Promise port\n");
+                       goto err_misc;
+               }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+               switch (type) {
+                       case ide_floppy:
+                               if (!strstr(id->model, "CD-ROM")) {
+                                       if (!strstr(id->model, "oppy") && !strstr(id->model, "poyp") && !strstr(id->model, "ZIP"))
+                                               printk("cdrom or floppy?, assuming ");
+                                       if (drive->media != ide_cdrom) {
+                                               printk ("FLOPPY");
+                                               break;
+                                       }
+                               }
+                               type = ide_cdrom;       /* Early cdrom models used zero */
+                       case ide_cdrom:
+                               drive->removable = 1;
+#ifdef CONFIG_PPC
+                               /* kludge for Apple PowerBook internal zip */
+                               if (!strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) {
+                                       printk ("FLOPPY");
+                                       type = ide_floppy;
+                                       break;
+                               }
+#endif
+                               printk ("CD/DVD-ROM");
+                               break;
+                       case ide_tape:
+                               printk ("TAPE");
+                               break;
+                       case ide_optical:
+                               printk ("OPTICAL");
+                               drive->removable = 1;
+                               break;
+                       default:
+                               printk("UNKNOWN (type %d)", type);
+                               break;
+               }
+               printk (" drive\n");
+               drive->media = type;
+               return;
+       }
+
+       /*
+        * Not an ATAPI device: looks like a "regular" hard disk
+        */
+       if (id->config & (1<<7))
+               drive->removable = 1;
+       /*
+        * Prevent a long system lockup probing later for a non-existent
+        * slave drive if the hwif is actually a flash memory card of some variety:
+        */
+       if (drive_is_flashcard(drive)) {
+               ide_drive_t *mate = &HWIF(drive)->drives[1^drive->select.b.unit];
+               if (!mate->ata_flash) {
+                       mate->present = 0;
+                       mate->noprobe = 1;
+               }
+       }
+       drive->media = ide_disk;
+       printk("ATA DISK drive\n");
+       QUIRK_LIST(HWIF(drive),drive);
+       return;
+
+err_misc:
+       kfree(id);
+err_kmalloc:
+       drive->present = 0;
+       return;
+}
+
+/*
+ * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive
+ * and waits for a response.  It also monitors irqs while this is
+ * happening, in hope of automatically determining which one is
+ * being used by the interface.
+ *
+ * Returns:    0  device was identified
+ *             1  device timed-out (no response to identify request)
+ *             2  device aborted the command (refused to identify itself)
+ */
+static int actual_try_to_identify (ide_drive_t *drive, byte cmd)
+{
+       int rc;
+       ide_ioreg_t hd_status;
+       unsigned long timeout;
+       byte s, a;
+
+       if (IDE_CONTROL_REG) {
+               /* take a deep breath */
+               ide_delay_50ms();
+               a = IN_BYTE(IDE_ALTSTATUS_REG);
+               s = IN_BYTE(IDE_STATUS_REG);
+               if ((a ^ s) & ~INDEX_STAT) {
+                       printk("%s: probing with STATUS(0x%02x) instead of ALTSTATUS(0x%02x)\n", drive->name, s, a);
+                       hd_status = IDE_STATUS_REG;     /* ancient Seagate drives, broken interfaces */
+               } else {
+                       hd_status = IDE_ALTSTATUS_REG;  /* use non-intrusive polling */
+               }
+       } else {
+               ide_delay_50ms();
+               hd_status = IDE_STATUS_REG;
+       }
+
+       /* set features register for atapi identify command to be sure of reply */
+       if ((cmd == WIN_PIDENTIFY))
+               OUT_BYTE(0,IDE_FEATURE_REG);    /* disable dma & overlap */
+
+#if CONFIG_BLK_DEV_PDC4030
+       if (HWIF(drive)->chipset == ide_pdc4030) {
+               /* DC4030 hosted drives need their own identify... */
+               extern int pdc4030_identify(ide_drive_t *);
+               if (pdc4030_identify(drive)) {
+                       return 1;
+               }
+       } else
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+               OUT_BYTE(cmd,IDE_COMMAND_REG);          /* ask drive for ID */
+       timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
+       timeout += jiffies;
+       do {
+               if (0 < (signed long)(jiffies - timeout)) {
+                       return 1;       /* drive timed-out */
+               }
+               ide_delay_50ms();               /* give drive a breather */
+       } while (IN_BYTE(hd_status) & BUSY_STAT);
+
+       ide_delay_50ms();               /* wait for IRQ and DRQ_STAT */
+       if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+               unsigned long flags;
+               __save_flags(flags);    /* local CPU only */
+               __cli();                /* local CPU only; some systems need this */
+               do_identify(drive, cmd); /* drive returned ID */
+               rc = 0;                 /* drive responded with ID */
+               (void) GET_STAT();      /* clear drive IRQ */
+               __restore_flags(flags); /* local CPU only */
+       } else
+               rc = 2;                 /* drive refused ID */
+       return rc;
+}
+
+static int try_to_identify (ide_drive_t *drive, byte cmd)
+{
+       int retval;
+       int autoprobe = 0;
+       unsigned long cookie = 0;
+
+       if (IDE_CONTROL_REG && !HWIF(drive)->irq) {
+               autoprobe = 1;
+               cookie = probe_irq_on();
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* enable device irq */
+       }
+
+       retval = actual_try_to_identify(drive, cmd);
+
+       if (autoprobe) {
+               int irq;
+               OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */
+               (void) GET_STAT();                      /* clear drive IRQ */
+               udelay(5);
+               irq = probe_irq_off(cookie);
+               if (!HWIF(drive)->irq) {
+                       if (irq > 0) {
+                               HWIF(drive)->irq = irq;
+                       } else {        /* Mmmm.. multiple IRQs.. don't know which was ours */
+                               printk("%s: IRQ probe failed (0x%lx)\n", drive->name, cookie);
+#ifdef CONFIG_BLK_DEV_CMD640
+#ifdef CMD640_DUMP_REGS
+                               if (HWIF(drive)->chipset == ide_cmd640) {
+                                       printk("%s: Hmmm.. probably a driver problem.\n", drive->name);
+                                       CMD640_DUMP_REGS;
+                               }
+#endif /* CMD640_DUMP_REGS */
+#endif /* CONFIG_BLK_DEV_CMD640 */
+                       }
+               }
+       }
+       return retval;
+}
+
+
+/*
+ * do_probe() has the difficult job of finding a drive if it exists,
+ * without getting hung up if it doesn't exist, without trampling on
+ * ethernet cards, and without leaving any IRQs dangling to haunt us later.
+ *
+ * If a drive is "known" to exist (from CMOS or kernel parameters),
+ * but does not respond right away, the probe will "hang in there"
+ * for the maximum wait time (about 30 seconds), otherwise it will
+ * exit much more quickly.
+ *
+ * Returns:    0  device was identified
+ *             1  device timed-out (no response to identify request)
+ *             2  device aborted the command (refused to identify itself)
+ *             3  bad status from device (possible for ATAPI drives)
+ *             4  probe was not attempted because failure was obvious
+ */
+static int do_probe (ide_drive_t *drive, byte cmd)
+{
+       int rc;
+       ide_hwif_t *hwif = HWIF(drive);
+       if (drive->present) {   /* avoid waiting for inappropriate probes */
+               if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
+                       return 4;
+       }
+#ifdef DEBUG
+       printk("probing for %s: present=%d, media=%d, probetype=%s\n",
+               drive->name, drive->present, drive->media,
+               (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
+#endif
+       ide_delay_50ms();       /* needed for some systems (e.g. crw9624 as drive0 with disk as slave) */
+       SELECT_DRIVE(hwif,drive);
+       ide_delay_50ms();
+       if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) {
+               if (drive->select.b.unit != 0) {
+                       SELECT_DRIVE(hwif,&hwif->drives[0]);    /* exit with drive0 selected */
+                       ide_delay_50ms();               /* allow BUSY_STAT to assert & clear */
+               }
+               return 3;    /* no i/f present: mmm.. this should be a 4 -ml */
+       }
+
+       if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT)
+        || drive->present || cmd == WIN_PIDENTIFY)
+       {
+               if ((rc = try_to_identify(drive,cmd)))   /* send cmd and wait */
+                       rc = try_to_identify(drive,cmd); /* failed: try again */
+               if (rc == 1 && cmd == WIN_PIDENTIFY && drive->autotune != 2) {
+                       unsigned long timeout;
+                       printk("%s: no response (status = 0x%02x), resetting drive\n", drive->name, GET_STAT());
+                       ide_delay_50ms();
+                       OUT_BYTE (drive->select.all, IDE_SELECT_REG);
+                       ide_delay_50ms();
+                       OUT_BYTE(WIN_SRST, IDE_COMMAND_REG);
+                       timeout = jiffies;
+                       while ((GET_STAT() & BUSY_STAT) && time_before(jiffies, timeout + WAIT_WORSTCASE))
+                               ide_delay_50ms();
+                       rc = try_to_identify(drive, cmd);
+               }
+               if (rc == 1)
+                       printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT());
+               (void) GET_STAT();              /* ensure drive irq is clear */
+       } else {
+               rc = 3;                         /* not present or maybe ATAPI */
+       }
+       if (drive->select.b.unit != 0) {
+               SELECT_DRIVE(hwif,&hwif->drives[0]);    /* exit with drive0 selected */
+               ide_delay_50ms();
+               (void) GET_STAT();              /* ensure drive irq is clear */
+       }
+       return rc;
+}
+
+/*
+ * enable_nest() wakes up a drive that identifies itself as an
+ * "EXABYTE NEST" and then re-probes it as a regular ATA/ATAPI device.
+ */
+static void enable_nest (ide_drive_t *drive)
+{
+       unsigned long timeout;
+
+       printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model);
+       SELECT_DRIVE(HWIF(drive), drive);
+       ide_delay_50ms();
+       OUT_BYTE(EXABYTE_ENABLE_NEST, IDE_COMMAND_REG);
+       timeout = jiffies + WAIT_WORSTCASE;
+       do {
+               if (time_after(jiffies, timeout)) {
+                       printk("failed (timeout)\n");
+                       return;
+               }
+               ide_delay_50ms();
+       } while (GET_STAT() & BUSY_STAT);
+       ide_delay_50ms();
+       if (!OK_STAT(GET_STAT(), 0, BAD_STAT))
+               printk("failed (status = 0x%02x)\n", GET_STAT());
+       else
+               printk("success\n");
+       if (do_probe(drive, WIN_IDENTIFY) >= 2) {       /* if !(success||timed-out) */
+               (void) do_probe(drive, WIN_PIDENTIFY);  /* look for ATAPI device */
+       }
+}
+
+/*
+ * probe_for_drive() tests for existence of a given drive using do_probe().
+ *
+ * Returns:    0  no device was found
+ *             1  device was found (note: drive->present might still be 0)
+ */
+static inline byte probe_for_drive (ide_drive_t *drive)
+{
+       if (drive->noprobe)                     /* skip probing? */
+               return drive->present;
+       if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+               (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+       }
+       if (drive->id && strstr(drive->id->model, "E X A B Y T E N E S T"))
+               enable_nest(drive);
+       if (!drive->present)
+               return 0;                       /* drive not found */
+       if (drive->id == NULL) {                /* identification failed? */
+               if (drive->media == ide_disk) {
+                       printk ("%s: non-IDE drive, CHS=%d/%d/%d\n",
+                        drive->name, drive->cyl, drive->head, drive->sect);
+               } else if (drive->media == ide_cdrom) {
+                       printk("%s: ATAPI cdrom (?)\n", drive->name);
+               } else {
+                       drive->present = 0;     /* nuke it */
+               }
+       }
+       return 1;       /* drive was found */
+}
+
+/*
+ * Calculate the region that this interface occupies,
+ * handling interfaces where the registers may not be
+ * ordered sanely.  We deal with the CONTROL register
+ * separately.
+ */
+static int hwif_check_regions (ide_hwif_t *hwif)
+{
+       int region_errors = 0;
+
+       hwif->straight8 = 0;
+       region_errors  = ide_check_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+       region_errors += ide_check_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+
+       if (hwif->io_ports[IDE_CONTROL_OFFSET])
+               region_errors += ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+       if (hwif->io_ports[IDE_IRQ_OFFSET])
+               region_errors += ide_check_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+       /*
+        * If any errors are returned, we drop the hwif interface.
+        */
+       return(region_errors);
+}
+
+static void hwif_register (ide_hwif_t *hwif)
+{
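+	/* If the eight taskfile ports form one contiguous, 8-byte-aligned
+	 * block (data | 7 == status), grab a single 8-port region instead
+	 * of eight one-byte regions. */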
+       if (((unsigned long)hwif->io_ports[IDE_DATA_OFFSET] | 7) ==
+           ((unsigned long)hwif->io_ports[IDE_STATUS_OFFSET])) {
+               ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 8, hwif->name);
+               hwif->straight8 = 1;
+               goto jump_straight8;
+       }
+
+       if (hwif->io_ports[IDE_DATA_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_ERROR_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_ERROR_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_SECTOR_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_LCYL_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_LCYL_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_HCYL_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_HCYL_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_SELECT_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_SELECT_OFFSET], 1, hwif->name);
+       if (hwif->io_ports[IDE_STATUS_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_STATUS_OFFSET], 1, hwif->name);
+
+jump_straight8:
+       if (hwif->io_ports[IDE_CONTROL_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+       if (hwif->io_ports[IDE_IRQ_OFFSET])
+               ide_request_region(hwif->io_ports[IDE_IRQ_OFFSET], 1, hwif->name);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+/*
+ * This routine only knows how to look for drive units 0 and 1
+ * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
+ */
+static void probe_hwif (ide_hwif_t *hwif)
+{
+       unsigned int unit;
+       unsigned long flags;
+
+       if (hwif->noprobe)
+               return;
+#ifdef CONFIG_BLK_DEV_IDE
+       if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) {
+               extern void probe_cmos_for_drives(ide_hwif_t *);
+
+               probe_cmos_for_drives (hwif);
+       }
+#endif
+
+       if ((hwif->chipset != ide_4drives || !hwif->mate->present) &&
+#if CONFIG_BLK_DEV_PDC4030
+           (hwif->chipset != ide_pdc4030 || hwif->channel == 0) &&
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+           (hwif_check_regions(hwif))) {
+               int msgout = 0;
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       ide_drive_t *drive = &hwif->drives[unit];
+                       if (drive->present) {
+                               drive->present = 0;
+                               printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name);
+                               msgout = 1;
+                       }
+               }
+               if (!msgout)
+                       printk("%s: ports already in use, skipping probe\n", hwif->name);
+               return; 
+       }
+
+       __save_flags(flags);    /* local CPU only */
+       __sti();                /* local CPU only; needed for jiffies and irq probing */
+       /*
+        * Second drive should only exist if first drive was found,
+        * but a lot of cdrom drives are configured as single slaves.
+        */
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               (void) probe_for_drive (drive);
+               if (drive->present && !hwif->present) {
+                       hwif->present = 1;
+                       if (hwif->chipset != ide_4drives || !hwif->mate->present) {
+                               hwif_register(hwif);
+                       }
+               }
+       }
+       if (hwif->io_ports[IDE_CONTROL_OFFSET] && hwif->reset) {
+               unsigned long timeout = jiffies + WAIT_WORSTCASE;
+               byte stat;
+
+               printk("%s: reset\n", hwif->name);
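+		/* Assert SRST in the device control register (0x0c: SRST in
+		 * bit 2, with bit 3 kept high as older ATA specs require),
+		 * hold for 10us, then release it and poll until BSY clears. */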
+               OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]);
+               udelay(10);
+               OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
+               do {
+                       ide_delay_50ms();
+                       stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+               } while ((stat & BUSY_STAT) && 0 < (signed long)(timeout - jiffies));
+
+       }
+       __restore_flags(flags); /* local CPU only */
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               if (drive->present) {
+                       ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+                       if (tuneproc != NULL && drive->autotune == 1)
+                               tuneproc(drive, 255);   /* auto-tune PIO mode */
+               }
+       }
+}
+
+#if MAX_HWIFS > 1
+/*
+ * save_match() is used to simplify logic in init_irq() below.
+ *
+ * A loophole here is that we may not know about a particular
+ * hwif's irq until after that hwif is actually probed/initialized..
+ * This could be a problem for the case where an hwif is on a
+ * dual interface that requires serialization (eg. cmd640) and another
+ * hwif using one of the same irqs is initialized beforehand.
+ *
+ * This routine detects and reports such situations, but does not fix them.
+ */
+static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
+{
+       ide_hwif_t *m = *match;
+
+       if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
+               if (!new->hwgroup)
+                       return;
+               printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name);
+       }
+       if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
+               *match = new;
+}
+#endif /* MAX_HWIFS > 1 */
+
+/*
+ * init request queue
+ */
+static void ide_init_queue(ide_drive_t *drive)
+{
+       request_queue_t *q = &drive->queue;
+
+       q->queuedata = HWGROUP(drive);
+       blk_init_queue(q, do_ide_request);
+
+       if (drive->media == ide_disk) {
+#ifdef CONFIG_BLK_DEV_ELEVATOR_NOOP
+               elevator_init(&q->elevator, ELEVATOR_NOOP);
+#endif
+       }
+}
+
+/*
+ * This routine sets up the irq for an ide interface, and creates a new
+ * hwgroup for the irq/hwif if none was previously assigned.
+ *
+ * Much of the code is for correctly detecting/handling irq sharing
+ * and irq serialization situations.  This is somewhat complex because
+ * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ *
+ * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with
+ * interrupts completely disabled.  This can be bad for interrupt latency,
+ * but anything else has led to problems on some machines.  We re-enable
+ * interrupts as much as we can safely do in most places.
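+ *
+ * hwifs in a hwgroup are linked on a circular list via hwif->next, and
+ * the group's drives on a circular list via drive->next; the linking
+ * code at the end of this function maintains both.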
+ */
+static int init_irq (ide_hwif_t *hwif)
+{
+       unsigned long flags;
+       unsigned int index;
+       ide_hwgroup_t *hwgroup, *new_hwgroup;
+       ide_hwif_t *match = NULL;
+
+       
+       /* Allocate the buffer and potentially sleep first */
+       
+       new_hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL);
+       
+       save_flags(flags);      /* all CPUs */
+       cli();                  /* all CPUs */
+
+       hwif->hwgroup = NULL;
+#if MAX_HWIFS > 1
+       /*
+        * Group up with any other hwifs that share our irq(s).
+        */
+       for (index = 0; index < MAX_HWIFS; index++) {
+               ide_hwif_t *h = &ide_hwifs[index];
+               if (h->hwgroup) {  /* scan only initialized hwif's */
+                       if (hwif->irq == h->irq) {
+                               hwif->sharing_irq = h->sharing_irq = 1;
+                               if (hwif->chipset != ide_pci || h->chipset != ide_pci) {
+                                       save_match(hwif, h, &match);
+                               }
+                       }
+                       if (hwif->serialized) {
+                               if (hwif->mate && hwif->mate->irq == h->irq)
+                                       save_match(hwif, h, &match);
+                       }
+                       if (h->serialized) {
+                               if (h->mate && hwif->irq == h->mate->irq)
+                                       save_match(hwif, h, &match);
+                       }
+               }
+       }
+#endif /* MAX_HWIFS > 1 */
+       /*
+        * If we are still without a hwgroup, then form a new one
+        */
+       if (match) {
+               hwgroup = match->hwgroup;
+               if(new_hwgroup)
+                       kfree(new_hwgroup);
+       } else {
+               hwgroup = new_hwgroup;
+               if (!hwgroup) {
+                       restore_flags(flags);   /* all CPUs */
+                       return 1;
+               }
+               memset(hwgroup, 0, sizeof(ide_hwgroup_t));
+               hwgroup->hwif     = hwif->next = hwif;
+               hwgroup->rq       = NULL;
+               hwgroup->handler  = NULL;
+               hwgroup->drive    = NULL;
+               hwgroup->busy     = 0;
+               init_timer(&hwgroup->timer);
+               hwgroup->timer.function = &ide_timer_expiry;
+               hwgroup->timer.data = (unsigned long) hwgroup;
+       }
+
+       /*
+        * Allocate the irq, if not already obtained for another hwif
+        */
+       if (!match || match->irq != hwif->irq) {
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+               int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_SHIRQ : SA_INTERRUPT;
+#else /* !CONFIG_IDEPCI_SHARE_IRQ */
+               int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_INTERRUPT|SA_SHIRQ : SA_INTERRUPT;
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+
+               if (hwif->io_ports[IDE_CONTROL_OFFSET])
+                       OUT_BYTE(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]); /* clear nIEN */
+
+               if (ide_request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwgroup)) {
+                       if (!match)
+                               kfree(hwgroup);
+                       restore_flags(flags);   /* all CPUs */
+                       return 1;
+               }
+       }
+
+       /*
+        * Everything is okay, so link us into the hwgroup
+        */
+       hwif->hwgroup = hwgroup;
+       hwif->next = hwgroup->hwif->next;
+       hwgroup->hwif->next = hwif;
+
+       for (index = 0; index < MAX_DRIVES; ++index) {
+               ide_drive_t *drive = &hwif->drives[index];
+               if (!drive->present)
+                       continue;
+               if (!hwgroup->drive)
+                       hwgroup->drive = drive;
+               drive->next = hwgroup->drive->next;
+               hwgroup->drive->next = drive;
+               ide_init_queue(drive);
+       }
+       if (!hwgroup->hwif) {
+               hwgroup->hwif = HWIF(hwgroup->drive);
+#ifdef DEBUG
+               printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name);
+#endif
+       }
+       restore_flags(flags);   /* all CPUs; safe now that hwif->hwgroup is set up */
+
+#if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__)
+       printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name,
+               hwif->io_ports[IDE_DATA_OFFSET],
+               hwif->io_ports[IDE_DATA_OFFSET]+7,
+               hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq);
+#elif defined(__sparc__)
+       printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %s", hwif->name,
+               hwif->io_ports[IDE_DATA_OFFSET],
+               hwif->io_ports[IDE_DATA_OFFSET]+7,
+               hwif->io_ports[IDE_CONTROL_OFFSET], __irq_itoa(hwif->irq));
+#else
+       printk("%s at %p on irq 0x%08x", hwif->name,
+               hwif->io_ports[IDE_DATA_OFFSET], hwif->irq);
+#endif /* __mc68000__ && CONFIG_APUS */
+       if (match)
+               printk(" (%sed with %s)",
+                       hwif->sharing_irq ? "shar" : "serializ", match->name);
+       printk("\n");
+       return 0;
+}
+
+/*
+ * init_gendisk() (as opposed to ide_geninit) is called for each major device,
+ * after probing for drives, to allocate partition tables and other data
+ * structures needed for the routines in genhd.c.  ide_geninit() gets called
+ * somewhat later, during the partition check.
+ */
+static void init_gendisk (ide_hwif_t *hwif)
+{
+       struct gendisk *gd;
+       unsigned int unit, units, minors;
+       int *bs, *max_sect; /* , *max_ra; */
+#ifdef DEVFS_MUST_DIE
+       extern devfs_handle_t ide_devfs_handle;
+#endif
+
+#if 1
+       units = MAX_DRIVES;
+#else
+       /* figure out maximum drive number on the interface */
+       for (units = MAX_DRIVES; units > 0; --units) {
+               if (hwif->drives[units-1].present)
+                       break;
+       }
+#endif
+
+       minors    = units * (1<<PARTN_BITS);
+       gd        = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+       if (!gd)
+               goto err_kmalloc_gd;
+       gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+       if (!gd->sizes)
+               goto err_kmalloc_gd_sizes;
+       gd->part  = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+       if (!gd->part)
+               goto err_kmalloc_gd_part;
+       bs        = kmalloc (minors*sizeof(int), GFP_KERNEL);
+       if (!bs)
+               goto err_kmalloc_bs;
+       max_sect  = kmalloc (minors*sizeof(int), GFP_KERNEL);
+       if (!max_sect)
+               goto err_kmalloc_max_sect;
+#if 0
+       max_ra    = kmalloc (minors*sizeof(int), GFP_KERNEL);
+       if (!max_ra)
+               goto err_kmalloc_max_ra;
+#endif
+
+       memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+       /* cdroms and msdos f/s are examples of non-1024 blocksizes */
+       blksize_size[hwif->major] = bs;
+       max_sectors[hwif->major] = max_sect;
+       /*max_readahead[hwif->major] = max_ra;*/
+       for (unit = 0; unit < minors; ++unit) {
+               *bs++ = BLOCK_SIZE;
+               /*
+                * Cap requests at 128 sectors (64KB); the pdc4030 can
+                * only manage 127.
+                */
+               *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 128);
+               /* *max_ra++ = vm_max_readahead; */
+       }
+
+       for (unit = 0; unit < units; ++unit)
+               hwif->drives[unit].part = &gd->part[unit << PARTN_BITS];
+
+       gd->major       = hwif->major;          /* our major device number */
+       gd->major_name  = IDE_MAJOR_NAME;       /* treated special in genhd.c */
+       gd->minor_shift = PARTN_BITS;           /* num bits for partitions */
+       gd->max_p       = 1<<PARTN_BITS;        /* 1 + max partitions / drive */
+       gd->nr_real     = units;                /* current num real drives */
+       gd->real_devices= hwif;                 /* ptr to internal data */
+       gd->next        = NULL;                 /* linked list of major devs */
+       gd->fops        = ide_fops;             /* file operations */
+       gd->flags       = kmalloc (sizeof *gd->flags * units, GFP_KERNEL);
+       if (gd->flags)
+               memset (gd->flags, 0, sizeof *gd->flags * units);
+#ifdef DEVFS_MUST_DIE
+       gd->de_arr      = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL);
+       if (gd->de_arr)
+               memset (gd->de_arr, 0, sizeof *gd->de_arr * units);
+#endif
+
+       hwif->gd = gd;
+       add_gendisk(gd);
+
+       for (unit = 0; unit < units; ++unit) {
+#if 1
+               char name[64];
+               ide_add_generic_settings(hwif->drives + unit);
+               hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+               sprintf (name, "host%d/bus%d/target%d/lun%d",
+                       (hwif->channel && hwif->mate) ?
+                       hwif->mate->index : hwif->index,
+                       hwif->channel, unit, hwif->drives[unit].lun);
+#ifdef DEVFS_MUST_DIE
+               if (hwif->drives[unit].present)
+                       hwif->drives[unit].de = devfs_mk_dir(ide_devfs_handle, name, NULL);
+#endif
+#else
+               if (hwif->drives[unit].present) {
+                       char name[64];
+
+                       ide_add_generic_settings(hwif->drives + unit);
+                       hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit);
+                       sprintf (name, "host%d/bus%d/target%d/lun%d",
+                                (hwif->channel && hwif->mate) ? hwif->mate->index : hwif->index,
+                                hwif->channel, unit, hwif->drives[unit].lun);
+                       hwif->drives[unit].de =
+                               devfs_mk_dir (ide_devfs_handle, name, NULL);
+               }
+#endif
+       }
+       return;
+
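+       /*
+        * Error unwinding: each label frees everything allocated before
+        * the failing kmalloc, in reverse order of allocation.
+        */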
+#if 0
+err_kmalloc_max_ra:
+       kfree(max_sect);
+#endif
+err_kmalloc_max_sect:
+       kfree(bs);
+err_kmalloc_bs:
+       kfree(gd->part);
+err_kmalloc_gd_part:
+       kfree(gd->sizes);
+err_kmalloc_gd_sizes:
+       kfree(gd);
+err_kmalloc_gd:
+       printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n");
+       return;
+}
+
+static int hwif_init (ide_hwif_t *hwif)
+{
+       if (!hwif->present)
+               return 0;
+       if (!hwif->irq) {
+               if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET])))
+               {
+                       printk("%s: DISABLED, NO IRQ\n", hwif->name);
+                       return (hwif->present = 0);
+               }
+       }
+#ifdef CONFIG_BLK_DEV_HD
+       if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) {
+               printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name);
+               return (hwif->present = 0);
+       }
+#endif /* CONFIG_BLK_DEV_HD */
+       
+       hwif->present = 0; /* we set it back to 1 if all is ok below */
+
+#ifdef DEVFS_MUST_DIE
+       if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) {
+               printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
+               return (hwif->present = 0);
+       }
+#endif 
+
+       if (init_irq(hwif)) {
+               int i = hwif->irq;
+               /*
+                *      It failed to initialise. Find the default IRQ for 
+                *      this port and try that.
+                */
+               if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) {
+                       printk("%s: DISABLED, unable to get IRQ %d.\n", hwif->name, i);
+                       (void) unregister_blkdev (hwif->major, hwif->name);
+                       return (hwif->present = 0);
+               }
+               if (init_irq(hwif)) {
+                       printk("%s: probed IRQ %d and default IRQ %d failed.\n",
+                               hwif->name, i, hwif->irq);
+                       (void) unregister_blkdev (hwif->major, hwif->name);
+                       return (hwif->present = 0);
+               }
+               printk("%s: probed IRQ %d failed, using default IRQ %d.\n",
+                       hwif->name, i, hwif->irq);
+       }
+       
+       init_gendisk(hwif);
+       blk_dev[hwif->major].data = hwif;
+       blk_dev[hwif->major].queue = ide_get_queue;
+#if 0
+       read_ahead[hwif->major] = 8;    /* (4kB) */
+#endif
+       hwif->present = 1;      /* success */
+
+#if (DEBUG_SPINLOCK > 0)
+{
+       static int done = 0;
+       if (!done++)
+               printk("io_request_lock is %p\n", &io_request_lock);    /* FIXME */
+}
+#endif
+       return hwif->present;
+}
+
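+/*
+ * Export shims: the underlying helpers are file-local, so these
+ * wrappers give the rest of the kernel a callable entry point.
+ */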
+void export_ide_init_queue (ide_drive_t *drive)
+{
+       ide_init_queue(drive);
+}
+
+byte export_probe_for_drive (ide_drive_t *drive)
+{
+       return probe_for_drive(drive);
+}
+
+EXPORT_SYMBOL(export_ide_init_queue);
+EXPORT_SYMBOL(export_probe_for_drive);
+
+int ideprobe_init (void);
+static ide_module_t ideprobe_module = {
+       IDE_PROBE_MODULE,
+       ideprobe_init,
+       NULL
+};
+
+int ideprobe_init (void)
+{
+       unsigned int index;
+       int probe[MAX_HWIFS];
+       
+       MOD_INC_USE_COUNT;
+       memset(probe, 0, MAX_HWIFS * sizeof(int));
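+       /*
+        * Record which interfaces are still absent; only those are
+        * probed and initialised below, leaving already-present ones
+        * untouched.
+        */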
+       for (index = 0; index < MAX_HWIFS; ++index)
+               probe[index] = !ide_hwifs[index].present;
+
+       /*
+        * Probe for drives in the usual way: CMOS/BIOS, then poke at ports
+        */
+       for (index = 0; index < MAX_HWIFS; ++index)
+               if (probe[index])
+                       probe_hwif(&ide_hwifs[index]);
+       for (index = 0; index < MAX_HWIFS; ++index)
+               if (probe[index])
+                       hwif_init(&ide_hwifs[index]);
+       if (!ide_probe)
+               ide_probe = &ideprobe_module;
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+#ifdef MODULE
+extern int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *);
+
+int init_module (void)
+{
+       unsigned int index;
+       
+       for (index = 0; index < MAX_HWIFS; ++index)
+               ide_unregister(index);
+       ideprobe_init();
+       create_proc_ide_interfaces();
+       ide_xlate_1024_hook = ide_xlate_1024;
+       return 0;
+}
+
+void cleanup_module (void)
+{
+       ide_probe = NULL;
+       ide_xlate_1024_hook = 0;
+}
+MODULE_LICENSE("GPL");
+#endif /* MODULE */
diff --git a/xen-2.4.16/drivers/ide/ide-taskfile.c b/xen-2.4.16/drivers/ide/ide-taskfile.c
new file mode 100644 (file)
index 0000000..34bface
--- /dev/null
@@ -0,0 +1,1722 @@
+/*
+ * linux/drivers/ide/ide-taskfile.c    Version 0.20    Oct 11, 2000
+ *
+ *  Copyright (C) 2000         Michael Cornwell <cornwell@acm.org>
+ *  Copyright (C) 2000         Andre Hedrick <andre@linux-ide.org>
+ *
+ *  May be copied or modified under the terms of the GNU General Public License
+ *
+ * IDE_DEBUG(__LINE__);
+ */
+
+#include <xeno/config.h>
+#define __NO_VERSION__
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/hdreg.h>
+#include <xeno/ide.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+#  define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+#  undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+#define DEBUG_TASKFILE 0       /* unset when fixed */
+
+#if DEBUG_TASKFILE
+#define DTF(x...) printk(x)
+#else
+#define DTF(x...)
+#endif
+
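+/*
+ * Read back the low 24 LBA bits from the high/low-cylinder and
+ * sector-number registers.
+ */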
+inline u32 task_read_24 (ide_drive_t *drive)
+{
+       return  (IN_BYTE(IDE_HCYL_REG)<<16) |
+               (IN_BYTE(IDE_LCYL_REG)<<8) |
+                IN_BYTE(IDE_SECTOR_REG);
+}
+
+static void ata_bswap_data (void *buffer, int wcount)
+{
+       u16 *p = buffer;
+
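+       /* wcount is in 32-bit units, so swap two 16-bit words per pass */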
+       while (wcount--) {
+               *p = *p << 8 | *p >> 8; p++;
+               *p = *p << 8 | *p >> 8; p++;
+       }
+}
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data.  We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void task_vlb_sync (ide_ioreg_t port) {
+       (void) inb (port);
+       (void) inb (port);
+       (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface.
+ * wcount is in 32-bit units: insl() moves wcount longwords, while the
+ * 16-bit path moves wcount<<1 16-bit words.
+ */
+void ata_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       byte io_32bit = drive->io_32bit;
+
+       if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+               if (io_32bit & 2) {
+                       unsigned long flags;
+                       __save_flags(flags);    /* local CPU only */
+                       __cli();                /* local CPU only */
+                       task_vlb_sync(IDE_NSECTOR_REG);
+                       insl(IDE_DATA_REG, buffer, wcount);
+                       __restore_flags(flags); /* local CPU only */
+               } else
+#endif /* SUPPORT_VLB_SYNC */
+                       insl(IDE_DATA_REG, buffer, wcount);
+       } else {
+#if SUPPORT_SLOW_DATA_PORTS
+               if (drive->slow) {
+                       unsigned short *ptr = (unsigned short *) buffer;
+                       while (wcount--) {
+                               *ptr++ = inw_p(IDE_DATA_REG);
+                               *ptr++ = inw_p(IDE_DATA_REG);
+                       }
+               } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+                       insw(IDE_DATA_REG, buffer, wcount<<1);
+       }
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface.
+ * wcount is in 32-bit units, as in ata_input_data() above.
+ */
+void ata_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       byte io_32bit = drive->io_32bit;
+
+       if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+               if (io_32bit & 2) {
+                       unsigned long flags;
+                       __save_flags(flags);    /* local CPU only */
+                       __cli();                /* local CPU only */
+                       task_vlb_sync(IDE_NSECTOR_REG);
+                       outsl(IDE_DATA_REG, buffer, wcount);
+                       __restore_flags(flags); /* local CPU only */
+               } else
+#endif /* SUPPORT_VLB_SYNC */
+                       outsl(IDE_DATA_REG, buffer, wcount);
+       } else {
+#if SUPPORT_SLOW_DATA_PORTS
+               if (drive->slow) {
+                       unsigned short *ptr = (unsigned short *) buffer;
+                       while (wcount--) {
+                               outw_p(*ptr++, IDE_DATA_REG);
+                               outw_p(*ptr++, IDE_DATA_REG);
+                       }
+               } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+                       outsw(IDE_DATA_REG, buffer, wcount<<1);
+       }
+}
+
+
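+/*
+ * Wrappers honouring drive->bswap: input data is byte-swapped after the
+ * transfer; output data is swapped, written, then swapped back so the
+ * caller's buffer is left unmodified.
+ */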
+static inline void taskfile_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       ata_input_data(drive, buffer, wcount);
+       if (drive->bswap)
+               ata_bswap_data(buffer, wcount);
+}
+
+static inline void taskfile_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       if (drive->bswap) {
+               ata_bswap_data(buffer, wcount);
+               ata_output_data(drive, buffer, wcount);
+               ata_bswap_data(buffer, wcount);
+       } else {
+               ata_output_data(drive, buffer, wcount);
+       }
+}
+
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+       task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+       hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+       struct hd_driveid *id = drive->id;
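+       /* Device/head mask: with 48-bit addressing only the top three
+        * bits (0xE0) are meaningful; otherwise keep everything except
+        * the drive-select bit, which drive->select.all supplies. */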
+       byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+       /* (ks/hs): Moved to start, do not use for multiple out commands */
+       if (task->handler != task_mulout_intr) {
+               if (IDE_CONTROL_REG)
+                       OUT_BYTE(drive->ctl, IDE_CONTROL_REG);  /* clear nIEN */
+               SELECT_MASK(HWIF(drive), drive, 0);
+       }
+
+       if ((id->command_set_2 & 0x0400) &&
+           (id->cfs_enable_2 & 0x0400) &&
+           (drive->addressing == 1)) {
+               OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+               OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+               OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+               OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+               OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+       }
+
+       OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+       OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+       /* refers to number of sectors to transfer */
+       OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+       /* refers to sector offset or start sector */
+       OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+       OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+       OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+       if (task->handler != NULL) {
+#if 0
+               ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+               OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+               /*
+                * Warning: there is a race between the handler and the
+                * prehandler when writing the first block of data;
+                * however, since we are well inside the boundaries of
+                * the seek, we should be okay.
+                */
+               if (task->prehandler != NULL) {
+                       return task->prehandler(drive, task->rq);
+               }
+#else
+               ide_startstop_t startstop;
+
+               ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+               OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+               if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+                       printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+                               drive->name,
+                               drive->mult_count ? "MULTWRITE" : "WRITE");
+                       return startstop;
+               }
+               /* (ks/hs): Fixed Multi Write */
+               if ((taskfile->command != WIN_MULTWRITE) &&
+                   (taskfile->command != WIN_MULTWRITE_EXT)) {
+                       struct request *rq = HWGROUP(drive)->rq;
+               /* For Write_sectors we need to stuff the first sector */
+                       taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+                       rq->current_nr_sectors--;
+               } else {
+               /* Stuff first sector(s) by implicitly calling the handler */
+                       if (!(drive_is_ready(drive))) {
+                       /* FIXME: Replace hard-coded 100, error handling? */
+                               int i;
+                               for (i=0; i<100; i++) {
+                                       if (drive_is_ready(drive))
+                                               break;
+                               }
+                       }
+                       return task->handler(drive);
+               }
+#endif
+       } else {
+               /* For DMA commands we don't set a handler: dmaproc() is
+                * called only for its side effect of starting the
+                * transfer (the empty if body is intentional). */
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive)));
+       }
+
+       return ide_started;
+}
+
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler)
+{
+       struct hd_driveid *id = drive->id;
+       byte HIHI = (drive->addressing) ? 0xE0 : 0xEF;
+
+       /* (ks/hs): Moved to start, do not use for multiple out commands */
+       if (*handler != task_mulout_intr) {
+               if (IDE_CONTROL_REG)
+                       OUT_BYTE(drive->ctl, IDE_CONTROL_REG);  /* clear nIEN */
+               SELECT_MASK(HWIF(drive), drive, 0);
+       }
+
+       if ((id->command_set_2 & 0x0400) &&
+           (id->cfs_enable_2 & 0x0400) &&
+           (drive->addressing == 1)) {
+               OUT_BYTE(hobfile->feature, IDE_FEATURE_REG);
+               OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+               OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+               OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+               OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+       }
+
+       OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+       OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+       /* refers to number of sectors to transfer */
+       OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+       /* refers to sector offset or start sector */
+       OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+       OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+       OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG);
+       if (handler != NULL) {
+               ide_set_handler (drive, handler, WAIT_CMD, NULL);
+               OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+       } else {
+               /* For DMA commands we don't set a handler: dmaproc() is
+                * called only for its side effect of starting the
+                * transfer (the empty if body is intentional). */
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive)));
+       }
+}
+
+#if 0
+ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
+{
+       task_struct_t *taskfile = (task_struct_t *) task->tfRegister;
+       hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister;
+       struct hd_driveid *id = drive->id;
+
+       /*
+        * (KS) Check taskfile in/out flags.
+        * If set, then execute as it is defined.
+        * If not set, then define default settings.
+        * The default values are:
+        *      write and read all taskfile registers (except data) 
+        *      write and read the hob registers (sector,nsector,lcyl,hcyl)
+        */
+       if (task->tf_out_flags.all == 0) {
+               task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS;
+               if ((id->command_set_2 & 0x0400) &&
+                   (id->cfs_enable_2 & 0x0400) &&
+                   (drive->addressing == 1)) {
+                       task->tf_out_flags.all |= (IDE_HOB_STD_OUT_FLAGS << 8);
+               }
+       }
+
+       if (task->tf_in_flags.all == 0) {
+               task->tf_in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
+               if ((id->command_set_2 & 0x0400) &&
+                   (id->cfs_enable_2 & 0x0400) &&
+                   (drive->addressing == 1)) {
+                       task->tf_in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8);
+               }
+       }
+
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl, IDE_CONTROL_REG);  /* clear nIEN */
+       SELECT_MASK(HWIF(drive), drive, 0);
+
+       if (task->tf_out_flags.b.data) {
+               unsigned short data =  taskfile->data + (hobfile->data << 8);
+               OUT_WORD (data, IDE_DATA_REG);
+       }
+
+       /* (KS) send hob registers first */
+       if (task->tf_out_flags.b.nsector_hob)
+               OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG);
+       if (task->tf_out_flags.b.sector_hob)
+               OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG);
+       if (task->tf_out_flags.b.lcyl_hob)
+               OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG);
+       if (task->tf_out_flags.b.hcyl_hob)
+               OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG);
+
+
+       /* (KS) Send now the standard registers */
+       if (task->tf_out_flags.b.error_feature)
+               OUT_BYTE(taskfile->feature, IDE_FEATURE_REG);
+       /* refers to number of sectors to transfer */
+       if (task->tf_out_flags.b.nsector)
+               OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG);
+       /* refers to sector offset or start sector */
+       if (task->tf_out_flags.b.sector)
+               OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG);
+       if (task->tf_out_flags.b.lcyl)
+               OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG);
+       if (task->tf_out_flags.b.hcyl)
+               OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG);
+
+        /*
+        * (KS) Do not modify the specified taskfile. We want to have a
+        * universal pass through, so we must execute ALL specified values.
+        *
+        * (KS) The drive head register is mandatory.
+        * Don't care about the out flags !
+        */
+       OUT_BYTE(taskfile->device_head | drive->select.all, IDE_SELECT_REG);
+       if (task->handler != NULL) {
+#if 0
+               ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+               OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+               /*
+                * Warning: there is a race between the handler and the
+                * prehandler when writing the first block of data;
+                * however, since we are well inside the boundaries of
+                * the seek, we should be okay.
+                */
+               if (task->prehandler != NULL) {
+                       return task->prehandler(drive, task->rq);
+               }
+#else
+               ide_startstop_t startstop;
+
+               ide_set_handler (drive, task->handler, WAIT_CMD, NULL);
+
+               /*
+                * (KS) The drive command register is also mandatory.
+                * Don't care about the out flags !
+                */
+               OUT_BYTE(taskfile->command, IDE_COMMAND_REG);
+
+               if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+                       printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+                               drive->name,
+                               drive->mult_count ? "MULTWRITE" : "WRITE");
+                       return startstop;
+               }
+               /* (ks/hs): Fixed Multi Write */
+               if ((taskfile->command != WIN_MULTWRITE) &&
+                   (taskfile->command != WIN_MULTWRITE_EXT)) {
+                       struct request *rq = HWGROUP(drive)->rq;
+               /* For Write_sectors we need to stuff the first sector */
+                       taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+                       rq->current_nr_sectors--;
+               } else {
+               /* Stuff first sector(s) by implicitly calling the handler */
+                       if (!(drive_is_ready(drive))) {
+                       /* FIXME: Replace hard-coded 100, error handling? */
+                               int i;
+                               for (i=0; i<100; i++) {
+                                       if (drive_is_ready(drive))
+                                               break;
+                               }
+                       }
+                       return task->handler(drive);
+               }
+#endif
+       } else {
+               /* For DMA commands we don't set a handler: dmaproc() is
+                * called only for its side effect of starting the
+                * transfer (the empty if body is intentional). */
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive)));
+       }
+
+       return ide_started;
+}
+#endif
+
+#if 0
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte taskfile_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+       unsigned long flags;
+       byte err = 0;
+
+       __save_flags (flags);   /* local CPU only */
+       ide__sti();             /* local CPU only */
+       printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+       printk(" { ");
+       if (stat & BUSY_STAT)
+               printk("Busy ");
+       else {
+               if (stat & READY_STAT)  printk("DriveReady ");
+               if (stat & WRERR_STAT)  printk("DeviceFault ");
+               if (stat & SEEK_STAT)   printk("SeekComplete ");
+               if (stat & DRQ_STAT)    printk("DataRequest ");
+               if (stat & ECC_STAT)    printk("CorrectedError ");
+               if (stat & INDEX_STAT)  printk("Index ");
+               if (stat & ERR_STAT)    printk("Error ");
+       }
+       printk("}");
+#endif  /* FANCY_STATUS_DUMPS */
+       printk("\n");
+       if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+               err = GET_ERR();
+               printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+               if (drive->media == ide_disk) {
+                       printk(" { ");
+                       if (err & ABRT_ERR)     printk("DriveStatusError ");
+                       if (err & ICRC_ERR)     printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+                       if (err & ECC_ERR)      printk("UncorrectableError ");
+                       if (err & ID_ERR)       printk("SectorIdNotFound ");
+                       if (err & TRK0_ERR)     printk("TrackZeroNotFound ");
+                       if (err & MARK_ERR)     printk("AddrMarkNotFound ");
+                       printk("}");
+                       if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+                               if ((drive->id->command_set_2 & 0x0400) &&
+                                   (drive->id->cfs_enable_2 & 0x0400) &&
+                                   (drive->addressing == 1)) {
+                                       __u64 sectors = 0;
+                                       u32 low = 0, high = 0;
+                                       low = task_read_24(drive);
+                                       OUT_BYTE(0x80, IDE_CONTROL_REG);
+                                       high = task_read_24(drive);
+                                       sectors = ((__u64)high << 24) | low;
+                                       printk(", LBAsect=%lld", sectors);
+                               } else {
+                                       byte cur = IN_BYTE(IDE_SELECT_REG);
+                                       if (cur & 0x40) {       /* using LBA? */
+                                               printk(", LBAsect=%ld", (unsigned long)
+                                                ((cur&0xf)<<24)
+                                                |(IN_BYTE(IDE_HCYL_REG)<<16)
+                                                |(IN_BYTE(IDE_LCYL_REG)<<8)
+                                                | IN_BYTE(IDE_SECTOR_REG));
+                                       } else {
+                                               printk(", CHS=%d/%d/%d",
+                                                 (IN_BYTE(IDE_HCYL_REG)<<8) +
+                                                  IN_BYTE(IDE_LCYL_REG),
+                                                 cur & 0xf,
+                                                 IN_BYTE(IDE_SECTOR_REG));
+                                       }
+                               }
+                               if (HWGROUP(drive)->rq)
+                                       printk(", sector=%llu", (__u64) HWGROUP(drive)->rq->sector);
+                       }
+               }
+#endif  /* FANCY_STATUS_DUMPS */
+               printk("\n");
+       }
+       __restore_flags (flags);        /* local CPU only */
+       return err;
+}
+
+/*
+ * Clean up after success/failure of an explicit taskfile operation.
+ */
+void ide_end_taskfile (ide_drive_t *drive, byte stat, byte err)
+{
+       unsigned long flags;
+       struct request *rq;
+       ide_task_t *args;
+       task_ioreg_t command;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rq = HWGROUP(drive)->rq;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+       args = (ide_task_t *) rq->special;
+
+       command = args->tfRegister[IDE_COMMAND_OFFSET];
+
+       rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+
+       args->tfRegister[IDE_ERROR_OFFSET]   = err;
+       args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+       args->tfRegister[IDE_SECTOR_OFFSET]  = IN_BYTE(IDE_SECTOR_REG);
+       args->tfRegister[IDE_LCYL_OFFSET]    = IN_BYTE(IDE_LCYL_REG);
+       args->tfRegister[IDE_HCYL_OFFSET]    = IN_BYTE(IDE_HCYL_REG);
+       args->tfRegister[IDE_SELECT_OFFSET]  = IN_BYTE(IDE_SELECT_REG);
+       args->tfRegister[IDE_STATUS_OFFSET]  = stat;
+       if ((drive->id->command_set_2 & 0x0400) &&
+           (drive->id->cfs_enable_2 & 0x0400) &&
+           (drive->addressing == 1)) {
+               OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+               args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+               args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+               args->hobRegister[IDE_SECTOR_OFFSET_HOB]  = IN_BYTE(IDE_SECTOR_REG);
+               args->hobRegister[IDE_LCYL_OFFSET_HOB]    = IN_BYTE(IDE_LCYL_REG);
+               args->hobRegister[IDE_HCYL_OFFSET_HOB]    = IN_BYTE(IDE_HCYL_REG);
+       }
+
+/*     taskfile_settings_update(drive, args, command); */
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       blkdev_dequeue_request(rq);
+       HWGROUP(drive)->rq = NULL;
+       end_that_request_last(rq);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set.  As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive.  Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+void task_try_to_flush_leftover_data (ide_drive_t *drive)
+{
+       int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+       if (drive->media != ide_disk)
+               return;
+       while (i > 0) {
+               u32 buffer[16];
+               unsigned int wcount = (i > 16) ? 16 : i;
+               i -= wcount;
+               taskfile_input_data (drive, buffer, wcount);
+       }
+}
+
+/*
+ * taskfile_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t taskfile_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+       struct request *rq;
+       byte err;
+
+       err = taskfile_dump_status(drive, msg, stat);
+       if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+               return ide_stopped;
+       /* retry only "normal" I/O: */
+       if (rq->cmd == IDE_DRIVE_TASKFILE) {
+               rq->errors = 1;
+               ide_end_taskfile(drive, stat, err);
+               return ide_stopped;
+       }
+       if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+               rq->errors |= ERROR_RESET;
+       } else {
+               if (drive->media == ide_disk && (stat & ERR_STAT)) {
+                       /* err has different meaning on cdrom and tape */
+                       if (err == ABRT_ERR) {
+                               if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+                                       return ide_stopped;     /* some newer drives don't support WIN_SPECIFY */
+                       } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+                               drive->crc_count++;     /* UDMA crc error -- just retry the operation */
+                       } else if (err & (BBD_ERR | ECC_ERR))   /* retries won't help these */
+                               rq->errors = ERROR_MAX;
+                       else if (err & TRK0_ERR)        /* help it find track zero */
+                                rq->errors |= ERROR_RECAL;
+                }
+                if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+                        task_try_to_flush_leftover_data(drive);
+       }
+       if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+               OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG);    /* force an abort */
+
+       if (rq->errors >= ERROR_MAX) {
+               if (drive->driver != NULL)
+                       DRIVER(drive)->end_request(0, HWGROUP(drive));
+               else
+                       ide_end_request(0, HWGROUP(drive));
+       } else {
+               if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+                       ++rq->errors;
+                       return ide_do_reset(drive);
+               }
+               if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+                       drive->special.b.recalibrate = 1;
+               ++rq->errors;
+       }
+       return ide_stopped;
+}
+#endif
+
+/*
+ * Handler for special commands without a data phase from ide-disk
+ */
+
+/*
+ * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd.
+ */
+ide_startstop_t set_multmode_intr (ide_drive_t *drive)
+{
+       byte stat;
+
+       if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) {
+               drive->mult_count = drive->mult_req;
+       } else {
+               drive->mult_req = drive->mult_count = 0;
+               drive->special.b.recalibrate = 1;
+               (void) ide_dump_status(drive, "set_multmode", stat);
+       }
+       return ide_stopped;
+}
+
+/*
+ * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd.
+ */
+ide_startstop_t set_geometry_intr (ide_drive_t *drive)
+{
+       byte stat;
+
+       if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT))
+               return ide_stopped;
+
+       if (stat & (ERR_STAT|DRQ_STAT))
+               return ide_error(drive, "set_geometry_intr", stat);
+
+       ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL);
+       return ide_started;
+}
+
+/*
+ * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd.
+ */
+ide_startstop_t recal_intr (ide_drive_t *drive)
+{
+       byte stat = GET_STAT();
+
+       if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+               return ide_error(drive, "recal_intr", stat);
+       return ide_stopped;
+}
+
+/*
+ * Handler for commands without a data phase
+ */
+ide_startstop_t task_no_data_intr (ide_drive_t *drive)
+{
+       ide_task_t *args        = HWGROUP(drive)->rq->special;
+       byte stat               = GET_STAT();
+
+       ide__sti();     /* local CPU only */
+
+       if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+               return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */
+
+       if (args)
+               ide_end_drive_cmd (drive, stat, GET_ERR());
+
+       return ide_stopped;
+}
+
+/*
+ * Handler for command with PIO data-in phase
+ */
+ide_startstop_t task_in_intr (ide_drive_t *drive)
+{
+       byte stat               = GET_STAT();
+       byte io_32bit           = drive->io_32bit;
+       struct request *rq      = HWGROUP(drive)->rq;
+       char *pBuf              = NULL;
+
+       if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+               if (stat & (ERR_STAT|DRQ_STAT)) {
+                       return ide_error(drive, "task_in_intr", stat);
+               }
+               if (!(stat & BUSY_STAT)) {
+                       DTF("task_in_intr: too soon, wait for next interrupt\n");
+                       ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL);
+                       return ide_started;  
+               }
+       }
+       DTF("stat: %02x\n", stat);
+       pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+       DTF("Read: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+
+       drive->io_32bit = 0;
+       taskfile_input_data(drive, pBuf, SECTOR_WORDS);
+       drive->io_32bit = io_32bit;
+
+       if (--rq->current_nr_sectors <= 0) {
+               /* (hs): swapped next 2 lines */
+               DTF("Request Ended stat: %02x\n", GET_STAT());
+               ide_end_request(1, HWGROUP(drive));
+       } else {
+               ide_set_handler(drive, &task_in_intr,  WAIT_CMD, NULL);
+               return ide_started;
+       }
+       return ide_stopped;
+}
+
+#undef ALTSTAT_SCREW_UP
+
+#ifdef ALTSTAT_SCREW_UP
+/*
+ * (ks/hs): Poll Alternate Status Register to ensure
+ * that drive is not busy.
+ */
+byte altstat_multi_busy (ide_drive_t *drive, byte stat, const char *msg)
+{
+       int i;
+
+       DTF("multi%s: ASR = %x\n", msg, stat);
+       if (stat & BUSY_STAT) {
+               /* (ks/hs): FIXME: Replace hard-coded 100, error handling? */
+               for (i=0; i<100; i++) {
+                       stat = GET_ALTSTAT();
+                       if ((stat & BUSY_STAT) == 0)
+                               break;
+               }
+       }
+       /*
+        * (ks/hs): Read Status AFTER Alternate Status Register
+        */
+       return(GET_STAT());
+}
+
+/*
+ * (ks/hs): Poll Alternate status register to wait for drive
+ * to become ready for next transfer
+ */
+byte altstat_multi_poll (ide_drive_t *drive, byte stat, const char *msg)
+{
+       /* (ks/hs): FIXME: Error handling, time-out? */
+       while (stat & BUSY_STAT)
+               stat = GET_ALTSTAT();
+       DTF("multi%s: nsect=1, ASR = %x\n", msg, stat);
+       return(GET_STAT());     /* (ks/hs): Clear pending IRQ */
+}
+#endif /* ALTSTAT_SCREW_UP */
+
+/*
+ * Handler for command with Read Multiple
+ */
+ide_startstop_t task_mulin_intr (ide_drive_t *drive)
+{
+       unsigned int            msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+       byte stat       = altstat_multi_busy(drive, GET_ALTSTAT(), "read");
+#else
+       byte stat               = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+       byte io_32bit           = drive->io_32bit;
+       struct request *rq      = HWGROUP(drive)->rq;
+       char *pBuf              = NULL;
+
+       if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+               if (stat & (ERR_STAT|DRQ_STAT)) {
+                       return ide_error(drive, "task_mulin_intr", stat);
+               }
+               /* no data yet, so wait for another interrupt */
+               ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+               return ide_started;
+       }
+
+       /* (ks/hs): Fixed Multi-Sector transfer */
+       msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+       /*
+        * Screw the request, we do not support bad data-phase setups!
+        * Either read and learn the ATA standard or crash yourself!
+        */
+       if (!msect) {
+               /*
+                * (ks/hs): Drive supports multi-sector transfer,
+                * drive->mult_count was not set
+                */
+               nsect = 1;
+               while (rq->current_nr_sectors) {
+                       pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+                       DTF("Multiread: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+                       drive->io_32bit = 0;
+                       taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+                       drive->io_32bit = io_32bit;
+                       rq->errors = 0;
+                       rq->current_nr_sectors -= nsect;
+                       stat = altstat_multi_poll(drive, GET_ALTSTAT(), "read");
+               }
+               ide_end_request(1, HWGROUP(drive));
+               return ide_stopped;
+       }
+#endif /* ALTSTAT_SCREW_UP */
+
+       nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+       pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+
+       DTF("Multiread: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+               pBuf, nsect, rq->current_nr_sectors);
+       drive->io_32bit = 0;
+       taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS);
+       drive->io_32bit = io_32bit;
+       rq->errors = 0;
+       rq->current_nr_sectors -= nsect;
+       if (rq->current_nr_sectors != 0) {
+               ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL);
+               return ide_started;
+       }
+       ide_end_request(1, HWGROUP(drive));
+       return ide_stopped;
+}
+
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq)
+{
+       ide_task_t *args = rq->special;
+       ide_startstop_t startstop;
+
+       if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+               printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE");
+               return startstop;
+       }
+
+       /* (ks/hs): Fixed Multi Write */
+       if ((args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE) &&
+           (args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE_EXT)) {
+               /* For Write_sectors we need to stuff the first sector */
+               taskfile_output_data(drive, rq->buffer, SECTOR_WORDS);
+               rq->current_nr_sectors--;
+               return ide_started;
+       } else {
+               /*
+                * (ks/hs): Stuff the first sector(s)
+                * by implicitly calling the handler
+                */
+               if (!(drive_is_ready(drive))) {
+                       int i;
+                       /*
+                        * (ks/hs): FIXME: Replace hard-coded
+                        *               100, error handling?
+                        */
+                       for (i=0; i<100; i++) {
+                               if (drive_is_ready(drive))
+                                       break;
+                       }
+               }
+               return args->handler(drive);
+       }
+       return ide_started;
+}
+
+/*
+ * Handler for command with PIO data-out phase
+ */
+ide_startstop_t task_out_intr (ide_drive_t *drive)
+{
+       byte stat               = GET_STAT();
+       byte io_32bit           = drive->io_32bit;
+       struct request *rq      = HWGROUP(drive)->rq;
+       char *pBuf              = NULL;
+
+       if (!rq->current_nr_sectors) { 
+               ide_end_request(1, HWGROUP(drive));
+               return ide_stopped;
+       }
+
+       if (!OK_STAT(stat,DRIVE_READY,drive->bad_wstat)) {
+               return ide_error(drive, "task_out_intr", stat);
+       }
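+       /* Note: '^' here mixes (nr_sectors == 1), which is 0 or 1, with
+        * the DRQ bit (0x08), so the operands can never cancel; the test
+        * fires whenever DRQ is set or exactly one sector remains. */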
+       if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) {
+               rq = HWGROUP(drive)->rq;
+               pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+               DTF("write: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors);
+               drive->io_32bit = 0;
+               taskfile_output_data(drive, pBuf, SECTOR_WORDS);
+               drive->io_32bit = io_32bit;
+               rq->errors = 0;
+               rq->current_nr_sectors--;
+       }
+
+       if (rq->current_nr_sectors <= 0) {
+               ide_end_request(1, HWGROUP(drive));
+       } else {
+               ide_set_handler(drive, &task_out_intr, WAIT_CMD, NULL);
+               return ide_started;
+       }
+       return ide_stopped;
+}
+
+/*
+ * Handler for command write multiple
+ * Called directly from execute_drive_cmd for the first bunch of sectors,
+ * afterwards only by the ISR
+ */
+ide_startstop_t task_mulout_intr (ide_drive_t *drive)
+{
+       unsigned int            msect, nsect;
+
+#ifdef ALTSTAT_SCREW_UP
+       byte stat       = altstat_multi_busy(drive, GET_ALTSTAT(), "write");
+#else
+       byte stat               = GET_STAT();
+#endif /* ALTSTAT_SCREW_UP */
+
+       byte io_32bit           = drive->io_32bit;
+       struct request *rq      = HWGROUP(drive)->rq;
+       ide_hwgroup_t *hwgroup  = HWGROUP(drive);
+       char *pBuf              = NULL;
+
+       /*
+        * (ks/hs): Handle last IRQ on multi-sector transfer,
+        * occurs after all data was sent
+        */
+       if (rq->current_nr_sectors == 0) {
+               if (stat & (ERR_STAT|DRQ_STAT))
+                       return ide_error(drive, "task_mulout_intr", stat);
+               ide_end_request(1, HWGROUP(drive));
+               return ide_stopped;
+       }
+
+       if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) {
+               if (stat & (ERR_STAT|DRQ_STAT)) {
+                       return ide_error(drive, "task_mulout_intr", stat);
+               }
+               /* no data yet, so wait for another interrupt */
+               if (hwgroup->handler == NULL)
+                       ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+               return ide_started;
+       }
+
+       /* (ks/hs): See task_mulin_intr */
+       msect = drive->mult_count;
+
+#ifdef ALTSTAT_SCREW_UP
+       /*
+        * Screw the request, we do not support bad data-phase setups!
+        * Either read and learn the ATA standard or crash yourself!
+        */
+       if (!msect) {
+               nsect = 1;
+               while (rq->current_nr_sectors) {
+                       pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+                       DTF("Multiwrite: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors);
+                       drive->io_32bit = 0;
+                       taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+                       drive->io_32bit = io_32bit;
+                       rq->errors = 0;
+                       rq->current_nr_sectors -= nsect;
+                       stat = altstat_multi_poll(drive, GET_ALTSTAT(), "write");
+               }
+               ide_end_request(1, HWGROUP(drive));
+               return ide_stopped;
+       }
+#endif /* ALTSTAT_SCREW_UP */
+
+       nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors;
+       pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE);
+       DTF("Multiwrite: %p, nsect: %d , rq->current_nr_sectors: %ld\n",
+               pBuf, nsect, rq->current_nr_sectors);
+       drive->io_32bit = 0;
+       taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS);
+       drive->io_32bit = io_32bit;
+       rq->errors = 0;
+       rq->current_nr_sectors -= nsect;
+       if (hwgroup->handler == NULL)
+               ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL);
+       return ide_started;
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+       switch(taskfile->command) {
+                               /* IDE_DRIVE_TASK_RAW_WRITE */
+               case CFA_WRITE_MULTI_WO_ERASE:
+               case WIN_MULTWRITE:
+               case WIN_MULTWRITE_EXT:
+//             case WIN_WRITEDMA:
+//             case WIN_WRITEDMA_QUEUED:
+//             case WIN_WRITEDMA_EXT:
+//             case WIN_WRITEDMA_QUEUED_EXT:
+                               /* IDE_DRIVE_TASK_OUT */
+               case WIN_WRITE:
+               case WIN_WRITE_VERIFY:
+               case WIN_WRITE_BUFFER:
+               case CFA_WRITE_SECT_WO_ERASE:
+               case WIN_DOWNLOAD_MICROCODE:
+                       return &pre_task_out_intr;
+                               /* IDE_DRIVE_TASK_OUT */
+               case WIN_SMART:
+                       if (taskfile->feature == SMART_WRITE_LOG_SECTOR)
+                               return &pre_task_out_intr;
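+                       /* other SMART subcommands fall through to default */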
+               default:
+                       break;
+       }
+       return(NULL);
+}
+
+/* Called internally to figure out the type of command being issued */
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile)
+{
+       switch(taskfile->command) {
+               case WIN_IDENTIFY:
+               case WIN_PIDENTIFY:
+               case CFA_TRANSLATE_SECTOR:
+               case WIN_READ_BUFFER:
+               case WIN_READ:
+               case WIN_READ_EXT:
+                       return &task_in_intr;
+               case WIN_SECURITY_DISABLE:
+               case WIN_SECURITY_ERASE_UNIT:
+               case WIN_SECURITY_SET_PASS:
+               case WIN_SECURITY_UNLOCK:
+               case WIN_DOWNLOAD_MICROCODE:
+               case CFA_WRITE_SECT_WO_ERASE:
+               case WIN_WRITE_BUFFER:
+               case WIN_WRITE_VERIFY:
+               case WIN_WRITE:
+               case WIN_WRITE_EXT:
+                       return &task_out_intr;
+               case WIN_MULTREAD:
+               case WIN_MULTREAD_EXT:
+                       return &task_mulin_intr;
+               case CFA_WRITE_MULTI_WO_ERASE:
+               case WIN_MULTWRITE:
+               case WIN_MULTWRITE_EXT:
+                       return &task_mulout_intr;
+               case WIN_SMART:
+                       switch(taskfile->feature) {
+                               case SMART_READ_VALUES:
+                               case SMART_READ_THRESHOLDS:
+                               case SMART_READ_LOG_SECTOR:
+                                       return &task_in_intr;
+                               case SMART_WRITE_LOG_SECTOR:
+                                       return &task_out_intr;
+                               default:
+                                       return &task_no_data_intr;
+                       }
+               case CFA_REQ_EXT_ERROR_CODE:
+               case CFA_ERASE_SECTORS:
+               case WIN_VERIFY:
+               case WIN_VERIFY_EXT:
+               case WIN_SEEK:
+                       return &task_no_data_intr;
+               case WIN_SPECIFY:
+                       return &set_geometry_intr;
+               case WIN_RESTORE:
+                       return &recal_intr;
+               case WIN_DIAGNOSE:
+               case WIN_FLUSH_CACHE:
+               case WIN_FLUSH_CACHE_EXT:
+               case WIN_STANDBYNOW1:
+               case WIN_STANDBYNOW2:
+               case WIN_SLEEPNOW1:
+               case WIN_SLEEPNOW2:
+               case WIN_SETIDLE1:
+               case WIN_CHECKPOWERMODE1:
+               case WIN_CHECKPOWERMODE2:
+               case WIN_GETMEDIASTATUS:
+               case WIN_MEDIAEJECT:
+                       return &task_no_data_intr;
+               case WIN_SETMULT:
+                       return &set_multmode_intr;
+               case WIN_READ_NATIVE_MAX:
+               case WIN_SET_MAX:
+               case WIN_READ_NATIVE_MAX_EXT:
+               case WIN_SET_MAX_EXT:
+               case WIN_SECURITY_ERASE_PREPARE:
+               case WIN_SECURITY_FREEZE_LOCK:
+               case WIN_DOORLOCK:
+               case WIN_DOORUNLOCK:
+               case WIN_SETFEATURES:
+                       return &task_no_data_intr;
+               case DISABLE_SEAGATE:
+               case EXABYTE_ENABLE_NEST:
+                       return &task_no_data_intr;
+#ifdef CONFIG_BLK_DEV_IDEDMA
+               case WIN_READDMA:
+               case WIN_IDENTIFY_DMA:
+               case WIN_READDMA_QUEUED:
+               case WIN_READDMA_EXT:
+               case WIN_READDMA_QUEUED_EXT:
+               case WIN_WRITEDMA:
+               case WIN_WRITEDMA_QUEUED:
+               case WIN_WRITEDMA_EXT:
+               case WIN_WRITEDMA_QUEUED_EXT:
+#endif
+               case WIN_FORMAT:
+               case WIN_INIT:
+               case WIN_DEVICE_RESET:
+               case WIN_QUEUED_SERVICE:
+               case WIN_PACKETCMD:
+               default:
+                       return(NULL);
+       }       
+}
+
+/* Called by ioctl to figure out the type of command being issued */
+int ide_cmd_type_parser (ide_task_t *args)
+{
+       struct hd_drive_task_hdr *taskfile = (struct hd_drive_task_hdr *) args->tfRegister;
+       struct hd_drive_hob_hdr *hobfile = (struct hd_drive_hob_hdr *) args->hobRegister;
+
+       args->prehandler = ide_pre_handler_parser(taskfile, hobfile);
+       args->handler = ide_handler_parser(taskfile, hobfile);
+
+       switch(args->tfRegister[IDE_COMMAND_OFFSET]) {
+               case WIN_IDENTIFY:
+               case WIN_PIDENTIFY:
+                       return IDE_DRIVE_TASK_IN;
+               case CFA_TRANSLATE_SECTOR:
+               case WIN_READ:
+               case WIN_READ_BUFFER:
+                       return IDE_DRIVE_TASK_IN;
+               case WIN_WRITE:
+               case WIN_WRITE_VERIFY:
+               case WIN_WRITE_BUFFER:
+               case CFA_WRITE_SECT_WO_ERASE:
+               case WIN_DOWNLOAD_MICROCODE:
+                       return IDE_DRIVE_TASK_RAW_WRITE;
+               case WIN_MULTREAD:
+                       return IDE_DRIVE_TASK_IN;
+               case CFA_WRITE_MULTI_WO_ERASE:
+               case WIN_MULTWRITE:
+                       return IDE_DRIVE_TASK_RAW_WRITE;
+               case WIN_SECURITY_DISABLE:
+               case WIN_SECURITY_ERASE_UNIT:
+               case WIN_SECURITY_SET_PASS:
+               case WIN_SECURITY_UNLOCK:
+                       return IDE_DRIVE_TASK_OUT;
+               case WIN_SMART:
+                       args->tfRegister[IDE_LCYL_OFFSET] = SMART_LCYL_PASS;
+                       args->tfRegister[IDE_HCYL_OFFSET] = SMART_HCYL_PASS;
+                       switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+                               case SMART_READ_VALUES:
+                               case SMART_READ_THRESHOLDS:
+                               case SMART_READ_LOG_SECTOR:
+                                       return IDE_DRIVE_TASK_IN;
+                               case SMART_WRITE_LOG_SECTOR:
+                                       return IDE_DRIVE_TASK_OUT;
+                               default:
+                                       return IDE_DRIVE_TASK_NO_DATA;
+                       }
+#ifdef CONFIG_BLK_DEV_IDEDMA
+               case WIN_READDMA:
+               case WIN_IDENTIFY_DMA:
+               case WIN_READDMA_QUEUED:
+               case WIN_READDMA_EXT:
+               case WIN_READDMA_QUEUED_EXT:
+                       return IDE_DRIVE_TASK_IN;
+               case WIN_WRITEDMA:
+               case WIN_WRITEDMA_QUEUED:
+               case WIN_WRITEDMA_EXT:
+               case WIN_WRITEDMA_QUEUED_EXT:
+                       return IDE_DRIVE_TASK_RAW_WRITE;
+#endif
+               case WIN_SETFEATURES:
+                       switch(args->tfRegister[IDE_FEATURE_OFFSET]) {
+                               case SETFEATURES_XFER:
+                                       return IDE_DRIVE_TASK_SET_XFER;
+                               case SETFEATURES_DIS_DEFECT:
+                               case SETFEATURES_EN_APM:
+                               case SETFEATURES_DIS_MSN:
+                               case SETFEATURES_EN_RI:
+                               case SETFEATURES_EN_SI:
+                               case SETFEATURES_DIS_RPOD:
+                               case SETFEATURES_DIS_WCACHE:
+                               case SETFEATURES_EN_DEFECT:
+                               case SETFEATURES_DIS_APM:
+                               case SETFEATURES_EN_MSN:
+                               case SETFEATURES_EN_RLA:
+                               case SETFEATURES_PREFETCH:
+                               case SETFEATURES_EN_RPOD:
+                               case SETFEATURES_DIS_RI:
+                               case SETFEATURES_DIS_SI:
+                               default:
+                                       return IDE_DRIVE_TASK_NO_DATA;
+                       }
+               case WIN_NOP:
+               case CFA_REQ_EXT_ERROR_CODE:
+               case CFA_ERASE_SECTORS:
+               case WIN_VERIFY:
+               case WIN_VERIFY_EXT:
+               case WIN_SEEK:
+               case WIN_SPECIFY:
+               case WIN_RESTORE:
+               case WIN_DIAGNOSE:
+               case WIN_FLUSH_CACHE:
+               case WIN_FLUSH_CACHE_EXT:
+               case WIN_STANDBYNOW1:
+               case WIN_STANDBYNOW2:
+               case WIN_SLEEPNOW1:
+               case WIN_SLEEPNOW2:
+               case WIN_SETIDLE1:
+               case DISABLE_SEAGATE:
+               case WIN_CHECKPOWERMODE1:
+               case WIN_CHECKPOWERMODE2:
+               case WIN_GETMEDIASTATUS:
+               case WIN_MEDIAEJECT:
+               case WIN_SETMULT:
+               case WIN_READ_NATIVE_MAX:
+               case WIN_SET_MAX:
+               case WIN_READ_NATIVE_MAX_EXT:
+               case WIN_SET_MAX_EXT:
+               case WIN_SECURITY_ERASE_PREPARE:
+               case WIN_SECURITY_FREEZE_LOCK:
+               case EXABYTE_ENABLE_NEST:
+               case WIN_DOORLOCK:
+               case WIN_DOORUNLOCK:
+                       return IDE_DRIVE_TASK_NO_DATA;
+               case WIN_FORMAT:
+               case WIN_INIT:
+               case WIN_DEVICE_RESET:
+               case WIN_QUEUED_SERVICE:
+               case WIN_PACKETCMD:
+               default:
+                       return IDE_DRIVE_TASK_INVALID;
+       }
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_taskfile (struct request *rq)
+{
+       memset(rq, 0, sizeof(*rq));
+       rq->cmd = IDE_DRIVE_TASK_NO_DATA;
+}
+
+/*
+ * This is kept for internal use only !!!
+ * This is an internal call and nobody in user-space has a damn
+ * reason to call this taskfile interface directly.
+ *
+ * ide_raw_taskfile is the one that user-space executes.
+ */
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf)
+{
+       struct request rq;
+       ide_task_t args;
+
+       memset(&args, 0, sizeof(ide_task_t));
+
+       args.tfRegister[IDE_DATA_OFFSET]         = taskfile->data;
+       args.tfRegister[IDE_FEATURE_OFFSET]      = taskfile->feature;
+       args.tfRegister[IDE_NSECTOR_OFFSET]      = taskfile->sector_count;
+       args.tfRegister[IDE_SECTOR_OFFSET]       = taskfile->sector_number;
+       args.tfRegister[IDE_LCYL_OFFSET]         = taskfile->low_cylinder;
+       args.tfRegister[IDE_HCYL_OFFSET]         = taskfile->high_cylinder;
+       args.tfRegister[IDE_SELECT_OFFSET]       = taskfile->device_head;
+       args.tfRegister[IDE_COMMAND_OFFSET]      = taskfile->command;
+
+       args.hobRegister[IDE_DATA_OFFSET_HOB]    = hobfile->data;
+       args.hobRegister[IDE_FEATURE_OFFSET_HOB] = hobfile->feature;
+       args.hobRegister[IDE_NSECTOR_OFFSET_HOB] = hobfile->sector_count;
+       args.hobRegister[IDE_SECTOR_OFFSET_HOB]  = hobfile->sector_number;
+       args.hobRegister[IDE_LCYL_OFFSET_HOB]    = hobfile->low_cylinder;
+       args.hobRegister[IDE_HCYL_OFFSET_HOB]    = hobfile->high_cylinder;
+       args.hobRegister[IDE_SELECT_OFFSET_HOB]  = hobfile->device_head;
+       args.hobRegister[IDE_CONTROL_OFFSET_HOB] = hobfile->control;
+
+       ide_init_drive_taskfile(&rq);
+       /* This is kept for internal use only !!! */
+       args.command_type = ide_cmd_type_parser (&args);
+       if (args.command_type != IDE_DRIVE_TASK_NO_DATA)
+               rq.current_nr_sectors = rq.nr_sectors = (hobfile->sector_count << 8) | taskfile->sector_count;
+
+       rq.cmd = IDE_DRIVE_TASKFILE;
+       rq.buffer = buf;
+       rq.special = &args;
+       return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *args, byte *buf)
+{
+       struct request rq;
+       ide_init_drive_taskfile(&rq);
+       rq.cmd = IDE_DRIVE_TASKFILE;
+       rq.buffer = buf;
+
+       if (args->command_type != IDE_DRIVE_TASK_NO_DATA)
+               rq.current_nr_sectors = rq.nr_sectors = (args->hobRegister[IDE_NSECTOR_OFFSET_HOB] << 8) | args->tfRegister[IDE_NSECTOR_OFFSET];
+
+       rq.special = args;
+       return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
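+
+/*
+ * Illustrative sketch only, kept out of the build like the other dead
+ * fragments in this file: one plausible way for kernel code to issue
+ * WIN_IDENTIFY through ide_raw_taskfile().  The helper name and the
+ * 512-byte buffer are assumptions for the example, not driver API.
+ */
+#if 0
+static int example_raw_identify (ide_drive_t *drive, byte *buf /* 512 bytes */)
+{
+	ide_task_t args;
+
+	memset(&args, 0, sizeof(ide_task_t));
+	args.tfRegister[IDE_NSECTOR_OFFSET] = 0x01;	/* one 512-byte block */
+	args.tfRegister[IDE_COMMAND_OFFSET] = WIN_IDENTIFY;
+	/* let the parsers pick the command type and completion handler */
+	args.command_type = ide_cmd_type_parser(&args);
+	return ide_raw_taskfile(drive, &args, buf);
+}
+#endif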
+
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+char * ide_ioctl_verbose (unsigned int cmd)
+{
+       return("unknown");
+}
+
+char * ide_task_cmd_verbose (byte task)
+{
+       return("unknown");
+}
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+/*
+ *  The taskfile glue table
+ *
+ *  reqtask.data_phase      reqtask.req_cmd
+ *                          (args.command_type)          args.handler
+ *
+ *  TASKFILE_P_OUT_DMAQ     ??                           ??
+ *  TASKFILE_P_IN_DMAQ      ??                           ??
+ *  TASKFILE_P_OUT_DMA      ??                           ??
+ *  TASKFILE_P_IN_DMA       ??                           ??
+ *  TASKFILE_P_OUT          ??                           ??
+ *  TASKFILE_P_IN           ??                           ??
+ *
+ *  TASKFILE_OUT_DMAQ       IDE_DRIVE_TASK_RAW_WRITE     NULL
+ *  TASKFILE_IN_DMAQ        IDE_DRIVE_TASK_IN            NULL
+ *
+ *  TASKFILE_OUT_DMA        IDE_DRIVE_TASK_RAW_WRITE     NULL
+ *  TASKFILE_IN_DMA         IDE_DRIVE_TASK_IN            NULL
+ *
+ *  TASKFILE_IN_OUT         ??                           ??
+ *
+ *  TASKFILE_MULTI_OUT      IDE_DRIVE_TASK_RAW_WRITE     task_mulout_intr
+ *  TASKFILE_MULTI_IN       IDE_DRIVE_TASK_IN            task_mulin_intr
+ *
+ *  TASKFILE_OUT            IDE_DRIVE_TASK_RAW_WRITE     task_out_intr
+ *  TASKFILE_OUT            IDE_DRIVE_TASK_OUT           task_out_intr
+ *
+ *  TASKFILE_IN             IDE_DRIVE_TASK_IN            task_in_intr
+ *  TASKFILE_NO_DATA        IDE_DRIVE_TASK_NO_DATA       task_no_data_intr
+ *
+ *                          IDE_DRIVE_TASK_SET_XFER      task_no_data_intr
+ *                          IDE_DRIVE_TASK_INVALID
+ */
+
+#define MAX_DMA                (256*SECTOR_WORDS)
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+       ide_task_request_t      *req_task;
+       ide_task_t              args;
+
+       byte *outbuf            = NULL;
+       byte *inbuf             = NULL;
+       task_ioreg_t *argsptr   = args.tfRegister;
+       task_ioreg_t *hobsptr   = args.hobRegister;
+       int err                 = 0;
+       int tasksize            = sizeof(struct ide_task_request_s);
+       int taskin              = 0;
+       int taskout             = 0;
+
+       req_task = kmalloc(tasksize, GFP_KERNEL);
+       if (req_task == NULL) return -ENOMEM;
+       memset(req_task, 0, tasksize);
+       if (copy_from_user(req_task, (void *) arg, tasksize)) {
+               kfree(req_task);
+               return -EFAULT;
+       }
+
+       taskout = (int) req_task->out_size;
+       taskin  = (int) req_task->in_size;
+
+       if (taskout) {
+               int outtotal = tasksize;
+               outbuf = kmalloc(taskout, GFP_KERNEL);
+               if (outbuf == NULL) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
+               memset(outbuf, 0, taskout);
+               if (copy_from_user(outbuf, (void *)arg + outtotal, taskout)) {
+                       err = -EFAULT;
+                       goto abort;
+               }
+       }
+
+       if (taskin) {
+               int intotal = tasksize + taskout;
+               inbuf = kmalloc(taskin, GFP_KERNEL);
+               if (inbuf == NULL) {
+                       err = -ENOMEM;
+                       goto abort;
+               }
+               memset(inbuf, 0, taskin);
+               if (copy_from_user(inbuf, (void *)arg + intotal, taskin)) {
+                       err = -EFAULT;
+                       goto abort;
+               }
+       }
+
+       memset(argsptr, 0, HDIO_DRIVE_TASK_HDR_SIZE);
+       memset(hobsptr, 0, HDIO_DRIVE_HOB_HDR_SIZE);
+       memcpy(argsptr, req_task->io_ports, HDIO_DRIVE_TASK_HDR_SIZE);
+       memcpy(hobsptr, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE);
+
+       args.tf_in_flags  = req_task->in_flags;
+       args.tf_out_flags = req_task->out_flags;
+       args.data_phase   = req_task->data_phase;
+       args.command_type = req_task->req_cmd;
+
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+       DTF("%s: ide_ioctl_cmd %s:  ide_task_cmd %s\n",
+               drive->name,
+               ide_ioctl_verbose(cmd),
+               ide_task_cmd_verbose(args.tfRegister[IDE_COMMAND_OFFSET]));
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+       switch(req_task->data_phase) {
+               case TASKFILE_OUT_DMAQ:
+               case TASKFILE_OUT_DMA:
+                       args.prehandler = NULL;
+                       args.handler = NULL;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, outbuf);
+                       break;
+               case TASKFILE_IN_DMAQ:
+               case TASKFILE_IN_DMA:
+                       args.prehandler = NULL;
+                       args.handler = NULL;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, inbuf);
+                       break;
+               case TASKFILE_IN_OUT:
+#if 0
+                       args.prehandler = &pre_task_out_intr;
+                       args.handler = &task_out_intr;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, outbuf);
+                       args.prehandler = NULL;
+                       args.handler = &task_in_intr;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, inbuf);
+                       break;
+#else
+                       err = -EFAULT;
+                       goto abort;
+#endif
+               case TASKFILE_MULTI_OUT:
+                       if (drive->mult_count) {
+                               args.prehandler = &pre_task_out_intr;
+                               args.handler = &task_mulout_intr;
+                               args.posthandler = NULL;
+                               err = ide_raw_taskfile(drive, &args, outbuf);
+                       } else {
+                               /* (hs): give up if multcount is not set */
+                               printk("%s: %s Multimode Write failure: " \
+                                       "multcount is not set\n",
+                                        drive->name, __FUNCTION__);
+                               err = -EPERM;
+                               goto abort;
+                       }
+                       break;
+               case TASKFILE_OUT:
+                       args.prehandler = &pre_task_out_intr;
+                       args.handler = &task_out_intr;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, outbuf);
+                       break;
+               case TASKFILE_MULTI_IN:
+                       if (drive->mult_count) {
+                               args.prehandler = NULL;
+                               args.handler = &task_mulin_intr;
+                               args.posthandler = NULL;
+                               err = ide_raw_taskfile(drive, &args, inbuf);
+                       } else {
+                               /* (hs): give up if multcount is not set */
+                               printk("%s: %s Multimode Read failure: " \
+                                       "multcount is not set\n",
+                                       drive->name, __FUNCTION__);
+                               err = -EPERM;
+                               goto abort;
+                       }
+                       break;
+               case TASKFILE_IN:
+                       args.prehandler = NULL;
+                       args.handler = &task_in_intr;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, inbuf);
+                       break;
+               case TASKFILE_NO_DATA:
+                       args.prehandler = NULL;
+                       args.handler = &task_no_data_intr;
+                       args.posthandler = NULL;
+                       err = ide_raw_taskfile(drive, &args, NULL);
+                       break;
+               default:
+                       args.prehandler = NULL;
+                       args.handler = NULL;
+                       args.posthandler = NULL;
+                       err = -EFAULT;
+                       goto abort;
+       }
+
+       memcpy(req_task->io_ports, &(args.tfRegister), HDIO_DRIVE_TASK_HDR_SIZE);
+       memcpy(req_task->hob_ports, &(args.hobRegister), HDIO_DRIVE_HOB_HDR_SIZE);
+       req_task->in_flags  = args.tf_in_flags;
+       req_task->out_flags = args.tf_out_flags;
+
+       if (copy_to_user((void *)arg, req_task, tasksize)) {
+               err = -EFAULT;
+               goto abort;
+       }
+       if (taskout) {
+               int outtotal = tasksize;
+               if (copy_to_user((void *)arg+outtotal, outbuf, taskout)) {
+                       err = -EFAULT;
+                       goto abort;
+               }
+       }
+       if (taskin) {
+               int intotal = tasksize + taskout;
+               if (copy_to_user((void *)arg+intotal, inbuf, taskin)) {
+                       err = -EFAULT;
+                       goto abort;
+               }
+       }
+abort:
+       kfree(req_task);
+       if (outbuf != NULL)
+               kfree(outbuf);
+       if (inbuf != NULL)
+               kfree(inbuf);
+       return err;
+}
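+
+/*
+ * For reference: the ioctl argument above is one flat user buffer laid out
+ * as [ide_task_request_t][out_size bytes of out data][in_size bytes of in
+ * data].  A hypothetical user-space caller -- assuming this handler is
+ * reached via the HDIO_DRIVE_TASKFILE ioctl -- might build an IDENTIFY
+ * request roughly as below; the buffer size and register indices are
+ * illustrative only.
+ */
+#if 0
+	int out_size = 0, in_size = 512;
+	ide_task_request_t *req = calloc(1, sizeof(*req) + out_size + in_size);
+
+	req->out_size    = out_size;		/* out data follows the header */
+	req->in_size     = in_size;		/* in data follows the out data */
+	req->data_phase  = TASKFILE_IN;
+	req->req_cmd     = IDE_DRIVE_TASK_IN;
+	req->io_ports[2] = 0x01;		/* sector count */
+	req->io_ports[7] = WIN_IDENTIFY;	/* command register */
+	if (ioctl(fd, HDIO_DRIVE_TASKFILE, req) == 0)
+		memcpy(id_buf, (char *)req + sizeof(*req), in_size);
+	free(req);
+#endif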
+
+EXPORT_SYMBOL(task_read_24);
+EXPORT_SYMBOL(do_rw_taskfile);
+EXPORT_SYMBOL(do_taskfile);
+// EXPORT_SYMBOL(flagged_taskfile);
+
+//EXPORT_SYMBOL(ide_end_taskfile);
+
+EXPORT_SYMBOL(set_multmode_intr);
+EXPORT_SYMBOL(set_geometry_intr);
+EXPORT_SYMBOL(recal_intr);
+
+EXPORT_SYMBOL(task_no_data_intr);
+EXPORT_SYMBOL(task_in_intr);
+EXPORT_SYMBOL(task_mulin_intr);
+EXPORT_SYMBOL(pre_task_out_intr);
+EXPORT_SYMBOL(task_out_intr);
+EXPORT_SYMBOL(task_mulout_intr);
+
+EXPORT_SYMBOL(ide_init_drive_taskfile);
+EXPORT_SYMBOL(ide_wait_taskfile);
+EXPORT_SYMBOL(ide_raw_taskfile);
+EXPORT_SYMBOL(ide_pre_handler_parser);
+EXPORT_SYMBOL(ide_handler_parser);
+EXPORT_SYMBOL(ide_cmd_type_parser);
+EXPORT_SYMBOL(ide_taskfile_ioctl);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+
+#if 0
+{
+
+{ /* start cdrom */
+
+       struct cdrom_info *info = drive->driver_data;
+
+       if (info->dma) {
+               if (info->cmd == READ) {
+                       info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive);
+               } else if (info->cmd == WRITE) {
+                       info->dma = !HWIF(drive)->dmaproc(ide_dma_write, drive);
+               } else {
+                       printk("ide-cd: DMA set, but not allowed\n");
+               }
+       }
+
+       /* Set up the controller registers. */
+       OUT_BYTE (info->dma, IDE_FEATURE_REG);
+       OUT_BYTE (0, IDE_NSECTOR_REG);
+       OUT_BYTE (0, IDE_SECTOR_REG);
+
+       OUT_BYTE (xferlen & 0xff, IDE_LCYL_REG);
+       OUT_BYTE (xferlen >> 8  , IDE_HCYL_REG);
+       if (IDE_CONTROL_REG)
+               OUT_BYTE (drive->ctl, IDE_CONTROL_REG);
+
+       if (info->dma)
+               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+
+       if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) {
+               ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry);
+               OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+               return ide_started;
+       } else {
+               OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
+               return (*handler) (drive);
+       }
+
+} /* end cdrom */
+
+{ /* start floppy */
+
+       idefloppy_floppy_t *floppy = drive->driver_data;
+       idefloppy_bcount_reg_t bcount;
+       int dma_ok = 0;
+
+       floppy->pc=pc;          /* Set the current packet command */
+
+       pc->retries++;
+       pc->actually_transferred=0; /* We haven't transferred any data yet */
+       pc->current_position=pc->buffer;
+       bcount.all = IDE_MIN(pc->request_transfer, 63 * 1024);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+       if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+               (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+       }
+       if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+               dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+       if (IDE_CONTROL_REG)
+               OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+       OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG);        /* Use PIO/DMA */
+       OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+       OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+       OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+       if (dma_ok) {   /* Begin DMA, if necessary */
+               set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+       }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+} /* end floppy */
+
+{ /* start tape */
+
+       idetape_tape_t *tape = drive->driver_data;
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+       if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) {
+               printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n");
+               (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+       }
+       if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+               dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+
+       if (IDE_CONTROL_REG)
+               OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+       OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG);        /* Use PIO/DMA */
+       OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+       OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+       OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+#ifdef CONFIG_BLK_DEV_IDEDMA
+       if (dma_ok) {   /* Begin DMA, if necessary */
+               set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+       }
+#endif /* CONFIG_BLK_DEV_IDEDMA */
+       if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) {
+               ide_set_handler(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL);
+               OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+               return ide_started;
+       } else {
+               OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG);
+               return idetape_transfer_pc(drive);
+       }
+
+} /* end tape */
+
+}
+#endif
+
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+#if 0
+       switch(req_task->data_phase) {
+               case TASKFILE_P_OUT_DMAQ:
+               case TASKFILE_P_IN_DMAQ:
+               case TASKFILE_P_OUT_DMA:
+               case TASKFILE_P_IN_DMA:
+               case TASKFILE_P_OUT:
+               case TASKFILE_P_IN:
+       }
+#endif
+       return -ENOMSG;
+}
+
+EXPORT_SYMBOL(pkt_taskfile_ioctl);
+
+#endif /* CONFIG_PKT_TASK_IOCTL */
diff --git a/xen-2.4.16/drivers/ide/ide.c b/xen-2.4.16/drivers/ide/ide.c
new file mode 100644 (file)
index 0000000..af3694b
--- /dev/null
@@ -0,0 +1,4167 @@
+/*
+ *  linux/drivers/ide/ide.c            Version 6.31    June 9, 2000
+ *
+ *  Copyright (C) 1994-1998  Linus Torvalds & authors (see below)
+ */
+
+/*
+ *  Mostly written by Mark Lord  <mlord@pobox.com>
+ *                and Gadi Oxman <gadio@netvision.net.il>
+ *                and Andre Hedrick <andre@linux-ide.org>
+ *
+ *  See linux/MAINTAINERS for address of current maintainer.
+ *
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to MAX_HWIFS IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary:    ide0, port 0x1f0; major=3;  hda is minor=0; hdb is minor=64
+ * Secondary:  ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
+ * Tertiary:   ide2, port 0x???; major=33; hde is minor=0; hdf is minor=64
+ * Quaternary: ide3, port 0x???; major=34; hdg is minor=0; hdh is minor=64
+ * ...
+ *
+ *  From hd.c:
+ *  |
+ *  | It traverses the request-list, using interrupts to jump between functions.
+ *  | As nearly all functions can be called within interrupts, we may not sleep.
+ *  | Special care is recommended.  Have Fun!
+ *  |
+ *  | modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ *  |
+ *  | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ *  | in the early extended-partition checks and added DM partitions.
+ *  |
+ *  | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
+ *  |
+ *  | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ *  | and general streamlining by Mark Lord (mlord@pobox.com).
+ *
+ *  October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
+ *
+ *     Mark Lord       (mlord@pobox.com)               (IDE Perf.Pkg)
+ *     Delman Lee      (delman@ieee.org)               ("Mr. atdisk2")
+ *     Scott Snyder    (snyder@fnald0.fnal.gov)        (ATAPI IDE cd-rom)
+ *
+ *  This was a rewrite of just about everything from hd.c, though some original
+ *  code is still sprinkled about.  Think of it as a major evolution, with
+ *  inspiration from lots of linux users, esp.  hamish@zot.apana.org.au
+ *
+ *  Version 1.0 ALPHA  initial code, primary i/f working okay
+ *  Version 1.3 BETA   dual i/f on shared irq tested & working!
+ *  Version 1.4 BETA   added auto probing for irq(s)
+ *  Version 1.5 BETA   added ALPHA (untested) support for IDE cd-roms,
+ *  ...
+ * Version 5.50                allow values as small as 20 for idebus=
+ * Version 5.51                force non io_32bit in drive_cmd_intr()
+ *                     change delay_10ms() to delay_50ms() to fix problems
+ * Version 5.52                fix incorrect invalidation of removable devices
+ *                     add "hdx=slow" command line option
+ * Version 5.60                start to modularize the driver; the disk and ATAPI
+ *                      drivers can be compiled as loadable modules.
+ *                     move IDE probe code to ide-probe.c
+ *                     move IDE disk code to ide-disk.c
+ *                     add support for generic IDE device subdrivers
+ *                     add m68k code from Geert Uytterhoeven
+ *                     probe all interfaces by default
+ *                     add ioctl to (re)probe an interface
+ * Version 6.00                use per device request queues
+ *                     attempt to optimize shared hwgroup performance
+ *                     add ioctl to manually adjust bandwidth algorithms
+ *                     add kerneld support for the probe module
+ *                     fix bug in ide_error()
+ *                     fix bug in the first ide_get_lock() call for Atari
+ *                     don't flush leftover data for ATAPI devices
+ * Version 6.01                clear hwgroup->active while the hwgroup sleeps
+ *                     support HDIO_GETGEO for floppies
+ * Version 6.02                fix ide_ack_intr() call
+ *                     check partition table on floppies
+ * Version 6.03                handle bad status bit sequencing in ide_wait_stat()
+ * Version 6.10                deleted old entries from this list of updates
+ *                     replaced triton.c with ide-dma.c generic PCI DMA
+ *                     added support for BIOS-enabled UltraDMA
+ *                     rename all "promise" things to "pdc4030"
+ *                     fix EZ-DRIVE handling on small disks
+ * Version 6.11                fix probe error in ide_scan_devices()
+ *                     fix ancient "jiffies" polling bugs
+ *                     mask all hwgroup interrupts on each irq entry
+ * Version 6.12                integrate ioctl and proc interfaces
+ *                     fix parsing of "idex=" command line parameter
+ * Version 6.13                add support for ide4/ide5 courtesy rjones@orchestream.com
+ * Version 6.14                fixed IRQ sharing among PCI devices
+ * Version 6.15                added SMP awareness to IDE drivers
+ * Version 6.16                fixed various bugs; even more SMP friendly
+ * Version 6.17                fix for newest EZ-Drive problem
+ * Version 6.18                default unpartitioned-disk translation now "BIOS LBA"
+ * Version 6.19                Re-design for a UNIFORM driver for all platforms,
+ *                       model based on suggestions from Russell King and
+ *                       Geert Uytterhoeven
+ *                     Promise DC4030VL now supported.
+ *                     add support for ide6/ide7
+ *                     delay_50ms() changed to ide_delay_50ms() and exported.
+ * Version 6.20                Added/Fixed Generic ATA-66 support and hwif detection.
+ *                     Added hdx=flash to allow for second flash disk
+ *                       detection w/o the hang loop.
+ *                     Added support for ide8/ide9
+ *                     Added idex=ata66 for the quirky chipsets that are
+ *                       ATA-66 compliant, but have yet to determine a method
+ *                       of verification of the 80c cable presence.
+ *                       Specifically Promise's PDC20262 chipset.
+ * Version 6.21                Fixing/Fixed SMP spinlock issue with insight from an old
+ *                       hat that clarified original low level driver design.
+ * Version 6.30                Added SMP support; fixed multmode issues.  -ml
+ * Version 6.31                Debug Share INTR's and request queue streaming
+ *                     Native ATA-100 support
+ *                     Prep for Cascades Project
+ *
+ *  Some additional driver compile-time options are in ./include/linux/ide.h
+ *
+ *  To do, in likely order of completion:
+ *     - modify kernel to obtain BIOS geometry for drives on 2nd/3rd/4th i/f
+ *
+ */
+
+#define        REVISION        "Revision: 6.31"
+#define        VERSION         "Id: ide.c 6.31 2000/06/09"
+
+#undef REALLY_SLOW_IO          /* most systems can safely undef this */
+
+#define _IDE_C                 /* Tell ide.h it's really us */
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+/*#include <xeno/kernel.h>*/
+#include <xeno/timer.h>
+#include <xeno/mm.h>
+#include <xeno/interrupt.h>
+#include <xeno/major.h>
+#include <xeno/errno.h>
+#include <xeno/genhd.h>
+#include <xeno/blkpg.h>
+#include <xeno/slab.h>
+#include <xeno/init.h>
+#include <xeno/pci.h>
+#include <xeno/delay.h>
+#include <xeno/ide.h>
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/completion.h>*/
+/*#include <xeno/reboot.h>*/
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#include "ide_modes.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif /* CONFIG_KMOD */
+
+#ifdef CONFIG_IDE_TASKFILE_IO
+#  define __TASKFILE__IO
+#else /* CONFIG_IDE_TASKFILE_IO */
+#  undef __TASKFILE__IO
+#endif /* CONFIG_IDE_TASKFILE_IO */
+
+
+/* XXXXXXXXXXXX This may be replaced by fs/block_dev.c versions!!! XXXXX */
+/* (only included here so the hypervisor will link :-) */
+int check_disk_change(kdev_t dev) { return 0; }
+int unregister_blkdev(unsigned int major, const char * name) { return 0; }
+/* And these ones are from fs/inode.c... */
+int invalidate_device(kdev_t dev, int do_sync) { return 0; }
+/* fs/buffer.c... */
+void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) { }
+/* fs/partitions/check.c... */
+void grok_partitions(struct gendisk *dev, int drive, 
+                     unsigned minors, long size) { }
+void register_disk(struct gendisk *dev, kdev_t first, 
+                   unsigned minors, struct block_device_operations *ops, 
+                   long size) { }
+/* fs/devices.c... */
+const char * kdevname(kdev_t dev) { return NULL; }
+/* End of XXXXXX region */
+
+
+
+
+/* default maximum number of failures */
+#define IDE_DEFAULT_MAX_FAILURES       1
+
+static const byte ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
+
+static int     idebus_parameter; /* holds the "idebus=" parameter */
+static int     system_bus_speed; /* holds what we think is VESA/PCI bus speed */
+static int     initializing;     /* set while initializing built-in drivers */
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+static int     ide_scan_direction;     /* THIS was formerly 2.2.x pci=reverse */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+/*
+ * ide_lock is used by the Atari code to obtain access to the IDE interrupt,
+ * which is shared between several drivers.
+ */
+static int     ide_lock;
+#endif /* __mc68000__ || CONFIG_APUS */
+
+int noautodma = 0;
+
+/*
+ * ide_modules keeps track of the available IDE chipset/probe/driver modules.
+ */
+ide_module_t *ide_modules;
+ide_module_t *ide_probe;
+
+/*
+ * This is declared extern in ide.h, for access by other IDE modules:
+ */
+ide_hwif_t     ide_hwifs[MAX_HWIFS];   /* master data repository */
+
+#if (DISK_RECOVERY_TIME > 0)
+/*
+ * For really screwy hardware (hey, at least it *can* be used with Linux)
+ * we can enforce a minimum delay time between successive operations.
+ */
+static unsigned long read_timer (void)
+{
+       unsigned long t, flags;
+       int i;
+
+       __save_flags(flags);    /* local CPU only */
+       __cli();                /* local CPU only */
+       t = jiffies * 11932;
+       outb_p(0, 0x43);
+       i = inb_p(0x40);
+       i |= inb(0x40) << 8;
+       __restore_flags(flags); /* local CPU only */
+       return (t - i);
+}
+#endif /* DISK_RECOVERY_TIME */
+
+static inline void set_recovery_timer (ide_hwif_t *hwif)
+{
+#if (DISK_RECOVERY_TIME > 0)
+       hwif->last_time = read_timer();
+#endif /* DISK_RECOVERY_TIME */
+}
+
+/*
+ * Do not even *think* about calling this!
+ */
+static void init_hwif_data (unsigned int index)
+{
+       unsigned int unit;
+       hw_regs_t hw;
+       ide_hwif_t *hwif = &ide_hwifs[index];
+
+       /* bulk initialize hwif & drive info with zeros */
+       memset(hwif, 0, sizeof(ide_hwif_t));
+       memset(&hw, 0, sizeof(hw_regs_t));
+
+       /* fill in any non-zero initial values */
+       hwif->index     = index;
+       ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq);
+       memcpy(&hwif->hw, &hw, sizeof(hw));
+       memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports));
+       hwif->noprobe   = !hwif->io_ports[IDE_DATA_OFFSET];
+#ifdef CONFIG_BLK_DEV_HD
+       if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA)
+               hwif->noprobe = 1; /* may be overridden by ide_setup() */
+#endif /* CONFIG_BLK_DEV_HD */
+       hwif->major     = ide_hwif_to_major[index];
+       hwif->name[0]   = 'i';
+       hwif->name[1]   = 'd';
+       hwif->name[2]   = 'e';
+       hwif->name[3]   = '0' + index;
+       hwif->bus_state = BUSSTATE_ON;
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+
+               drive->media                    = ide_disk;
+               drive->select.all               = (unit<<4)|0xa0;
+               drive->hwif                     = hwif;
+               drive->ctl                      = 0x08;
+               drive->ready_stat               = READY_STAT;
+               drive->bad_wstat                = BAD_W_STAT;
+               drive->special.b.recalibrate    = 1;
+               drive->special.b.set_geometry   = 1;
+               drive->name[0]                  = 'h';
+               drive->name[1]                  = 'd';
+               drive->name[2]                  = 'a' + (index * MAX_DRIVES) + unit;
+               drive->max_failures             = IDE_DEFAULT_MAX_FAILURES;
+               /*init_waitqueue_head(&drive->wqueue);*/
+       }
+}
+
+/*
+ * init_ide_data() sets reasonable default values into all fields
+ * of all instances of the hwifs and drives, but only on the first call.
+ * Subsequent calls have no effect (they don't wipe out anything).
+ *
+ * This routine is normally called at driver initialization time,
+ * but may also be called MUCH earlier during kernel "command-line"
+ * parameter processing.  As such, we cannot depend on any other parts
+ * of the kernel (such as memory allocation) to be functioning yet.
+ *
+ * This is too bad, as otherwise we could dynamically allocate the
+ * ide_drive_t structs as needed, rather than always consuming memory
+ * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them.
+ */
+#define MAGIC_COOKIE 0x12345678
+static void __init init_ide_data (void)
+{
+       unsigned int index;
+       static unsigned long magic_cookie = MAGIC_COOKIE;
+
+       if (magic_cookie != MAGIC_COOKIE)
+               return;         /* already initialized */
+       magic_cookie = 0;
+
+       /* Initialise all interface structures */
+       for (index = 0; index < MAX_HWIFS; ++index)
+               init_hwif_data(index);
+
+       /* Add default hw interfaces */
+       ide_init_default_hwifs();
+
+       idebus_parameter = 0;
+       system_bus_speed = 0;
+}
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks, except:
+ *     (1) they never have a slave unit, and
+ *     (2) they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ *
+ * FIXME: This treatment is probably applicable for *all* PCMCIA (PC CARD) devices,
+ * so in linux 2.3.x we should change this to just treat all PCMCIA drives this way,
+ * and get rid of the model-name tests below (too big of an interface change for 2.2.x).
+ * At that time, we might also consider parameterizing the timeouts and retries,
+ * since these are MUCH faster than mechanical drives. -M.Lord
+ */
+int drive_is_flashcard (ide_drive_t *drive)
+{
+       struct hd_driveid *id = drive->id;
+
+       if (drive->removable && id != NULL) {
+               if (id->config == 0x848a) return 1;     /* CompactFlash */
+               if (!strncmp(id->model, "KODAK ATA_FLASH", 15)  /* Kodak */
+                || !strncmp(id->model, "Hitachi CV", 10)       /* Hitachi */
+                || !strncmp(id->model, "SunDisk SDCFB", 13)    /* SunDisk */
+                || !strncmp(id->model, "HAGIWARA HPC", 12)     /* Hagiwara */
+                || !strncmp(id->model, "LEXAR ATA_FLASH", 15)  /* Lexar */
+                || !strncmp(id->model, "ATA_FLASH", 9))        /* Simple Tech */
+               {
+                       return 1;       /* yes, it is a flash memory card */
+               }
+       }
+       return 0;       /* no, it is not a flash memory card */
+}
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz).  This is used for calculating interface PIO timings.
+ * The default is 33 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ * The actual value to be used is computed/displayed the first time through.
+ */
+int ide_system_bus_speed (void)
+{
+       if (!system_bus_speed) {
+               if (idebus_parameter)
+                       system_bus_speed = idebus_parameter;    /* user supplied value */
+#ifdef CONFIG_PCI
+               else if (pci_present())
+                       system_bus_speed = 33;  /* safe default value for PCI */
+#endif /* CONFIG_PCI */
+               else
+                       system_bus_speed = 50;  /* safe default value for VESA and PCI */
+               printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed,
+                       idebus_parameter ? "" : "; override with idebus=xx");
+       }
+       return system_bus_speed;
+}
+
+#if SUPPORT_VLB_SYNC
+/*
+ * Some localbus EIDE interfaces require a special access sequence
+ * when using 32-bit I/O instructions to transfer data.  We call this
+ * the "vlb_sync" sequence, which consists of three successive reads
+ * of the sector count register location, with interrupts disabled
+ * to ensure that the reads all happen together.
+ */
+static inline void do_vlb_sync (ide_ioreg_t port) {
+       (void) inb (port);
+       (void) inb (port);
+       (void) inb (port);
+}
+#endif /* SUPPORT_VLB_SYNC */
+
+/*
+ * This is used for most PIO data transfers *from* the IDE interface
+ */
+void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       byte io_32bit;
+
+       /* first check if this controller has defined a special function
+        * for handling polled ide transfers
+        */
+
+       if(HWIF(drive)->ideproc) {
+               HWIF(drive)->ideproc(ideproc_ide_input_data,
+                                    drive, buffer, wcount);
+               return;
+       }
+
+       io_32bit = drive->io_32bit;
+
+       if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+               if (io_32bit & 2) {
+                       unsigned long flags;
+                       __save_flags(flags);    /* local CPU only */
+                       __cli();                /* local CPU only */
+                       do_vlb_sync(IDE_NSECTOR_REG);
+                       insl(IDE_DATA_REG, buffer, wcount);
+                       __restore_flags(flags); /* local CPU only */
+               } else
+#endif /* SUPPORT_VLB_SYNC */
+                       insl(IDE_DATA_REG, buffer, wcount);
+       } else {
+#if SUPPORT_SLOW_DATA_PORTS
+               if (drive->slow) {
+                       unsigned short *ptr = (unsigned short *) buffer;
+                       while (wcount--) {
+                               *ptr++ = inw_p(IDE_DATA_REG);
+                               *ptr++ = inw_p(IDE_DATA_REG);
+                       }
+               } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+                       insw(IDE_DATA_REG, buffer, wcount<<1);
+       }
+}
+
+/*
+ * This is used for most PIO data transfers *to* the IDE interface
+ */
+void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       byte io_32bit;
+
+       if(HWIF(drive)->ideproc) {
+               HWIF(drive)->ideproc(ideproc_ide_output_data,
+                                    drive, buffer, wcount);
+               return;
+       }
+
+       io_32bit = drive->io_32bit;
+
+       if (io_32bit) {
+#if SUPPORT_VLB_SYNC
+               if (io_32bit & 2) {
+                       unsigned long flags;
+                       __save_flags(flags);    /* local CPU only */
+                       __cli();                /* local CPU only */
+                       do_vlb_sync(IDE_NSECTOR_REG);
+                       outsl(IDE_DATA_REG, buffer, wcount);
+                       __restore_flags(flags); /* local CPU only */
+               } else
+#endif /* SUPPORT_VLB_SYNC */
+                       outsl(IDE_DATA_REG, buffer, wcount);
+       } else {
+#if SUPPORT_SLOW_DATA_PORTS
+               if (drive->slow) {
+                       unsigned short *ptr = (unsigned short *) buffer;
+                       while (wcount--) {
+                               outw_p(*ptr++, IDE_DATA_REG);
+                               outw_p(*ptr++, IDE_DATA_REG);
+                       }
+               } else
+#endif /* SUPPORT_SLOW_DATA_PORTS */
+                       outsw(IDE_DATA_REG, buffer, wcount<<1);
+       }
+}
+
+/*
+ * The following routines are mainly used by the ATAPI drivers.
+ *
+ * These routines will round up any request for an odd number of bytes,
+ * so if an odd bytecount is specified, be sure that there's at least one
+ * extra byte allocated for the buffer.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+       if(HWIF(drive)->ideproc) {
+               HWIF(drive)->ideproc(ideproc_atapi_input_bytes,
+                                    drive, buffer, bytecount);
+               return;
+       }
+
+       ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+       if (MACH_IS_ATARI || MACH_IS_Q40) {
+               /* Atari has a byte-swapped IDE interface */
+               insw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+               return;
+       }
+#endif /* CONFIG_ATARI */
+       ide_input_data (drive, buffer, bytecount / 4);
+       if ((bytecount & 0x03) >= 2)
+               insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
+
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+       if(HWIF(drive)->ideproc) {
+               HWIF(drive)->ideproc(ideproc_atapi_output_bytes,
+                                    drive, buffer, bytecount);
+               return;
+       }
+
+       ++bytecount;
+#if defined(CONFIG_ATARI) || defined(CONFIG_Q40)
+       if (MACH_IS_ATARI || MACH_IS_Q40) {
+               /* Atari has a byte-swapped IDE interface */
+               outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+               return;
+       }
+#endif /* CONFIG_ATARI */
+       ide_output_data (drive, buffer, bytecount / 4);
+       if ((bytecount & 0x03) >= 2)
+               outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
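+
+/*
+ * Illustrative sketch only: per the round-up rule above, a caller asking
+ * for an odd number of bytes must allocate one spare byte.  ODD_LEN is a
+ * hypothetical length, not a driver constant.
+ */
+#if 0
+	byte buf[ODD_LEN + 1];			/* spare byte for the round-up */
+	atapi_input_bytes(drive, buf, ODD_LEN);	/* transfers ODD_LEN + 1 bytes */
+#endif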
+
+/*
+ * Needed for PCI irq sharing
+ */
+//static inline
+int drive_is_ready (ide_drive_t *drive)
+{
+       byte stat = 0;
+       if (drive->waiting_for_dma)
+               return HWIF(drive)->dmaproc(ide_dma_test_irq, drive);
+#if 0
+       udelay(1);      /* need to guarantee 400ns since last command was issued */
+#endif
+
+#ifdef CONFIG_IDEPCI_SHARE_IRQ
+       /*
+        * We do a passive status test under shared PCI interrupts on
+        * cards that truly share the ATA side interrupt, but may also share
+        * an interrupt with another pci card/device.  We make no assumptions
+        * about possible isa-pnp and pci-pnp issues yet.
+        */
+       if (IDE_CONTROL_REG)
+               stat = GET_ALTSTAT();
+       else
+#endif /* CONFIG_IDEPCI_SHARE_IRQ */
+       stat = GET_STAT();      /* Note: this may clear a pending IRQ!! */
+
+       if (stat & BUSY_STAT)
+               return 0;       /* drive busy:  definitely not interrupting */
+       return 1;               /* drive ready: *might* be interrupting */
+}
+
+/*
+ * This is our end_request replacement function.
+ */
+void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+       struct request *rq;
+       unsigned long flags;
+       ide_drive_t *drive = hwgroup->drive;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rq = hwgroup->rq;
+
+       /*
+        * decide whether to re-enable DMA -- 3 is an arbitrary magic number
+        * for now; if we time out on DMA more than 3 times, just stay in PIO
+        */
+       if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) {
+               drive->state = 0;
+               hwgroup->hwif->dmaproc(ide_dma_on, drive);
+       }
+
+       if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) {
+               add_blkdev_randomness(MAJOR(rq->rq_dev));
+               blkdev_dequeue_request(rq);
+               hwgroup->rq = NULL;
+               end_that_request_last(rq);
+       }
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * This should get invoked any time we exit the driver to
+ * wait for an interrupt response from a drive.  handler() points
+ * at the appropriate code to handle the next interrupt, and a
+ * timer is started to prevent us from waiting forever in case
+ * something goes wrong (see the ide_timer_expiry() handler later on).
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
+                     unsigned int timeout, ide_expiry_t *expiry)
+{
+       unsigned long flags;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       if (hwgroup->handler != NULL) {
+               printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n",
+                       drive->name, hwgroup->handler, handler);
+       }
+       hwgroup->handler        = handler;
+       hwgroup->expiry         = expiry;
+       hwgroup->timer.expires  = jiffies + timeout;
+       add_timer(&hwgroup->timer);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * current_capacity() returns the capacity (in sectors) of a drive
+ * according to its current geometry/LBA settings.
+ */
+unsigned long current_capacity (ide_drive_t *drive)
+{
+       if (!drive->present)
+               return 0;
+       if (drive->driver != NULL)
+               return DRIVER(drive)->capacity(drive);
+       return 0;
+}
+
+extern struct block_device_operations ide_fops[];
+/*
+ * ide_geninit() is called exactly *once* for each interface.
+ */
+void ide_geninit (ide_hwif_t *hwif)
+{
+       unsigned int unit;
+       struct gendisk *gd = hwif->gd;
+
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+
+               if (!drive->present)
+                       continue;
+               if (drive->media!=ide_disk && drive->media!=ide_floppy)
+                       continue;
+               register_disk(gd,MKDEV(hwif->major,unit<<PARTN_BITS),
+#ifdef CONFIG_BLK_DEV_ISAPNP
+                       (drive->forced_geom && drive->noprobe) ? 1 :
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+                       1<<PARTN_BITS, ide_fops,
+                       current_capacity(drive));
+       }
+}
+
+static ide_startstop_t do_reset1 (ide_drive_t *, int);         /* needed below */
+
+/*
+ * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an atapi drive reset operation. If the drive has not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
+{
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       byte stat;
+
+       SELECT_DRIVE(HWIF(drive),drive);
+       udelay (10);
+
+       if (OK_STAT(stat=GET_STAT(), 0, BUSY_STAT)) {
+               printk("%s: ATAPI reset complete\n", drive->name);
+       } else {
+               if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+                       ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+                       return ide_started;     /* continue polling */
+               }
+               hwgroup->poll_timeout = 0;      /* end of polling */
+               printk("%s: ATAPI reset timed out, status=0x%02x\n", drive->name, stat);
+               return do_reset1 (drive, 1);    /* do it the old fashioned way */
+       }
+       hwgroup->poll_timeout = 0;      /* done polling */
+       return ide_stopped;
+}
+
+/*
+ * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an ide reset operation. If the drives have not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
+{
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       ide_hwif_t *hwif = HWIF(drive);
+       byte tmp;
+
+       if (!OK_STAT(tmp=GET_STAT(), 0, BUSY_STAT)) {
+               if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+                       ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+                       return ide_started;     /* continue polling */
+               }
+               printk("%s: reset timed out, status=0x%02x\n", hwif->name, tmp);
+               drive->failures++;
+       } else  {
+               printk("%s: reset: ", hwif->name);
+               if ((tmp = GET_ERR()) == 1) {
+                       printk("success\n");
+                       drive->failures = 0;
+               } else {
+                       drive->failures++;
+#if FANCY_STATUS_DUMPS
+                       printk("master: ");
+                       switch (tmp & 0x7f) {
+                               case 1: printk("passed");
+                                       break;
+                               case 2: printk("formatter device error");
+                                       break;
+                               case 3: printk("sector buffer error");
+                                       break;
+                               case 4: printk("ECC circuitry error");
+                                       break;
+                               case 5: printk("controlling MPU error");
+                                       break;
+                               default:printk("error (0x%02x?)", tmp);
+                       }
+                       if (tmp & 0x80)
+                               printk("; slave: failed");
+                       printk("\n");
+#else
+                       printk("failed\n");
+#endif /* FANCY_STATUS_DUMPS */
+               }
+       }
+       hwgroup->poll_timeout = 0;      /* done polling */
+       return ide_stopped;
+}
+
+static void check_dma_crc (ide_drive_t *drive)
+{
+       if (drive->crc_count) {
+               (void) HWIF(drive)->dmaproc(ide_dma_off_quietly, drive);
+               if ((HWIF(drive)->speedproc) != NULL)
+                       HWIF(drive)->speedproc(drive, ide_auto_reduce_xfer(drive));
+               if (drive->current_speed >= XFER_SW_DMA_0)
+                       (void) HWIF(drive)->dmaproc(ide_dma_on, drive);
+       } else {
+               (void) HWIF(drive)->dmaproc(ide_dma_off, drive);
+       }
+}
+
+static void pre_reset (ide_drive_t *drive)
+{
+       if (drive->driver != NULL)
+               DRIVER(drive)->pre_reset(drive);
+
+       if (!drive->keep_settings) {
+               if (drive->using_dma) {
+                       check_dma_crc(drive);
+               } else {
+                       drive->unmask = 0;
+                       drive->io_32bit = 0;
+               }
+               return;
+       }
+       if (drive->using_dma)
+               check_dma_crc(drive);
+}
+
+/*
+ * do_reset1() attempts to recover a confused drive by resetting it.
+ * Unfortunately, resetting a disk drive actually resets all devices on
+ * the same interface, so it can really be thought of as resetting the
+ * interface rather than resetting the drive.
+ *
+ * ATAPI devices have their own reset mechanism which allows them to be
+ * individually reset without clobbering other devices on the same interface.
+ *
+ * Unfortunately, the IDE interface does not generate an interrupt to let
+ * us know when the reset operation has finished, so we must poll for this.
+ * Equally poor, though, is the fact that this may take a very long time
+ * to complete (up to 30 seconds worst case).  So, instead of busy-waiting here for it,
+ * we set a timer to poll at 50ms intervals.
+ */
+static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
+{
+       unsigned int unit;
+       unsigned long flags;
+       ide_hwif_t *hwif = HWIF(drive);
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+       __save_flags(flags);    /* local CPU only */
+       __cli();                /* local CPU only */
+
+       /* For an ATAPI device, first try an ATAPI SRST. */
+       if (drive->media != ide_disk && !do_not_try_atapi) {
+               pre_reset(drive);
+               SELECT_DRIVE(hwif,drive);
+               udelay (20);
+               OUT_BYTE (WIN_SRST, IDE_COMMAND_REG);
+               hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+               ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+               __restore_flags (flags);        /* local CPU only */
+               return ide_started;
+       }
+
+       /*
+        * First, reset any device state data we were maintaining
+        * for any of the drives on this interface.
+        */
+       for (unit = 0; unit < MAX_DRIVES; ++unit)
+               pre_reset(&hwif->drives[unit]);
+
+#if OK_TO_RESET_CONTROLLER
+       if (!IDE_CONTROL_REG) {
+               __restore_flags(flags);
+               return ide_stopped;
+       }
+       /*
+        * Note that we also set nIEN while resetting the device,
+        * to mask unwanted interrupts from the interface during the reset.
+        * However, due to the design of PC hardware, this will cause an
+        * immediate interrupt due to the edge transition it produces.
+        * This single interrupt gives us a "fast poll" for drives that
+        * recover from reset very quickly, saving us the first 50ms wait time.
+        */
+       OUT_BYTE(drive->ctl|6,IDE_CONTROL_REG); /* set SRST and nIEN */
+       udelay(10);                     /* more than enough time */
+       if (drive->quirk_list == 2) {
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* clear SRST and nIEN */
+       } else {
+               OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */
+       }
+       udelay(10);                     /* more than enough time */
+       hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+       ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL);
+
+       /*
+        * Some weird controllers like to reset themselves to a strange
+        * state when the disks are reset this way. At least, the Winbond
+        * 553 documentation says so.
+        */
+       if (hwif->resetproc != NULL)
+               hwif->resetproc(drive);
+
+#endif /* OK_TO_RESET_CONTROLLER */
+
+       __restore_flags (flags);        /* local CPU only */
+       return ide_started;
+}
+
+/*
+ * ide_do_reset() is the entry point to the drive/interface reset code.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *drive)
+{
+       return do_reset1 (drive, 0);
+}
+
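+/*
+ * Assemble a 24-bit value from the high-cylinder, low-cylinder and sector
+ * registers (bits 16-23, 8-15 and 0-7 respectively); e.g. register values
+ * 0x12/0x34/0x56 yield 0x123456.
+ */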
+static inline u32 read_24 (ide_drive_t *drive)
+{
+       return  (IN_BYTE(IDE_HCYL_REG)<<16) |
+               (IN_BYTE(IDE_LCYL_REG)<<8) |
+                IN_BYTE(IDE_SECTOR_REG);
+}
+
+/*
+ * Clean up after success/failure of an explicit drive cmd
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err)
+{
+       unsigned long flags;
+       struct request *rq;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rq = HWGROUP(drive)->rq;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       switch(rq->cmd) {
+               case IDE_DRIVE_CMD:
+               {
+                       byte *args = (byte *) rq->buffer;
+                       rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+                       if (args) {
+                               args[0] = stat;
+                               args[1] = err;
+                               args[2] = IN_BYTE(IDE_NSECTOR_REG);
+                       }
+                       break;
+               }
+               case IDE_DRIVE_TASK:
+               {
+                       byte *args = (byte *) rq->buffer;
+                       rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+                       if (args) {
+                               args[0] = stat;
+                               args[1] = err;
+                               args[2] = IN_BYTE(IDE_NSECTOR_REG);
+                               args[3] = IN_BYTE(IDE_SECTOR_REG);
+                               args[4] = IN_BYTE(IDE_LCYL_REG);
+                               args[5] = IN_BYTE(IDE_HCYL_REG);
+                               args[6] = IN_BYTE(IDE_SELECT_REG);
+                       }
+                       break;
+               }
+               case IDE_DRIVE_TASKFILE:
+               {
+                       ide_task_t *args = (ide_task_t *) rq->special;
+                       rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
+                       if (args) {
+                               if (args->tf_in_flags.b.data) {
+                                       unsigned short data                     = IN_WORD(IDE_DATA_REG);
+                                       args->tfRegister[IDE_DATA_OFFSET]       = (data) & 0xFF;
+                                       args->hobRegister[IDE_DATA_OFFSET_HOB]  = (data >> 8) & 0xFF;
+                               }
+                               args->tfRegister[IDE_ERROR_OFFSET]   = err;
+                               args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG);
+                               args->tfRegister[IDE_SECTOR_OFFSET]  = IN_BYTE(IDE_SECTOR_REG);
+                               args->tfRegister[IDE_LCYL_OFFSET]    = IN_BYTE(IDE_LCYL_REG);
+                               args->tfRegister[IDE_HCYL_OFFSET]    = IN_BYTE(IDE_HCYL_REG);
+                               args->tfRegister[IDE_SELECT_OFFSET]  = IN_BYTE(IDE_SELECT_REG);
+                               args->tfRegister[IDE_STATUS_OFFSET]  = stat;
+
+                               if ((drive->id->command_set_2 & 0x0400) &&
+                                   (drive->id->cfs_enable_2 & 0x0400) &&
+                                   (drive->addressing == 1)) {
+                                       OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB);
+                                       args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG);
+                                       args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG);
+                                       args->hobRegister[IDE_SECTOR_OFFSET_HOB]  = IN_BYTE(IDE_SECTOR_REG);
+                                       args->hobRegister[IDE_LCYL_OFFSET_HOB]    = IN_BYTE(IDE_LCYL_REG);
+                                       args->hobRegister[IDE_HCYL_OFFSET_HOB]    = IN_BYTE(IDE_HCYL_REG);
+                               }
+                       }
+                       break;
+               }
+               default:
+                       break;
+       }
+       spin_lock_irqsave(&io_request_lock, flags);
+       blkdev_dequeue_request(rq);
+       HWGROUP(drive)->rq = NULL;
+       end_that_request_last(rq);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
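+
+/*
+ * Illustrative summary (added, not in the original source): for an
+ * IDE_DRIVE_CMD request, rq->buffer points at a small byte array used in
+ * both directions.  On submission (see execute_drive_cmd() below) it
+ * holds { command, nsector, feature, sector-count }, with any data read
+ * back landing at &args[4]; on completion, ide_end_drive_cmd() above
+ * overwrites the first three bytes with { status, error, nsector } for
+ * the caller to inspect.
+ */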
+
+/*
+ * Error reporting, in human-readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat)
+{
+       unsigned long flags;
+       byte err = 0;
+
+       __save_flags (flags);   /* local CPU only */
+       ide__sti();             /* local CPU only */
+       printk("%s: %s: status=0x%02x", drive->name, msg, stat);
+#if FANCY_STATUS_DUMPS
+       printk(" { ");
+       if (stat & BUSY_STAT)
+               printk("Busy ");
+       else {
+               if (stat & READY_STAT)  printk("DriveReady ");
+               if (stat & WRERR_STAT)  printk("DeviceFault ");
+               if (stat & SEEK_STAT)   printk("SeekComplete ");
+               if (stat & DRQ_STAT)    printk("DataRequest ");
+               if (stat & ECC_STAT)    printk("CorrectedError ");
+               if (stat & INDEX_STAT)  printk("Index ");
+               if (stat & ERR_STAT)    printk("Error ");
+       }
+       printk("}");
+#endif /* FANCY_STATUS_DUMPS */
+       printk("\n");
+       if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) {
+               err = GET_ERR();
+               printk("%s: %s: error=0x%02x", drive->name, msg, err);
+#if FANCY_STATUS_DUMPS
+               if (drive->media == ide_disk) {
+                       printk(" { ");
+                       if (err & ABRT_ERR)     printk("DriveStatusError ");
+                       if (err & ICRC_ERR)     printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector ");
+                       if (err & ECC_ERR)      printk("UncorrectableError ");
+                       if (err & ID_ERR)       printk("SectorIdNotFound ");
+                       if (err & TRK0_ERR)     printk("TrackZeroNotFound ");
+                       if (err & MARK_ERR)     printk("AddrMarkNotFound ");
+                       printk("}");
+                       if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) {
+                               if ((drive->id->command_set_2 & 0x0400) &&
+                                   (drive->id->cfs_enable_2 & 0x0400) &&
+                                   (drive->addressing == 1)) {
+                                       __u64 sectors = 0;
+                                       u32 low = 0, high = 0;
+                                       low = read_24(drive);
+                                       OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG);
+                                       high = read_24(drive);
+
+                                       sectors = ((__u64)high << 24) | low;
+                                       printk(", LBAsect=%llu, high=%d, low=%d",
+                                              (unsigned long long) sectors,
+                                              high, low);
+                               } else {
+                                       byte cur = IN_BYTE(IDE_SELECT_REG);
+                                       if (cur & 0x40) {       /* using LBA? */
+                                               printk(", LBAsect=%ld", (unsigned long)
+                                                ((cur&0xf)<<24)
+                                                |(IN_BYTE(IDE_HCYL_REG)<<16)
+                                                |(IN_BYTE(IDE_LCYL_REG)<<8)
+                                                | IN_BYTE(IDE_SECTOR_REG));
+                                       } else {
+                                               printk(", CHS=%d/%d/%d",
+                                                (IN_BYTE(IDE_HCYL_REG)<<8) +
+                                                 IN_BYTE(IDE_LCYL_REG),
+                                                 cur & 0xf,
+                                                 IN_BYTE(IDE_SECTOR_REG));
+                                       }
+                               }
+                               if (HWGROUP(drive) && HWGROUP(drive)->rq)
+                                       printk(", sector=%ld", HWGROUP(drive)->rq->sector);
+                       }
+               }
+#endif /* FANCY_STATUS_DUMPS */
+               printk("\n");
+       }
+       __restore_flags (flags);        /* local CPU only */
+       return err;
+}
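+
+/*
+ * Example of the above (illustrative only; drive name and values are
+ * made up, but reconstructed from the printk() formats): with
+ * FANCY_STATUS_DUMPS enabled, a failed read might log
+ *
+ *	hda: read_intr: status=0x59 { DriveReady SeekComplete DataRequest Error }
+ *	hda: read_intr: error=0x40 { UncorrectableError }, LBAsect=12345, sector=12345
+ */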
+
+/*
+ * try_to_flush_leftover_data() is invoked in response to a drive
+ * unexpectedly having its DRQ_STAT bit set.  As an alternative to
+ * resetting the drive, this routine tries to clear the condition
+ * by reading a sector's worth of data from the drive.  Of course,
+ * this may not help if the drive is *waiting* for data from *us*.
+ */
+static void try_to_flush_leftover_data (ide_drive_t *drive)
+{
+       int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS;
+
+       if (drive->media != ide_disk)
+               return;
+       while (i > 0) {
+               u32 buffer[16];
+               unsigned int wcount = (i > 16) ? 16 : i;
+               i -= wcount;
+               ide_input_data (drive, buffer, wcount);
+       }
+}
+
+/*
+ * ide_error() takes action based on the error returned by the drive.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat)
+{
+       struct request *rq;
+       byte err;
+
+	if (drive == NULL)		/* check before dereferencing below */
+		return ide_stopped;
+	err = ide_dump_status(drive, msg, stat);
+	if ((rq = HWGROUP(drive)->rq) == NULL)
+		return ide_stopped;
+       /* retry only "normal" I/O: */
+       if (rq->cmd == IDE_DRIVE_CMD || rq->cmd == IDE_DRIVE_TASK) {
+               rq->errors = 1;
+               ide_end_drive_cmd(drive, stat, err);
+               return ide_stopped;
+       }
+       if (rq->cmd == IDE_DRIVE_TASKFILE) {
+               rq->errors = 1;
+               ide_end_drive_cmd(drive, stat, err);
+//             ide_end_taskfile(drive, stat, err);
+               return ide_stopped;
+       }
+
+       if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */
+               rq->errors |= ERROR_RESET;
+       } else {
+               if (drive->media == ide_disk && (stat & ERR_STAT)) {
+                       /* err has different meaning on cdrom and tape */
+                       if (err == ABRT_ERR) {
+                               if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY)
+                                       return ide_stopped; /* some newer drives don't support WIN_SPECIFY */
+                       } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) {
+                               drive->crc_count++; /* UDMA crc error -- just retry the operation */
+                       } else if (err & (BBD_ERR | ECC_ERR))   /* retries won't help these */
+                               rq->errors = ERROR_MAX;
+                       else if (err & TRK0_ERR)        /* help it find track zero */
+                               rq->errors |= ERROR_RECAL;
+               }
+               if ((stat & DRQ_STAT) && rq->cmd != WRITE)
+                       try_to_flush_leftover_data(drive);
+       }
+       if (GET_STAT() & (BUSY_STAT|DRQ_STAT))
+               OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG);    /* force an abort */
+
+       if (rq->errors >= ERROR_MAX) {
+               if (drive->driver != NULL)
+                       DRIVER(drive)->end_request(0, HWGROUP(drive));
+               else
+                       ide_end_request(0, HWGROUP(drive));
+       } else {
+               if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
+                       ++rq->errors;
+                       return ide_do_reset(drive);
+               }
+               if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+                       drive->special.b.recalibrate = 1;
+               ++rq->errors;
+       }
+       return ide_stopped;
+}
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
+{
+       ide_set_handler (drive, handler, WAIT_CMD, NULL);
+       if (IDE_CONTROL_REG)
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* clear nIEN */
+       SELECT_MASK(HWIF(drive),drive,0);
+       OUT_BYTE(nsect,IDE_NSECTOR_REG);
+       OUT_BYTE(cmd,IDE_COMMAND_REG);
+}
+
+/*
+ * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD.
+ */
+static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
+{
+       struct request *rq = HWGROUP(drive)->rq;
+       byte *args = (byte *) rq->buffer;
+       byte stat = GET_STAT();
+       int retries = 10;
+
+       ide__sti();     /* local CPU only */
+       if ((stat & DRQ_STAT) && args && args[3]) {
+               byte io_32bit = drive->io_32bit;
+               drive->io_32bit = 0;
+               ide_input_data(drive, &args[4], args[3] * SECTOR_WORDS);
+               drive->io_32bit = io_32bit;
+               while (((stat = GET_STAT()) & BUSY_STAT) && retries--)
+                       udelay(100);
+       }
+
+       if (!OK_STAT(stat, READY_STAT, BAD_STAT))
+               return ide_error(drive, "drive_cmd", stat); /* calls ide_end_drive_cmd */
+       ide_end_drive_cmd (drive, stat, GET_ERR());
+       return ide_stopped;
+}
+
+/*
+ * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT
+ * commands to a drive.  It used to do much more, but has been scaled back.
+ */
+static ide_startstop_t do_special (ide_drive_t *drive)
+{
+       special_t *s = &drive->special;
+
+#ifdef DEBUG
+       printk("%s: do_special: 0x%02x\n", drive->name, s->all);
+#endif
+       if (s->b.set_tune) {
+               ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+               s->b.set_tune = 0;
+               if (tuneproc != NULL)
+                       tuneproc(drive, drive->tune_req);
+       } else if (drive->driver != NULL) {
+               return DRIVER(drive)->special(drive);
+       } else if (s->all) {
+               printk("%s: bad special flag: 0x%02x\n", drive->name, s->all);
+               s->all = 0;
+       }
+       return ide_stopped;
+}
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0.  All other
+ * cases return 1 after invoking ide_error() -- caller should just return.
+ *
+ * This routine should be fixed to not hog the CPU during extra-long waits:
+ * busy-wait for the first jiffy or two, then set a timer to wake up at
+ * half-second intervals thereafter, until the timeout is reached.
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout)
+{
+       byte stat;
+       int i;
+       unsigned long flags;
+       /* bail early if we've exceeded max_failures */
+       if (drive->max_failures && (drive->failures > drive->max_failures)) {
+               *startstop = ide_stopped;
+               return 1;
+       }
+
+       udelay(1);      /* spec allows drive 400ns to assert "BUSY" */
+       if ((stat = GET_STAT()) & BUSY_STAT) {
+               __save_flags(flags);    /* local CPU only */
+               ide__sti();             /* local CPU only */
+               timeout += jiffies;
+               while ((stat = GET_STAT()) & BUSY_STAT) {
+                       if (0 < (signed long)(jiffies - timeout)) {
+                               __restore_flags(flags); /* local CPU only */
+                               *startstop = ide_error(drive, "status timeout", stat);
+                               return 1;
+                       }
+               }
+               __restore_flags(flags); /* local CPU only */
+       }
+       /*
+        * Allow status to settle, then read it again.
+        * A few rare drives vastly violate the 400ns spec here,
+        * so we'll wait up to 10usec for a "good" status
+        * rather than expensively fail things immediately.
+        * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
+        */
+       for (i = 0; i < 10; i++) {
+               udelay(1);
+               if (OK_STAT((stat = GET_STAT()), good, bad))
+                       return 0;
+       }
+       *startstop = ide_error(drive, "status error", stat);
+       return 1;
+}
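+
+/*
+ * Typical use of ide_wait_stat() (sketch, mirroring the call in
+ * start_request() below):
+ *
+ *	ide_startstop_t startstop;
+ *	if (ide_wait_stat(&startstop, drive, drive->ready_stat,
+ *			  BUSY_STAT|DRQ_STAT, WAIT_READY)) {
+ *		return startstop;	-- ide_error() has already run
+ *	}
+ *	-- drive is ready: safe to issue the command
+ */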
+
+/*
+ * execute_drive_cmd() issues a special drive command,
+ * usually initiated by ioctl() from the external hdparm program.
+ */
+static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq)
+{
+       switch(rq->cmd) {
+               case IDE_DRIVE_TASKFILE:
+               {
+                       ide_task_t *args = rq->special;
+                       if (!(args)) break;
+#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG
+       {
+       printk(KERN_INFO "%s: ", drive->name);
+//     printk("TF.0=x%02x ", args->tfRegister[IDE_DATA_OFFSET]);
+       printk("TF.1=x%02x ", args->tfRegister[IDE_FEATURE_OFFSET]);
+       printk("TF.2=x%02x ", args->tfRegister[IDE_NSECTOR_OFFSET]);
+       printk("TF.3=x%02x ", args->tfRegister[IDE_SECTOR_OFFSET]);
+       printk("TF.4=x%02x ", args->tfRegister[IDE_LCYL_OFFSET]);
+       printk("TF.5=x%02x ", args->tfRegister[IDE_HCYL_OFFSET]);
+       printk("TF.6=x%02x ", args->tfRegister[IDE_SELECT_OFFSET]);
+       printk("TF.7=x%02x\n", args->tfRegister[IDE_COMMAND_OFFSET]);
+       printk(KERN_INFO "%s: ", drive->name);
+//     printk("HTF.0=x%02x ", args->hobRegister[IDE_DATA_OFFSET_HOB]);
+       printk("HTF.1=x%02x ", args->hobRegister[IDE_FEATURE_OFFSET_HOB]);
+       printk("HTF.2=x%02x ", args->hobRegister[IDE_NSECTOR_OFFSET_HOB]);
+       printk("HTF.3=x%02x ", args->hobRegister[IDE_SECTOR_OFFSET_HOB]);
+       printk("HTF.4=x%02x ", args->hobRegister[IDE_LCYL_OFFSET_HOB]);
+       printk("HTF.5=x%02x ", args->hobRegister[IDE_HCYL_OFFSET_HOB]);
+       printk("HTF.6=x%02x ", args->hobRegister[IDE_SELECT_OFFSET_HOB]);
+       printk("HTF.7=x%02x\n", args->hobRegister[IDE_CONTROL_OFFSET_HOB]);
+       }
+#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */
+
+//                     if (args->tf_out_flags.all == 0) {
+                       do_taskfile(drive,
+                               (struct hd_drive_task_hdr *)&args->tfRegister,
+                               (struct hd_drive_hob_hdr *)&args->hobRegister,
+                               args->handler);
+//                     } else {
+//                             return flagged_taskfile(drive, args);
+//                     } 
+
+                       if (((args->command_type == IDE_DRIVE_TASK_RAW_WRITE) ||
+                            (args->command_type == IDE_DRIVE_TASK_OUT)) &&
+                             args->prehandler && args->handler)
+                               return args->prehandler(drive, rq);
+                       return ide_started;
+               }
+               case IDE_DRIVE_TASK:
+               {
+                       byte *args = rq->buffer;
+                       byte sel;
+                       if (!(args)) break;
+#ifdef DEBUG
+                       printk("%s: DRIVE_TASK_CMD ", drive->name);
+                       printk("cmd=0x%02x ", args[0]);
+                       printk("fr=0x%02x ", args[1]);
+                       printk("ns=0x%02x ", args[2]);
+                       printk("sc=0x%02x ", args[3]);
+                       printk("lcyl=0x%02x ", args[4]);
+                       printk("hcyl=0x%02x ", args[5]);
+                       printk("sel=0x%02x\n", args[6]);
+#endif
+                       OUT_BYTE(args[1], IDE_FEATURE_REG);
+                       OUT_BYTE(args[3], IDE_SECTOR_REG);
+                       OUT_BYTE(args[4], IDE_LCYL_REG);
+                       OUT_BYTE(args[5], IDE_HCYL_REG);
+                       sel = (args[6] & ~0x10);
+                       if (drive->select.b.unit)
+                               sel |= 0x10;
+                       OUT_BYTE(sel, IDE_SELECT_REG);
+                       ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
+                       return ide_started;
+               }
+               case IDE_DRIVE_CMD:
+               {
+                       byte *args = rq->buffer;
+                       if (!(args)) break;
+#ifdef DEBUG
+                       printk("%s: DRIVE_CMD ", drive->name);
+                       printk("cmd=0x%02x ", args[0]);
+                       printk("sc=0x%02x ", args[1]);
+                       printk("fr=0x%02x ", args[2]);
+                       printk("xx=0x%02x\n", args[3]);
+#endif
+                       if (args[0] == WIN_SMART) {
+                               OUT_BYTE(0x4f, IDE_LCYL_REG);
+                               OUT_BYTE(0xc2, IDE_HCYL_REG);
+                               OUT_BYTE(args[2],IDE_FEATURE_REG);
+                               OUT_BYTE(args[1],IDE_SECTOR_REG);
+                               ide_cmd(drive, args[0], args[3], &drive_cmd_intr);
+                               return ide_started;
+                       }
+                       OUT_BYTE(args[2],IDE_FEATURE_REG);
+                       ide_cmd(drive, args[0], args[1], &drive_cmd_intr);
+                       return ide_started;
+               }
+               default:
+                       break;
+       }
+       /*
+        * NULL is actually a valid way of waiting for
+        * all current requests to be flushed from the queue.
+        */
+#ifdef DEBUG
+       printk("%s: DRIVE_CMD (null)\n", drive->name);
+#endif
+       ide_end_drive_cmd(drive, GET_STAT(), GET_ERR());
+       return ide_stopped;
+}
+
+/*
+ * start_request() initiates handling of a new I/O request
+ * (restructured to reverse the perverted changes anonymously made
+ * back in 2.3.99-pre6).
+ */
+static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
+{
+       ide_startstop_t startstop;
+       unsigned long block, blockend;
+       unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS;
+       ide_hwif_t *hwif = HWIF(drive);
+
+#ifdef DEBUG
+       printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq);
+#endif
+       /* bail early if we've exceeded max_failures */
+       if (drive->max_failures && (drive->failures > drive->max_failures)) {
+               goto kill_rq;
+       }
+
+       if (unit >= MAX_DRIVES) {
+               printk("%s: bad device number: %s\n", hwif->name, kdevname(rq->rq_dev));
+               goto kill_rq;
+       }
+#ifdef DEBUG
+       if (rq->bh && !buffer_locked(rq->bh)) {
+               printk("%s: block not locked\n", drive->name);
+               goto kill_rq;
+       }
+#endif
+       block    = rq->sector;
+       blockend = block + rq->nr_sectors;
+
+       if ((rq->cmd == READ || rq->cmd == WRITE) &&
+           (drive->media == ide_disk || drive->media == ide_floppy)) {
+               if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) {
+                       printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name,
+                        (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors);
+                       goto kill_rq;
+               }
+               block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0;
+       }
+       /* Yecch - this will shift the entire interval,
+          possibly killing some innocent following sector */
+       if (block == 0 && drive->remap_0_to_1 == 1)
+               block = 1;  /* redirect MBR access to EZ-Drive partn table */
+
+#if (DISK_RECOVERY_TIME > 0)
+       while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
+#endif
+
+       SELECT_DRIVE(hwif, drive);
+       if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) {
+               printk("%s: drive not ready for command\n", drive->name);
+               return startstop;
+       }
+       if (!drive->special.all) {
+               switch(rq->cmd) {
+                       case IDE_DRIVE_CMD:
+                       case IDE_DRIVE_TASK:
+                       case IDE_DRIVE_TASKFILE:
+                               return execute_drive_cmd(drive, rq);
+                       default:
+                               break;
+               }
+               if (drive->driver != NULL) {
+                       return (DRIVER(drive)->do_request(drive, rq, block));
+               }
+               printk("%s: media type %d not supported\n", drive->name, drive->media);
+               goto kill_rq;
+       }
+       return do_special(drive);
+kill_rq:
+       if (drive->driver != NULL)
+               DRIVER(drive)->end_request(0, HWGROUP(drive));
+       else
+               ide_end_request(0, HWGROUP(drive));
+       return ide_stopped;
+}
+
+ide_startstop_t restart_request (ide_drive_t *drive)
+{
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       unsigned long flags;
+       struct request *rq;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       hwgroup->handler = NULL;
+       del_timer(&hwgroup->timer);
+       rq = hwgroup->rq;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       return start_request(drive, rq);
+}
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
+{
+       if (timeout > WAIT_WORSTCASE)
+               timeout = WAIT_WORSTCASE;
+       drive->sleep = timeout + jiffies;
+}
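+
+/*
+ * Usage sketch (illustrative): a subdriver that knows its device will be
+ * busy for a while can yield the hwgroup to other drives, e.g.
+ *
+ *	ide_stall_queue(drive, WAIT_MIN_SLEEP);
+ */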
+
+#define WAKEUP(drive)  ((drive)->service_start + 2 * (drive)->service_time)
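+
+/*
+ * Added note: WAKEUP() estimates when a drive has "earned" service again
+ * by charging it twice its last service time.  For example, a drive whose
+ * service started at jiffy 1000 and took 50 jiffies wakes at
+ * 1000 + 2*50 = 1100; choose_drive() below favours drives whose wakeup
+ * time has already passed.
+ */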
+
+/*
+ * choose_drive() selects the next drive which will be serviced.
+ */
+static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
+{
+       ide_drive_t *drive, *best;
+
+repeat:        
+       best = NULL;
+       drive = hwgroup->drive;
+       do {
+               if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) {
+                       if (!best
+                        || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep)))
+                        || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive))))
+                       {
+                               if( !drive->queue.plugged )
+                                       best = drive;
+                       }
+               }
+       } while ((drive = drive->next) != hwgroup->drive);
+       if (best && best->nice1 && !best->sleep && best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
+               long t = (signed long)(WAKEUP(best) - jiffies);
+               if (t >= WAIT_MIN_SLEEP) {
+                       /*
+                        * We *may* have some time to spare, but first let's see if
+                        * someone can potentially benefit from our nice mood today..
+                        */
+                       drive = best->next;
+                       do {
+                               if (!drive->sleep
+                                && 0 < (signed long)(WAKEUP(drive) - (jiffies - best->service_time))
+                                && 0 < (signed long)((jiffies + t) - WAKEUP(drive)))
+                               {
+                                       ide_stall_queue(best, IDE_MIN(t, 10 * WAIT_MIN_SLEEP));
+                                       goto repeat;
+                               }
+                       } while ((drive = drive->next) != best);
+               }
+       }
+       return best;
+}
+
+/*
+ * Issue a new request to a drive from hwgroup
+ * Caller must have already done spin_lock_irqsave(&io_request_lock, ..);
+ *
+ * A hwgroup is a serialized group of IDE interfaces.  Usually there is
+ * exactly one hwif (interface) per hwgroup, but buggy controllers (e.g. CMD640)
+ * may have both interfaces in a single hwgroup to "serialize" access.
+ * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
+ * together into one hwgroup for serialized access.
+ *
+ * Note also that several hwgroups can end up sharing a single IRQ,
+ * possibly along with many other devices.  This is especially common in
+ * PCI-based systems with off-board IDE controller cards.
+ *
+ * The IDE driver uses the single global io_request_lock spinlock to protect
+ * access to the request queues, and to protect the hwgroup->busy flag.
+ *
+ * The first thread into the driver for a particular hwgroup sets the
+ * hwgroup->busy flag to indicate that this hwgroup is now active,
+ * and then initiates processing of the top request from the request queue.
+ *
+ * Other threads attempting entry notice the busy setting, and will simply
+ * queue their new requests and exit immediately.  Note that hwgroup->busy
+ * remains set even when the driver is merely awaiting the next interrupt.
+ * Thus, the meaning is "this hwgroup is busy processing a request".
+ *
+ * When processing of a request completes, the completing thread or IRQ-handler
+ * will start the next request from the queue.  If no more work remains,
+ * the driver will clear the hwgroup->busy flag and exit.
+ *
+ * The io_request_lock (spinlock) is used to protect all access to the
+ * hwgroup->busy flag, but is otherwise not needed for most processing in
+ * the driver.  This makes the driver much friendlier to shared IRQs
+ * than previous designs, while remaining 100% (?) SMP safe and capable.
+ */
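+
+/*
+ * Caller-side sketch of the protocol above (illustrative): a thread with
+ * a new request just queues it and calls ide_do_request(); if another
+ * thread already owns the hwgroup (busy == 1), the loop below falls
+ * straight through and the IRQ path picks the request up later:
+ *
+ *	spin_lock_irqsave(&io_request_lock, flags);
+ *	list_add(&rq->queue, queue_head);
+ *	ide_do_request(hwgroup, 0);
+ *	spin_unlock_irqrestore(&io_request_lock, flags);
+ */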
+/* --BenH: made non-static as ide-pmac.c uses it to kick the hwgroup back
+ *         into life on wakeup from machine sleep.
+ */ 
+void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+{
+       ide_drive_t     *drive;
+       ide_hwif_t      *hwif;
+       struct request  *rq;
+       ide_startstop_t startstop;
+
+       ide_get_lock(&ide_lock, ide_intr, hwgroup);     /* for atari only: POSSIBLY BROKEN HERE(?) */
+
+       __cli();        /* necessary paranoia: ensure IRQs are masked on local CPU */
+
+       while (!hwgroup->busy) {
+               hwgroup->busy = 1;
+               drive = choose_drive(hwgroup);
+               if (drive == NULL) {
+                       unsigned long sleep = 0;
+                       hwgroup->rq = NULL;
+                       drive = hwgroup->drive;
+                       do {
+                               if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep)))
+                                       sleep = drive->sleep;
+                       } while ((drive = drive->next) != hwgroup->drive);
+                       if (sleep) {
+                               /*
+                                * Take a short snooze, and then wake up this hwgroup again.
+                                * This gives other hwgroups a chance to
+                                * play fairly with us, just in case there are big differences
+                                * in relative throughputs.. don't want to hog the cpu too much.
+                                */
+                               if (0 < (signed long)(jiffies + WAIT_MIN_SLEEP - sleep)) 
+                                       sleep = jiffies + WAIT_MIN_SLEEP;
+#if 1
+                               if (timer_pending(&hwgroup->timer))
+                                       printk("ide_set_handler: timer already active\n");
+#endif
+                               hwgroup->sleeping = 1;  /* so that ide_timer_expiry knows what to do */
+                               mod_timer(&hwgroup->timer, sleep);
+                               /* we purposely leave hwgroup->busy==1 while sleeping */
+                       } else {
+                               /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */
+                               ide_release_lock(&ide_lock);    /* for atari only */
+                               hwgroup->busy = 0;
+                       }
+                       return;         /* no more work for this hwgroup (for now) */
+               }
+               hwif = HWIF(drive);
+               if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif && hwif->io_ports[IDE_CONTROL_OFFSET]) {
+                       /* set nIEN for previous hwif */
+                       SELECT_INTERRUPT(hwif, drive);
+               }
+               hwgroup->hwif = hwif;
+               hwgroup->drive = drive;
+               drive->sleep = 0;
+               drive->service_start = jiffies;
+
+               if ( drive->queue.plugged )     /* paranoia */
+                       printk("%s: Huh? nuking plugged queue\n", drive->name);
+
+               rq = hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head);
+               /*
+                * Some systems have trouble with IDE IRQs arriving while
+                * the driver is still setting things up.  So, here we disable
+                * the IRQ used by this interface while the request is being started.
+                * This may look bad at first, but pretty much the same thing
+                * happens anyway when any interrupt comes in, IDE or otherwise
+                *  -- the kernel masks the IRQ while it is being handled.
+                */
+               if (masked_irq && hwif->irq != masked_irq)
+                       disable_irq_nosync(hwif->irq);
+               spin_unlock(&io_request_lock);
+               ide__sti();     /* allow other IRQs while we start this request */
+               startstop = start_request(drive, rq);
+               spin_lock_irq(&io_request_lock);
+               if (masked_irq && hwif->irq != masked_irq)
+                       enable_irq(hwif->irq);
+               if (startstop == ide_stopped)
+                       hwgroup->busy = 0;
+       }
+}
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev)
+{
+       ide_hwif_t *hwif = (ide_hwif_t *)blk_dev[MAJOR(dev)].data;
+
+       return &hwif->drives[DEVICE_NR(dev) & 1].queue;
+}
+
+/*
+ * Passes the queue on to ide_do_request()
+ */
+void do_ide_request(request_queue_t *q)
+{
+       ide_do_request(q->queuedata, 0);
+}
+
+/*
+ * Un-busy the hwgroup etc., and clear any pending DMA status.  We want to
+ * retry the current request in PIO mode instead of risking tossing it
+ * all away.
+ */
+void ide_dma_timeout_retry(ide_drive_t *drive)
+{
+       ide_hwif_t *hwif = HWIF(drive);
+       struct request *rq;
+
+       /*
+        * end current dma transaction
+        */
+       (void) hwif->dmaproc(ide_dma_end, drive);
+
+       /*
+        * complain a little, later we might remove some of this verbosity
+        */
+       printk("%s: timeout waiting for DMA\n", drive->name);
+       (void) hwif->dmaproc(ide_dma_timeout, drive);
+
+       /*
+        * disable dma for now, but remember that we did so because of
+        * a timeout -- we'll reenable after we finish this next request
+        * (or rather the first chunk of it) in pio.
+        */
+       drive->retry_pio++;
+       drive->state = DMA_PIO_RETRY;
+       (void) hwif->dmaproc(ide_dma_off_quietly, drive);
+
+       /*
+        * un-busy drive etc (hwgroup->busy is cleared on return) and
+        * make sure request is sane
+        */
+       rq = HWGROUP(drive)->rq;
+       HWGROUP(drive)->rq = NULL;
+
+       rq->errors = 0;
+       rq->sector = rq->bh->b_rsector;
+       rq->current_nr_sectors = rq->bh->b_size >> 9;
+       rq->buffer = rq->bh->b_data;
+}
+
+/*
+ * ide_timer_expiry() is our timeout function for all drive operations.
+ * But note that it can also be invoked as a result of a "sleep" operation
+ * triggered by the mod_timer() call in ide_do_request.
+ */
+void ide_timer_expiry (unsigned long data)
+{
+       ide_hwgroup_t   *hwgroup = (ide_hwgroup_t *) data;
+       ide_handler_t   *handler;
+       ide_expiry_t    *expiry;
+       unsigned long   flags;
+       unsigned long   wait;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       del_timer(&hwgroup->timer);
+
+       if ((handler = hwgroup->handler) == NULL) {
+               /*
+                * Either a marginal timeout occurred
+                * (got the interrupt just as timer expired),
+                * or we were "sleeping" to give other devices a chance.
+                * Either way, we don't really want to complain about anything.
+                */
+               if (hwgroup->sleeping) {
+                       hwgroup->sleeping = 0;
+                       hwgroup->busy = 0;
+               }
+       } else {
+               ide_drive_t *drive = hwgroup->drive;
+               if (!drive) {
+                       printk("ide_timer_expiry: hwgroup->drive was NULL\n");
+                       hwgroup->handler = NULL;
+               } else {
+                       ide_hwif_t *hwif;
+                       ide_startstop_t startstop;
+                       if (!hwgroup->busy) {
+                               hwgroup->busy = 1;      /* paranoia */
+                               printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name);
+                       }
+                       if ((expiry = hwgroup->expiry) != NULL) {
+                               /* continue */
+                               if ((wait = expiry(drive)) != 0) {
+                                       /* reset timer */
+                                       hwgroup->timer.expires  = jiffies + wait;
+                                       add_timer(&hwgroup->timer);
+                                       spin_unlock_irqrestore(&io_request_lock, flags);
+                                       return;
+                               }
+                       }
+                       hwgroup->handler = NULL;
+                       /*
+                        * We need to simulate a real interrupt when invoking
+                        * the handler() function, which means we need to globally
+                        * mask the specific IRQ:
+                        */
+                       spin_unlock(&io_request_lock);
+                       hwif  = HWIF(drive);
+#if DISABLE_IRQ_NOSYNC
+                       disable_irq_nosync(hwif->irq);
+#else
+                       disable_irq(hwif->irq); /* disable_irq_nosync ?? */
+#endif /* DISABLE_IRQ_NOSYNC */
+                       __cli();        /* local CPU only, as if we were handling an interrupt */
+                       if (hwgroup->poll_timeout != 0) {
+                               startstop = handler(drive);
+                       } else if (drive_is_ready(drive)) {
+                               if (drive->waiting_for_dma)
+                                       (void) hwgroup->hwif->dmaproc(ide_dma_lostirq, drive);
+                               (void)ide_ack_intr(hwif);
+                               printk("%s: lost interrupt\n", drive->name);
+                               startstop = handler(drive);
+                       } else {
+                               if (drive->waiting_for_dma) {
+                                       startstop = ide_stopped;
+                                       ide_dma_timeout_retry(drive);
+                               } else
+                                       startstop = ide_error(drive, "irq timeout", GET_STAT());
+                       }
+                       set_recovery_timer(hwif);
+                       drive->service_time = jiffies - drive->service_start;
+                       enable_irq(hwif->irq);
+                       spin_lock_irq(&io_request_lock);
+                       if (startstop == ide_stopped)
+                               hwgroup->busy = 0;
+               }
+       }
+       ide_do_request(hwgroup, 0);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * There's nothing really useful we can do with an unexpected interrupt,
+ * other than reading the status register (to clear it), and logging it.
+ * There should be no way that an irq can happen before we're ready for it,
+ * so we needn't worry much about losing an "important" interrupt here.
+ *
+ * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
+ * drive enters "idle", "standby", or "sleep" mode, so if the status looks
+ * "good", we just ignore the interrupt completely.
+ *
+ * This routine assumes __cli() is in effect when called.
+ *
+ * If an unexpected interrupt happens on irq15 while we are handling irq14
+ * and if the two interfaces are "serialized" (CMD640), then it looks like
+ * we could screw up by interfering with a new request being set up for irq15.
+ *
+ * In reality, this is a non-issue.  The new command is not sent unless the
+ * drive is ready to accept one, in which case we know the drive is not
+ * trying to interrupt us.  And ide_set_handler() is always invoked before
+ * completing the issuance of any new drive command, so we will not be
+ * accidentally invoked as a result of any valid command completion interrupt.
+ *
+ */
+static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
+{
+       byte stat;
+       ide_hwif_t *hwif = hwgroup->hwif;
+
+       /*
+        * handle the unexpected interrupt
+        */
+       do {
+               if (hwif->irq == irq) {
+                       stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+                       if (!OK_STAT(stat, READY_STAT, BAD_STAT)) {
+                               /* Try to not flood the console with msgs */
+                               static unsigned long last_msgtime, count;
+                               ++count;
+                               if (0 < (signed long)(jiffies - (last_msgtime + HZ))) {
+                                       last_msgtime = jiffies;
+                                       printk("%s%s: unexpected interrupt, status=0x%02x, count=%ld\n",
+                                        hwif->name, (hwif->next == hwgroup->hwif) ? "" : "(?)", stat, count);
+                               }
+                       }
+               }
+       } while ((hwif = hwif->next) != hwgroup->hwif);
+}
+
+/*
+ * entry point for all interrupts, caller does __cli() for us
+ */
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs)
+{
+       unsigned long flags;
+       ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
+       ide_hwif_t *hwif;
+       ide_drive_t *drive;
+       ide_handler_t *handler;
+       ide_startstop_t startstop;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       hwif = hwgroup->hwif;
+
+       if (!ide_ack_intr(hwif)) {
+               spin_unlock_irqrestore(&io_request_lock, flags);
+               return;
+       }
+
+       if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) {
+               /*
+                * Not expecting an interrupt from this drive.
+                * That means this could be:
+                *      (1) an interrupt from another PCI device
+                *      sharing the same PCI INT# as us.
+                * or   (2) a drive just entered sleep or standby mode,
+                *      and is interrupting to let us know.
+                * or   (3) a spurious interrupt of unknown origin.
+                *
+                * For PCI, we cannot tell the difference,
+                * so in that case we just ignore it and hope it goes away.
+                */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+               if (IDE_PCI_DEVID_EQ(hwif->pci_devid, IDE_PCI_DEVID_NULL))
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+               {
+                       /*
+                        * Probably not a shared PCI interrupt,
+                        * so we can safely try to do something about it:
+                        */
+                       unexpected_intr(irq, hwgroup);
+#ifdef CONFIG_BLK_DEV_IDEPCI
+               } else {
+                       /*
+                        * Whack the status register, just in case we have a leftover pending IRQ.
+                        */
+                       (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+               }
+               spin_unlock_irqrestore(&io_request_lock, flags);
+               return;
+       }
+       drive = hwgroup->drive;
+       if (!drive) {
+               /*
+                * This should NEVER happen, and there isn't much we could do about it here.
+                */
+               spin_unlock_irqrestore(&io_request_lock, flags);
+               return;
+       }
+       if (!drive_is_ready(drive)) {
+               /*
+                * This happens regularly when we share a PCI IRQ with another device.
+                * Unfortunately, it can also happen with some buggy drives that trigger
+                * the IRQ before their status register is up to date.  Hopefully we have
+                * enough advance overhead that the latter isn't a problem.
+                */
+               spin_unlock_irqrestore(&io_request_lock, flags);
+               return;
+       }
+       if (!hwgroup->busy) {
+               hwgroup->busy = 1;      /* paranoia */
+               printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
+       }
+       hwgroup->handler = NULL;
+       del_timer(&hwgroup->timer);
+       spin_unlock(&io_request_lock);
+
+       if (drive->unmask)
+               ide__sti();     /* local CPU only */
+       startstop = handler(drive);             /* service this interrupt, may set handler for next interrupt */
+       spin_lock_irq(&io_request_lock);
+
+       /*
+        * Note that handler() may have set things up for another
+        * interrupt to occur soon, but it cannot happen until
+        * we exit from this routine, because it will be the
+        * same irq as is currently being serviced here, and Linux
+        * won't allow another of the same (on any CPU) until we return.
+        */
+       set_recovery_timer(HWIF(drive));
+       drive->service_time = jiffies - drive->service_start;
+       if (startstop == ide_stopped) {
+               if (hwgroup->handler == NULL) { /* paranoia */
+                       hwgroup->busy = 0;
+                       ide_do_request(hwgroup, hwif->irq);
+               } else {
+                       printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name);
+               }
+       }
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * get_info_ptr() returns the (ide_drive_t *) for a given device number.
+ * It returns NULL if the given device number does not match any present drives.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev)
+{
+       int             major = MAJOR(i_rdev);
+#if 0
+       int             minor = MINOR(i_rdev) & PARTN_MASK;
+#endif
+       unsigned int    h;
+
+       for (h = 0; h < MAX_HWIFS; ++h) {
+               ide_hwif_t  *hwif = &ide_hwifs[h];
+               if (hwif->present && major == hwif->major) {
+                       unsigned unit = DEVICE_NR(i_rdev);
+                       if (unit < MAX_DRIVES) {
+                               ide_drive_t *drive = &hwif->drives[unit];
+#if 0
+                               if ((drive->present) && (drive->part[minor].nr_sects))
+#else
+                               if (drive->present)
+#endif
+                                       return drive;
+                       }
+                       break;
+               }
+       }
+       return NULL;
+}
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq)
+{
+       memset(rq, 0, sizeof(*rq));
+       rq->cmd = IDE_DRIVE_CMD;
+}
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed.  This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed.  As above,
+ * this is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action)
+{
+       unsigned long flags;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       unsigned int major = HWIF(drive)->major;
+       struct list_head *queue_head = &drive->queue.queue_head;
+       /*DECLARE_COMPLETION(wait);*/
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+       if (HWIF(drive)->chipset == ide_pdc4030 && rq->buffer != NULL)
+               return -ENOSYS;  /* special drive cmds not supported */
+#endif
+       rq->errors = 0;
+       rq->rq_status = RQ_ACTIVE;
+       rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS);
+       if (action == ide_wait) { 
+               printk("SMH says: wait on IDE device but no queue :-(\n"); 
+               return 0; 
+       } 
+       spin_lock_irqsave(&io_request_lock, flags);
+       if (list_empty(queue_head) || action == ide_preempt) {
+               if (action == ide_preempt)
+                       hwgroup->rq = NULL;
+       } else {
+               if (action == ide_wait || action == ide_end) {
+                       queue_head = queue_head->prev;
+               } else
+                       queue_head = queue_head->next;
+       }
+       list_add(&rq->queue, queue_head);
+       ide_do_request(hwgroup, 0);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+       return 0;
+
+}
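+
+/*
+ * Usage sketch (illustrative; the opcode is an arbitrary example --
+ * any DRIVE_CMD-style command byte works the same way):
+ *
+ *	struct request rq;
+ *	byte args[4] = { WIN_IDLEIMMEDIATE, 0, 0, 0 };
+ *
+ *	ide_init_drive_cmd(&rq);
+ *	rq.buffer = args;
+ *	ide_do_drive_cmd(drive, &rq, ide_end);
+ *
+ * Note that in this tree ide_wait is stubbed out (see the "SMH says"
+ * printk above), so callers cannot block on completion.
+ */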
+
+/*
+ * This routine is called to flush all partitions and partition tables
+ * for a changed disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0.  If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int ide_revalidate_disk (kdev_t i_rdev)
+{
+       ide_drive_t *drive;
+       ide_hwgroup_t *hwgroup;
+       unsigned int p, major, minor;
+       unsigned long flags;
+
+       if ((drive = get_info_ptr(i_rdev)) == NULL)
+               return -ENODEV;
+       major = MAJOR(i_rdev);
+       minor = drive->select.b.unit << PARTN_BITS;
+       hwgroup = HWGROUP(drive);
+       spin_lock_irqsave(&io_request_lock, flags);
+       if (drive->busy || (drive->usage > 1)) {
+               spin_unlock_irqrestore(&io_request_lock, flags);
+               return -EBUSY;
+	}
+       drive->busy = 1;
+       MOD_INC_USE_COUNT;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       for (p = 0; p < (1<<PARTN_BITS); ++p) {
+               if (drive->part[p].nr_sects > 0) {
+                       kdev_t devp = MKDEV(major, minor+p);
+                       invalidate_device(devp, 1);
+               }
+               drive->part[p].start_sect = 0;
+               drive->part[p].nr_sects   = 0;
+	}
+
+       if (DRIVER(drive)->revalidate)
+               DRIVER(drive)->revalidate(drive);
+
+       drive->busy = 0;
+       /*wake_up(&drive->wqueue);*/
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static void revalidate_drives (void)
+{
+       ide_hwif_t *hwif;
+       ide_drive_t *drive;
+       int index, unit;
+
+       for (index = 0; index < MAX_HWIFS; ++index) {
+               hwif = &ide_hwifs[index];
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       drive = &ide_hwifs[index].drives[unit];
+                       if (drive->revalidate) {
+                               drive->revalidate = 0;
+                               if (!initializing)
+                                       (void) ide_revalidate_disk(MKDEV(hwif->major, unit<<PARTN_BITS));
+                       }
+               }
+       }
+}
+
+static void ide_probe_module (void)
+{
+       if (!ide_probe) {
+#if defined(CONFIG_KMOD) && defined(CONFIG_BLK_DEV_IDE_MODULE)
+               (void) request_module("ide-probe-mod");
+#endif /* (CONFIG_KMOD) && (CONFIG_BLK_DEV_IDE_MODULE) */
+       } else {
+               (void) ide_probe->init();
+       }
+       revalidate_drives();
+}
+
+static void ide_driver_module (void)
+{
+       int index;
+       ide_module_t *module = ide_modules;
+
+       for (index = 0; index < MAX_HWIFS; ++index)
+               if (ide_hwifs[index].present)
+                       goto search;
+       ide_probe_module();
+search:
+       while (module) {
+               (void) module->init();
+               module = module->next;
+       }
+       revalidate_drives();
+}
+
+static int ide_open (struct inode * inode, struct file * filp)
+{
+       ide_drive_t *drive;
+
+       if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+               return -ENXIO;
+       if (drive->driver == NULL)
+               ide_driver_module();
+#ifdef CONFIG_KMOD
+       if (drive->driver == NULL) {
+               if (drive->media == ide_disk)
+                       (void) request_module("ide-disk");
+               if (drive->media == ide_cdrom)
+                       (void) request_module("ide-cd");
+               if (drive->media == ide_tape)
+                       (void) request_module("ide-tape");
+               if (drive->media == ide_floppy)
+                       (void) request_module("ide-floppy");
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+               if (drive->media == ide_scsi)
+                       (void) request_module("ide-scsi");
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+       }
+#endif /* CONFIG_KMOD */
+#if 0
+       while (drive->busy)
+               sleep_on(&drive->wqueue);
+#endif
+       drive->usage++;
+       if (drive->driver != NULL)
+               return DRIVER(drive)->open(inode, filp, drive);
+       printk ("%s: driver not present\n", drive->name);
+       drive->usage--;
+       return -ENXIO;
+}
+
+/*
+ * Releasing a block device means we sync() it, so that it can safely
+ * be forgotten about...
+ */
+static int ide_release (struct inode * inode, struct file * file)
+{
+       ide_drive_t *drive;
+
+       if ((drive = get_info_ptr(inode->i_rdev)) != NULL) {
+               drive->usage--;
+               if (drive->driver != NULL)
+                       DRIVER(drive)->release(inode, file, drive);
+       }
+       return 0;
+}
+
+int ide_replace_subdriver (ide_drive_t *drive, const char *driver)
+{
+       if (!drive->present || drive->busy || drive->usage)
+               goto abort;
+       if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+               goto abort;
+       strncpy(drive->driver_req, driver, 9);
+       ide_driver_module();
+       drive->driver_req[0] = 0;
+       ide_driver_module();
+       if (DRIVER(drive) && !strcmp(DRIVER(drive)->name, driver))
+               return 0;
+abort:
+       return 1;
+}
+
+#ifdef CONFIG_PROC_FS
+ide_proc_entry_t generic_subdriver_entries[] = {
+       { "capacity",   S_IFREG|S_IRUGO,        proc_ide_read_capacity, NULL },
+       { NULL, 0, NULL, NULL }
+};
+#endif
+
+/*
+ * Note that we only release the standard ports,
+ * and do not even try to handle any extra ports
+ * allocated for weird IDE interface chipsets.
+ */
+void hwif_unregister (ide_hwif_t *hwif)
+{
+       if (hwif->straight8) {
+               ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8);
+               goto jump_eight;
+       }
+       if (hwif->io_ports[IDE_DATA_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 1);
+       if (hwif->io_ports[IDE_ERROR_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_ERROR_OFFSET], 1);
+       if (hwif->io_ports[IDE_NSECTOR_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1);
+       if (hwif->io_ports[IDE_SECTOR_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1);
+       if (hwif->io_ports[IDE_LCYL_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_LCYL_OFFSET], 1);
+       if (hwif->io_ports[IDE_HCYL_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_HCYL_OFFSET], 1);
+       if (hwif->io_ports[IDE_SELECT_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_SELECT_OFFSET], 1);
+       if (hwif->io_ports[IDE_STATUS_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_STATUS_OFFSET], 1);
+jump_eight:
+       if (hwif->io_ports[IDE_CONTROL_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+       if (hwif->io_ports[IDE_IRQ_OFFSET])
+               ide_release_region(hwif->io_ports[IDE_IRQ_OFFSET], 1);
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+}
+
+void ide_unregister (unsigned int index)
+{
+       struct gendisk *gd;
+       ide_drive_t *drive, *d;
+       ide_hwif_t *hwif, *g;
+       ide_hwgroup_t *hwgroup;
+       int irq_count = 0, unit, i;
+       unsigned long flags;
+       unsigned int p, minor;
+       ide_hwif_t old_hwif;
+
+       if (index >= MAX_HWIFS)
+               return;
+       save_flags(flags);      /* all CPUs */
+       cli();                  /* all CPUs */
+       hwif = &ide_hwifs[index];
+       if (!hwif->present)
+               goto abort;
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               drive = &hwif->drives[unit];
+               if (!drive->present)
+                       continue;
+               if (drive->busy || drive->usage)
+                       goto abort;
+               if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+                       goto abort;
+       }
+       hwif->present = 0;
+
+       /*
+        * All clear?  Then blow away the buffer cache
+        */
+       sti();
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               drive = &hwif->drives[unit];
+               if (!drive->present)
+                       continue;
+               minor = drive->select.b.unit << PARTN_BITS;
+               for (p = 0; p < (1<<PARTN_BITS); ++p) {
+                       if (drive->part[p].nr_sects > 0) {
+                               kdev_t devp = MKDEV(hwif->major, minor+p);
+                               invalidate_device(devp, 0);
+                       }
+               }
+#ifdef CONFIG_PROC_FS
+               destroy_proc_ide_drives(hwif);
+#endif
+       }
+       cli();
+       hwgroup = hwif->hwgroup;
+
+       /*
+        * free the irq if we were the only hwif using it
+        */
+       g = hwgroup->hwif;
+       do {
+               if (g->irq == hwif->irq)
+                       ++irq_count;
+               g = g->next;
+       } while (g != hwgroup->hwif);
+       if (irq_count == 1)
+               free_irq(hwif->irq, hwgroup);
+
+       /*
+        * Note that we only release the standard ports,
+        * and do not even try to handle any extra ports
+        * allocated for weird IDE interface chipsets.
+        */
+       hwif_unregister(hwif);
+
+       /*
+        * Remove us from the hwgroup, and free
+        * the hwgroup if we were the only member
+        */
+       d = hwgroup->drive;
+       for (i = 0; i < MAX_DRIVES; ++i) {
+               drive = &hwif->drives[i];
+#ifdef DEVFS_MUST_DIE
+               if (drive->de) {
+                       devfs_unregister (drive->de);
+                       drive->de = NULL;
+               }
+#endif
+               if (!drive->present)
+                       continue;
+               while (hwgroup->drive->next != drive)
+                       hwgroup->drive = hwgroup->drive->next;
+               hwgroup->drive->next = drive->next;
+               if (hwgroup->drive == drive)
+                       hwgroup->drive = NULL;
+               if (drive->id != NULL) {
+                       kfree(drive->id);
+                       drive->id = NULL;
+               }
+               drive->present = 0;
+               blk_cleanup_queue(&drive->queue);
+       }
+       if (d->present)
+               hwgroup->drive = d;
+       while (hwgroup->hwif->next != hwif)
+               hwgroup->hwif = hwgroup->hwif->next;
+       hwgroup->hwif->next = hwif->next;
+       if (hwgroup->hwif == hwif)
+               kfree(hwgroup);
+       else
+               hwgroup->hwif = HWIF(hwgroup->drive);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+       if (hwif->dma_base) {
+               (void) ide_release_dma(hwif);
+               hwif->dma_base = 0;
+       }
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+       /*
+        * Remove us from the kernel's knowledge
+        */
+       unregister_blkdev(hwif->major, hwif->name);
+       kfree(blksize_size[hwif->major]);
+       kfree(max_sectors[hwif->major]);
+       /*kfree(max_readahead[hwif->major]);*/
+       blk_dev[hwif->major].data = NULL;
+       blk_dev[hwif->major].queue = NULL;
+       blksize_size[hwif->major] = NULL;
+       gd = hwif->gd;
+       if (gd) {
+               del_gendisk(gd);
+               kfree(gd->sizes);
+               kfree(gd->part);
+#ifdef DEVFS_MUST_DIE
+               if (gd->de_arr)
+                       kfree (gd->de_arr);
+#endif
+               if (gd->flags)
+                       kfree (gd->flags);
+               kfree(gd);
+               hwif->gd = NULL;
+       }
+       old_hwif                = *hwif;
+       init_hwif_data (index); /* restore hwif data to pristine status */
+       hwif->hwgroup           = old_hwif.hwgroup;
+       hwif->tuneproc          = old_hwif.tuneproc;
+       hwif->speedproc         = old_hwif.speedproc;
+       hwif->selectproc        = old_hwif.selectproc;
+       hwif->resetproc         = old_hwif.resetproc;
+       hwif->intrproc          = old_hwif.intrproc;
+       hwif->maskproc          = old_hwif.maskproc;
+       hwif->quirkproc         = old_hwif.quirkproc;
+       hwif->rwproc            = old_hwif.rwproc;
+       hwif->ideproc           = old_hwif.ideproc;
+       hwif->dmaproc           = old_hwif.dmaproc;
+       hwif->busproc           = old_hwif.busproc;
+       hwif->bus_state         = old_hwif.bus_state;
+       hwif->dma_base          = old_hwif.dma_base;
+       hwif->dma_extra         = old_hwif.dma_extra;
+       hwif->config_data       = old_hwif.config_data;
+       hwif->select_data       = old_hwif.select_data;
+       hwif->proc              = old_hwif.proc;
+#ifndef CONFIG_BLK_DEV_IDECS
+       hwif->irq               = old_hwif.irq;
+#endif /* CONFIG_BLK_DEV_IDECS */
+       hwif->major             = old_hwif.major;
+       hwif->chipset           = old_hwif.chipset;
+       hwif->autodma           = old_hwif.autodma;
+       hwif->udma_four         = old_hwif.udma_four;
+#ifdef CONFIG_BLK_DEV_IDEPCI
+       hwif->pci_dev           = old_hwif.pci_dev;
+       hwif->pci_devid         = old_hwif.pci_devid;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+       hwif->straight8         = old_hwif.straight8;
+       hwif->hwif_data         = old_hwif.hwif_data;
+abort:
+       restore_flags(flags);   /* all CPUs */
+}
+
+/*
+ * Set up the hw_regs_t structure described by the parameters.  You
+ * may either fill in the hw structure yourself or use this routine
+ * to do it for you.
+ */
+void ide_setup_ports ( hw_regs_t *hw,
+                       ide_ioreg_t base, int *offsets,
+                       ide_ioreg_t ctrl, ide_ioreg_t intr,
+                       ide_ack_intr_t *ack_intr, int irq)
+{
+       int i;
+
+       for (i = 0; i < IDE_NR_PORTS; i++) {
+               if (offsets[i] == -1) {
+                       switch(i) {
+                               case IDE_CONTROL_OFFSET:
+                                       hw->io_ports[i] = ctrl;
+                                       break;
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+                               case IDE_IRQ_OFFSET:
+                                       hw->io_ports[i] = intr;
+                                       break;
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+                               default:
+                                       hw->io_ports[i] = 0;
+                                       break;
+                       }
+               } else {
+                       hw->io_ports[i] = base + offsets[i];
+               }
+       }
+       hw->irq = irq;
+       hw->dma = NO_DMA;
+       hw->ack_intr = ack_intr;
+}
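+
+/*
+ * Illustrative sketch (not part of the driver): a port-mapped
+ * interface whose task-file registers sit at consecutive addresses
+ * could be described as below, assuming the usual IDE_NR_PORTS == 10
+ * layout; "base", "ctl" and "irq" are hypothetical values for the
+ * example only:
+ *
+ *     hw_regs_t hw;
+ *     int offsets[IDE_NR_PORTS] = { 0, 1, 2, 3, 4, 5, 6, 7, -1, -1 };
+ *
+ *     ide_setup_ports(&hw, base, offsets, ctl, 0, NULL, irq);
+ *
+ * The -1 entries make the routine fall back on the explicit ctrl
+ * (and, on Amiga/Mac, intr) arguments.
+ */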
+
+/*
+ * Register an IDE interface, specifying exactly the registers etc.
+ * The global "initializing" flag must be set iff this is called
+ * before probes have taken place.
+ */
+int ide_register_hw (hw_regs_t *hw, ide_hwif_t **hwifp)
+{
+       int index, retry = 1;
+       ide_hwif_t *hwif;
+
+       do {
+               for (index = 0; index < MAX_HWIFS; ++index) {
+                       hwif = &ide_hwifs[index];
+                       if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+                               goto found;
+               }
+               for (index = 0; index < MAX_HWIFS; ++index) {
+                       hwif = &ide_hwifs[index];
+                       if ((!hwif->present && !hwif->mate && !initializing) ||
+                           (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+                               goto found;
+               }
+               for (index = 0; index < MAX_HWIFS; index++)
+                       ide_unregister(index);
+       } while (retry--);
+       return -1;
+found:
+       if (hwif->present)
+               ide_unregister(index);
+       if (hwif->present)
+               return -1;
+       memcpy(&hwif->hw, hw, sizeof(*hw));
+       memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+       hwif->irq = hw->irq;
+       hwif->noprobe = 0;
+       hwif->chipset = hw->chipset;
+
+       if (!initializing) {
+               ide_probe_module();
+#ifdef CONFIG_PROC_FS
+               create_proc_ide_interfaces();
+#endif
+               ide_driver_module();
+       }
+
+       if (hwifp)
+               *hwifp = hwif;
+
+       return (initializing || hwif->present) ? index : -1;
+}
+
+/*
+ * Compatibility function for existing drivers.  If you want
+ * something different, use the function above.
+ */
+int ide_register (int arg1, int arg2, int irq)
+{
+       hw_regs_t hw;
+       ide_init_hwif_ports(&hw, (ide_ioreg_t) arg1, (ide_ioreg_t) arg2, NULL);
+       hw.irq = irq;
+       return ide_register_hw(&hw, NULL);
+}
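+
+/*
+ * Example (sketch only): registering a legacy primary-channel
+ * interface at the standard ISA addresses would look like
+ *
+ *     ide_register(0x1f0, 0x3f6, 14);
+ *
+ * which builds a hw_regs_t via ide_init_hwif_ports() and hands it to
+ * ide_register_hw() above.
+ */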
+
+void ide_add_setting (ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set)
+{
+       ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL;
+
+       while ((*p) && strcmp((*p)->name, name) < 0)
+               p = &((*p)->next);
+       if ((setting = kmalloc(sizeof(*setting), GFP_KERNEL)) == NULL)
+               goto abort;
+       memset(setting, 0, sizeof(*setting));
+       if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL)
+               goto abort;
+       strcpy(setting->name, name);
+       setting->rw = rw;
+       setting->read_ioctl = read_ioctl;
+       setting->write_ioctl = write_ioctl;
+       setting->data_type = data_type;
+       setting->min = min;
+       setting->max = max;
+       setting->mul_factor = mul_factor;
+       setting->div_factor = div_factor;
+       setting->data = data;
+       setting->set = set;
+       setting->next = *p;
+       if (drive->driver)
+               setting->auto_remove = 1;
+       *p = setting;
+       return;
+abort:
+       if (setting)
+               kfree(setting);
+}
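+
+/*
+ * For illustration only: a driver could expose drive->nice1 as a
+ * read/write byte setting with no associated ioctls and no set
+ * handler via
+ *
+ *     ide_add_setting(drive, "nice1", SETTING_RW, -1, -1,
+ *                     TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL);
+ *
+ * (this mirrors the "nice1" entry in ide_add_generic_settings() below).
+ */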
+
+void ide_remove_setting (ide_drive_t *drive, char *name)
+{
+       ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting;
+
+       while ((*p) && strcmp((*p)->name, name))
+               p = &((*p)->next);
+       if ((setting = (*p)) == NULL)
+               return;
+       (*p) = setting->next;
+       kfree(setting->name);
+       kfree(setting);
+}
+
+static ide_settings_t *ide_find_setting_by_ioctl (ide_drive_t *drive, int cmd)
+{
+       ide_settings_t *setting = drive->settings;
+
+       while (setting) {
+               if (setting->read_ioctl == cmd || setting->write_ioctl == cmd)
+                       break;
+               setting = setting->next;
+       }
+       return setting;
+}
+
+ide_settings_t *ide_find_setting_by_name (ide_drive_t *drive, char *name)
+{
+       ide_settings_t *setting = drive->settings;
+
+       while (setting) {
+               if (strcmp(setting->name, name) == 0)
+                       break;
+               setting = setting->next;
+       }
+       return setting;
+}
+
+static void auto_remove_settings (ide_drive_t *drive)
+{
+       ide_settings_t *setting;
+repeat:
+       setting = drive->settings;
+       while (setting) {
+               if (setting->auto_remove) {
+                       ide_remove_setting(drive, setting->name);
+                       goto repeat;
+               }
+               setting = setting->next;
+       }
+}
+
+int ide_read_setting (ide_drive_t *drive, ide_settings_t *setting)
+{
+       int             val = -EINVAL;
+       unsigned long   flags;
+
+       if ((setting->rw & SETTING_READ)) {
+               spin_lock_irqsave(&io_request_lock, flags);
+               switch(setting->data_type) {
+                       case TYPE_BYTE:
+                               val = *((u8 *) setting->data);
+                               break;
+                       case TYPE_SHORT:
+                               val = *((u16 *) setting->data);
+                               break;
+                       case TYPE_INT:
+                       case TYPE_INTA:
+                               val = *((u32 *) setting->data);
+                               break;
+               }
+               spin_unlock_irqrestore(&io_request_lock, flags);
+       }
+       return val;
+}
+
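+/*
+ * Busy-wait (for up to about 3 seconds) until the hwgroup goes idle.
+ * Note that on success this returns 0 with io_request_lock held; the
+ * caller is expected to drop the lock, as ide_write_setting() below
+ * does.  On timeout it returns -EBUSY with the lock released.
+ */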
+int ide_spin_wait_hwgroup (ide_drive_t *drive)
+{
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       unsigned long timeout = jiffies + (3 * HZ);
+
+       spin_lock_irq(&io_request_lock);
+
+       while (hwgroup->busy) {
+               unsigned long lflags;
+               spin_unlock_irq(&io_request_lock);
+               __save_flags(lflags);   /* local CPU only */
+               __sti();                /* local CPU only; needed for jiffies */
+               if (0 < (signed long)(jiffies - timeout)) {
+                       __restore_flags(lflags);        /* local CPU only */
+                       printk("%s: channel busy\n", drive->name);
+                       return -EBUSY;
+               }
+               __restore_flags(lflags);        /* local CPU only */
+               spin_lock_irq(&io_request_lock);
+       }
+       return 0;
+}
+
+/*
+ * FIXME:  This should be changed to enqueue a special request
+ * to the driver to change settings, and then wait on a semaphore
+ * for completion.  The current polling scheme is kludgy, though
+ * safe enough.
+ */
+int ide_write_setting (ide_drive_t *drive, ide_settings_t *setting, int val)
+{
+       int i;
+       u32 *p;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       if (!(setting->rw & SETTING_WRITE))
+               return -EPERM;
+       if (val < setting->min || val > setting->max)
+               return -EINVAL;
+       if (setting->set)
+               return setting->set(drive, val);
+       if (ide_spin_wait_hwgroup(drive))
+               return -EBUSY;
+       switch (setting->data_type) {
+               case TYPE_BYTE:
+                       *((u8 *) setting->data) = val;
+                       break;
+               case TYPE_SHORT:
+                       *((u16 *) setting->data) = val;
+                       break;
+               case TYPE_INT:
+                       *((u32 *) setting->data) = val;
+                       break;
+               case TYPE_INTA:
+                       p = (u32 *) setting->data;
+                       for (i = 0; i < 1 << PARTN_BITS; i++, p++)
+                               *p = val;
+                       break;
+       }
+       spin_unlock_irq(&io_request_lock);
+       return 0;
+}
+
+static int set_io_32bit(ide_drive_t *drive, int arg)
+{
+       drive->io_32bit = arg;
+#ifdef CONFIG_BLK_DEV_DTC2278
+       if (HWIF(drive)->chipset == ide_dtc2278)
+               HWIF(drive)->drives[!drive->select.b.unit].io_32bit = arg;
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+       return 0;
+}
+
+static int set_using_dma (ide_drive_t *drive, int arg)
+{
+       if (!drive->driver || !DRIVER(drive)->supports_dma)
+               return -EPERM;
+       if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc)
+               return -EPERM;
+       if (HWIF(drive)->dmaproc(arg ? ide_dma_on : ide_dma_off, drive))
+               return -EIO;
+       return 0;
+}
+
+static int set_pio_mode (ide_drive_t *drive, int arg)
+{
+       struct request rq;
+
+       if (!HWIF(drive)->tuneproc)
+               return -ENOSYS;
+       if (drive->special.b.set_tune)
+               return -EBUSY;
+       ide_init_drive_cmd(&rq);
+       drive->tune_req = (byte) arg;
+       drive->special.b.set_tune = 1;
+       (void) ide_do_drive_cmd (drive, &rq, ide_wait);
+       return 0;
+}
+
+void ide_add_generic_settings (ide_drive_t *drive)
+{
+/*
+ *                     drive   setting name            read/write access                               read ioctl              write ioctl             data type       min     max                             mul_factor      div_factor      data pointer                    set function
+ */
+       ide_add_setting(drive,  "io_32bit",             drive->no_io_32bit ? SETTING_READ : SETTING_RW, HDIO_GET_32BIT,         HDIO_SET_32BIT,         TYPE_BYTE,      0,      1 + (SUPPORT_VLB_SYNC << 1),    1,              1,              &drive->io_32bit,               set_io_32bit);
+       ide_add_setting(drive,  "keepsettings",         SETTING_RW,                                     HDIO_GET_KEEPSETTINGS,  HDIO_SET_KEEPSETTINGS,  TYPE_BYTE,      0,      1,                              1,              1,              &drive->keep_settings,          NULL);
+       ide_add_setting(drive,  "nice1",                SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      1,                              1,              1,              &drive->nice1,                  NULL);
+       ide_add_setting(drive,  "pio_mode",             SETTING_WRITE,                                  -1,                     HDIO_SET_PIO_MODE,      TYPE_BYTE,      0,      255,                            1,              1,              NULL,                           set_pio_mode);
+       ide_add_setting(drive,  "slow",                 SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      1,                              1,              1,              &drive->slow,                   NULL);
+       ide_add_setting(drive,  "unmaskirq",            drive->no_unmask ? SETTING_READ : SETTING_RW,   HDIO_GET_UNMASKINTR,    HDIO_SET_UNMASKINTR,    TYPE_BYTE,      0,      1,                              1,              1,              &drive->unmask,                 NULL);
+       ide_add_setting(drive,  "using_dma",            SETTING_RW,                                     HDIO_GET_DMA,           HDIO_SET_DMA,           TYPE_BYTE,      0,      1,                              1,              1,              &drive->using_dma,              set_using_dma);
+       ide_add_setting(drive,  "ide_scsi",             SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      1,                              1,              1,              &drive->scsi,                   NULL);
+       ide_add_setting(drive,  "init_speed",           SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      69,                             1,              1,              &drive->init_speed,             NULL);
+       ide_add_setting(drive,  "current_speed",        SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      69,                             1,              1,              &drive->current_speed,          NULL);
+       ide_add_setting(drive,  "number",               SETTING_RW,                                     -1,                     -1,                     TYPE_BYTE,      0,      3,                              1,              1,              &drive->dn,                     NULL);
+}
+
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf)
+{
+       struct request rq;
+       byte buffer[4];
+
+       if (!buf)
+               buf = buffer;
+       memset(buf, 0, 4 + SECTOR_WORDS * 4 * sectors);
+       ide_init_drive_cmd(&rq);
+       rq.buffer = buf;
+       *buf++ = cmd;
+       *buf++ = nsect;
+       *buf++ = feature;
+       *buf++ = sectors;
+       return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
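+
+/*
+ * Sketch of a caller (illustrative only): fetching one sector of
+ * identify data with WIN_IDENTIFY would look roughly like
+ *
+ *     byte buf[4 + SECTOR_WORDS * 4];
+ *     int err = ide_wait_cmd(drive, WIN_IDENTIFY, 0, 0, 1, buf);
+ *
+ * buf[0..3] carry the command/nsect/feature/sector-count header that
+ * ide_wait_cmd() fills in, followed by the returned data.  Note that
+ * callers requesting data must supply buf themselves: the 4-byte
+ * fallback buffer above only has room for the header.
+ */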
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf)
+{
+       struct request rq;
+
+       ide_init_drive_cmd(&rq);
+       rq.cmd = IDE_DRIVE_TASK;
+       rq.buffer = buf;
+       return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+/*
+ * Delay for *at least* 50ms.  As we don't know how much time is left
+ * until the next tick occurs, we wait an extra tick to be safe.
+ * This is used only during the probing/polling for drives at boot time.
+ *
+ * However, it may prove useful elsewhere as well, so we export it now.
+ * In future this may become a settable millisecond delay.
+ */
+void ide_delay_50ms (void)
+{
+#ifndef CONFIG_BLK_DEV_IDECS
+       mdelay(50);
+#else
+       __set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule_timeout(HZ/20);
+#endif /* CONFIG_BLK_DEV_IDECS */
+}
+
+int system_bus_clock (void)
+{
+       return (int) (system_bus_speed ? system_bus_speed : ide_system_bus_speed());
+}
+
+int ide_reinit_drive (ide_drive_t *drive)
+{
+       switch (drive->media) {
+#ifdef CONFIG_BLK_DEV_IDECD
+               case ide_cdrom:
+               {
+                       extern int ide_cdrom_reinit(ide_drive_t *drive);
+                       if (ide_cdrom_reinit(drive))
+                               return 1;
+                       break;
+               }
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+               case ide_disk:
+               {
+                       extern int idedisk_reinit(ide_drive_t *drive);
+                       if (idedisk_reinit(drive))
+                               return 1;
+                       break;
+               }
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+               case ide_floppy:
+               {
+                       extern int idefloppy_reinit(ide_drive_t *drive);
+                       if (idefloppy_reinit(drive))
+                               return 1;
+                       break;
+               }
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+               case ide_tape:
+               {
+                       extern int idetape_reinit(ide_drive_t *drive);
+                       if (idetape_reinit(drive))
+                               return 1;
+                       break;
+               }
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+/*
+ *              {
+ *                      extern int idescsi_reinit(ide_drive_t *drive);
+ *                      if (idescsi_reinit(drive))
+ *                              return 1;
+ *                      break;
+ * }
+ */
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+               default:
+                       return 1;
+       }
+       return 0;
+}
+
+static int ide_ioctl (struct inode *inode, struct file *file,
+                       unsigned int cmd, unsigned long arg)
+{
+       int err = 0, major, minor;
+       ide_drive_t *drive;
+       struct request rq;
+       kdev_t dev;
+       ide_settings_t *setting;
+
+       if (!inode || !(dev = inode->i_rdev))
+               return -EINVAL;
+       major = MAJOR(dev); minor = MINOR(dev);
+       if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+               return -ENODEV;
+
+       if ((setting = ide_find_setting_by_ioctl(drive, cmd)) != NULL) {
+               if (cmd == setting->read_ioctl) {
+                       err = ide_read_setting(drive, setting);
+                       return err >= 0 ? put_user(err, (long *) arg) : err;
+               } else {
+                       if ((MINOR(inode->i_rdev) & PARTN_MASK))
+                               return -EINVAL;
+                       return ide_write_setting(drive, setting, arg);
+               }
+       }
+
+       ide_init_drive_cmd (&rq);
+       switch (cmd) {
+               case HDIO_GETGEO:
+               {
+                       struct hd_geometry *loc = (struct hd_geometry *) arg;
+                       unsigned short bios_cyl = drive->bios_cyl; /* truncate */
+                       if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+                       if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+                       if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+                       if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
+                       if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+                               (unsigned long *) &loc->start)) return -EFAULT;
+                       return 0;
+               }
+
+               case HDIO_GETGEO_BIG:
+               {
+                       struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+                       if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+                       if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+                       if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+                       if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+                       if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+                               (unsigned long *) &loc->start)) return -EFAULT;
+                       return 0;
+               }
+
+               case HDIO_GETGEO_BIG_RAW:
+               {
+                       struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+                       if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
+                       if (put_user(drive->head, (byte *) &loc->heads)) return -EFAULT;
+                       if (put_user(drive->sect, (byte *) &loc->sectors)) return -EFAULT;
+                       if (put_user(drive->cyl, (unsigned int *) &loc->cylinders)) return -EFAULT;
+                       if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+                               (unsigned long *) &loc->start)) return -EFAULT;
+                       return 0;
+               }
+
+#if 0
+               case BLKGETSIZE:   /* Return device size */
+                       return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg);
+               case BLKGETSIZE64:
+                       return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, (u64 *) arg);
+
+               case BLKRRPART: /* Re-read partition tables */
+                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+                       return ide_revalidate_disk(inode->i_rdev);
+#endif
+
+               case HDIO_OBSOLETE_IDENTITY:
+               case HDIO_GET_IDENTITY:
+                       if (MINOR(inode->i_rdev) & PARTN_MASK)
+                               return -EINVAL;
+                       if (drive->id == NULL)
+                               return -ENOMSG;
+                       if (copy_to_user((char *)arg, (char *)drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142))
+                               return -EFAULT;
+                       return 0;
+
+               case HDIO_GET_NICE:
+                       return put_user(drive->dsc_overlap      <<      IDE_NICE_DSC_OVERLAP    |
+                                       drive->atapi_overlap    <<      IDE_NICE_ATAPI_OVERLAP  |
+                                       drive->nice0            <<      IDE_NICE_0              |
+                                       drive->nice1            <<      IDE_NICE_1              |
+                                       drive->nice2            <<      IDE_NICE_2,
+                                       (long *) arg);
+
+#ifdef CONFIG_IDE_TASK_IOCTL
+               case HDIO_DRIVE_TASKFILE:
+                       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+                               return -EACCES;
+                       switch(drive->media) {
+                               case ide_disk:
+                                       return ide_taskfile_ioctl(drive, inode, file, cmd, arg);
+#ifdef CONFIG_PKT_TASK_IOCTL
+                               case ide_cdrom:
+                               case ide_tape:
+                               case ide_floppy:
+                                       return pkt_taskfile_ioctl(drive, inode, file, cmd, arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+                               default:
+                                       return -ENOMSG;
+                       }
+#endif /* CONFIG_IDE_TASK_IOCTL */
+
+               case HDIO_DRIVE_CMD:
+               {
+                       byte args[4], *argbuf = args;
+                       byte xfer_rate = 0;
+                       int argsize = 4;
+                       ide_task_t tfargs;
+
+                       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+                               return -EACCES;
+                       if (NULL == (void *) arg)
+                               return ide_do_drive_cmd(drive, &rq, ide_wait);
+                       if (copy_from_user(args, (void *)arg, 4))
+                               return -EFAULT;
+
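+                       /*
+                        * args[] layout from user space: [0] command,
+                        * [1] sector number, [2] feature, [3] sector count.
+                        */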
+                       tfargs.tfRegister[IDE_FEATURE_OFFSET] = args[2];
+                       tfargs.tfRegister[IDE_NSECTOR_OFFSET] = args[3];
+                       tfargs.tfRegister[IDE_SECTOR_OFFSET]  = args[1];
+                       tfargs.tfRegister[IDE_LCYL_OFFSET]    = 0x00;
+                       tfargs.tfRegister[IDE_HCYL_OFFSET]    = 0x00;
+                       tfargs.tfRegister[IDE_SELECT_OFFSET]  = 0x00;
+                       tfargs.tfRegister[IDE_COMMAND_OFFSET] = args[0];
+
+                       if (args[3]) {
+                               argsize = 4 + (SECTOR_WORDS * 4 * args[3]);
+                               argbuf = kmalloc(argsize, GFP_KERNEL);
+                               if (argbuf == NULL)
+                                       return -ENOMEM;
+                               memcpy(argbuf, args, 4);
+                       }
+
+                       if (set_transfer(drive, &tfargs)) {
+                               xfer_rate = args[1];
+                               if (ide_ata66_check(drive, &tfargs))
+                                       goto abort;
+                       }
+
+                       err = ide_wait_cmd(drive, args[0], args[1], args[2], args[3], argbuf);
+
+                       if (!err && xfer_rate) {
+                               /* the command succeeded and a new transfer rate was requested: retune now */
+                               if ((HWIF(drive)->speedproc) != NULL)
+                                       HWIF(drive)->speedproc(drive, xfer_rate);
+                               ide_driveid_update(drive);
+                       }
+               abort:
+                       if (copy_to_user((void *)arg, argbuf, argsize))
+                               err = -EFAULT;
+                       if (argsize > 4)
+                               kfree(argbuf);
+                       return err;
+               }
+               case HDIO_DRIVE_TASK:
+               {
+                       byte args[7], *argbuf = args;
+                       int argsize = 7;
+                       if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES;
+                       if (copy_from_user(args, (void *)arg, 7))
+                               return -EFAULT;
+                       err = ide_wait_cmd_task(drive, argbuf);
+                       if (copy_to_user((void *)arg, argbuf, argsize))
+                               err = -EFAULT;
+                       return err;
+               }
+               case HDIO_SCAN_HWIF:
+               {
+                       int args[3];
+                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+                       if (copy_from_user(args, (void *)arg, 3 * sizeof(int)))
+                               return -EFAULT;
+                       if (ide_register(args[0], args[1], args[2]) == -1)
+                               return -EIO;
+                       return 0;
+               }
+               case HDIO_UNREGISTER_HWIF:
+                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+                       /* (arg >= MAX_HWIFS) checked in function */
+                       ide_unregister(arg);
+                       return 0;
+               case HDIO_SET_NICE:
+                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+                       if (drive->driver == NULL)
+                               return -EPERM;
+                       if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
+                               return -EPERM;
+                       drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1;
+                       if (drive->dsc_overlap && !DRIVER(drive)->supports_dsc_overlap) {
+                               drive->dsc_overlap = 0;
+                               return -EPERM;
+                       }
+                       drive->nice1 = (arg >> IDE_NICE_1) & 1;
+                       return 0;
+               case HDIO_DRIVE_RESET:
+               {
+                       unsigned long flags;
+                       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+#if 1
+                       spin_lock_irqsave(&io_request_lock, flags);
+                       if (hwgroup->handler != NULL) {
+                               printk("%s: ide_set_handler: handler not null; %p\n", drive->name, hwgroup->handler);
+                               (void) hwgroup->handler(drive);
+//                             hwgroup->handler = NULL;
+//                             hwgroup->expiry = NULL;
+                               hwgroup->timer.expires = jiffies;
+                               del_timer(&hwgroup->timer);
+                       }
+                       spin_unlock_irqrestore(&io_request_lock, flags);
+
+#endif
+                       (void) ide_do_reset(drive);
+                       if (drive->suspend_reset) {
+/*
+ *                             APM WAKE UP todo !!
+ *                             int nogoodpower = 1;
+ *                             while(nogoodpower) {
+ *                                     check_power1() or check_power2()
+ *                                     nogoodpower = 0;
+ *                             } 
+ *                             HWIF(drive)->multiproc(drive);
+ */
+                               return ide_revalidate_disk(inode->i_rdev);
+                       }
+                       return 0;
+               }
+#if 0
+               case BLKROSET:
+               case BLKROGET:
+               case BLKFLSBUF:
+               case BLKSSZGET:
+               case BLKPG:
+               case BLKELVGET:
+               case BLKELVSET:
+               case BLKBSZGET:
+               case BLKBSZSET:
+                       return blk_ioctl(inode->i_rdev, cmd, arg);
+#endif
+
+               case HDIO_GET_BUSSTATE:
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if (put_user(HWIF(drive)->bus_state, (long *)arg))
+                               return -EFAULT;
+                       return 0;
+
+               case HDIO_SET_BUSSTATE:
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       if (HWIF(drive)->busproc)
+                               HWIF(drive)->busproc(drive, (int)arg);
+                       return 0;
+
+               default:
+                       if (drive->driver != NULL)
+                               return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg);
+                       return -EPERM;
+       }
+}
+
+static int ide_check_media_change (kdev_t i_rdev)
+{
+       ide_drive_t *drive;
+
+       if ((drive = get_info_ptr(i_rdev)) == NULL)
+               return -ENODEV;
+       if (drive->driver != NULL)
+               return DRIVER(drive)->media_change(drive);
+       return 0;
+}
+
+void ide_fixstring (byte *s, const int bytecount, const int byteswap)
+{
+       byte *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */
+
+       if (byteswap) {
+               /* convert from big-endian to host byte order */
+               for (p = end ; p != s;) {
+                       unsigned short *pp = (unsigned short *) (p -= 2);
+                       *pp = ntohs(*pp);
+               }
+       }
+
+       /* strip leading blanks */
+       while (s != end && *s == ' ')
+               ++s;
+
+       /* compress internal blanks and strip trailing blanks */
+       while (s != end && *s) {
+               if (*s++ != ' ' || (s != end && *s && *s != ' '))
+                       *p++ = *(s-1);
+       }
+
+       /* wipe out trailing garbage */
+       while (p != end)
+               *p++ = '\0';
+}
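+
+/*
+ * Example: ATA identify strings are stored as big-endian 16-bit
+ * words, so on a little-endian host
+ *
+ *     ide_fixstring(id->model, sizeof(id->model), bswap);
+ *
+ * (with bswap standing for the drive's byte-swap flag) first swaps
+ * each byte pair, so that a raw "aMtxro  " reads as "Maxtor  ", and
+ * then strips/compresses the padding blanks.
+ */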
+
+/*
+ * stridx() returns the offset of c within s,
+ * or -1 if c is '\0' or not found within s.
+ */
+static int __init stridx (const char *s, char c)
+{
+       char *i = strchr(s, c);
+       return (i && c) ? i - s : -1;
+}
+
+/*
+ * match_parm() does parsing for ide_setup():
+ *
+ * 1. the first char of s must be '='.
+ * 2. if the remainder matches one of the supplied keywords,
+ *     the index (1 based) of the keyword is negated and returned.
+ * 3. if the remainder is a series of no more than max_vals numbers
+ *     separated by commas, the numbers are saved in vals[] and a
+ *     count of how many were saved is returned.  Base10 is assumed,
+ *     and base16 is allowed when prefixed with "0x".
+ * 4. otherwise, zero is returned.
+ */
+static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
+{
+       static const char *decimal = "0123456789";
+       static const char *hex = "0123456789abcdef";
+       int i, n;
+
+       if (*s++ == '=') {
+               /*
+                * Try matching against the supplied keywords,
+                * and return -(index+1) if we match one
+                */
+               if (keywords != NULL) {
+                       for (i = 0; *keywords != NULL; ++i) {
+                               if (!strcmp(s, *keywords++))
+                                       return -(i+1);
+                       }
+               }
+               /*
+                * Look for a series of no more than "max_vals"
+                * numeric values separated by commas, in base10,
+                * or base16 when prefixed with "0x".
+                * Return a count of how many were found.
+                */
+               for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
+                       vals[n] = i;
+                       while ((i = stridx(decimal, *++s)) >= 0)
+                               vals[n] = (vals[n] * 10) + i;
+                       if (*s == 'x' && !vals[n]) {
+                               while ((i = stridx(hex, *++s)) >= 0)
+                                       vals[n] = (vals[n] * 0x10) + i;
+                       }
+                       if (++n == max_vals)
+                               break;
+                       if (*s == ',' || *s == ';')
+                               ++s;
+               }
+               if (!*s)
+                       return n;
+       }
+       return 0;       /* zero = nothing matched */
+}
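+
+/*
+ * Examples (illustrative): with the "hd_words" keyword table used by
+ * ide_setup() below,
+ *
+ *     match_parm("=serialize", hd_words, vals, 3) returns -5, and
+ *     match_parm("=0x1f0,0x3f6,14", NULL, vals, 3) returns 3 with
+ *     vals[] = { 0x1f0, 0x3f6, 14 }.
+ */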
+
+/*
+ * ide_setup() gets called VERY EARLY during initialization,
+ * to handle kernel "command line" strings beginning with "hdx="
+ * or "ide".  Here is the complete set currently supported:
+ *
+ * "hdx="  is recognized for all "x" from "a" to "h", such as "hdc".
+ * "idex=" is recognized for all "x" from "0" to "3", such as "ide1".
+ *
+ * "hdx=noprobe"       : drive may be present, but do not probe for it
+ * "hdx=none"          : drive is NOT present, ignore cmos and do not probe
+ * "hdx=nowerr"                : ignore the WRERR_STAT bit on this drive
+ * "hdx=cdrom"         : drive is present, and is a cdrom drive
+ * "hdx=cyl,head,sect" : disk drive is present, with specified geometry
+ * "hdx=noremap"       : do not remap 0->1 even though EZD was detected
+ * "hdx=autotune"      : driver will attempt to tune interface speed
+ *                             to the fastest PIO mode supported,
+ *                             if possible for this drive only.
+ *                             Not fully supported by all chipset types,
+ *                             and quite likely to cause trouble with
+ *                             older/odd IDE drives.
+ *
+ * "hdx=slow"          : insert a huge pause after each access to the data
+ *                             port. Should be used only as a last resort.
+ *
+ * "hdx=swapdata"      : when the drive is a disk, byte swap all data
+ * "hdx=bswap"         : same as above..........
+ * "hdxlun=xx"          : set the drive last logical unit.
+ * "hdx=flash"         : allows for more than one ata_flash disk to be
+ *                             registered. In most cases, only one device
+ *                             will be present.
+ * "hdx=scsi"          : the return of the ide-scsi flag, this is useful for
+ *                             allowwing ide-floppy, ide-tape, and ide-cdrom|writers
+ *                             to use ide-scsi emulation on a device specific option.
+ * "idebus=xx"         : inform IDE driver of VESA/PCI bus speed in MHz,
+ *                             where "xx" is between 20 and 66 inclusive,
+ *                             used when tuning chipset PIO modes.
+ *                             For PCI bus, 25 is correct for a P75 system,
+ *                             30 is correct for P90,P120,P180 systems,
+ *                             and 33 is used for P100,P133,P166 systems.
+ *                             If in doubt, use idebus=33 for PCI.
+ *                             As for VLB, it is safest to not specify it.
+ *
+ * "idex=noprobe"      : do not attempt to access/use this interface
+ * "idex=base"         : probe for an interface at the addr specified,
+ *                             where "base" is usually 0x1f0 or 0x170
+ *                             and "ctl" is assumed to be "base"+0x206
+ * "idex=base,ctl"     : specify both base and ctl
+ * "idex=base,ctl,irq" : specify base, ctl, and irq number
+ * "idex=autotune"     : driver will attempt to tune interface speed
+ *                             to the fastest PIO mode supported,
+ *                             for all drives on this interface.
+ *                             Not fully supported by all chipset types,
+ *                             and quite likely to cause trouble with
+ *                             older/odd IDE drives.
+ * "idex=noautotune"   : driver will NOT attempt to tune interface speed
+ *                             This is the default for most chipsets,
+ *                             except the cmd640.
+ * "idex=serialize"    : do not overlap operations on idex and ide(x^1)
+ * "idex=four"         : four drives on idex and ide(x^1) share same ports
+ * "idex=reset"                : reset interface before first use
+ * "idex=dma"          : enable DMA by default on both drives if possible
+ * "idex=ata66"                : informs the interface that it has an 80c cable
+ *                             for chipsets that are ATA-66 capable, but
+ *                             the ability to bit test for detection is
+ *                             currently unknown.
+ * "ide=reverse"       : Formerly called to pci sub-system, but now local.
+ *
+ * The following are valid ONLY on ide0, (except dc4030)
+ * and the defaults for the base,ctl ports must not be altered.
+ *
+ * "ide0=dtc2278"      : probe/support DTC2278 interface
+ * "ide0=ht6560b"      : probe/support HT6560B interface
+ * "ide0=cmd640_vlb"   : *REQUIRED* for VLB cards with the CMD640 chip
+ *                       (not for PCI -- automatically detected)
+ * "ide0=qd65xx"       : probe/support qd65xx interface
+ * "ide0=ali14xx"      : probe/support ali14xx chipsets (ALI M1439, M1443, M1445)
+ * "ide0=umc8672"      : probe/support umc8672 chipsets
+ * "idex=dc4030"       : probe/support Promise DC4030VL interface
+ * "ide=doubler"       : probe/support IDE doublers on Amiga
+ */
+int __init ide_setup (char *s)
+{
+       int i, vals[3];
+       ide_hwif_t *hwif;
+       ide_drive_t *drive;
+       unsigned int hw, unit;
+       const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1);
+       const char max_hwif  = '0' + (MAX_HWIFS - 1);
+
+
+       if (strncmp(s,"hd",2) == 0 && s[2] == '=')      /* hd= is for hd.c   */
+               return 0;                               /* driver and not us */
+
+       if (strncmp(s,"ide",3) &&
+           strncmp(s,"idebus",6) &&
+           strncmp(s,"hd",2))          /* hdx= & hdxlun= */
+               return 0;
+
+       printk("ide_setup: %s", s);
+       init_ide_data ();
+
+#ifdef CONFIG_BLK_DEV_IDEDOUBLER
+       if (!strcmp(s, "ide=doubler")) {
+               extern int ide_doubler;
+
+               printk(" : Enabled support for IDE doublers\n");
+               ide_doubler = 1;
+               return 1;
+       }
+#endif /* CONFIG_BLK_DEV_IDEDOUBLER */
+
+       if (!strcmp(s, "ide=nodma")) {
+               printk("IDE: Prevented DMA\n");
+               noautodma = 1;
+               return 1;
+       }
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+       if (!strcmp(s, "ide=reverse")) {
+               ide_scan_direction = 1;
+               printk(" : Enabled support for IDE inverse scan order.\n");
+               return 1;
+       }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+       /*
+        * Look for drive options:  "hdx="
+        */
+       if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
+               const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom",
+                               "serialize", "autotune", "noautotune",
+                               "slow", "swapdata", "bswap", "flash",
+                               "remap", "noremap", "scsi", NULL};
+               unit = s[2] - 'a';
+               hw   = unit / MAX_DRIVES;
+               unit = unit % MAX_DRIVES;
+               hwif = &ide_hwifs[hw];
+               drive = &hwif->drives[unit];
+               if (strncmp(s + 4, "ide-", 4) == 0) {
+                       strncpy(drive->driver_req, s + 4, 9);
+                       goto done;
+               }
+               /*
+                * Look for last lun option:  "hdxlun="
+                */
+               if (s[3] == 'l' && s[4] == 'u' && s[5] == 'n') {
+                       if (match_parm(&s[6], NULL, vals, 1) != 1)
+                               goto bad_option;
+                       if (vals[0] >= 0 && vals[0] <= 7) {
+                               drive->last_lun = vals[0];
+                               drive->forced_lun = 1;
+                       } else
+                               printk(" -- BAD LAST LUN! Expected value from 0 to 7");
+                       goto done;
+               }
+               switch (match_parm(&s[3], hd_words, vals, 3)) {
+                       case -1: /* "none" */
+                               drive->nobios = 1;  /* drop into "noprobe" */
+                       case -2: /* "noprobe" */
+                               drive->noprobe = 1;
+                               goto done;
+                       case -3: /* "nowerr" */
+                               drive->bad_wstat = BAD_R_STAT;
+                               hwif->noprobe = 0;
+                               goto done;
+                       case -4: /* "cdrom" */
+                               drive->present = 1;
+                               drive->media = ide_cdrom;
+                               hwif->noprobe = 0;
+                               goto done;
+                       case -5: /* "serialize" */
+                               printk(" -- USE \"ide%d=serialize\" INSTEAD", hw);
+                               goto do_serialize;
+                       case -6: /* "autotune" */
+                               drive->autotune = 1;
+                               goto done;
+                       case -7: /* "noautotune" */
+                               drive->autotune = 2;
+                               goto done;
+                       case -8: /* "slow" */
+                               drive->slow = 1;
+                               goto done;
+                       case -9: /* "swapdata" or "bswap" */
+                       case -10:
+                               drive->bswap = 1;
+                               goto done;
+                       case -11: /* "flash" */
+                               drive->ata_flash = 1;
+                               goto done;
+                       case -12: /* "remap" */
+                               drive->remap_0_to_1 = 1;
+                               goto done;
+                       case -13: /* "noremap" */
+                               drive->remap_0_to_1 = 2;
+                               goto done;
+                       case -14: /* "scsi" */
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+                               drive->scsi = 1;
+                               goto done;
+#else
+                               drive->scsi = 0;
+                               goto bad_option;
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+                       case 3: /* cyl,head,sect */
+                               drive->media    = ide_disk;
+                               drive->cyl      = drive->bios_cyl  = vals[0];
+                               drive->head     = drive->bios_head = vals[1];
+                               drive->sect     = drive->bios_sect = vals[2];
+                               drive->present  = 1;
+                               drive->forced_geom = 1;
+                               hwif->noprobe = 0;
+                               goto done;
+                       default:
+                               goto bad_option;
+               }
+       }
+
+       if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e')
+               goto bad_option;
+       /*
+        * Look for bus speed option:  "idebus="
+        */
+       if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') {
+               if (match_parm(&s[6], NULL, vals, 1) != 1)
+                       goto bad_option;
+               if (vals[0] >= 20 && vals[0] <= 66) {
+                       idebus_parameter = vals[0];
+               } else
+                       printk(" -- BAD BUS SPEED! Expected value from 20 to 66");
+               goto done;
+       }
+       /*
+        * Look for interface options:  "idex="
+        */
+       if (s[3] >= '0' && s[3] <= max_hwif) {
+               /*
+                * Be VERY CAREFUL changing this: note the hardcoded indexes below.
+                * -8, -9 and -10 are reserved for future idex keywords to ease the hardcoding.
+                */
+               const char *ide_words[] = {
+                       "noprobe", "serialize", "autotune", "noautotune", "reset", "dma", "ata66",
+                       "minus8", "minus9", "minus10",
+                       "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL };
+               hw = s[3] - '0';
+               hwif = &ide_hwifs[hw];
+               i = match_parm(&s[4], ide_words, vals, 3);
+
+               /*
+                * Cryptic check to ensure chipset not already set for hwif:
+                */
+               if (i > 0 || i <= -11) {                        /* is parameter a chipset name? */
+                       if (hwif->chipset != ide_unknown)
+                               goto bad_option;        /* chipset already specified */
+                       if (i <= -11 && i != -18 && hw != 0)
+                               goto bad_hwif;          /* chipset drivers are for "ide0=" only */
+                       if (i <= -11 && i != -18 && ide_hwifs[hw+1].chipset != ide_unknown)
+                               goto bad_option;        /* chipset for 2nd port already specified */
+                       printk("\n");
+               }
+
+               switch (i) {
+#ifdef CONFIG_BLK_DEV_PDC4030
+                       case -18: /* "dc4030" */
+                       {
+                               extern void init_pdc4030(void);
+                               init_pdc4030();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_ALI14XX
+                       case -17: /* "ali14xx" */
+                       {
+                               extern void init_ali14xx (void);
+                               init_ali14xx();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_ALI14XX */
+#ifdef CONFIG_BLK_DEV_UMC8672
+                       case -16: /* "umc8672" */
+                       {
+                               extern void init_umc8672 (void);
+                               init_umc8672();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_UMC8672 */
+#ifdef CONFIG_BLK_DEV_DTC2278
+                       case -15: /* "dtc2278" */
+                       {
+                               extern void init_dtc2278 (void);
+                               init_dtc2278();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_DTC2278 */
+#ifdef CONFIG_BLK_DEV_CMD640
+                       case -14: /* "cmd640_vlb" */
+                       {
+                               extern int cmd640_vlb; /* flag for cmd640.c */
+                               cmd640_vlb = 1;
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_HT6560B
+                       case -13: /* "ht6560b" */
+                       {
+                               extern void init_ht6560b (void);
+                               init_ht6560b();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_HT6560B */
+#ifdef CONFIG_BLK_DEV_QD65XX
+                       case -12: /* "qd65xx" */
+                       {
+                               extern void init_qd65xx (void);
+                               init_qd65xx();
+                               goto done;
+                       }
+#endif /* CONFIG_BLK_DEV_QD65XX */
+#ifdef CONFIG_BLK_DEV_4DRIVES
+                       case -11: /* "four" drives on one set of ports */
+                       {
+                               ide_hwif_t *mate = &ide_hwifs[hw^1];
+                               mate->drives[0].select.all ^= 0x20;
+                               mate->drives[1].select.all ^= 0x20;
+                               hwif->chipset = mate->chipset = ide_4drives;
+                               mate->irq = hwif->irq;
+                               memcpy(mate->io_ports, hwif->io_ports, sizeof(hwif->io_ports));
+                               goto do_serialize;
+                       }
+#endif /* CONFIG_BLK_DEV_4DRIVES */
+                       case -10: /* minus10 */
+                       case -9: /* minus9 */
+                       case -8: /* minus8 */
+                               goto bad_option;
+                       case -7: /* ata66 */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+                               hwif->udma_four = 1;
+                               goto done;
+#else /* !CONFIG_BLK_DEV_IDEPCI */
+                               hwif->udma_four = 0;
+                               goto bad_hwif;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+                       case -6: /* dma */
+                               hwif->autodma = 1;
+                               goto done;
+                       case -5: /* "reset" */
+                               hwif->reset = 1;
+                               goto done;
+                       case -4: /* "noautotune" */
+                               hwif->drives[0].autotune = 2;
+                               hwif->drives[1].autotune = 2;
+                               goto done;
+                       case -3: /* "autotune" */
+                               hwif->drives[0].autotune = 1;
+                               hwif->drives[1].autotune = 1;
+                               goto done;
+                       case -2: /* "serialize" */
+                       do_serialize:
+                               hwif->mate = &ide_hwifs[hw^1];
+                               hwif->mate->mate = hwif;
+                               hwif->serialized = hwif->mate->serialized = 1;
+                               goto done;
+
+                       case -1: /* "noprobe" */
+                               hwif->noprobe = 1;
+                               goto done;
+
+                       case 1: /* base */
+                               vals[1] = vals[0] + 0x206; /* default ctl */
+                       case 2: /* base,ctl */
+                               vals[2] = 0;    /* default irq = probe for it */
+                       case 3: /* base,ctl,irq */
+                               hwif->hw.irq = vals[2];
+                               ide_init_hwif_ports(&hwif->hw, (ide_ioreg_t) vals[0], (ide_ioreg_t) vals[1], &hwif->irq);
+                               memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+                               hwif->irq      = vals[2];
+                               hwif->noprobe  = 0;
+                               hwif->chipset  = ide_generic;
+                               goto done;
+
+                       case 0: goto bad_option;
+                       default:
+                               printk(" -- SUPPORT NOT CONFIGURED IN THIS KERNEL\n");
+                               return 1;
+               }
+       }
+bad_option:
+       printk(" -- BAD OPTION\n");
+       return 1;
+bad_hwif:
+       printk("-- NOT SUPPORTED ON ide%d", hw);
+done:
+       printk("\n");
+       return 1;
+}
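+
+/*
+ * Illustrative mapping from "ide?=" option strings to the cases above
+ * (an editorial sketch, not part of the original import):
+ *
+ *     "ide0=noprobe"          -> case -1 (skip probing this interface)
+ *     "ide0=serialize"        -> case -2 (serialize with its mate)
+ *     "ide0=0x1f0"            -> case 1; ctl defaults to base + 0x206
+ *     "ide0=0x1f0,0x3f6,14"   -> vals[] = { 0x1f0, 0x3f6, 14 }, case 3
+ */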
+
+/*
+ * probe_for_hwifs() finds/initializes "known" IDE interfaces
+ */
+static void __init probe_for_hwifs (void)
+{
+#ifdef CONFIG_PCI
+       if (pci_present())
+       {
+#ifdef CONFIG_BLK_DEV_IDEPCI
+               ide_scan_pcibus(ide_scan_direction);
+#else
+#ifdef CONFIG_BLK_DEV_RZ1000
+               {
+                       extern void ide_probe_for_rz100x(void);
+                       ide_probe_for_rz100x();
+               }
+#endif /* CONFIG_BLK_DEV_RZ1000 */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+       }
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_ETRAX_IDE
+       {
+               extern void init_e100_ide(void);
+               init_e100_ide();
+       }
+#endif /* CONFIG_ETRAX_IDE */
+#ifdef CONFIG_BLK_DEV_CMD640
+       {
+               extern void ide_probe_for_cmd640x(void);
+               ide_probe_for_cmd640x();
+       }
+#endif /* CONFIG_BLK_DEV_CMD640 */
+#ifdef CONFIG_BLK_DEV_PDC4030
+       {
+               extern int ide_probe_for_pdc4030(void);
+               (void) ide_probe_for_pdc4030();
+       }
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+       {
+               extern void pmac_ide_probe(void);
+               pmac_ide_probe();
+       }
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#ifdef CONFIG_BLK_DEV_IDE_SWARM
+       {
+               extern void swarm_ide_probe(void);
+               swarm_ide_probe();
+       }
+#endif /* CONFIG_BLK_DEV_IDE_SWARM */
+#ifdef CONFIG_BLK_DEV_IDE_ICSIDE
+       {
+               extern void icside_init(void);
+               icside_init();
+       }
+#endif /* CONFIG_BLK_DEV_IDE_ICSIDE */
+#ifdef CONFIG_BLK_DEV_IDE_RAPIDE
+       {
+               extern void rapide_init(void);
+               rapide_init();
+       }
+#endif /* CONFIG_BLK_DEV_IDE_RAPIDE */
+#ifdef CONFIG_BLK_DEV_GAYLE
+       {
+               extern void gayle_init(void);
+               gayle_init();
+       }
+#endif /* CONFIG_BLK_DEV_GAYLE */
+#ifdef CONFIG_BLK_DEV_FALCON_IDE
+       {
+               extern void falconide_init(void);
+               falconide_init();
+       }
+#endif /* CONFIG_BLK_DEV_FALCON_IDE */
+#ifdef CONFIG_BLK_DEV_MAC_IDE
+       {
+               extern void macide_init(void);
+               macide_init();
+       }
+#endif /* CONFIG_BLK_DEV_MAC_IDE */
+#ifdef CONFIG_BLK_DEV_Q40IDE
+       {
+               extern void q40ide_init(void);
+               q40ide_init();
+       }
+#endif /* CONFIG_BLK_DEV_Q40IDE */
+#ifdef CONFIG_BLK_DEV_BUDDHA
+       {
+               extern void buddha_init(void);
+               buddha_init();
+       }
+#endif /* CONFIG_BLK_DEV_BUDDHA */
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP)
+       {
+               extern void pnpide_init(int enable);
+               pnpide_init(1);
+       }
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+}
+
+void __init ide_init_builtin_drivers (void)
+{
+       /*
+        * Probe for special PCI and other "known" interface chipsets
+        */
+       probe_for_hwifs ();
+
+#ifdef CONFIG_BLK_DEV_IDE
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+       if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+               ide_get_lock(&ide_lock, NULL, NULL);    /* for atari only */
+               disable_irq(ide_hwifs[0].irq);  /* disable_irq_nosync ?? */
+//             disable_irq_nosync(ide_hwifs[0].irq);
+       }
+#endif /* __mc68000__ || CONFIG_APUS */
+
+       (void) ideprobe_init();
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+       if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+               enable_irq(ide_hwifs[0].irq);
+               ide_release_lock(&ide_lock);    /* for atari only */
+       }
+#endif /* __mc68000__ || CONFIG_APUS */
+#endif /* CONFIG_BLK_DEV_IDE */
+
+#ifdef CONFIG_PROC_FS
+       proc_ide_create();
+#endif
+
+       /*
+        * Attempt to match drivers for the available drives
+        */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+       (void) idedisk_init();
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+       (void) ide_cdrom_init();
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+       (void) idetape_init();
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+       (void) idefloppy_init();
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+ #ifdef CONFIG_SCSI
+       (void) idescsi_init();
+ #else
+    #warning ide scsi-emulation selected but no SCSI-subsystem in kernel
+ #endif
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+}
+
+static int default_cleanup (ide_drive_t *drive)
+{
+       return ide_unregister_subdriver(drive);
+}
+
+static int default_standby (ide_drive_t *drive)
+{
+       return 0;
+}
+
+static int default_flushcache (ide_drive_t *drive)
+{
+       return 0;
+}
+
+static ide_startstop_t default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       ide_end_request(0, HWGROUP(drive));
+       return ide_stopped;
+}
+static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+       ide_end_request(uptodate, hwgroup);
+}
+  
+static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+                         unsigned int cmd, unsigned long arg)
+{
+       return -EIO;
+}
+
+static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       drive->usage--;
+       return -EIO;
+}
+
+static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+}
+
+static int default_check_media_change (ide_drive_t *drive)
+{
+       return 1;
+}
+
+static void default_pre_reset (ide_drive_t *drive)
+{
+}
+
+static unsigned long default_capacity (ide_drive_t *drive)
+{
+       return 0x7fffffff;
+}
+
+static ide_startstop_t default_special (ide_drive_t *drive)
+{
+       special_t *s = &drive->special;
+
+       s->all = 0;
+       drive->mult_req = 0;
+       return ide_stopped;
+}
+
+static int default_reinit (ide_drive_t *drive)
+{
+       printk(KERN_ERR "%s: does not support hotswap of device class!\n", drive->name);
+
+       return 0;
+}
+
+static void setup_driver_defaults (ide_drive_t *drive)
+{
+       ide_driver_t *d = drive->driver;
+
+       if (d->cleanup == NULL)         d->cleanup = default_cleanup;
+       if (d->standby == NULL)         d->standby = default_standby;
+       if (d->flushcache == NULL)      d->flushcache = default_flushcache;
+       if (d->do_request == NULL)      d->do_request = default_do_request;
+       if (d->end_request == NULL)     d->end_request = default_end_request;
+       if (d->ioctl == NULL)           d->ioctl = default_ioctl;
+       if (d->open == NULL)            d->open = default_open;
+       if (d->release == NULL)         d->release = default_release;
+       if (d->media_change == NULL)    d->media_change = default_check_media_change;
+       if (d->pre_reset == NULL)       d->pre_reset = default_pre_reset;
+       if (d->capacity == NULL)        d->capacity = default_capacity;
+       if (d->special == NULL)         d->special = default_special;
+       if (d->reinit == NULL)          d->reinit = default_reinit;
+}
+
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n)
+{
+       unsigned int unit, index, i;
+
+       for (index = 0, i = 0; index < MAX_HWIFS; ++index) {
+               ide_hwif_t *hwif = &ide_hwifs[index];
+               if (!hwif->present)
+                       continue;
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       ide_drive_t *drive = &hwif->drives[unit];
+                       char *req = drive->driver_req;
+                       if (*req && !strstr(name, req))
+                               continue;
+                       if (drive->present && drive->media == media && drive->driver == driver && ++i > n)
+                               return drive;
+               }
+       }
+       return NULL;
+}
+
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version)
+{
+       unsigned long flags;
+       
+       save_flags(flags);              /* all CPUs */
+       cli();                          /* all CPUs */
+       if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL || drive->busy || drive->usage) {
+               restore_flags(flags);   /* all CPUs */
+               return 1;
+       }
+       drive->driver = driver;
+       setup_driver_defaults(drive);
+       restore_flags(flags);           /* all CPUs */
+       if (drive->autotune != 2) {
+               if (driver->supports_dma && HWIF(drive)->dmaproc != NULL) {
+                       /*
+                        * Force DMAing for the beginning of the check.
+                        * Some chipsets appear to do interesting things,
+                        * if not checked and cleared.
+                        *   PARANOIA!!!
+                        */
+                       (void) (HWIF(drive)->dmaproc(ide_dma_off_quietly, drive));
+                       (void) (HWIF(drive)->dmaproc(ide_dma_check, drive));
+               }
+               drive->dsc_overlap = (drive->next != drive && driver->supports_dsc_overlap);
+               drive->nice1 = 1;
+       }
+       drive->revalidate = 1;
+       drive->suspend_reset = 0;
+#ifdef CONFIG_PROC_FS
+       ide_add_proc_entries(drive->proc, generic_subdriver_entries, drive);
+       ide_add_proc_entries(drive->proc, driver->proc, drive);
+#endif
+       return 0;
+}
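+
+/*
+ * Sketch of the registration pattern a media subdriver is expected to
+ * follow with the two routines above -- an editorial example under an
+ * assumed driver ("example_driver"), not code from this import:
+ *
+ *     ide_drive_t *drive;
+ *     int failed = 0;
+ *
+ *     while ((drive = ide_scan_devices(ide_disk, "ide-disk", NULL, failed++)) != NULL) {
+ *             if (ide_register_subdriver(drive, &example_driver, IDE_SUBDRIVER_VERSION))
+ *                     continue;       // drive stays unbound; try the next one
+ *             // bound: the drive drops out of the NULL-driver set that
+ *             // ide_scan_devices() enumerates, so rescan the same slot,
+ *             // with NULL methods filled in by setup_driver_defaults()
+ *             failed--;
+ *     }
+ */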
+
+int ide_unregister_subdriver (ide_drive_t *drive)
+{
+       unsigned long flags;
+       
+       save_flags(flags);              /* all CPUs */
+       cli();                          /* all CPUs */
+       if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) {
+               restore_flags(flags);   /* all CPUs */
+               return 1;
+       }
+#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP) && defined(MODULE)
+       pnpide_init(0);
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+#ifdef CONFIG_PROC_FS
+       ide_remove_proc_entries(drive->proc, DRIVER(drive)->proc);
+       ide_remove_proc_entries(drive->proc, generic_subdriver_entries);
+#endif
+       auto_remove_settings(drive);
+       drive->driver = NULL;
+       restore_flags(flags);           /* all CPUs */
+       return 0;
+}
+
+int ide_register_module (ide_module_t *module)
+{
+       ide_module_t *p = ide_modules;
+
+       while (p) {
+               if (p == module)
+                       return 1;
+               p = p->next;
+       }
+       module->next = ide_modules;
+       ide_modules = module;
+       revalidate_drives();
+       return 0;
+}
+
+void ide_unregister_module (ide_module_t *module)
+{
+       ide_module_t **p;
+
+       for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next));
+       if (*p)
+               *p = (*p)->next;
+}
+
+struct block_device_operations ide_fops[] = {{
+       open:                   ide_open,
+       release:                ide_release,
+       ioctl:                  ide_ioctl,
+       check_media_change:     ide_check_media_change,
+       revalidate:             ide_revalidate_disk
+}};
+
+EXPORT_SYMBOL(ide_hwifs);
+EXPORT_SYMBOL(ide_register_module);
+EXPORT_SYMBOL(ide_unregister_module);
+EXPORT_SYMBOL(ide_spin_wait_hwgroup);
+
+/*
+ * Probe module
+ */
+#ifdef DEVFS_MUST_DIE
+devfs_handle_t ide_devfs_handle;
+#endif
+
+EXPORT_SYMBOL(ide_probe);
+EXPORT_SYMBOL(drive_is_flashcard);
+EXPORT_SYMBOL(ide_timer_expiry);
+EXPORT_SYMBOL(ide_intr);
+EXPORT_SYMBOL(ide_fops);
+EXPORT_SYMBOL(ide_get_queue);
+EXPORT_SYMBOL(ide_add_generic_settings);
+#ifdef DEVFS_MUST_DIE
+EXPORT_SYMBOL(ide_devfs_handle);
+#endif
+EXPORT_SYMBOL(do_ide_request);
+/*
+ * Driver module
+ */
+EXPORT_SYMBOL(ide_scan_devices);
+EXPORT_SYMBOL(ide_register_subdriver);
+EXPORT_SYMBOL(ide_unregister_subdriver);
+EXPORT_SYMBOL(ide_replace_subdriver);
+EXPORT_SYMBOL(ide_input_data);
+EXPORT_SYMBOL(ide_output_data);
+EXPORT_SYMBOL(atapi_input_bytes);
+EXPORT_SYMBOL(atapi_output_bytes);
+EXPORT_SYMBOL(drive_is_ready);
+EXPORT_SYMBOL(ide_set_handler);
+EXPORT_SYMBOL(ide_dump_status);
+EXPORT_SYMBOL(ide_error);
+EXPORT_SYMBOL(ide_fixstring);
+EXPORT_SYMBOL(ide_wait_stat);
+EXPORT_SYMBOL(ide_do_reset);
+EXPORT_SYMBOL(restart_request);
+EXPORT_SYMBOL(ide_init_drive_cmd);
+EXPORT_SYMBOL(ide_do_drive_cmd);
+EXPORT_SYMBOL(ide_end_drive_cmd);
+EXPORT_SYMBOL(ide_end_request);
+EXPORT_SYMBOL(ide_revalidate_disk);
+EXPORT_SYMBOL(ide_cmd);
+EXPORT_SYMBOL(ide_wait_cmd);
+EXPORT_SYMBOL(ide_wait_cmd_task);
+EXPORT_SYMBOL(ide_delay_50ms);
+EXPORT_SYMBOL(ide_stall_queue);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(ide_add_proc_entries);
+EXPORT_SYMBOL(ide_remove_proc_entries);
+EXPORT_SYMBOL(proc_ide_read_geometry);
+EXPORT_SYMBOL(create_proc_ide_interfaces);
+EXPORT_SYMBOL(recreate_proc_ide_device);
+EXPORT_SYMBOL(destroy_proc_ide_device);
+#endif
+EXPORT_SYMBOL(ide_add_setting);
+EXPORT_SYMBOL(ide_remove_setting);
+
+EXPORT_SYMBOL(ide_register_hw);
+EXPORT_SYMBOL(ide_register);
+EXPORT_SYMBOL(ide_unregister);
+EXPORT_SYMBOL(ide_setup_ports);
+EXPORT_SYMBOL(hwif_unregister);
+EXPORT_SYMBOL(get_info_ptr);
+EXPORT_SYMBOL(current_capacity);
+
+EXPORT_SYMBOL(system_bus_clock);
+
+EXPORT_SYMBOL(ide_reinit_drive);
+
+#if 0
+static int ide_notify_reboot (struct notifier_block *this, unsigned long event, void *x)
+{
+       ide_hwif_t *hwif;
+       ide_drive_t *drive;
+       int i, unit;
+
+       switch (event) {
+               case SYS_HALT:
+               case SYS_POWER_OFF:
+               case SYS_RESTART:
+                       break;
+               default:
+                       return NOTIFY_DONE;
+       }
+
+       printk("flushing ide devices: ");
+
+       for (i = 0; i < MAX_HWIFS; i++) {
+               hwif = &ide_hwifs[i];
+               if (!hwif->present)
+                       continue;
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       drive = &hwif->drives[unit];
+                       if (!drive->present)
+                               continue;
+
+                       /* set the drive to standby */
+                       printk("%s ", drive->name);
+                       if (event != SYS_RESTART)
+                               if (drive->driver != NULL && DRIVER(drive)->standby(drive))
+                                       continue;
+
+                       if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+                               continue;
+               }
+       }
+       printk("\n");
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block ide_notifier = {
+       ide_notify_reboot,
+       NULL,
+       5
+};
+#endif
+
+/*
+ * This gets invoked once during initialization, to set *everything* up
+ */
+int __init ide_init (void)
+{
+       static char banner_printed;
+       int i;
+
+       if (!banner_printed) {
+               printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
+#ifdef DEVFS_MUST_DIE
+               ide_devfs_handle = devfs_mk_dir (NULL, "ide", NULL);
+#endif
+               system_bus_speed = ide_system_bus_speed();
+               banner_printed = 1;
+       }
+
+       init_ide_data ();
+
+       initializing = 1;
+       ide_init_builtin_drivers();
+       initializing = 0;
+
+       for (i = 0; i < MAX_HWIFS; ++i) {
+               ide_hwif_t  *hwif = &ide_hwifs[i];
+               if (hwif->present)
+                       ide_geninit(hwif);
+       }
+
+       /*register_reboot_notifier(&ide_notifier);*/
+       return 0;
+}
+
+#ifdef MODULE
+char *options = NULL;
+MODULE_PARM(options,"s");
+MODULE_LICENSE("GPL");
+
+static void __init parse_options (char *line)
+{
+       char *next = line;
+
+       if (line == NULL || !*line)
+               return;
+       while ((line = next) != NULL) {
+               if ((next = strchr(line,' ')) != NULL)
+                       *next++ = 0;
+               if (!ide_setup(line))
+                       printk ("Unknown option '%s'\n", line);
+       }
+}
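+
+/*
+ * Example (assumed invocation -- the module name is illustrative):
+ *
+ *     insmod ide.o options="ide0=serialize ide1=autotune"
+ *
+ * parse_options() splits the string on spaces and hands each token to
+ * ide_setup(), just as the built-in "ide0=..." boot options would be.
+ */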
+
+int init_module (void)
+{
+       parse_options(options);
+       return ide_init();
+}
+
+void cleanup_module (void)
+{
+       int index;
+
+       /*unregister_reboot_notifier(&ide_notifier);*/
+       for (index = 0; index < MAX_HWIFS; ++index) {
+               ide_unregister(index);
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+               if (ide_hwifs[index].dma_base)
+                       (void) ide_release_dma(&ide_hwifs[index]);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+       }
+
+#ifdef CONFIG_PROC_FS
+       proc_ide_destroy();
+#endif
+#ifdef DEVFS_MUST_DIE
+       devfs_unregister (ide_devfs_handle);
+#endif
+}
+
+#else /* !MODULE */
+
+__setup("", ide_setup);
+
+#endif /* MODULE */
diff --git a/xen-2.4.16/drivers/ide/ide_modes.h b/xen-2.4.16/drivers/ide/ide_modes.h
new file mode 100644 (file)
index 0000000..16b8cf1
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ *  linux/drivers/ide/ide_modes.h
+ *
+ *  Copyright (C) 1996  Linus Torvalds, Igor Abramov, and Mark Lord
+ */
+
+#ifndef _IDE_MODES_H
+#define _IDE_MODES_H
+
+#include <linux/config.h>
+
+/*
+ * Shared data/functions for determining best PIO mode for an IDE drive.
+ * Most of this stuff originally lived in cmd640.c, and changes to the
+ * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid
+ * breaking the fragile cmd640.c support.
+ */
+
+#ifdef CONFIG_BLK_DEV_IDE_MODES
+
+/*
+ * Standard (generic) timings for PIO modes, from ATA2 specification.
+ * These timings are for access to the IDE data port register *only*.
+ * Some drives may specify a mode, while also specifying a different
+ * value for cycle_time (from drive identification data).
+ */
+typedef struct ide_pio_timings_s {
+       int     setup_time;     /* Address setup (ns) minimum */
+       int     active_time;    /* Active pulse (ns) minimum */
+       int     cycle_time;     /* Cycle time (ns) minimum = (setup + active + recovery) */
+} ide_pio_timings_t;
+
+typedef struct ide_pio_data_s {
+       byte pio_mode;
+       byte use_iordy;
+       byte overridden;
+       byte blacklisted;
+       unsigned int cycle_time;
+} ide_pio_data_t;
+       
+#ifndef _IDE_C
+
+int ide_scan_pio_blacklist (char *model);
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d);
+extern const ide_pio_timings_t ide_pio_timings[6];
+
+#else /* _IDE_C */
+
+const ide_pio_timings_t ide_pio_timings[6] = {
+       { 70,   165,    600 },  /* PIO Mode 0 */
+       { 50,   125,    383 },  /* PIO Mode 1 */
+       { 30,   100,    240 },  /* PIO Mode 2 */
+       { 30,   80,     180 },  /* PIO Mode 3 with IORDY */
+       { 25,   70,     120 },  /* PIO Mode 4 with IORDY */
+       { 20,   50,     100 }   /* PIO Mode 5 with IORDY (nonstandard) */
+};
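+
+/*
+ * Reading the table (editorial note): a drive whose identify data
+ * reports a minimum cycle time of, say, 180 ns can run PIO mode 3
+ * (cycle_time 180 ns) but not mode 4 (120 ns).  A sketch of that
+ * selection, assuming only the table above and a drive-supplied
+ * minimum in drive_cycle_time:
+ *
+ *     int mode;
+ *     for (mode = 5; mode > 0; mode--)
+ *             if (ide_pio_timings[mode].cycle_time >= drive_cycle_time)
+ *                     break;          // fastest mode the drive can meet
+ */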
+
+/*
+ * Black list. Some drives incorrectly report their maximal PIO mode,
+ * at least in respect to CMD640. Here we keep info on some known drives.
+ */
+static struct ide_pio_info {
+       const char      *name;
+       int             pio;
+} ide_pio_blacklist [] = {
+/*     { "Conner Peripherals 1275MB - CFS1275A", 4 }, */
+       { "Conner Peripherals 540MB - CFS540A", 3 },
+
+       { "WDC AC2700",  3 },
+       { "WDC AC2540",  3 },
+       { "WDC AC2420",  3 },
+       { "WDC AC2340",  3 },
+       { "WDC AC2250",  0 },
+       { "WDC AC2200",  0 },
+       { "WDC AC21200", 4 },
+       { "WDC AC2120",  0 },
+       { "WDC AC2850",  3 },
+       { "WDC AC1270",  3 },
+       { "WDC AC1170",  1 },
+       { "WDC AC1210",  1 },
+       { "WDC AC280",   0 },
+/*     { "WDC AC21000", 4 }, */
+       { "WDC AC31000", 3 },
+       { "WDC AC31200", 3 },
+/*     { "WDC AC31600", 4 }, */
+
+       { "Maxtor 7131 AT", 1 },
+       { "Maxtor 7171 AT", 1 },
+       { "Maxtor 7213 AT", 1 },
+       { "Maxtor 7245 AT", 1 },
+       { "Maxtor 7345 AT", 1 },
+       { "Maxtor 7546 AT", 3 },
+       { "Maxtor 7540 AV", 3 },
+
+       { "SAMSUNG SHD-3121A", 1 },
+       { "SAMSUNG SHD-3122A", 1 },
+       { "SAMSUNG SHD-3172A", 1 },
+
+/*     { "ST51080A", 4 },
+ *     { "ST51270A", 4 },
+ *     { "ST31220A", 4 },
+ *     { "ST31640A", 4 },
+ *     { "ST32140A", 4 },
+ *     { "ST3780A",  4 },
+ */
+       { "ST5660A",  3 },
+       { "ST3660A",  3 },
+       { "ST3630A",  3 },
+       { "ST3655A",  3 },
+       { "ST3391A",  3 },
+       { "ST3390A",  1 },
+       { "ST3600A",  1 },
+       { "ST3290A",  0 },
+       { "ST3144A",  0 },
+       { "ST3491A",  1 },      /* reports 3, should be 1 or 2 (depending on */ 
+                               /* drive) according to Seagate's FIND-ATA program */
+
+       { "QUANTUM ELS127A", 0 },
+       { "QUANTUM ELS170A", 0 },
+       { "QUANTUM LPS240A", 0 },
+       { "QUANTUM LPS210A", 3 },
+       { "QUANTUM LPS270A", 3 },
+       { "QUANTUM LPS365A", 3 },
+       { "QUANTUM LPS540A", 3 },
+       { "QUANTUM LIGHTNING 540A", 3 },
+       { "QUANTUM LIGHTNING 730A", 3 },
+
+        { "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
+        { "QUANTUM FIREBALL_640", 3 }, 
+        { "QUANTUM FIREBALL_1080", 3 },
+        { "QUANTUM FIREBALL_1280", 3 },
+       { NULL, 0 }
+};
+
+/*
+ * This routine searches the ide_pio_blacklist for an entry
+ * matching the start/whole of the supplied model name.
+ *
+ * Returns -1 if no match found.
+ * Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
+ */
+int ide_scan_pio_blacklist (char *model)
+{
+       struct ide_pio_info *p;
+
+       for (p = ide_pio_blacklist; p->name != NULL; p++) {
+               if (strncmp(p->name, model, strlen(p->name)) == 0)
+                       return p->pio;
+       }
+       return -1;
+}
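+
+/*
+ * Example (editorial): given the table above,
+ *
+ *     ide_scan_pio_blacklist("WDC AC2540")    returns 3
+ *     ide_scan_pio_blacklist("ST3144A")       returns 0
+ *     ide_scan_pio_blacklist("NO SUCH DRIVE") returns -1
+ *
+ * Because of the strncmp(), any model string *starting* with a listed
+ * prefix matches, e.g. "WDC AC2540H" would also return 3.
+ */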
+
+/*
+ * This routine returns the recommended PIO settings for a given drive,
+ * based on the drive->id information and the ide_pio_blacklist[].
+ * This is used by most chipset support modules when "auto-tuning".
+ */
+
+/*
+ * Drive PIO mode auto selection
+ */
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d)
+{
+       int pio_mode;
+       int cycle_time = 0;
+       int use_iordy = 0;
+       struct hd_driveid* id = drive->id;
+       int overridden  = 0;
+       int blacklisted = 0;
+
+       if (mode_wanted != 255) {
+               pio_mode = mode_wanted;
+       } else if (!drive->id) {
+               pio_mode = 0;
+       } else if ((pio_mode = ide_scan_pio_blacklist(id->model)) != -1) {
+               overridden = 1;
+               blacklisted = 1;
+               use_iordy = (pio_mode > 2);
+       } else {
+               pio_mode = id->tPIO;
+               if (pio_mode > 2) {     /* 2 is maximum allowed tPIO value */
+                       pio_mode = 2;
+                       overridden = 1;
+               }
+               if (id->field_valid & 2) {        /* drive implements ATA2? */
+                       if (id->capability & 8) { /* drive supports use_iordy? */
+                               use_iordy = 1;
+                               cycle_time = id->eide_pio_iordy;
+                               if (id->eide_pio_modes & 7) {
+                                       overridden = 0;
+                                       if (id->eide_pio_modes & 4)
+                                               pio_mode = 5;
+                                       else if (id->eide_pio_modes & 2)
+                                               pio_mode = 4;
+                                       else
+                                               pio_mode = 3;
+                               }
+                       } else {
+                               cycle_time = id->eide_pio;
+                       }
+               }
+
+#if 0
+               if (drive->id->major_rev_num & 0x0004) printk("ATA-2 ");
+#endif
+
+               /*
+                * Conservative "downgrade" for all pre-ATA2 drives
+                */
+               if (pio_mode && pio_mode < 4) {
+                       pio_mode--;
+                       overridden = 1;
+#if 0
+                       use_iordy = (pio_mode > 2);
+#endif
+                       if (cycle_time && cycle_time < ide_pio_timings[pio_mode].cycle_time)
+                               cycle_time = 0; /* use standard timing */
+               }
+       }
+       if (pio_mode > max_mode) {
+               pio_mode = max_mode;
+               cycle_time = 0;
+       }
+       if (d) {
+               d->pio_mode = pio_mode;
+               d->cycle_time = cycle_time ? cycle_time : ide_pio_timings[pio_mode].cycle_time;
+               d->use_iordy = use_iordy;
+               d->overridden = overridden;
+               d->blacklisted = blacklisted;
+       }
+       return pio_mode;
+}
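+
+/*
+ * Typical call from a chipset driver when auto-tuning (editorial
+ * sketch; 255 means "no explicit mode requested"):
+ *
+ *     ide_pio_data_t d;
+ *     byte pio = ide_get_best_pio_mode(drive, 255, 4, &d);
+ *     // pio is capped at mode 4 here; d.cycle_time holds the cycle
+ *     // time to program, already defaulted from ide_pio_timings[]
+ *     // when the drive supplied none.
+ */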
+
+#endif /* _IDE_C */
+#endif /* CONFIG_BLK_DEV_IDE_MODES */
+#endif /* _IDE_MODES_H */
diff --git a/xen-2.4.16/drivers/net/3c509.c b/xen-2.4.16/drivers/net/3c509.c
new file mode 100644 (file)
index 0000000..8729f39
--- /dev/null
@@ -0,0 +1,1068 @@
+/* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */
+/*
+       Written 1993-2000 by Donald Becker.
+
+       Copyright 1994-2000 by Donald Becker.
+       Copyright 1993 United States Government as represented by the
+       Director, National Security Agency.      This software may be used and
+       distributed according to the terms of the GNU General Public License,
+       incorporated herein by reference.
+
+       This driver is for the 3Com EtherLinkIII series.
+
+       The author may be reached as becker@scyld.com, or C/O
+       Scyld Computing Corporation
+       410 Severn Ave., Suite 210
+       Annapolis MD 21403
+
+       Known limitations:
+       Because of the way 3c509 ISA detection works it's difficult to predict
+       a priori which of several ISA-mode cards will be detected first.
+
+       This driver does not use predictive interrupt mode, resulting in higher
+       packet latency but lower overhead.  If interrupts are disabled for an
+       unusually long time it could also result in missed packets, but in
+       practice this rarely happens.
+
+
+       FIXES:
+               Alan Cox:       Removed the 'Unexpected interrupt' bug.
+               Michael Meskes: Upgraded to Donald Becker's version 1.07.
+               Alan Cox:       Increased the eeprom delay. Regardless of 
+                               what the docs say some people definitely
+                               get problems with lower (but in card spec)
+                               delays
+               v1.10 4/21/97 Fixed module code so that multiple cards may be detected,
+                               other cleanups.  -djb
+               Andrea Arcangeli:       Upgraded to Donald Becker's version 1.12.
+               Rick Payne:     Fixed SMP race condition
+               v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable variable -djb
+               v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb
+               v1.15 1/31/98 Faster recovery for Tx errors. -djb
+               v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb
+               v1.18 12Mar2001 Andrew Morton <andrewm@uow.edu.au>
+                       - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz)
+                       - Reviewed against 1.18 from scyld.com
+*/
+
+/* A few values that may be tweaked. */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (400*HZ/1000)
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 10;
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+//#include <linux/mca.h>
+//#include <linux/isapnp.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/lib.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+//#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>       /* for udelay() */
+#include <linux/spinlock.h>
+
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+static char versionA[] __initdata = "3c509.c:1.18 12Mar2001 becker@scyld.com\n";
+static char versionB[] __initdata = "http://www.scyld.com/network/3c509.html\n";
+
+#ifdef EL3_DEBUG
+static int el3_debug = EL3_DEBUG;
+#else
+static int el3_debug = 2;
+#endif
+
+/* To minimize the size of the driver source I only define operating
+   constants if they are used several times.  You'll need the manual
+   anyway if you want to understand driver details. */
+/* Offsets from base I/O address. */
+#define EL3_DATA 0x00
+#define EL3_CMD 0x0e
+#define EL3_STATUS 0x0e
+#define         EEPROM_READ 0x80
+
+#define EL3_IO_EXTENT  16
+
+#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD)
+
+
+/* The top five bits written to EL3_CMD are a command, the lower
+   11 bits are the parameter, if applicable. */
+enum c509cmd {
+       TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11,
+       RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, RxDiscard = 8<<11,
+       TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11,
+       FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11,
+       SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11,
+       SetTxThreshold = 18<<11, SetTxStart = 19<<11, StatsEnable = 21<<11,
+       StatsDisable = 22<<11, StopCoax = 23<<11,};
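+
+/* How the encoding above is used (editorial note): the command sits in
+   bits 11-15 and the parameter in bits 0-10, so a command plus argument
+   is just their sum/OR, e.g.
+
+       outw(SetTxThreshold + 1536, ioaddr + EL3_CMD);
+
+   asks for an interrupt once 1536 bytes of Tx FIFO are free. */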
+
+enum c509status {
+       IntLatch = 0x0001, AdapterFailure = 0x0002, TxComplete = 0x0004,
+       TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020,
+       IntReq = 0x0040, StatsFull = 0x0080, CmdBusy = 0x1000, };
+
+/* The SetRxFilter command accepts the following classes: */
+enum RxFilter {
+       RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 };
+
+/* Register window 1 offsets, the window used in normal operation. */
+#define TX_FIFO                0x00
+#define RX_FIFO                0x00
+#define RX_STATUS      0x08
+#define TX_STATUS      0x0B
+#define TX_FREE                0x0C            /* Remaining free bytes in Tx buffer. */
+
+#define WN0_IRQ                0x08            /* Window 0: Set IRQ line in bits 12-15. */
+#define WN4_MEDIA      0x0A            /* Window 4: Various transcvr/media bits. */
+#define  MEDIA_TP      0x00C0          /* Enable link beat and jabber for 10baseT. */
+
+/*
+ * Must be a power of two (we use a binary AND to wrap the
+ * circular queue index)
+ */
+#define SKB_QUEUE_SIZE 64
+
+struct el3_private {
+       struct net_device_stats stats;
+       struct net_device *next_dev;
+       spinlock_t lock;
+       /* skb send-queue */
+       int head, size;
+       struct sk_buff *queue[SKB_QUEUE_SIZE];
+       char mca_slot;
+};
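+
+/* With SKB_QUEUE_SIZE a power of two, the circular send-queue index can
+   wrap with a mask instead of a modulo -- an editorial sketch, since the
+   queue manipulation itself is not in this hunk:
+
+       lp->queue[(lp->head + lp->size) & (SKB_QUEUE_SIZE - 1)] = skb;
+ */
+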
+static int id_port __initdata = 0x110; /* Start with 0x110 to avoid new sound cards.*/
+static struct net_device *el3_root_dev;
+
+static ushort id_read_eeprom(int index);
+static ushort read_eeprom(int ioaddr, int index);
+static int el3_open(struct net_device *dev);
+static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static void el3_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void update_stats(struct net_device *dev);
+static struct net_device_stats *el3_get_stats(struct net_device *dev);
+static int el3_rx(struct net_device *dev);
+static int el3_close(struct net_device *dev);
+static void set_multicast_list(struct net_device *dev);
+static void el3_tx_timeout (struct net_device *dev);
+
+#ifdef CONFIG_MCA
+struct el3_mca_adapters_struct {
+       char* name;
+       int id;
+};
+
+static struct el3_mca_adapters_struct el3_mca_adapters[] __initdata = {
+       { "3Com 3c529 EtherLink III (10base2)", 0x627c },
+       { "3Com 3c529 EtherLink III (10baseT)", 0x627d },
+       { "3Com 3c529 EtherLink III (test mode)", 0x62db },
+       { "3Com 3c529 EtherLink III (TP or coax)", 0x62f6 },
+       { "3Com 3c529 EtherLink III (TP)", 0x62f7 },
+       { NULL, 0 },
+};
+#endif /* CONFIG_MCA */
+
+#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
+static struct isapnp_device_id el3_isapnp_adapters[] __initdata = {
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5090),
+               (long) "3Com Etherlink III (TP)" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5091),
+               (long) "3Com Etherlink III" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5094),
+               (long) "3Com Etherlink III (combo)" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5095),
+               (long) "3Com Etherlink III (TPO)" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5098),
+               (long) "3Com Etherlink III (TPC)" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f7),
+               (long) "3Com Etherlink III compatible" },
+       {       ISAPNP_ANY_ID, ISAPNP_ANY_ID,
+               ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f8),
+               (long) "3Com Etherlink III compatible" },
+       { }     /* terminate list */
+};
+
+MODULE_DEVICE_TABLE(isapnp, el3_isapnp_adapters);
+MODULE_LICENSE("GPL");
+
+
+static u16 el3_isapnp_phys_addr[8][3];
+static int nopnp;
+#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
+
+int __init el3_probe(struct net_device *dev)
+{
+       struct el3_private *lp;
+       short lrs_state = 0xff, i;
+       int ioaddr, irq, if_port;
+       u16 phys_addr[3];
+       static int current_tag;
+       int mca_slot = -1;
+#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
+       static int pnp_cards;
+#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
+
+       if (dev) SET_MODULE_OWNER(dev);
+
+       /* First check all slots of the EISA bus.  The next slot address to
+          probe is kept in 'eisa_addr' to support multiple probe() calls. */
+       if (EISA_bus) {
+               static int eisa_addr = 0x1000;
+               while (eisa_addr < 0x9000) {
+                       int device_id;
+
+                       ioaddr = eisa_addr;
+                       eisa_addr += 0x1000;
+
+                       /* Check the standard EISA ID register for an encoded '3Com'. */
+                       if (inw(ioaddr + 0xC80) != 0x6d50)
+                               continue;
+
+                       /* Avoid conflict with 3c590, 3c592, 3c597, etc */
+                       device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83);
+                       if ((device_id & 0xFF00) == 0x5900) {
+                               continue;
+                       }
+
+                       /* Change the register set to the configuration window 0. */
+                       outw(SelectWindow | 0, ioaddr + 0xC80 + EL3_CMD);
+
+                       irq = inw(ioaddr + WN0_IRQ) >> 12;
+                       if_port = inw(ioaddr + 6)>>14;
+                       for (i = 0; i < 3; i++)
+                               phys_addr[i] = htons(read_eeprom(ioaddr, i));
+
+                       /* Restore the "Product ID" to the EEPROM read register. */
+                       read_eeprom(ioaddr, 3);
+
+                       /* Was the EISA code an add-on hack?  Nahhhhh... */
+                       goto found;
+               }
+       }
+
+#ifdef CONFIG_MCA
+       /* Based on Erik Nygren's (nygren@mit.edu) 3c529 patch, heavily
+        * modified by Chris Beauregard (cpbeaure@csclub.uwaterloo.ca)
+        * to support standard MCA probing.
+        *
+        * redone for multi-card detection by ZP Gu (zpg@castle.net)
+        * now works as a module
+        */
+
+       if( MCA_bus ) {
+               int slot, j;
+               u_char pos4, pos5;
+
+               for( j = 0; el3_mca_adapters[j].name != NULL; j ++ ) {
+                       slot = 0;
+                       while( slot != MCA_NOTFOUND ) {
+                               slot = mca_find_unused_adapter(
+                                       el3_mca_adapters[j].id, slot );
+                               if( slot == MCA_NOTFOUND ) break;
+
+                               /* if we get this far, an adapter has been
+                                * detected and is enabled
+                                */
+
+                               pos4 = mca_read_stored_pos( slot, 4 );
+                               pos5 = mca_read_stored_pos( slot, 5 );
+
+                               ioaddr = ((short)((pos4&0xfc)|0x02)) << 8;
+                               irq = pos5 & 0x0f;
+
+                               /* probing for a card at a particular IO/IRQ */
+                               if(dev && ((dev->irq >= 1 && dev->irq != irq) ||
+                               (dev->base_addr >= 1 && dev->base_addr != ioaddr))) {
+                                       slot++;         /* probing next slot */
+                                       continue;
+                               }
+
+                               printk("3c509: found %s at slot %d\n",
+                                       el3_mca_adapters[j].name, slot + 1 );
+
+                               /* claim the slot */
+                               mca_set_adapter_name(slot, el3_mca_adapters[j].name);
+                               mca_set_adapter_procfn(slot, NULL, NULL);
+                               mca_mark_as_used(slot);
+
+                               if_port = pos4 & 0x03;
+                               if (el3_debug > 2) {
+                                       printk("3c529: irq %d  ioaddr 0x%x  ifport %d\n", irq, ioaddr, if_port);
+                               }
+                               EL3WINDOW(0);
+                               for (i = 0; i < 3; i++) {
+                                       phys_addr[i] = htons(read_eeprom(ioaddr, i));
+                               }
+                               
+                               mca_slot = slot;
+
+                               goto found;
+                       }
+               }
+               /* if we get here, we didn't find an MCA adapter */
+               return -ENODEV;
+       }
+#endif /* CONFIG_MCA */
+
+#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
+       if (nopnp == 1)
+               goto no_pnp;
+
+       for (i=0; el3_isapnp_adapters[i].vendor != 0; i++) {
+               struct pci_dev *idev = NULL;
+               int j;
+               while ((idev = isapnp_find_dev(NULL,
+                                               el3_isapnp_adapters[i].vendor,
+                                               el3_isapnp_adapters[i].function,
+                                               idev))) {
+                       idev->prepare(idev);
+                       /* Deactivation is needed if the driver was called
+                          with "nopnp=1" before, does not harm if not. */
+                       idev->deactivate(idev);
+                       idev->activate(idev);
+                       if (!idev->resource[0].start || check_region(idev->resource[0].start, EL3_IO_EXTENT))
+                               continue;
+                       ioaddr = idev->resource[0].start;
+                       if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509 PnP"))
+                               return -EBUSY;
+                       irq = idev->irq_resource[0].start;
+                       if (el3_debug > 3)
+                               printk ("ISAPnP reports %s at i/o 0x%x, irq %d\n",
+                                       (char*) el3_isapnp_adapters[i].driver_data, ioaddr, irq);
+                       EL3WINDOW(0);
+                       for (j = 0; j < 3; j++)
+                               el3_isapnp_phys_addr[pnp_cards][j] =
+                                       phys_addr[j] =
+                                               htons(read_eeprom(ioaddr, j));
+                       if_port = read_eeprom(ioaddr, 8) >> 14;
+                       pnp_cards++;
+                       goto found;
+               }
+       }
+no_pnp:
+#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
+
+       /* Select an open I/O location at 0x1*0 to do contention select. */
+       for ( ; id_port < 0x200; id_port += 0x10) {
+               if (check_region(id_port, 1))
+                       continue;
+               outb(0x00, id_port);
+               outb(0xff, id_port);
+               if (inb(id_port) & 0x01)
+                       break;
+       }
+       if (id_port >= 0x200) {
+               /* Rare -- do we really need a warning? */
+               printk(" WARNING: No I/O port available for 3c509 activation.\n");
+               return -ENODEV;
+       }
+       /* Next check for all ISA bus boards by sending the ID sequence to the
+          ID_PORT.  We find cards past the first by setting the 'current_tag'
+          on cards as they are found.  Cards with their tag set will not
+          respond to subsequent ID sequences. */
+
+       outb(0x00, id_port);
+       outb(0x00, id_port);
+       for(i = 0; i < 255; i++) {
+               outb(lrs_state, id_port);
+               lrs_state <<= 1;
+               lrs_state = lrs_state & 0x100 ? lrs_state ^ 0xcf : lrs_state;
+       }
+
+       /* For the first probe, clear all board's tag registers. */
+       if (current_tag == 0)
+               outb(0xd0, id_port);
+       else                            /* Otherwise kill off already-found boards. */
+               outb(0xd8, id_port);
+
+       if (id_read_eeprom(7) != 0x6d50) {
+               return -ENODEV;
+       }
+
+       /* Read in EEPROM data, which does contention-select.
+          Only the lowest address board will stay "on-line".
+          3Com got the byte order backwards. */
+       for (i = 0; i < 3; i++) {
+               phys_addr[i] = htons(id_read_eeprom(i));
+       }
+
+#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
+       if (nopnp == 0) {
+               /* The ISA PnP 3c509 cards respond to the ID sequence.
+                  This check is needed in order not to register them twice. */
+               for (i = 0; i < pnp_cards; i++) {
+                       if (phys_addr[0] == el3_isapnp_phys_addr[i][0] &&
+                           phys_addr[1] == el3_isapnp_phys_addr[i][1] &&
+                           phys_addr[2] == el3_isapnp_phys_addr[i][2])
+                       {
+                               if (el3_debug > 3)
+                                       printk("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n",
+                                               phys_addr[0] & 0xff, phys_addr[0] >> 8,
+                                               phys_addr[1] & 0xff, phys_addr[1] >> 8,
+                                               phys_addr[2] & 0xff, phys_addr[2] >> 8);
+                               /* Set the adaptor tag so that the next card can be found. */
+                               outb(0xd0 + ++current_tag, id_port);
+                               goto no_pnp;
+                       }
+               }
+       }
+#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */
+
+       {
+               unsigned int iobase = id_read_eeprom(8);
+               if_port = iobase >> 14;
+               ioaddr = 0x200 + ((iobase & 0x1f) << 4);
+       }
+       irq = id_read_eeprom(9) >> 12;
+
+       if (dev) {                                      /* Set passed-in IRQ or I/O Addr. */
+               if (dev->irq > 1  &&  dev->irq < 16)
+                       irq = dev->irq;
+
+               if (dev->base_addr) {
+                       if (dev->mem_end == 0x3c509                     /* Magic key */
+                               && dev->base_addr >= 0x200  &&  dev->base_addr <= 0x3e0)
+                               ioaddr = dev->base_addr & 0x3f0;
+                       else if (dev->base_addr != ioaddr)
+                               return -ENODEV;
+               }
+       }
+
+       if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509"))
+               return -EBUSY;
+
+       /* Set the adaptor tag so that the next card can be found. */
+       outb(0xd0 + ++current_tag, id_port);
+
+       /* Activate the adaptor at the EEPROM location. */
+       outb((ioaddr >> 4) | 0xe0, id_port);
+
+       EL3WINDOW(0);
+       if (inw(ioaddr) != 0x6d50) {
+               release_region(ioaddr, EL3_IO_EXTENT);
+               return -ENODEV;
+       }
+
+       /* Free the interrupt so that some other card can use it. */
+       outw(0x0f00, ioaddr + WN0_IRQ);
+ found:
+       if (dev == NULL) {
+               dev = init_etherdev(dev, sizeof(struct el3_private));
+               if (dev == NULL) {
+                       release_region(ioaddr, EL3_IO_EXTENT);
+                       return -ENOMEM;
+               }
+               SET_MODULE_OWNER(dev);
+       }
+       memcpy(dev->dev_addr, phys_addr, sizeof(phys_addr));
+       dev->base_addr = ioaddr;
+       dev->irq = irq;
+       dev->if_port = (dev->mem_start & 0x1f) ? dev->mem_start & 3 : if_port;
+
+       {
+               const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+               printk("%s: 3c5x9 at %#3.3lx, %s port, address ",
+                          dev->name, dev->base_addr, if_names[dev->if_port]);
+       }
+
+       /* Read in the station address. */
+       for (i = 0; i < 6; i++)
+               printk(" %2.2x", dev->dev_addr[i]);
+       printk(", IRQ %d.\n", dev->irq);
+
+       /* Make up an EL3-specific data structure. */
+       if (dev->priv == NULL)
+               dev->priv = kmalloc(sizeof(struct el3_private), GFP_KERNEL);
+       if (dev->priv == NULL)
+               return -ENOMEM;
+       memset(dev->priv, 0, sizeof(struct el3_private));
+       
+       lp = dev->priv;
+       lp->mca_slot = mca_slot;
+       lp->next_dev = el3_root_dev;
+       spin_lock_init(&lp->lock);
+       el3_root_dev = dev;
+
+       if (el3_debug > 0)
+               printk(KERN_INFO "%s" KERN_INFO "%s", versionA, versionB);
+
+       /* The EL3-specific entries in the device structure. */
+       dev->open = &el3_open;
+       dev->hard_start_xmit = &el3_start_xmit;
+       dev->stop = &el3_close;
+       dev->get_stats = &el3_get_stats;
+       dev->set_multicast_list = &set_multicast_list;
+       dev->tx_timeout = el3_tx_timeout;
+       dev->watchdog_timeo = TX_TIMEOUT;
+
+       /* Fill in the generic fields of the device structure. */
+       ether_setup(dev);
+       return 0;
+}
+
+/* Read a word from the EEPROM using the regular EEPROM access register.
+   Assume that we are in register window zero.
+ */
+static ushort __init read_eeprom(int ioaddr, int index)
+{
+       outw(EEPROM_READ + index, ioaddr + 10);
+       /* Pause for at least 162 us for the read to take place. */
+       udelay (500);
+       return inw(ioaddr + 12);
+}
+
+/* Read a word from the EEPROM when in the ISA ID probe state. */
+static ushort __init id_read_eeprom(int index)
+{
+       int bit, word = 0;
+
+       /* Issue read command, and pause for at least 162 us for it to complete.
+          Assume an extra-fast 16 MHz bus. */
+       outb(EEPROM_READ + index, id_port);
+
+       /* Pause for at least 162 us for the read to take place. */
+       udelay (500);
+       
+       for (bit = 15; bit >= 0; bit--)
+               word = (word << 1) + (inb(id_port) & 0x01);
+
+       if (el3_debug > 3)
+               printk("  3c509 EEPROM word %d %#4.4x.\n", index, word);
+
+       return word;
+}
+
+
+static int
+el3_open(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+       int i;
+
+       outw(TxReset, ioaddr + EL3_CMD);
+       outw(RxReset, ioaddr + EL3_CMD);
+       outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
+
+       i = request_irq(dev->irq, &el3_interrupt, 0, dev->name, dev);
+       if (i) return i;
+
+       EL3WINDOW(0);
+       if (el3_debug > 3)
+               printk("%s: Opening, IRQ %d      status@%x %4.4x.\n", dev->name,
+                          dev->irq, ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS));
+
+       /* Activate board: this is probably unnecessary. */
+       outw(0x0001, ioaddr + 4);
+
+       /* Set the IRQ line. */
+       outw((dev->irq << 12) | 0x0f00, ioaddr + WN0_IRQ);
+
+       /* Set the station address in window 2 each time opened. */
+       EL3WINDOW(2);
+
+       for (i = 0; i < 6; i++)
+               outb(dev->dev_addr[i], ioaddr + i);
+
+       if (dev->if_port == 3)
+               /* Start the thinnet transceiver. We should really wait 50ms...*/
+               outw(StartCoax, ioaddr + EL3_CMD);
+       else if (dev->if_port == 0) {
+               /* 10baseT interface: enable link beat and jabber check. */
+               EL3WINDOW(4);
+               outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA);
+       }
+
+       /* Switch to the stats window, and clear all stats by reading. */
+       outw(StatsDisable, ioaddr + EL3_CMD);
+       EL3WINDOW(6);
+       for (i = 0; i < 9; i++)
+               inb(ioaddr + i);
+       inw(ioaddr + 10);
+       inw(ioaddr + 12);
+
+       /* Switch to register set 1 for normal use. */
+       EL3WINDOW(1);
+
+       /* Accept broadcast and phys addr only. */
+       outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD);
+       outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
+
+       netif_start_queue(dev);
+
+       outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */
+       outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */
+       /* Allow status bits to be seen. */
+       outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD);
+       /* Ack all pending events, and set active indicator mask. */
+       outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
+                ioaddr + EL3_CMD);
+       outw(SetIntrEnb | IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull,
+                ioaddr + EL3_CMD);
+
+       if (el3_debug > 3)
+               printk("%s: Opened 3c509  IRQ %d  status %4.4x.\n",
+                          dev->name, dev->irq, inw(ioaddr + EL3_STATUS));
+
+       return 0;
+}
+
+static void
+el3_tx_timeout (struct net_device *dev)
+{
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       int ioaddr = dev->base_addr;
+
+       /* Transmitter timeout, serious problems. */
+       printk("%s: transmit timed out, Tx_status %2.2x status %4.4x "
+                  "Tx FIFO room %d.\n",
+                  dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS),
+                  inw(ioaddr + TX_FREE));
+       lp->stats.tx_errors++;
+       dev->trans_start = jiffies;
+       /* Issue TX_RESET and TX_START commands. */
+       outw(TxReset, ioaddr + EL3_CMD);
+       outw(TxEnable, ioaddr + EL3_CMD);
+       netif_wake_queue(dev);
+}
+
+
+static int
+el3_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       int ioaddr = dev->base_addr;
+       unsigned long flags;
+
+       netif_stop_queue (dev);
+
+       lp->stats.tx_bytes += skb->len;
+       
+       if (el3_debug > 4) {
+               printk("%s: el3_start_xmit(length = %u) called, status %4.4x.\n",
+                          dev->name, skb->len, inw(ioaddr + EL3_STATUS));
+       }
+#if 0
+#ifndef final_version
+       {       /* Error-checking code, delete someday. */
+               ushort status = inw(ioaddr + EL3_STATUS);
+               if (status & 0x0001             /* IRQ line active, missed one. */
+                       && inw(ioaddr + EL3_STATUS) & 1) {                      /* Make sure. */
+                       printk("%s: Missed interrupt, status then %04x now %04x"
+                                  "  Tx %2.2x Rx %4.4x.\n", dev->name, status,
+                                  inw(ioaddr + EL3_STATUS), inb(ioaddr + TX_STATUS),
+                                  inw(ioaddr + RX_STATUS));
+                       /* Fake interrupt trigger by masking, acknowledge interrupts. */
+                       outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
+                       outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
+                                ioaddr + EL3_CMD);
+                       outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD);
+               }
+       }
+#endif
+#endif
+       /*
+        *      We lock the driver against other processors. Note
+        *      we don't need to lock versus the IRQ as we suspended
+        *      that. This means that we lose the ability to take
+        *      an RX during a TX upload. That sucks a bit with SMP
+        *      on an original 3c509 (2K buffer)
+        *
+        *      Using disable_irq stops us crapping on other
+        *      time sensitive devices.
+        */
+
+       spin_lock_irqsave(&lp->lock, flags);
+           
+       /* Put out the doubleword header... */
+       outw(skb->len, ioaddr + TX_FIFO);
+       outw(0x00, ioaddr + TX_FIFO);
+       /* ... and the packet rounded to a doubleword. */
+#ifdef  __powerpc__
+       outsl_unswapped(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+#else
+       outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+#endif
+
+       dev->trans_start = jiffies;
+       if (inw(ioaddr + TX_FREE) > 1536)
+               netif_start_queue(dev);
+       else
+               /* Interrupt us when the FIFO has room for max-sized packet. */
+               outw(SetTxThreshold + 1536, ioaddr + EL3_CMD);
+
+       spin_unlock_irqrestore(&lp->lock, flags);
+
+       dev_kfree_skb (skb);
+
+       /* Clear the Tx status stack. */
+       {
+               short tx_status;
+               int i = 4;
+
+               while (--i > 0  &&      (tx_status = inb(ioaddr + TX_STATUS)) > 0) {
+                       if (tx_status & 0x38) lp->stats.tx_aborted_errors++;
+                       if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD);
+                       if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD);
+                       outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */
+               }
+       }
+       return 0;
+}
+
+/* The EL3 interrupt handler. */
+static void
+el3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct net_device *dev = (struct net_device *)dev_id;
+       struct el3_private *lp;
+       int ioaddr, status;
+       int i = max_interrupt_work;
+
+       if (dev == NULL) {
+               printk ("el3_interrupt(): irq %d for unknown device.\n", irq);
+               return;
+       }
+
+       lp = (struct el3_private *)dev->priv;
+       spin_lock(&lp->lock);
+
+       ioaddr = dev->base_addr;
+
+       if (el3_debug > 4) {
+               status = inw(ioaddr + EL3_STATUS);
+               printk("%s: interrupt, status %4.4x.\n", dev->name, status);
+       }
+
+       while ((status = inw(ioaddr + EL3_STATUS)) &
+                  (IntLatch | RxComplete | StatsFull)) {
+
+               if (status & RxComplete)
+                       el3_rx(dev);
+
+               if (status & TxAvailable) {
+                       if (el3_debug > 5)
+                               printk("        TX room bit was handled.\n");
+                       /* There's room in the FIFO for a full-sized packet. */
+                       outw(AckIntr | TxAvailable, ioaddr + EL3_CMD);
+                       netif_wake_queue (dev);
+               }
+               if (status & (AdapterFailure | RxEarly | StatsFull | TxComplete)) {
+                       /* Handle all uncommon interrupts. */
+                       if (status & StatsFull)                         /* Empty statistics. */
+                               update_stats(dev);
+                       if (status & RxEarly) {                         /* Rx early is unused. */
+                               el3_rx(dev);
+                               outw(AckIntr | RxEarly, ioaddr + EL3_CMD);
+                       }
+                       if (status & TxComplete) {                      /* Really Tx error. */
+                               struct el3_private *lp = (struct el3_private *)dev->priv;
+                               short tx_status;
+                               int i = 4;
+
+                               while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) {
+                                       if (tx_status & 0x38) lp->stats.tx_aborted_errors++;
+                                       if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD);
+                                       if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD);
+                                       outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */
+                               }
+                       }
+                       if (status & AdapterFailure) {
+                               /* Adapter failure requires Rx reset and reinit. */
+                               outw(RxReset, ioaddr + EL3_CMD);
+                               /* Set the Rx filter to the current state. */
+                               outw(SetRxFilter | RxStation | RxBroadcast
+                                        | (dev->flags & IFF_ALLMULTI ? RxMulticast : 0)
+                                        | (dev->flags & IFF_PROMISC ? RxProm : 0),
+                                        ioaddr + EL3_CMD);
+                               outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */
+                               outw(AckIntr | AdapterFailure, ioaddr + EL3_CMD);
+                       }
+               }
+
+               if (--i < 0) {
+                       printk("%s: Infinite loop in interrupt, status %4.4x.\n",
+                                  dev->name, status);
+                       /* Clear all interrupts. */
+                       outw(AckIntr | 0xFF, ioaddr + EL3_CMD);
+                       break;
+               }
+               /* Acknowledge the IRQ. */
+               outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); /* Ack IRQ */
+       }
+
+       if (el3_debug > 4) {
+               printk("%s: exiting interrupt, status %4.4x.\n", dev->name,
+                          inw(ioaddr + EL3_STATUS));
+       }
+       spin_unlock(&lp->lock);
+       return;
+}
+
+
+static struct net_device_stats *
+el3_get_stats(struct net_device *dev)
+{
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       unsigned long flags;
+
+       /*
+        *      This is fast enough not to bother with any
+        *      special IRQ-disabling scheme.
+        */
+
+       spin_lock_irqsave(&lp->lock, flags);
+       update_stats(dev);
+       spin_unlock_irqrestore(&lp->lock, flags);
+       return &lp->stats;
+}
+
+/*  Update statistics.  We change to register window 6, so this should be run
+       single-threaded if the device is active. This is expected to be a rare
+       operation, and it's simpler for the rest of the driver to assume that
+       window 1 is always valid rather than use a special window-state variable.
+       */
+static void update_stats(struct net_device *dev)
+{
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       int ioaddr = dev->base_addr;
+
+       if (el3_debug > 5)
+               printk("   Updating the statistics.\n");
+       /* Turn off statistics updates while reading. */
+       outw(StatsDisable, ioaddr + EL3_CMD);
+       /* Switch to the stats window, and read everything. */
+       EL3WINDOW(6);
+       lp->stats.tx_carrier_errors     += inb(ioaddr + 0);
+       lp->stats.tx_heartbeat_errors   += inb(ioaddr + 1);
+       /* Multiple collisions. */         inb(ioaddr + 2);
+       lp->stats.collisions            += inb(ioaddr + 3);
+       lp->stats.tx_window_errors      += inb(ioaddr + 4);
+       lp->stats.rx_fifo_errors        += inb(ioaddr + 5);
+       lp->stats.tx_packets            += inb(ioaddr + 6);
+       /* Rx packets   */                 inb(ioaddr + 7);
+       /* Tx deferrals */                 inb(ioaddr + 8);
+       inw(ioaddr + 10);       /* Total Rx and Tx octets. */
+       inw(ioaddr + 12);
+
+       /* Back to window 1, and turn statistics back on. */
+       EL3WINDOW(1);
+       outw(StatsEnable, ioaddr + EL3_CMD);
+       return;
+}
+
+static int
+el3_rx(struct net_device *dev)
+{
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       int ioaddr = dev->base_addr;
+       short rx_status;
+
+       if (el3_debug > 5)
+               printk("   In rx_packet(), status %4.4x, rx_status %4.4x.\n",
+                          inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS));
+       while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) {
+               if (rx_status & 0x4000) { /* Error, update stats. */
+                       short error = rx_status & 0x3800;
+
+                       outw(RxDiscard, ioaddr + EL3_CMD);
+                       lp->stats.rx_errors++;
+                       switch (error) {
+                       case 0x0000:            lp->stats.rx_over_errors++; break;
+                       case 0x0800:            lp->stats.rx_length_errors++; break;
+                       case 0x1000:            lp->stats.rx_frame_errors++; break;
+                       case 0x1800:            lp->stats.rx_length_errors++; break;
+                       case 0x2000:            lp->stats.rx_frame_errors++; break;
+                       case 0x2800:            lp->stats.rx_crc_errors++; break;
+                       }
+               } else {
+                       short pkt_len = rx_status & 0x7ff;
+                       struct sk_buff *skb;
+
+                       skb = dev_alloc_skb(pkt_len+5);
+                       lp->stats.rx_bytes += pkt_len;
+                       if (el3_debug > 4)
+                               printk("Receiving packet size %d status %4.4x.\n",
+                                          pkt_len, rx_status);
+                       if (skb != NULL) {
+                               skb->dev = dev;
+                               skb_reserve(skb, 2);     /* Align IP on 16-byte boundaries. */
+
+                               /* 'skb->data' points to the start of sk_buff data area. */
+#ifdef  __powerpc__
+                               insl_unswapped(ioaddr+RX_FIFO, skb_put(skb,pkt_len),
+                                                          (pkt_len + 3) >> 2);
+#else
+                               insl(ioaddr + RX_FIFO, skb_put(skb,pkt_len),
+                                        (pkt_len + 3) >> 2);
+#endif
+
+                               outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */
+                               skb->protocol = eth_type_trans(skb,dev);
+                               netif_rx(skb);
+                               dev->last_rx = jiffies;
+                               lp->stats.rx_packets++;
+                               continue;
+                       }
+                       outw(RxDiscard, ioaddr + EL3_CMD);
+                       lp->stats.rx_dropped++;
+                       if (el3_debug)
+                               printk("%s: Couldn't allocate a sk_buff of size %d.\n",
+                                          dev->name, pkt_len);
+               }
+               inw(ioaddr + EL3_STATUS);                               /* Delay. */
+               while (inw(ioaddr + EL3_STATUS) & 0x1000)
+                       printk(KERN_DEBUG "     Waiting for 3c509 to discard packet, status %x.\n",
+                                  inw(ioaddr + EL3_STATUS) );
+       }
+
+       return 0;
+}
+
+/*
+ *     Set or clear the multicast filter for this adaptor.
+ */
+static void
+set_multicast_list(struct net_device *dev)
+{
+       unsigned long flags;
+       struct el3_private *lp = (struct el3_private *)dev->priv;
+       int ioaddr = dev->base_addr;
+
+       if (el3_debug > 1) {
+               static int old;
+               if (old != dev->mc_count) {
+                       old = dev->mc_count;
+                       printk("%s: Setting Rx mode to %d addresses.\n", dev->name, dev->mc_count);
+               }
+       }
+       spin_lock_irqsave(&lp->lock, flags);
+       if (dev->flags&IFF_PROMISC) {
+               outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm,
+                        ioaddr + EL3_CMD);
+       }
+       else if (dev->mc_count || (dev->flags&IFF_ALLMULTI)) {
+               outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast, ioaddr + EL3_CMD);
+       }
+       else
+                outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD);
+       spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+static int
+el3_close(struct net_device *dev)
+{
+       int ioaddr = dev->base_addr;
+
+       if (el3_debug > 2)
+               printk("%s: Shutting down ethercard.\n", dev->name);
+
+       netif_stop_queue(dev);
+
+       /* Turn off statistics ASAP.  We update lp->stats below. */
+       outw(StatsDisable, ioaddr + EL3_CMD);
+
+       /* Disable the receiver and transmitter. */
+       outw(RxDisable, ioaddr + EL3_CMD);
+       outw(TxDisable, ioaddr + EL3_CMD);
+
+       if (dev->if_port == 3)
+               /* Turn off thinnet power.  Green! */
+               outw(StopCoax, ioaddr + EL3_CMD);
+       else if (dev->if_port == 0) {
+               /* Disable link beat and jabber, if_port may change ere next open(). */
+               EL3WINDOW(4);
+               outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA);
+       }
+
+       free_irq(dev->irq, dev);
+       /* Switching back to window 0 disables the IRQ. */
+       EL3WINDOW(0);
+       /* But we explicitly zero the IRQ line select anyway. */
+       outw(0x0f00, ioaddr + WN0_IRQ);
+
+       update_stats(dev);
+       return 0;
+}
+
+/*#ifdef MODULE*/
+/* Parameters that may be passed into the module. */
+static int debug = -1;
+static int irq[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int xcvr[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+MODULE_PARM(debug,"i");
+MODULE_PARM(irq,"1-8i");
+MODULE_PARM(xcvr,"1-8i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM_DESC(debug, "EtherLink III debug level (0-6)");
+MODULE_PARM_DESC(irq, "EtherLink III IRQ number(s) (assigned)");
+MODULE_PARM_DESC(xcvr,"EtherLink III transceiver(s) (0=internal, 1=external)");
+MODULE_PARM_DESC(max_interrupt_work, "EtherLink III maximum events handled per interrupt");
+#ifdef CONFIG_ISAPNP
+MODULE_PARM(nopnp, "i");
+MODULE_PARM_DESC(nopnp, "EtherLink III disable ISA PnP support (0-1)");
+#endif /* CONFIG_ISAPNP */
+
+int
+init_module(void)
+{
+       int el3_cards = 0;
+
+       if (debug >= 0)
+               el3_debug = debug;
+
+       el3_root_dev = NULL;
+       while (el3_probe(0) == 0) {
+               if (irq[el3_cards] > 1)
+                       el3_root_dev->irq = irq[el3_cards];
+               if (xcvr[el3_cards] >= 0)
+                       el3_root_dev->if_port = xcvr[el3_cards];
+               el3_cards++;
+       }
+
+       return el3_cards ? 0 : -ENODEV;
+}
+
+void
+cleanup_module(void)
+{
+       struct net_device *next_dev;
+
+       /* No need to check MOD_IN_USE, as sys_delete_module() checks. */
+       while (el3_root_dev) {
+               struct el3_private *lp = (struct el3_private *)el3_root_dev->priv;
+#ifdef CONFIG_MCA              
+               if(lp->mca_slot!=-1)
+                       mca_mark_as_unused(lp->mca_slot);
+#endif                 
+               next_dev = lp->next_dev;
+               unregister_netdev(el3_root_dev);
+               release_region(el3_root_dev->base_addr, EL3_IO_EXTENT);
+               kfree(el3_root_dev);
+               el3_root_dev = next_dev;
+       }
+}
+/*#endif*/
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xen-2.4.16/drivers/net/3c59x.c b/xen-2.4.16/drivers/net/3c59x.c
new file mode 100644 (file)
index 0000000..af5d1ad
--- /dev/null
@@ -0,0 +1,2987 @@
+/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux. */
+/*
+       Written 1996-1999 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       This driver is for the 3Com "Vortex" and "Boomerang" series ethercards.
+       Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597
+       and the EtherLink XL 3c900 and 3c905 cards.
+
+       Problem reports and questions should be directed to
+       vortex@scyld.com
+
+       The author may be reached as becker@scyld.com, or C/O
+       Scyld Computing Corporation
+       410 Severn Ave., Suite 210
+       Annapolis MD 21403
+
+       Linux Kernel Additions:
+       
+       0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates
+       0.99H+lk1.0 - Jeff Garzik <jgarzik@mandrakesoft.com>
+               Remove compatibility defines for kernel versions < 2.2.x.
+               Update for new 2.3.x module interface
+       LK1.1.2 (March 19, 2000)
+       * New PCI interface (jgarzik)
+
+    LK1.1.3 25 April 2000, Andrew Morton <andrewm@uow.edu.au>
+    - Merged with 3c575_cb.c
+    - Don't set RxComplete in boomerang interrupt enable reg
+    - spinlock in vortex_timer to protect mdio functions
+    - disable local interrupts around call to vortex_interrupt in
+      vortex_tx_timeout() (So vortex_interrupt can use spin_lock())
+    - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl
+    - In vortex_start_xmit(), move the lock to _after_ we've altered
+      vp->cur_tx and vp->tx_full.  This defeats the race between
+      vortex_start_xmit() and vortex_interrupt which was identified
+      by Bogdan Costescu.
+    - Merged back support for six new cards from various sources
+    - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus
+      insertion oops)
+    - Tell it that 3c905C has NWAY for 100bT autoneg
+    - Fix handling of SetStatusEnd in 'Too much work..' code, as
+      per 2.3.99's 3c575_cb (Dave Hinds).
+    - Split ISR into two for vortex & boomerang
+    - Fix MOD_INC/DEC races
+    - Handle resource allocation failures.
+    - Fix 3CCFE575CT LED polarity
+    - Make tx_interrupt_mitigation the default
+
+    LK1.1.4 25 April 2000, Andrew Morton <andrewm@uow.edu.au>    
+    - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs.
+    - Put vortex_info_tbl into __devinitdata
+    - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well
+      as in the hardware.
+    - Increased the loop counter in issue_and_wait from 2,000 to 4,000.
+
+    LK1.1.5 28 April 2000, andrewm
+    - Added powerpc defines (John Daniel <jdaniel@etresoft.com> said these work...)
+    - Some extra diagnostics
+    - In vortex_error(), reset the Tx on maxCollisions.  Otherwise most
+      chips usually get a Tx timeout.
+    - Added extra_reset module parm
+    - Replaced some inline timer manip with mod_timer
+      (François Romieu <Francois.Romieu@nic.fr>)
+    - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway
+      (this came across from 3c575_cb).
+
+    LK1.1.6 06 Jun 2000, andrewm
+    - Backed out the PPC defines.
+    - Use del_timer_sync(), mod_timer().
+    - Fix wrapped ulong comparison in boomerang_rx()
+    - Add IS_TORNADO, use it to suppress 3c905C checksum error msg
+      (Donald Becker, I Lee Hetherington <ilh@sls.lcs.mit.edu>)
+    - Replace union wn3_config with BFINS/BFEXT manipulation for
+      sparc64 (Pete Zaitcev, Peter Jones)
+    - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex):
+      do a netif_wake_queue() to better recover from errors. (Anders Pedersen,
+      Donald Becker)
+    - Print a warning on out-of-memory (rate limited to 1 per 10 secs)
+    - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland)
+
+    LK1.1.7 2 Jul 2000 andrewm
+    - Better handling of shared IRQs
+    - Reset the transmitter on a Tx reclaim error
+    - Fixed crash under OOM during vortex_open() (Mark Hemment)
+    - Fix Rx cessation problem during OOM (help from Mark Hemment)
+    - The spinlocks around the mdio access were blocking interrupts for 300uS.
+      Fix all this to use spin_lock_bh() within mdio_read/write
+    - Only write to TxFreeThreshold if it's a boomerang - other NICs don't
+      have one.
+    - Added 802.3x MAC-layer flow control support
+
+   LK1.1.8 13 Aug 2000 andrewm
+    - Ignore request_region() return value - already reserved if Cardbus.
+    - Merged some additional Cardbus flags from Don's 0.99Qk
+    - Some fixes for 3c556 (Fred Maciel)
+    - Fix for EISA initialisation (Jan Rekorajski)
+    - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers
+    - Fixed MII_XCVR_PWR for 3CCFE575CT
+    - Added INVERT_LED_PWR, used it.
+    - Backed out the extra_reset stuff
+
+   LK1.1.9 12 Sep 2000 andrewm
+    - Backed out the tx_reset_resume flags.  It was a no-op.
+    - In vortex_error, don't reset the Tx on txReclaim errors
+    - In vortex_error, don't reset the Tx on maxCollisions errors.
+      Hence backed out all the DownListPtr logic here.
+    - In vortex_error, give Tornado cards a partial TxReset on
+      maxCollisions (David Hinds).  Defined MAX_COLLISION_RESET for this.
+    - Redid some driver flags and device names based on pcmcia_cs-3.1.20.
+    - Fixed a bug where, if vp->tx_full is set when the interface
+      is downed, it remains set when the interface is upped.  Bad
+      things happen.
+
+   LK1.1.10 17 Sep 2000 andrewm
+    - Added EEPROM_8BIT for 3c555 (Fred Maciel)
+    - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg)
+    - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO"
+
+   LK1.1.11 13 Nov 2000 andrewm
+    - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER
+
+   LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1)
+    - Call pci_enable_device before we request our IRQ (Tobias Ringstrom)
+    - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra)
+    - Added extended issue_and_wait for the 3c905CX.
+    - Look for an MII on PHY index 24 first (3c905CX oddity).
+    - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger)
+    - Don't free skbs we don't own on oom path in vortex_open().
+
+   LK1.1.13 27 Jan 2001
+    - Added explicit `medialock' flag so we can truly
+      lock the media type down with `options'.
+    - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo <acme@conectiva.com.br>)
+    - Added and used EEPROM_NORESET for 3c556B PM resumes.
+    - Fixed leakage of vp->rx_ring.
+    - Break out separate HAS_HWCKSM device capability flag.
+    - Kill vp->tx_full (ANK)
+    - Merge zerocopy fragment handling (ANK?)
+
+   LK1.1.14 15 Feb 2001
+    - Enable WOL.  Can be turned on with `enable_wol' module option.
+    - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul)
+    - If a device's internalconfig register reports it has NWAY,
+      use it, even if autoselect is enabled.
+
+   LK1.1.15 6 June 2001 akpm
+    - Prevent double counting of received bytes (Lars Christensen)
+    - Add ethtool support (jgarzik)
+    - Add module parm descriptions (Andrzej M. Krzysztofowicz)
+    - Implemented alloc_etherdev() API
+    - Special-case the 'Tx error 82' message.
+
+   LK1.1.16 18 July 2001 akpm
+    - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM
+    - Lessen verbosity of bootup messages
+    - Fix WOL - use new PM API functions.
+    - Use netif_running() instead of vp->open in suspend/resume.
+    - Don't reset the interface logic on open/close/rmmod.  It upsets
+      autonegotiation, and hence DHCP (from 0.99T).
+    - Back out EEPROM_NORESET flag because of the above (we do it for all
+      NICs).
+    - Correct 3c982 identification string
+    - Rename wait_for_completion() to issue_and_wait() to avoid completion.h
+      clash.
+
+    - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details.
+    - Also see Documentation/networking/vortex.txt
+*/
+
+/*
+ * FIXME: This driver _could_ support MTU changing, but doesn't.  See Don's hamachi.c implementation
+ * as well as other drivers.
+ *
+ * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k
+ * due to dead code elimination.  There will be some performance benefits from this due to
+ * elimination of all the tests and reduced cache footprint.
+ */
+
+
+#define DRV_NAME       "3c59x"
+#define DRV_VERSION    "LK1.1.16"
+#define DRV_RELDATE    "19 July 2001"
+
+
+/* "Knobs" that adjust features and parameters. */
+/* Set the copy breakpoint for the copy-only-tiny-frames scheme.
+   Setting to > 1512 effectively disables this feature. */
+#ifndef __arm__
+static const int rx_copybreak = 200;
+#else
+/* ARM systems perform better by disregarding the bus-master
+   transfer capability of these cards. -- rmk */
+static const int rx_copybreak = 1513;
+#endif
+/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */
+static const int mtu = 1500;
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 32;
+/* Tx timeout interval (millisecs) */
+static int watchdog = 5000;
+
+/* Allow aggregation of Tx interrupts.  Saves CPU load at the cost
+ * of possible Tx stalls if the system is blocking interrupts
+ * somewhere else.  Undefine this to disable.
+ */
+#define tx_interrupt_mitigation 1
+
+/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */
+#define vortex_debug debug
+#ifdef VORTEX_DEBUG
+static int vortex_debug = VORTEX_DEBUG;
+#else
+static int vortex_debug = 1;
+#endif
+
+#ifndef __OPTIMIZE__
+#error You must compile this file with the correct options!
+#error See the last lines of the source file.
+#error You must compile this driver with "-O".
+#endif
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/module.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+//#include <linux/in.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/mii.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+//#include <linux/highmem.h>
+#include <asm/irq.h>                   /* For NR_IRQS only. */
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+/* A few values that may be tweaked. */
+/* Keep the ring sizes a power of two for efficiency. */
+#undef  TX_RING_SIZE
+#undef  RX_RING_SIZE
+#define TX_RING_SIZE   16
+#define RX_RING_SIZE   32
+#define PKT_BUF_SZ             1536                    /* Size of each temporary Rx buffer.*/
+
+/* Kernel compatibility defines, some common to David Hinds' PCMCIA package.
+   This is only in the support-all-kernels source code. */
+
+#define RUN_AT(x) (jiffies + (x))
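+/* RUN_AT() turns a relative delay into the absolute jiffies deadline the
+   timer API expects; an illustrative (not literal) rearm of the media timer:
+       mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
+*/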
+
+#include <linux/delay.h>
+
+
+static char version[] __devinitdata =
+DRV_NAME ": Donald Becker and others. www.scyld.com/network/vortex.html\n";
+
+MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
+MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver "
+                                       DRV_VERSION " " DRV_RELDATE);
+MODULE_LICENSE("GPL");
+
+MODULE_PARM(debug, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(hw_checksums, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(flow_ctrl, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(enable_wol, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(compaq_ioaddr, "i");
+MODULE_PARM(compaq_irq, "i");
+MODULE_PARM(compaq_device_id, "i");
+MODULE_PARM(watchdog, "i");
+MODULE_PARM_DESC(debug, "3c59x debug level (0-6)");
+MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex");
+MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)");
+MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)");
+MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)");
+MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)");
+MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt");
+MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)");
+MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds");
+
+/* Operational parameters that usually are not changed. */
+
+/* The Vortex size is twice that of the original EtherLinkIII series: the
+   runtime register window, window 1, is now always mapped in.
+   The Boomerang size is twice as large as the Vortex -- it has additional
+   bus master control registers. */
+#define VORTEX_TOTAL_SIZE 0x20
+#define BOOMERANG_TOTAL_SIZE 0x40
+
+/* Set iff a MII transceiver on any interface requires mdio preamble.
+   This is only set with the original DP83840 on older 3c905 boards, so the extra
+   code size of a per-interface flag is not worthwhile. */
+static char mii_preamble_required;
+
+#define PFX DRV_NAME ": "
+
+
+
+/*
+                               Theory of Operation
+
+I. Board Compatibility
+
+This device driver is designed for the 3Com FastEtherLink and FastEtherLink
+XL, 3Com's PCI to 10/100baseT adapters.  It also works with the 10Mbps
+versions of the FastEtherLink cards.  The supported product IDs are
+  3c590, 3c592, 3c595, 3c597, 3c900, 3c905
+
+The related ISA 3c515 is supported with a separate driver, 3c515.c, included
+with the kernel source or available from
+    cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html
+
+II. Board-specific settings
+
+PCI bus devices are configured by the system at boot time, so no jumpers
+need to be set on the board.  The system BIOS should be set to assign the
+PCI INTA signal to an otherwise unused system IRQ line.
+
+The EEPROM settings for media type and forced-full-duplex are observed.
+The EEPROM media type should be left at the default "autoselect" unless using
+10base2 or AUI connections which cannot be reliably detected.
+
+III. Driver operation
+
+The 3c59x series use an interface that's very similar to the previous 3c5x9
+series.  The primary interface is two programmed-I/O FIFOs, with an
+alternate single-contiguous-region bus-master transfer (see next).
+
+The 3c900 "Boomerang" series uses a full-bus-master interface with separate
+lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet,
+DEC Tulip and Intel Speedo3.  The first chip version retains a compatible
+programmed-I/O interface that has been removed in 'B' and subsequent board
+revisions.
+
+One extension that is advertised in a very large font is that the adapters
+are capable of being bus masters.  On the Vortex chip this capability was
+only for a single contiguous region making it far less useful than the full
+bus master capability.  There is a significant performance impact of taking
+an extra interrupt or polling for the completion of each transfer, as well
+as difficulty sharing the single transfer engine between the transmit and
+receive threads.  Using DMA transfers is a win only with large blocks or
+with the flawed versions of the Intel Orion motherboard PCI controller.
+
+The Boomerang chip's full-bus-master interface is useful, and has the
+currently-unused advantages over other similar chips that queued transmit
+packets may be reordered and receive buffer groups are associated with a
+single frame.
+
+With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme.
+Rather than a fixed intermediate receive buffer, this scheme allocates
+full-sized skbuffs as receive buffers.  The value RX_COPYBREAK is used as
+the copying breakpoint: it is chosen to trade off the memory wasted by
+passing the full-sized skbuff to the queue layer for all frames against
+the cost of copying a frame into a correctly-sized skbuff.
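+
+In outline, the receive path's choice is (a sketch, not the literal code):
+
+       if (pkt_len < rx_copybreak
+           && (nskb = dev_alloc_skb(pkt_len + 2)) != NULL)
+               ... copy the frame into the small skb and recycle the buffer ...
+       else
+               ... pass the full-sized receive skb up and post a fresh one ...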
+
+IIIC. Synchronization
+The driver runs as two independent, single-threaded flows of control.  One
+is the send-packet routine, which enforces single-threaded use by the
+dev->tbusy flag.  The other thread is the interrupt handler, which is single
+threaded by the hardware and other software.
+
+IV. Notes
+
+Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development
+3c590, 3c595, and 3c900 boards.
+The name "Vortex" is the internal 3Com project name for the PCI ASIC, and
+the EISA version is called "Demon".  According to Terry these names come
+from rides at the local amusement park.
+
+The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes!
+This driver only supports ethernet packets because of the skbuff allocation
+limit of 4K.
+*/
+
+/* This table drives the PCI probe routines.  It's mostly boilerplate in all
+   of the drivers, and will likely be provided by some future kernel.
+*/
+enum pci_flags_bit {
+       PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+       PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8,
+       EEPROM_8BIT=0x10,       /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */
+       HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100,
+       INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800,
+       EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 };
+
+enum vortex_chips {
+       CH_3C590 = 0,
+       CH_3C592,
+       CH_3C597,
+       CH_3C595_1,
+       CH_3C595_2,
+
+       CH_3C595_3,
+       CH_3C900_1,
+       CH_3C900_2,
+       CH_3C900_3,
+       CH_3C900_4,
+
+       CH_3C900_5,
+       CH_3C900B_FL,
+       CH_3C905_1,
+       CH_3C905_2,
+       CH_3C905B_1,
+
+       CH_3C905B_2,
+       CH_3C905B_FX,
+       CH_3C905C,
+       CH_3C980,
+       CH_3C9805,
+
+       CH_3CSOHO100_TX,
+       CH_3C555,
+       CH_3C556,
+       CH_3C556B,
+       CH_3C575,
+
+       CH_3C575_1,
+       CH_3CCFE575,
+       CH_3CCFE575CT,
+       CH_3CCFE656,
+       CH_3CCFEM656,
+
+       CH_3CCFEM656_1,
+       CH_3C450,
+};
+
+
+/* note: this array is directly indexed by the enums above, and MUST
+ * be kept in sync with both the enums above and the PCI device
+ * table below.
+ */
+static struct vortex_chip_info {
+       const char *name;
+       int flags;
+       int drv_flags;
+       int io_size;
+} vortex_info_tbl[] __devinitdata = {
+#define EISA_TBL_OFFSET        0               /* Offset of this entry for vortex_eisa_init */
+       {"3c590 Vortex 10Mbps",
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+       {"3c592 EISA 10Mbps Demon/Vortex",                                      /* AKPM: from Don's 3c59x_cb.c 0.49H */
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+       {"3c597 EISA Fast Demon/Vortex",                                        /* AKPM: from Don's 3c59x_cb.c 0.49H */
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+       {"3c595 Vortex 100baseTx",
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+       {"3c595 Vortex 100baseT4",
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+
+       {"3c595 Vortex 100base-MII",
+        PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, },
+       {"3c900 Boomerang 10baseT",
+        PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+       {"3c900 Boomerang 10Mbps Combo",
+        PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, },
+       {"3c900 Cyclone 10Mbps TPO",                                            /* AKPM: from Don's 0.99M */
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+       {"3c900 Cyclone 10Mbps Combo",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+       {"3c900 Cyclone 10Mbps TPC",                                            /* AKPM: from Don's 0.99M */
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+       {"3c900B-FL Cyclone 10base-FL",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+       {"3c905 Boomerang 100baseTx",
+        PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+       {"3c905 Boomerang 100baseT4",
+        PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, },
+       {"3c905B Cyclone 100baseTx",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+
+       {"3c905B Cyclone 10/100/BNC",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+       {"3c905B-FX Cyclone 100baseFx",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+       {"3c905C Tornado",
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+       {"3c980 Cyclone",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+       {"3c982 Dual Port Server Cyclone",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
+
+       {"3cSOHO100-TX Hurricane",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+       {"3c555 Laptop Hurricane",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, },
+       {"3c556 Laptop Tornado",
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR|
+                                                                       HAS_HWCKSM, 128, },
+       {"3c556B Laptop Hurricane",
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR|
+                                                                       HAS_HWCKSM, 128, },
+       {"3c575 [Megahertz] 10/100 LAN  CardBus",
+       PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+
+       {"3c575 Boomerang CardBus",
+        PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
+       {"3CCFE575BT Cyclone CardBus",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|
+                                                                       INVERT_LED_PWR|HAS_HWCKSM, 128, },
+       {"3CCFE575CT Tornado CardBus",
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+                                                                       MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+       {"3CCFE656 Cyclone CardBus",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+                                                                       INVERT_LED_PWR|HAS_HWCKSM, 128, },
+       {"3CCFEM656B Cyclone+Winmodem CardBus",
+        PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+                                                                       INVERT_LED_PWR|HAS_HWCKSM, 128, },
+
+       {"3CXFEM656C Tornado+Winmodem CardBus",                 /* From pcmcia-cs-3.1.5 */
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR|
+                                                                       MAX_COLLISION_RESET|HAS_HWCKSM, 128, },
+       {"3c450 HomePNA Tornado",                                               /* AKPM: from Don's 0.99Q */
+        PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, },
+       {0,}, /* 0 terminated list. */
+};
+
+
+static struct pci_device_id vortex_pci_tbl[] __devinitdata = {
+       { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 },
+       { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 },
+       { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 },
+       { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 },
+       { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 },
+
+       { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 },
+       { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 },
+       { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 },
+       { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 },
+       { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 },
+
+       { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 },
+       { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL },
+       { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 },
+       { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 },
+       { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 },
+
+       { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 },
+       { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX },
+       { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C },
+       { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 },
+       { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 },
+
+       { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX },
+       { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 },
+       { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 },
+       { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B },
+       { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 },
+
+       { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 },
+       { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 },
+       { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT },
+       { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 },
+       { 0x10B7, 0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 },
+
+       { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 },
+       { 0x10B7, 0x4500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C450 },
+       {0,}                                            /* 0 terminated list. */
+};
+MODULE_DEVICE_TABLE(pci, vortex_pci_tbl);
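+
+/* The two tables stay in step through the enum: e.g. the { 0x10B7, 0x9200 }
+   entry above carries driver_data CH_3C905C, which indexes vortex_info_tbl[]
+   to the "3c905C Tornado" record. */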
+
+
+/* Operational definitions.
+   These are not used by other compilation units and thus are not
+   exported in a ".h" file.
+
+   First the windows.  There are eight register windows, with the command
+   and status registers available in each.
+   */
+#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD)
+#define EL3_CMD 0x0e
+#define EL3_STATUS 0x0e
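+/* e.g. EL3WINDOW(6) selects the statistics window and EL3WINDOW(1) returns
+   to the normal run-time window, the pattern update_stats() uses above. */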
+
+/* The top five bits written to EL3_CMD are a command, the lower
+   11 bits are the parameter, if applicable.
+   Note that 11 parameter bits were fine for ethernet, but the new chip
+   can handle FDDI-length frames (~4500 octets) and parameters now count
+   32-bit 'Dwords' rather than octets. */
+
+enum vortex_cmd {
+       TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11,
+       RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11,
+       UpStall = 6<<11, UpUnstall = (6<<11)+1,
+       DownStall = (6<<11)+2, DownUnstall = (6<<11)+3,
+       RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11,
+       FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11,
+       SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11,
+       SetTxThreshold = 18<<11, SetTxStart = 19<<11,
+       StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11,
+       StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,};
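+
+/* A command word is the opcode in the top five bits OR'd with its parameter
+   in the low eleven; acknowledging a latched interrupt, for instance, is
+       outw(AckIntr | IntLatch, ioaddr + EL3_CMD);
+*/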
+
+/* The SetRxFilter command accepts the following classes: */
+enum RxFilter {
+       RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 };
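+
+/* e.g. promiscuous mode enables every class at once:
+       outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm,
+            ioaddr + EL3_CMD);
+*/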
+
+/* Bits in the general status register. */
+enum vortex_status {
+       IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004,
+       TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020,
+       IntReq = 0x0040, StatsFull = 0x0080,
+       DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10,
+       DMAInProgress = 1<<11,                  /* DMA controller is still busy.*/
+       CmdInProgress = 1<<12,                  /* EL3_CMD is still busy.*/
+};
+
+/* Register window 1 offsets, the window used in normal operation.
+   On the Vortex this window is always mapped at offsets 0x10-0x1f. */
+enum Window1 {
+       TX_FIFO = 0x10,  RX_FIFO = 0x10,  RxErrors = 0x14,
+       RxStatus = 0x18,  Timer=0x1A, TxStatus = 0x1B,
+       TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */
+};
+enum Window0 {
+       Wn0EepromCmd = 10,              /* Window 0: EEPROM command register. */
+       Wn0EepromData = 12,             /* Window 0: EEPROM results register. */
+       IntrStatus=0x0E,                /* Valid in all windows. */
+};
+enum Win0_EEPROM_bits {
+       EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0,
+       EEPROM_EWENB = 0x30,            /* Enable erasing/writing for 10 msec. */
+       EEPROM_EWDIS = 0x00,            /* Disable EWENB before 10 msec timeout. */
+};
+/* EEPROM locations. */
+enum eeprom_offset {
+       PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3,
+       EtherLink3ID=7, IFXcvrIO=8, IRQLine=9,
+       NodeAddr01=10, NodeAddr23=11, NodeAddr45=12,
+       DriverTune=13, Checksum=15};
+
+enum Window2 {                 /* Window 2. */
+       Wn2_ResetOptions=12,
+};
+enum Window3 {                 /* Window 3: MAC/config bits. */
+       Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8,
+};
+
+#define BFEXT(value, offset, bitcount)  \
+    ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1))
+
+#define BFINS(lhs, rhs, offset, bitcount)                                      \
+       (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) |   \
+       (((rhs) & ((1 << (bitcount)) - 1)) << (offset)))
+
+#define RAM_SIZE(v)            BFEXT(v, 0, 3)
+#define RAM_WIDTH(v)   BFEXT(v, 3, 1)
+#define RAM_SPEED(v)   BFEXT(v, 4, 2)
+#define ROM_SIZE(v)            BFEXT(v, 6, 2)
+#define RAM_SPLIT(v)   BFEXT(v, 16, 2)
+#define XCVR(v)                        BFEXT(v, 20, 4)
+#define AUTOSELECT(v)  BFEXT(v, 24, 1)
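+
+/* Worked example: for a hypothetical Wn3_Config value v = 0x01A00000,
+   XCVR(v) == BFEXT(v, 20, 4) == (0x01A00000 >> 20) & 0xF == 0xA,
+   i.e. XCVR_Default. */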
+
+enum Window4 {         /* Window 4: Xcvr/media bits. */
+       Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10,
+};
+enum Win4_Media_bits {
+       Media_SQE = 0x0008,             /* Enable SQE error counting for AUI. */
+       Media_10TP = 0x00C0,    /* Enable link beat and jabber for 10baseT. */
+       Media_Lnk = 0x0080,             /* Enable just link beat for 100TX/100FX. */
+       Media_LnkBeat = 0x0800,
+};
+enum Window7 {                                 /* Window 7: Bus Master control. */
+       Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12,
+};
+/* Boomerang bus master control registers. */
+enum MasterCtrl {
+       PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c,
+       TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38,
+};
+
+/* The Rx and Tx descriptor lists.
+   Caution Alpha hackers: these types are 32 bits!  Note also the 8 byte
+   alignment constraint on tx_ring[] and rx_ring[]. */
+#define LAST_FRAG      0x80000000                      /* Last Addr/Len pair in descriptor. */
+#define DN_COMPLETE    0x00010000                      /* This packet has been downloaded */
+struct boom_rx_desc {
+       u32 next;                                       /* Last entry points to 0.   */
+       s32 status;
+       u32 addr;                                       /* Up to 63 addr/len pairs possible. */
+       s32 length;                                     /* Set LAST_FRAG to indicate last pair. */
+};
+/* Values for the Rx status entry. */
+enum rx_desc_status {
+       RxDComplete=0x00008000, RxDError=0x4000,
+       /* See boomerang_rx() for actual error bits */
+       IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27,
+       IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31,
+};
+
+#ifdef MAX_SKB_FRAGS
+#define DO_ZEROCOPY 1
+#else
+#define DO_ZEROCOPY 0
+#endif
+
+struct boom_tx_desc {
+       u32 next;                                       /* Last entry points to 0.   */
+       s32 status;                                     /* bits 0:12 length, others see below.  */
+#if DO_ZEROCOPY
+       struct {
+               u32 addr;
+               s32 length;
+       } frag[1+MAX_SKB_FRAGS];
+#else
+               u32 addr;
+               s32 length;
+#endif
+};
+
+/* Values for the Tx status entry. */
+enum tx_desc_status {
+       CRCDisable=0x2000, TxDComplete=0x8000,
+       AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000,
+       TxIntrUploaded=0x80000000,              /* IRQ when in FIFO, but maybe not sent. */
+};
+
+/* Chip features we care about in vp->capabilities, read from the EEPROM. */
+enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 };
+
+struct vortex_private {
+       /* The Rx and Tx rings should be quad-word-aligned. */
+       struct boom_rx_desc* rx_ring;
+       struct boom_tx_desc* tx_ring;
+       dma_addr_t rx_ring_dma;
+       dma_addr_t tx_ring_dma;
+       /* The addresses of transmit- and receive-in-place skbuffs. */
+       struct sk_buff* rx_skbuff[RX_RING_SIZE];
+       struct sk_buff* tx_skbuff[TX_RING_SIZE];
+       struct net_device *next_module;         /* NULL if PCI device */
+       unsigned int cur_rx, cur_tx;            /* The next free ring entry */
+       unsigned int dirty_rx, dirty_tx;        /* The ring entries to be free()ed. */
+       struct net_device_stats stats;
+       struct sk_buff *tx_skb;                         /* Packet being eaten by bus master ctrl.  */
+       dma_addr_t tx_skb_dma;                          /* Allocated DMA address for bus master ctrl DMA.   */
+
+       /* PCI configuration space information. */
+       struct pci_dev *pdev;
+       char *cb_fn_base;                                       /* CardBus function status addr space. */
+
+       /* Some values here are only for performance evaluation and path-coverage */
+       int rx_nocopy, rx_copy, queued_packet, rx_csumhits;
+       int card_idx;
+
+       /* The remainder are related to chip state, mostly media selection. */
+       struct timer_list timer;                        /* Media selection timer. */
+       struct timer_list rx_oom_timer;         /* Rx skb allocation retry timer */
+       int options;                                            /* User-settable misc. driver options. */
+       unsigned int media_override:4,          /* Passed-in media type. */
+               default_media:4,                                /* Read from the EEPROM/Wn3_Config. */
+               full_duplex:1, force_fd:1, autoselect:1,
+               bus_master:1,                                   /* Vortex can only do a fragment bus-m. */
+               full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang  */
+               flow_ctrl:1,                                    /* Use 802.3x flow control (PAUSE only) */
+               partner_flow_ctrl:1,                    /* Partner supports flow control */
+               has_nway:1,
+               enable_wol:1,                                   /* Wake-on-LAN is enabled */
+               pm_state_valid:1,                               /* power_state[] has sane contents */
+               open:1,
+               medialock:1,
+               must_free_region:1;                             /* Flag: if zero, Cardbus owns the I/O region */
+       int drv_flags;
+       u16 status_enable;
+       u16 intr_enable;
+       u16 available_media;                            /* From Wn3_Options. */
+       u16 capabilities, info1, info2;         /* Various, from EEPROM. */
+       u16 advertising;                                        /* NWay media advertisement */
+       unsigned char phys[2];                          /* MII device addresses. */
+       u16 deferred;                                           /* Resend these interrupts when we
+                                                                                * bail from the ISR */
+       u16 io_size;                                            /* Size of PCI region (for release_region) */
+       spinlock_t lock;                                        /* Serialise access to device & its vortex_private */
+       spinlock_t mdio_lock;                           /* Serialise access to mdio hardware */
+       u32 power_state[16];
+};
+
+/* The action to take with a media selection timer tick.
+   Note that we deviate from the 3Com order by checking 10base2 before AUI.
+ */
+enum xcvr_types {
+       XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx,
+       XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10,
+};
+
+static struct media_table {
+       char *name;
+       unsigned int media_bits:16,             /* Bits to set in Wn4_Media register. */
+               mask:8,                                         /* The transceiver-present bit in Wn3_Config.*/
+               next:8;                                         /* The media type to try next. */
+       int wait;                                               /* Time before we check media status. */
+} media_tbl[] = {
+  {    "10baseT",   Media_10TP,0x08, XCVR_10base2, (14*HZ)/10},
+  { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10},
+  { "undefined", 0,                    0x80, XCVR_10baseT, 10000},
+  { "10base2",   0,                    0x10, XCVR_AUI,         (1*HZ)/10},
+  { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10},
+  { "100baseFX", Media_Lnk, 0x04, XCVR_MII,            (14*HZ)/10},
+  { "MII",              0,                     0x41, XCVR_10baseT, 3*HZ },
+  { "undefined", 0,                    0x01, XCVR_10baseT, 10000},
+  { "Autonegotiate", 0,                0x41, XCVR_10baseT, 3*HZ},
+  { "MII-External",     0,             0x41, XCVR_10baseT, 3*HZ },
+  { "Default",  0,                     0xFF, XCVR_10baseT, 10000},
+};
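+
+/* Following the 'next' links from XCVR_10baseT gives the autoselect order
+   10baseT -> 10base2 -> AUI -> Default; note 10base2 is tried before AUI,
+   the deviation from 3Com's order mentioned above. */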
+
+static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq,
+                                  int chip_idx, int card_idx);
+static void vortex_up(struct net_device *dev);
+static void vortex_down(struct net_device *dev);
+static int vortex_open(struct net_device *dev);
+static void mdio_sync(long ioaddr, int bits);
+static int mdio_read(struct net_device *dev, int phy_id, int location);
+static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
+static void vortex_timer(unsigned long arg);
+static void rx_oom_timer(unsigned long arg);
+static int vortex_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int vortex_rx(struct net_device *dev);
+static int boomerang_rx(struct net_device *dev);
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static int vortex_close(struct net_device *dev);
+static void dump_tx_ring(struct net_device *dev);
+static void update_stats(long ioaddr, struct net_device *dev);
+static struct net_device_stats *vortex_get_stats(struct net_device *dev);
+static void set_rx_mode(struct net_device *dev);
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void vortex_tx_timeout(struct net_device *dev);
+static void acpi_set_WOL(struct net_device *dev);
+
+/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */
+/* Option count limit only -- unlimited interfaces are supported. */
+#define MAX_UNITS 8
+static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,};
+static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+/* #define dev_alloc_skb dev_alloc_skb_debug */
+
+/* A list of all installed Vortex EISA devices, for removing the driver module. */
+static struct net_device *root_vortex_eisa_dev;
+
+/* Variables to work-around the Compaq PCI BIOS32 problem. */
+static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900;
+
+static int vortex_cards_found;
+
+#ifdef CONFIG_PM
+
+static int vortex_suspend (struct pci_dev *pdev, u32 state)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       if (dev && dev->priv) {
+               if (netif_running(dev)) {
+                       netif_device_detach(dev);
+                       vortex_down(dev);
+               }
+       }
+       return 0;
+}
+
+static int vortex_resume (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       if (dev && dev->priv) {
+               if (netif_running(dev)) {
+                       vortex_up(dev);
+                       netif_device_attach(dev);
+               }
+       }
+       return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/* returns count found (>= 0), or negative on error */
+static int __init vortex_eisa_init (void)
+{
+       long ioaddr;
+       int rc;
+       int orig_cards_found = vortex_cards_found;
+
+       /* Now check all slots of the EISA bus. */
+       if (!EISA_bus)
+               return 0;
+
+       for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) {
+               int device_id;
+
+               if (request_region(ioaddr, VORTEX_TOTAL_SIZE, DRV_NAME) == NULL)
+                       continue;
+
+               /* Check the standard EISA ID register for an encoded '3Com'. */
+               if (inw(ioaddr + 0xC80) != 0x6d50) {
+                       release_region (ioaddr, VORTEX_TOTAL_SIZE);
+                       continue;
+               }
+
+               /* Check for a product that we support, 3c59{2,7} any rev. */
+               device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83);
+               if ((device_id & 0xFF00) != 0x5900) {
+                       release_region (ioaddr, VORTEX_TOTAL_SIZE);
+                       continue;
+               }
+
+               rc = vortex_probe1(NULL, ioaddr, inw(ioaddr + 0xC88) >> 12,
+                                  EISA_TBL_OFFSET, vortex_cards_found);
+               if (rc == 0)
+                       vortex_cards_found++;
+               else
+                       release_region (ioaddr, VORTEX_TOTAL_SIZE);
+       }
+
+       /* Special code to work-around the Compaq PCI BIOS32 problem. */
+       if (compaq_ioaddr) {
+               vortex_probe1(NULL, compaq_ioaddr, compaq_irq,
+                                         compaq_device_id, vortex_cards_found++);
+       }
+
+       return vortex_cards_found - orig_cards_found;
+}
+
+/* returns count (>= 0), or negative on error */
+static int __devinit vortex_init_one (struct pci_dev *pdev,
+                                     const struct pci_device_id *ent)
+{
+       int rc;
+
+       /* wake up and enable device */         
+       if (pci_enable_device (pdev)) {
+               rc = -EIO;
+       } else {
+               rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq,
+                                   ent->driver_data, vortex_cards_found);
+               if (rc == 0)
+                       vortex_cards_found++;
+       }
+       return rc;
+}
+
+/*
+ * Start up the PCI device which is described by *pdev.
+ * Return 0 on success.
+ *
+ * NOTE: pdev can be NULL, for the case of an EISA driver
+ */
+static int __devinit vortex_probe1(struct pci_dev *pdev,
+                                  long ioaddr, int irq,
+                                  int chip_idx, int card_idx)
+{
+       struct vortex_private *vp;
+       int option;
+       unsigned int eeprom[0x40], checksum = 0;                /* EEPROM contents */
+       int i, step;
+       struct net_device *dev;
+       static int printed_version;
+       int retval, print_info;
+       struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx];
+       char *print_name;
+
+       if (!printed_version) {
+               printk (version);
+               printed_version = 1;
+       }
+
+       print_name = pdev ? pdev->slot_name : "3c59x";
+
+       dev = alloc_etherdev(sizeof(*vp));
+       retval = -ENOMEM;
+       if (!dev) {
+               printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n");
+               goto out;
+       }
+       SET_MODULE_OWNER(dev);
+       vp = dev->priv;
+
+       /* The lower four bits are the media type. */
+       if (dev->mem_start) {
+               /*
+                * The 'options' param is passed in as the third arg to the
+                * LILO 'ether=' argument for non-modular use
+                */
+               option = dev->mem_start;
+       }
+       else if (card_idx < MAX_UNITS)
+               option = options[card_idx];
+       else
+               option = -1;
+
+       if (option > 0) {
+               if (option & 0x8000)
+                       vortex_debug = 7;
+               if (option & 0x4000)
+                       vortex_debug = 2;
+               if (option & 0x0400)
+                       vp->enable_wol = 1;
+       }
+
+       print_info = (vortex_debug > 1);
+       if (print_info)
+               printk (KERN_INFO "See Documentation/networking/vortex.txt\n");
+
+       printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n",
+              print_name,
+              pdev ? "PCI" : "EISA",
+              vci->name,
+              ioaddr);
+
+       dev->base_addr = ioaddr;
+       dev->irq = irq;
+       dev->mtu = mtu;
+       vp->drv_flags = vci->drv_flags;
+       vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0;
+       vp->io_size = vci->io_size;
+       vp->card_idx = card_idx;
+
+       /* module list only for EISA devices */
+       if (pdev == NULL) {
+               vp->next_module = root_vortex_eisa_dev;
+               root_vortex_eisa_dev = dev;
+       }
+
+       /* PCI-only startup logic */
+       if (pdev) {
+               /* EISA resources already marked, so only PCI needs to do this here */
+               /* Ignore return value, because Cardbus drivers already allocate for us */
+               if (request_region(ioaddr, vci->io_size, print_name) != NULL)
+                       vp->must_free_region = 1;
+
+               /* enable bus-mastering if necessary */         
+               if (vci->flags & PCI_USES_MASTER)
+                       pci_set_master (pdev);
+
+               if (vci->drv_flags & IS_VORTEX) {
+                       u8 pci_latency;
+                       u8 new_latency = 248;
+
+                       /* Check the PCI latency value.  On the 3c590 series the latency timer
+                          must be set to the maximum value to avoid data corruption that occurs
+                          when the timer expires during a transfer.  This bug exists in the
+                          Vortex chips only. */
+                       pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency);
+                       if (pci_latency < new_latency) {
+                               printk(KERN_INFO "%s: Overriding PCI latency"
+                                       " timer (CFLT) setting of %d, new value is %d.\n",
+                                       print_name, pci_latency, new_latency);
+                                       pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency);
+                       }
+               }
+       }
+
+       spin_lock_init(&vp->lock);
+       spin_lock_init(&vp->mdio_lock);
+       vp->pdev = pdev;
+
+       /* Allocate both rings from a single consistent DMA block; this also
+          keeps them at least 16 byte aligned. */
+       vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE
+                                          + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+                                          &vp->rx_ring_dma);
+       retval = -ENOMEM;
+       if (vp->rx_ring == 0)
+               goto free_region;
+
+       vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE);
+       vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE;
+
+       /* if we are a PCI driver, we store info in pdev->driver_data
+        * instead of a module list */  
+       if (pdev)
+               pci_set_drvdata(pdev, dev);
+
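+       /*
+        * The low nibble of 'option' selects the transceiver; 7 means no
+        * override, and the legacy value 2 is remapped to 0 below.
+        */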
+       vp->media_override = 7;
+       if (option >= 0) {
+               vp->media_override = ((option & 7) == 2)  ?  0  :  option & 15;
+               if (vp->media_override != 7)
+                       vp->medialock = 1;
+               vp->full_duplex = (option & 0x200) ? 1 : 0;
+               vp->bus_master = (option & 16) ? 1 : 0;
+       }
+
+       if (card_idx < MAX_UNITS) {
+               if (full_duplex[card_idx] > 0)
+                       vp->full_duplex = 1;
+               if (flow_ctrl[card_idx] > 0)
+                       vp->flow_ctrl = 1;
+               if (enable_wol[card_idx] > 0)
+                       vp->enable_wol = 1;
+       }
+
+       vp->force_fd = vp->full_duplex;
+       vp->options = option;
+       /* Read the station address from the EEPROM. */
+       EL3WINDOW(0);
+       {
+               int base;
+
+               if (vci->drv_flags & EEPROM_8BIT)
+                       base = 0x230;
+               else if (vci->drv_flags & EEPROM_OFFSET)
+                       base = EEPROM_Read + 0x30;
+               else
+                       base = EEPROM_Read;
+
+               for (i = 0; i < 0x40; i++) {
+                       int timer;
+                       outw(base + i, ioaddr + Wn0EepromCmd);
+                       /* Pause for at least 162 us for the read to take place. */
+                       for (timer = 10; timer >= 0; timer--) {
+                               udelay(162);
+                               if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0)
+                                       break;
+                       }
+                       eeprom[i] = inw(ioaddr + Wn0EepromData);
+               }
+       }
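+       /*
+        * EEPROM integrity check: XOR the configuration words together and
+        * fold the result to a single byte; a valid image yields zero.
+        */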
+       for (i = 0; i < 0x18; i++)
+               checksum ^= eeprom[i];
+       checksum = (checksum ^ (checksum >> 8)) & 0xff;
+       if (checksum != 0x00) {         /* Grrr, a needless incompatible change from 3Com. */
+               while (i < 0x21)
+                       checksum ^= eeprom[i++];
+               checksum = (checksum ^ (checksum >> 8)) & 0xff;
+       }
+       if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO))
+               printk(" ***INVALID CHECKSUM %4.4x*** ", checksum);
+       for (i = 0; i < 3; i++)
+               ((u16 *)dev->dev_addr)[i] = htons(eeprom[i + 10]);
+       if (print_info) {
+               for (i = 0; i < 6; i++)
+                       printk("%c%2.2x", i ? ':' : ' ', dev->dev_addr[i]);
+       }
+       EL3WINDOW(2);
+       for (i = 0; i < 6; i++)
+               outb(dev->dev_addr[i], ioaddr + i);
+
+#ifdef __sparc__
+       if (print_info)
+               printk(", IRQ %s\n", __irq_itoa(dev->irq));
+#else
+       if (print_info)
+               printk(", IRQ %d\n", dev->irq);
+       /* Tell them about an invalid IRQ. */
+       if (dev->irq <= 0 || dev->irq >= NR_IRQS)
+               printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n",
+                          dev->irq);
+#endif
+
+       EL3WINDOW(4);
+       step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1;
+       if (print_info) {
+               printk(KERN_INFO "  product code %02x%02x rev %02x.%d date %02d-"
+                       "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14],
+                       step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9);
+       }
+
+
+       if (pdev && vci->drv_flags & HAS_CB_FNS) {
+               unsigned long fn_st_addr;                       /* Cardbus function status space */
+               unsigned short n;
+
+               fn_st_addr = pci_resource_start (pdev, 2);
+               if (fn_st_addr) {
+                       vp->cb_fn_base = ioremap(fn_st_addr, 128);
+                       retval = -ENOMEM;
+                       if (!vp->cb_fn_base)
+                               goto free_ring;
+               }
+               if (print_info) {
+                       printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n",
+                               print_name, fn_st_addr, vp->cb_fn_base);
+               }
+               EL3WINDOW(2);
+
+               n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+               if (vp->drv_flags & INVERT_LED_PWR)
+                       n |= 0x10;
+               if (vp->drv_flags & INVERT_MII_PWR)
+                       n |= 0x4000;
+               outw(n, ioaddr + Wn2_ResetOptions);
+       }
+
+       /* Extract our information from the EEPROM data. */
+       vp->info1 = eeprom[13];
+       vp->info2 = eeprom[15];
+       vp->capabilities = eeprom[16];
+
+       if (vp->info1 & 0x8000) {
+               vp->full_duplex = 1;
+               if (print_info)
+                       printk(KERN_INFO "Full duplex capable\n");
+       }
+
+       {
+               static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"};
+               unsigned int config;
+               EL3WINDOW(3);
+               vp->available_media = inw(ioaddr + Wn3_Options);
+               if ((vp->available_media & 0xff) == 0)          /* Broken 3c916 */
+                       vp->available_media = 0x40;
+               config = inl(ioaddr + Wn3_Config);
+               if (print_info) {
+                       printk(KERN_DEBUG "  Internal config register is %4.4x, "
+                                  "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options));
+                       printk(KERN_INFO "  %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n",
+                                  8 << RAM_SIZE(config),
+                                  RAM_WIDTH(config) ? "word" : "byte",
+                                  ram_split[RAM_SPLIT(config)],
+                                  AUTOSELECT(config) ? "autoselect/" : "",
+                                  XCVR(config) > XCVR_ExtMII ? "<invalid transceiver>" :
+                                  media_tbl[XCVR(config)].name);
+               }
+               vp->default_media = XCVR(config);
+               if (vp->default_media == XCVR_NWAY)
+                       vp->has_nway = 1;
+               vp->autoselect = AUTOSELECT(config);
+       }
+
+       if (vp->media_override != 7) {
+               printk(KERN_INFO "%s:  Media override to transceiver type %d (%s).\n",
+                               print_name, vp->media_override,
+                               media_tbl[vp->media_override].name);
+               dev->if_port = vp->media_override;
+       } else
+               dev->if_port = vp->default_media;
+
+       if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+               int phy, phy_idx = 0;
+               EL3WINDOW(4);
+               mii_preamble_required++;
+               mii_preamble_required++;
+               mdio_read(dev, 24, 1);
+               for (phy = 0; phy < 32 && phy_idx < 1; phy++) {
+                       int mii_status, phyx;
+
+                       /*
+                        * For the 3c905CX we look at index 24 first, because it bogusly
+                        * reports an external PHY at all indices
+                        */
+                       if (phy == 0)
+                               phyx = 24;
+                       else if (phy <= 24)
+                               phyx = phy - 1;
+                       else
+                               phyx = phy;
+                       mii_status = mdio_read(dev, phyx, 1);
+                       if (mii_status  &&  mii_status != 0xffff) {
+                               vp->phys[phy_idx++] = phyx;
+                               if (print_info) {
+                                       printk(KERN_INFO "  MII transceiver found at address %d,"
+                                               " status %4x.\n", phyx, mii_status);
+                               }
+                               if ((mii_status & 0x0040) == 0)
+                                       mii_preamble_required++;
+                       }
+               }
+               mii_preamble_required--;
+               if (phy_idx == 0) {
+                       printk(KERN_WARNING"  ***WARNING*** No MII transceivers found!\n");
+                       vp->phys[0] = 24;
+               } else {
+                       vp->advertising = mdio_read(dev, vp->phys[0], 4);
+                       if (vp->full_duplex) {
+                               /* Only advertise the FD media types. */
+                               vp->advertising &= ~0x02A0;
+                               mdio_write(dev, vp->phys[0], 4, vp->advertising);
+                       }
+               }
+       }
+
+       if (vp->capabilities & CapBusMaster) {
+               vp->full_bus_master_tx = 1;
+               if (print_info) {
+                       printk(KERN_INFO "  Enabling bus-master transmits and %s receives.\n",
+                       (vp->info2 & 1) ? "early" : "whole-frame" );
+               }
+               vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2;
+               vp->bus_master = 0;             /* AKPM: vortex only */
+       }
+
+       /* The 3c59x-specific entries in the device structure. */
+       dev->open = vortex_open;
+       if (vp->full_bus_master_tx) {
+               dev->hard_start_xmit = boomerang_start_xmit;
+               /* Actually, it still should work with iommu. */
+               dev->features |= NETIF_F_SG;
+               if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) ||
+                                       (hw_checksums[card_idx] == 1)) {
+                               dev->features |= NETIF_F_IP_CSUM;
+               }
+       } else {
+               dev->hard_start_xmit = vortex_start_xmit;
+       }
+
+       if (print_info) {
+               printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n",
+                               print_name,
+                               (dev->features & NETIF_F_SG) ? "en":"dis",
+                               (dev->features & NETIF_F_IP_CSUM) ? "en":"dis");
+       }
+
+       dev->stop = vortex_close;
+       dev->get_stats = vortex_get_stats;
+       dev->do_ioctl = vortex_ioctl;
+       dev->set_multicast_list = set_rx_mode;
+       dev->tx_timeout = vortex_tx_timeout;
+       dev->watchdog_timeo = (watchdog * HZ) / 1000;
+       if (pdev && vp->enable_wol) {
+               vp->pm_state_valid = 1;
+               pci_save_state(vp->pdev, vp->power_state);
+               acpi_set_WOL(dev);
+       }
+       retval = register_netdev(dev);
+       if (retval == 0)
+               return 0;
+
+free_ring:
+       pci_free_consistent(pdev,
+                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
+                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+                                               vp->rx_ring,
+                                               vp->rx_ring_dma);
+free_region:
+       if (vp->must_free_region)
+               release_region(ioaddr, vci->io_size);
+       kfree (dev);
+       printk(KERN_ERR PFX "vortex_probe1 fails.  Returns %d\n", retval);
+out:
+       return retval;
+}
+
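+/*
+ * Issue a command and busy-wait for its completion.  First spin for up to
+ * 2000 fast iterations; if the command is still pending, fall back to a
+ * slower poll in 10us steps for up to one second before giving up.
+ */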
+static void
+issue_and_wait(struct net_device *dev, int cmd)
+{
+       int i;
+
+       outw(cmd, dev->base_addr + EL3_CMD);
+       for (i = 0; i < 2000; i++) {
+               if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress))
+                       return;
+       }
+
+       /* OK, that didn't work.  Do it the slow way.  One second */
+       for (i = 0; i < 100000; i++) {
+               if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) {
+                       if (vortex_debug > 1)
+                               printk(KERN_INFO "%s: command 0x%04x took %d usecs\n",
+                                          dev->name, cmd, i * 10);
+                       return;
+               }
+               udelay(10);
+       }
+       printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n",
+                          dev->name, cmd, inw(dev->base_addr + EL3_STATUS));
+}
+
+static void
+vortex_up(struct net_device *dev)
+{
+       long ioaddr = dev->base_addr;
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       unsigned int config;
+       int i;
+
+       if (vp->pdev && vp->enable_wol) {
+               pci_set_power_state(vp->pdev, 0);       /* Go active */
+               pci_restore_state(vp->pdev, vp->power_state);
+       }
+
+       /* Before initializing select the active media port. */
+       EL3WINDOW(3);
+       config = inl(ioaddr + Wn3_Config);
+
+       if (vp->media_override != 7) {
+               printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n",
+                          dev->name, vp->media_override,
+                          media_tbl[vp->media_override].name);
+               dev->if_port = vp->media_override;
+       } else if (vp->autoselect) {
+               if (vp->has_nway) {
+                       if (vortex_debug > 1)
+                               printk(KERN_INFO "%s: using NWAY device table, not %d\n",
+                                                               dev->name, dev->if_port);
+                       dev->if_port = XCVR_NWAY;
+               } else {
+                       /* Find first available media type, starting with 100baseTx. */
+                       dev->if_port = XCVR_100baseTx;
+                       while (! (vp->available_media & media_tbl[dev->if_port].mask))
+                               dev->if_port = media_tbl[dev->if_port].next;
+                       if (vortex_debug > 1)
+                               printk(KERN_INFO "%s: first available media type: %s\n",
+                                       dev->name, media_tbl[dev->if_port].name);
+               }
+       } else {
+               dev->if_port = vp->default_media;
+               if (vortex_debug > 1)
+                       printk(KERN_INFO "%s: using default media %s\n",
+                               dev->name, media_tbl[dev->if_port].name);
+       }
+
+       init_timer(&vp->timer);
+       vp->timer.expires = RUN_AT(media_tbl[dev->if_port].wait);
+       vp->timer.data = (unsigned long)dev;
+       vp->timer.function = vortex_timer;              /* timer handler */
+       add_timer(&vp->timer);
+
+       init_timer(&vp->rx_oom_timer);
+       vp->rx_oom_timer.data = (unsigned long)dev;
+       vp->rx_oom_timer.function = rx_oom_timer;
+
+       if (vortex_debug > 1)
+               printk(KERN_DEBUG "%s: Initial media type %s.\n",
+                          dev->name, media_tbl[dev->if_port].name);
+
+       vp->full_duplex = vp->force_fd;
+       config = BFINS(config, dev->if_port, 20, 4);
+       if (vortex_debug > 6)
+               printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config);
+       outl(config, ioaddr + Wn3_Config);
+
+       if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
+               int mii_reg1, mii_reg5;
+               EL3WINDOW(4);
+               /* Read BMSR (reg1) only to clear old status. */
+               mii_reg1 = mdio_read(dev, vp->phys[0], 1);
+               mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+               if (mii_reg5 == 0xffff  ||  mii_reg5 == 0x0000)
+                       ;                                       /* No MII device or no link partner report */
+               else if ((mii_reg5 & 0x0100) != 0       /* 100baseTx-FD */
+                                || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */
+                       vp->full_duplex = 1;
+               vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0);
+               if (vortex_debug > 1)
+                       printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x,"
+                                  " info1 %04x, setting %s-duplex.\n",
+                                       dev->name, vp->phys[0],
+                                       mii_reg1, mii_reg5,
+                                       vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? "full" : "half");
+               EL3WINDOW(3);
+       }
+
+       /* Set the full-duplex bit (0x20), the large-frame bit (0x40, MTU over
+          1500) and the flow-control bit (0x100) in Wn3_MAC_Ctrl. */
+       outw(   ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) |
+                       (dev->mtu > 1500 ? 0x40 : 0) |
+                       ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+                       ioaddr + Wn3_MAC_Ctrl);
+
+       if (vortex_debug > 1) {
+               printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n",
+                       dev->name, config);
+       }
+
+       issue_and_wait(dev, TxReset);
+       /*
+        * Don't reset the PHY - that upsets autonegotiation during DHCP operations.
+        */
+       issue_and_wait(dev, RxReset|0x04);
+
+       outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD);
+
+       if (vortex_debug > 1) {
+               EL3WINDOW(4);
+               printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n",
+                          dev->name, dev->irq, inw(ioaddr + Wn4_Media));
+       }
+
+       /* Set the station address and mask in window 2 each time opened. */
+       EL3WINDOW(2);
+       for (i = 0; i < 6; i++)
+               outb(dev->dev_addr[i], ioaddr + i);
+       for (; i < 12; i+=2)
+               outw(0, ioaddr + i);
+
+       if (vp->cb_fn_base) {
+               unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010;
+               if (vp->drv_flags & INVERT_LED_PWR)
+                       n |= 0x10;
+               if (vp->drv_flags & INVERT_MII_PWR)
+                       n |= 0x4000;
+               outw(n, ioaddr + Wn2_ResetOptions);
+       }
+
+       if (dev->if_port == XCVR_10base2)
+               /* Start the thinnet transceiver. We should really wait 50ms... */
+               outw(StartCoax, ioaddr + EL3_CMD);
+       if (dev->if_port != XCVR_NWAY) {
+               EL3WINDOW(4);
+               outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) |
+                        media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+       }
+
+       /* Switch to the stats window, and clear all stats by reading. */
+       outw(StatsDisable, ioaddr + EL3_CMD);
+       EL3WINDOW(6);
+       for (i = 0; i < 10; i++)
+               inb(ioaddr + i);
+       inw(ioaddr + 10);
+       inw(ioaddr + 12);
+       /* New: On the Vortex we must also clear the BadSSD counter. */
+       EL3WINDOW(4);
+       inb(ioaddr + 12);
+       /* ...and on the Boomerang we enable the extra statistics bits. */
+       outw(0x0040, ioaddr + Wn4_NetDiag);
+
+       /* Switch to register set 7 for normal use. */
+       EL3WINDOW(7);
+
+       if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+               vp->cur_rx = vp->dirty_rx = 0;
+               /* Initialize the RxEarly register as recommended. */
+               outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
+               outl(0x0020, ioaddr + PktStatus);
+               outl(vp->rx_ring_dma, ioaddr + UpListPtr);
+       }
+       if (vp->full_bus_master_tx) {           /* Boomerang bus master Tx. */
+               vp->cur_tx = vp->dirty_tx = 0;
+               if (vp->drv_flags & IS_BOOMERANG)
+                       outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */
+               /* Clear the Rx, Tx rings. */
+               for (i = 0; i < RX_RING_SIZE; i++)      /* AKPM: this is done in vortex_open, too */
+                       vp->rx_ring[i].status = 0;
+               for (i = 0; i < TX_RING_SIZE; i++)
+                       vp->tx_skbuff[i] = 0;
+               outl(0, ioaddr + DownListPtr);
+       }
+       /* Set receiver mode: presumably accept broadcast and physical address only. */
+       set_rx_mode(dev);
+       outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
+
+//     issue_and_wait(dev, SetTxStart|0x07ff);
+       outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */
+       outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */
+       /* Allow status bits to be seen. */
+       vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete|
+               (vp->full_bus_master_tx ? DownComplete : TxAvailable) |
+               (vp->full_bus_master_rx ? UpComplete : RxComplete) |
+               (vp->bus_master ? DMADone : 0);
+       vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable |
+               (vp->full_bus_master_rx ? 0 : RxComplete) |
+               StatsFull | HostError | TxComplete | IntReq
+               | (vp->bus_master ? DMADone : 0) | UpComplete | DownComplete;
+       outw(vp->status_enable, ioaddr + EL3_CMD);
+       /* Ack all pending events, and set active indicator mask. */
+       outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq,
+                ioaddr + EL3_CMD);
+       outw(vp->intr_enable, ioaddr + EL3_CMD);
+       if (vp->cb_fn_base)                     /* The PCMCIA people are idiots.  */
+               writel(0x8000, vp->cb_fn_base + 4);
+       netif_start_queue (dev);
+}
+
+static int
+vortex_open(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       int i;
+       int retval;
+
+       /* Use the now-standard shared IRQ implementation. */
+       if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
+                               &boomerang_interrupt : &vortex_interrupt, SA_SHIRQ, dev->name, dev))) {
+               printk(KERN_ERR "%s: Could not reserve IRQ %d\n", dev->name, dev->irq);
+               goto out;
+       }
+
+       if (vp->full_bus_master_rx) { /* Boomerang bus master. */
+               if (vortex_debug > 2)
+                       printk(KERN_DEBUG "%s:  Filling in the Rx ring.\n", dev->name);
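+               /*
+                * Each descriptor is chained to the next; after the loop the
+                * last descriptor is pointed back at the head, closing the ring.
+                */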
+               for (i = 0; i < RX_RING_SIZE; i++) {
+                       struct sk_buff *skb;
+                       vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1));
+                       vp->rx_ring[i].status = 0;      /* Clear complete bit. */
+                       vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG);
+                       skb = dev_alloc_skb(PKT_BUF_SZ);
+                       vp->rx_skbuff[i] = skb;
+                       if (skb == NULL)
+                               break;                  /* Bad news!  */
+                       skb->dev = dev;                 /* Mark as being used by this device. */
+                       skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+                       vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+               }
+               if (i != RX_RING_SIZE) {
+                       int j;
+                       printk(KERN_EMERG "%s: no memory for rx ring\n", dev->name);
+                       for (j = 0; j < i; j++) {
+                               if (vp->rx_skbuff[j]) {
+                                       dev_kfree_skb(vp->rx_skbuff[j]);
+                                       vp->rx_skbuff[j] = 0;
+                               }
+                       }
+                       retval = -ENOMEM;
+                       goto out_free_irq;
+               }
+               /* Wrap the ring. */
+               vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma);
+       }
+
+       vortex_up(dev);
+       return 0;
+
+out_free_irq:
+       free_irq(dev->irq, dev);
+out:
+       if (vortex_debug > 1)
+               printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", dev->name, retval);
+       return retval;
+}
+
+static void
+vortex_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 60*HZ;
+       int ok = 0;
+       int media_status, mii_status, old_window;
+
+       if (vortex_debug > 2) {
+               printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n",
+                          dev->name, media_tbl[dev->if_port].name);
+               printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
+       }
+
+       if (vp->medialock)
+               goto leave_media_alone;
+       disable_irq(dev->irq);
+       old_window = inw(ioaddr + EL3_CMD) >> 13;
+       EL3WINDOW(4);
+       media_status = inw(ioaddr + Wn4_Media);
+       switch (dev->if_port) {
+       case XCVR_10baseT:  case XCVR_100baseTx:  case XCVR_100baseFx:
+               if (media_status & Media_LnkBeat) {
+                       ok = 1;
+                       if (vortex_debug > 1)
+                               printk(KERN_DEBUG "%s: Media %s has link beat, %x.\n",
+                                          dev->name, media_tbl[dev->if_port].name, media_status);
+               } else if (vortex_debug > 1)
+                       printk(KERN_DEBUG "%s: Media %s has no link beat, %x.\n",
+                                  dev->name, media_tbl[dev->if_port].name, media_status);
+               break;
+       case XCVR_MII: case XCVR_NWAY:
+               {
+                       mii_status = mdio_read(dev, vp->phys[0], 1);
+                       ok = 1;
+                       if (vortex_debug > 2)
+                               printk(KERN_DEBUG "%s: MII transceiver has status %4.4x.\n",
+                                       dev->name, mii_status);
+                       if (mii_status & 0x0004) {
+                               int mii_reg5 = mdio_read(dev, vp->phys[0], 5);
+                               if (! vp->force_fd  &&  mii_reg5 != 0xffff) {
+                                       int duplex = (mii_reg5&0x0100) ||
+                                               (mii_reg5 & 0x01C0) == 0x0040;
+                                       if (vp->full_duplex != duplex) {
+                                               vp->full_duplex = duplex;
+                                               printk(KERN_INFO "%s: Setting %s-duplex based on MII "
+                                                       "#%d link partner capability of %4.4x.\n",
+                                                       dev->name, vp->full_duplex ? "full" : "half",
+                                                       vp->phys[0], mii_reg5);
+                                               /* Set the full-duplex bit. */
+                                               EL3WINDOW(3);
+                                               outw(   (vp->full_duplex ? 0x20 : 0) |
+                                                               (dev->mtu > 1500 ? 0x40 : 0) |
+                                                               ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0),
+                                                               ioaddr + Wn3_MAC_Ctrl);
+                                               if (vortex_debug > 1)
+                                                       printk(KERN_DEBUG "Setting duplex in Wn3_MAC_Ctrl\n");
+                                               /* AKPM: bug: should reset Tx and Rx after setting Duplex.  Page 180 */
+                                       }
+                               }
+                       }
+               }
+               break;
+         default:                                      /* Other media types handled by Tx timeouts. */
+               if (vortex_debug > 1)
+                 printk(KERN_DEBUG "%s: Media %s has no indication, %x.\n",
+                                dev->name, media_tbl[dev->if_port].name, media_status);
+               ok = 1;
+       }
+       if ( ! ok) {
+               unsigned int config;
+
+               do {
+                       dev->if_port = media_tbl[dev->if_port].next;
+               } while ( ! (vp->available_media & media_tbl[dev->if_port].mask));
+               if (dev->if_port == XCVR_Default) { /* Go back to default. */
+                 dev->if_port = vp->default_media;
+                 if (vortex_debug > 1)
+                       printk(KERN_DEBUG "%s: Media selection failing, using default "
+                                  "%s port.\n",
+                                  dev->name, media_tbl[dev->if_port].name);
+               } else {
+                       if (vortex_debug > 1)
+                               printk(KERN_DEBUG "%s: Media selection failed, now trying "
+                                          "%s port.\n",
+                                          dev->name, media_tbl[dev->if_port].name);
+                       next_tick = media_tbl[dev->if_port].wait;
+               }
+               outw((media_status & ~(Media_10TP|Media_SQE)) |
+                        media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media);
+
+               EL3WINDOW(3);
+               config = inl(ioaddr + Wn3_Config);
+               config = BFINS(config, dev->if_port, 20, 4);
+               outl(config, ioaddr + Wn3_Config);
+
+               outw(dev->if_port == XCVR_10base2 ? StartCoax : StopCoax,
+                        ioaddr + EL3_CMD);
+               if (vortex_debug > 1)
+                       printk(KERN_DEBUG "wrote 0x%08x to Wn3_Config\n", config);
+               /* AKPM: FIXME: Should reset Rx & Tx here.  P60 of 3c90xc.pdf */
+       }
+       EL3WINDOW(old_window);
+       enable_irq(dev->irq);
+
+leave_media_alone:
+       if (vortex_debug > 2)
+         printk(KERN_DEBUG "%s: Media selection timer finished, %s.\n",
+                        dev->name, media_tbl[dev->if_port].name);
+
+       mod_timer(&vp->timer, RUN_AT(next_tick));
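+       /*
+        * Replay any interrupt sources that the ISR masked off in its
+        * "too much work" path, by triggering a fake interrupt.
+        */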
+       if (vp->deferred)
+               outw(FakeIntr, ioaddr + EL3_CMD);
+       return;
+}
+
+static void vortex_tx_timeout(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n",
+                  dev->name, inb(ioaddr + TxStatus),
+                  inw(ioaddr + EL3_STATUS));
+       EL3WINDOW(4);
+       printk(KERN_ERR "  diagnostics: net %04x media %04x dma %8.8x.\n",
+                  inw(ioaddr + Wn4_NetDiag), inw(ioaddr + Wn4_Media),
+                  inl(ioaddr + PktStatus));
+       /* Slight code bloat to be user friendly. */
+       if ((inb(ioaddr + TxStatus) & 0x88) == 0x88)
+               printk(KERN_ERR "%s: Transmitter encountered 16 collisions --"
+                          " network cable problem?\n", dev->name);
+       if (inw(ioaddr + EL3_STATUS) & IntLatch) {
+               printk(KERN_ERR "%s: Interrupt posted but not delivered --"
+                          " IRQ blocked by another device?\n", dev->name);
+               /* Bad idea here... but we might as well handle a few events. */
+               {
+                       /*
+                        * Block interrupts because vortex_interrupt does a bare spin_lock()
+                        */
+                       unsigned long flags;
+                       local_irq_save(flags);
+                       if (vp->full_bus_master_tx)
+                               boomerang_interrupt(dev->irq, dev, 0);
+                       else
+                               vortex_interrupt(dev->irq, dev, 0);
+                       local_irq_restore(flags);
+               }
+       }
+
+       if (vortex_debug > 0)
+               dump_tx_ring(dev);
+
+       issue_and_wait(dev, TxReset);
+
+       vp->stats.tx_errors++;
+       if (vp->full_bus_master_tx) {
+               printk(KERN_DEBUG "%s: Resetting the Tx ring pointer.\n", dev->name);
+               if (vp->cur_tx - vp->dirty_tx > 0  &&  inl(ioaddr + DownListPtr) == 0)
+                       outl(vp->tx_ring_dma + (vp->dirty_tx % TX_RING_SIZE) * sizeof(struct boom_tx_desc),
+                                ioaddr + DownListPtr);
+               if (vp->cur_tx - vp->dirty_tx < TX_RING_SIZE)
+                       netif_wake_queue (dev);
+               if (vp->drv_flags & IS_BOOMERANG)
+                       outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold);
+               outw(DownUnstall, ioaddr + EL3_CMD);
+       } else {
+               vp->stats.tx_dropped++;
+               netif_wake_queue(dev);
+       }
+       
+       /* Issue Tx Enable */
+       outw(TxEnable, ioaddr + EL3_CMD);
+       dev->trans_start = jiffies;
+       
+       /* Switch to register set 7 for normal use. */
+       EL3WINDOW(7);
+}
+
+/*
+ * Handle uncommon interrupt sources.  This is a separate routine to minimize
+ * the cache impact.
+ */
+static void
+vortex_error(struct net_device *dev, int status)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int do_tx_reset = 0, reset_mask = 0;
+       unsigned char tx_status = 0;
+
+       if (vortex_debug > 2) {
+               printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", dev->name, status);
+       }
+
+       if (status & TxComplete) {                      /* Really "TxError" for us. */
+               tx_status = inb(ioaddr + TxStatus);
+               /* Presumably a tx-timeout. We must merely re-enable. */
+               if (vortex_debug > 2
+                       || (tx_status != 0x88 && vortex_debug > 0)) {
+                       printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n",
+                                  dev->name, tx_status);
+                       if (tx_status == 0x82) {
+                               printk(KERN_ERR "Probably a duplex mismatch.  See "
+                                               "Documentation/networking/vortex.txt\n");
+                       }
+                       dump_tx_ring(dev);
+               }
+               if (tx_status & 0x14)  vp->stats.tx_fifo_errors++;
+               if (tx_status & 0x38)  vp->stats.tx_aborted_errors++;
+               outb(0, ioaddr + TxStatus);
+               if (tx_status & 0x30) {                 /* txJabber or txUnderrun */
+                       do_tx_reset = 1;
+               } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) {       /* maxCollisions */
+                       do_tx_reset = 1;
+                       reset_mask = 0x0108;            /* Reset interface logic, but not download logic */
+               } else {                                                /* Merely re-enable the transmitter. */
+                       outw(TxEnable, ioaddr + EL3_CMD);
+               }
+       }
+
+       if (status & RxEarly) {                         /* Rx early is unused. */
+               vortex_rx(dev);
+               outw(AckIntr | RxEarly, ioaddr + EL3_CMD);
+       }
+       if (status & StatsFull) {                       /* Empty statistics. */
+               static int DoneDidThat;
+               if (vortex_debug > 4)
+                       printk(KERN_DEBUG "%s: Updating stats.\n", dev->name);
+               update_stats(ioaddr, dev);
+               /* HACK: Disable statistics as an interrupt source. */
+               /* This occurs when we have the wrong media type! */
+               if (DoneDidThat == 0  &&
+                       inw(ioaddr + EL3_STATUS) & StatsFull) {
+                       printk(KERN_WARNING "%s: Updating statistics failed, disabling "
+                                  "stats as an interrupt source.\n", dev->name);
+                       EL3WINDOW(5);
+                       outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD);
+                       vp->intr_enable &= ~StatsFull;
+                       EL3WINDOW(7);
+                       DoneDidThat++;
+               }
+       }
+       if (status & IntReq) {          /* Restore all interrupt sources.  */
+               outw(vp->status_enable, ioaddr + EL3_CMD);
+               outw(vp->intr_enable, ioaddr + EL3_CMD);
+       }
+       if (status & HostError) {
+               u16 fifo_diag;
+               EL3WINDOW(4);
+               fifo_diag = inw(ioaddr + Wn4_FIFODiag);
+               printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n",
+                          dev->name, fifo_diag);
+               /* Adapter failure requires Tx/Rx reset and reinit. */
+               if (vp->full_bus_master_tx) {
+                       int bus_status = inl(ioaddr + PktStatus);
+                       /* 0x80000000 PCI master abort. */
+                       /* 0x40000000 PCI target abort. */
+                       if (vortex_debug)
+                               printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", dev->name, bus_status);
+
+                       /* In this case, blow the card away */
+                       vortex_down(dev);
+                       issue_and_wait(dev, TotalReset | 0xff);
+                       vortex_up(dev);         /* AKPM: bug.  vortex_up() assumes that the rx ring is full. It may not be. */
+               } else if (fifo_diag & 0x0400)
+                       do_tx_reset = 1;
+               if (fifo_diag & 0x3000) {
+                       /* Reset Rx fifo and upload logic */
+                       issue_and_wait(dev, RxReset|0x07);
+                       /* Set the Rx filter to the current state. */
+                       set_rx_mode(dev);
+                       outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */
+                       outw(AckIntr | HostError, ioaddr + EL3_CMD);
+               }
+       }
+
+       if (do_tx_reset) {
+               issue_and_wait(dev, TxReset|reset_mask);
+               outw(TxEnable, ioaddr + EL3_CMD);
+               if (!vp->full_bus_master_tx)
+                       netif_wake_queue(dev);
+       }
+}
+
+static int
+vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       /* Put out the doubleword header... */
+       outl(skb->len, ioaddr + TX_FIFO);
+       if (vp->bus_master) {
+               /* Set the bus-master controller to transfer the packet. */
+               int len = (skb->len + 3) & ~3;
+               outl(   vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, len, PCI_DMA_TODEVICE),
+                               ioaddr + Wn7_MasterAddr);
+               outw(len, ioaddr + Wn7_MasterLen);
+               vp->tx_skb = skb;
+               outw(StartDMADown, ioaddr + EL3_CMD);
+               /* netif_wake_queue() will be called at the DMADone interrupt. */
+       } else {
+               /* ... and the packet rounded to a doubleword. */
+               outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2);
+               dev_kfree_skb (skb);
+               if (inw(ioaddr + TxFree) > 1536) {
+                       netif_start_queue (dev);        /* AKPM: redundant? */
+               } else {
+                       /* Interrupt us when the FIFO has room for max-sized packet. */
+                       netif_stop_queue(dev);
+                       outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+               }
+       }
+
+       dev->trans_start = jiffies;
+
+       /* Clear the Tx status stack. */
+       {
+               int tx_status;
+               int i = 32;
+
+               while (--i > 0  &&      (tx_status = inb(ioaddr + TxStatus)) > 0) {
+                       if (tx_status & 0x3C) {         /* A Tx-disabling error occurred.  */
+                               if (vortex_debug > 2)
+                                 printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n",
+                                                dev->name, tx_status);
+                               if (tx_status & 0x04) vp->stats.tx_fifo_errors++;
+                               if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
+                               if (tx_status & 0x30) {
+                                       issue_and_wait(dev, TxReset);
+                               }
+                               outw(TxEnable, ioaddr + EL3_CMD);
+                       }
+                       outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */
+               }
+       }
+       return 0;
+}
+
+static int
+boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       /* Calculate the next Tx descriptor entry. */
+       int entry = vp->cur_tx % TX_RING_SIZE;
+       struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE];
+       unsigned long flags;
+
+       if (vortex_debug > 6) {
+               printk(KERN_DEBUG "boomerang_start_xmit()\n");
+               if (vortex_debug > 3)
+                       printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n",
+                                  dev->name, vp->cur_tx);
+       }
+
+       if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) {
+               if (vortex_debug > 0)
+                       printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n",
+                                  dev->name);
+               netif_stop_queue(dev);
+               return 1;
+       }
+
+       vp->tx_skbuff[entry] = skb;
+
+       vp->tx_ring[entry].next = 0;
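+       /*
+        * With zerocopy enabled, each skb fragment gets its own descriptor
+        * fragment entry; the final entry is tagged with LAST_FRAG.
+        */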
+#if DO_ZEROCOPY
+       if (skb->ip_summed != CHECKSUM_HW)
+                       vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+       else
+                       vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum);
+
+       if (!skb_shinfo(skb)->nr_frags) {
+               vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+                                                                               skb->len, PCI_DMA_TODEVICE));
+               vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG);
+       } else {
+               int i;
+
+               vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data,
+                                                                               skb->len-skb->data_len, PCI_DMA_TODEVICE));
+               vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len-skb->data_len);
+
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+                       vp->tx_ring[entry].frag[i+1].addr =
+                                       cpu_to_le32(pci_map_single(vp->pdev,
+                                                                                          (void*)page_address(frag->page) + frag->page_offset,
+                                                                                          frag->size, PCI_DMA_TODEVICE));
+
+                       if (i == skb_shinfo(skb)->nr_frags-1)
+                                       vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG);
+                       else
+                                       vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size);
+               }
+       }
+#else
+       vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE));
+       vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG);
+       vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded);
+#endif
+
+       spin_lock_irqsave(&vp->lock, flags);
+       /* Stall the download engine so the descriptor list can be modified
+          safely, then wait for the stall to complete. */
+       issue_and_wait(dev, DownStall);
+       prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc));
+       if (inl(ioaddr + DownListPtr) == 0) {
+               outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr);
+               vp->queued_packet++;
+       }
+
+       vp->cur_tx++;
+       if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) {
+               netif_stop_queue (dev);
+       } else {                                        /* Clear previous interrupt enable. */
+#if defined(tx_interrupt_mitigation)
+               /* Dubious. If the "faster" cyclone-only branch in
+                * boomerang_interrupt were selected, this would corrupt
+                * DN_COMPLETE. No?
+                */
+               prev_entry->status &= cpu_to_le32(~TxIntrUploaded);
+#endif
+       }
+       outw(DownUnstall, ioaddr + EL3_CMD);
+       spin_unlock_irqrestore(&vp->lock, flags);
+       dev->trans_start = jiffies;
+       return 0;
+}
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+
+/*
+ * This is the ISR for the vortex series chips.
+ * full_bus_master_tx == 0 && full_bus_master_rx == 0
+ */
+
+static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct net_device *dev = dev_id;
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr;
+       int status;
+       int work_done = max_interrupt_work;
+       
+       ioaddr = dev->base_addr;
+       spin_lock(&vp->lock);
+
+       status = inw(ioaddr + EL3_STATUS);
+
+       if (vortex_debug > 6)
+               printk("vortex_interrupt(). status=0x%4x\n", status);
+
+       if ((status & IntLatch) == 0)
+               goto handler_exit;              /* No interrupt: shared IRQs cause this */
+
+       if (status & IntReq) {
+               status |= vp->deferred;
+               vp->deferred = 0;
+       }
+
+       if (status == 0xffff)           /* h/w no longer present (hotplug)? */
+               goto handler_exit;
+
+       if (vortex_debug > 4)
+               printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+                          dev->name, status, inb(ioaddr + Timer));
+
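+       /*
+        * Service events until the IntLatch and RxComplete bits clear,
+        * bounded by max_interrupt_work iterations to avoid livelock.
+        */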
+       do {
+               if (vortex_debug > 5)
+                               printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+                                          dev->name, status);
+               if (status & RxComplete)
+                       vortex_rx(dev);
+
+               if (status & TxAvailable) {
+                       if (vortex_debug > 5)
+                               printk(KERN_DEBUG "     TX room bit was handled.\n");
+                       /* There's room in the FIFO for a full-sized packet. */
+                       outw(AckIntr | TxAvailable, ioaddr + EL3_CMD);
+                       netif_wake_queue (dev);
+               }
+
+               if (status & DMADone) {
+                       if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) {
+                               outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */
+                               pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE);
+                               dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */
+                               if (inw(ioaddr + TxFree) > 1536) {
+                                       /*
+                                        * AKPM: FIXME: I don't think we need this.  If the queue was stopped due to
+                                        * insufficient FIFO room, the TxAvailable test will succeed and call
+                                        * netif_wake_queue()
+                                        */
+                                       netif_wake_queue(dev);
+                               } else { /* Interrupt when FIFO has room for max-sized packet. */
+                                       outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD);
+                                       netif_stop_queue(dev);
+                               }
+                       }
+               }
+               /* Check for all uncommon interrupts at once. */
+               if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) {
+                       if (status == 0xffff)
+                               break;
+                       vortex_error(dev, status);
+               }
+
+               if (--work_done < 0) {
+                       printk(KERN_WARNING "%s: Too much work in interrupt, status "
+                                  "%4.4x.\n", dev->name, status);
+                       /* Disable all pending interrupt sources, remembering
+                          them in vp->deferred for later replay. */
+                       do {
+                               vp->deferred |= status;
+                               outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+                                        ioaddr + EL3_CMD);
+                               outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+                       } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+                       /* The timer will reenable interrupts. */
+                       mod_timer(&vp->timer, jiffies + 1*HZ);
+                       break;
+               }
+               /* Acknowledge the IRQ. */
+               outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+       } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete));
+
+       if (vortex_debug > 4)
+               printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+                          dev->name, status);
+handler_exit:
+       spin_unlock(&vp->lock);
+}
+
+/*
+ * This is the ISR for the boomerang series chips.
+ * full_bus_master_tx == 1 && full_bus_master_rx == 1
+ */
+
+static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct net_device *dev = dev_id;
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr;
+       int status;
+       int work_done = max_interrupt_work;
+
+       ioaddr = dev->base_addr;
+
+       /*
+        * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout
+        * and boomerang_start_xmit
+        */
+       spin_lock(&vp->lock);
+
+       status = inw(ioaddr + EL3_STATUS);
+
+       if (vortex_debug > 6)
+               printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status);
+
+       if ((status & IntLatch) == 0)
+               goto handler_exit;              /* No interrupt: shared IRQs can cause this */
+
+       if (status == 0xffff) {         /* h/w no longer present (hotplug)? */
+               if (vortex_debug > 1)
+                       printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n");
+               goto handler_exit;
+       }
+
+       if (status & IntReq) {
+               status |= vp->deferred;
+               vp->deferred = 0;
+       }
+
+       if (vortex_debug > 4)
+               printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n",
+                          dev->name, status, inb(ioaddr + Timer));
+       do {
+               if (vortex_debug > 5)
+                               printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n",
+                                          dev->name, status);
+               if (status & UpComplete) {
+                       outw(AckIntr | UpComplete, ioaddr + EL3_CMD);
+                       if (vortex_debug > 5)
+                               printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n");
+                       boomerang_rx(dev);
+               }
+
+               if (status & DownComplete) {
+                       unsigned int dirty_tx = vp->dirty_tx;
+
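+                       /*
+                        * Reclaim completed Tx descriptors: walk from dirty_tx
+                        * toward cur_tx, unmapping and freeing each skb.
+                        */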
+                       outw(AckIntr | DownComplete, ioaddr + EL3_CMD);
+                       while (vp->cur_tx - dirty_tx > 0) {
+                               int entry = dirty_tx % TX_RING_SIZE;
+#if 1  /* AKPM: the latter is faster, but cyclone-only */
+                               if (inl(ioaddr + DownListPtr) ==
+                                       vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc))
+                                       break;                  /* It still hasn't been processed. */
+#else
+                               if ((vp->tx_ring[entry].status & DN_COMPLETE) == 0)
+                                       break;                  /* It still hasn't been processed. */
+#endif
+                                       
+                               if (vp->tx_skbuff[entry]) {
+                                       struct sk_buff *skb = vp->tx_skbuff[entry];
+#if DO_ZEROCOPY                                        
+                                       int i;
+                                       for (i=0; i<=skb_shinfo(skb)->nr_frags; i++)
+                                                       pci_unmap_single(vp->pdev,
+                                                                                        le32_to_cpu(vp->tx_ring[entry].frag[i].addr),
+                                                                                        le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF,
+                                                                                        PCI_DMA_TODEVICE);
+#else
+                                       pci_unmap_single(vp->pdev,
+                                               le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+                                       dev_kfree_skb_irq(skb);
+                                       vp->tx_skbuff[entry] = 0;
+                               } else {
+                                       printk(KERN_DEBUG "boomerang_interrupt: no skb!\n");
+                               }
+                               /* vp->stats.tx_packets++;  Counted below. */
+                               dirty_tx++;
+                       }
+                       vp->dirty_tx = dirty_tx;
+                       if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) {
+                               if (vortex_debug > 6)
+                                       printk(KERN_DEBUG "boomerang_interrupt: wake queue\n");
+                               netif_wake_queue (dev);
+                       }
+               }
+
+               /* Check for all uncommon interrupts at once. */
+               if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq))
+                       vortex_error(dev, status);
+
+               if (--work_done < 0) {
+                       printk(KERN_WARNING "%s: Too much work in interrupt, status "
+                                  "%4.4x.\n", dev->name, status);
+                       /* Disable all pending interrupts. */
+                       do {
+                               vp->deferred |= status;
+                               outw(SetStatusEnb | (~vp->deferred & vp->status_enable),
+                                        ioaddr + EL3_CMD);
+                               outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD);
+                       } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch);
+                       /* The timer will reenable interrupts. */
+                       mod_timer(&vp->timer, jiffies + 1*HZ);
+                       break;
+               }
+               /* Acknowledge the IRQ. */
+               outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD);
+               if (vp->cb_fn_base)                     /* The PCMCIA people are idiots.  */
+                       writel(0x8000, vp->cb_fn_base + 4);
+
+       } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch);
+
+       if (vortex_debug > 4)
+               printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n",
+                          dev->name, status);
+handler_exit:
+       spin_unlock(&vp->lock);
+}
+
+static int vortex_rx(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int i;
+       short rx_status;
+
+       if (vortex_debug > 5)
+               printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n",
+                          inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus));
+       while ((rx_status = inw(ioaddr + RxStatus)) > 0) {
+               if (rx_status & 0x4000) { /* Error, update stats. */
+                       unsigned char rx_error = inb(ioaddr + RxErrors);
+                       if (vortex_debug > 2)
+                               printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+                       vp->stats.rx_errors++;
+                       if (rx_error & 0x01)  vp->stats.rx_over_errors++;
+                       if (rx_error & 0x02)  vp->stats.rx_length_errors++;
+                       if (rx_error & 0x04)  vp->stats.rx_frame_errors++;
+                       if (rx_error & 0x08)  vp->stats.rx_crc_errors++;
+                       if (rx_error & 0x10)  vp->stats.rx_length_errors++;
+               } else {
+                       /* The packet length: up to 4.5K! */
+                       int pkt_len = rx_status & 0x1fff;
+                       struct sk_buff *skb;
+
+                       skb = dev_alloc_skb(pkt_len + 5);
+                       if (vortex_debug > 4)
+                               printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+                                          pkt_len, rx_status);
+                       if (skb != NULL) {
+                               skb->dev = dev;
+                               skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+                               /* 'skb_put()' points to the start of sk_buff data area. */
+                               if (vp->bus_master &&
+                                       ! (inw(ioaddr + Wn7_MasterStatus) & 0x8000)) {
+                                       dma_addr_t dma = pci_map_single(vp->pdev, skb_put(skb, pkt_len),
+                                                                          pkt_len, PCI_DMA_FROMDEVICE);
+                                       outl(dma, ioaddr + Wn7_MasterAddr);
+                                       outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen);
+                                       outw(StartDMAUp, ioaddr + EL3_CMD);
+                                       while (inw(ioaddr + Wn7_MasterStatus) & 0x8000)
+                                               ;
+                                       pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE);
+                               } else {
+                                       insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len),
+                                                (pkt_len + 3) >> 2);
+                               }
+                               outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */
+                               skb->protocol = eth_type_trans(skb, dev);
+                               netif_rx(skb);
+                               dev->last_rx = jiffies;
+                               vp->stats.rx_packets++;
+                               /* Wait a limited time to go to next packet. */
+                               for (i = 200; i >= 0; i--)
+                                       if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress))
+                                               break;
+                               continue;
+                       } else if (vortex_debug > 0)
+                               printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of "
+                                          "size %d.\n", dev->name, pkt_len);
+               }
+               vp->stats.rx_dropped++;
+               issue_and_wait(dev, RxDiscard);
+       }
+
+       return 0;
+}
+
+static int
+boomerang_rx(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       int entry = vp->cur_rx % RX_RING_SIZE;
+       long ioaddr = dev->base_addr;
+       int rx_status;
+       int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+
+       if (vortex_debug > 5)
+               printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS));
+
+       while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){
+               if (--rx_work_limit < 0)
+                       break;
+               if (rx_status & RxDError) { /* Error, update stats. */
+                       unsigned char rx_error = rx_status >> 16;
+                       if (vortex_debug > 2)
+                               printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+                       vp->stats.rx_errors++;
+                       if (rx_error & 0x01)  vp->stats.rx_over_errors++;
+                       if (rx_error & 0x02)  vp->stats.rx_length_errors++;
+                       if (rx_error & 0x04)  vp->stats.rx_frame_errors++;
+                       if (rx_error & 0x08)  vp->stats.rx_crc_errors++;
+                       if (rx_error & 0x10)  vp->stats.rx_length_errors++;
+               } else {
+                       /* The packet length: up to 4.5K! */
+                       int pkt_len = rx_status & 0x1fff;
+                       struct sk_buff *skb;
+                       dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
+
+                       if (vortex_debug > 4)
+                               printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
+                                          pkt_len, rx_status);
+
+                       /* Check if the packet is long enough to just accept without
+                          copying to a properly sized skbuff. */
+                       if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
+                               skb->dev = dev;
+                               skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+                               pci_dma_sync_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               /* 'skb_put()' points to the start of sk_buff data area. */
+                               memcpy(skb_put(skb, pkt_len),
+                                          vp->rx_skbuff[entry]->tail,
+                                          pkt_len);
+                               vp->rx_copy++;
+                       } else {
+                               /* Pass up the skbuff already on the Rx ring. */
+                               skb = vp->rx_skbuff[entry];
+                               vp->rx_skbuff[entry] = NULL;
+                               skb_put(skb, pkt_len);
+                               pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               vp->rx_nocopy++;
+                       }
+                       skb->protocol = eth_type_trans(skb, dev);
+                       {                                       /* Use hardware checksum info. */
+                               int csum_bits = rx_status & 0xee000000;
+                               if (csum_bits &&
+                                       (csum_bits == (IPChksumValid | TCPChksumValid) ||
+                                        csum_bits == (IPChksumValid | UDPChksumValid))) {
+                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                                       vp->rx_csumhits++;
+                               }
+                       }
+                       netif_rx(skb);
+                       dev->last_rx = jiffies;
+                       vp->stats.rx_packets++;
+               }
+               entry = (++vp->cur_rx) % RX_RING_SIZE;
+       }
+       /* Refill the Rx ring buffers. */
+       for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
+               struct sk_buff *skb;
+               entry = vp->dirty_rx % RX_RING_SIZE;
+               if (vp->rx_skbuff[entry] == NULL) {
+                       skb = dev_alloc_skb(PKT_BUF_SZ);
+                       if (skb == NULL) {
+                               static unsigned long last_jif;
+                               if ((jiffies - last_jif) > 10 * HZ) {
+                                       printk(KERN_WARNING "%s: memory shortage\n", dev->name);
+                                       last_jif = jiffies;
+                               }
+                               if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
+                                       mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
+                               break;                  /* Bad news!  */
+                       }
+                       skb->dev = dev;                 /* Mark as being used by this device. */
+                       skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+                       vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+                       vp->rx_skbuff[entry] = skb;
+               }
+               vp->rx_ring[entry].status = 0;  /* Clear complete bit. */
+               outw(UpUnstall, ioaddr + EL3_CMD);
+       }
+       return 0;
+}
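+
+/* A note on the copy/no-copy split above: packets shorter than
+ * rx_copybreak are memcpy'd into a freshly allocated skbuff so the
+ * DMA-mapped ring buffer stays in place, while longer packets are
+ * passed up directly and their ring slot is refilled later.  This
+ * trades one small copy against the cost of remapping a buffer. */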
+
+/*
+ * If we've hit a total OOM refilling the Rx ring we poll once a second
+ * for some memory.  Otherwise there is no way to restart the rx process.
+ */
+static void
+rx_oom_timer(unsigned long arg)
+{
+       struct net_device *dev = (struct net_device *)arg;
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+
+       spin_lock_irq(&vp->lock);
+       if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)        /* This test is redundant, but makes me feel good */
+               boomerang_rx(dev);
+       if (vortex_debug > 1) {
+               printk(KERN_DEBUG "%s: rx_oom_timer %s\n", dev->name,
+                       ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
+       }
+       spin_unlock_irq(&vp->lock);
+}
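+
+/* For reference, a sketch of how such a timer is armed with the 2.4
+ * timer API (the actual setup lives in the probe/open code, not in
+ * this hunk; field names as used above):
+ *
+ *	init_timer(&vp->rx_oom_timer);
+ *	vp->rx_oom_timer.function = rx_oom_timer;
+ *	vp->rx_oom_timer.data = (unsigned long)dev;
+ */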
+
+static void
+vortex_down(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       netif_stop_queue (dev);
+
+       del_timer_sync(&vp->rx_oom_timer);
+       del_timer_sync(&vp->timer);
+
+       /* Turn off statistics ASAP.  We update vp->stats below. */
+       outw(StatsDisable, ioaddr + EL3_CMD);
+
+       /* Disable the receiver and transmitter. */
+       outw(RxDisable, ioaddr + EL3_CMD);
+       outw(TxDisable, ioaddr + EL3_CMD);
+
+       if (dev->if_port == XCVR_10base2)
+               /* Turn off thinnet power.  Green! */
+               outw(StopCoax, ioaddr + EL3_CMD);
+
+       outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD);
+
+       update_stats(ioaddr, dev);
+       if (vp->full_bus_master_rx)
+               outl(0, ioaddr + UpListPtr);
+       if (vp->full_bus_master_tx)
+               outl(0, ioaddr + DownListPtr);
+
+       if (vp->pdev && vp->enable_wol) {
+               pci_save_state(vp->pdev, vp->power_state);
+               acpi_set_WOL(dev);
+       }
+}
+
+static int
+vortex_close(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int i;
+
+       if (netif_device_present(dev))
+               vortex_down(dev);
+
+       if (vortex_debug > 1) {
+               printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n",
+                          dev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus));
+               printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d"
+                          " tx_queued %d Rx pre-checksummed %d.\n",
+                          dev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits);
+       }
+
+#if DO_ZEROCOPY
+       if (    vp->rx_csumhits &&
+                       ((vp->drv_flags & HAS_HWCKSM) == 0) &&
+                       (hw_checksums[vp->card_idx] == -1)) {
+               printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", dev->name);
+               printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n");
+       }
+#endif
+               
+       free_irq(dev->irq, dev);
+
+       if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */
+               for (i = 0; i < RX_RING_SIZE; i++)
+                       if (vp->rx_skbuff[i]) {
+                               pci_unmap_single(       vp->pdev, le32_to_cpu(vp->rx_ring[i].addr),
+                                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               dev_kfree_skb(vp->rx_skbuff[i]);
+                               vp->rx_skbuff[i] = 0;
+                       }
+       }
+       if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */
+               for (i = 0; i < TX_RING_SIZE; i++) {
+                       if (vp->tx_skbuff[i]) {
+                               struct sk_buff *skb = vp->tx_skbuff[i];
+#if DO_ZEROCOPY
+                               int k;
+
+                               for (k=0; k<=skb_shinfo(skb)->nr_frags; k++)
+                                               pci_unmap_single(vp->pdev,
+                                                                                le32_to_cpu(vp->tx_ring[i].frag[k].addr),
+                                                                                le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF,
+                                                                                PCI_DMA_TODEVICE);
+#else
+                               pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE);
+#endif
+                               dev_kfree_skb(skb);
+                               vp->tx_skbuff[i] = 0;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static void
+dump_tx_ring(struct net_device *dev)
+{
+       if (vortex_debug > 0) {
+               struct vortex_private *vp = (struct vortex_private *)dev->priv;
+               long ioaddr = dev->base_addr;
+               
+               if (vp->full_bus_master_tx) {
+                       int i;
+                       int stalled = inl(ioaddr + PktStatus) & 0x04;   /* Possibly racy, but it's only debug stuff */
+
+                       printk(KERN_ERR "  Flags; bus-master %d, dirty %d(%d) current %d(%d)\n",
+                                       vp->full_bus_master_tx,
+                                       vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE,
+                                       vp->cur_tx, vp->cur_tx % TX_RING_SIZE);
+                       printk(KERN_ERR "  Transmit list %8.8x vs. %p.\n",
+                                  inl(ioaddr + DownListPtr),
+                                  &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]);
+                       issue_and_wait(dev, DownStall);
+                       for (i = 0; i < TX_RING_SIZE; i++) {
+                               printk(KERN_ERR "  %d: @%p  length %8.8x status %8.8x\n", i,
+                                          &vp->tx_ring[i],
+#if DO_ZEROCOPY
+                                          le32_to_cpu(vp->tx_ring[i].frag[0].length),
+#else
+                                          le32_to_cpu(vp->tx_ring[i].length),
+#endif
+                                          le32_to_cpu(vp->tx_ring[i].status));
+                       }
+                       if (!stalled)
+                               outw(DownUnstall, ioaddr + EL3_CMD);
+               }
+       }
+}
+
+static struct net_device_stats *vortex_get_stats(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       unsigned long flags;
+
+       if (netif_device_present(dev)) {        /* AKPM: Used to be netif_running */
+               spin_lock_irqsave (&vp->lock, flags);
+               update_stats(dev->base_addr, dev);
+               spin_unlock_irqrestore (&vp->lock, flags);
+       }
+       return &vp->stats;
+}
+
+/*  Update statistics.
+       Unlike with the EL3 we need not worry about interrupts changing
+       the window setting from underneath us, but we must still guard
+       against a race condition with a StatsUpdate interrupt updating the
+       table.  This is done by checking that the ASM (!) code generated uses
+       atomic updates with '+='.
+       */
+static void update_stats(long ioaddr, struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       int old_window = inw(ioaddr + EL3_CMD);
+
+       if (old_window == 0xffff)       /* Chip suspended or ejected. */
+               return;
+       /* Unlike the 3c5x9 we need not turn off stats updates while reading. */
+       /* Switch to the stats window, and read everything. */
+       EL3WINDOW(6);
+       vp->stats.tx_carrier_errors             += inb(ioaddr + 0);
+       vp->stats.tx_heartbeat_errors   += inb(ioaddr + 1);
+       /* Multiple collisions. */              inb(ioaddr + 2);
+       vp->stats.collisions                    += inb(ioaddr + 3);
+       vp->stats.tx_window_errors              += inb(ioaddr + 4);
+       vp->stats.rx_fifo_errors                += inb(ioaddr + 5);
+       vp->stats.tx_packets                    += inb(ioaddr + 6);
+       vp->stats.tx_packets                    += (inb(ioaddr + 9)&0x30) << 4;
+       /* Rx packets   */                              inb(ioaddr + 7);   /* Must read to clear */
+       /* Tx deferrals */                              inb(ioaddr + 8);
+       /* Don't bother with register 9, an extension of registers 6&7.
+          If we do use the 6&7 values the atomic update assumption above
+          is invalid. */
+       vp->stats.rx_bytes += inw(ioaddr + 10);
+       vp->stats.tx_bytes += inw(ioaddr + 12);
+       /* New: On the Vortex we must also clear the BadSSD counter. */
+       EL3WINDOW(4);
+       inb(ioaddr + 12);
+
+       {
+               u8 up = inb(ioaddr + 13);
+               vp->stats.rx_bytes += (up & 0x0f) << 16;
+               vp->stats.tx_bytes += (up & 0xf0) << 12;
+       }
+
+       EL3WINDOW(old_window >> 13);
+       return;
+}
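+
+/* A note on windows: the EL3 command and status registers share one
+ * offset, and the top three bits of the status word report the window
+ * currently selected, which is why update_stats() saves
+ * inw(ioaddr + EL3_CMD) and restores it with EL3WINDOW(old_window >> 13).
+ * EL3WINDOW itself (defined earlier in this driver) is roughly:
+ *
+ *	outw(SelectWindow + window_num, ioaddr + EL3_CMD);
+ */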
+
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+       struct vortex_private *vp = dev->priv;
+       u32 ethcmd;
+               
+       if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+               return -EFAULT;
+
+       switch (ethcmd) {
+       case ETHTOOL_GDRVINFO: {
+               struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+               strcpy(info.driver, DRV_NAME);
+               strcpy(info.version, DRV_VERSION);
+               if (vp->pdev)
+                       strcpy(info.bus_info, vp->pdev->slot_name);
+               else
+                       sprintf(info.bus_info, "EISA 0x%lx %d",
+                               dev->base_addr, dev->irq);
+               if (copy_to_user(useraddr, &info, sizeof(info)))
+                       return -EFAULT;
+               return 0;
+       }
+
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+       int phy = vp->phys[0] & 0x1f;
+       int retval;
+
+       switch(cmd) {
+       case SIOCETHTOOL:
+               return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+       case SIOCGMIIPHY:               /* Get address of MII PHY in use. */
+       case SIOCDEVPRIVATE:            /* for binary compat, remove in 2.5 */
+               data->phy_id = phy;
+               /* Fall through to read the requested register as well. */
+
+       case SIOCGMIIREG:               /* Read MII PHY register. */
+       case SIOCDEVPRIVATE+1:          /* for binary compat, remove in 2.5 */
+               EL3WINDOW(4);
+               data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f);
+               retval = 0;
+               break;
+
+       case SIOCSMIIREG:               /* Write MII PHY register. */
+       case SIOCDEVPRIVATE+2:          /* for binary compat, remove in 2.5 */
+               if (!capable(CAP_NET_ADMIN)) {
+                       retval = -EPERM;
+               } else {
+                       EL3WINDOW(4);
+                       mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in);
+                       retval = 0;
+               }
+               break;
+       default:
+               retval = -EOPNOTSUPP;
+               break;
+       }
+
+       return retval;
+}
+
+/* Pre-Cyclone chips have no documented multicast filter, so the only
+   multicast setting is to receive all multicast frames.  At least
+   the chip has a very clean way to set the mode, unlike many others. */
+static void set_rx_mode(struct net_device *dev)
+{
+       long ioaddr = dev->base_addr;
+       int new_mode;
+
+       if (dev->flags & IFF_PROMISC) {
+               if (vortex_debug > 0)
+                       printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", dev->name);
+               new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm;
+       } else  if ((dev->mc_list)  ||  (dev->flags & IFF_ALLMULTI)) {
+               new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast;
+       } else
+               new_mode = SetRxFilter | RxStation | RxBroadcast;
+
+       outw(new_mode, ioaddr + EL3_CMD);
+}
+
+/* MII transceiver control section.
+   Read and write the MII registers using software-generated serial
+   MDIO protocol.  See the MII specifications or DP83840A data sheet
+   for details. */
+
+/* The maximum data clock rate is 2.5 MHz.  The minimum timing is usually
+   met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+   "overclocking" issues. */
+#define mdio_delay() inl(mdio_addr)
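+/* The dummy inl() of the management register is itself the delay: each
+ * PCI I/O read costs on the order of a microsecond, keeping successive
+ * MDIO clock edges comfortably under the 2.5 MHz limit.  The value read
+ * is simply discarded. */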
+
+#define MDIO_SHIFT_CLK 0x01
+#define MDIO_DIR_WRITE 0x04
+#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE)
+#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE)
+#define MDIO_DATA_READ 0x02
+#define MDIO_ENB_IN            0x00
+
+/* Generate the preamble required for initial synchronization and
+   a few older transceivers. */
+static void mdio_sync(long ioaddr, int bits)
+{
+       long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+       /* Establish sync by sending at least 32 logic ones. */
+       while (-- bits >= 0) {
+               outw(MDIO_DATA_WRITE1, mdio_addr);
+               mdio_delay();
+               outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+}
+
+static int mdio_read(struct net_device *dev, int phy_id, int location)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       int i;
+       long ioaddr = dev->base_addr;
+       int read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+       unsigned int retval = 0;
+       long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+       spin_lock_bh(&vp->mdio_lock);
+
+       if (mii_preamble_required)
+               mdio_sync(ioaddr, 32);
+
+       /* Shift the read command bits out. */
+       for (i = 14; i >= 0; i--) {
+               int dataval = (read_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+               outw(dataval, mdio_addr);
+               mdio_delay();
+               outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Read the two transition, 16 data, and wire-idle bits. */
+       for (i = 19; i > 0; i--) {
+               outw(MDIO_ENB_IN, mdio_addr);
+               mdio_delay();
+               retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0);
+               outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       spin_unlock_bh(&vp->mdio_lock);
+       /* Bit 0x20000 is the turnaround bit a responding PHY drives low;
+          if it reads back high, no transceiver answered. */
+       return retval & 0x20000 ? 0xffff : retval>>1 & 0xffff;
+}
+
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value;
+       long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+       int i;
+
+       spin_lock_bh(&vp->mdio_lock);
+
+       if (mii_preamble_required)
+               mdio_sync(ioaddr, 32);
+
+       /* Shift the command bits out. */
+       for (i = 31; i >= 0; i--) {
+               int dataval = (write_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+               outw(dataval, mdio_addr);
+               mdio_delay();
+               outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Leave the interface idle. */
+       for (i = 1; i >= 0; i--) {
+               outw(MDIO_ENB_IN, mdio_addr);
+               mdio_delay();
+               outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       spin_unlock_bh(&vp->mdio_lock);
+       return;
+}
+\f
+/* ACPI: Advanced Configuration and Power Interface. */
+/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */
+static void acpi_set_WOL(struct net_device *dev)
+{
+       struct vortex_private *vp = (struct vortex_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */
+       EL3WINDOW(7);
+       outw(2, ioaddr + 0x0c);
+       /* The RxFilter must accept the WOL frames. */
+       outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD);
+       outw(RxEnable, ioaddr + EL3_CMD);
+
+       /* Change the power state to D3; RxEnable doesn't take effect. */
+       pci_enable_wake(vp->pdev, 0, 1);
+       pci_set_power_state(vp->pdev, 3);
+}
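+
+/* The sequence above writes `2' (Magic Packet) to the window-7 wake-up
+ * event register at offset 0x0c, keeps the Rx filter willing to accept
+ * the WOL frame, then arms PME# and drops the card into D3hot via the
+ * PCI power-management calls. */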
+
+
+static void __devexit vortex_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct vortex_private *vp;
+
+       if (!dev) {
+               printk(KERN_ERR "vortex_remove_one called for EISA device!\n");
+               BUG();
+       }
+
+       vp = dev->priv;
+
+       /* AKPM: FIXME: we should have
+        *      if (vp->cb_fn_base) iounmap(vp->cb_fn_base);
+        * here
+        */
+       unregister_netdev(dev);
+       /* Should really use issue_and_wait() here */
+       outw(TotalReset|0x14, dev->base_addr + EL3_CMD);
+
+       if (vp->pdev && vp->enable_wol) {
+               pci_set_power_state(vp->pdev, 0);       /* Go active */
+               if (vp->pm_state_valid)
+                       pci_restore_state(vp->pdev, vp->power_state);
+       }
+
+       pci_free_consistent(pdev,
+                                               sizeof(struct boom_rx_desc) * RX_RING_SIZE
+                                                       + sizeof(struct boom_tx_desc) * TX_RING_SIZE,
+                                               vp->rx_ring,
+                                               vp->rx_ring_dma);
+       if (vp->must_free_region)
+               release_region(dev->base_addr, vp->io_size);
+       kfree(dev);
+}
+
+
+static struct pci_driver vortex_driver = {
+       name:           "3c59x",
+       probe:          vortex_init_one,
+       remove:         __devexit_p(vortex_remove_one),
+       id_table:       vortex_pci_tbl,
+#ifdef CONFIG_PM
+       suspend:        vortex_suspend,
+       resume:         vortex_resume,
+#endif
+};
+
+
+static int vortex_have_pci;
+static int vortex_have_eisa;
+
+
+static int __init vortex_init (void)
+{
+       int pci_rc, eisa_rc;
+
+       pci_rc = pci_module_init(&vortex_driver);
+       eisa_rc = vortex_eisa_init();
+
+       if (pci_rc == 0)
+               vortex_have_pci = 1;
+       if (eisa_rc > 0)
+               vortex_have_eisa = 1;
+
+       return (vortex_have_pci + vortex_have_eisa) ? 0 : -ENODEV;
+}
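+
+/* pci_module_init() follows the usual 2.4 convention of returning 0 on
+ * success, while vortex_eisa_init() (defined earlier in this file, not
+ * in this hunk) returns the number of EISA boards it found, hence the
+ * differing tests above; the module loads if either bus yielded a
+ * device. */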
+
+
+static void __exit vortex_eisa_cleanup (void)
+{
+       struct net_device *dev, *tmp;
+       struct vortex_private *vp;
+       long ioaddr;
+
+       dev = root_vortex_eisa_dev;
+
+       while (dev) {
+               vp = dev->priv;
+               ioaddr = dev->base_addr;
+
+               unregister_netdev (dev);
+               outw (TotalReset, ioaddr + EL3_CMD);
+               release_region (ioaddr, VORTEX_TOTAL_SIZE);
+
+               tmp = dev;
+               dev = vp->next_module;
+
+               kfree (tmp);
+       }
+}
+
+
+static void __exit vortex_cleanup (void)
+{
+       if (vortex_have_pci)
+               pci_unregister_driver (&vortex_driver);
+       if (vortex_have_eisa)
+               vortex_eisa_cleanup ();
+}
+
+
+module_init(vortex_init);
+module_exit(vortex_cleanup);
diff --git a/xen-2.4.16/drivers/net/8139cp.c b/xen-2.4.16/drivers/net/8139cp.c
new file mode 100644 (file)
index 0000000..c1e2f19
--- /dev/null
@@ -0,0 +1,1334 @@
+/* 8139cp.c: A Linux PCI Ethernet driver for the RealTek 8139C+ chips. */
+/*
+       Copyright 2001 Jeff Garzik <jgarzik@mandrakesoft.com>
+
+       Copyright (C) 2000, 2001 David S. Miller (davem@redhat.com) [sungem.c]
+       Copyright 2001 Manfred Spraul                               [natsemi.c]
+       Copyright 1999-2001 by Donald Becker.                       [natsemi.c]
+       Written 1997-2001 by Donald Becker.                         [8139too.c]
+       Copyright 1998-2001 by Jes Sorensen, <jes@trained-monkey.org>. [acenic.c]
+
+       This software may be used and distributed according to the terms of
+       the GNU General Public License (GPL), incorporated herein by reference.
+       Drivers based on or derived from this code fall under the GPL and must
+       retain the authorship, copyright and license notice.  This file is not
+       a complete program and may only be used when the entire operating
+       system is licensed under the GPL.
+
+       See the file COPYING in this distribution for more information.
+
+       TODO, in rough priority order:
+       * dev->tx_timeout
+       * LinkChg interrupt
+       * ETHTOOL_[GS]SET
+       * Support forcing media type with a module parameter,
+         like dl2k.c/sundance.c
+       * Implement PCI suspend/resume
+       * Constants (module parms?) for Rx work limit
+       * support 64-bit PCI DMA
+       * Complete reset on PciErr
+       * Consider Rx interrupt mitigation using TimerIntr
+       * Implement 8139C+ statistics dump; maybe not...
+         h/w stats can be reset only by software reset
+       * Rx checksumming
+       * Tx checksumming
+       * ETHTOOL_GREGS, ETHTOOL_[GS]WOL,
+         ETHTOOL_[GS]MSGLVL, ETHTOOL_NWAY_RST
+       * Jumbo frames / dev->change_mtu
+       * Investigate using skb->priority with h/w VLAN priority
+       * Investigate using High Priority Tx Queue with skb->priority
+       * Adjust Rx FIFO threshold and Max Rx DMA burst on Rx FIFO error
+       * Adjust Tx FIFO threshold and Max Tx DMA burst on Tx FIFO error
+
+ */
+
+#define DRV_NAME               "8139cp"
+#define DRV_VERSION            "0.0.5"
+#define DRV_RELDATE            "Oct 19, 2001"
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+//#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+/* These identify the driver base version and may not be removed. */
+static char version[] __devinitdata =
+KERN_INFO DRV_NAME " 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
+
+MODULE_AUTHOR("Jeff Garzik <jgarzik@mandrakesoft.com>");
+MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver");
+MODULE_LICENSE("GPL");
+
+static int debug = -1;
+MODULE_PARM (debug, "i");
+MODULE_PARM_DESC (debug, "8139cp bitmapped message enable number");
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
+   The RTL chips use a 64 element hash table based on the Ethernet CRC.  */
+static int multicast_filter_limit = 32;
+MODULE_PARM (multicast_filter_limit, "i");
+MODULE_PARM_DESC (multicast_filter_limit, "8139cp maximum number of filtered multicast addresses");
+
+/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
+#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
+        || defined(__sparc__) || defined(__ia64__) \
+        || defined(__sh__) || defined(__mips__)
+static int rx_copybreak = 1518;
+#else
+static int rx_copybreak = 100;
+#endif
+MODULE_PARM (rx_copybreak, "i");
+MODULE_PARM_DESC (rx_copybreak, "8139cp Breakpoint at which Rx packets are copied");
+
+#define PFX                    DRV_NAME ": "
+
+#define CP_DEF_MSG_ENABLE      (NETIF_MSG_DRV          | \
+                                NETIF_MSG_PROBE        | \
+                                NETIF_MSG_LINK)
+#define CP_REGS_SIZE           (0xff + 1)
+#define CP_RX_RING_SIZE                64
+#define CP_TX_RING_SIZE                64
+#define CP_RING_BYTES          \
+               ((sizeof(struct cp_desc) * CP_RX_RING_SIZE) +   \
+               (sizeof(struct cp_desc) * CP_TX_RING_SIZE))
+#define NEXT_TX(N)             (((N) + 1) & (CP_TX_RING_SIZE - 1))
+#define NEXT_RX(N)             (((N) + 1) & (CP_RX_RING_SIZE - 1))
+#define TX_BUFFS_AVAIL(CP)                                     \
+       (((CP)->tx_tail <= (CP)->tx_head) ?                     \
+         (CP)->tx_tail + (CP_TX_RING_SIZE - 1) - (CP)->tx_head :       \
+         (CP)->tx_tail - (CP)->tx_head - 1)
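+/* TX_BUFFS_AVAIL keeps one descriptor unused so a full ring can be told
+ * apart from an empty one: tx_head == tx_tail (empty) yields
+ * CP_TX_RING_SIZE - 1, while tx_tail == tx_head + 1 modulo the ring
+ * size (full) yields 0. */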
+#define CP_CHIP_VERSION                0x76
+
+#define PKT_BUF_SZ             1536    /* Size of each temporary Rx buffer.*/
+#define RX_OFFSET              2
+
+/* The following settings are log_2(bytes)-4:  0 == 16 bytes .. 6==1024, 7==end of packet. */
+#define RX_FIFO_THRESH         5       /* Rx buffer level before first PCI xfer.  */
+#define RX_DMA_BURST           4       /* Maximum PCI burst, '4' is 256 */
+#define TX_DMA_BURST           6       /* Maximum PCI burst, '6' is 1024 */
+#define TX_EARLY_THRESH                256     /* Early Tx threshold, in bytes */
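+/* Worked out per the log2 encoding above: RX_FIFO_THRESH 5 starts a PCI
+ * transfer once 16 << 5 = 512 bytes are buffered, and the burst values
+ * 4 and 6 permit 256- and 1024-byte DMA bursts respectively. */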
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (6*HZ)
+
+
+enum {
+       /* NIC register offsets */
+       MAC0            = 0x00, /* Ethernet hardware address. */
+       MAR0            = 0x08, /* Multicast filter. */
+       TxRingAddr      = 0x20, /* 64-bit start addr of Tx ring */
+       HiTxRingAddr    = 0x28, /* 64-bit start addr of high priority Tx ring */
+       Cmd             = 0x37, /* Command register */
+       IntrMask        = 0x3C, /* Interrupt mask */
+       IntrStatus      = 0x3E, /* Interrupt status */
+       TxConfig        = 0x40, /* Tx configuration */
+       ChipVersion     = 0x43, /* 8-bit chip version, inside TxConfig */
+       RxConfig        = 0x44, /* Rx configuration */
+       Cfg9346         = 0x50, /* EEPROM select/control; Cfg reg [un]lock */
+       Config1         = 0x52, /* Config1 */
+       Config3         = 0x59, /* Config3 */
+       Config4         = 0x5A, /* Config4 */
+       MultiIntr       = 0x5C, /* Multiple interrupt select */
+       Config5         = 0xD8, /* Config5 */
+       TxPoll          = 0xD9, /* Tell chip to check Tx descriptors for work */
+       CpCmd           = 0xE0, /* C+ Command register (C+ mode only) */
+       RxRingAddr      = 0xE4, /* 64-bit start addr of Rx ring */
+       TxThresh        = 0xEC, /* Early Tx threshold */
+       OldRxBufAddr    = 0x30, /* DMA address of Rx ring buffer (C mode) */
+       OldTSD0         = 0x10, /* DMA address of first Tx desc (C mode) */
+
+       /* Tx and Rx status descriptors */
+       DescOwn         = (1 << 31), /* Descriptor is owned by NIC */
+       RingEnd         = (1 << 30), /* End of descriptor ring */
+       FirstFrag       = (1 << 29), /* First segment of a packet */
+       LastFrag        = (1 << 28), /* Final segment of a packet */
+       TxError         = (1 << 23), /* Tx error summary */
+       RxError         = (1 << 20), /* Rx error summary */
+       IPCS            = (1 << 18), /* Calculate IP checksum */
+       UDPCS           = (1 << 17), /* Calculate UDP/IP checksum */
+       TCPCS           = (1 << 16), /* Calculate TCP/IP checksum */
+       IPFail          = (1 << 15), /* IP checksum failed */
+       UDPFail         = (1 << 14), /* UDP/IP checksum failed */
+       TCPFail         = (1 << 13), /* TCP/IP checksum failed */
+       NormalTxPoll    = (1 << 6),  /* One or more normal Tx packets to send */
+       PID1            = (1 << 17), /* 2 protocol id bits:  0==non-IP, */
+       PID0            = (1 << 16), /* 1==UDP/IP, 2==TCP/IP, 3==IP */
+       TxFIFOUnder     = (1 << 25), /* Tx FIFO underrun */
+       TxOWC           = (1 << 22), /* Tx Out-of-window collision */
+       TxLinkFail      = (1 << 21), /* Link failed during Tx of packet */
+       TxMaxCol        = (1 << 20), /* Tx aborted due to excessive collisions */
+       TxColCntShift   = 16,        /* Shift, to get 4-bit Tx collision cnt */
+       TxColCntMask    = 0x01 | 0x02 | 0x04 | 0x08, /* 4-bit collision count */
+       RxErrFrame      = (1 << 27), /* Rx frame alignment error */
+       RxMcast         = (1 << 26), /* Rx multicast packet rcv'd */
+       RxErrCRC        = (1 << 18), /* Rx CRC error */
+       RxErrRunt       = (1 << 19), /* Rx error, packet < 64 bytes */
+       RxErrLong       = (1 << 21), /* Rx error, packet > 4096 bytes */
+       RxErrFIFO       = (1 << 22), /* Rx error, FIFO overflowed, pkt bad */
+
+       /* RxConfig register */
+       RxCfgFIFOShift  = 13,        /* Shift, to get Rx FIFO thresh value */
+       RxCfgDMAShift   = 8,         /* Shift, to get Rx Max DMA value */
+       AcceptErr       = 0x20,      /* Accept packets with CRC errors */
+       AcceptRunt      = 0x10,      /* Accept runt (<64 bytes) packets */
+       AcceptBroadcast = 0x08,      /* Accept broadcast packets */
+       AcceptMulticast = 0x04,      /* Accept multicast packets */
+       AcceptMyPhys    = 0x02,      /* Accept pkts with our MAC as dest */
+       AcceptAllPhys   = 0x01,      /* Accept all pkts w/ physical dest */
+
+       /* IntrMask / IntrStatus registers */
+       PciErr          = (1 << 15), /* System error on the PCI bus */
+       TimerIntr       = (1 << 14), /* Asserted when TCTR reaches TimerInt value */
+       LenChg          = (1 << 13), /* Cable length change */
+       SWInt           = (1 << 8),  /* Software-requested interrupt */
+       TxEmpty         = (1 << 7),  /* No Tx descriptors available */
+       RxFIFOOvr       = (1 << 6),  /* Rx FIFO Overflow */
+       LinkChg         = (1 << 5),  /* Packet underrun, or link change */
+       RxEmpty         = (1 << 4),  /* No Rx descriptors available */
+       TxErr           = (1 << 3),  /* Tx error */
+       TxOK            = (1 << 2),  /* Tx packet sent */
+       RxErr           = (1 << 1),  /* Rx error */
+       RxOK            = (1 << 0),  /* Rx packet received */
+       IntrResvd       = (1 << 10), /* reserved, according to RealTek engineers,
+                                       but hardware likes to raise it */
+
+       IntrAll         = PciErr | TimerIntr | LenChg | SWInt | TxEmpty |
+                         RxFIFOOvr | LinkChg | RxEmpty | TxErr | TxOK |
+                         RxErr | RxOK | IntrResvd,
+
+       /* C mode command register */
+       CmdReset        = (1 << 4),  /* Enable to reset; self-clearing */
+       RxOn            = (1 << 3),  /* Rx mode enable */
+       TxOn            = (1 << 2),  /* Tx mode enable */
+
+       /* C+ mode command register */
+       RxChkSum        = (1 << 5),  /* Rx checksum offload enable */
+       PCIMulRW        = (1 << 3),  /* Enable PCI read/write multiple */
+       CpRxOn          = (1 << 1),  /* Rx mode enable */
+       CpTxOn          = (1 << 0),  /* Tx mode enable */
+
+       /* Cfg9346 EEPROM control register */
+       Cfg9346_Lock    = 0x00,      /* Lock ConfigX/MII register access */
+       Cfg9346_Unlock  = 0xC0,      /* Unlock ConfigX/MII register access */
+
+       /* TxConfig register */
+       IFG             = (1 << 25) | (1 << 24), /* standard IEEE interframe gap */
+       TxDMAShift      = 8,         /* DMA burst value (0-7) is shift this many bits */
+
+       /* Early Tx Threshold register */
+       TxThreshMask    = 0x3f,      /* Mask bits 5-0 */
+       TxThreshMax     = 2048,      /* Max early Tx threshold */
+
+       /* Config1 register */
+       DriverLoaded    = (1 << 5),  /* Software marker, driver is loaded */
+       PMEnable        = (1 << 0),  /* Enable various PM features of chip */
+
+       /* Config3 register */
+       PARMEnable      = (1 << 6),  /* Enable auto-loading of PHY parms */
+
+       /* Config5 register */
+       PMEStatus       = (1 << 0),  /* PME status can be reset by PCI RST# */
+};
+
+static const unsigned int cp_intr_mask =
+       PciErr | LinkChg |
+       RxOK | RxErr | RxEmpty | RxFIFOOvr |
+       TxOK | TxErr | TxEmpty;
+
+static const unsigned int cp_rx_config =
+         (RX_FIFO_THRESH << RxCfgFIFOShift) |
+         (RX_DMA_BURST << RxCfgDMAShift);
+
+struct cp_desc {
+       u32             opts1;
+       u32             opts2;
+       u32             addr_lo;
+       u32             addr_hi;
+};
+
+struct ring_info {
+       struct sk_buff          *skb;
+       dma_addr_t              mapping;
+       unsigned                frag;
+};
+
+struct cp_extra_stats {
+       unsigned long           rx_frags;
+};
+
+struct cp_private {
+       unsigned                tx_head;
+       unsigned                tx_tail;
+       unsigned                rx_tail;
+
+       void                    *regs;
+       struct net_device       *dev;
+       spinlock_t              lock;
+
+       struct cp_desc          *rx_ring;
+       struct cp_desc          *tx_ring;
+       struct ring_info        tx_skb[CP_TX_RING_SIZE];
+       struct ring_info        rx_skb[CP_RX_RING_SIZE];
+       unsigned                rx_buf_sz;
+       dma_addr_t              ring_dma;
+
+       u32                     msg_enable;
+
+       struct net_device_stats net_stats;
+       struct cp_extra_stats   cp_stats;
+
+       struct pci_dev          *pdev;
+       u32                     rx_config;
+
+       struct sk_buff          *frag_skb;
+       unsigned                dropping_frag : 1;
+};
+
+#define cpr8(reg)      readb(cp->regs + (reg))
+#define cpr16(reg)     readw(cp->regs + (reg))
+#define cpr32(reg)     readl(cp->regs + (reg))
+#define cpw8(reg,val)  writeb((val), cp->regs + (reg))
+#define cpw16(reg,val) writew((val), cp->regs + (reg))
+#define cpw32(reg,val) writel((val), cp->regs + (reg))
+#define cpw8_f(reg,val) do {                   \
+       writeb((val), cp->regs + (reg));        \
+       readb(cp->regs + (reg));                \
+       } while (0)
+#define cpw16_f(reg,val) do {                  \
+       writew((val), cp->regs + (reg));        \
+       readw(cp->regs + (reg));                \
+       } while (0)
+#define cpw32_f(reg,val) do {                  \
+       writel((val), cp->regs + (reg));        \
+       readl(cp->regs + (reg));                \
+       } while (0)
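+/* The *_f variants read the register straight back after writing it.
+ * PCI bridges may post (buffer) MMIO writes; the read cannot complete
+ * until the write has, so this flushes the write to the chip before
+ * the driver proceeds (used below when acking IntrStatus). */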
+
+
+static void __cp_set_rx_mode (struct net_device *dev);
+static void cp_tx (struct cp_private *cp);
+static void cp_clean_rings (struct cp_private *cp);
+
+
+static struct pci_device_id cp_pci_tbl[] __devinitdata = {
+       { PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139,
+         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+       { },
+};
+MODULE_DEVICE_TABLE(pci, cp_pci_tbl);
+
+static inline void cp_rx_skb (struct cp_private *cp, struct sk_buff *skb)
+{
+       skb->protocol = eth_type_trans (skb, cp->dev);
+
+       cp->net_stats.rx_packets++;
+       cp->net_stats.rx_bytes += skb->len;
+       cp->dev->last_rx = jiffies;
+       netif_rx (skb);
+}
+
+static inline void cp_rx_err_acct (struct cp_private *cp, unsigned rx_tail,
+                                  u32 status, u32 len)
+{
+       if (netif_msg_rx_err (cp))
+               printk (KERN_DEBUG
+                       "%s: rx err, slot %d status 0x%x len %d\n",
+                       cp->dev->name, rx_tail, status, len);
+       cp->net_stats.rx_errors++;
+       if (status & RxErrFrame)
+               cp->net_stats.rx_frame_errors++;
+       if (status & RxErrCRC)
+               cp->net_stats.rx_crc_errors++;
+       if (status & RxErrRunt)
+               cp->net_stats.rx_length_errors++;
+       if (status & RxErrLong)
+               cp->net_stats.rx_length_errors++;
+       if (status & RxErrFIFO)
+               cp->net_stats.rx_fifo_errors++;
+}
+
+static void cp_rx_frag (struct cp_private *cp, unsigned rx_tail,
+                       struct sk_buff *skb, u32 status, u32 len)
+{
+       struct sk_buff *copy_skb, *frag_skb = cp->frag_skb;
+       unsigned orig_len = frag_skb ? frag_skb->len : 0;
+       unsigned target_len = orig_len + len;
+       unsigned first_frag = status & FirstFrag;
+       unsigned last_frag = status & LastFrag;
+
+       if (netif_msg_rx_status (cp))
+               printk (KERN_DEBUG "%s: rx %s%sfrag, slot %d status 0x%x len %d\n",
+                       cp->dev->name,
+                       cp->dropping_frag ? "dropping " : "",
+                       first_frag ? "first " :
+                       last_frag ? "last " : "",
+                       rx_tail, status, len);
+
+       cp->cp_stats.rx_frags++;
+
+       if (!frag_skb && !first_frag)
+               cp->dropping_frag = 1;
+       if (cp->dropping_frag)
+               goto drop_frag;
+
+       copy_skb = dev_alloc_skb (target_len + RX_OFFSET);
+       if (!copy_skb) {
+               printk(KERN_WARNING "%s: rx slot %d alloc failed\n",
+                      cp->dev->name, rx_tail);
+
+               cp->dropping_frag = 1;
+drop_frag:
+               if (frag_skb) {
+                       dev_kfree_skb_irq(frag_skb);
+                       cp->frag_skb = NULL;
+               }
+               if (last_frag) {
+                       cp->net_stats.rx_dropped++;
+                       cp->dropping_frag = 0;
+               }
+               return;
+       }
+
+       copy_skb->dev = cp->dev;
+       skb_reserve(copy_skb, RX_OFFSET);
+       skb_put(copy_skb, target_len);
+       if (frag_skb) {
+               memcpy(copy_skb->data, frag_skb->data, orig_len);
+               dev_kfree_skb_irq(frag_skb);
+       }
+       pci_dma_sync_single(cp->pdev, cp->rx_skb[rx_tail].mapping,
+                           len, PCI_DMA_FROMDEVICE);
+       memcpy(copy_skb->data + orig_len, skb->data, len);
+
+       copy_skb->ip_summed = CHECKSUM_NONE;
+
+       if (last_frag) {
+               if (status & (RxError | RxErrFIFO)) {
+                       cp_rx_err_acct(cp, rx_tail, status, len);
+                       dev_kfree_skb_irq(copy_skb);
+               } else
+                       cp_rx_skb(cp, copy_skb);
+               cp->frag_skb = NULL;
+       } else {
+               cp->frag_skb = copy_skb;
+       }
+}
+
+static void cp_rx (struct cp_private *cp)
+{
+       unsigned rx_tail = cp->rx_tail;
+       unsigned rx_work = 100;
+
+       while (rx_work--) {
+               u32 status, len;
+               dma_addr_t mapping;
+               struct sk_buff *skb, *copy_skb;
+               unsigned copying_skb, buflen;
+
+               skb = cp->rx_skb[rx_tail].skb;
+               if (!skb)
+                       BUG();
+               rmb();
+               status = le32_to_cpu(cp->rx_ring[rx_tail].opts1);
+               if (status & DescOwn)
+                       break;
+
+               len = (status & 0x1fff) - 4;
+               mapping = cp->rx_skb[rx_tail].mapping;
+
+               if ((status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag)) {
+                       cp_rx_frag(cp, rx_tail, skb, status, len);
+                       goto rx_next;
+               }
+
+               if (status & (RxError | RxErrFIFO)) {
+                       cp_rx_err_acct(cp, rx_tail, status, len);
+                       goto rx_next;
+               }
+
+               copying_skb = (len <= rx_copybreak);
+
+               if (netif_msg_rx_status(cp))
+                       printk(KERN_DEBUG "%s: rx slot %d status 0x%x len %d copying? %d\n",
+                              cp->dev->name, rx_tail, status, len,
+                              copying_skb);
+
+               buflen = copying_skb ? len : cp->rx_buf_sz;
+               copy_skb = dev_alloc_skb (buflen + RX_OFFSET);
+               if (!copy_skb) {
+                       cp->net_stats.rx_dropped++;
+                       goto rx_next;
+               }
+
+               skb_reserve(copy_skb, RX_OFFSET);
+               copy_skb->dev = cp->dev;
+
+               if (!copying_skb) {
+                       pci_unmap_single(cp->pdev, mapping,
+                                        buflen, PCI_DMA_FROMDEVICE);
+                       skb->ip_summed = CHECKSUM_NONE;
+                       skb_trim(skb, len);
+
+                       mapping =
+                       cp->rx_skb[rx_tail].mapping =
+                               pci_map_single(cp->pdev, copy_skb->data,
+                                              buflen, PCI_DMA_FROMDEVICE);
+                       cp->rx_skb[rx_tail].skb = copy_skb;
+                       skb_put(copy_skb, buflen);
+               } else {
+                       skb_put(copy_skb, len);
+                       pci_dma_sync_single(cp->pdev, mapping, len, PCI_DMA_FROMDEVICE);
+                       memcpy(copy_skb->data, skb->data, len);
+
+                       /* We'll reuse the original ring buffer. */
+                       skb = copy_skb;
+               }
+
+               cp_rx_skb(cp, skb);
+
+rx_next:
+               if (rx_tail == (CP_RX_RING_SIZE - 1))
+                       cp->rx_ring[rx_tail].opts1 =
+                               cpu_to_le32(DescOwn | RingEnd | cp->rx_buf_sz);
+               else
+                       cp->rx_ring[rx_tail].opts1 =
+                               cpu_to_le32(DescOwn | cp->rx_buf_sz);
+               cp->rx_ring[rx_tail].opts2 = 0;
+               cp->rx_ring[rx_tail].addr_lo = cpu_to_le32(mapping);
+               rx_tail = NEXT_RX(rx_tail);
+       }
+
+       if (!rx_work)
+               printk(KERN_WARNING "%s: rx work limit reached\n", cp->dev->name);
+
+       cp->rx_tail = rx_tail;
+}
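+
+/* The budget of 100 descriptors per call bounds the time cp_rx() can
+ * spend in interrupt context; as the TODO list at the top of this file
+ * notes, the Rx work limit is a candidate for a module parameter. */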
+
+static void cp_interrupt (int irq, void *dev_instance, struct pt_regs *regs)
+{
+       struct net_device *dev = dev_instance;
+       struct cp_private *cp = dev->priv;
+       u16 status;
+
+       status = cpr16(IntrStatus);
+       if (!status || (status == 0xFFFF))
+               return;
+
+       if (netif_msg_intr(cp))
+               printk(KERN_DEBUG "%s: intr, status %04x cmd %02x cpcmd %04x\n",
+                       dev->name, status, cpr8(Cmd), cpr16(CpCmd));
+
+       spin_lock(&cp->lock);
+
+       if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
+               cp_rx(cp);
+       if (status & (TxOK | TxErr | TxEmpty | SWInt))
+               cp_tx(cp);
+
+       cpw16_f(IntrStatus, status);
+
+       if (status & PciErr) {
+               u16 pci_status;
+
+               pci_read_config_word(cp->pdev, PCI_STATUS, &pci_status);
+               pci_write_config_word(cp->pdev, PCI_STATUS, pci_status);
+               printk(KERN_ERR "%s: PCI bus error, status=%04x, PCI status=%04x\n",
+                      dev->name, status, pci_status);
+       }
+
+       spin_unlock(&cp->lock);
+}
+
+static void cp_tx (struct cp_private *cp)
+{
+       unsigned tx_head = cp->tx_head;
+       unsigned tx_tail = cp->tx_tail;
+
+       while (tx_tail != tx_head) {
+               struct sk_buff *skb;
+               u32 status;
+
+               rmb();
+               status = le32_to_cpu(cp->tx_ring[tx_tail].opts1);
+               if (status & DescOwn)
+                       break;
+
+               skb = cp->tx_skb[tx_tail].skb;
+               if (!skb)
+                       BUG();
+
+               pci_unmap_single(cp->pdev, cp->tx_skb[tx_tail].mapping,
+                                       skb->len, PCI_DMA_TODEVICE);
+
+               if (status & LastFrag) {
+                       if (status & (TxError | TxFIFOUnder)) {
+                               if (netif_msg_tx_err(cp))
+                                       printk(KERN_DEBUG "%s: tx err, status 0x%x\n",
+                                              cp->dev->name, status);
+                               cp->net_stats.tx_errors++;
+                               if (status & TxOWC)
+                                       cp->net_stats.tx_window_errors++;
+                               if (status & TxMaxCol)
+                                       cp->net_stats.tx_aborted_errors++;
+                               if (status & TxLinkFail)
+                                       cp->net_stats.tx_carrier_errors++;
+                               if (status & TxFIFOUnder)
+                                       cp->net_stats.tx_fifo_errors++;
+                       } else {
+                               cp->net_stats.collisions +=
+                                       ((status >> TxColCntShift) & TxColCntMask);
+                               cp->net_stats.tx_packets++;
+                               cp->net_stats.tx_bytes += skb->len;
+                               if (netif_msg_tx_done(cp))
+                                       printk(KERN_DEBUG "%s: tx done, slot %d\n", cp->dev->name, tx_tail);
+                       }
+                       dev_kfree_skb_irq(skb);
+               }
+
+               cp->tx_skb[tx_tail].skb = NULL;
+
+               tx_tail = NEXT_TX(tx_tail);
+       }
+
+       cp->tx_tail = tx_tail;
+
+       if (netif_queue_stopped(cp->dev) && (TX_BUFFS_AVAIL(cp) > 1))
+               netif_wake_queue(cp->dev);
+}
+
+static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
+{
+       struct cp_private *cp = dev->priv;
+       unsigned entry;
+       u32 eor;
+
+       spin_lock_irq(&cp->lock);
+
+       if (TX_BUFFS_AVAIL(cp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+               netif_stop_queue(dev);
+               spin_unlock_irq(&cp->lock);
+               return 1;
+       }
+
+       entry = cp->tx_head;
+       eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
+       if (skb_shinfo(skb)->nr_frags == 0) {
+               struct cp_desc *txd = &cp->tx_ring[entry];
+               u32 mapping, len;
+
+               len = skb->len;
+               mapping = pci_map_single(cp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+               eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
+               txd->opts2 = 0;
+               txd->addr_lo = cpu_to_le32(mapping);
+               wmb();
+
+#ifdef CP_TX_CHECKSUM
+               txd->opts1 = cpu_to_le32(eor | len | DescOwn | FirstFrag |
+                       LastFrag | IPCS | UDPCS | TCPCS);
+#else
+               txd->opts1 = cpu_to_le32(eor | len | DescOwn | FirstFrag |
+                       LastFrag);
+#endif
+               wmb();
+
+               cp->tx_skb[entry].skb = skb;
+               cp->tx_skb[entry].mapping = mapping;
+               cp->tx_skb[entry].frag = 0;
+               entry = NEXT_TX(entry);
+       } else {
+               struct cp_desc *txd;
+               u32 first_len, first_mapping;
+               int frag, first_entry = entry;
+
+               /* We must give this initial chunk to the device last.
+                * Otherwise we could race with the device.
+                */
+               first_len = skb->len - skb->data_len;
+               first_mapping = pci_map_single(cp->pdev, skb->data,
+                                              first_len, PCI_DMA_TODEVICE);
+               cp->tx_skb[entry].skb = skb;
+               cp->tx_skb[entry].mapping = first_mapping;
+               cp->tx_skb[entry].frag = 1;
+               entry = NEXT_TX(entry);
+
+               for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
+                       skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag];
+                       u32 len, mapping;
+                       u32 ctrl;
+
+                       len = this_frag->size;
+                       mapping = pci_map_single(cp->pdev,
+                                                ((void *) page_address(this_frag->page) +
+                                                 this_frag->page_offset),
+                                                len, PCI_DMA_TODEVICE);
+                       eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
+#ifdef CP_TX_CHECKSUM
+                       ctrl = eor | len | DescOwn | IPCS | UDPCS | TCPCS;
+#else
+                       ctrl = eor | len | DescOwn;
+#endif
+                       if (frag == skb_shinfo(skb)->nr_frags - 1)
+                               ctrl |= LastFrag;
+
+                       txd = &cp->tx_ring[entry];
+                       txd->opts2 = 0;
+                       txd->addr_lo = cpu_to_le32(mapping);
+                       wmb();
+
+                       txd->opts1 = cpu_to_le32(ctrl);
+                       wmb();
+
+                       cp->tx_skb[entry].skb = skb;
+                       cp->tx_skb[entry].mapping = mapping;
+                       cp->tx_skb[entry].frag = frag + 2;
+                       entry = NEXT_TX(entry);
+               }
+
+               txd = &cp->tx_ring[first_entry];
+               txd->opts2 = 0;
+               txd->addr_lo = cpu_to_le32(first_mapping);
+               wmb();
+
+#ifdef CP_TX_CHECKSUM
+               txd->opts1 = cpu_to_le32(first_len | FirstFrag | DescOwn | IPCS | UDPCS | TCPCS);
+#else
+               txd->opts1 = cpu_to_le32(first_len | FirstFrag | DescOwn);
+#endif
+               wmb();
+       }
+       cp->tx_head = entry;
+       if (netif_msg_tx_queued(cp))
+               printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n",
+                      dev->name, entry, skb->len);
+       if (TX_BUFFS_AVAIL(cp) < 0)
+               BUG();
+       if (TX_BUFFS_AVAIL(cp) == 0)
+               netif_stop_queue(dev);
+
+       spin_unlock_irq(&cp->lock);
+
+       cpw8(TxPoll, NormalTxPoll);
+       dev->trans_start = jiffies;
+
+       return 0;
+}
+
+/* Set or clear the multicast filter for this adaptor.
+   This routine is not state sensitive and need not be SMP locked. */
+
+static unsigned const ethernet_polynomial = 0x04c11db7U;
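+/* Bitwise, MSB-first CRC-32 over the Ethernet polynomial; the multicast
+   hash below uses only the top six bits of the result. */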
+static inline u32 ether_crc (int length, unsigned char *data)
+{
+       int crc = -1;
+
+       while (--length >= 0) {
+               unsigned char current_octet = *data++;
+               int bit;
+               for (bit = 0; bit < 8; bit++, current_octet >>= 1)
+                       crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ?
+                            ethernet_polynomial : 0);
+       }
+
+       return crc;
+}
+
+static void __cp_set_rx_mode (struct net_device *dev)
+{
+       struct cp_private *cp = dev->priv;
+       u32 mc_filter[2];       /* Multicast hash filter */
+       int i, rx_mode;
+       u32 tmp;
+
+       /* Note: do not reorder, GCC is clever about common statements. */
+       if (dev->flags & IFF_PROMISC) {
+               /* Unconditionally log net taps. */
+               printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n",
+                       dev->name);
+               rx_mode =
+                   AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
+                   AcceptAllPhys;
+               mc_filter[1] = mc_filter[0] = 0xffffffff;
+       } else if ((dev->mc_count > multicast_filter_limit)
+                  || (dev->flags & IFF_ALLMULTI)) {
+               /* Too many to filter perfectly -- accept all multicasts. */
+               rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
+               mc_filter[1] = mc_filter[0] = 0xffffffff;
+       } else {
+               struct dev_mc_list *mclist;
+               rx_mode = AcceptBroadcast | AcceptMyPhys;
+               mc_filter[1] = mc_filter[0] = 0;
+               for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+                    i++, mclist = mclist->next) {
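+                       /* The top six CRC bits select one of the 64 bits in
+                          the MAR0..MAR7 hash filter. */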
+                       int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+
+                       mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
+                       rx_mode |= AcceptMulticast;
+               }
+       }
+
+       /* We can safely update without stopping the chip. */
+       tmp = cp_rx_config | rx_mode;
+       if (cp->rx_config != tmp) {
+               cpw32_f (RxConfig, tmp);
+               cp->rx_config = tmp;
+       }
+       cpw32_f (MAR0 + 0, mc_filter[0]);
+       cpw32_f (MAR0 + 4, mc_filter[1]);
+}
+
+static void cp_set_rx_mode (struct net_device *dev)
+{
+       unsigned long flags;
+       struct cp_private *cp = dev->priv;
+
+       spin_lock_irqsave (&cp->lock, flags);
+       __cp_set_rx_mode(dev);
+       spin_unlock_irqrestore (&cp->lock, flags);
+}
+
+static void __cp_get_stats(struct cp_private *cp)
+{
+       /* XXX implement */
+}
+
+static struct net_device_stats *cp_get_stats(struct net_device *dev)
+{
+       struct cp_private *cp = dev->priv;
+
+       /* The chip only needs to report the frames it silently dropped. */
+       spin_lock_irq(&cp->lock);
+       if (netif_running(dev) && netif_device_present(dev))
+               __cp_get_stats(cp);
+       spin_unlock_irq(&cp->lock);
+
+       return &cp->net_stats;
+}
+
+static void cp_stop_hw (struct cp_private *cp)
+{
+       cpw16(IntrMask, 0);
+       cpr16(IntrMask);
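+       /* Reading IntrMask back flushes the posted MMIO write. */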
+       cpw8(Cmd, 0);
+       cpw16(CpCmd, 0);
+       cpr16(CpCmd);
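+       /* Writing the pending status bits back acknowledges them. */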
+       cpw16(IntrStatus, ~(cpr16(IntrStatus)));
+       synchronize_irq();
+       udelay(10);
+
+       cp->rx_tail = 0;
+       cp->tx_head = cp->tx_tail = 0;
+}
+
+static void cp_reset_hw (struct cp_private *cp)
+{
+       unsigned work = 1000;
+
+       cpw8(Cmd, CmdReset);
+
+       while (work--) {
+               if (!(cpr8(Cmd) & CmdReset))
+                       return;
+
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule_timeout(10);
+       }
+
+       printk(KERN_ERR "%s: hardware reset timeout\n", cp->dev->name);
+}
+
+static void cp_init_hw (struct cp_private *cp)
+{
+       struct net_device *dev = cp->dev;
+
+       cp_reset_hw(cp);
+
+       cpw8_f (Cfg9346, Cfg9346_Unlock);
+
+       /* Restore our idea of the MAC address. */
+       cpw32_f (MAC0 + 0, cpu_to_le32 (*(u32 *) (dev->dev_addr + 0)));
+       cpw32_f (MAC0 + 4, cpu_to_le32 (*(u32 *) (dev->dev_addr + 4)));
+
+       cpw8(Cmd, RxOn | TxOn);
+       cpw16(CpCmd, PCIMulRW | CpRxOn | CpTxOn);
+       cpw8(TxThresh, 0x06); /* XXX convert magic num to a constant */
+
+       __cp_set_rx_mode(dev);
+       cpw32_f (TxConfig, IFG | (TX_DMA_BURST << TxDMAShift));
+
+       cpw8(Config1, cpr8(Config1) | DriverLoaded | PMEnable);
+       cpw8(Config3, PARMEnable); /* disables magic packet and WOL */
+       cpw8(Config5, cpr8(Config5) & PMEStatus); /* disables more WOL stuff */
+
+       cpw32_f(HiTxRingAddr, 0);
+       cpw32_f(HiTxRingAddr + 4, 0);
+       cpw32_f(OldRxBufAddr, 0);
+       cpw32_f(OldTSD0, 0);
+       cpw32_f(OldTSD0 + 4, 0);
+       cpw32_f(OldTSD0 + 8, 0);
+       cpw32_f(OldTSD0 + 12, 0);
+
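+       /* Both rings live in one DMA allocation: Rx descriptors first,
+        * Tx descriptors immediately after (see cp_alloc_rings). */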
+       cpw32_f(RxRingAddr, cp->ring_dma);
+       cpw32_f(RxRingAddr + 4, 0);
+       cpw32_f(TxRingAddr, cp->ring_dma + (sizeof(struct cp_desc) * CP_RX_RING_SIZE));
+       cpw32_f(TxRingAddr + 4, 0);
+
+       cpw16(MultiIntr, 0);
+
+       cpw16(IntrMask, cp_intr_mask);
+
+       cpw8_f (Cfg9346, Cfg9346_Lock);
+}
+
+static int cp_refill_rx (struct cp_private *cp)
+{
+       unsigned i;
+
+       for (i = 0; i < CP_RX_RING_SIZE; i++) {
+               struct sk_buff *skb;
+
+               skb = dev_alloc_skb(cp->rx_buf_sz + RX_OFFSET);
+               if (!skb)
+                       goto err_out;
+
+               skb->dev = cp->dev;
+               skb_reserve(skb, RX_OFFSET);
+               skb_put(skb, cp->rx_buf_sz);
+
+               cp->rx_skb[i].mapping = pci_map_single(cp->pdev,
+                       skb->data, cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+               cp->rx_skb[i].skb = skb;
+               cp->rx_skb[i].frag = 0;
+
+               if (i == (CP_RX_RING_SIZE - 1))
+                       cp->rx_ring[i].opts1 =
+                               cpu_to_le32(DescOwn | RingEnd | cp->rx_buf_sz);
+               else
+                       cp->rx_ring[i].opts1 =
+                               cpu_to_le32(DescOwn | cp->rx_buf_sz);
+               cp->rx_ring[i].opts2 = 0;
+               cp->rx_ring[i].addr_lo = cpu_to_le32(cp->rx_skb[i].mapping);
+               cp->rx_ring[i].addr_hi = 0;
+       }
+
+       return 0;
+
+err_out:
+       cp_clean_rings(cp);
+       return -ENOMEM;
+}
+
+static int cp_init_rings (struct cp_private *cp)
+{
+       memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE);
+       cp->tx_ring[CP_TX_RING_SIZE - 1].opts1 = cpu_to_le32(RingEnd);
+
+       cp->rx_tail = 0;
+       cp->tx_head = cp->tx_tail = 0;
+
+       return cp_refill_rx (cp);
+}
+
+static int cp_alloc_rings (struct cp_private *cp)
+{
+       cp->rx_ring = pci_alloc_consistent(cp->pdev, CP_RING_BYTES, &cp->ring_dma);
+       if (!cp->rx_ring)
+               return -ENOMEM;
+       cp->tx_ring = &cp->rx_ring[CP_RX_RING_SIZE];
+       return cp_init_rings(cp);
+}
+
+static void cp_clean_rings (struct cp_private *cp)
+{
+       unsigned i;
+
+       memset(cp->rx_ring, 0, sizeof(struct cp_desc) * CP_RX_RING_SIZE);
+       memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE);
+
+       for (i = 0; i < CP_RX_RING_SIZE; i++) {
+               if (cp->rx_skb[i].skb) {
+                       pci_unmap_single(cp->pdev, cp->rx_skb[i].mapping,
+                                        cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb(cp->rx_skb[i].skb);
+               }
+       }
+
+       for (i = 0; i < CP_TX_RING_SIZE; i++) {
+               if (cp->tx_skb[i].skb) {
+                       struct sk_buff *skb = cp->tx_skb[i].skb;
+                       pci_unmap_single(cp->pdev, cp->tx_skb[i].mapping,
+                                        skb->len, PCI_DMA_TODEVICE);
+                       dev_kfree_skb(skb);
+                       cp->net_stats.tx_dropped++;
+               }
+       }
+
+       memset(&cp->rx_skb, 0, sizeof(struct ring_info) * CP_RX_RING_SIZE);
+       memset(&cp->tx_skb, 0, sizeof(struct ring_info) * CP_TX_RING_SIZE);
+}
+
+static void cp_free_rings (struct cp_private *cp)
+{
+       cp_clean_rings(cp);
+       pci_free_consistent(cp->pdev, CP_RING_BYTES, cp->rx_ring, cp->ring_dma);
+       cp->rx_ring = NULL;
+       cp->tx_ring = NULL;
+}
+
+static int cp_open (struct net_device *dev)
+{
+       struct cp_private *cp = dev->priv;
+       int rc;
+
+       if (netif_msg_ifup(cp))
+               printk(KERN_DEBUG "%s: enabling interface\n", dev->name);
+
+       cp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
+
+       rc = cp_alloc_rings(cp);
+       if (rc)
+               return rc;
+
+       cp_init_hw(cp);
+
+       rc = request_irq(dev->irq, cp_interrupt, SA_SHIRQ, dev->name, dev);
+       if (rc)
+               goto err_out_hw;
+
+       netif_start_queue(dev);
+
+       return 0;
+
+err_out_hw:
+       cp_stop_hw(cp);
+       cp_free_rings(cp);
+       return rc;
+}
+
+static int cp_close (struct net_device *dev)
+{
+       struct cp_private *cp = dev->priv;
+
+       if (netif_msg_ifdown(cp))
+               printk(KERN_DEBUG "%s: disabling interface\n", dev->name);
+
+       netif_stop_queue(dev);
+       cp_stop_hw(cp);
+       free_irq(dev->irq, dev);
+       cp_free_rings(cp);
+       return 0;
+}
+
+static int cp_ethtool_ioctl (struct cp_private *cp, void *useraddr)
+{
+       u32 ethcmd;
+
+       /* dev_ioctl() in ../../net/core/dev.c has already checked
+          capable(CAP_NET_ADMIN), so don't bother with that here.  */
+
+       if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd)))
+               return -EFAULT;
+
+       switch (ethcmd) {
+
+       case ETHTOOL_GDRVINFO:
+               {
+                       struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO };
+                       strcpy (info.driver, DRV_NAME);
+                       strcpy (info.version, DRV_VERSION);
+                       strcpy (info.bus_info, cp->pdev->slot_name);
+                       if (copy_to_user (useraddr, &info, sizeof (info)))
+                               return -EFAULT;
+                       return 0;
+               }
+
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+
+static int cp_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct cp_private *cp = dev->priv;
+       int rc = 0;
+
+       switch (cmd) {
+       case SIOCETHTOOL:
+               return cp_ethtool_ioctl(cp, (void *) rq->ifr_data);
+
+       default:
+               rc = -EOPNOTSUPP;
+               break;
+       }
+
+       return rc;
+}
+
+
+
+/* Serial EEPROM section. */
+
+/*  EEPROM_Ctrl bits. */
+#define EE_SHIFT_CLK   0x04    /* EEPROM shift clock. */
+#define EE_CS                  0x08    /* EEPROM chip select. */
+#define EE_DATA_WRITE  0x02    /* EEPROM chip data in. */
+#define EE_WRITE_0             0x00
+#define EE_WRITE_1             0x02
+#define EE_DATA_READ   0x01    /* EEPROM chip data out. */
+#define EE_ENB                 (0x80 | EE_CS)
+
+/* Delay between EEPROM clock transitions.
+   No extra delay is needed with 33MHz PCI, but 66MHz may change this.
+ */
+
+#define eeprom_delay() readl(ee_addr)
+
+/* The EEPROM commands include the always-set leading bit. */
+#define EE_WRITE_CMD   (5)
+#define EE_READ_CMD            (6)
+#define EE_ERASE_CMD   (7)
+
+static int __devinit read_eeprom (void *ioaddr, int location, int addr_len)
+{
+       int i;
+       unsigned retval = 0;
+       void *ee_addr = ioaddr + Cfg9346;
+       int read_cmd = location | (EE_READ_CMD << addr_len);
+
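+       /* 93C46/93C56-style serial protocol: raise chip select, then clock
+        * out the start bit, opcode and address, MSB first. */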
+       writeb (EE_ENB & ~EE_CS, ee_addr);
+       writeb (EE_ENB, ee_addr);
+       eeprom_delay ();
+
+       /* Shift the read command bits out. */
+       for (i = 4 + addr_len; i >= 0; i--) {
+               int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0;
+               writeb (EE_ENB | dataval, ee_addr);
+               eeprom_delay ();
+               writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay ();
+       }
+       writeb (EE_ENB, ee_addr);
+       eeprom_delay ();
+
+       for (i = 16; i > 0; i--) {
+               writeb (EE_ENB | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay ();
+               retval =
+                   (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 :
+                                    0);
+               writeb (EE_ENB, ee_addr);
+               eeprom_delay ();
+       }
+
+       /* Terminate the EEPROM access. */
+       writeb (~EE_CS, ee_addr);
+       eeprom_delay ();
+
+       return retval;
+}
+
+static int __devinit cp_init_one (struct pci_dev *pdev,
+                                 const struct pci_device_id *ent)
+{
+       struct net_device *dev;
+       struct cp_private *cp;
+       int rc;
+       void *regs;
+       long pciaddr;
+       unsigned addr_len, i;
+       u8 pci_rev, cache_size;
+       u16 pci_command;
+
+#ifndef MODULE
+       static int version_printed;
+       if (version_printed++ == 0)
+               printk("%s", version);
+#endif
+
+       pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev);
+
+       if (pdev->vendor == PCI_VENDOR_ID_REALTEK &&
+           pdev->device == PCI_DEVICE_ID_REALTEK_8139 && pci_rev < 0x20) {
+               printk(KERN_ERR PFX "pci dev %s (id %04x:%04x rev %02x) is not an 8139C+ compatible chip\n",
+                      pdev->slot_name, pdev->vendor, pdev->device, pci_rev);
+               printk(KERN_ERR PFX "Ensure the \"8139too\" driver is installed!\n");
+               return -ENODEV;
+       }
+
+       dev = alloc_etherdev(sizeof(struct cp_private));
+       if (!dev)
+               return -ENOMEM;
+       SET_MODULE_OWNER(dev);
+       cp = dev->priv;
+       cp->pdev = pdev;
+       cp->dev = dev;
+       cp->msg_enable = (debug < 0 ? CP_DEF_MSG_ENABLE : debug);
+       spin_lock_init (&cp->lock);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto err_out_free;
+
+       rc = pci_request_regions(pdev, DRV_NAME);
+       if (rc)
+               goto err_out_disable;
+
+       if (pdev->irq < 2) {
+               rc = -EIO;
+               printk(KERN_ERR PFX "invalid irq (%d) for pci dev %s\n",
+                      pdev->irq, pdev->slot_name);
+               goto err_out_res;
+       }
+       pciaddr = pci_resource_start(pdev, 1);
+       if (!pciaddr) {
+               rc = -EIO;
+               printk(KERN_ERR PFX "no MMIO resource for pci dev %s\n",
+                      pdev->slot_name);
+               goto err_out_res;
+       }
+       if (pci_resource_len(pdev, 1) < CP_REGS_SIZE) {
+               rc = -EIO;
+               printk(KERN_ERR PFX "MMIO resource (%lx) too small on pci dev %s\n",
+                      pci_resource_len(pdev, 1), pdev->slot_name);
+               goto err_out_res;
+       }
+
+       regs = ioremap_nocache(pciaddr, CP_REGS_SIZE);
+       if (!regs) {
+               rc = -EIO;
+               printk(KERN_ERR PFX "Cannot map PCI MMIO (%lx@%lx) on pci dev %s\n",
+                      pci_resource_len(pdev, 1), pciaddr, pdev->slot_name);
+               goto err_out_res;
+       }
+       dev->base_addr = (unsigned long) regs;
+       cp->regs = regs;
+
+       cp_stop_hw(cp);
+
+       /* read MAC address from EEPROM */
+       addr_len = read_eeprom (regs, 0, 8) == 0x8129 ? 8 : 6;
+       for (i = 0; i < 3; i++)
+               ((u16 *) (dev->dev_addr))[i] =
+                   le16_to_cpu (read_eeprom (regs, i + 7, addr_len));
+
+       dev->open = cp_open;
+       dev->stop = cp_close;
+       dev->set_multicast_list = cp_set_rx_mode;
+       dev->hard_start_xmit = cp_start_xmit;
+       dev->get_stats = cp_get_stats;
+       dev->do_ioctl = cp_ioctl;
+#if 0
+       dev->tx_timeout = cp_tx_timeout;
+       dev->watchdog_timeo = TX_TIMEOUT;
+#endif
+#ifdef CP_TX_CHECKSUM
+       dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+#endif
+
+       dev->irq = pdev->irq;
+
+       rc = register_netdev(dev);
+       if (rc)
+               goto err_out_iomap;
+
+       printk (KERN_INFO "%s: %s at 0x%lx, "
+               "%02x:%02x:%02x:%02x:%02x:%02x, "
+               "IRQ %d\n",
+               dev->name,
+               "RTL-8139C+",
+               dev->base_addr,
+               dev->dev_addr[0], dev->dev_addr[1],
+               dev->dev_addr[2], dev->dev_addr[3],
+               dev->dev_addr[4], dev->dev_addr[5],
+               dev->irq);
+
+       pci_set_drvdata(pdev, dev);
+
+       /*
+        * This fixup appears to be necessary on all architectures; even
+        * this %$#%$# N440BX Intel-based board doesn't get it right.
+        * I.e. with two NICs in the machine, one will have the cache
+        * line size set at boot time and the other will not.
+        */
+       pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size);
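+       /* PCI_CACHE_LINE_SIZE is in units of 32-bit dwords, so convert to bytes. */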
+       cache_size <<= 2;
+       if (cache_size != SMP_CACHE_BYTES) {
+               printk(KERN_INFO "%s: PCI cache line size set incorrectly "
+                      "(%i bytes) by BIOS/FW, ", dev->name, cache_size);
+               if (cache_size > SMP_CACHE_BYTES)
+                       printk("expecting %i\n", SMP_CACHE_BYTES);
+               else {
+                       printk("correcting to %i\n", SMP_CACHE_BYTES);
+                       pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
+                                             SMP_CACHE_BYTES >> 2);
+               }
+       }
+
+       /* enable busmastering and memory-write-invalidate */
+       pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+       if (!(pci_command & PCI_COMMAND_INVALIDATE)) {
+               pci_command |= PCI_COMMAND_INVALIDATE;
+               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
+       }
+       pci_set_master(pdev);
+
+       return 0;
+
+err_out_iomap:
+       iounmap(regs);
+err_out_res:
+       pci_release_regions(pdev);
+err_out_disable:
+       pci_disable_device(pdev);
+err_out_free:
+       kfree(dev);
+       return rc;
+}
+
+static void __devexit cp_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct cp_private *cp = dev->priv;
+
+       if (!dev)
+               BUG();
+       unregister_netdev(dev);
+       iounmap(cp->regs);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+       kfree(dev);
+}
+
+static struct pci_driver cp_driver = {
+       name:           DRV_NAME,
+       id_table:       cp_pci_tbl,
+       probe:          cp_init_one,
+       remove:         __devexit_p(cp_remove_one),
+};
+
+static int __init cp_init (void)
+{
+#ifdef MODULE
+       printk("%s", version);
+#endif
+       return pci_module_init (&cp_driver);
+}
+
+static void __exit cp_exit (void)
+{
+       pci_unregister_driver (&cp_driver);
+}
+
+module_init(cp_init);
+module_exit(cp_exit);
diff --git a/xen-2.4.16/drivers/net/8139too.c b/xen-2.4.16/drivers/net/8139too.c
new file mode 100644 (file)
index 0000000..d1f0a04
--- /dev/null
@@ -0,0 +1,2324 @@
+
+#define CONFIG_8139TOO_8129
+
+/*
+
+       8139too.c: A RealTek RTL-8139 Fast Ethernet driver for Linux.
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001 Jeff Garzik
+
+       Much code comes from Donald Becker's rtl8139.c driver,
+       versions 1.13 and older.  This driver was originally based
+       on rtl8139.c version 1.07.  Header of rtl8139.c version 1.13:
+
+       -----<snip>-----
+
+               Written 1997-2001 by Donald Becker.
+               This software may be used and distributed according to the
+               terms of the GNU General Public License (GPL), incorporated
+               herein by reference.  Drivers based on or derived from this
+               code fall under the GPL and must retain the authorship,
+               copyright and license notice.  This file is not a complete
+               program and may only be used when the entire operating
+               system is licensed under the GPL.
+
+               This driver is for boards based on the RTL8129 and RTL8139
+               PCI ethernet chips.
+
+               The author may be reached as becker@scyld.com, or C/O Scyld
+               Computing Corporation 410 Severn Ave., Suite 210 Annapolis
+               MD 21403
+
+               Support and updates available at
+               http://www.scyld.com/network/rtl8139.html
+
+               Twister-tuning table provided by Kinston
+               <shangh@realtek.com.tw>.
+
+       -----<snip>-----
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Contributors:
+
+               Donald Becker - he wrote the original driver, kudos to him!
+               (but please don't e-mail him for support, this isn't his driver)
+
+               Tigran Aivazian - bug fixes, skbuff free cleanup
+
+               Martin Mares - suggestions for PCI cleanup
+
+               David S. Miller - PCI DMA and softnet updates
+
+               Ernst Gill - fixes ported from BSD driver
+
+               Daniel Kobras - identified specific locations of
+                       posted MMIO write bugginess
+
+               Gerard Sharp - bug fix, testing and feedback
+
+               David Ford - Rx ring wrap fix
+
+               Dan DeMaggio - swapped RTL8139 cards with me, and allowed me
+               to find and fix a crucial bug on older chipsets.
+
+               Donald Becker/Chris Butterworth/Marcus Westergren -
+               Noticed various Rx packet size-related buglets.
+
+               Santiago Garcia Mantinan - testing and feedback
+
+               Jens David - 2.2.x kernel backports
+
+               Martin Dennett - incredibly helpful insight on undocumented
+               features of the 8139 chips
+
+               Jean-Jacques Michel - bug fix
+
+               Tobias Ringström - Rx interrupt status checking suggestion
+
+               Andrew Morton - Clear blocked signals, avoid
+               buffer overrun setting current->comm.
+
+               Kalle Olavi Niemitalo - Wake-on-LAN ioctls
+
+               Robert Kuebel - Save kernel thread from dying on any signal.
+
+       Submitting bug reports:
+
+               "rtl8139-diag -mmmaaavvveefN" output
+               enable RTL8139_DEBUG below, and look at 'dmesg' or kernel log
+
+               See 8139too.txt for more details.
+
+*/
+
+#define DRV_NAME       "8139too"
+#define DRV_VERSION    "0.9.22"
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+//#include <linux/kernel.h>
+//#include <linux/compiler.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/rtnetlink.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+//#include <linux/completion.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#define RTL8139_DRIVER_NAME   DRV_NAME " Fast Ethernet driver " DRV_VERSION
+#define PFX DRV_NAME ": "
+
+
+/* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */
+#ifdef CONFIG_8139TOO_PIO
+#define USE_IO_OPS 1
+#endif
+
+/* define to 1 to enable copious debugging info */
+#undef RTL8139_DEBUG
+
+/* define to 1 to disable lightweight runtime debugging checks */
+#undef RTL8139_NDEBUG
+
+
+#ifdef RTL8139_DEBUG
+/* note: prints function name for you */
+#  define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
+#else
+#  define DPRINTK(fmt, args...)
+#endif
+
+#ifdef RTL8139_NDEBUG
+#  define assert(expr) do {} while (0)
+#else
+#  define assert(expr) \
+        do { if(!(expr)) {                             \
+        printk( "Assertion failed! %s,%s,%s,line=%d\n",        \
+        #expr,__FILE__,__FUNCTION__,__LINE__);         \
+        } } while (0)
+#endif
+
+
+/* A few user-configurable values. */
+/* media options */
+#define MAX_UNITS 8
+static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 20;
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
+   The RTL chips use a 64 element hash table based on the Ethernet CRC.  */
+static int multicast_filter_limit = 32;
+
+/* Size of the in-memory receive ring. */
+#define RX_BUF_LEN_IDX 2       /* 0==8K, 1==16K, 2==32K, 3==64K */
+#define RX_BUF_LEN     (8192 << RX_BUF_LEN_IDX)
+#define RX_BUF_PAD     16
+#define RX_BUF_WRAP_PAD 2048 /* spare padding to handle lack of packet wrap */
+#define RX_BUF_TOT_LEN (RX_BUF_LEN + RX_BUF_PAD + RX_BUF_WRAP_PAD)
+
+/* Number of Tx descriptor registers. */
+#define NUM_TX_DESC    4
+
+/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
+#define MAX_ETH_FRAME_SIZE     1536
+
+/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */
+#define TX_BUF_SIZE    MAX_ETH_FRAME_SIZE
+#define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC)
+
+/* PCI Tuning Parameters
+   Threshold is bytes transferred to chip before transmission starts. */
+#define TX_FIFO_THRESH 256     /* In bytes, rounded down to 32 byte units. */
+
+/* The following settings are log_2(bytes)-4:  0 == 16 bytes .. 6==1024, 7==end of packet. */
+#define RX_FIFO_THRESH 7       /* Rx buffer level before first PCI xfer.  */
+#define RX_DMA_BURST   7       /* Maximum PCI burst, '6' is 1024 */
+#define TX_DMA_BURST   6       /* Maximum PCI burst, '6' is 1024 */
+#define TX_RETRY       8       /* 0-15.  retries = 16 + (TX_RETRY * 16) */
+
+/* Operational parameters that usually are not changed. */
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (6*HZ)
+
+
+enum {
+       HAS_MII_XCVR = 0x010000,
+       HAS_CHIP_XCVR = 0x020000,
+       HAS_LNK_CHNG = 0x040000,
+};
+
+#define RTL_MIN_IO_SIZE 0x80
+#define RTL8139B_IO_SIZE 256
+
+#define RTL8129_CAPS   HAS_MII_XCVR
+#define RTL8139_CAPS   HAS_CHIP_XCVR|HAS_LNK_CHNG
+
+typedef enum {
+       RTL8139 = 0,
+       RTL8139_CB,
+       SMC1211TX,
+       /*MPX5030,*/
+       DELTA8139,
+       ADDTRON8139,
+       DFE538TX,
+       DFE690TXD,
+       RTL8129,
+} board_t;
+
+
+/* indexed by board_t, above */
+static struct {
+       const char *name;
+       u32 hw_flags;
+} board_info[] __devinitdata = {
+       { "RealTek RTL8139 Fast Ethernet", RTL8139_CAPS },
+       { "RealTek RTL8139B PCI/CardBus", RTL8139_CAPS },
+       { "SMC1211TX EZCard 10/100 (RealTek RTL8139)", RTL8139_CAPS },
+/*     { MPX5030, "Accton MPX5030 (RealTek RTL8139)", RTL8139_CAPS },*/
+       { "Delta Electronics 8139 10/100BaseTX", RTL8139_CAPS },
+       { "Addtron Technology 8139 10/100BaseTX", RTL8139_CAPS },
+       { "D-Link DFE-538TX (RealTek RTL8139)", RTL8139_CAPS },
+       { "D-Link DFE-690TXD (RealTek RTL8139)", RTL8139_CAPS },
+       { "RealTek RTL8129", RTL8129_CAPS },
+};
+
+
+static struct pci_device_id rtl8139_pci_tbl[] __devinitdata = {
+       {0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 },
+       {0x10ec, 0x8138, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139_CB },
+       {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, SMC1211TX },
+/*     {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MPX5030 },*/
+       {0x1500, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DELTA8139 },
+       {0x4033, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ADDTRON8139 },
+       {0x1186, 0x1300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE538TX },
+       {0x1186, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE690TXD },
+
+#ifdef CONFIG_8139TOO_8129
+       {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8129 },
+#endif
+
+       /* some crazy cards report invalid vendor ids like
+        * 0x0001 here.  The other ids are valid and constant,
+        * so we simply don't match on the main vendor id.
+        */
+       {PCI_ANY_ID, 0x8139, 0x10ec, 0x8139, 0, 0, RTL8139 },
+       {PCI_ANY_ID, 0x8139, 0x1186, 0x1300, 0, 0, DFE538TX },
+
+       {0,}
+};
+MODULE_DEVICE_TABLE (pci, rtl8139_pci_tbl);
+
+
+/* The rest of these values should never change. */
+
+/* Symbolic offsets to registers. */
+enum RTL8139_registers {
+       MAC0 = 0,               /* Ethernet hardware address. */
+       MAR0 = 8,               /* Multicast filter. */
+       TxStatus0 = 0x10,       /* Transmit status (Four 32bit registers). */
+       TxAddr0 = 0x20,         /* Tx descriptors (also four 32bit). */
+       RxBuf = 0x30,
+       ChipCmd = 0x37,
+       RxBufPtr = 0x38,
+       RxBufAddr = 0x3A,
+       IntrMask = 0x3C,
+       IntrStatus = 0x3E,
+       TxConfig = 0x40,
+       ChipVersion = 0x43,
+       RxConfig = 0x44,
+       Timer = 0x48,           /* A general-purpose counter. */
+       RxMissed = 0x4C,        /* 24 bits valid, write clears. */
+       Cfg9346 = 0x50,
+       Config0 = 0x51,
+       Config1 = 0x52,
+       FlashReg = 0x54,
+       MediaStatus = 0x58,
+       Config3 = 0x59,
+       Config4 = 0x5A,         /* absent on RTL-8139A */
+       HltClk = 0x5B,
+       MultiIntr = 0x5C,
+       TxSummary = 0x60,
+       BasicModeCtrl = 0x62,
+       BasicModeStatus = 0x64,
+       NWayAdvert = 0x66,
+       NWayLPAR = 0x68,
+       NWayExpansion = 0x6A,
+       /* Undocumented registers, but required for proper operation. */
+       FIFOTMS = 0x70,         /* FIFO Control and test. */
+       CSCR = 0x74,            /* Chip Status and Configuration Register. */
+       PARA78 = 0x78,
+       PARA7c = 0x7c,          /* Magic transceiver parameter register. */
+       Config5 = 0xD8,         /* absent on RTL-8139A */
+};
+
+enum ClearBitMasks {
+       MultiIntrClear = 0xF000,
+       ChipCmdClear = 0xE2,
+       Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1),
+};
+
+enum ChipCmdBits {
+       CmdReset = 0x10,
+       CmdRxEnb = 0x08,
+       CmdTxEnb = 0x04,
+       RxBufEmpty = 0x01,
+};
+
+/* Interrupt register bits, using my own meaningful names. */
+enum IntrStatusBits {
+       PCIErr = 0x8000,
+       PCSTimeout = 0x4000,
+       RxFIFOOver = 0x40,
+       RxUnderrun = 0x20,
+       RxOverflow = 0x10,
+       TxErr = 0x08,
+       TxOK = 0x04,
+       RxErr = 0x02,
+       RxOK = 0x01,
+
+       RxAckBits = RxFIFOOver | RxOverflow | RxOK,
+};
+
+enum TxStatusBits {
+       TxHostOwns = 0x2000,
+       TxUnderrun = 0x4000,
+       TxStatOK = 0x8000,
+       TxOutOfWindow = 0x20000000,
+       TxAborted = 0x40000000,
+       TxCarrierLost = 0x80000000,
+};
+enum RxStatusBits {
+       RxMulticast = 0x8000,
+       RxPhysical = 0x4000,
+       RxBroadcast = 0x2000,
+       RxBadSymbol = 0x0020,
+       RxRunt = 0x0010,
+       RxTooLong = 0x0008,
+       RxCRCErr = 0x0004,
+       RxBadAlign = 0x0002,
+       RxStatusOK = 0x0001,
+};
+
+/* Bits in RxConfig. */
+enum rx_mode_bits {
+       AcceptErr = 0x20,
+       AcceptRunt = 0x10,
+       AcceptBroadcast = 0x08,
+       AcceptMulticast = 0x04,
+       AcceptMyPhys = 0x02,
+       AcceptAllPhys = 0x01,
+};
+
+/* Bits in TxConfig. */
+enum tx_config_bits {
+       TxIFG1 = (1 << 25),     /* Interframe Gap Time */
+       TxIFG0 = (1 << 24),     /* Enabling these bits violates IEEE 802.3 */
+       TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */
+       TxCRC = (1 << 16),      /* DISABLE appending CRC to end of Tx packets */
+       TxClearAbt = (1 << 0),  /* Clear abort (WO) */
+       TxDMAShift = 8,         /* DMA burst value (0-7) is shifted this many bits */
+       TxRetryShift = 4,       /* TXRR value (0-15) is shifted this many bits */
+
+       TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */
+};
+
+/* Bits in Config1 */
+enum Config1Bits {
+       Cfg1_PM_Enable = 0x01,
+       Cfg1_VPD_Enable = 0x02,
+       Cfg1_PIO = 0x04,
+       Cfg1_MMIO = 0x08,
+       LWAKE = 0x10,           /* not on 8139, 8139A */
+       Cfg1_Driver_Load = 0x20,
+       Cfg1_LED0 = 0x40,
+       Cfg1_LED1 = 0x80,
+       SLEEP = (1 << 1),       /* only on 8139, 8139A */
+       PWRDN = (1 << 0),       /* only on 8139, 8139A */
+};
+
+/* Bits in Config3 */
+enum Config3Bits {
+       Cfg3_FBtBEn    = (1 << 0), /* 1 = Fast Back to Back */
+       Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */
+       Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */
+       Cfg3_CardB_En  = (1 << 3), /* 1 = enable CardBus registers */
+       Cfg3_LinkUp    = (1 << 4), /* 1 = wake up on link up */
+       Cfg3_Magic     = (1 << 5), /* 1 = wake up on Magic Packet (tm) */
+       Cfg3_PARM_En   = (1 << 6), /* 0 = software can set twister parameters */
+       Cfg3_GNTSel    = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */
+};
+
+/* Bits in Config4 */
+enum Config4Bits {
+       LWPTN = (1 << 2),       /* not on 8139, 8139A */
+};
+
+/* Bits in Config5 */
+enum Config5Bits {
+       Cfg5_PME_STS     = (1 << 0), /* 1 = PCI reset resets PME_Status */
+       Cfg5_LANWake     = (1 << 1), /* 1 = enable LANWake signal */
+       Cfg5_LDPS        = (1 << 2), /* 0 = save power when link is down */
+       Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */
+       Cfg5_UWF         = (1 << 4), /* 1 = accept unicast wakeup frame */
+       Cfg5_MWF         = (1 << 5), /* 1 = accept multicast wakeup frame */
+       Cfg5_BWF         = (1 << 6), /* 1 = accept broadcast wakeup frame */
+};
+
+enum RxConfigBits {
+       /* rx fifo threshold */
+       RxCfgFIFOShift = 13,
+       RxCfgFIFONone = (7 << RxCfgFIFOShift),
+
+       /* Max DMA burst */
+       RxCfgDMAShift = 8,
+       RxCfgDMAUnlimited = (7 << RxCfgDMAShift),
+
+       /* rx ring buffer length */
+       RxCfgRcv8K = 0,
+       RxCfgRcv16K = (1 << 11),
+       RxCfgRcv32K = (1 << 12),
+       RxCfgRcv64K = (1 << 11) | (1 << 12),
+
+       /* Disable packet wrap at end of Rx buffer */
+       RxNoWrap = (1 << 7),
+};
+
+
+/* Twister tuning parameters from RealTek.
+   Completely undocumented, but required to tune bad links. */
+enum CSCRBits {
+       CSCR_LinkOKBit = 0x0400,
+       CSCR_LinkChangeBit = 0x0800,
+       CSCR_LinkStatusBits = 0x0f000,
+       CSCR_LinkDownOffCmd = 0x003c0,
+       CSCR_LinkDownCmd = 0x0f3c0,
+};
+
+
+enum Cfg9346Bits {
+       Cfg9346_Lock = 0x00,
+       Cfg9346_Unlock = 0xC0,
+};
+
+
+#define PARA78_default 0x78fa8388
+#define PARA7c_default 0xcb38de43      /* param[0][3] */
+#define PARA7c_xxx             0xcb38de43
+static const unsigned long param[4][4] = {
+       {0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43},
+       {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83},
+       {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83},
+       {0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83}
+};
+
+typedef enum {
+       CH_8139 = 0,
+       CH_8139_K,
+       CH_8139A,
+       CH_8139B,
+       CH_8130,
+       CH_8139C,
+} chip_t;
+
+enum chip_flags {
+       HasHltClk = (1 << 0),
+       HasLWake = (1 << 1),
+};
+
+
+/* directly indexed by chip_t, above */
+static const struct {
+       const char *name;
+       u8 version; /* from RTL8139C docs */
+       u32 RxConfigMask; /* should clear the bits supported by this chip */
+       u32 flags;
+} rtl_chip_info[] = {
+       { "RTL-8139",
+         0x40,
+         0xf0fe0040, /* XXX copied from RTL8139A, verify */
+         HasHltClk,
+       },
+
+       { "RTL-8139 rev K",
+         0x60,
+         0xf0fe0040,
+         HasHltClk,
+       },
+
+       { "RTL-8139A",
+         0x70,
+         0xf0fe0040,
+         HasHltClk, /* XXX undocumented? */
+       },
+
+       { "RTL-8139B",
+         0x78,
+         0xf0fc0040,
+         HasLWake,
+       },
+
+       { "RTL-8130",
+         0x7C,
+         0xf0fe0040, /* XXX copied from RTL8139A, verify */
+         HasLWake,
+       },
+
+       { "RTL-8139C",
+         0x74,
+         0xf0fc0040, /* XXX copied from RTL8139B, verify */
+         HasLWake,
+       },
+
+};
+
+struct rtl_extra_stats {
+       unsigned long early_rx;
+       unsigned long tx_buf_mapped;
+       unsigned long tx_timeouts;
+};
+
+struct rtl8139_private {
+       void *mmio_addr;
+       int drv_flags;
+       struct pci_dev *pci_dev;
+       struct net_device_stats stats;
+       unsigned char *rx_ring;
+       unsigned int cur_rx;    /* Index into the Rx buffer of next Rx pkt. */
+       unsigned int tx_flag;
+       unsigned long cur_tx;
+       unsigned long dirty_tx;
+       unsigned char *tx_buf[NUM_TX_DESC];     /* Tx bounce buffers */
+       unsigned char *tx_bufs; /* Tx bounce buffer region. */
+       dma_addr_t rx_ring_dma;
+       dma_addr_t tx_bufs_dma;
+       signed char phys[4];            /* MII device addresses. */
+       char twistie, twist_row, twist_col;     /* Twister tune state. */
+       unsigned int full_duplex:1;     /* Full-duplex operation requested. */
+       unsigned int duplex_lock:1;
+       unsigned int default_port:4;    /* Last dev->if_port value. */
+       unsigned int media2:4;  /* Secondary monitored media port. */
+       unsigned int medialock:1;       /* Don't sense media type. */
+       unsigned int mediasense:1;      /* Media sensing in progress. */
+       spinlock_t lock;
+       chip_t chipset;
+       u32 rx_config;
+       struct rtl_extra_stats xstats;
+       int time_to_die;
+};
+
+MODULE_AUTHOR ("Jeff Garzik <jgarzik@mandrakesoft.com>");
+MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver");
+MODULE_LICENSE("GPL");
+
+MODULE_PARM (multicast_filter_limit, "i");
+MODULE_PARM (max_interrupt_work, "i");
+MODULE_PARM (media, "1-" __MODULE_STRING(MAX_UNITS) "i");
+MODULE_PARM (full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");
+MODULE_PARM_DESC (multicast_filter_limit, "8139too maximum number of filtered multicast addresses");
+MODULE_PARM_DESC (max_interrupt_work, "8139too maximum events handled per interrupt");
+MODULE_PARM_DESC (media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps");
+MODULE_PARM_DESC (full_duplex, "8139too: Force full duplex for board(s) (1)");
+
+static int read_eeprom (void *ioaddr, int location, int addr_len);
+static int rtl8139_open (struct net_device *dev);
+static int mdio_read (struct net_device *dev, int phy_id, int location);
+static void mdio_write (struct net_device *dev, int phy_id, int location,
+                       int val);
+static void rtl8139_tx_timeout (struct net_device *dev);
+static void rtl8139_init_ring (struct net_device *dev);
+static int rtl8139_start_xmit (struct sk_buff *skb,
+                              struct net_device *dev);
+static void rtl8139_interrupt (int irq, void *dev_instance,
+                              struct pt_regs *regs);
+static int rtl8139_close (struct net_device *dev);
+static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd);
+static struct net_device_stats *rtl8139_get_stats (struct net_device *dev);
+static inline u32 ether_crc (int length, unsigned char *data);
+static void rtl8139_set_rx_mode (struct net_device *dev);
+static void __set_rx_mode (struct net_device *dev);
+static void rtl8139_hw_start (struct net_device *dev);
+
+#ifdef USE_IO_OPS
+
+#define RTL_R8(reg)            inb (((unsigned long)ioaddr) + (reg))
+#define RTL_R16(reg)           inw (((unsigned long)ioaddr) + (reg))
+#define RTL_R32(reg)           ((unsigned long) inl (((unsigned long)ioaddr) + (reg)))
+#define RTL_W8(reg, val8)      outb ((val8), ((unsigned long)ioaddr) + (reg))
+#define RTL_W16(reg, val16)    outw ((val16), ((unsigned long)ioaddr) + (reg))
+#define RTL_W32(reg, val32)    outl ((val32), ((unsigned long)ioaddr) + (reg))
+#define RTL_W8_F               RTL_W8
+#define RTL_W16_F              RTL_W16
+#define RTL_W32_F              RTL_W32
+#undef readb
+#undef readw
+#undef readl
+#undef writeb
+#undef writew
+#undef writel
+#define readb(addr) inb((unsigned long)(addr))
+#define readw(addr) inw((unsigned long)(addr))
+#define readl(addr) inl((unsigned long)(addr))
+#define writeb(val,addr) outb((val),(unsigned long)(addr))
+#define writew(val,addr) outw((val),(unsigned long)(addr))
+#define writel(val,addr) outl((val),(unsigned long)(addr))
+
+#else
+
+/* write MMIO register, with flush */
+/* Flush avoids rtl8139 bug w/ posted MMIO writes */
+#define RTL_W8_F(reg, val8)    do { writeb ((val8), ioaddr + (reg)); readb (ioaddr + (reg)); } while (0)
+#define RTL_W16_F(reg, val16)  do { writew ((val16), ioaddr + (reg)); readw (ioaddr + (reg)); } while (0)
+#define RTL_W32_F(reg, val32)  do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0)
+
+
+#define MMIO_FLUSH_AUDIT_COMPLETE 1
+#if MMIO_FLUSH_AUDIT_COMPLETE
+
+/* write MMIO register */
+#define RTL_W8(reg, val8)      writeb ((val8), ioaddr + (reg))
+#define RTL_W16(reg, val16)    writew ((val16), ioaddr + (reg))
+#define RTL_W32(reg, val32)    writel ((val32), ioaddr + (reg))
+
+#else
+
+/* write MMIO register, then flush */
+#define RTL_W8         RTL_W8_F
+#define RTL_W16                RTL_W16_F
+#define RTL_W32                RTL_W32_F
+
+#endif /* MMIO_FLUSH_AUDIT_COMPLETE */
+
+/* read MMIO register */
+#define RTL_R8(reg)            readb (ioaddr + (reg))
+#define RTL_R16(reg)           readw (ioaddr + (reg))
+#define RTL_R32(reg)           ((unsigned long) readl (ioaddr + (reg)))
+
+#endif /* USE_IO_OPS */
+
+
+static const u16 rtl8139_intr_mask =
+       PCIErr | PCSTimeout | RxUnderrun | RxOverflow | RxFIFOOver |
+       TxErr | TxOK | RxErr | RxOK;
+
+static const unsigned int rtl8139_rx_config =
+       RxCfgRcv32K | RxNoWrap |
+       (RX_FIFO_THRESH << RxCfgFIFOShift) |
+       (RX_DMA_BURST << RxCfgDMAShift);
+
+static const unsigned int rtl8139_tx_config =
+       (TX_DMA_BURST << TxDMAShift) | (TX_RETRY << TxRetryShift);
+
+static void __rtl8139_cleanup_dev (struct net_device *dev)
+{
+       struct rtl8139_private *tp;
+       struct pci_dev *pdev;
+
+       assert (dev != NULL);
+       assert (dev->priv != NULL);
+
+       tp = dev->priv;
+       assert (tp->pci_dev != NULL);
+       pdev = tp->pci_dev;
+
+#ifndef USE_IO_OPS
+       if (tp->mmio_addr)
+               iounmap (tp->mmio_addr);
+#endif /* !USE_IO_OPS */
+
+       /* it's ok to call this even if we have no regions to free */
+       pci_release_regions (pdev);
+
+#ifndef RTL8139_NDEBUG
+       /* poison memory before freeing */
+       memset (dev, 0xBC,
+               sizeof (struct net_device) +
+               sizeof (struct rtl8139_private));
+#endif /* RTL8139_NDEBUG */
+
+       kfree (dev);
+
+       pci_set_drvdata (pdev, NULL);
+}
+
+
+static void rtl8139_chip_reset (void *ioaddr)
+{
+       int i;
+
+       /* Soft reset the chip. */
+       RTL_W8 (ChipCmd, CmdReset);
+
+       /* Check that the chip has finished the reset. */
+       for (i = 1000; i > 0; i--) {
+               barrier();
+               if ((RTL_R8 (ChipCmd) & CmdReset) == 0)
+                       break;
+               udelay (10);
+       }
+}
+
+
+static int __devinit rtl8139_init_board (struct pci_dev *pdev,
+                                        struct net_device **dev_out)
+{
+       void *ioaddr;
+       struct net_device *dev;
+       struct rtl8139_private *tp;
+       u8 tmp8;
+       int rc;
+       unsigned int i;
+       u32 pio_start, pio_end, pio_flags, pio_len;
+       unsigned long mmio_start, mmio_end, mmio_flags, mmio_len;
+       u32 tmp;
+
+       assert (pdev != NULL);
+
+       *dev_out = NULL;
+
+       /* dev and dev->priv zeroed in alloc_etherdev */
+       dev = alloc_etherdev (sizeof (*tp));
+       if (dev == NULL) {
+               printk (KERN_ERR PFX "%s: Unable to alloc new net device\n", pdev->slot_name);
+               return -ENOMEM;
+       }
+       SET_MODULE_OWNER(dev);
+       tp = dev->priv;
+       tp->pci_dev = pdev;
+
+       /* enable device (incl. PCI PM wakeup and hotplug setup) */
+       rc = pci_enable_device (pdev);
+       if (rc)
+               goto err_out;
+
+       pio_start = pci_resource_start (pdev, 0);
+       pio_end = pci_resource_end (pdev, 0);
+       pio_flags = pci_resource_flags (pdev, 0);
+       pio_len = pci_resource_len (pdev, 0);
+
+       mmio_start = pci_resource_start (pdev, 1);
+       mmio_end = pci_resource_end (pdev, 1);
+       mmio_flags = pci_resource_flags (pdev, 1);
+       mmio_len = pci_resource_len (pdev, 1);
+
+       /* set this immediately, we need to know before
+        * we talk to the chip directly */
+       DPRINTK("PIO region size == 0x%02X\n", pio_len);
+       DPRINTK("MMIO region size == 0x%02lX\n", mmio_len);
+
+#ifdef USE_IO_OPS
+       /* make sure PCI base addr 0 is PIO */
+       if (!(pio_flags & IORESOURCE_IO)) {
+               printk (KERN_ERR PFX "%s: region #0 not a PIO resource, aborting\n", pdev->slot_name);
+               rc = -ENODEV;
+               goto err_out;
+       }
+       /* check for weird/broken PCI region reporting */
+       if (pio_len < RTL_MIN_IO_SIZE) {
+               printk (KERN_ERR PFX "%s: Invalid PCI I/O region size(s), aborting\n", pdev->slot_name);
+               rc = -ENODEV;
+               goto err_out;
+       }
+#else
+       /* make sure PCI base addr 1 is MMIO */
+       if (!(mmio_flags & IORESOURCE_MEM)) {
+               printk (KERN_ERR PFX "%s: region #1 not an MMIO resource, aborting\n", pdev->slot_name);
+               rc = -ENODEV;
+               goto err_out;
+       }
+       if (mmio_len < RTL_MIN_IO_SIZE) {
+               printk (KERN_ERR PFX "%s: Invalid PCI mem region size(s), aborting\n", pdev->slot_name);
+               rc = -ENODEV;
+               goto err_out;
+       }
+#endif
+
+       rc = pci_request_regions (pdev, "8139too");
+       if (rc)
+               goto err_out;
+
+       /* enable PCI bus-mastering */
+       pci_set_master (pdev);
+
+#ifdef USE_IO_OPS
+       ioaddr = (void *) pio_start;
+       dev->base_addr = pio_start;
+       tp->mmio_addr = ioaddr;
+#else
+       /* ioremap MMIO region */
+       ioaddr = ioremap (mmio_start, mmio_len);
+       if (ioaddr == NULL) {
+               printk (KERN_ERR PFX "%s: cannot remap MMIO, aborting\n", pdev->slot_name);
+               rc = -EIO;
+               goto err_out;
+       }
+       dev->base_addr = (long) ioaddr;
+       tp->mmio_addr = ioaddr;
+#endif /* USE_IO_OPS */
+
+       /* Bring old chips out of low-power mode. */
+       RTL_W8 (HltClk, 'R');
+
+       /* check for missing/broken hardware */
+       if (RTL_R32 (TxConfig) == 0xFFFFFFFF) {
+               printk (KERN_ERR PFX "%s: Chip not responding, ignoring board\n",
+                       pdev->slot_name);
+               rc = -EIO;
+               goto err_out;
+       }
+
+       /* identify chip attached to board */
+       tmp = RTL_R8 (ChipVersion);
+       for (i = 0; i < ARRAY_SIZE (rtl_chip_info); i++)
+               if (tmp == rtl_chip_info[i].version) {
+                       tp->chipset = i;
+                       goto match;
+               }
+
+       /* if unknown chip, assume array element #0, original RTL-8139 in this case */
+       printk (KERN_DEBUG PFX "%s: unknown chip version, assuming RTL-8139\n",
+               pdev->slot_name);
+       printk (KERN_DEBUG PFX "%s: TxConfig = 0x%lx\n", pdev->slot_name, RTL_R32 (TxConfig));
+       tp->chipset = 0;
+
+match:
+       DPRINTK ("chipset id (%d) == index %d, '%s'\n",
+               tmp,
+               tp->chipset,
+               rtl_chip_info[tp->chipset].name);
+
+       if (tp->chipset >= CH_8139B) {
+               u8 new_tmp8 = tmp8 = RTL_R8 (Config1);
+               DPRINTK("PCI PM wakeup\n");
+               if ((rtl_chip_info[tp->chipset].flags & HasLWake) &&
+                   (tmp8 & LWAKE))
+                       new_tmp8 &= ~LWAKE;
+               new_tmp8 |= Cfg1_PM_Enable;
+               if (new_tmp8 != tmp8) {
+                       RTL_W8 (Cfg9346, Cfg9346_Unlock);
+                       RTL_W8 (Config1, new_tmp8); /* write the updated value */
+                       RTL_W8 (Cfg9346, Cfg9346_Lock);
+               }
+               if (rtl_chip_info[tp->chipset].flags & HasLWake) {
+                       tmp8 = RTL_R8 (Config4);
+                       if (tmp8 & LWPTN)
+                               RTL_W8 (Config4, tmp8 & ~LWPTN);
+               }
+       } else {
+               DPRINTK("Old chip wakeup\n");
+               tmp8 = RTL_R8 (Config1);
+               tmp8 &= ~(SLEEP | PWRDN);
+               RTL_W8 (Config1, tmp8);
+       }
+
+       rtl8139_chip_reset (ioaddr);
+
+       *dev_out = dev;
+       return 0;
+
+err_out:
+       __rtl8139_cleanup_dev (dev);
+       return rc;
+}
+
+
+static int __devinit rtl8139_init_one (struct pci_dev *pdev,
+                                      const struct pci_device_id *ent)
+{
+       struct net_device *dev = NULL;
+       struct rtl8139_private *tp;
+       int i, addr_len, option;
+       void *ioaddr;
+       static int board_idx = -1;
+       u8 pci_rev;
+
+       assert (pdev != NULL);
+       assert (ent != NULL);
+
+       board_idx++;
+
+       /* when we're built into the kernel, the driver version message
+        * is only printed if at least one 8139 board has been found
+        */
+#ifndef MODULE
+       {
+               static int printed_version;
+               if (!printed_version++)
+                       printk (KERN_INFO RTL8139_DRIVER_NAME "\n");
+       }
+#endif
+
+       pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev);
+
+       if (pdev->vendor == PCI_VENDOR_ID_REALTEK &&
+           pdev->device == PCI_DEVICE_ID_REALTEK_8139 && pci_rev >= 0x20) {
+               printk(KERN_INFO PFX "pci dev %s (id %04x:%04x rev %02x) is an enhanced 8139C+ chip\n",
+                      pdev->slot_name, pdev->vendor, pdev->device, pci_rev);
+               printk(KERN_INFO PFX "Ensure the \"8139cp\" driver is installed!\n");
+                return -ENODEV; /* force use of better driver */
+       }
+
+       i = rtl8139_init_board (pdev, &dev);
+       if (i < 0)
+               return i;
+
+       tp = dev->priv;
+       ioaddr = tp->mmio_addr;
+
+       assert (ioaddr != NULL);
+       assert (dev != NULL);
+       assert (tp != NULL);
+
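+       /* Probe with 8-bit addressing first: the 0x8129 signature in word 0
+        * marks a larger EEPROM that needs 8 address bits; otherwise use 6. */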
+       addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6;
+       for (i = 0; i < 3; i++)
+               ((u16 *) (dev->dev_addr))[i] =
+                   le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len));
+
+       /* The Rtl8139-specific entries in the device structure. */
+       dev->open = rtl8139_open;
+       dev->hard_start_xmit = rtl8139_start_xmit;
+       dev->stop = rtl8139_close;
+       dev->get_stats = rtl8139_get_stats;
+       dev->set_multicast_list = rtl8139_set_rx_mode;
+       dev->do_ioctl = netdev_ioctl;
+       dev->tx_timeout = rtl8139_tx_timeout;
+       dev->watchdog_timeo = TX_TIMEOUT;
+       dev->features |= NETIF_F_SG;
+
+       dev->irq = pdev->irq;
+
+       /* dev->priv/tp zeroed and aligned in init_etherdev */
+       tp = dev->priv;
+
+       /* note: tp->chipset set in rtl8139_init_board */
+       tp->drv_flags = board_info[ent->driver_data].hw_flags;
+       tp->mmio_addr = ioaddr;
+       spin_lock_init (&tp->lock);
+
+       /* dev is fully set up and ready to use now */
+       DPRINTK("about to register device named %s (%p)...\n", dev->name, dev);
+       i = register_netdev (dev);
+       if (i) goto err_out;
+
+       pci_set_drvdata (pdev, dev);
+
+       printk (KERN_INFO "%s: %s at 0x%lx, "
+               "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, "
+               "IRQ %d\n",
+               dev->name,
+               board_info[ent->driver_data].name,
+               dev->base_addr,
+               dev->dev_addr[0], dev->dev_addr[1],
+               dev->dev_addr[2], dev->dev_addr[3],
+               dev->dev_addr[4], dev->dev_addr[5],
+               dev->irq);
+
+       printk (KERN_DEBUG "%s:  Identified 8139 chip type '%s'\n",
+               dev->name, rtl_chip_info[tp->chipset].name);
+
+       /* Find the connected MII xcvrs.
+          Doing this in open() would allow detecting external xcvrs later, but
+          takes too much time. */
+#ifdef CONFIG_8139TOO_8129
+       if (tp->drv_flags & HAS_MII_XCVR) {
+               int phy, phy_idx = 0;
+               for (phy = 0; phy < 32 && phy_idx < sizeof(tp->phys); phy++) {
+                       int mii_status = mdio_read(dev, phy, 1);
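+                       /* A present PHY reads back as something other than
+                          all-ones or all-zeros. */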
+                       if (mii_status != 0xffff  &&  mii_status != 0x0000) {
+                               u16 advertising = mdio_read(dev, phy, 4);
+                               tp->phys[phy_idx++] = phy;
+                               printk(KERN_INFO "%s: MII transceiver %d status 0x%4.4x "
+                                          "advertising %4.4x.\n",
+                                          dev->name, phy, mii_status, advertising);
+                       }
+               }
+               if (phy_idx == 0) {
+                       printk(KERN_INFO "%s: No MII transceivers found!  Assuming SYM "
+                                  "transceiver.\n",
+                                  dev->name);
+                       tp->phys[0] = 32;
+               }
+       } else
+#endif
+               tp->phys[0] = 32;
+
+       /* The lower four bits are the media type. */
+       option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx];
+       if (option > 0) {
+               tp->full_duplex = (option & 0x210) ? 1 : 0;
+               tp->default_port = option & 0xFF;
+               if (tp->default_port)
+                       tp->medialock = 1;
+       }
+       if (board_idx < MAX_UNITS  &&  full_duplex[board_idx] > 0)
+               tp->full_duplex = full_duplex[board_idx];
+       if (tp->full_duplex) {
+               printk(KERN_INFO "%s: Media type forced to Full Duplex.\n", dev->name);
+               /* Changing the MII-advertised media might prevent
+                  re-connection. */
+               tp->duplex_lock = 1;
+       }
+       if (tp->default_port) {
+               printk(KERN_INFO "  Forcing %dMbps %s-duplex operation.\n",
+                          (option & 0x20 ? 100 : 10),
+                          (option & 0x10 ? "full" : "half"));
+               mdio_write(dev, tp->phys[0], 0,
+                                  ((option & 0x20) ? 0x2000 : 0) |     /* 100Mbps? */
+                                  ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */
+       }
+
+       /* Put the chip into low-power mode. */
+       if (rtl_chip_info[tp->chipset].flags & HasHltClk)
+               RTL_W8 (HltClk, 'H');   /* 'R' would leave the clock running. */
+
+       return 0;
+
+err_out:
+       __rtl8139_cleanup_dev (dev);
+       return i;
+}
+
+
+static void __devexit rtl8139_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct rtl8139_private *np;
+
+       assert (dev != NULL);
+       np = dev->priv;
+       assert (np != NULL);
+
+       unregister_netdev (dev);
+
+       __rtl8139_cleanup_dev (dev);
+}
+
+
+/* Serial EEPROM section. */
+
+/*  EEPROM_Ctrl bits. */
+#define EE_SHIFT_CLK   0x04    /* EEPROM shift clock. */
+#define EE_CS                  0x08    /* EEPROM chip select. */
+#define EE_DATA_WRITE  0x02    /* EEPROM chip data in. */
+#define EE_WRITE_0             0x00
+#define EE_WRITE_1             0x02
+#define EE_DATA_READ   0x01    /* EEPROM chip data out. */
+#define EE_ENB                 (0x80 | EE_CS)
+
+/* Delay between EEPROM clock transitions.
+   No extra delay is needed with 33 MHz PCI, but 66 MHz may change this.
+ */
+
+#define eeprom_delay() readl(ee_addr)
+
+/* The EEPROM commands include the always-set leading bit. */
+#define EE_WRITE_CMD   (5)
+#define EE_READ_CMD            (6)
+#define EE_ERASE_CMD   (7)
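+
+/* Example (derived from read_eeprom() below): with a 93C46-style part
+   (addr_len == 6) and location 0x07, the command word is
+   0x07 | (EE_READ_CMD << 6) == 0x187, i.e. binary 1 10 000111 --
+   start bit, read opcode, 6-bit address.  It is clocked out MSB-first
+   over 4+addr_len+1 == 11 clocks (the two extra leading zero bits are
+   harmless), after which 16 data bits are clocked back in. */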
+
+static int __devinit read_eeprom (void *ioaddr, int location, int addr_len)
+{
+       int i;
+       unsigned retval = 0;
+       void *ee_addr = ioaddr + Cfg9346;
+       int read_cmd = location | (EE_READ_CMD << addr_len);
+
+       writeb (EE_ENB & ~EE_CS, ee_addr);
+       writeb (EE_ENB, ee_addr);
+       eeprom_delay ();
+
+       /* Shift the read command bits out. */
+       for (i = 4 + addr_len; i >= 0; i--) {
+               int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0;
+               writeb (EE_ENB | dataval, ee_addr);
+               eeprom_delay ();
+               writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay ();
+       }
+       writeb (EE_ENB, ee_addr);
+       eeprom_delay ();
+
+       for (i = 16; i > 0; i--) {
+               writeb (EE_ENB | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay ();
+               retval =
+                   (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 :
+                                    0);
+               writeb (EE_ENB, ee_addr);
+               eeprom_delay ();
+       }
+
+       /* Terminate the EEPROM access. */
+       writeb (~EE_CS, ee_addr);
+       eeprom_delay ();
+
+       return retval;
+}
+
+/* MII serial management: mostly bogus for now. */
+/* Read and write the MII management registers using software-generated
+   serial MDIO protocol.
+   The maximum data clock rate is 2.5 MHz.  The minimum timing is usually
+   met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+   "overclocking" issues. */
+#define MDIO_DIR               0x80
+#define MDIO_DATA_OUT  0x04
+#define MDIO_DATA_IN   0x02
+#define MDIO_CLK               0x01
+#define MDIO_WRITE0 (MDIO_DIR)
+#define MDIO_WRITE1 (MDIO_DIR | MDIO_DATA_OUT)
+
+#define mdio_delay(mdio_addr)  readb(mdio_addr)
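+
+/* Example (derived from mdio_read() below): the 16 bits clocked out are
+   the low bits of (0xf6 << 10) | (phy_id << 5) | location -- two extra
+   preamble ones, the 01 start field, the 10 read opcode, a 5-bit PHY
+   address and a 5-bit register number.  mdio_sync() supplies the long
+   preamble of ones beforehand; 19 bits are then clocked back in, and
+   the final shift drops the trailing idle bit, leaving 16 data bits. */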
+
+
+static char mii_2_8139_map[8] = {
+       BasicModeCtrl,
+       BasicModeStatus,
+       0,
+       0,
+       NWayAdvert,
+       NWayLPAR,
+       NWayExpansion,
+       0
+};
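+
+/* Note (inferred from mdio_read()/mdio_write() below): this table maps
+   generic MII register numbers 0-7 to the 8139's equivalent internal
+   registers (0 = BMCR, 1 = BMSR, 4 = ANAR, 5 = ANLPAR, 6 = ANER).  The
+   zero entries -- registers 2, 3 and 7 -- have no on-chip equivalent
+   and are treated as absent. */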
+
+
+#ifdef CONFIG_8139TOO_8129
+/* Synchronize the MII management interface by shifting 32 one bits out. */
+static void mdio_sync (void *mdio_addr)
+{
+       int i;
+
+       for (i = 32; i >= 0; i--) {
+               writeb (MDIO_WRITE1, mdio_addr);
+               mdio_delay (mdio_addr);
+               writeb (MDIO_WRITE1 | MDIO_CLK, mdio_addr);
+               mdio_delay (mdio_addr);
+       }
+}
+#endif
+
+static int mdio_read (struct net_device *dev, int phy_id, int location)
+{
+       struct rtl8139_private *tp = dev->priv;
+       int retval = 0;
+#ifdef CONFIG_8139TOO_8129
+       void *mdio_addr = tp->mmio_addr + Config4;
+       int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+       int i;
+#endif
+
+       if (phy_id > 31) {      /* Really an 8139.  Use internal registers. */
+               return location < 8 && mii_2_8139_map[location] ?
+                   readw (tp->mmio_addr + mii_2_8139_map[location]) : 0;
+       }
+
+#ifdef CONFIG_8139TOO_8129
+       mdio_sync (mdio_addr);
+       /* Shift the read command bits out. */
+       for (i = 15; i >= 0; i--) {
+               int dataval = (mii_cmd & (1 << i)) ? MDIO_DATA_OUT : 0;
+
+               writeb (MDIO_DIR | dataval, mdio_addr);
+               mdio_delay (mdio_addr);
+               writeb (MDIO_DIR | dataval | MDIO_CLK, mdio_addr);
+               mdio_delay (mdio_addr);
+       }
+
+       /* Read the two transition bits, 16 data bits, and the wire-idle bit. */
+       for (i = 19; i > 0; i--) {
+               writeb (0, mdio_addr);
+               mdio_delay (mdio_addr);
+               retval = (retval << 1) | ((readb (mdio_addr) & MDIO_DATA_IN) ? 1 : 0);
+               writeb (MDIO_CLK, mdio_addr);
+               mdio_delay (mdio_addr);
+       }
+#endif
+
+       return (retval >> 1) & 0xffff;
+}
+
+
+static void mdio_write (struct net_device *dev, int phy_id, int location,
+                       int value)
+{
+       struct rtl8139_private *tp = dev->priv;
+#ifdef CONFIG_8139TOO_8129
+       void *mdio_addr = tp->mmio_addr + Config4;
+       int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value;
+       int i;
+#endif
+
+       if (phy_id > 31) {      /* Really an 8139.  Use internal registers. */
+               void *ioaddr = tp->mmio_addr;
+               if (location == 0) {
+                       RTL_W8 (Cfg9346, Cfg9346_Unlock);
+                       RTL_W16 (BasicModeCtrl, value);
+                       RTL_W8 (Cfg9346, Cfg9346_Lock);
+               } else if (location < 8 && mii_2_8139_map[location])
+                       RTL_W16 (mii_2_8139_map[location], value);
+               return;
+       }
+
+#ifdef CONFIG_8139TOO_8129
+       mdio_sync (mdio_addr);
+
+       /* Shift the command bits out. */
+       for (i = 31; i >= 0; i--) {
+               int dataval =
+                   (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0;
+               writeb (dataval, mdio_addr);
+               mdio_delay (mdio_addr);
+               writeb (dataval | MDIO_CLK, mdio_addr);
+               mdio_delay (mdio_addr);
+       }
+       /* Clear out extra bits. */
+       for (i = 2; i > 0; i--) {
+               writeb (0, mdio_addr);
+               mdio_delay (mdio_addr);
+               writeb (MDIO_CLK, mdio_addr);
+               mdio_delay (mdio_addr);
+       }
+#endif
+}
+
+
+static int rtl8139_open (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       int retval;
+#ifdef RTL8139_DEBUG
+       void *ioaddr = tp->mmio_addr;
+#endif
+
+       retval = request_irq (dev->irq, rtl8139_interrupt, SA_SHIRQ, dev->name, dev);
+       if (retval)
+               return retval;
+
+       tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
+                                          &tp->tx_bufs_dma);
+       tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
+                                          &tp->rx_ring_dma);
+       if (tp->tx_bufs == NULL || tp->rx_ring == NULL) {
+               free_irq(dev->irq, dev);
+
+               if (tp->tx_bufs)
+                       pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
+                                           tp->tx_bufs, tp->tx_bufs_dma);
+               if (tp->rx_ring)
+                       pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
+                                           tp->rx_ring, tp->rx_ring_dma);
+
+               return -ENOMEM;
+
+       }
+
+       tp->full_duplex = tp->duplex_lock;
+       tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000;
+       tp->twistie = 1;
+       tp->time_to_die = 0;
+
+       rtl8139_init_ring (dev);
+       rtl8139_hw_start (dev);
+
+       DPRINTK ("%s: rtl8139_open() ioaddr %#lx IRQ %d"
+                       " GP Pins %2.2x %s-duplex.\n",
+                       dev->name, pci_resource_start (tp->pci_dev, 1),
+                       dev->irq, RTL_R8 (MediaStatus),
+                       tp->full_duplex ? "full" : "half");
+
+       return 0;
+}
+
+
+static void rtl_check_media (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+
+       if (tp->phys[0] >= 0) {
+               u16 mii_reg5 = mdio_read(dev, tp->phys[0], 5);
+               if (mii_reg5 == 0xffff)
+                       ;                                       /* Not there */
+               else if ((mii_reg5 & 0x0100) == 0x0100
+                                || (mii_reg5 & 0x00C0) == 0x0040)
+                       tp->full_duplex = 1;
+
+               printk (KERN_INFO"%s: Setting %s%s-duplex based on"
+                               " auto-negotiated partner ability %4.4x.\n",
+                       dev->name, mii_reg5 == 0 ? "" :
+                               (mii_reg5 & 0x0180) ? "100mbps " : "10mbps ",
+                       tp->full_duplex ? "full" : "half", mii_reg5);
+       }
+}
+
+/* Start the hardware at open or resume. */
+static void rtl8139_hw_start (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       u32 i;
+       u8 tmp;
+
+       /* Bring old chips out of low-power mode. */
+       if (rtl_chip_info[tp->chipset].flags & HasHltClk)
+               RTL_W8 (HltClk, 'R');
+
+       rtl8139_chip_reset (ioaddr);
+
+       /* unlock Config[01234] and BMCR register writes */
+       RTL_W8_F (Cfg9346, Cfg9346_Unlock);
+       /* Restore our idea of the MAC address. */
+       RTL_W32_F (MAC0 + 0, cpu_to_le32 (*(u32 *) (dev->dev_addr + 0)));
+       RTL_W32_F (MAC0 + 4, cpu_to_le32 (*(u32 *) (dev->dev_addr + 4)));
+
+       /* Must enable Tx/Rx before setting transfer thresholds! */
+       RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
+
+       tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys;
+       RTL_W32 (RxConfig, tp->rx_config);
+
+       /* Check this value: the documentation for IFG contradicts itself. */
+       RTL_W32 (TxConfig, rtl8139_tx_config);
+
+       tp->cur_rx = 0;
+
+       rtl_check_media (dev);
+
+       if (tp->chipset >= CH_8139B) {
+               /* Disable magic packet scanning, which is enabled
+                * when PM is enabled in Config1.  It can be reenabled
+                * via ETHTOOL_SWOL if desired.  */
+               RTL_W8 (Config3, RTL_R8 (Config3) & ~Cfg3_Magic);
+       }
+
+       DPRINTK("init buffer addresses\n");
+
+       /* Lock Config[01234] and BMCR register writes */
+       RTL_W8 (Cfg9346, Cfg9346_Lock);
+
+       /* init Rx ring buffer DMA address */
+       RTL_W32_F (RxBuf, tp->rx_ring_dma);
+
+       /* init Tx buffer DMA addresses */
+       for (i = 0; i < NUM_TX_DESC; i++)
+               RTL_W32_F (TxAddr0 + (i * 4), tp->tx_bufs_dma + (tp->tx_buf[i] - tp->tx_bufs));
+
+       RTL_W32 (RxMissed, 0);
+
+       rtl8139_set_rx_mode (dev);
+
+       /* no early-rx interrupts */
+       RTL_W16 (MultiIntr, RTL_R16 (MultiIntr) & MultiIntrClear);
+
+       /* make sure RxTx has started */
+       tmp = RTL_R8 (ChipCmd);
+       if ((!(tmp & CmdRxEnb)) || (!(tmp & CmdTxEnb)))
+               RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
+
+       /* Enable all known interrupts by setting the interrupt mask. */
+       RTL_W16 (IntrMask, rtl8139_intr_mask);
+
+       netif_start_queue (dev);
+}
+
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void rtl8139_init_ring (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       int i;
+
+       tp->cur_rx = 0;
+       tp->cur_tx = 0;
+       tp->dirty_tx = 0;
+
+       for (i = 0; i < NUM_TX_DESC; i++)
+               tp->tx_buf[i] = &tp->tx_bufs[i * TX_BUF_SIZE];
+}
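+
+/* Note: tx_bufs is a single pci_alloc_consistent() block of NUM_TX_DESC
+   contiguous TX_BUF_SIZE buffers (see rtl8139_open), so the DMA address
+   programmed into TxAddr0..3 by rtl8139_hw_start works out to
+   tx_bufs_dma + i * TX_BUF_SIZE. */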
+
+
+static void rtl8139_tx_clear (struct rtl8139_private *tp)
+{
+       tp->cur_tx = 0;
+       tp->dirty_tx = 0;
+
+       /* XXX account for unsent Tx packets in tp->stats.tx_dropped */
+}
+
+
+static void rtl8139_tx_timeout (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       int i;
+       u8 tmp8;
+       unsigned long flags;
+
+       DPRINTK ("%s: Transmit timeout, status %2.2x %4.4x "
+                "media %2.2x.\n", dev->name,
+                RTL_R8 (ChipCmd),
+                RTL_R16 (IntrStatus),
+                RTL_R8 (MediaStatus));
+
+       tp->xstats.tx_timeouts++;
+
+       /* disable Tx ASAP, if not already */
+       tmp8 = RTL_R8 (ChipCmd);
+       if (tmp8 & CmdTxEnb)
+               RTL_W8 (ChipCmd, CmdRxEnb);
+
+       /* Disable interrupts by clearing the interrupt mask. */
+       RTL_W16 (IntrMask, 0x0000);
+
+       /* Emit info to figure out what went wrong. */
+       printk (KERN_DEBUG "%s: Tx queue start entry %ld  dirty entry %ld.\n",
+               dev->name, tp->cur_tx, tp->dirty_tx);
+       for (i = 0; i < NUM_TX_DESC; i++)
+               printk (KERN_DEBUG "%s:  Tx descriptor %d is %8.8lx.%s\n",
+                       dev->name, i, RTL_R32 (TxStatus0 + (i * 4)),
+                       i == tp->dirty_tx % NUM_TX_DESC ?
+                               " (queue head)" : "");
+
+       /* Stop a shared interrupt from scavenging while we are doing so. */
+       spin_lock_irqsave (&tp->lock, flags);
+       rtl8139_tx_clear (tp);
+       spin_unlock_irqrestore (&tp->lock, flags);
+
+       /* ...and finally, reset everything */
+       rtl8139_hw_start (dev);
+
+       netif_wake_queue (dev);
+}
+
+
+static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       unsigned int entry;
+
+       /* Calculate the next Tx descriptor entry. */
+       entry = tp->cur_tx % NUM_TX_DESC;
+
+       if (likely(skb->len < TX_BUF_SIZE)) {
+               skb_copy_bits(skb, 0, tp->tx_buf[entry], skb->len);
+               dev_kfree_skb(skb);
+       } else {
+               dev_kfree_skb(skb);
+               tp->stats.tx_dropped++;
+               return 0;
+       }
+
+       /* Note: the chip doesn't have auto-pad! */
+       spin_lock_irq(&tp->lock);
+       RTL_W32_F (TxStatus0 + (entry * sizeof (u32)),
+                  tp->tx_flag | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN));
+
+       dev->trans_start = jiffies;
+
+       tp->cur_tx++;
+       wmb();
+
+       if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx)
+               netif_stop_queue (dev);
+       spin_unlock_irq(&tp->lock);
+
+       DPRINTK ("%s: Queued Tx packet at %p size %u to slot %d.\n",
+                dev->name, skb->data, skb->len, entry);
+
+       return 0;
+}
+
+
+static void rtl8139_tx_interrupt (struct net_device *dev,
+                                 struct rtl8139_private *tp,
+                                 void *ioaddr)
+{
+       unsigned long dirty_tx, tx_left;
+
+       assert (dev != NULL);
+       assert (tp != NULL);
+       assert (ioaddr != NULL);
+
+       dirty_tx = tp->dirty_tx;
+       tx_left = tp->cur_tx - dirty_tx;
+       while (tx_left > 0) {
+               int entry = dirty_tx % NUM_TX_DESC;
+               int txstatus;
+
+               txstatus = RTL_R32 (TxStatus0 + (entry * sizeof (u32)));
+
+               if (!(txstatus & (TxStatOK | TxUnderrun | TxAborted)))
+                       break;  /* It still hasn't been Txed */
+
+               /* Note: TxCarrierLost is always asserted at 100 Mbps. */
+               if (txstatus & (TxOutOfWindow | TxAborted)) {
+                       /* There was a major error; log it. */
+                       DPRINTK ("%s: Transmit error, Tx status %8.8x.\n",
+                                dev->name, txstatus);
+                       tp->stats.tx_errors++;
+                       if (txstatus & TxAborted) {
+                               tp->stats.tx_aborted_errors++;
+                               RTL_W32 (TxConfig, TxClearAbt);
+                               RTL_W16 (IntrStatus, TxErr);
+                               wmb();
+                       }
+                       if (txstatus & TxCarrierLost)
+                               tp->stats.tx_carrier_errors++;
+                       if (txstatus & TxOutOfWindow)
+                               tp->stats.tx_window_errors++;
+#ifdef ETHER_STATS
+                       if ((txstatus & 0x0f000000) == 0x0f000000)
+                               tp->stats.collisions16++;
+#endif
+               } else {
+                       if (txstatus & TxUnderrun) {
+                               /* Add 64 to the Tx FIFO threshold. */
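+                               /* (The early-Tx threshold lives in bits
+                                * 16-21 of the Tx status word, in 32-byte
+                                * units -- see the tx_flag setup in
+                                * rtl8139_open() -- so += 0x00020000 is
+                                * two units, i.e. 64 bytes.) */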
+                               if (tp->tx_flag < 0x00300000)
+                                       tp->tx_flag += 0x00020000;
+                               tp->stats.tx_fifo_errors++;
+                       }
+                       tp->stats.collisions += (txstatus >> 24) & 15;
+                       tp->stats.tx_bytes += txstatus & 0x7ff;
+                       tp->stats.tx_packets++;
+               }
+
+               dirty_tx++;
+               tx_left--;
+       }
+
+#ifndef RTL8139_NDEBUG
+       if (tp->cur_tx - dirty_tx > NUM_TX_DESC) {
+               printk (KERN_ERR "%s: Out-of-sync dirty pointer, %ld vs. %ld.\n",
+                       dev->name, dirty_tx, tp->cur_tx);
+               dirty_tx += NUM_TX_DESC;
+       }
+#endif /* RTL8139_NDEBUG */
+
+       /* only wake the queue if we did work, and the queue is stopped */
+       if (tp->dirty_tx != dirty_tx) {
+               tp->dirty_tx = dirty_tx;
+               mb();
+               if (netif_queue_stopped (dev))
+                       netif_wake_queue (dev);
+       }
+}
+
+
+/* TODO: clean this up!  Rx reset need not be this intensive */
+static void rtl8139_rx_err (u32 rx_status, struct net_device *dev,
+                           struct rtl8139_private *tp, void *ioaddr)
+{
+       u8 tmp8;
+       int tmp_work;
+
+       DPRINTK ("%s: Ethernet frame had errors, status %8.8x.\n",
+                dev->name, rx_status);
+       if (rx_status & RxTooLong) {
+               DPRINTK ("%s: Oversized Ethernet frame, status %4.4x!\n",
+                        dev->name, rx_status);
+               /* A.C.: The chip hangs here. */
+       }
+       tp->stats.rx_errors++;
+       if (rx_status & (RxBadSymbol | RxBadAlign))
+               tp->stats.rx_frame_errors++;
+       if (rx_status & (RxRunt | RxTooLong))
+               tp->stats.rx_length_errors++;
+       if (rx_status & RxCRCErr)
+               tp->stats.rx_crc_errors++;
+
+       /* Reset the receiver, based on RealTek recommendation. (Bug?) */
+
+       /* disable receive */
+       RTL_W8_F (ChipCmd, CmdTxEnb);
+       tmp_work = 200;
+       while (--tmp_work > 0) {
+               udelay(1);
+               tmp8 = RTL_R8 (ChipCmd);
+               if (!(tmp8 & CmdRxEnb))
+                       break;
+       }
+       if (tmp_work <= 0)
+               printk (KERN_WARNING PFX "rx stop wait too long\n");
+       /* restart receive */
+       tmp_work = 200;
+       while (--tmp_work > 0) {
+               RTL_W8_F (ChipCmd, CmdRxEnb | CmdTxEnb);
+               udelay(1);
+               tmp8 = RTL_R8 (ChipCmd);
+               if ((tmp8 & CmdRxEnb) && (tmp8 & CmdTxEnb))
+                       break;
+       }
+       if (tmp_work <= 0)
+               printk (KERN_WARNING PFX "tx/rx enable wait too long\n");
+
+       /* and reinitialize all rx related registers */
+       RTL_W8_F (Cfg9346, Cfg9346_Unlock);
+       /* Must enable Tx/Rx before setting transfer thresholds! */
+       RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb);
+
+       tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys;
+       RTL_W32 (RxConfig, tp->rx_config);
+       tp->cur_rx = 0;
+
+       DPRINTK("init buffer addresses\n");
+
+       /* Lock Config[01234] and BMCR register writes */
+       RTL_W8 (Cfg9346, Cfg9346_Lock);
+
+       /* init Rx ring buffer DMA address */
+       RTL_W32_F (RxBuf, tp->rx_ring_dma);
+
+       /* A.C.: Reset the multicast list. */
+       __set_rx_mode (dev);
+}
+
+static void rtl8139_rx_interrupt (struct net_device *dev,
+                                 struct rtl8139_private *tp, void *ioaddr)
+{
+       unsigned char *rx_ring;
+       u16 cur_rx;
+
+       assert (dev != NULL);
+       assert (tp != NULL);
+       assert (ioaddr != NULL);
+
+       rx_ring = tp->rx_ring;
+       cur_rx = tp->cur_rx;
+
+       DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x,"
+                " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
+                RTL_R16 (RxBufAddr),
+                RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
+
+       while ((RTL_R8 (ChipCmd) & RxBufEmpty) == 0) {
+               int ring_offset = cur_rx % RX_BUF_LEN;
+               u32 rx_status;
+               unsigned int rx_size;
+               unsigned int pkt_size;
+               struct sk_buff *skb;
+
+               rmb();
+
+               /* read size+status of next frame from DMA ring buffer */
+               rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset));
+               rx_size = rx_status >> 16;
+               pkt_size = rx_size - 4;
+
+               DPRINTK ("%s:  rtl8139_rx() status %4.4x, size %4.4x,"
+                        " cur %4.4x.\n", dev->name, rx_status,
+                        rx_size, cur_rx);
+#if RTL8139_DEBUG > 2
+               {
+                       int i;
+                       DPRINTK ("%s: Frame contents ", dev->name);
+                       for (i = 0; i < 70; i++)
+                               printk (" %2.2x",
+                                       rx_ring[ring_offset + i]);
+                       printk (".\n");
+               }
+#endif
+
+               /* Packet copy from FIFO still in progress.
+                * Theoretically, this should never happen
+                * since EarlyRx is disabled.
+                */
+               if (rx_size == 0xfff0) {
+                       tp->xstats.early_rx++;
+                       break;
+               }
+
+               /* If Rx err or invalid rx_size/rx_status received
+                * (which happens if we get lost in the ring),
+                * Rx process gets reset, so we abort any further
+                * Rx processing.
+                */
+               if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
+                   (rx_size < 8) ||
+                   (!(rx_status & RxStatusOK))) {
+                       rtl8139_rx_err (rx_status, dev, tp, ioaddr);
+                       return;
+               }
+
+               /* Malloc up new buffer, compatible with net-2e. */
+               /* Omit the four octet CRC from the length. */
+
+               /* TODO: consider allocating skb's outside of
+                * interrupt context, both to speed interrupt processing,
+                * and also to reduce the chances of having to
+                * drop packets here under memory pressure.
+                */
+
+               skb = dev_alloc_skb (pkt_size + 2);
+               if (skb) {
+                       skb->dev = dev;
+                       skb_reserve (skb, 2);   /* 16 byte align the IP fields. */
+
+                       eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
+                       skb_put (skb, pkt_size);
+
+                       skb->protocol = eth_type_trans (skb, dev);
+                       netif_rx (skb);
+                       dev->last_rx = jiffies;
+                       tp->stats.rx_bytes += pkt_size;
+                       tp->stats.rx_packets++;
+               } else {
+                       printk (KERN_WARNING
+                               "%s: Memory squeeze, dropping packet.\n",
+                               dev->name);
+                       tp->stats.rx_dropped++;
+               }
+
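+               /* Advance past this frame: 4 bytes of status/length
+                * header plus rx_size (which includes the 4-byte CRC),
+                * rounded up to a dword boundary.  E.g. from offset 100
+                * with rx_size 66 the next offset is
+                * (100 + 66 + 4 + 3) & ~3 == 172.  The 16 subtracted
+                * when writing RxBufPtr appears to match the offset the
+                * chip applies internally. */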
+               cur_rx = (cur_rx + rx_size + 4 + 3) & ~3;
+               RTL_W16 (RxBufPtr, cur_rx - 16);
+
+               if (RTL_R16 (IntrStatus) & RxAckBits)
+                       RTL_W16_F (IntrStatus, RxAckBits);
+       }
+
+       DPRINTK ("%s: Done rtl8139_rx(), current %4.4x BufAddr %4.4x,"
+                " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx,
+                RTL_R16 (RxBufAddr),
+                RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd));
+
+       tp->cur_rx = cur_rx;
+}
+
+
+static void rtl8139_weird_interrupt (struct net_device *dev,
+                                    struct rtl8139_private *tp,
+                                    void *ioaddr,
+                                    int status, int link_changed)
+{
+       DPRINTK ("%s: Abnormal interrupt, status %8.8x.\n",
+                dev->name, status);
+
+       assert (dev != NULL);
+       assert (tp != NULL);
+       assert (ioaddr != NULL);
+
+       /* Update the error count. */
+       tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
+       RTL_W32 (RxMissed, 0);
+
+       if ((status & RxUnderrun) && link_changed &&
+           (tp->drv_flags & HAS_LNK_CHNG)) {
+               /* Really link-change on new chips. */
+               int lpar = RTL_R16 (NWayLPAR);
+               int duplex = (lpar & 0x0100) || (lpar & 0x01C0) == 0x0040
+                               || tp->duplex_lock;
+               if (tp->full_duplex != duplex) {
+                       tp->full_duplex = duplex;
+#if 0
+                       RTL_W8 (Cfg9346, Cfg9346_Unlock);
+                       RTL_W8 (Config1, tp->full_duplex ? 0x60 : 0x20);
+                       RTL_W8 (Cfg9346, Cfg9346_Lock);
+#endif
+               }
+               status &= ~RxUnderrun;
+       }
+
+       /* XXX along with rtl8139_rx_err, are we double-counting errors? */
+       if (status &
+           (RxUnderrun | RxOverflow | RxErr | RxFIFOOver))
+               tp->stats.rx_errors++;
+
+       if (status & PCSTimeout)
+               tp->stats.rx_length_errors++;
+       if (status & (RxUnderrun | RxFIFOOver))
+               tp->stats.rx_fifo_errors++;
+       if (status & PCIErr) {
+               u16 pci_cmd_status;
+               pci_read_config_word (tp->pci_dev, PCI_STATUS, &pci_cmd_status);
+               pci_write_config_word (tp->pci_dev, PCI_STATUS, pci_cmd_status);
+
+               printk (KERN_ERR "%s: PCI Bus error %4.4x.\n",
+                       dev->name, pci_cmd_status);
+       }
+}
+
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+static void rtl8139_interrupt (int irq, void *dev_instance,
+                              struct pt_regs *regs)
+{
+       struct net_device *dev = (struct net_device *) dev_instance;
+       struct rtl8139_private *tp = dev->priv;
+       int boguscnt = max_interrupt_work;
+       void *ioaddr = tp->mmio_addr;
+       int ackstat, status;
+       int link_changed = 0; /* avoid bogus "uninit" warning */
+
+       spin_lock (&tp->lock);
+
+       do {
+               status = RTL_R16 (IntrStatus);
+
+               /* h/w no longer present (hotplug?) or major error, bail */
+               if (status == 0xFFFF)
+                       break;
+
+               if ((status &
+                    (PCIErr | PCSTimeout | RxUnderrun | RxOverflow |
+                     RxFIFOOver | TxErr | TxOK | RxErr | RxOK)) == 0)
+                       break;
+
+               /* Acknowledge all of the current interrupt sources ASAP, but
+                  first get an additional status bit from CSCR. */
+               if (status & RxUnderrun)
+                       link_changed = RTL_R16 (CSCR) & CSCR_LinkChangeBit;
+
+               /* The chip takes special action when we clear RxAckBits,
+                * so we clear them later in rtl8139_rx_interrupt
+                */
+               ackstat = status & ~(RxAckBits | TxErr);
+               RTL_W16 (IntrStatus, ackstat);
+
+               DPRINTK ("%s: interrupt  status=%#4.4x ackstat=%#4.4x new intstat=%#4.4x.\n",
+                        dev->name, status, ackstat, RTL_R16 (IntrStatus));
+
+               if (netif_running (dev) && (status & RxAckBits))
+                       rtl8139_rx_interrupt (dev, tp, ioaddr);
+
+               /* Check uncommon events with one test. */
+               if (status & (PCIErr | PCSTimeout | RxUnderrun | RxOverflow |
+                             RxFIFOOver | RxErr))
+                       rtl8139_weird_interrupt (dev, tp, ioaddr,
+                                                status, link_changed);
+
+               if (netif_running (dev) && (status & (TxOK | TxErr))) {
+                       rtl8139_tx_interrupt (dev, tp, ioaddr);
+                       if (status & TxErr)
+                               RTL_W16 (IntrStatus, TxErr);
+               }
+
+               boguscnt--;
+       } while (boguscnt > 0);
+
+       if (boguscnt <= 0) {
+               printk (KERN_WARNING "%s: Too much work at interrupt, "
+                       "IntrStatus=0x%4.4x.\n", dev->name, status);
+
+               /* Clear all interrupt sources. */
+               RTL_W16 (IntrStatus, 0xffff);
+       }
+
+       spin_unlock (&tp->lock);
+
+       DPRINTK ("%s: exiting interrupt, intr_status=%#4.4x.\n",
+                dev->name, RTL_R16 (IntrStatus));
+}
+
+
+static int rtl8139_close (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       unsigned long flags;
+
+       netif_stop_queue (dev);
+
+       DPRINTK ("%s: Shutting down ethercard, status was 0x%4.4x.\n",
+                       dev->name, RTL_R16 (IntrStatus));
+
+       spin_lock_irqsave (&tp->lock, flags);
+
+       /* Stop the chip's Tx and Rx DMA processes. */
+       RTL_W8 (ChipCmd, 0);
+
+       /* Disable interrupts by clearing the interrupt mask. */
+       RTL_W16 (IntrMask, 0);
+
+       /* Update the error counts. */
+       tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
+       RTL_W32 (RxMissed, 0);
+
+       spin_unlock_irqrestore (&tp->lock, flags);
+
+       synchronize_irq ();
+       free_irq (dev->irq, dev);
+
+       rtl8139_tx_clear (tp);
+
+       pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
+                           tp->rx_ring, tp->rx_ring_dma);
+       pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
+                           tp->tx_bufs, tp->tx_bufs_dma);
+       tp->rx_ring = NULL;
+       tp->tx_bufs = NULL;
+
+       /* Green! Put the chip in low-power mode. */
+       RTL_W8 (Cfg9346, Cfg9346_Unlock);
+
+       if (rtl_chip_info[tp->chipset].flags & HasHltClk)
+               RTL_W8 (HltClk, 'H');   /* 'R' would leave the clock running. */
+
+       return 0;
+}
+
+
+/* Get the ethtool settings.  Assumes that eset points to kernel
+   memory, *eset has been initialized as {ETHTOOL_GSET}, and other
+   threads or interrupts aren't messing with the 8139.  */
+static void netdev_get_eset (struct net_device *dev, struct ethtool_cmd *eset)
+{
+       struct rtl8139_private *np = dev->priv;
+       void *ioaddr = np->mmio_addr;
+       u16 advert;
+
+       eset->supported = SUPPORTED_10baseT_Half
+                       | SUPPORTED_10baseT_Full
+                       | SUPPORTED_100baseT_Half
+                       | SUPPORTED_100baseT_Full
+                       | SUPPORTED_Autoneg
+                       | SUPPORTED_TP;
+
+       eset->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
+       advert = mdio_read (dev, np->phys[0], 4);
+       if (advert & 0x0020)
+               eset->advertising |= ADVERTISED_10baseT_Half;
+       if (advert & 0x0040)
+               eset->advertising |= ADVERTISED_10baseT_Full;
+       if (advert & 0x0080)
+               eset->advertising |= ADVERTISED_100baseT_Half;
+       if (advert & 0x0100)
+               eset->advertising |= ADVERTISED_100baseT_Full;
+
+       eset->speed = (RTL_R8 (MediaStatus) & 0x08) ? 10 : 100;
+       /* (KON)FIXME: np->full_duplex is set or reset by the thread,
+          which means this always shows half duplex if the interface
+          isn't up yet, even if it has already autonegotiated.  */
+       eset->duplex = np->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+       eset->port = PORT_TP;
+       /* (KON)FIXME: Is np->phys[0] correct?  starfire.c uses that.  */
+       eset->phy_address = np->phys[0];
+       eset->transceiver = XCVR_INTERNAL;
+       eset->autoneg = (mdio_read (dev, np->phys[0], 0) & 0x1000) != 0;
+       eset->maxtxpkt = 1;
+       eset->maxrxpkt = 1;
+}
+
+
+/* Get the ethtool Wake-on-LAN settings.  Assumes that wol points to
+   kernel memory, *wol has been initialized as {ETHTOOL_GWOL}, and
+   other threads or interrupts aren't messing with the 8139.  */
+static void netdev_get_wol (struct net_device *dev, struct ethtool_wolinfo *wol)
+{
+       struct rtl8139_private *np = dev->priv;
+       void *ioaddr = np->mmio_addr;
+
+       if (rtl_chip_info[np->chipset].flags & HasLWake) {
+               u8 cfg3 = RTL_R8 (Config3);
+               u8 cfg5 = RTL_R8 (Config5);
+
+               wol->supported = WAKE_PHY | WAKE_MAGIC
+                       | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST;
+
+               wol->wolopts = 0;
+               if (cfg3 & Cfg3_LinkUp)
+                       wol->wolopts |= WAKE_PHY;
+               if (cfg3 & Cfg3_Magic)
+                       wol->wolopts |= WAKE_MAGIC;
+               /* (KON)FIXME: See how netdev_set_wol() handles the
+                  following constants.  */
+               if (cfg5 & Cfg5_UWF)
+                       wol->wolopts |= WAKE_UCAST;
+               if (cfg5 & Cfg5_MWF)
+                       wol->wolopts |= WAKE_MCAST;
+               if (cfg5 & Cfg5_BWF)
+                       wol->wolopts |= WAKE_BCAST;
+       }
+}
+
+
+/* Set the ethtool Wake-on-LAN settings.  Return 0 or -errno.  Assumes
+   that wol points to kernel memory and other threads or interrupts
+   aren't messing with the 8139.  */
+static int netdev_set_wol (struct net_device *dev,
+                          const struct ethtool_wolinfo *wol)
+{
+       struct rtl8139_private *np = dev->priv;
+       void *ioaddr = np->mmio_addr;
+       u32 support;
+       u8 cfg3, cfg5;
+
+       support = ((rtl_chip_info[np->chipset].flags & HasLWake)
+                  ? (WAKE_PHY | WAKE_MAGIC
+                     | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST)
+                  : 0);
+       if (wol->wolopts & ~support)
+               return -EINVAL;
+
+       cfg3 = RTL_R8 (Config3) & ~(Cfg3_LinkUp | Cfg3_Magic);
+       if (wol->wolopts & WAKE_PHY)
+               cfg3 |= Cfg3_LinkUp;
+       if (wol->wolopts & WAKE_MAGIC)
+               cfg3 |= Cfg3_Magic;
+       RTL_W8 (Cfg9346, Cfg9346_Unlock);
+       RTL_W8 (Config3, cfg3);
+       RTL_W8 (Cfg9346, Cfg9346_Lock);
+
+       cfg5 = RTL_R8 (Config5) & ~(Cfg5_UWF | Cfg5_MWF | Cfg5_BWF);
+       /* (KON)FIXME: These are untested.  We may have to set the
+          CRC0, Wakeup0 and LSBCRC0 registers too, but I have no
+          documentation.  */
+       if (wol->wolopts & WAKE_UCAST)
+               cfg5 |= Cfg5_UWF;
+       if (wol->wolopts & WAKE_MCAST)
+               cfg5 |= Cfg5_MWF;
+       if (wol->wolopts & WAKE_BCAST)
+               cfg5 |= Cfg5_BWF;
+       RTL_W8 (Config5, cfg5); /* need not unlock via Cfg9346 */
+
+       return 0;
+}
+
+
+static int netdev_ethtool_ioctl (struct net_device *dev, void *useraddr)
+{
+       struct rtl8139_private *np = dev->priv;
+       u32 ethcmd;
+
+       /* dev_ioctl() in ../../net/core/dev.c has already checked
+          capable(CAP_NET_ADMIN), so don't bother with that here.  */
+
+       if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd)))
+               return -EFAULT;
+
+       switch (ethcmd) {
+       case ETHTOOL_GSET:
+               {
+                       struct ethtool_cmd eset = { ETHTOOL_GSET };
+                       spin_lock_irq (&np->lock);
+                       netdev_get_eset (dev, &eset);
+                       spin_unlock_irq (&np->lock);
+                       if (copy_to_user (useraddr, &eset, sizeof (eset)))
+                               return -EFAULT;
+                       return 0;
+               }
+
+       /* TODO: ETHTOOL_SSET */
+
+       case ETHTOOL_GDRVINFO:
+               {
+                       struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO };
+                       strcpy (info.driver, DRV_NAME);
+                       strcpy (info.version, DRV_VERSION);
+                       strcpy (info.bus_info, np->pci_dev->slot_name);
+                       if (copy_to_user (useraddr, &info, sizeof (info)))
+                               return -EFAULT;
+                       return 0;
+               }
+
+       case ETHTOOL_GWOL:
+               {
+                       struct ethtool_wolinfo wol = { ETHTOOL_GWOL };
+                       spin_lock_irq (&np->lock);
+                       netdev_get_wol (dev, &wol);
+                       spin_unlock_irq (&np->lock);
+                       if (copy_to_user (useraddr, &wol, sizeof (wol)))
+                               return -EFAULT;
+                       return 0;
+               }
+
+       case ETHTOOL_SWOL:
+               {
+                       struct ethtool_wolinfo wol;
+                       int rc;
+                       if (copy_from_user (&wol, useraddr, sizeof (wol)))
+                               return -EFAULT;
+                       spin_lock_irq (&np->lock);
+                       rc = netdev_set_wol (dev, &wol);
+                       spin_unlock_irq (&np->lock);
+                       return rc;
+               }
+
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+
+static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct rtl8139_private *tp = dev->priv;
+       struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+       int rc = 0;
+       int phy = tp->phys[0] & 0x3f;
+
+       if (cmd != SIOCETHTOOL) {
+               /* With SIOCETHTOOL, this would corrupt the pointer.  */
+               data->phy_id &= 0x1f;
+               data->reg_num &= 0x1f;
+       }
+
+       switch (cmd) {
+       case SIOCETHTOOL:
+               return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+       case SIOCGMIIPHY:       /* Get the address of the PHY in use. */
+       case SIOCDEVPRIVATE:    /* binary compat, remove in 2.5 */
+               data->phy_id = phy;
+               /* Fall Through */
+
+       case SIOCGMIIREG:       /* Read the specified MII register. */
+       case SIOCDEVPRIVATE+1:  /* binary compat, remove in 2.5 */
+               data->val_out = mdio_read (dev, data->phy_id, data->reg_num);
+               break;
+
+       case SIOCSMIIREG:       /* Write the specified MII register */
+       case SIOCDEVPRIVATE+2:  /* binary compat, remove in 2.5 */
+               if (!capable (CAP_NET_ADMIN)) {
+                       rc = -EPERM;
+                       break;
+               }
+
+               if (data->phy_id == phy) {
+                       u16 value = data->val_in;
+                       switch (data->reg_num) {
+                       case 0:
+                               /* Check for autonegotiation on or reset. */
+                               tp->medialock = (value & 0x9000) ? 0 : 1;
+                               if (tp->medialock)
+                                       tp->full_duplex = (value & 0x0100) ? 1 : 0;
+                               break;
+                       case 4: /* tp->advertising = value; */ break;
+                       }
+               }
+               mdio_write(dev, data->phy_id, data->reg_num, data->val_in);
+               break;
+
+       default:
+               rc = -EOPNOTSUPP;
+               break;
+       }
+
+       return rc;
+}
+
+
+static struct net_device_stats *rtl8139_get_stats (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       unsigned long flags;
+
+       if (netif_running(dev)) {
+               spin_lock_irqsave (&tp->lock, flags);
+               tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
+               RTL_W32 (RxMissed, 0);
+               spin_unlock_irqrestore (&tp->lock, flags);
+       }
+
+       return &tp->stats;
+}
+
+/* Set or clear the multicast filter for this adaptor.
+   This routine is not state sensitive and need not be SMP locked. */
+
+static unsigned const ethernet_polynomial = 0x04c11db7U;
+static inline u32 ether_crc (int length, unsigned char *data)
+{
+       int crc = -1;
+
+       while (--length >= 0) {
+               unsigned char current_octet = *data++;
+               int bit;
+               for (bit = 0; bit < 8; bit++, current_octet >>= 1)
+                       crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ?
+                            ethernet_polynomial : 0);
+       }
+
+       return crc;
+}
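+
+/* Example: __set_rx_mode() below keeps only the top six CRC bits
+   (bit_nr = crc >> 26) to select one of the 64 hash-filter bits.  If
+   bit_nr were 42, the update would set bit 42 & 31 == 10 of
+   mc_filter[42 >> 5] == mc_filter[1], i.e. a bit in MAR4-7. */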
+
+
+static void __set_rx_mode (struct net_device *dev)
+{
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       u32 mc_filter[2];       /* Multicast hash filter */
+       int i, rx_mode;
+       u32 tmp;
+
+       DPRINTK ("%s:   rtl8139_set_rx_mode(%4.4x) done -- Rx config %8.8lx.\n",
+                       dev->name, dev->flags, RTL_R32 (RxConfig));
+
+       /* Note: do not reorder, GCC is clever about common statements. */
+       if (dev->flags & IFF_PROMISC) {
+               /* Unconditionally log net taps. */
+               printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n",
+                       dev->name);
+               rx_mode =
+                   AcceptBroadcast | AcceptMulticast | AcceptMyPhys |
+                   AcceptAllPhys;
+               mc_filter[1] = mc_filter[0] = 0xffffffff;
+       } else if ((dev->mc_count > multicast_filter_limit)
+                  || (dev->flags & IFF_ALLMULTI)) {
+               /* Too many to filter perfectly -- accept all multicasts. */
+               rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
+               mc_filter[1] = mc_filter[0] = 0xffffffff;
+       } else {
+               struct dev_mc_list *mclist;
+               rx_mode = AcceptBroadcast | AcceptMyPhys;
+               mc_filter[1] = mc_filter[0] = 0;
+               for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+                    i++, mclist = mclist->next) {
+                       int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+
+                       mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
+                       rx_mode |= AcceptMulticast;
+               }
+       }
+
+       /* We can safely update without stopping the chip. */
+       tmp = rtl8139_rx_config | rx_mode;
+       if (tp->rx_config != tmp) {
+               RTL_W32_F (RxConfig, tmp);
+               tp->rx_config = tmp;
+       }
+       RTL_W32_F (MAR0 + 0, mc_filter[0]);
+       RTL_W32_F (MAR0 + 4, mc_filter[1]);
+}
+
+static void rtl8139_set_rx_mode (struct net_device *dev)
+{
+       unsigned long flags;
+       struct rtl8139_private *tp = dev->priv;
+
+       spin_lock_irqsave (&tp->lock, flags);
+       __set_rx_mode(dev);
+       spin_unlock_irqrestore (&tp->lock, flags);
+}
+
+#ifdef CONFIG_PM
+
+static int rtl8139_suspend (struct pci_dev *pdev, u32 state)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct rtl8139_private *tp = dev->priv;
+       void *ioaddr = tp->mmio_addr;
+       unsigned long flags;
+
+       if (!netif_running (dev))
+               return 0;
+
+       netif_device_detach (dev);
+
+       spin_lock_irqsave (&tp->lock, flags);
+
+       /* Disable interrupts, stop Tx and Rx. */
+       RTL_W16 (IntrMask, 0);
+       RTL_W8 (ChipCmd, 0);
+
+       /* Update the error counts. */
+       tp->stats.rx_missed_errors += RTL_R32 (RxMissed);
+       RTL_W32 (RxMissed, 0);
+
+       spin_unlock_irqrestore (&tp->lock, flags);
+       return 0;
+}
+
+
+static int rtl8139_resume (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+
+       if (!netif_running (dev))
+               return 0;
+       netif_device_attach (dev);
+       rtl8139_hw_start (dev);
+       return 0;
+}
+
+#endif /* CONFIG_PM */
+
+
+static struct pci_driver rtl8139_pci_driver = {
+       name:           DRV_NAME,
+       id_table:       rtl8139_pci_tbl,
+       probe:          rtl8139_init_one,
+       remove:         __devexit_p(rtl8139_remove_one),
+#ifdef CONFIG_PM
+       suspend:        rtl8139_suspend,
+       resume:         rtl8139_resume,
+#endif /* CONFIG_PM */
+};
+
+
+static int __init rtl8139_init_module (void)
+{
+       /* when we're a module, we always print a version message,
+        * even if no 8139 board is found.
+        */
+#ifdef MODULE
+       printk (KERN_INFO RTL8139_DRIVER_NAME "\n");
+#endif
+
+       return pci_module_init (&rtl8139_pci_driver);
+}
+
+
+static void __exit rtl8139_cleanup_module (void)
+{
+       pci_unregister_driver (&rtl8139_pci_driver);
+}
+
+
+module_init(rtl8139_init_module);
+module_exit(rtl8139_cleanup_module);
diff --git a/xen-2.4.16/drivers/net/Makefile b/xen-2.4.16/drivers/net/Makefile
new file mode 100644 (file)
index 0000000..69b624e
--- /dev/null
@@ -0,0 +1,10 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(MAKE) -C tulip
+       $(LD) -r -o driver.o tulip/tulip.o $(OBJS)
+
+clean:
+       $(MAKE) -C tulip clean
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/net/Space.c b/xen-2.4.16/drivers/net/Space.c
new file mode 100644 (file)
index 0000000..5724837
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Holds initial configuration information for devices.
+ *
+ * Version:    @(#)Space.c     1.0.7   08/12/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Donald J. Becker, <becker@scyld.com>
+ */
+#include <linux/config.h>
+#include <linux/netdevice.h>
+
+/*
+ * KAF (23/7/02): All the probe shit is gone from here -- each network
+ * driver should probe as part of its setup, and dynamically append
+ * to dev_base when it finds a NIC.
+ */
+
+/*
+ * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * semaphore.
+ *
+ * Pure readers hold dev_base_lock for reading.
+ *
+ * Writers must hold the rtnl semaphore while they loop through the
+ * dev_base list, and hold dev_base_lock for writing when they do the
+ * actual updates.  This allows pure readers to access the list even
+ * while a writer is preparing to update it.
+ *
+ * To put it another way, dev_base_lock is held for writing only to
+ * protect against pure readers; the rtnl semaphore provides the
+ * protection against other writers.
+ *
+ * For example usages, see register_netdevice() and
+ * unregister_netdevice(), which must be called with the rtnl
+ * semaphore held.
+ */
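+
+/*
+ * Illustrative sketch (not part of the original file): a pure reader
+ * typically walks the list under the read lock --
+ *
+ *     struct net_device *d;
+ *     read_lock(&dev_base_lock);
+ *     for (d = dev_base; d != NULL; d = d->next)
+ *             do_something(d);        (read-only; do_something is hypothetical)
+ *     read_unlock(&dev_base_lock);
+ */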
+struct net_device *dev_base = NULL;
+rwlock_t dev_base_lock = RW_LOCK_UNLOCKED;
+
diff --git a/xen-2.4.16/drivers/net/eepro100.c b/xen-2.4.16/drivers/net/eepro100.c
new file mode 100644 (file)
index 0000000..bce354f
--- /dev/null
@@ -0,0 +1,2309 @@
+/* drivers/net/eepro100.c: An Intel i82557-559 Ethernet driver for Linux. */
+/*
+   NOTICE: For use with late 2.3 kernels only.
+   May not compile for kernels 2.3.43-47.
+       Written 1996-1999 by Donald Becker.
+
+       The driver also contains updates by different kernel developers
+       (see incomplete list below).
+       Current maintainer is Andrey V. Savochkin <saw@saw.sw.com.sg>.
+       Please use this email address and linux-kernel mailing list for bug reports.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       This driver is for the Intel EtherExpress Pro100 (Speedo3) design.
+       It should work with all i82557/558/559 boards.
+
+       Version history:
+       1998 Apr - 2000 Feb  Andrey V. Savochkin <saw@saw.sw.com.sg>
+               Serious fixes for multicast filter list setting, TX timeout routine;
+               RX ring refilling logic;  other stuff
+       2000 Feb  Jeff Garzik <jgarzik@mandrakesoft.com>
+               Convert to new PCI driver interface
+       2000 Mar 24  Dragan Stancevic <visitor@valinux.com>
+               Disabled FC and ER, to avoid lockups when we get FCP interrupts.
+       2000 Jul 17 Goutham Rao <goutham.rao@intel.com>
+               PCI DMA API fixes, adding pci_dma_sync_single calls where necessary
+*/
+
+static const char *version =
+"eepro100.c:v1.09j-t 9/29/99 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html\n"
+"eepro100.c: $Revision: 1.36 $ 2000/11/17 Modified by Andrey V. Savochkin <saw@saw.sw.com.sg> and others\n";
+
+/* A few user-configurable values that apply to all boards.
+   First set is undocumented and spelled per Intel recommendations. */
+
+static int congenb /* = 0 */; /* Enable congestion control in the DP83840. */
+static int txfifo = 8;         /* Tx FIFO threshold in 4 byte units, 0-15 */
+static int rxfifo = 8;         /* Rx FIFO threshold, default 32 bytes. */
+/* Tx/Rx DMA burst length, 0-127, 0 == no preemption, tx==128 -> disabled. */
+static int txdmacount = 128;
+static int rxdmacount /* = 0 */;
+
+/* Set the copy breakpoint for the copy-only-tiny-buffer Rx method.
+   Lower values use more memory, but are faster. */
+#if defined(__alpha__) || defined(__sparc__) || defined(__mips__) || \
+    defined(__arm__)
+static int rx_copybreak = 1518;
+#else
+static int rx_copybreak = 200;
+#endif
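+/* (Frames shorter than rx_copybreak are copied into a freshly allocated
+   small skb so the full-size ring buffer can be immediately reposted;
+   larger frames are passed up in the ring skb itself.) */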
+
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 20;
+
+/* Maximum number of multicast addresses to filter (vs. rx-all-multicast) */
+static int multicast_filter_limit = 64;
+
+/* 'options' is used to pass a transceiver override or full-duplex flag
+   e.g. "options=16" for FD, "options=32" for 100mbps-only. */
+static int full_duplex[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int options[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int debug = -1;                 /* The debug level */
+
+#if !defined(__OPTIMIZE__)  ||  !defined(__KERNEL__)
+#warning  You must compile this file with the correct options!
+#warning  See the last lines of the source file.
+#error You must compile this driver with "-O".
+#endif
+
+#include <linux/config.h>
+#include <linux/lib.h>
+//#include <linux/version.h>
+#include <linux/module.h>
+
+//#include <linux/kernel.h>
+//#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/mii.h>
+#include <linux/delay.h>
+
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/delay.h>
+
+/* A few values that may be tweaked. */
+
+/* The ring sizes should be a power of two for efficiency. */
+#undef RX_RING_SIZE
+#undef TX_RING_SIZE
+#define TX_RING_SIZE   32
+#define RX_RING_SIZE   32
+
+/* How many slots the multicast filter setup may take.
+   Do not decrease without changing the set_rx_mode() implementation. */
+#define TX_MULTICAST_SIZE   2
+#define TX_MULTICAST_RESERV (TX_MULTICAST_SIZE*2)
+/* Actual number of TX packets queued, must be
+   <= TX_RING_SIZE-TX_MULTICAST_RESERV. */
+#define TX_QUEUE_LIMIT  (TX_RING_SIZE-TX_MULTICAST_RESERV)
+/* Hysteresis marking queue as no longer full. */
+#define TX_QUEUE_UNFULL (TX_QUEUE_LIMIT-4)
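+/* Example: with the sizes above, TX_MULTICAST_RESERV is 4, so at most
+   TX_QUEUE_LIMIT == 28 of the 32 ring slots carry normal packets, and
+   TX_QUEUE_UNFULL == 24 is the hysteresis mark for waking the queue. */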
+
+/* Operational parameters that usually are not changed. */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT             (2*HZ)
+/* Size of a pre-allocated Rx buffer: <Ethernet MTU> + slack. */
+#define PKT_BUF_SZ             1536
+
+MODULE_AUTHOR("Maintainer: Andrey V. Savochkin <saw@saw.sw.com.sg>");
+MODULE_DESCRIPTION("Intel i82557/i82558/i82559 PCI EtherExpressPro driver");
+MODULE_LICENSE("GPL");
+MODULE_PARM(debug, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i");
+MODULE_PARM(congenb, "i");
+MODULE_PARM(txfifo, "i");
+MODULE_PARM(rxfifo, "i");
+MODULE_PARM(txdmacount, "i");
+MODULE_PARM(rxdmacount, "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(multicast_filter_limit, "i");
+MODULE_PARM_DESC(debug, "eepro100 debug level (0-6)");
+MODULE_PARM_DESC(options, "eepro100: Bits 0-3: transceiver type, bit 4: full duplex, bit 5: 100Mbps");
+MODULE_PARM_DESC(full_duplex, "eepro100 full duplex setting(s) (1)");
+MODULE_PARM_DESC(congenb, "eepro100  Enable congestion control (1)");
+MODULE_PARM_DESC(txfifo, "eepro100 Tx FIFO threshold in 4 byte units, (0-15)");
+MODULE_PARM_DESC(rxfifo, "eepro100 Rx FIFO threshold in 4 byte units, (0-15)");
+MODULE_PARM_DESC(txdmacount, "eepro100 Tx DMA burst length; 128 - disable (0-128)");
+MODULE_PARM_DESC(rxdmacount, "eepro100 Rx DMA burst length; 128 - disable (0-128)");
+MODULE_PARM_DESC(rx_copybreak, "eepro100 copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(max_interrupt_work, "eepro100 maximum events handled per interrupt");
+MODULE_PARM_DESC(multicast_filter_limit, "eepro100 maximum number of filtered multicast addresses");
+
+#define RUN_AT(x) (jiffies + (x))
+
+/* ACPI power states don't universally work (yet) */
+#ifndef CONFIG_PM
+#define SET_POWER_STATE(_dev, _state) pci_set_power_state(_dev, 0)
+#else
+#define SET_POWER_STATE(_dev, _state) pci_set_power_state(_dev, _state)
+#endif /* CONFIG_PM */
+
+#define netdevice_start(dev)
+#define netdevice_stop(dev)
+#define netif_set_tx_timeout(dev, tf, tm) \
+       do { \
+               (dev)->tx_timeout = (tf); \
+               (dev)->watchdog_timeo = (tm); \
+       } while (0)
+
+#ifndef PCI_DEVICE_ID_INTEL_ID1029
+#define PCI_DEVICE_ID_INTEL_ID1029 0x1029
+#endif
+#ifndef PCI_DEVICE_ID_INTEL_ID1030
+#define PCI_DEVICE_ID_INTEL_ID1030 0x1030
+#endif
+
+
+static int speedo_debug = 1;
+
+/*
+                               Theory of Operation
+
+I. Board Compatibility
+
+This device driver is designed for the Intel i82557 "Speedo3" chip, Intel's
+single-chip fast Ethernet controller for PCI, as used on the Intel
+EtherExpress Pro 100 adapter.
+
+II. Board-specific settings
+
+PCI bus devices are configured by the system at boot time, so no jumpers
+need to be set on the board.  The system BIOS should be set to assign the
+PCI INTA signal to an otherwise unused system IRQ line.  While it's
+possible to share PCI interrupt lines, it negatively impacts performance and
+only recent kernels support it.
+
+III. Driver operation
+
+IIIA. General
+The Speedo3 is very similar to other Intel network chips, that is to say
+"apparently designed on a different planet".  This chips retains the complex
+Rx and Tx descriptors and multiple buffers pointers as previous chips, but
+also has simplified Tx and Rx buffer modes.  This driver uses the "flexible"
+Tx mode, but in a simplified lower-overhead manner: it associates only a
+single buffer descriptor with each frame descriptor.
+
+Despite the extra space overhead in each receive skbuff, the driver must use
+the simplified Rx buffer mode to assure that only a single data buffer is
+associated with each RxFD. The driver implements this by reserving space
+for the Rx descriptor at the head of each Rx skbuff.
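+As a sketch, the allocation pattern is (the real code is in
+speedo_init_rx_ring() below):
+
+       skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD));
+       rxf = (struct RxFD *)skb->tail;         /* RxFD at the buffer head. */
+       skb_reserve(skb, sizeof(struct RxFD));  /* Frame data follows it. */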
+
+The Speedo-3 has receive and command unit base addresses that are added to
+almost all descriptor pointers.  The driver sets these to zero, so that all
+pointer fields are absolute addresses.
+
+The System Control Block (SCB) of some previous Intel chips exists on the
+chip in both PCI I/O and memory space.  This driver uses the I/O space
+registers, but might switch to memory mapped mode to better support non-x86
+processors.
+
+IIIB. Transmit structure
+
+The driver must use the complex Tx command+descriptor mode in order to
+have an indirect pointer to the skbuff data section.  Each Tx command block
+(TxCB) is associated with two immediately appended Tx Buffer Descriptors
+(TxBDs).  A fixed ring of these TxCB+TxBD pairs is kept as part of the
+speedo_private data structure for each adapter instance.
+
+The newer i82558 explicitly supports this structure, and can read the two
+TxBDs in the same PCI burst as the TxCB.
+
+This ring structure is used for all normal transmit packets, but the
+transmit packet descriptors aren't long enough for most non-Tx commands such
+as CmdConfigure.  This is complicated by the possibility that the chip has
+already loaded the link address in the previous descriptor.  So for these
+commands we convert the next free descriptor on the ring to a NoOp, and point
+that descriptor's link to the complex command.
+
+An additional complexity of these non-transmit commands is that they may be
+added asynchronously to the normal transmit queue, so we disable interrupts
+whenever the Tx descriptor ring is manipulated.
+
+A notable aspect of these special configure commands is that they do
+work with the normal Tx ring entry scavenge method.  The Tx ring scavenge
+is done at interrupt time using the 'dirty_tx' index, and checking for the
+command-complete bit.  The setup frames may have the NoOp command on the
+Tx ring marked as complete without having completed the setup command itself,
+but this is not a problem.  The tx_ring entry can still be safely reused, as the
+tx_skbuff[] entry is always empty for config_cmd and mc_setup frames.
+
+Commands may have bits set, e.g. CmdSuspend, in the command word to either
+suspend or stop the transmit/command unit.  This driver always flags the last
+command with CmdSuspend, erases the CmdSuspend in the previous command, and
+then issues a CU_RESUME.
+Note: Watch out for the potential race condition here: imagine
+       erasing the previous suspend
+               the chip processes the previous command
+               the chip processes the final command, and suspends
+       doing the CU_RESUME
+               the chip processes the next-yet-valid post-final-command.
+So blindly sending a CU_RESUME is only safe if we do it immediately after
+erasing the previous CmdSuspend, without the possibility of an
+intervening delay.  Thus the resume command is always within the
+interrupts-disabled region.  This is a timing dependence, but handling this
+condition in a timing-independent way would considerably complicate the code.
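+
+In outline, the safe sequence is (see speedo_start_xmit() below):
+
+       spin_lock_irqsave(&sp->lock, flags);
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       clear_suspend(sp->last_cmd);            /* erase the previous suspend */
+       outb(CUResume, ioaddr + SCBCmd);        /* resume, no window in between */
+       spin_unlock_irqrestore(&sp->lock, flags);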
+
+Note: In previous generation Intel chips, restarting the command unit was a
+notoriously slow process.  This is presumably no longer true.
+
+IIIC. Receive structure
+
+Because of the bus-master support on the Speedo3 this driver uses the new
+SKBUFF_RX_COPYBREAK scheme, rather than a fixed intermediate receive buffer.
+This scheme allocates full-sized skbuffs as receive buffers.  The value
+SKBUFF_RX_COPYBREAK is used as the copying breakpoint: it is chosen to
+trade-off the memory wasted by passing the full-sized skbuff to the queue
+layer for all frames vs. the copying cost of copying a frame to a
+correctly-sized skbuff.
+
+For small frames the copying cost is negligible (esp. considering that we
+are pre-loading the cache with immediately useful header information), so we
+allocate a new, minimally-sized skbuff.  For large frames the copying cost
+is non-trivial, and the larger copy might flush the cache of useful data, so
+we pass up the skbuff the packet was received into.
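+
+Schematically, the receive path decides (a sketch only, using the
+rx_copybreak module parameter above):
+
+       if (pkt_len < rx_copybreak) {
+               /* allocate a minimally-sized skbuff and copy the frame */
+       } else {
+               /* pass up the skbuff the frame was received into */
+       }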
+
+IV. Notes
+
+Thanks to Steve Williams of Intel for arranging the non-disclosure agreement
+that stated that I could disclose the information.  But I still resent
+having to sign an Intel NDA when I'm helping Intel sell their own product!
+
+*/
+
+static int speedo_found1(struct pci_dev *pdev, long ioaddr, int fnd_cnt, int acpi_idle_state);
+
+enum pci_flags_bit {
+       PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+       PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+static inline unsigned int io_inw(unsigned long port)
+{
+       return inw(port);
+}
+static inline void io_outw(unsigned int val, unsigned long port)
+{
+       outw(val, port);
+}
+
+#ifndef USE_IO
+/* Currently alpha headers define in/out macros.
+   Undefine them.  2000/03/30  SAW */
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+#define inb readb
+#define inw readw
+#define inl readl
+#define outb writeb
+#define outw writew
+#define outl writel
+#endif
+
+/* How to wait for the command unit to accept a command.
+   Typically this takes 0 ticks. */
+static inline void wait_for_cmd_done(long cmd_ioaddr)
+{
+       int wait = 1000;
+       do {
+               udelay(1);
+       } while (inb(cmd_ioaddr) && --wait >= 0);
+#ifndef final_version
+       if (wait < 0)
+               printk(KERN_ALERT "eepro100: wait_for_cmd_done timeout!\n");
+#endif
+}
+
+/* Offsets to the various registers.
+   Accesses need not be longword aligned. */
+enum speedo_offsets {
+       SCBStatus = 0, SCBCmd = 2,      /* Rx/Command Unit command and status. */
+       SCBPointer = 4,                         /* General purpose pointer. */
+       SCBPort = 8,                            /* Misc. commands and operands.  */
+       SCBflash = 12, SCBeeprom = 14, /* EEPROM and flash memory control. */
+       SCBCtrlMDI = 16,                        /* MDI interface control. */
+       SCBEarlyRx = 20,                        /* Early receive byte count. */
+};
+/* Commands that can be put in a command list entry. */
+enum commands {
+       CmdNOp = 0, CmdIASetup = 0x10000, CmdConfigure = 0x20000,
+       CmdMulticastList = 0x30000, CmdTx = 0x40000, CmdTDR = 0x50000,
+       CmdDump = 0x60000, CmdDiagnose = 0x70000,
+       CmdSuspend = 0x40000000,        /* Suspend after completion. */
+       CmdIntr = 0x20000000,           /* Interrupt after completion. */
+       CmdTxFlex = 0x00080000,         /* Use "Flexible mode" for CmdTx command. */
+};
+/* Clear CmdSuspend (1<<30), avoiding interference with the card's access to the
+   status bits.  Previous driver versions used separate 16 bit fields for
+   commands and statuses.  --SAW
+ */
+#if defined(__alpha__)
+# define clear_suspend(cmd)  clear_bit(30, &(cmd)->cmd_status);
+#else
+# if defined(__LITTLE_ENDIAN)
+#  define clear_suspend(cmd)  ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x4000
+# elif defined(__BIG_ENDIAN)
+#  define clear_suspend(cmd)  ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x0040
+# else
+#  error Unsupported byteorder
+# endif
+#endif
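+/* Why these masks: CmdSuspend is bit 30 of the 32-bit cmd_status word, which
+   is kept in little-endian (device) byte order.  In the upper 16-bit half,
+   bit 30 is bit 14, i.e. 0x4000 on a little-endian host; on a big-endian
+   host the bytes of that half are swapped, so the same bit reads as 0x0040. */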
+
+enum SCBCmdBits {
+       SCBMaskCmdDone=0x8000, SCBMaskRxDone=0x4000, SCBMaskCmdIdle=0x2000,
+       SCBMaskRxSuspend=0x1000, SCBMaskEarlyRx=0x0800, SCBMaskFlowCtl=0x0400,
+       SCBTriggerIntr=0x0200, SCBMaskAll=0x0100,
+       /* The rest are Rx and Tx commands. */
+       CUStart=0x0010, CUResume=0x0020, CUStatsAddr=0x0040, CUShowStats=0x0050,
+       CUCmdBase=0x0060,       /* CU Base address (set to zero) . */
+       CUDumpStats=0x0070, /* Dump then reset stats counters. */
+       RxStart=0x0001, RxResume=0x0002, RxAbort=0x0004, RxAddrLoad=0x0006,
+       RxResumeNoResources=0x0007,
+};
+
+enum SCBPort_cmds {
+       PortReset=0, PortSelfTest=1, PortPartialReset=2, PortDump=3,
+};
+
+/* The Speedo3 Rx and Tx frame/buffer descriptors. */
+struct descriptor {                        /* A generic descriptor. */
+       s32 cmd_status;                         /* All command and status fields. */
+       u32 link;                                   /* struct descriptor *  */
+       unsigned char params[0];
+};
+
+/* The Speedo3 Rx and Tx buffer descriptors. */
+struct RxFD {                                  /* Receive frame descriptor. */
+       s32 status;
+       u32 link;                                       /* struct RxFD * */
+       u32 rx_buf_addr;                        /* void * */
+       u32 count;
+};
+
+/* Selected elements of the Tx/RxFD.status word. */
+enum RxFD_bits {
+       RxComplete=0x8000, RxOK=0x2000,
+       RxErrCRC=0x0800, RxErrAlign=0x0400, RxErrTooBig=0x0200, RxErrSymbol=0x0010,
+       RxEth2Type=0x0020, RxNoMatch=0x0004, RxNoIAMatch=0x0002,
+       TxUnderrun=0x1000,  StatusComplete=0x8000,
+};
+
+#define CONFIG_DATA_SIZE 22
+struct TxFD {                                  /* Transmit frame descriptor set. */
+       s32 status;
+       u32 link;                                       /* void * */
+       u32 tx_desc_addr;                       /* Always points to the tx_buf_addr element. */
+       s32 count;                                      /* # of TBD (=1), Tx start thresh., etc. */
+       /* This constitutes two "TBD" entries -- we only use one. */
+#define TX_DESCR_BUF_OFFSET 16
+       u32 tx_buf_addr0;                       /* void *, frame to be transmitted.  */
+       s32 tx_buf_size0;                       /* Length of Tx frame. */
+       u32 tx_buf_addr1;                       /* void *, frame to be transmitted.  */
+       s32 tx_buf_size1;                       /* Length of Tx frame. */
+       /* the structure must have space for at least CONFIG_DATA_SIZE starting
+        * from tx_desc_addr field */
+};
+
+/* Multicast filter setting block.  --SAW */
+struct speedo_mc_block {
+       struct speedo_mc_block *next;
+       unsigned int tx;
+       dma_addr_t frame_dma;
+       unsigned int len;
+       struct descriptor frame __attribute__ ((__aligned__(16)));
+};
+
+/* Elements of the dump_statistics block.  This block must be longword aligned. */
+struct speedo_stats {
+       u32 tx_good_frames;
+       u32 tx_coll16_errs;
+       u32 tx_late_colls;
+       u32 tx_underruns;
+       u32 tx_lost_carrier;
+       u32 tx_deferred;
+       u32 tx_one_colls;
+       u32 tx_multi_colls;
+       u32 tx_total_colls;
+       u32 rx_good_frames;
+       u32 rx_crc_errs;
+       u32 rx_align_errs;
+       u32 rx_resource_errs;
+       u32 rx_overrun_errs;
+       u32 rx_colls_errs;
+       u32 rx_runt_errs;
+       u32 done_marker;
+};
+
+enum Rx_ring_state_bits {
+       RrNoMem=1, RrPostponed=2, RrNoResources=4, RrOOMReported=8,
+};
+
+/* Do not change the position (alignment) of the first few elements!
+   The later elements are grouped for cache locality.
+
+   Unfortunately, all the positions have been shifted since then.
+   A new re-alignment is required.  2000/03/06  SAW */
+struct speedo_private {
+       struct TxFD     *tx_ring;                               /* Commands (usually CmdTxPacket). */
+       struct RxFD *rx_ringp[RX_RING_SIZE];/* Rx descriptor, used as ring. */
+       /* The addresses of a Tx/Rx-in-place packets/buffers. */
+       struct sk_buff *tx_skbuff[TX_RING_SIZE];
+       struct sk_buff *rx_skbuff[RX_RING_SIZE];
+       /* Mapped addresses of the rings. */
+       dma_addr_t tx_ring_dma;
+#define TX_RING_ELEM_DMA(sp, n) ((sp)->tx_ring_dma + (n)*sizeof(struct TxFD))
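+/* E.g. TX_RING_ELEM_DMA(sp, entry) is the bus address of tx_ring[entry],
+   used for the descriptor link fields in speedo_start_xmit() below. */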
+       dma_addr_t rx_ring_dma[RX_RING_SIZE];
+       struct descriptor *last_cmd;            /* Last command sent. */
+       unsigned int cur_tx, dirty_tx;          /* The ring entries to be free()ed. */
+       spinlock_t lock;                                        /* Group with Tx control cache line. */
+       u32 tx_threshold;                                       /* The value for txdesc.count. */
+       struct RxFD *last_rxf;                          /* Last filled RX buffer. */
+       dma_addr_t last_rxf_dma;
+       unsigned int cur_rx, dirty_rx;          /* The next free ring entry */
+       long last_rx_time;                      /* Last Rx, in jiffies, to handle Rx hang. */
+       struct net_device_stats stats;
+       struct speedo_stats *lstats;
+       dma_addr_t lstats_dma;
+       int chip_id;
+       struct pci_dev *pdev;
+       struct timer_list timer;                        /* Media selection timer. */
+       struct speedo_mc_block *mc_setup_head;/* Multicast setup frame list head. */
+       struct speedo_mc_block *mc_setup_tail;/* Multicast setup frame list tail. */
+       long in_interrupt;                                      /* Word-aligned dev->interrupt */
+       unsigned char acpi_pwr;
+       signed char rx_mode;                                    /* Current PROMISC/ALLMULTI setting. */
+       unsigned int tx_full:1;                         /* The Tx queue is full. */
+       unsigned int full_duplex:1;                     /* Full-duplex operation requested. */
+       unsigned int flow_ctrl:1;                       /* Use 802.3x flow control. */
+       unsigned int rx_bug:1;                          /* Work around receiver hang errata. */
+       unsigned char default_port:8;           /* Last dev->if_port value. */
+       unsigned char rx_ring_state;            /* RX ring status flags. */
+       unsigned short phy[2];                          /* PHY media interfaces available. */
+       unsigned short advertising;                     /* Current PHY advertised caps. */
+       unsigned short partner;                         /* Link partner caps. */
+#ifdef CONFIG_PM
+       u32 pm_state[16];
+#endif
+};
+
+/* The parameters for a CmdConfigure operation.
+   There are so many options that it would be difficult to document each bit.
+   We mostly use the default or recommended settings. */
+static const char i82557_config_cmd[CONFIG_DATA_SIZE] = {
+       22, 0x08, 0, 0,  0, 0, 0x32, 0x03,  1, /* 1=Use MII  0=Use AUI */
+       0, 0x2E, 0,  0x60, 0,
+       0xf2, 0x48,   0, 0x40, 0xf2, 0x80,              /* 0x40=Force full-duplex */
+       0x3f, 0x05, };
+static const char i82558_config_cmd[CONFIG_DATA_SIZE] = {
+       22, 0x08, 0, 1,  0, 0, 0x22, 0x03,  1, /* 1=Use MII  0=Use AUI */
+       0, 0x2E, 0,  0x60, 0x08, 0x88,
+       0x68, 0, 0x40, 0xf2, 0x84,              /* Disable FC */
+       0x31, 0x05, };
+
+/* PHY media interface chips. */
+static const char *phys[] = {
+       "None", "i82553-A/B", "i82553-C", "i82503",
+       "DP83840", "80c240", "80c24", "i82555",
+       "unknown-8", "unknown-9", "DP83840A", "unknown-11",
+       "unknown-12", "unknown-13", "unknown-14", "unknown-15", };
+enum phy_chips { NonSuchPhy=0, I82553AB, I82553C, I82503, DP83840, S80C240,
+                                        S80C24, I82555, DP83840A=10, };
+static const char is_mii[] = { 0, 1, 1, 0, 1, 1, 0, 1 };
+#define EE_READ_CMD            (6)
+
+static int eepro100_init_one(struct pci_dev *pdev,
+               const struct pci_device_id *ent);
+static void eepro100_remove_one (struct pci_dev *pdev);
+#ifdef CONFIG_PM
+static int eepro100_suspend (struct pci_dev *pdev, u32 state);
+static int eepro100_resume (struct pci_dev *pdev);
+#endif
+
+static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len);
+static int mdio_read(long ioaddr, int phy_id, int location);
+static int mdio_write(long ioaddr, int phy_id, int location, int value);
+static int speedo_open(struct net_device *dev);
+static void speedo_resume(struct net_device *dev);
+static void speedo_timer(unsigned long data);
+static void speedo_init_rx_ring(struct net_device *dev);
+static void speedo_tx_timeout(struct net_device *dev);
+static int speedo_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static void speedo_refill_rx_buffers(struct net_device *dev, int force);
+static int speedo_rx(struct net_device *dev);
+static void speedo_tx_buffer_gc(struct net_device *dev);
+static void speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
+static int speedo_close(struct net_device *dev);
+static struct net_device_stats *speedo_get_stats(struct net_device *dev);
+static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void set_rx_mode(struct net_device *dev);
+static void speedo_show_state(struct net_device *dev);
+
+\f
+
+#ifdef honor_default_port
+/* Optional driver feature to allow forcing the transceiver setting.
+   Not recommended. */
+static int mii_ctrl[8] = { 0x3300, 0x3100, 0x0000, 0x0100,
+                                                  0x2000, 0x2100, 0x0400, 0x3100};
+#endif
+
+static int __devinit eepro100_init_one (struct pci_dev *pdev,
+               const struct pci_device_id *ent)
+{
+       unsigned long ioaddr;
+       int irq;
+       int acpi_idle_state = 0, pm;
+       static int cards_found /* = 0 */;
+
+       static int did_version /* = 0 */;               /* Already printed version info. */
+       if (speedo_debug > 0  &&  did_version++ == 0)
+               printk(version);
+
+       if (!request_region(pci_resource_start(pdev, 1),
+                       pci_resource_len(pdev, 1), "eepro100")) {
+               printk (KERN_ERR "eepro100: cannot reserve I/O ports\n");
+               goto err_out_none;
+       }
+       if (!request_mem_region(pci_resource_start(pdev, 0),
+                       pci_resource_len(pdev, 0), "eepro100")) {
+               printk (KERN_ERR "eepro100: cannot reserve MMIO region\n");
+               goto err_out_free_pio_region;
+       }
+
+       irq = pdev->irq;
+#ifdef USE_IO
+       ioaddr = pci_resource_start(pdev, 1);
+       if (speedo_debug > 2)
+               printk("Found Intel i82557 PCI Speedo at I/O %#lx, IRQ %d.\n",
+                          ioaddr, irq);
+#else
+       ioaddr = (unsigned long)ioremap(pci_resource_start(pdev, 0),
+                                                                       pci_resource_len(pdev, 0));
+       if (!ioaddr) {
+               printk (KERN_ERR "eepro100: cannot remap MMIO region %lx @ %lx\n",
+                               pci_resource_len(pdev, 0), pci_resource_start(pdev, 0));
+               goto err_out_free_mmio_region;
+       }
+       if (speedo_debug > 2)
+               printk("Found Intel i82557 PCI Speedo, MMIO at %#lx, IRQ %d.\n",
+                          pci_resource_start(pdev, 0), irq);
+#endif
+
+       /* save power state b4 pci_enable_device overwrites it */
+       pm = pci_find_capability(pdev, PCI_CAP_ID_PM);
+       if (pm) {
+               u16 pwr_command;
+               pci_read_config_word(pdev, pm + PCI_PM_CTRL, &pwr_command);
+               acpi_idle_state = pwr_command & PCI_PM_CTRL_STATE_MASK;
+       }
+
+       if (pci_enable_device(pdev))
+               goto err_out_free_mmio_region;
+
+       pci_set_master(pdev);
+
+       if (speedo_found1(pdev, ioaddr, cards_found, acpi_idle_state) == 0)
+               cards_found++;
+       else
+               goto err_out_iounmap;
+
+       return 0;
+
+err_out_iounmap: ;
+#ifndef USE_IO
+       iounmap ((void *)ioaddr);
+#endif
+err_out_free_mmio_region:
+       release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+err_out_free_pio_region:
+       release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1));
+err_out_none:
+       return -ENODEV;
+}
+
+static int speedo_found1(struct pci_dev *pdev,
+               long ioaddr, int card_idx, int acpi_idle_state)
+{
+       struct net_device *dev;
+       struct speedo_private *sp;
+       const char *product;
+       int i, option;
+       u16 eeprom[0x100];
+       int size;
+       void *tx_ring_space;
+       dma_addr_t tx_ring_dma;
+
+       size = TX_RING_SIZE * sizeof(struct TxFD) + sizeof(struct speedo_stats);
+       tx_ring_space = pci_alloc_consistent(pdev, size, &tx_ring_dma);
+       if (tx_ring_space == NULL)
+               return -1;
+
+       dev = init_etherdev(NULL, sizeof(struct speedo_private));
+       if (dev == NULL) {
+               printk(KERN_ERR "eepro100: Could not allocate ethernet device.\n");
+               pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma);
+               return -1;
+       }
+
+       if (dev->mem_start > 0)
+               option = dev->mem_start;
+       else if (card_idx >= 0  &&  options[card_idx] >= 0)
+               option = options[card_idx];
+       else
+               option = 0;
+
+       /* Read the station address EEPROM before doing the reset.
+          Nominally this should even be done before accepting the device, but
+          then we wouldn't have a device name with which to report the error.
+          The size test is for 6 bit vs. 8 bit address serial EEPROMs.
+       */
+       {
+               unsigned long iobase;
+               int read_cmd, ee_size;
+               u16 sum;
+               int j;
+
+               /* Use IO only to avoid postponed writes and satisfy EEPROM timing
+                  requirements. */
+               iobase = pci_resource_start(pdev, 1);
+               if ((do_eeprom_cmd(iobase, EE_READ_CMD << 24, 27) & 0xffe0000)
+                       == 0xffe0000) {
+                       ee_size = 0x100;
+                       read_cmd = EE_READ_CMD << 24;
+               } else {
+                       ee_size = 0x40;
+                       read_cmd = EE_READ_CMD << 22;
+               }
+
+               for (j = 0, i = 0, sum = 0; i < ee_size; i++) {
+                       u16 value = do_eeprom_cmd(iobase, read_cmd | (i << 16), 27);
+                       eeprom[i] = value;
+                       sum += value;
+                       if (i < 3) {
+                               dev->dev_addr[j++] = value;
+                               dev->dev_addr[j++] = value >> 8;
+                       }
+               }
+               if (sum != 0xBABA)
+                       printk(KERN_WARNING "%s: Invalid EEPROM checksum %#4.4x, "
+                                  "check settings before activating this device!\n",
+                                  dev->name, sum);
+               /* Don't  unregister_netdev(dev);  as the EEPro may actually be
+                  usable, especially if the MAC address is set later.
+                  On the other hand, it may be unusable if MDI data is corrupted. */
+       }
+
+       /* Reset the chip: stop Tx and Rx processes and clear counters.
+          This takes less than 10usec and will easily finish before the next
+          action. */
+       outl(PortReset, ioaddr + SCBPort);
+       inl(ioaddr + SCBPort);
+       udelay(10);
+
+       if (eeprom[3] & 0x0100)
+               product = "OEM i82557/i82558 10/100 Ethernet";
+       else
+               product = pdev->name;
+
+       printk(KERN_INFO "%s: %s, ", dev->name, product);
+
+       for (i = 0; i < 5; i++)
+               printk("%2.2X:", dev->dev_addr[i]);
+       printk("%2.2X, ", dev->dev_addr[i]);
+#ifdef USE_IO
+       printk("I/O at %#3lx, ", ioaddr);
+#endif
+       printk("IRQ %d.\n", pdev->irq);
+
+#if 1 || defined(kernel_bloat)
+       /* OK, this is pure kernel bloat.  I don't like it when other drivers
+          waste non-pageable kernel space to emit similar messages, but I need
+          them for bug reports. */
+       {
+               const char *connectors[] = {" RJ45", " BNC", " AUI", " MII"};
+               /* The self-test results must be paragraph aligned. */
+               volatile s32 *self_test_results;
+               int boguscnt = 16000;   /* Timeout for self-test. */
+               if ((eeprom[3] & 0x03) != 0x03)
+                       printk(KERN_INFO "  Receiver lock-up bug exists -- enabling"
+                                  " work-around.\n");
+               printk(KERN_INFO "  Board assembly %4.4x%2.2x-%3.3d, Physical"
+                          " connectors present:",
+                          eeprom[8], eeprom[9]>>8, eeprom[9] & 0xff);
+               for (i = 0; i < 4; i++)
+                       if (eeprom[5] & (1<<i))
+                               printk(connectors[i]);
+               printk("\n"KERN_INFO"  Primary interface chip %s PHY #%d.\n",
+                          phys[(eeprom[6]>>8)&15], eeprom[6] & 0x1f);
+               if (eeprom[7] & 0x0700)
+                       printk(KERN_INFO "    Secondary interface chip %s.\n",
+                                  phys[(eeprom[7]>>8)&7]);
+               if (((eeprom[6]>>8) & 0x3f) == DP83840
+                       ||  ((eeprom[6]>>8) & 0x3f) == DP83840A) {
+                       int mdi_reg23 = mdio_read(ioaddr, eeprom[6] & 0x1f, 23) | 0x0422;
+                       if (congenb)
+                         mdi_reg23 |= 0x0100;
+                       printk(KERN_INFO"  DP83840 specific setup, setting register 23 to %4.4x.\n",
+                                  mdi_reg23);
+                       mdio_write(ioaddr, eeprom[6] & 0x1f, 23, mdi_reg23);
+               }
+               if ((option >= 0) && (option & 0x70)) {
+                       printk(KERN_INFO "  Forcing %dMbs %s-duplex operation.\n",
+                                  (option & 0x20 ? 100 : 10),
+                                  (option & 0x10 ? "full" : "half"));
+                       mdio_write(ioaddr, eeprom[6] & 0x1f, 0,
+                                          ((option & 0x20) ? 0x2000 : 0) |     /* 100mbps? */
+                                          ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */
+               }
+
+               /* Perform a system self-test. */
+               self_test_results = (s32*) ((((long) tx_ring_space) + 15) & ~0xf);
+               self_test_results[0] = 0;
+               self_test_results[1] = -1;
+               outl(tx_ring_dma | PortSelfTest, ioaddr + SCBPort);
+               do {
+                       udelay(10);
+               } while (self_test_results[1] == -1  &&  --boguscnt >= 0);
+
+               if (boguscnt < 0) {             /* Test optimized out. */
+                       printk(KERN_ERR "Self test failed, status %8.8x:\n"
+                                  KERN_ERR " Failure to initialize the i82557.\n"
+                                  KERN_ERR " Verify that the card is a bus-master"
+                                  " capable slot.\n",
+                                  self_test_results[1]);
+               } else
+                       printk(KERN_INFO "  General self-test: %s.\n"
+                                  KERN_INFO "  Serial sub-system self-test: %s.\n"
+                                  KERN_INFO "  Internal registers self-test: %s.\n"
+                                  KERN_INFO "  ROM checksum self-test: %s (%#8.8x).\n",
+                                  self_test_results[1] & 0x1000 ? "failed" : "passed",
+                                  self_test_results[1] & 0x0020 ? "failed" : "passed",
+                                  self_test_results[1] & 0x0008 ? "failed" : "passed",
+                                  self_test_results[1] & 0x0004 ? "failed" : "passed",
+                                  self_test_results[0]);
+       }
+#endif  /* kernel_bloat */
+
+       outl(PortReset, ioaddr + SCBPort);
+       inl(ioaddr + SCBPort);
+       udelay(10);
+
+       /* Return the chip to its original power state. */
+       SET_POWER_STATE(pdev, acpi_idle_state);
+
+       pci_set_drvdata (pdev, dev);
+
+       dev->base_addr = ioaddr;
+       dev->irq = pdev->irq;
+
+       sp = dev->priv;
+       sp->pdev = pdev;
+       sp->acpi_pwr = acpi_idle_state;
+       sp->tx_ring = tx_ring_space;
+       sp->tx_ring_dma = tx_ring_dma;
+       sp->lstats = (struct speedo_stats *)(sp->tx_ring + TX_RING_SIZE);
+       sp->lstats_dma = TX_RING_ELEM_DMA(sp, TX_RING_SIZE);
+       init_timer(&sp->timer); /* used in ioctl() */
+
+       sp->full_duplex = option >= 0 && (option & 0x10) ? 1 : 0;
+       if (card_idx >= 0) {
+               if (full_duplex[card_idx] >= 0)
+                       sp->full_duplex = full_duplex[card_idx];
+       }
+       sp->default_port = option >= 0 ? (option & 0x0f) : 0;
+
+       sp->phy[0] = eeprom[6];
+       sp->phy[1] = eeprom[7];
+       sp->rx_bug = (eeprom[3] & 0x03) == 3 ? 0 : 1;
+
+       if (sp->rx_bug)
+               printk(KERN_INFO "  Receiver lock-up workaround activated.\n");
+
+       /* The Speedo-specific entries in the device structure. */
+       dev->open = &speedo_open;
+       dev->hard_start_xmit = &speedo_start_xmit;
+       netif_set_tx_timeout(dev, &speedo_tx_timeout, TX_TIMEOUT);
+       dev->stop = &speedo_close;
+       dev->get_stats = &speedo_get_stats;
+       dev->set_multicast_list = &set_rx_mode;
+       dev->do_ioctl = &speedo_ioctl;
+
+       return 0;
+}
+\f
+/* Serial EEPROM section.
+   A "bit" grungy, but we work our way through bit-by-bit :->. */
+/*  EEPROM_Ctrl bits. */
+#define EE_SHIFT_CLK   0x01    /* EEPROM shift clock. */
+#define EE_CS                  0x02    /* EEPROM chip select. */
+#define EE_DATA_WRITE  0x04    /* EEPROM chip data in. */
+#define EE_DATA_READ   0x08    /* EEPROM chip data out. */
+#define EE_ENB                 (0x4800 | EE_CS)
+#define EE_WRITE_0             0x4802
+#define EE_WRITE_1             0x4806
+#define EE_OFFSET              SCBeeprom
+
+/* The fixes for the code were kindly provided by Dragan Stancevic
+   <visitor@valinux.com> to strictly follow Intel specifications of EEPROM
+   access timing.
+   The publicly available sheet 64486302 (sec. 3.1) specifies 1us access
+   interval for serial EEPROM.  However, it looks like there is an
+   additional requirement dictating larger udelay's in the code below.
+   2000/05/24  SAW */
+static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len)
+{
+       unsigned retval = 0;
+       long ee_addr = ioaddr + SCBeeprom;
+
+       io_outw(EE_ENB, ee_addr); udelay(2);
+       io_outw(EE_ENB | EE_SHIFT_CLK, ee_addr); udelay(2);
+
+       /* Shift the command bits out. */
+       do {
+               short dataval = (cmd & (1 << cmd_len)) ? EE_WRITE_1 : EE_WRITE_0;
+               io_outw(dataval, ee_addr); udelay(2);
+               io_outw(dataval | EE_SHIFT_CLK, ee_addr); udelay(2);
+               retval = (retval << 1) | ((io_inw(ee_addr) & EE_DATA_READ) ? 1 : 0);
+       } while (--cmd_len >= 0);
+       io_outw(EE_ENB, ee_addr); udelay(2);
+
+       /* Terminate the EEPROM access. */
+       io_outw(EE_ENB & ~EE_CS, ee_addr);
+       return retval;
+}
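+/* Example (as used in speedo_found1() above): EEPROM word i is read with
+       value = do_eeprom_cmd(ioaddr, read_cmd | (i << 16), 27);
+   where read_cmd is EE_READ_CMD shifted for 6- or 8-bit addressing. */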
+
+static int mdio_read(long ioaddr, int phy_id, int location)
+{
+       int val, boguscnt = 64*10;              /* <64 usec. to complete, typ 27 ticks */
+       outl(0x08000000 | (location<<16) | (phy_id<<21), ioaddr + SCBCtrlMDI);
+       do {
+               val = inl(ioaddr + SCBCtrlMDI);
+               if (--boguscnt < 0) {
+                       printk(KERN_ERR " mdio_read() timed out with val = %8.8x.\n", val);
+                       break;
+               }
+       } while (! (val & 0x10000000));
+       return val & 0xffff;
+}
+
+static int mdio_write(long ioaddr, int phy_id, int location, int value)
+{
+       int val, boguscnt = 64*10;              /* <64 usec. to complete, typ 27 ticks */
+       outl(0x04000000 | (location<<16) | (phy_id<<21) | value,
+                ioaddr + SCBCtrlMDI);
+       do {
+               val = inl(ioaddr + SCBCtrlMDI);
+               if (--boguscnt < 0) {
+                       printk(KERN_ERR" mdio_write() timed out with val = %8.8x.\n", val);
+                       break;
+               }
+       } while (! (val & 0x10000000));
+       return val & 0xffff;
+}
+
+\f
+static int
+speedo_open(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int retval;
+
+       if (speedo_debug > 1)
+               printk(KERN_DEBUG "%s: speedo_open() irq %d.\n", dev->name, dev->irq);
+
+       MOD_INC_USE_COUNT;
+
+       SET_POWER_STATE(sp->pdev, 0);
+
+       /* Set up the Tx queue early.. */
+       sp->cur_tx = 0;
+       sp->dirty_tx = 0;
+       sp->last_cmd = 0;
+       sp->tx_full = 0;
+       spin_lock_init(&sp->lock);
+       sp->in_interrupt = 0;
+
+       /* .. we can safely take handler calls during init. */
+       retval = request_irq(dev->irq, &speedo_interrupt, SA_SHIRQ, dev->name, dev);
+       if (retval) {
+               MOD_DEC_USE_COUNT;
+               return retval;
+       }
+
+       dev->if_port = sp->default_port;
+
+#ifdef oh_no_you_dont_unless_you_honour_the_options_passed_in_to_us
+       /* Retrigger negotiation to reset previous errors. */
+       if ((sp->phy[0] & 0x8000) == 0) {
+               int phy_addr = sp->phy[0] & 0x1f ;
+               /* Use 0x3300 for restarting NWay, other values to force xcvr:
+                  0x0000 10-HD
+                  0x0100 10-FD
+                  0x2000 100-HD
+                  0x2100 100-FD
+               */
+#ifdef honor_default_port
+               mdio_write(ioaddr, phy_addr, 0, mii_ctrl[dev->default_port & 7]);
+#else
+               mdio_write(ioaddr, phy_addr, 0, 0x3300);
+#endif
+       }
+#endif
+
+       speedo_init_rx_ring(dev);
+
+       /* Fire up the hardware. */
+       outw(SCBMaskAll, ioaddr + SCBCmd);
+       speedo_resume(dev);
+
+       netdevice_start(dev);
+       netif_start_queue(dev);
+
+       /* Setup the chip and configure the multicast list. */
+       sp->mc_setup_head = NULL;
+       sp->mc_setup_tail = NULL;
+       sp->flow_ctrl = sp->partner = 0;
+       sp->rx_mode = -1;                       /* Invalid -> always reset the mode. */
+       set_rx_mode(dev);
+       if ((sp->phy[0] & 0x8000) == 0)
+               sp->advertising = mdio_read(ioaddr, sp->phy[0] & 0x1f, 4);
+
+       if (speedo_debug > 2) {
+               printk(KERN_DEBUG "%s: Done speedo_open(), status %8.8x.\n",
+                          dev->name, inw(ioaddr + SCBStatus));
+       }
+
+       /* Set the timer.  The timer serves a dual purpose:
+          1) to monitor the media interface (e.g. link beat) and perhaps switch
+          to an alternate media type
+          2) to monitor Rx activity, and restart the Rx process if the receiver
+          hangs. */
+       sp->timer.expires = RUN_AT((24*HZ)/10);                         /* 2.4 sec. */
+       sp->timer.data = (unsigned long)dev;
+       sp->timer.function = &speedo_timer;                                     /* timer handler */
+       add_timer(&sp->timer);
+
+       /* No need to wait for the command unit to accept here. */
+       if ((sp->phy[0] & 0x8000) == 0)
+               mdio_read(ioaddr, sp->phy[0] & 0x1f, 0);
+
+       return 0;
+}
+
+/* Start the chip hardware after a full reset. */
+static void speedo_resume(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       /* Start with a Tx threshold of 256 (0x..20.... 8 byte units). */
+       sp->tx_threshold = 0x01208000;
+
+       /* Set the segment registers to '0'. */
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       outl(0, ioaddr + SCBPointer);
+       /* impose a delay to avoid a bug */
+       inl(ioaddr + SCBPointer);
+       udelay(10);
+       outb(RxAddrLoad, ioaddr + SCBCmd);
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       outb(CUCmdBase, ioaddr + SCBCmd);
+
+       /* Load the statistics block and rx ring addresses. */
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       outl(sp->lstats_dma, ioaddr + SCBPointer);
+       outb(CUStatsAddr, ioaddr + SCBCmd);
+       sp->lstats->done_marker = 0;
+
+       if (sp->rx_ringp[sp->cur_rx % RX_RING_SIZE] == NULL) {
+               if (speedo_debug > 2)
+                       printk(KERN_DEBUG "%s: NULL cur_rx in speedo_resume().\n",
+                                       dev->name);
+       } else {
+               wait_for_cmd_done(ioaddr + SCBCmd);
+               outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE],
+                        ioaddr + SCBPointer);
+               outb(RxStart, ioaddr + SCBCmd);
+       }
+
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       outb(CUDumpStats, ioaddr + SCBCmd);
+       udelay(30);
+
+       /* Fill the first command with our physical address. */
+       {
+               struct descriptor *ias_cmd;
+
+               ias_cmd =
+                       (struct descriptor *)&sp->tx_ring[sp->cur_tx++ % TX_RING_SIZE];
+               /* Avoid a bug(?!) here by marking the command already completed. */
+               ias_cmd->cmd_status = cpu_to_le32((CmdSuspend | CmdIASetup) | 0xa000);
+               ias_cmd->link =
+                       cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE));
+               memcpy(ias_cmd->params, dev->dev_addr, 6);
+               sp->last_cmd = ias_cmd;
+       }
+
+       /* Start the chip's Tx process and unmask interrupts. */
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE),
+                ioaddr + SCBPointer);
+       /* We are not ACK-ing FCP and ER in the interrupt handler yet so they should
+          remain masked --Dragan */
+       outw(CUStart | SCBMaskEarlyRx | SCBMaskFlowCtl, ioaddr + SCBCmd);
+}
+
+/* Media monitoring and control. */
+static void speedo_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int phy_num = sp->phy[0] & 0x1f;
+
+       /* We have MII and lost link beat. */
+       if ((sp->phy[0] & 0x8000) == 0) {
+               int partner = mdio_read(ioaddr, phy_num, 5);
+               if (partner != sp->partner) {
+                       int flow_ctrl = sp->advertising & partner & 0x0400 ? 1 : 0;
+                       if (speedo_debug > 2) {
+                               printk(KERN_DEBUG "%s: Link status change.\n", dev->name);
+                               printk(KERN_DEBUG "%s: Old partner %x, new %x, adv %x.\n",
+                                          dev->name, sp->partner, partner, sp->advertising);
+                       }
+                       sp->partner = partner;
+                       if (flow_ctrl != sp->flow_ctrl) {
+                               sp->flow_ctrl = flow_ctrl;
+                               sp->rx_mode = -1;       /* Trigger a reload. */
+                       }
+                       /* Clear sticky bit. */
+                       mdio_read(ioaddr, phy_num, 1);
+                       /* If link beat has returned... */
+                       if (mdio_read(ioaddr, phy_num, 1) & 0x0004)
+                               dev->flags |= IFF_RUNNING;
+                       else
+                               dev->flags &= ~IFF_RUNNING;
+               }
+       }
+       if (speedo_debug > 3) {
+               printk(KERN_DEBUG "%s: Media control tick, status %4.4x.\n",
+                          dev->name, inw(ioaddr + SCBStatus));
+       }
+       if (sp->rx_mode < 0  ||
+               (sp->rx_bug  && jiffies - sp->last_rx_time > 2*HZ)) {
+               /* We haven't received a packet in a Long Time.  We might have been
+                  bitten by the receiver hang bug.  This can be cleared by sending
+                  a set multicast list command. */
+               if (speedo_debug > 3)
+                       printk(KERN_DEBUG "%s: Sending a multicast list set command"
+                                  " from a timer routine,"
+                                  " m=%d, j=%ld, l=%ld.\n",
+                                  dev->name, sp->rx_mode, jiffies, sp->last_rx_time);
+               set_rx_mode(dev);
+       }
+       /* We must continue to monitor the media. */
+       sp->timer.expires = RUN_AT(2*HZ);                       /* 2.0 sec. */
+       add_timer(&sp->timer);
+#if defined(timer_exit)
+       timer_exit(&sp->timer);
+#endif
+}
+
+static void speedo_show_state(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       int i;
+
+       /* Print a few items for debugging. */
+       if (speedo_debug > 0) {
+               int i;
+               printk(KERN_DEBUG "%s: Tx ring dump,  Tx queue %u / %u:\n", dev->name,
+                          sp->cur_tx, sp->dirty_tx);
+               for (i = 0; i < TX_RING_SIZE; i++)
+                       printk(KERN_DEBUG "%s:  %c%c%2d %8.8x.\n", dev->name,
+                                  i == sp->dirty_tx % TX_RING_SIZE ? '*' : ' ',
+                                  i == sp->cur_tx % TX_RING_SIZE ? '=' : ' ',
+                                  i, sp->tx_ring[i].status);
+       }
+       printk(KERN_DEBUG "%s: Printing Rx ring"
+                  " (next to receive into %u, dirty index %u).\n",
+                  dev->name, sp->cur_rx, sp->dirty_rx);
+
+       for (i = 0; i < RX_RING_SIZE; i++)
+               printk(KERN_DEBUG "%s: %c%c%c%2d %8.8x.\n", dev->name,
+                          sp->rx_ringp[i] == sp->last_rxf ? 'l' : ' ',
+                          i == sp->dirty_rx % RX_RING_SIZE ? '*' : ' ',
+                          i == sp->cur_rx % RX_RING_SIZE ? '=' : ' ',
+                          i, (sp->rx_ringp[i] != NULL) ?
+                                          (unsigned)sp->rx_ringp[i]->status : 0);
+
+#if 0
+       {
+               long ioaddr = dev->base_addr;
+               int phy_num = sp->phy[0] & 0x1f;
+               for (i = 0; i < 16; i++) {
+                       /* FIXME: what does it mean?  --SAW */
+                       if (i == 6) i = 21;
+                       printk(KERN_DEBUG "%s:  PHY index %d register %d is %4.4x.\n",
+                                  dev->name, phy_num, i, mdio_read(ioaddr, phy_num, i));
+               }
+       }
+#endif
+
+}
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void
+speedo_init_rx_ring(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       struct RxFD *rxf, *last_rxf = NULL;
+       dma_addr_t last_rxf_dma = 0 /* to shut up the compiler */;
+       int i;
+
+       sp->cur_rx = 0;
+
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               struct sk_buff *skb;
+               skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD));
+               sp->rx_skbuff[i] = skb;
+               if (skb == NULL)
+                       break;                  /* OK.  Just initially short of Rx bufs. */
+               skb->dev = dev;                 /* Mark as being used by this device. */
+               rxf = (struct RxFD *)skb->tail;
+               sp->rx_ringp[i] = rxf;
+               sp->rx_ring_dma[i] =
+                       pci_map_single(sp->pdev, rxf,
+                                       PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_BIDIRECTIONAL);
+               skb_reserve(skb, sizeof(struct RxFD));
+               if (last_rxf) {
+                       last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]);
+                       pci_dma_sync_single(sp->pdev, last_rxf_dma,
+                                       sizeof(struct RxFD), PCI_DMA_TODEVICE);
+               }
+               last_rxf = rxf;
+               last_rxf_dma = sp->rx_ring_dma[i];
+               rxf->status = cpu_to_le32(0x00000001);  /* '1' is flag value only. */
+               rxf->link = 0;                                          /* None yet. */
+               /* This field unused by i82557. */
+               rxf->rx_buf_addr = 0xffffffff;
+               rxf->count = cpu_to_le32(PKT_BUF_SZ << 16);
+               pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i],
+                               sizeof(struct RxFD), PCI_DMA_TODEVICE);
+       }
+       sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
+       /* Mark the last entry as end-of-list. */
+       last_rxf->status = cpu_to_le32(0xC0000002);     /* '2' is flag value only. */
+       pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1],
+                       sizeof(struct RxFD), PCI_DMA_TODEVICE);
+       sp->last_rxf = last_rxf;
+       sp->last_rxf_dma = last_rxf_dma;
+}
+
+static void speedo_purge_tx(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       int entry;
+
+       while ((int)(sp->cur_tx - sp->dirty_tx) > 0) {
+               entry = sp->dirty_tx % TX_RING_SIZE;
+               if (sp->tx_skbuff[entry]) {
+                       sp->stats.tx_errors++;
+                       pci_unmap_single(sp->pdev,
+                                       le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0),
+                                       sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE);
+                       dev_kfree_skb_irq(sp->tx_skbuff[entry]);
+                       sp->tx_skbuff[entry] = 0;
+               }
+               sp->dirty_tx++;
+       }
+       while (sp->mc_setup_head != NULL) {
+               struct speedo_mc_block *t;
+               if (speedo_debug > 1)
+                       printk(KERN_DEBUG "%s: freeing mc frame.\n", dev->name);
+               pci_unmap_single(sp->pdev, sp->mc_setup_head->frame_dma,
+                               sp->mc_setup_head->len, PCI_DMA_TODEVICE);
+               t = sp->mc_setup_head->next;
+               kfree(sp->mc_setup_head);
+               sp->mc_setup_head = t;
+       }
+       sp->mc_setup_tail = NULL;
+       sp->tx_full = 0;
+       netif_wake_queue(dev);
+}
+
+static void reset_mii(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       /* Reset the MII transceiver, suggested by Fred Young @ scalable.com. */
+       if ((sp->phy[0] & 0x8000) == 0) {
+               int phy_addr = sp->phy[0] & 0x1f;
+               int advertising = mdio_read(ioaddr, phy_addr, 4);
+               int mii_bmcr = mdio_read(ioaddr, phy_addr, 0);
+               mdio_write(ioaddr, phy_addr, 0, 0x0400);
+               mdio_write(ioaddr, phy_addr, 1, 0x0000);
+               mdio_write(ioaddr, phy_addr, 4, 0x0000);
+               mdio_write(ioaddr, phy_addr, 0, 0x8000);
+#ifdef honor_default_port
+               mdio_write(ioaddr, phy_addr, 0, mii_ctrl[dev->default_port & 7]);
+#else
+               mdio_read(ioaddr, phy_addr, 0);
+               mdio_write(ioaddr, phy_addr, 0, mii_bmcr);
+               mdio_write(ioaddr, phy_addr, 4, advertising);
+#endif
+       }
+}
+
+static void speedo_tx_timeout(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int status = inw(ioaddr + SCBStatus);
+       unsigned long flags;
+
+       printk(KERN_WARNING "%s: Transmit timed out: status %4.4x "
+                  " %4.4x at %d/%d command %8.8x.\n",
+                  dev->name, status, inw(ioaddr + SCBCmd),
+                  sp->dirty_tx, sp->cur_tx,
+                  sp->tx_ring[sp->dirty_tx % TX_RING_SIZE].status);
+
+       speedo_show_state(dev);
+#if 0
+       if ((status & 0x00C0) != 0x0080
+               &&  (status & 0x003C) == 0x0010) {
+               /* Only the command unit has stopped. */
+               printk(KERN_WARNING "%s: Trying to restart the transmitter...\n",
+                          dev->name);
+               outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE),
+                        ioaddr + SCBPointer);
+               outw(CUStart, ioaddr + SCBCmd);
+               reset_mii(dev);
+       } else {
+#else
+       {
+#endif
+               del_timer_sync(&sp->timer);
+               /* Reset the Tx and Rx units. */
+               outl(PortReset, ioaddr + SCBPort);
+               /* We may get spurious interrupts here.  But I don't think they
+                  can do much harm.  1999/12/09 SAW */
+               udelay(10);
+               /* Disable interrupts. */
+               outw(SCBMaskAll, ioaddr + SCBCmd);
+               synchronize_irq();
+               speedo_tx_buffer_gc(dev);
+               /* Free as much as possible.
+                  It helps to recover from a hang because of out-of-memory.
+                  It also simplifies speedo_resume() in case TX ring is full or
+                  close-to-be full. */
+               speedo_purge_tx(dev);
+               speedo_refill_rx_buffers(dev, 1);
+               spin_lock_irqsave(&sp->lock, flags);
+               speedo_resume(dev);
+               sp->rx_mode = -1;
+               dev->trans_start = jiffies;
+               spin_unlock_irqrestore(&sp->lock, flags);
+               set_rx_mode(dev); /* it takes the spinlock itself --SAW */
+               /* Reset MII transceiver.  Do it before starting the timer to serialize
+                  mdio_xxx operations.  Yes, it's paranoia :-)  2000/05/09 SAW */
+               reset_mii(dev);
+               sp->timer.expires = RUN_AT(2*HZ);
+               add_timer(&sp->timer);
+       }
+       return;
+}
+
+static int
+speedo_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int entry;
+
+       /* Prevent interrupts from changing the Tx ring from underneath us. */
+       unsigned long flags;
+
+       spin_lock_irqsave(&sp->lock, flags);
+
+       /* Check if there is enough space. */
+       if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+               printk(KERN_ERR "%s: incorrect tbusy state, fixed.\n", dev->name);
+               netif_stop_queue(dev);
+               sp->tx_full = 1;
+               spin_unlock_irqrestore(&sp->lock, flags);
+               return 1;
+       }
+
+       /* Calculate the Tx descriptor entry. */
+       entry = sp->cur_tx++ % TX_RING_SIZE;
+
+       sp->tx_skbuff[entry] = skb;
+       sp->tx_ring[entry].status =
+               cpu_to_le32(CmdSuspend | CmdTx | CmdTxFlex);
+       if (!(entry & ((TX_RING_SIZE>>2)-1)))
+               sp->tx_ring[entry].status |= cpu_to_le32(CmdIntr);
+       sp->tx_ring[entry].link =
+               cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE));
+       sp->tx_ring[entry].tx_desc_addr =
+               cpu_to_le32(TX_RING_ELEM_DMA(sp, entry) + TX_DESCR_BUF_OFFSET);
+       /* The data region is always in one buffer descriptor. */
+       sp->tx_ring[entry].count = cpu_to_le32(sp->tx_threshold);
+       sp->tx_ring[entry].tx_buf_addr0 =
+               cpu_to_le32(pci_map_single(sp->pdev, skb->data,
+                                          skb->len, PCI_DMA_TODEVICE));
+       sp->tx_ring[entry].tx_buf_size0 = cpu_to_le32(skb->len);
+
+       /* workaround for hardware bug on 10 mbit half duplex */
+
+       if ((sp->partner==0) && (sp->chip_id==1)) {
+               wait_for_cmd_done(ioaddr + SCBCmd);
+               outb(0 , ioaddr + SCBCmd);
+       }
+
+       /* Trigger the command unit resume. */
+       wait_for_cmd_done(ioaddr + SCBCmd);
+       clear_suspend(sp->last_cmd);
+       /* We want the time window between clearing suspend flag on the previous
+          command and resuming CU to be as small as possible.
+          Interrupts in between are very undesired.  --SAW */
+       outb(CUResume, ioaddr + SCBCmd);
+       sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry];
+
+       /* Leave room for set_rx_mode(). If there is no more space than reserved
+          for multicast filter mark the ring as full. */
+       if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+               netif_stop_queue(dev);
+               sp->tx_full = 1;
+       }
+
+       spin_unlock_irqrestore(&sp->lock, flags);
+
+       dev->trans_start = jiffies;
+
+       return 0;
+}
+
+static void speedo_tx_buffer_gc(struct net_device *dev)
+{
+       unsigned int dirty_tx;
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+
+       dirty_tx = sp->dirty_tx;
+       while ((int)(sp->cur_tx - dirty_tx) > 0) {
+               int entry = dirty_tx % TX_RING_SIZE;
+               int status = le32_to_cpu(sp->tx_ring[entry].status);
+
+               if (speedo_debug > 5)
+                       printk(KERN_DEBUG " scavenge candidate %d status %4.4x.\n",
+                                  entry, status);
+               if ((status & StatusComplete) == 0)
+                       break;                  /* It still hasn't been processed. */
+               if (status & TxUnderrun)
+                       if (sp->tx_threshold < 0x01e08000) {
+                               if (speedo_debug > 2)
+                                       printk(KERN_DEBUG "%s: TX underrun, threshold adjusted.\n",
+                                                  dev->name);
+                               sp->tx_threshold += 0x00040000;
+                       }
+               /* Free the original skb. */
+               if (sp->tx_skbuff[entry]) {
+                       sp->stats.tx_packets++; /* Count only user packets. */
+                       sp->stats.tx_bytes += sp->tx_skbuff[entry]->len;
+                       pci_unmap_single(sp->pdev,
+                                       le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0),
+                                       sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE);
+                       dev_kfree_skb_irq(sp->tx_skbuff[entry]);
+                       sp->tx_skbuff[entry] = 0;
+               }
+               dirty_tx++;
+       }
+
+       if (speedo_debug && (int)(sp->cur_tx - dirty_tx) > TX_RING_SIZE) {
+               printk(KERN_ERR "out-of-sync dirty pointer, %d vs. %d,"
+                          " full=%d.\n",
+                          dirty_tx, sp->cur_tx, sp->tx_full);
+               dirty_tx += TX_RING_SIZE;
+       }
+
+       while (sp->mc_setup_head != NULL
+                  && (int)(dirty_tx - sp->mc_setup_head->tx - 1) > 0) {
+               struct speedo_mc_block *t;
+               if (speedo_debug > 1)
+                       printk(KERN_DEBUG "%s: freeing mc frame.\n", dev->name);
+               pci_unmap_single(sp->pdev, sp->mc_setup_head->frame_dma,
+                               sp->mc_setup_head->len, PCI_DMA_TODEVICE);
+               t = sp->mc_setup_head->next;
+               kfree(sp->mc_setup_head);
+               sp->mc_setup_head = t;
+       }
+       if (sp->mc_setup_head == NULL)
+               sp->mc_setup_tail = NULL;
+
+       sp->dirty_tx = dirty_tx;
+}
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+static void speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
+{
+       struct net_device *dev = (struct net_device *)dev_instance;
+       struct speedo_private *sp;
+       long ioaddr, boguscnt = max_interrupt_work;
+       unsigned short status;
+
+#ifndef final_version
+       if (dev == NULL) {
+               printk(KERN_ERR "speedo_interrupt(): irq %d for unknown device.\n", irq);
+               return;
+       }
+#endif
+
+       ioaddr = dev->base_addr;
+       sp = (struct speedo_private *)dev->priv;
+
+#ifndef final_version
+       /* A lock to prevent simultaneous entry on SMP machines. */
+       if (test_and_set_bit(0, (void*)&sp->in_interrupt)) {
+               printk(KERN_ERR"%s: SMP simultaneous entry of an interrupt handler.\n",
+                          dev->name);
+               sp->in_interrupt = 0;   /* Avoid halting machine. */
+               return;
+       }
+#endif
+
+       do {
+               status = inw(ioaddr + SCBStatus);
+               /* Acknowledge all of the current interrupt sources ASAP. */
+               /* Will change from 0xfc00 to 0xff00 when we start handling
+                  FCP and ER interrupts --Dragan */
+               outw(status & 0xfc00, ioaddr + SCBStatus);
+
+               if (speedo_debug > 4)
+                       printk(KERN_DEBUG "%s: interrupt  status=%#4.4x.\n",
+                                  dev->name, status);
+
+               if ((status & 0xfc00) == 0)
+                       break;
+
+               /* Always check if all rx buffers are allocated.  --SAW */
+               speedo_refill_rx_buffers(dev, 0);
+
+               if ((status & 0x5000) ||        /* Packet received, or Rx error. */
+                       (sp->rx_ring_state&(RrNoMem|RrPostponed)) == RrPostponed)
+                                                                       /* Need to gather the postponed packet. */
+                       speedo_rx(dev);
+
+               if (status & 0x1000) {
+                       spin_lock(&sp->lock);
+                       if ((status & 0x003c) == 0x0028) {              /* No more Rx buffers. */
+                               struct RxFD *rxf;
+                               printk(KERN_WARNING "%s: card reports no RX buffers.\n",
+                                               dev->name);
+                               rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE];
+                               if (rxf == NULL) {
+                                       if (speedo_debug > 2)
+                                               printk(KERN_DEBUG
+                                                               "%s: NULL cur_rx in speedo_interrupt().\n",
+                                                               dev->name);
+                                       sp->rx_ring_state |= RrNoMem|RrNoResources;
+                               } else if (rxf == sp->last_rxf) {
+                                       if (speedo_debug > 2)
+                                               printk(KERN_DEBUG
+                                                               "%s: cur_rx is last in speedo_interrupt().\n",
+                                                               dev->name);
+                                       sp->rx_ring_state |= RrNoMem|RrNoResources;
+                               } else
+                                       outb(RxResumeNoResources, ioaddr + SCBCmd);
+                       } else if ((status & 0x003c) == 0x0008) { /* No resources. */
+                               struct RxFD *rxf;
+                               printk(KERN_WARNING "%s: card reports no resources.\n",
+                                               dev->name);
+                               rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE];
+                               if (rxf == NULL) {
+                                       if (speedo_debug > 2)
+                                               printk(KERN_DEBUG
+                                                               "%s: NULL cur_rx in speedo_interrupt().\n",
+                                                               dev->name);
+                                       sp->rx_ring_state |= RrNoMem|RrNoResources;
+                               } else if (rxf == sp->last_rxf) {
+                                       if (speedo_debug > 2)
+                                               printk(KERN_DEBUG
+                                                               "%s: cur_rx is last in speedo_interrupt().\n",
+                                                               dev->name);
+                                       sp->rx_ring_state |= RrNoMem|RrNoResources;
+                               } else {
+                                       /* Restart the receiver. */
+                                       outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE],
+                                                ioaddr + SCBPointer);
+                                       outb(RxStart, ioaddr + SCBCmd);
+                               }
+                       }
+                       sp->stats.rx_errors++;
+                       spin_unlock(&sp->lock);
+               }
+
+               if ((sp->rx_ring_state&(RrNoMem|RrNoResources)) == RrNoResources) {
+                       printk(KERN_WARNING
+                                       "%s: restart the receiver after a possible hang.\n",
+                                       dev->name);
+                       spin_lock(&sp->lock);
+                       /* Restart the receiver.
+                          I'm not sure if it's always right to restart the receiver
+                          here but I don't know another way to prevent receiver hangs.
+                          1999/12/25 SAW */
+                       outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE],
+                                ioaddr + SCBPointer);
+                       outb(RxStart, ioaddr + SCBCmd);
+                       sp->rx_ring_state &= ~RrNoResources;
+                       spin_unlock(&sp->lock);
+               }
+
+               /* User interrupt, Command/Tx unit interrupt or CU not active. */
+               if (status & 0xA400) {
+                       spin_lock(&sp->lock);
+                       speedo_tx_buffer_gc(dev);
+                       if (sp->tx_full
+                               && (int)(sp->cur_tx - sp->dirty_tx) < TX_QUEUE_UNFULL) {
+                               /* The ring is no longer full. */
+                               sp->tx_full = 0;
+                               netif_wake_queue(dev); /* Attention: under a spinlock.  --SAW */
+                       }
+                       spin_unlock(&sp->lock);
+               }
+
+               if (--boguscnt < 0) {
+                       printk(KERN_ERR "%s: Too much work at interrupt, status=0x%4.4x.\n",
+                                  dev->name, status);
+                       /* Clear all interrupt sources. */
+                       /* Will change from 0xfc00 to 0xff00 when we start handling
+                          FCP and ER interrupts --Dragan */
+                       outw(0xfc00, ioaddr + SCBStatus);
+                       break;
+               }
+       } while (1);
+
+       if (speedo_debug > 3)
+               printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n",
+                          dev->name, inw(ioaddr + SCBStatus));
+
+       clear_bit(0, (void*)&sp->in_interrupt);
+       return;
+}
+
+static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       struct RxFD *rxf;
+       struct sk_buff *skb;
+       /* Get a fresh skbuff to replace the consumed one. */
+       skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD));
+       sp->rx_skbuff[entry] = skb;
+       if (skb == NULL) {
+               sp->rx_ringp[entry] = NULL;
+               return NULL;
+       }
+       rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail;
+       sp->rx_ring_dma[entry] =
+               pci_map_single(sp->pdev, rxf,
+                                          PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+       skb->dev = dev;
+       skb_reserve(skb, sizeof(struct RxFD));
+       rxf->rx_buf_addr = 0xffffffff;
+       pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
+                       sizeof(struct RxFD), PCI_DMA_TODEVICE);
+       return rxf;
+}
+
+static inline void speedo_rx_link(struct net_device *dev, int entry,
+                                                                 struct RxFD *rxf, dma_addr_t rxf_dma)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       rxf->status = cpu_to_le32(0xC0000001);  /* '1' for driver use only. */
+       rxf->link = 0;                  /* None yet. */
+       rxf->count = cpu_to_le32(PKT_BUF_SZ << 16);
+       sp->last_rxf->link = cpu_to_le32(rxf_dma);
+       sp->last_rxf->status &= cpu_to_le32(~0xC0000000);
+       pci_dma_sync_single(sp->pdev, sp->last_rxf_dma,
+                       sizeof(struct RxFD), PCI_DMA_TODEVICE);
+       sp->last_rxf = rxf;
+       sp->last_rxf_dma = rxf_dma;
+}
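+
+/* A sketch of the hand-off implemented above (illustrative only; the top
+   status bits 0xC0000000 are the i82557 end-of-list and suspend flags):
+
+       before:  ... -> last_rxf [0xC0000000 set, link = 0]
+       after:   ... -> last_rxf [0xC0000000 clear, link -> rxf] -> rxf [0xC0000000 set]
+
+   The new frame is prepared first; only then are the old tail's stop
+   bits cleared and its descriptor synced, so the receiver is released
+   toward the new frame only once that frame is ready. */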
+
+static int speedo_refill_rx_buf(struct net_device *dev, int force)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       int entry;
+       struct RxFD *rxf;
+
+       entry = sp->dirty_rx % RX_RING_SIZE;
+       if (sp->rx_skbuff[entry] == NULL) {
+               rxf = speedo_rx_alloc(dev, entry);
+               if (rxf == NULL) {
+                       unsigned int forw;
+                       int forw_entry;
+                       if (speedo_debug > 2 || !(sp->rx_ring_state & RrOOMReported)) {
+                               printk(KERN_WARNING "%s: can't fill rx buffer (force %d)!\n",
+                                               dev->name, force);
+                               speedo_show_state(dev);
+                               sp->rx_ring_state |= RrOOMReported;
+                       }
+                       if (!force)
+                               return -1;      /* Better luck next time!  */
+                       /* Borrow an skb from one of next entries. */
+                       for (forw = sp->dirty_rx + 1; forw != sp->cur_rx; forw++)
+                               if (sp->rx_skbuff[forw % RX_RING_SIZE] != NULL)
+                                       break;
+                       if (forw == sp->cur_rx)
+                               return -1;
+                       forw_entry = forw % RX_RING_SIZE;
+                       sp->rx_skbuff[entry] = sp->rx_skbuff[forw_entry];
+                       sp->rx_skbuff[forw_entry] = NULL;
+                       rxf = sp->rx_ringp[forw_entry];
+                       sp->rx_ringp[forw_entry] = NULL;
+                       sp->rx_ringp[entry] = rxf;
+               }
+       } else {
+               rxf = sp->rx_ringp[entry];
+       }
+       speedo_rx_link(dev, entry, rxf, sp->rx_ring_dma[entry]);
+       sp->dirty_rx++;
+       sp->rx_ring_state &= ~(RrNoMem|RrOOMReported); /* Mark the progress. */
+       return 0;
+}
+
+static void speedo_refill_rx_buffers(struct net_device *dev, int force)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+
+       /* Refill the RX ring. */
+       while ((int)(sp->cur_rx - sp->dirty_rx) > 0 &&
+                       speedo_refill_rx_buf(dev, force) != -1);
+}
+
+static int
+speedo_rx(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       int entry = sp->cur_rx % RX_RING_SIZE;
+       int rx_work_limit = sp->dirty_rx + RX_RING_SIZE - sp->cur_rx;
+       int alloc_ok = 1;
+
+       if (speedo_debug > 4)
+               printk(KERN_DEBUG " In speedo_rx().\n");
+       /* If we own the next entry, it's a new packet. Send it up. */
+       while (sp->rx_ringp[entry] != NULL) {
+               int status;
+               int pkt_len;
+
+               pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
+                       sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+               status = le32_to_cpu(sp->rx_ringp[entry]->status);
+               pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff;
+
+               if (!(status & RxComplete))
+                       break;
+
+               if (--rx_work_limit < 0)
+                       break;
+
+               /* Check for a rare out-of-memory case: the current buffer is
+                  the last buffer allocated in the RX ring.  --SAW */
+               if (sp->last_rxf == sp->rx_ringp[entry]) {
+                       /* Postpone the packet.  It'll be reaped at an interrupt when this
+                          packet is no longer the last packet in the ring. */
+                       if (speedo_debug > 2)
+                               printk(KERN_DEBUG "%s: RX packet postponed!\n",
+                                          dev->name);
+                       sp->rx_ring_state |= RrPostponed;
+                       break;
+               }
+
+               if (speedo_debug > 4)
+                       printk(KERN_DEBUG "  speedo_rx() status %8.8x len %d.\n", status,
+                                  pkt_len);
+               if ((status & (RxErrTooBig|RxOK|0x0f90)) != RxOK) {
+                       if (status & RxErrTooBig)
+                               printk(KERN_ERR "%s: Ethernet frame overran the Rx buffer, "
+                                          "status %8.8x!\n", dev->name, status);
+                       else if (! (status & RxOK)) {
+                               /* There was a fatal error.  This *should* be impossible. */
+                               sp->stats.rx_errors++;
+                               printk(KERN_ERR "%s: Anomalous event in speedo_rx(), "
+                                          "status %8.8x.\n",
+                                          dev->name, status);
+                       }
+               } else {
+                       struct sk_buff *skb;
+
+                       /* Check if the packet is long enough to just accept without
+                          copying to a properly sized skbuff. */
+                       if (pkt_len < rx_copybreak
+                               && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
+                               skb->dev = dev;
+                               skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+                               /* 'skb_put()' points to the start of sk_buff data area. */
+                               pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
+                                       sizeof(struct RxFD) + pkt_len, PCI_DMA_FROMDEVICE);
+
+#if 1 || USE_IP_CSUM
+                               /* Packet is in one chunk -- we can copy + cksum. */
+                               eth_copy_and_sum(skb, sp->rx_skbuff[entry]->tail, pkt_len, 0);
+                               skb_put(skb, pkt_len);
+#else
+                               memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->tail,
+                                          pkt_len);
+#endif
+                       } else {
+                               /* Pass up the already-filled skbuff. */
+                               skb = sp->rx_skbuff[entry];
+                               if (skb == NULL) {
+                                       printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n",
+                                                  dev->name);
+                                       break;
+                               }
+                               sp->rx_skbuff[entry] = NULL;
+                               skb_put(skb, pkt_len);
+                               sp->rx_ringp[entry] = NULL;
+                               pci_unmap_single(sp->pdev, sp->rx_ring_dma[entry],
+                                               PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+                       }
+                       skb->protocol = eth_type_trans(skb, dev);
+                       netif_rx(skb);
+                       sp->stats.rx_packets++;
+                       sp->stats.rx_bytes += pkt_len;
+               }
+               entry = (++sp->cur_rx) % RX_RING_SIZE;
+               sp->rx_ring_state &= ~RrPostponed;
+               /* Refill the recently taken buffers.
+                  Do it one-by-one to handle traffic bursts better. */
+               if (alloc_ok && speedo_refill_rx_buf(dev, 0) == -1)
+                       alloc_ok = 0;
+       }
+
+       /* Try hard to refill the recently taken buffers. */
+       speedo_refill_rx_buffers(dev, 1);
+
+       sp->last_rx_time = jiffies;
+
+       return 0;
+}
+
+static int
+speedo_close(struct net_device *dev)
+{
+       long ioaddr = dev->base_addr;
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       int i;
+
+       netdevice_stop(dev);
+       netif_stop_queue(dev);
+
+       if (speedo_debug > 1)
+               printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n",
+                          dev->name, inw(ioaddr + SCBStatus));
+
+       /* Shut off the media monitoring timer. */
+       del_timer_sync(&sp->timer);
+
+       /* Shutting down the chip nicely fails to disable flow control. So.. */
+       outl(PortPartialReset, ioaddr + SCBPort);
+
+       free_irq(dev->irq, dev);
+
+       /* Print a few items for debugging. */
+       if (speedo_debug > 3)
+               speedo_show_state(dev);
+
+       /* Free all the skbuffs in the Rx and Tx queues. */
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               struct sk_buff *skb = sp->rx_skbuff[i];
+               sp->rx_skbuff[i] = 0;
+               /* Clear the Rx descriptors. */
+               if (skb) {
+                       pci_unmap_single(sp->pdev,
+                                        sp->rx_ring_dma[i],
+                                        PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb(skb);
+               }
+       }
+
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               struct sk_buff *skb = sp->tx_skbuff[i];
+               sp->tx_skbuff[i] = 0;
+               /* Clear the Tx descriptors. */
+               if (skb) {
+                       pci_unmap_single(sp->pdev,
+                                        le32_to_cpu(sp->tx_ring[i].tx_buf_addr0),
+                                        skb->len, PCI_DMA_TODEVICE);
+                       dev_kfree_skb(skb);
+               }
+       }
+
+       /* Free multicast setting blocks. */
+       for (i = 0; sp->mc_setup_head != NULL; i++) {
+               struct speedo_mc_block *t;
+               t = sp->mc_setup_head->next;
+               kfree(sp->mc_setup_head);
+               sp->mc_setup_head = t;
+       }
+       sp->mc_setup_tail = NULL;
+       if (speedo_debug > 0)
+               printk(KERN_DEBUG "%s: %d multicast blocks dropped.\n", dev->name, i);
+
+       SET_POWER_STATE(sp->pdev, 2);
+
+       MOD_DEC_USE_COUNT;
+
+       return 0;
+}
+
+/* The Speedo-3 has an especially awkward and unusable method of getting
+   statistics out of the chip.  It takes an unpredictable length of time
+   for the dump-stats command to complete.  To avoid a busy-wait loop we
+   update the stats with the previous dump results, and then trigger a
+   new dump.
+
+   Oh, and incoming frames are dropped while executing dump-stats!
+   */
+static struct net_device_stats *
+speedo_get_stats(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       /* Update only if the previous dump finished. */
+       if (sp->lstats->done_marker == le32_to_cpu(0xA007)) {
+               sp->stats.tx_aborted_errors += le32_to_cpu(sp->lstats->tx_coll16_errs);
+               sp->stats.tx_window_errors += le32_to_cpu(sp->lstats->tx_late_colls);
+               sp->stats.tx_fifo_errors += le32_to_cpu(sp->lstats->tx_underruns);
+               sp->stats.tx_fifo_errors += le32_to_cpu(sp->lstats->tx_lost_carrier);
+               /*sp->stats.tx_deferred += le32_to_cpu(sp->lstats->tx_deferred);*/
+               sp->stats.collisions += le32_to_cpu(sp->lstats->tx_total_colls);
+               sp->stats.rx_crc_errors += le32_to_cpu(sp->lstats->rx_crc_errs);
+               sp->stats.rx_frame_errors += le32_to_cpu(sp->lstats->rx_align_errs);
+               sp->stats.rx_over_errors += le32_to_cpu(sp->lstats->rx_resource_errs);
+               sp->stats.rx_fifo_errors += le32_to_cpu(sp->lstats->rx_overrun_errs);
+               sp->stats.rx_length_errors += le32_to_cpu(sp->lstats->rx_runt_errs);
+               sp->lstats->done_marker = 0x0000;
+               if (netif_running(dev)) {
+                       unsigned long flags;
+                       /* Take a spinlock to make wait_for_cmd_done and sending the
+                          command atomic.  --SAW */
+                       spin_lock_irqsave(&sp->lock, flags);
+                       wait_for_cmd_done(ioaddr + SCBCmd);
+                       outb(CUDumpStats, ioaddr + SCBCmd);
+                       spin_unlock_irqrestore(&sp->lock, flags);
+               }
+       }
+       return &sp->stats;
+}
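+
+/* In brief: the dump is considered finished once lstats->done_marker
+   reads back 0xA007; the handler above then harvests the counters,
+   clears the marker, and queues the next CUDumpStats.  Polling the
+   marker this way is what avoids the busy-wait the comment above warns
+   about.  (A recap of the code above, not new behavior.) */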
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+       u32 ethcmd;
+       struct speedo_private *sp = dev->priv;
+               
+       if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+               return -EFAULT;
+       
+       switch (ethcmd) {
+       case ETHTOOL_GDRVINFO: {
+               struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+               strncpy(info.driver, "eepro100", sizeof(info.driver)-1);
+               strncpy(info.version, version, sizeof(info.version)-1);
+               if (sp && sp->pdev)
+                       strcpy(info.bus_info, sp->pdev->slot_name);
+               if (copy_to_user(useraddr, &info, sizeof(info)))
+                       return -EFAULT;
+               return 0;
+       }
+       
+       }
+       
+       return -EOPNOTSUPP;
+}
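+
+/* A hedged sketch of the matching userspace call for the
+   ETHTOOL_GDRVINFO path above.  Not part of the driver; the interface
+   name "eth0" is a placeholder:
+
+       struct ifreq ifr;
+       struct ethtool_drvinfo info;
+       int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
+       info.cmd = ETHTOOL_GDRVINFO;
+       ifr.ifr_data = (char *)&info;
+       if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
+               printf("%s %s on %s\n", info.driver, info.version,
+                      info.bus_info);
+
+   The copy_from_user/copy_to_user pair above is the kernel half of
+   this exchange. */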
+
+static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+       int phy = sp->phy[0] & 0x1f;
+       int saved_acpi;
+       int t;
+
+       switch (cmd) {
+       case SIOCGMIIPHY:               /* Get address of MII PHY in use. */
+       case SIOCDEVPRIVATE:            /* for binary compat, remove in 2.5 */
+               data->phy_id = phy;
+               /* No break: fall through and read the requested register as well. */
+
+       case SIOCGMIIREG:               /* Read MII PHY register. */
+       case SIOCDEVPRIVATE+1:          /* for binary compat, remove in 2.5 */
+               /* FIXME: these operations need to be serialized with MDIO
+                  access from the timeout handler.
+                  They are currently serialized only with MDIO access from the
+                  timer routine.  2000/05/09 SAW */
+               saved_acpi = SET_POWER_STATE(sp->pdev, 0);
+               t = del_timer_sync(&sp->timer);
+               data->val_out = mdio_read(ioaddr, data->phy_id & 0x1f, data->reg_num & 0x1f);
+               if (t)
+                       add_timer(&sp->timer); /* may be set to the past  --SAW */
+               SET_POWER_STATE(sp->pdev, saved_acpi);
+               return 0;
+
+       case SIOCSMIIREG:               /* Write MII PHY register. */
+       case SIOCDEVPRIVATE+2:          /* for binary compat, remove in 2.5 */
+               if (!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               saved_acpi = SET_POWER_STATE(sp->pdev, 0);
+               t = del_timer_sync(&sp->timer);
+               mdio_write(ioaddr, data->phy_id, data->reg_num, data->val_in);
+               if (t)
+                       add_timer(&sp->timer); /* may be set to the past  --SAW */
+               SET_POWER_STATE(sp->pdev, saved_acpi);
+               return 0;
+       case SIOCETHTOOL:
+               return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
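+
+/* And the userspace half of the MII ioctls, again only a sketch with a
+   placeholder interface name.  mii_ioctl_data overlays ifr_data exactly
+   as the cast in speedo_ioctl() assumes:
+
+       struct ifreq ifr;
+       struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
+       int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+       memset(&ifr, 0, sizeof(ifr));
+       strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
+       ioctl(fd, SIOCGMIIPHY, &ifr);      (fills in mii->phy_id)
+       mii->reg_num = 1;                  (MII basic status register)
+       if (ioctl(fd, SIOCGMIIREG, &ifr) == 0)
+               printf("BMSR %#4.4x, link %s\n", mii->val_out,
+                      (mii->val_out & 0x0004) ? "up" : "down");
+*/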
+
+/* Set or clear the multicast filter for this adaptor.
+   This is very ugly with Intel chips -- we usually have to execute an
+   entire configuration command, plus process a multicast command.
+   This is complicated.  We must put a large configuration command and
+   an arbitrarily-sized multicast command in the transmit list.
+   To minimize the disruption -- the previous command might have already
+   loaded the link -- we convert the current command block, normally a Tx
+   command, into a no-op and link it to the new command.
+*/
+static void set_rx_mode(struct net_device *dev)
+{
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       struct descriptor *last_cmd;
+       char new_rx_mode;
+       unsigned long flags;
+       int entry, i;
+
+       if (dev->flags & IFF_PROMISC) {                 /* Set promiscuous. */
+               new_rx_mode = 3;
+       } else if ((dev->flags & IFF_ALLMULTI)  ||
+                          dev->mc_count > multicast_filter_limit) {
+               new_rx_mode = 1;
+       } else
+               new_rx_mode = 0;
+
+       if (speedo_debug > 3)
+               printk(KERN_DEBUG "%s: set_rx_mode %d -> %d\n", dev->name,
+                               sp->rx_mode, new_rx_mode);
+
+       if ((int)(sp->cur_tx - sp->dirty_tx) > TX_RING_SIZE - TX_MULTICAST_SIZE) {
+               /* The Tx ring is full -- don't add anything!  Hope the mode will be
+                  set again later. */
+               sp->rx_mode = -1;
+               return;
+       }
+
+       if (new_rx_mode != sp->rx_mode) {
+               u8 *config_cmd_data;
+
+               spin_lock_irqsave(&sp->lock, flags);
+               entry = sp->cur_tx++ % TX_RING_SIZE;
+               last_cmd = sp->last_cmd;
+               sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry];
+
+               sp->tx_skbuff[entry] = 0;                       /* Redundant. */
+               sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdConfigure);
+               sp->tx_ring[entry].link =
+                       cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE));
+               config_cmd_data = (void *)&sp->tx_ring[entry].tx_desc_addr;
+               /* Construct a full CmdConfig frame. */
+               memcpy(config_cmd_data, i82558_config_cmd, CONFIG_DATA_SIZE);
+               config_cmd_data[1] = (txfifo << 4) | rxfifo;
+               config_cmd_data[4] = rxdmacount;
+               config_cmd_data[5] = txdmacount + 0x80;
+               config_cmd_data[15] |= (new_rx_mode & 2) ? 1 : 0;
+               /* 0x80 doesn't disable FC 0x84 does.
+                  Disable Flow control since we are not ACK-ing any FC interrupts
+                  for now. --Dragan */
+               config_cmd_data[19] = 0x84;
+               config_cmd_data[19] |= sp->full_duplex ? 0x40 : 0;
+               config_cmd_data[21] = (new_rx_mode & 1) ? 0x0D : 0x05;
+               if (sp->phy[0] & 0x8000) {                      /* Use the AUI port instead. */
+                       config_cmd_data[15] |= 0x80;
+                       config_cmd_data[8] = 0;
+               }
+               /* Trigger the command unit resume. */
+               wait_for_cmd_done(ioaddr + SCBCmd);
+               clear_suspend(last_cmd);
+               outb(CUResume, ioaddr + SCBCmd);
+               if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+                       netif_stop_queue(dev);
+                       sp->tx_full = 1;
+               }
+               spin_unlock_irqrestore(&sp->lock, flags);
+       }
+
+       if (new_rx_mode == 0  &&  dev->mc_count < 4) {
+               /* The simple case of 0-3 multicast list entries occurs often, and
+                  fits within one tx_ring[] entry. */
+               struct dev_mc_list *mclist;
+               u16 *setup_params, *eaddrs;
+
+               spin_lock_irqsave(&sp->lock, flags);
+               entry = sp->cur_tx++ % TX_RING_SIZE;
+               last_cmd = sp->last_cmd;
+               sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry];
+
+               sp->tx_skbuff[entry] = 0;
+               sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdMulticastList);
+               sp->tx_ring[entry].link =
+                       cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE));
+               sp->tx_ring[entry].tx_desc_addr = 0; /* Really MC list count. */
+               setup_params = (u16 *)&sp->tx_ring[entry].tx_desc_addr;
+               *setup_params++ = cpu_to_le16(dev->mc_count*6);
+               /* Fill in the multicast addresses. */
+               for (i = 0, mclist = dev->mc_list; i < dev->mc_count;
+                        i++, mclist = mclist->next) {
+                       eaddrs = (u16 *)mclist->dmi_addr;
+                       *setup_params++ = *eaddrs++;
+                       *setup_params++ = *eaddrs++;
+                       *setup_params++ = *eaddrs++;
+               }
+
+               wait_for_cmd_done(ioaddr + SCBCmd);
+               clear_suspend(last_cmd);
+               /* Immediately trigger the command unit resume. */
+               outb(CUResume, ioaddr + SCBCmd);
+
+               if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+                       netif_stop_queue(dev);
+                       sp->tx_full = 1;
+               }
+               spin_unlock_irqrestore(&sp->lock, flags);
+       } else if (new_rx_mode == 0) {
+               struct dev_mc_list *mclist;
+               u16 *setup_params, *eaddrs;
+               struct speedo_mc_block *mc_blk;
+               struct descriptor *mc_setup_frm;
+               int i;
+
+               mc_blk = kmalloc(sizeof(*mc_blk) + 2 + multicast_filter_limit*6,
+                                                GFP_ATOMIC);
+               if (mc_blk == NULL) {
+                       printk(KERN_ERR "%s: Failed to allocate a setup frame.\n",
+                                  dev->name);
+                       sp->rx_mode = -1; /* We failed, try again. */
+                       return;
+               }
+               mc_blk->next = NULL;
+               mc_blk->len = 2 + multicast_filter_limit*6;
+               mc_blk->frame_dma =
+                       pci_map_single(sp->pdev, &mc_blk->frame, mc_blk->len,
+                                       PCI_DMA_TODEVICE);
+               mc_setup_frm = &mc_blk->frame;
+
+               /* Fill the setup frame. */
+               if (speedo_debug > 1)
+                       printk(KERN_DEBUG "%s: Constructing a setup frame at %p.\n",
+                                  dev->name, mc_setup_frm);
+               mc_setup_frm->cmd_status =
+                       cpu_to_le32(CmdSuspend | CmdIntr | CmdMulticastList);
+               /* Link set below. */
+               setup_params = (u16 *)&mc_setup_frm->params;
+               *setup_params++ = cpu_to_le16(dev->mc_count*6);
+               /* Fill in the multicast addresses. */
+               for (i = 0, mclist = dev->mc_list; i < dev->mc_count;
+                        i++, mclist = mclist->next) {
+                       eaddrs = (u16 *)mclist->dmi_addr;
+                       *setup_params++ = *eaddrs++;
+                       *setup_params++ = *eaddrs++;
+                       *setup_params++ = *eaddrs++;
+               }
+
+               /* Disable interrupts while playing with the Tx Cmd list. */
+               spin_lock_irqsave(&sp->lock, flags);
+
+               if (sp->mc_setup_tail)
+                       sp->mc_setup_tail->next = mc_blk;
+               else
+                       sp->mc_setup_head = mc_blk;
+               sp->mc_setup_tail = mc_blk;
+               mc_blk->tx = sp->cur_tx;
+
+               entry = sp->cur_tx++ % TX_RING_SIZE;
+               last_cmd = sp->last_cmd;
+               sp->last_cmd = mc_setup_frm;
+
+               /* Change the command to a NoOp, pointing to the CmdMulti command. */
+               sp->tx_skbuff[entry] = 0;
+               sp->tx_ring[entry].status = cpu_to_le32(CmdNOp);
+               sp->tx_ring[entry].link = cpu_to_le32(mc_blk->frame_dma);
+
+               /* Set the link in the setup frame. */
+               mc_setup_frm->link =
+                       cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE));
+
+               pci_dma_sync_single(sp->pdev, mc_blk->frame_dma,
+                               mc_blk->len, PCI_DMA_TODEVICE);
+
+               wait_for_cmd_done(ioaddr + SCBCmd);
+               clear_suspend(last_cmd);
+               /* Immediately trigger the command unit resume. */
+               outb(CUResume, ioaddr + SCBCmd);
+
+               if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+                       netif_stop_queue(dev);
+                       sp->tx_full = 1;
+               }
+               spin_unlock_irqrestore(&sp->lock, flags);
+
+               if (speedo_debug > 5)
+                       printk(" CmdMCSetup for %d multicast addresses in entry %d.\n",
+                                  dev->mc_count, entry);
+       }
+
+       sp->rx_mode = new_rx_mode;
+}
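+
+/* To recap the three paths above: a receive-mode change queues a full
+   CmdConfigure frame in the Tx ring; a list of 0-3 multicast addresses
+   fits in an in-ring CmdMulticastList; anything larger is built in a
+   separately allocated speedo_mc_block that the ring reaches through a
+   no-op command, and which speedo_tx_buffer_gc() frees once the chip
+   has consumed it. */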
+\f
+#ifdef CONFIG_PM
+static int eepro100_suspend(struct pci_dev *pdev, u32 state)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       pci_save_state(pdev, sp->pm_state);
+
+       if (!netif_running(dev))
+               return 0;
+
+       netif_device_detach(dev);
+       outl(PortPartialReset, ioaddr + SCBPort);
+       
+       /* XXX call SET_POWER_STATE ()? */
+       return 0;
+}
+
+static int eepro100_resume(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       pci_restore_state(pdev, sp->pm_state);
+
+       if (!netif_running(dev))
+               return 0;
+
+       /* I'm absolutely uncertain if this part of code may work.
+          The problems are:
+           - correct hardware reinitialization;
+               - correct driver behavior between different steps of the
+                 reinitialization;
+               - serialization with other driver calls.
+          2000/03/08  SAW */
+       outw(SCBMaskAll, ioaddr + SCBCmd);
+       speedo_resume(dev);
+       netif_device_attach(dev);
+       sp->rx_mode = -1;
+       sp->flow_ctrl = sp->partner = 0;
+       set_rx_mode(dev);
+       return 0;
+}
+#endif /* CONFIG_PM */
+
+static void __devexit eepro100_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct speedo_private *sp = (struct speedo_private *)dev->priv;
+       
+       unregister_netdev(dev);
+
+       release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1));
+       release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+
+#ifndef USE_IO
+       iounmap((char *)dev->base_addr);
+#endif
+
+       pci_free_consistent(pdev, TX_RING_SIZE * sizeof(struct TxFD)
+                                                               + sizeof(struct speedo_stats),
+                                               sp->tx_ring, sp->tx_ring_dma);
+       pci_disable_device(pdev);
+       kfree(dev);
+}
+\f
+static struct pci_device_id eepro100_pci_tbl[] __devinitdata = {
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82557,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82562ET,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82559ER,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ID1029,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ID1030,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_7,
+               PCI_ANY_ID, PCI_ANY_ID, },
+       { 0,}
+};
+MODULE_DEVICE_TABLE(pci, eepro100_pci_tbl);
+       
+static struct pci_driver eepro100_driver = {
+       name:           "eepro100",
+       id_table:       eepro100_pci_tbl,
+       probe:          eepro100_init_one,
+       remove:         __devexit_p(eepro100_remove_one),
+#ifdef CONFIG_PM
+       suspend:        eepro100_suspend,
+       resume:         eepro100_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init eepro100_init_module(void)
+{
+       if (debug >= 0 && speedo_debug != debug)
+               printk(KERN_INFO "eepro100.c: Debug level is %d.\n", debug);
+       if (debug >= 0)
+               speedo_debug = debug;
+
+       return pci_module_init(&eepro100_driver);
+}
+
+static void __exit eepro100_cleanup_module(void)
+{
+       pci_unregister_driver(&eepro100_driver);
+}
+
+module_init(eepro100_init_module);
+module_exit(eepro100_cleanup_module);
diff --git a/xen-2.4.16/drivers/net/net_init.c b/xen-2.4.16/drivers/net/net_init.c
new file mode 100644 (file)
index 0000000..a0c1f01
--- /dev/null
@@ -0,0 +1,738 @@
+/* net_init.c: Initialization for network devices. */
+/*
+       Written 1993,1994,1995 by Donald Becker.
+
+       The author may be reached as becker@scyld.com, or C/O
+       Scyld Computing Corporation
+       410 Severn Ave., Suite 210
+       Annapolis MD 21403
+
+       This file contains the initialization for the "pl14+" style ethernet
+       drivers.  It should eventually replace most of drivers/net/Space.c.
+       Its primary advantage is that it's able to allocate low-memory buffers.
+       A secondary advantage is that the dangerous NE*000 netcards can reserve
+       their I/O port region before the SCSI probes start.
+
+       Modifications/additions by Bjorn Ekwall <bj0rn@blox.se>:
+               ethdev_index[MAX_ETH_CARDS]
+               register_netdev() / unregister_netdev()
+               
+       Modifications by Wolfgang Walter
+               Use dev_close cleanly so we always shut things down tidily.
+               
+       Changed 29/10/95, Alan Cox to pass sockaddr's around for mac addresses.
+       
+       14/06/96 - Paul Gortmaker:      Add generic eth_change_mtu() function. 
+       24/09/96 - Paul Norton: Add token-ring variants of the netdev functions. 
+       
+       08/11/99 - Alan Cox: Got fed up of the mess in this file and cleaned it
+                       up. We now share common code and have regularised name
+                       allocation setups. Abolished the 16 card limits.
+       03/19/2000 - jgarzik and Urban Widmark: init_etherdev 32-byte align
+       03/21/2001 - jgarzik: alloc_etherdev and friends
+
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+//#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/if_ether.h>
+#include <linux/lib.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/fddidevice.h>
+//#include <linux/hippidevice.h>
+//#include <linux/trdevice.h>
+//#include <linux/fcdevice.h>
+//#include <linux/if_arp.h>
+//#include <linux/if_ltalk.h>
+//#include <linux/rtnetlink.h>
+//#include <net/neighbour.h>
+
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
+
+/* The network devices currently exist only in the socket namespace, so these
+   entries are unused.  The only ones that make sense are
+    open       start the ethercard
+    close      stop  the ethercard
+    ioctl      To get statistics, perhaps set the interface port (AUI, BNC, etc.)
+   One can also imagine getting raw packets using
+    read & write
+   but this is probably better handled by a raw packet socket.
+
+   Given that almost all of these functions are handled in the current
+   socket-based scheme, putting ethercard devices in /dev/ seems pointless.
+   
+   [Removed all support for /dev network devices. When someone adds
+    streams then by magic we get them, but otherwise they are un-needed
+       and a space waste]
+*/
+
+
+static struct net_device *alloc_netdev(int sizeof_priv, const char *mask,
+                                      void (*setup)(struct net_device *))
+{
+       struct net_device *dev;
+       int alloc_size;
+
+       /* ensure 32-byte alignment of the private area */
+       alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+       dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+       if (dev == NULL)
+       {
+               printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+               return NULL;
+       }
+
+       memset(dev, 0, alloc_size);
+
+       if (sizeof_priv)
+               dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+       setup(dev);
+       strcpy(dev->name, mask);
+
+       return dev;
+}
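+
+/* The alignment arithmetic above, worked through: kmalloc() guarantees
+   word alignment but not 32 bytes, so 31 slack bytes are allocated and
+   the private pointer is rounded up.  If dev + 1 landed at 0x...c4, then
+   (0x...c4 + 31) & ~31 == 0x...e0, the next 32-byte boundary.
+   (Illustrative address only.) */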
+
+static struct net_device *init_alloc_dev(int sizeof_priv)
+{
+       struct net_device *dev;
+       int alloc_size;
+
+       /* ensure 32-byte alignment of the private area */
+       alloc_size = sizeof (*dev) + sizeof_priv + 31;
+
+       dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL);
+       if (dev == NULL)
+       {
+               printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n");
+               return NULL;
+       }
+
+       memset(dev, 0, alloc_size);
+
+       if (sizeof_priv)
+               dev->priv = (void *) (((long)(dev + 1) + 31) & ~31);
+
+       return dev;
+}
+
+/* 
+ *     Create and name a device from a prototype, then perform any needed
+ *     setup.
+ */
+
+static struct net_device *init_netdev(struct net_device *dev, int sizeof_priv,
+                                     char *mask, void (*setup)(struct net_device *))
+{
+       int new_device = 0;
+
+       /*
+        *      Allocate a device if one is not provided.
+        */
+        
+       if (dev == NULL) {
+               dev=init_alloc_dev(sizeof_priv);
+               if(dev==NULL)
+                       return NULL;
+               new_device = 1;
+       }
+
+       /*
+        *      Allocate a name
+        */
+        
+       if (dev->name[0] == '\0' || dev->name[0] == ' ') {
+               strcpy(dev->name, mask);
+               if (dev_alloc_name(dev, mask)<0) {
+                       if (new_device)
+                               kfree(dev);
+                       return NULL;
+               }
+       }
+
+       //netdev_boot_setup_check(dev);
+       
+       /*
+        *      Configure via the caller provided setup function then
+        *      register if needed.
+        */
+       
+       setup(dev);
+       
+       if (new_device) {
+               int err;
+
+               rtnl_lock();
+               err = register_netdevice(dev);
+               rtnl_unlock();
+
+               if (err < 0) {
+                       kfree(dev);
+                       dev = NULL;
+               }
+       }
+       return dev;
+}
+
+#if defined(CONFIG_HIPPI) || defined(CONFIG_TR) || defined(CONFIG_NET_FC)
+static int __register_netdev(struct net_device *dev)
+{
+       if (dev->init && dev->init(dev) != 0) {
+               unregister_netdev(dev);
+               return -EIO;
+       }
+       return 0;
+}
+#endif
+
+/**
+ * init_etherdev - Register ethernet device
+ * @dev: An ethernet device structure to be filled in, or %NULL if a new
+ *     struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv.  A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv)
+{
+       return init_netdev(dev, sizeof_priv, "eth%d", ether_setup);
+}
+
+/**
+ * alloc_etherdev - Allocates and sets up an ethernet device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this ethernet device
+ *
+ * Fill in the fields of the device structure with ethernet-generic
+ * values. Basically does everything except registering the device.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_etherdev(int sizeof_priv)
+{
+       return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+}
+
+EXPORT_SYMBOL(init_etherdev);
+EXPORT_SYMBOL(alloc_etherdev);
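+
+/* A minimal sketch of the intended calling pattern, with a hypothetical
+   driver ("my_*" names are placeholders, not part of this file):
+
+       struct net_device *dev = alloc_etherdev(sizeof(struct my_priv));
+       if (dev == NULL)
+               return -ENOMEM;
+       dev->open            = my_open;
+       dev->stop            = my_close;
+       dev->hard_start_xmit = my_start_xmit;
+       if (register_netdev(dev)) {
+               kfree(dev);
+               return -EIO;
+       }
+
+   alloc_etherdev() leaves dev->name set to "eth%d"; register_netdev()
+   below spots the '%' and substitutes the first free ethN name via
+   dev_alloc_name(). */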
+
+static int eth_mac_addr(struct net_device *dev, void *p)
+{
+       struct sockaddr *addr=p;
+       if (netif_running(dev))
+               return -EBUSY;
+       memcpy(dev->dev_addr, addr->sa_data,dev->addr_len);
+       return 0;
+}
+
+static int eth_change_mtu(struct net_device *dev, int new_mtu)
+{
+       if ((new_mtu < 68) || (new_mtu > 1500))
+               return -EINVAL;
+       dev->mtu = new_mtu;
+       return 0;
+}
+
+#ifdef CONFIG_FDDI
+
+/**
+ * init_fddidev - Register FDDI device
+ * @dev: An FDDI device structure to be filled in, or %NULL if a new
+ *     struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this FDDI device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv.  A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fddidev(struct net_device *dev, int sizeof_priv)
+{
+       return init_netdev(dev, sizeof_priv, "fddi%d", fddi_setup);
+}
+
+/**
+ * alloc_fddidev - Register FDDI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this FDDI device
+ *
+ * Fill in the fields of the device structure with FDDI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fddidev(int sizeof_priv)
+{
+       return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
+}
+
+EXPORT_SYMBOL(init_fddidev);
+EXPORT_SYMBOL(alloc_fddidev);
+
+static int fddi_change_mtu(struct net_device *dev, int new_mtu)
+{
+       if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
+               return(-EINVAL);
+       dev->mtu = new_mtu;
+       return(0);
+}
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+
+static int hippi_change_mtu(struct net_device *dev, int new_mtu)
+{
+       /*
+        * HIPPI's got these nice large MTUs.
+        */
+       if ((new_mtu < 68) || (new_mtu > 65280))
+               return -EINVAL;
+       dev->mtu = new_mtu;
+       return(0);
+}
+
+
+/*
+ * For HIPPI we will actually use the lower 4 bytes of the hardware
+ * address as the I-FIELD rather than the actual hardware address.
+ */
+static int hippi_mac_addr(struct net_device *dev, void *p)
+{
+       struct sockaddr *addr = p;
+       if (netif_running(dev))
+               return -EBUSY;
+       memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+       return 0;
+}
+
+
+/**
+ * init_hippi_dev - Register HIPPI device
+ * @dev: A HIPPI device structure to be filled in, or %NULL if a new
+ *     struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this HIPPI device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv.  A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_hippi_dev(struct net_device *dev, int sizeof_priv)
+{
+       return init_netdev(dev, sizeof_priv, "hip%d", hippi_setup);
+}
+
+/**
+ * alloc_hippi_dev - Register HIPPI device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this HIPPI device
+ *
+ * Fill in the fields of the device structure with HIPPI-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_hippi_dev(int sizeof_priv)
+{
+       return alloc_netdev(sizeof_priv, "hip%d", hippi_setup);
+}
+
+int register_hipdev(struct net_device *dev)
+{
+       return __register_netdev(dev);
+}
+
+void unregister_hipdev(struct net_device *dev)
+{
+       unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(init_hippi_dev);
+EXPORT_SYMBOL(alloc_hippi_dev);
+EXPORT_SYMBOL(register_hipdev);
+EXPORT_SYMBOL(unregister_hipdev);
+
+static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p)
+{
+       /* Never send broadcast/multicast ARP messages */
+       p->mcast_probes = 0;
+       /* In IPv6, unicast probes are valid even on NBMA,
+        * because they are encapsulated in the normal IPv6 protocol.
+        * Should be a generic flag.
+        */
+       if (p->tbl->family != AF_INET6)
+               p->ucast_probes = 0;
+       return 0;
+}
+
+#endif /* CONFIG_HIPPI */
+
+void ether_setup(struct net_device *dev)
+{
+       /* Fill in the fields of the device structure with ethernet-generic values.
+          This should be in a common file instead of per-driver.  */
+       
+       dev->change_mtu         = eth_change_mtu;
+       dev->hard_header        = eth_header;
+       dev->rebuild_header     = eth_rebuild_header;
+       dev->set_mac_address    = eth_mac_addr;
+       dev->hard_header_cache  = eth_header_cache;
+       dev->header_cache_update= eth_header_cache_update;
+       dev->hard_header_parse  = eth_header_parse;
+
+       dev->type               = 0; //ARPHRD_ETHER;
+       dev->hard_header_len    = ETH_HLEN;
+       dev->mtu                = 1500; /* eth_mtu */
+       dev->addr_len           = ETH_ALEN;
+       dev->tx_queue_len       = 100;  /* Ethernet wants good queues */        
+       
+       memset(dev->broadcast,0xFF, ETH_ALEN);
+
+       /* New-style flags. */
+       dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
+}
+EXPORT_SYMBOL(ether_setup);
+
+#ifdef CONFIG_FDDI
+
+void fddi_setup(struct net_device *dev)
+{
+       /*
+        * Fill in the fields of the device structure with FDDI-generic values.
+        * This should be in a common file instead of per-driver.
+        */
+       
+       dev->change_mtu                 = fddi_change_mtu;
+       dev->hard_header                = fddi_header;
+       dev->rebuild_header             = fddi_rebuild_header;
+
+       dev->type                               = ARPHRD_FDDI;
+       dev->hard_header_len    = FDDI_K_SNAP_HLEN+3;   /* Assume 802.2 SNAP hdr len + 3 pad bytes */
+       dev->mtu                                = FDDI_K_SNAP_DLEN;             /* Assume max payload of 802.2 SNAP frame */
+       dev->addr_len                   = FDDI_K_ALEN;
+       dev->tx_queue_len               = 100;  /* Long queues on FDDI */
+       
+       memset(dev->broadcast, 0xFF, FDDI_K_ALEN);
+
+       /* New-style flags */
+       dev->flags              = IFF_BROADCAST | IFF_MULTICAST;
+}
+EXPORT_SYMBOL(fddi_setup);
+
+#endif /* CONFIG_FDDI */
+
+#ifdef CONFIG_HIPPI
+void hippi_setup(struct net_device *dev)
+{
+       dev->set_multicast_list = NULL;
+       dev->change_mtu                 = hippi_change_mtu;
+       dev->hard_header                = hippi_header;
+       dev->rebuild_header             = hippi_rebuild_header;
+       dev->set_mac_address            = hippi_mac_addr;
+       dev->hard_header_parse          = NULL;
+       dev->hard_header_cache          = NULL;
+       dev->header_cache_update        = NULL;
+       dev->neigh_setup                = hippi_neigh_setup_dev; 
+
+       /*
+        * We don't support HIPPI `ARP' for the time being, and probably
+        * never will unless someone else implements it. However we
+        * still need a fake ARPHRD to make ifconfig and friends play ball.
+        */
+       dev->type               = ARPHRD_HIPPI;
+       dev->hard_header_len    = HIPPI_HLEN;
+       dev->mtu                = 65280;
+       dev->addr_len           = HIPPI_ALEN;
+       dev->tx_queue_len       = 25 /* 5 */;
+       memset(dev->broadcast, 0xFF, HIPPI_ALEN);
+
+
+       /*
+        * HIPPI doesn't support broadcast+multicast and we only use
+        * static ARP tables. ARP is disabled by hippi_neigh_setup_dev. 
+        */
+       dev->flags = 0; 
+}
+EXPORT_SYMBOL(hippi_setup);
+#endif /* CONFIG_HIPPI */
+
+#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
+
+static int ltalk_change_mtu(struct net_device *dev, int mtu)
+{
+       return -EINVAL;
+}
+
+static int ltalk_mac_addr(struct net_device *dev, void *addr)
+{      
+       return -EINVAL;
+}
+
+
+void ltalk_setup(struct net_device *dev)
+{
+       /* Fill in the fields of the device structure with localtalk-generic values. */
+       
+       dev->change_mtu         = ltalk_change_mtu;
+       dev->hard_header        = NULL;
+       dev->rebuild_header     = NULL;
+       dev->set_mac_address    = ltalk_mac_addr;
+       dev->hard_header_cache  = NULL;
+       dev->header_cache_update= NULL;
+
+       dev->type               = ARPHRD_LOCALTLK;
+       dev->hard_header_len    = LTALK_HLEN;
+       dev->mtu                = LTALK_MTU;
+       dev->addr_len           = LTALK_ALEN;
+       dev->tx_queue_len       = 10;   
+       
+       dev->broadcast[0]       = 0xFF;
+
+       dev->flags              = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
+}
+EXPORT_SYMBOL(ltalk_setup);
+
+#endif /* CONFIG_ATALK || CONFIG_ATALK_MODULE */
+
+int register_netdev(struct net_device *dev)
+{
+       int err;
+
+       rtnl_lock();
+
+       /*
+        *      If the name is a format string the caller wants us to
+        *      do a name allocation
+        */
+        
+       if (strchr(dev->name, '%'))
+       {
+               err = dev_alloc_name(dev, dev->name);
+               if (err < 0)
+                       goto out;
+       }
+       
+       /*
+        *      Back compatibility hook. Kill this one in 2.5
+        */
+       
+       if (dev->name[0]==0 || dev->name[0]==' ')
+       {
+               err = dev_alloc_name(dev, "eth%d");
+               if (err < 0)
+                       goto out;
+       }
+
+       err = register_netdevice(dev);
+
+out:
+       rtnl_unlock();
+       return err;
+}
+
+void unregister_netdev(struct net_device *dev)
+{
+       rtnl_lock();
+       unregister_netdevice(dev);
+       rtnl_unlock();
+}
+
+EXPORT_SYMBOL(register_netdev);
+EXPORT_SYMBOL(unregister_netdev);
+
+#ifdef CONFIG_TR
+
+void tr_setup(struct net_device *dev)
+{
+       /*
+        *      Configure and register
+        */
+       
+       dev->hard_header        = tr_header;
+       dev->rebuild_header     = tr_rebuild_header;
+
+       dev->type               = ARPHRD_IEEE802_TR;
+       dev->hard_header_len    = TR_HLEN;
+       dev->mtu                = 2000;
+       dev->addr_len           = TR_ALEN;
+       dev->tx_queue_len       = 100;  /* Long queues on tr */
+       
+       memset(dev->broadcast,0xFF, TR_ALEN);
+
+       /* New-style flags. */
+       dev->flags              = IFF_BROADCAST | IFF_MULTICAST ;
+}
+
+/**
+ * init_trdev - Register token ring device
+ * @dev: A token ring device structure to be filled in, or %NULL if a new
+ *     struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this token ring device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv.  A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_trdev(struct net_device *dev, int sizeof_priv)
+{
+       return init_netdev(dev, sizeof_priv, "tr%d", tr_setup);
+}
+
+/**
+ * alloc_trdev - Register token ring device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this token ring device
+ *
+ * Fill in the fields of the device structure with token ring-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_trdev(int sizeof_priv)
+{
+       return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
+}
+
+int register_trdev(struct net_device *dev)
+{
+       return __register_netdev(dev);
+}
+
+void unregister_trdev(struct net_device *dev)
+{
+       unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(tr_setup);
+EXPORT_SYMBOL(init_trdev);
+EXPORT_SYMBOL(alloc_trdev);
+EXPORT_SYMBOL(register_trdev);
+EXPORT_SYMBOL(unregister_trdev);
+
+#endif /* CONFIG_TR */
+
+
+#ifdef CONFIG_NET_FC
+
+void fc_setup(struct net_device *dev)
+{
+       dev->hard_header        = fc_header;
+       dev->rebuild_header     = fc_rebuild_header;
+
+       dev->type               = ARPHRD_IEEE802;
+       dev->hard_header_len    = FC_HLEN;
+       dev->mtu                = 2024;
+       dev->addr_len           = FC_ALEN;
+       dev->tx_queue_len       = 100;  /* Long queues on fc */
+
+       memset(dev->broadcast, 0xFF, FC_ALEN);
+
+       /* New-style flags. */
+       dev->flags              = IFF_BROADCAST;
+}
+
+/**
+ * init_fcdev - Register fibre channel device
+ * @dev: A fibre channel device structure to be filled in, or %NULL if a new
+ *     struct should be allocated.
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this fibre channel device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * If no device structure is passed, a new one is constructed, complete with
+ * a private data area of size @sizeof_priv.  A 32-byte (not bit)
+ * alignment is enforced for this private data area.
+ *
+ * If an empty string area is passed as dev->name, or a new structure is made,
+ * a new name string is constructed.
+ */
+
+struct net_device *init_fcdev(struct net_device *dev, int sizeof_priv)
+{
+       return init_netdev(dev, sizeof_priv, "fc%d", fc_setup);
+}
+
+/**
+ * alloc_fcdev - Register fibre channel device
+ * @sizeof_priv: Size of additional driver-private structure to be allocated
+ *     for this fibre channel device
+ *
+ * Fill in the fields of the device structure with fibre channel-generic values.
+ *
+ * Constructs a new net device, complete with a private data area of
+ * size @sizeof_priv.  A 32-byte (not bit) alignment is enforced for
+ * this private data area.
+ */
+
+struct net_device *alloc_fcdev(int sizeof_priv)
+{
+       return alloc_netdev(sizeof_priv, "fc%d", fc_setup);
+}
+
+int register_fcdev(struct net_device *dev)
+{
+       return __register_netdev(dev);
+}
+
+void unregister_fcdev(struct net_device *dev)
+{
+       unregister_netdev(dev);
+}
+
+EXPORT_SYMBOL(fc_setup);
+EXPORT_SYMBOL(init_fcdev);
+EXPORT_SYMBOL(alloc_fcdev);
+EXPORT_SYMBOL(register_fcdev);
+EXPORT_SYMBOL(unregister_fcdev);
+
+#endif /* CONFIG_NET_FC */
+
diff --git a/xen-2.4.16/drivers/net/pcnet32.c b/xen-2.4.16/drivers/net/pcnet32.c
new file mode 100644 (file)
index 0000000..08ace71
--- /dev/null
@@ -0,0 +1,1614 @@
+/* pcnet32.c: An AMD PCnet32 ethernet driver for linux. */
+/*
+ *     Copyright 1996-1999 Thomas Bogendoerfer
+ * 
+ *     Derived from the lance driver written 1993,1994,1995 by Donald Becker.
+ * 
+ *     Copyright 1993 United States Government as represented by the
+ *     Director, National Security Agency.
+ * 
+ *     This software may be used and distributed according to the terms
+ *     of the GNU General Public License, incorporated herein by reference.
+ *
+ *     This driver is for PCnet32 and PCnetPCI based ethercards
+ */
+/**************************************************************************
+ *  23 Oct, 2000.
+ *  Fixed a few bugs, related to running the controller in 32bit mode.
+ *
+ *  Carsten Langgaard, carstenl@mips.com
+ *  Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *************************************************************************/
+
+static const char *version = "pcnet32.c:v1.25kf 26.9.1999 tsbogend@alpha.franken.de\n";
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/lib.h>
+
+#include <linux/module.h>
+
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+//#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+//#include <asm/dma.h>
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+static unsigned int pcnet32_portlist[] __initdata = {0x300, 0x320, 0x340, 0x360, 0};
+
+/*
+ * PCI device identifiers for "new style" Linux PCI Device Drivers
+ */
+static struct pci_device_id pcnet32_pci_tbl[] __devinitdata = {
+    { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+    { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+    { 0, }
+};
+
+static int pcnet32_debug = 1;
+static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */
+
+static struct net_device *pcnet32_dev;
+
+static const int max_interrupt_work = 80;
+static const int rx_copybreak = 200;
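+/* Frames no longer than rx_copybreak bytes are copied into a small freshly
+   allocated skb; longer frames are passed up in place and the ring slot is
+   refilled with a full-size replacement buffer (see pcnet32_rx). */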
+
+#define PORT_AUI      0x00
+#define PORT_10BT     0x01
+#define PORT_GPSI     0x02
+#define PORT_MII      0x03
+
+#define PORT_PORTSEL  0x03
+#define PORT_ASEL     0x04
+#define PORT_100      0x40
+#define PORT_FD              0x80
+
+#define PCNET32_DMA_MASK 0xffffffff
+
+/*
+ * table to translate option values from tulip
+ * to internal options
+ */
+static unsigned char options_mapping[] = {
+    PORT_ASEL,                    /*  0 Auto-select      */
+    PORT_AUI,                     /*  1 BNC/AUI          */
+    PORT_AUI,                     /*  2 AUI/BNC          */ 
+    PORT_ASEL,                    /*  3 not supported    */
+    PORT_10BT | PORT_FD,          /*  4 10baseT-FD       */
+    PORT_ASEL,                    /*  5 not supported    */
+    PORT_ASEL,                    /*  6 not supported    */
+    PORT_ASEL,                    /*  7 not supported    */
+    PORT_ASEL,                    /*  8 not supported    */
+    PORT_MII,                     /*  9 MII 10baseT      */
+    PORT_MII | PORT_FD,                   /* 10 MII 10baseT-FD   */
+    PORT_MII,                     /* 11 MII (autosel)    */
+    PORT_10BT,                    /* 12 10BaseT          */
+    PORT_MII | PORT_100,          /* 13 MII 100BaseTx    */
+    PORT_MII | PORT_100 | PORT_FD, /* 14 MII 100BaseTx-FD */
+    PORT_ASEL                     /* 15 not supported    */
+};
+
+#define MAX_UNITS 8
+static int options[MAX_UNITS];
+static int full_duplex[MAX_UNITS];
+
+/*
+ *                             Theory of Operation
+ * 
+ * This driver uses the same software structure as the normal lance
+ * driver. So look for a verbose description in lance.c. The differences
+ * to the normal lance driver is the use of the 32bit mode of PCnet32
+ * and PCnetPCI chips. Because these chips are 32bit chips, there is no
+ * 16MB limitation and we don't need bounce buffers.
+ */
+/*
+ * History:
+ * v0.01:  Initial version
+ *        only tested on Alpha Noname Board
+ * v0.02:  changed IRQ handling for new interrupt scheme (dev_id)
+ *        tested on a ASUS SP3G
+ * v0.10:  fixed an odd problem with the 79C974 in a Compaq Deskpro XL
+ *        looks like the 974 doesn't like stopping and restarting in a
+ *        short period of time; now we do a reinit of the lance; the
+ *        bug was triggered by doing ifconfig eth0 <ip> broadcast <addr>
+ *        and hangs the machine (thanks to Klaus Liedl for debugging)
+ * v0.12:  by suggestion from Donald Becker: Renamed driver to pcnet32,
+ *        made it standalone (no need for lance.c)
+ * v0.13:  added additional PCI detecting for special PCI devices (Compaq)
+ * v0.14:  stripped down additional PCI probe (thanks to David C Niemi
+ *        and sveneric@xs4all.nl for testing this on their Compaq boxes)
+ * v0.15:  added 79C965 (VLB) probe
+ *        added interrupt sharing for PCI chips
+ * v0.16:  fixed set_multicast_list on Alpha machines
+ * v0.17:  removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c
+ * v0.19:  changed setting of autoselect bit
+ * v0.20:  removed additional Compaq PCI probe; there is now a working one
+ *        in arch/i386/bios32.c
+ * v0.21:  added endian conversion for ppc, from work by cort@cs.nmt.edu
+ * v0.22:  added printing of status to ring dump
+ * v0.23:  changed enet_statistics to net_device_stats
+ * v0.90:  added multicast filter
+ *        added module support
+ *        changed irq probe to new style
+ *        added PCnetFast chip id
+ *        added fix for receive stalls with Intel saturn chipsets
+ *        added in-place rx skbs like in the tulip driver
+ *        minor cleanups
+ * v0.91:  added PCnetFast+ chip id
+ *        back port to 2.0.x
+ * v1.00:  added some stuff from Donald Becker's 2.0.34 version
+ *        added support for byte counters in net_dev_stats
+ * v1.01:  do ring dumps, only when debugging the driver
+ *        increased the transmit timeout
+ * v1.02:  fixed memory leak in pcnet32_init_ring()
+ * v1.10:  workaround for stopped transmitter
+ *        added port selection for modules
+ *        detect special T1/E1 WAN card and setup port selection
+ * v1.11:  fixed wrong checking of Tx errors
+ * v1.20:  added check of return value kmalloc (cpeterso@cs.washington.edu)
+ *        added save original kmalloc addr for freeing (mcr@solidum.com)
+ *        added support for PCnetHome chip (joe@MIT.EDU)
+ *        rewritten PCI card detection
+ *        added dwio mode to get driver working on some PPC machines
+ * v1.21:  added mii selection and mii ioctl
+ * v1.22:  changed pci scanning code to make PPC people happy
+ *        fixed switching to 32bit mode in pcnet32_open() (thanks
+ *        to Michael Richard <mcr@solidum.com> for noticing this one)
+ *        added sub vendor/device id matching (thanks again to 
+ *        Michael Richard <mcr@solidum.com>)
+ *        added chip id for 79c973/975 (thanks to Zach Brown <zab@zabbo.net>)
+ * v1.23   fixed small bug, when manual selecting MII speed/duplex
+ * v1.24   Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO
+ *        underflows.  Added tx_start_pt module parameter. Increased
+ *        TX_RING_SIZE from 16 to 32.  Added #ifdef'd code to use DXSUFLO
+ *        for FAST[+] chipsets. <kaf@fc.hp.com>
+ * v1.24ac Added SMP spinlocking - Alan Cox <alan@redhat.com>
+ * v1.25kf Added No Interrupt on successful Tx for some Tx's <kaf@fc.hp.com>
+ * v1.26   Converted to pci_alloc_consistent, Jamey Hicks / George France
+ *                                           <jamey@crl.dec.com>
+ * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker
+ */
+
+
+/*
+ * Set the number of Tx and Rx buffers, using Log_2(# buffers).
+ * The defaults below give 16 Tx buffers (16 == 2^4) and
+ * 32 Rx buffers (32 == 2^5).
+ */
+#ifndef PCNET32_LOG_TX_BUFFERS
+#define PCNET32_LOG_TX_BUFFERS 4
+#define PCNET32_LOG_RX_BUFFERS 5
+#endif
+
+#undef RX_RING_SIZE
+#undef TX_RING_SIZE
+
+#define TX_RING_SIZE                   (1 << (PCNET32_LOG_TX_BUFFERS))
+#define TX_RING_MOD_MASK               (TX_RING_SIZE - 1)
+#define TX_RING_LEN_BITS               ((PCNET32_LOG_TX_BUFFERS) << 12)
+
+#define RX_RING_SIZE                   (1 << (PCNET32_LOG_RX_BUFFERS))
+#define RX_RING_MOD_MASK               (RX_RING_SIZE - 1)
+#define RX_RING_LEN_BITS               ((PCNET32_LOG_RX_BUFFERS) << 4)
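+
+/*
+ * Worked example: with the defaults above, TX_RING_LEN_BITS == 4 << 12 ==
+ * 0x4000 and RX_RING_LEN_BITS == 5 << 4 == 0x0050; the two encoded ring
+ * lengths are OR'ed together into init_block.tlen_rlen.
+ */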
+
+#define PKT_BUF_SZ             1544
+
+/* Offsets from base I/O address. */
+#define PCNET32_WIO_RDP                0x10
+#define PCNET32_WIO_RAP                0x12
+#define PCNET32_WIO_RESET      0x14
+#define PCNET32_WIO_BDP                0x16
+
+#define PCNET32_DWIO_RDP       0x10
+#define PCNET32_DWIO_RAP       0x14
+#define PCNET32_DWIO_RESET     0x18
+#define PCNET32_DWIO_BDP       0x1C
+
+#define PCNET32_TOTAL_SIZE 0x20
+
+#define CRC_POLYNOMIAL_LE 0xedb88320UL /* Ethernet CRC, little endian */
+
+/* The PCNET32 Rx and Tx ring descriptors. */
+struct pcnet32_rx_head {
+    u32 base;
+    s16 buf_length;
+    s16 status;           
+    u32 msg_length;
+    u32 reserved;
+};
+       
+struct pcnet32_tx_head {
+    u32 base;
+    s16 length;
+    s16 status;
+    u32 misc;
+    u32 reserved;
+};
+
+/* The PCNET32 32-Bit initialization block, described in databook. */
+struct pcnet32_init_block {
+    u16 mode;
+    u16 tlen_rlen;
+    u8 phys_addr[6];
+    u16 reserved;
+    u32 filter[2];
+    /* Receive and transmit ring base, along with extra bits. */    
+    u32 rx_ring;
+    u32 tx_ring;
+};
+
+/* PCnet32 access functions */
+struct pcnet32_access {
+    u16 (*read_csr)(unsigned long, int);
+    void (*write_csr)(unsigned long, int, u16);
+    u16 (*read_bcr)(unsigned long, int);
+    void (*write_bcr)(unsigned long, int, u16);
+    u16 (*read_rap)(unsigned long);
+    void (*write_rap)(unsigned long, u16);
+    void (*reset)(unsigned long);
+};
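+
+/*
+ * Both access tables defined below implement this interface, so the rest
+ * of the driver can touch registers uniformly; e.g. lp->a.write_csr(ioaddr,
+ * 0, 0x0004) stops the controller whether the chip is in 16-bit (WIO) or
+ * 32-bit (DWIO) I/O mode.
+ */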
+
+/*
+ * The first three fields of pcnet32_private are read by the ethernet
+ * device, so the structure must be allocated by pci_alloc_consistent().
+ */
+struct pcnet32_private {
+    /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */
+    struct pcnet32_rx_head   rx_ring[RX_RING_SIZE];
+    struct pcnet32_tx_head   tx_ring[TX_RING_SIZE];
+    struct pcnet32_init_block  init_block;
+    dma_addr_t dma_addr;               /* DMA address of beginning of this object, returned by pci_alloc_consistent */
+    struct pci_dev *pci_dev;           /* Pointer to the associated pci device structure */
+    const char *name;
+    /* The saved address of a sent-in-place packet/buffer, for skfree(). */
+    struct sk_buff *tx_skbuff[TX_RING_SIZE];
+    struct sk_buff *rx_skbuff[RX_RING_SIZE];
+    dma_addr_t tx_dma_addr[TX_RING_SIZE];
+    dma_addr_t rx_dma_addr[RX_RING_SIZE];
+    struct pcnet32_access a;
+    spinlock_t lock;                                   /* Guard lock */
+    unsigned int cur_rx, cur_tx;               /* The next free ring entry */
+    unsigned int dirty_rx, dirty_tx;   /* The ring entries to be free()ed. */
+    struct net_device_stats stats;
+    char tx_full;
+    int         options;
+    int shared_irq:1,                  /* shared irq possible */
+       ltint:1,                        /* enable TxDone-intr inhibitor */
+#ifdef DO_DXSUFLO
+       dxsuflo:1,                      /* disable transmit stop on uflo */
+#endif
+       full_duplex:1,                  /* full duplex possible */
+       mii:1;                          /* mii port available */
+    struct net_device *next;
+};
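+
+/*
+ * Because the rings and the init block are embedded in pcnet32_private and
+ * the whole object comes from pci_alloc_consistent, the bus address of any
+ * field is simply lp->dma_addr + offsetof(struct pcnet32_private, field) -
+ * the pattern used throughout the driver.
+ */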
+
+static int  pcnet32_probe_vlbus(int cards_found);
+static int  pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *);
+static int  pcnet32_probe1(unsigned long, unsigned char, int, int, struct pci_dev *);
+static int  pcnet32_open(struct net_device *);
+static int  pcnet32_init_ring(struct net_device *);
+static int  pcnet32_start_xmit(struct sk_buff *, struct net_device *);
+static int  pcnet32_rx(struct net_device *);
+static void pcnet32_tx_timeout (struct net_device *dev);
+static void pcnet32_interrupt(int, void *, struct pt_regs *);
+static int  pcnet32_close(struct net_device *);
+static struct net_device_stats *pcnet32_get_stats(struct net_device *);
+static void pcnet32_set_multicast_list(struct net_device *);
+#ifdef HAVE_PRIVATE_IOCTL
+static int  pcnet32_mii_ioctl(struct net_device *, struct ifreq *, int);
+#endif
+
+enum pci_flags_bit {
+    PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
+    PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+};
+
+struct pcnet32_pci_id_info {
+    const char *name;
+    u16 vendor_id, device_id, svid, sdid, flags;
+    int io_size;
+    int (*probe1) (unsigned long, unsigned char, int, int, struct pci_dev *);
+};
+
+
+MODULE_DEVICE_TABLE (pci, pcnet32_pci_tbl);
+
+static u16 pcnet32_wio_read_csr (unsigned long addr, int index)
+{
+    outw (index, addr+PCNET32_WIO_RAP);
+    return inw (addr+PCNET32_WIO_RDP);
+}
+
+static void pcnet32_wio_write_csr (unsigned long addr, int index, u16 val)
+{
+    outw (index, addr+PCNET32_WIO_RAP);
+    outw (val, addr+PCNET32_WIO_RDP);
+}
+
+static u16 pcnet32_wio_read_bcr (unsigned long addr, int index)
+{
+    outw (index, addr+PCNET32_WIO_RAP);
+    return inw (addr+PCNET32_WIO_BDP);
+}
+
+static void pcnet32_wio_write_bcr (unsigned long addr, int index, u16 val)
+{
+    outw (index, addr+PCNET32_WIO_RAP);
+    outw (val, addr+PCNET32_WIO_BDP);
+}
+
+static u16 pcnet32_wio_read_rap (unsigned long addr)
+{
+    return inw (addr+PCNET32_WIO_RAP);
+}
+
+static void pcnet32_wio_write_rap (unsigned long addr, u16 val)
+{
+    outw (val, addr+PCNET32_WIO_RAP);
+}
+
+static void pcnet32_wio_reset (unsigned long addr)
+{
+    inw (addr+PCNET32_WIO_RESET);
+}
+
+static int pcnet32_wio_check (unsigned long addr)
+{
+    outw (88, addr+PCNET32_WIO_RAP);
+    return (inw (addr+PCNET32_WIO_RAP) == 88);
+}
+
+static struct pcnet32_access pcnet32_wio = {
+    pcnet32_wio_read_csr,
+    pcnet32_wio_write_csr,
+    pcnet32_wio_read_bcr,
+    pcnet32_wio_write_bcr,
+    pcnet32_wio_read_rap,
+    pcnet32_wio_write_rap,
+    pcnet32_wio_reset
+};
+
+static u16 pcnet32_dwio_read_csr (unsigned long addr, int index)
+{
+    outl (index, addr+PCNET32_DWIO_RAP);
+    return (inl (addr+PCNET32_DWIO_RDP) & 0xffff);
+}
+
+static void pcnet32_dwio_write_csr (unsigned long addr, int index, u16 val)
+{
+    outl (index, addr+PCNET32_DWIO_RAP);
+    outl (val, addr+PCNET32_DWIO_RDP);
+}
+
+static u16 pcnet32_dwio_read_bcr (unsigned long addr, int index)
+{
+    outl (index, addr+PCNET32_DWIO_RAP);
+    return (inl (addr+PCNET32_DWIO_BDP) & 0xffff);
+}
+
+static void pcnet32_dwio_write_bcr (unsigned long addr, int index, u16 val)
+{
+    outl (index, addr+PCNET32_DWIO_RAP);
+    outl (val, addr+PCNET32_DWIO_BDP);
+}
+
+static u16 pcnet32_dwio_read_rap (unsigned long addr)
+{
+    return (inl (addr+PCNET32_DWIO_RAP) & 0xffff);
+}
+
+static void pcnet32_dwio_write_rap (unsigned long addr, u16 val)
+{
+    outl (val, addr+PCNET32_DWIO_RAP);
+}
+
+static void pcnet32_dwio_reset (unsigned long addr)
+{
+    inl (addr+PCNET32_DWIO_RESET);
+}
+
+static int pcnet32_dwio_check (unsigned long addr)
+{
+    outl (88, addr+PCNET32_DWIO_RAP);
+    return ((inl (addr+PCNET32_DWIO_RAP) & 0xffff) == 88);
+}
+
+static struct pcnet32_access pcnet32_dwio = {
+    pcnet32_dwio_read_csr,
+    pcnet32_dwio_write_csr,
+    pcnet32_dwio_read_bcr,
+    pcnet32_dwio_write_bcr,
+    pcnet32_dwio_read_rap,
+    pcnet32_dwio_write_rap,
+    pcnet32_dwio_reset
+
+};
+
+\f
+
+/* Probes only for non-PCI devices; PCI devices are handled by
+   pci_register_driver via pcnet32_probe_pci. */
+static int __init pcnet32_probe_vlbus(int cards_found)
+{
+    unsigned long ioaddr = 0; // FIXME dev ? dev->base_addr: 0;
+    unsigned int  irq_line = 0; // FIXME dev ? dev->irq : 0;
+    int *port;
+    
+    printk(KERN_INFO "pcnet32_probe_vlbus: cards_found=%d\n", cards_found);
+#ifndef __powerpc__
+    if (ioaddr > 0x1ff) {
+       if (check_region(ioaddr, PCNET32_TOTAL_SIZE) == 0)
+           return pcnet32_probe1(ioaddr, irq_line, 0, 0, NULL);
+       else
+           return -ENODEV;
+    } else
+#endif
+       if (ioaddr != 0)
+           return -ENXIO;
+    
+    /* now look for PCnet32 VLB cards */
+    for (port = pcnet32_portlist; *port; port++) {
+       unsigned long ioaddr = *port;
+       
+       if ( check_region(ioaddr, PCNET32_TOTAL_SIZE) == 0) {
+           /* check if there is really a pcnet chip on that ioaddr */
+           if ((inb(ioaddr + 14) == 0x57) &&
+               (inb(ioaddr + 15) == 0x57) &&
+               (pcnet32_probe1(ioaddr, 0, 0, 0, NULL) == 0))
+               cards_found++;
+       }
+    }
+    return cards_found ? 0: -ENODEV;
+}
+
+
+
+static int __devinit
+pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+    static int card_idx;
+    long ioaddr;
+    int err = 0;
+
+    printk(KERN_INFO "pcnet32_probe_pci: found device %#08x.%#08x\n", ent->vendor, ent->device);
+
+    if ((err = pci_enable_device(pdev)) < 0) {
+       printk(KERN_ERR "pcnet32.c: failed to enable device -- err=%d\n", err);
+       return err;
+    }
+    pci_set_master(pdev);
+
+    ioaddr = pci_resource_start (pdev, 0);
+    printk(KERN_INFO "    ioaddr=%#08lx  resource_flags=%#08lx\n", ioaddr, pci_resource_flags (pdev, 0));
+    if (!ioaddr) {
+        printk (KERN_ERR "no PCI IO resources, aborting\n");
+        return -ENODEV;
+    }
+       
+    if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) {
+       printk(KERN_ERR "pcnet32.c: architecture does not support 32bit PCI busmaster DMA\n");
+       return -ENODEV;
+    }
+
+    return pcnet32_probe1(ioaddr, pdev->irq, 1, card_idx, pdev);
+}
+
+
+/* pcnet32_probe1 
+ *  Called from both pcnet32_probe_vlbus and pcnet32_probe_pci.
+ *  pdev will be NULL when called from pcnet32_probe_vlbus.
+ */
+static int __devinit
+pcnet32_probe1(unsigned long ioaddr, unsigned char irq_line, int shared, int card_idx, struct pci_dev *pdev)
+{
+    struct pcnet32_private *lp;
+    struct resource *res;
+    dma_addr_t lp_dma_addr;
+    int i,media,fdx = 0, mii = 0, fset = 0;
+#ifdef DO_DXSUFLO
+    int dxsuflo = 0;
+#endif
+    int ltint = 0;
+    int chip_version;
+    char *chipname;
+    struct net_device *dev;
+    struct pcnet32_access *a = NULL;
+
+    /* reset the chip */
+    pcnet32_dwio_reset(ioaddr);
+    pcnet32_wio_reset(ioaddr);
+
+    /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */
+    if (pcnet32_wio_read_csr (ioaddr, 0) == 4 && pcnet32_wio_check (ioaddr)) {
+       a = &pcnet32_wio;
+    } else {
+       if (pcnet32_dwio_read_csr (ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) {
+           a = &pcnet32_dwio;
+       } else
+           return -ENODEV;
+    }
+
+    chip_version = a->read_csr (ioaddr, 88) | (a->read_csr (ioaddr,89) << 16);
+    if (pcnet32_debug > 2)
+       printk(KERN_INFO "  PCnet chip version is %#x.\n", chip_version);
+    if ((chip_version & 0xfff) != 0x003)
+       return -ENODEV;
+    chip_version = (chip_version >> 12) & 0xffff;
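+    /* Example: a PCnet/PCI II 79C970A reports part ID 0x2621003 here, so
+     * after the shift chip_version == 0x2621 and hits the case below. */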
+    switch (chip_version) {
+    case 0x2420:
+       chipname = "PCnet/PCI 79C970"; /* PCI */
+       break;
+    case 0x2430:
+       if (shared)
+           chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */
+       else
+           chipname = "PCnet/32 79C965"; /* 486/VL bus */
+       break;
+    case 0x2621:
+       chipname = "PCnet/PCI II 79C970A"; /* PCI */
+       fdx = 1;
+       break;
+    case 0x2623:
+       chipname = "PCnet/FAST 79C971"; /* PCI */
+       fdx = 1; mii = 1; fset = 1;
+       ltint = 1;
+       break;
+    case 0x2624:
+       chipname = "PCnet/FAST+ 79C972"; /* PCI */
+       fdx = 1; mii = 1; fset = 1;
+       break;
+    case 0x2625:
+       chipname = "PCnet/FAST III 79C973"; /* PCI */
+       fdx = 1; mii = 1;
+       break;
+    case 0x2626:
+       chipname = "PCnet/Home 79C978"; /* PCI */
+       fdx = 1;
+       /* 
+        * This is based on specs published at www.amd.com.  This section
+        * assumes that a card with a 79C978 wants to go into 1Mb HomePNA
+        * mode.  The 79C978 can also go into standard ethernet, and there
+        * probably should be some sort of module option to select the
+        * mode by which the card should operate.
+        */
+       /* switch to home wiring mode */
+       media = a->read_bcr (ioaddr, 49);
+#if 0
+       if (pcnet32_debug > 2)
+           printk(KERN_DEBUG "pcnet32: pcnet32 media value %#x.\n",  media);
+       media &= ~3;
+       media |= 1;
+#endif
+       if (pcnet32_debug > 2)
+           printk(KERN_DEBUG "pcnet32: pcnet32 media reset to %#x.\n",  media);
+       a->write_bcr (ioaddr, 49, media);
+       break;
+    case 0x2627:
+       chipname = "PCnet/FAST III 79C975"; /* PCI */
+       fdx = 1; mii = 1;
+       break;
+    default:
+       printk(KERN_INFO "pcnet32: PCnet version %#x, no PCnet32 chip.\n",chip_version);
+       return -ENODEV;
+    }
+
+    /*
+     * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit
+     * starting until the packet is loaded. Score one for reliability, lose
+     * one for latency - although on PCI this isn't a big loss. Older chips
+     * have FIFOs smaller than a packet, so you can't do this.
+     */
+        
+    if(fset)
+    {
+       a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800));
+       a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00);
+#ifdef DO_DXSUFLO
+       dxsuflo = 1;
+#endif
+       ltint = 1;
+    }
+    
+    dev = init_etherdev(NULL, 0);
+    if(dev==NULL)
+       return -ENOMEM;
+
+    printk(KERN_INFO "%s: %s at %#3lx,", dev->name, chipname, ioaddr);
+
+    /* In most chips, after a chip reset, the ethernet address is read from the
+     * station address PROM at the base address and programmed into the
+     * "Physical Address Registers" CSR12-14.
+     * As a precautionary measure, we read the PROM values and complain if
+     * they disagree with the CSRs.  Either way, we use the CSR values, and
+     * double check that they are valid.
+     */
+    for (i = 0; i < 3; i++) {
+       unsigned int val;
+       val = a->read_csr(ioaddr, i+12) & 0x0ffff;
+       /* There may be endianness issues here. */
+       dev->dev_addr[2*i] = val & 0x0ff;
+       dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff;
+    }
+    {
+       u8 promaddr[6];
+       for (i = 0; i < 6; i++) {
+           promaddr[i] = inb(ioaddr + i);
+       }
+       if( memcmp( promaddr, dev->dev_addr, 6) )
+       {
+           printk(" warning PROM address does not match CSR address\n");
+#if defined(__i386__)
+           printk(KERN_WARNING "%s: Probably a Compaq, using the PROM address of", dev->name);
+           memcpy(dev->dev_addr, promaddr, 6);
+#endif
+       }                   
+    }
+    /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */
+    if( !is_valid_ether_addr(dev->dev_addr) )
+       for (i = 0; i < 6; i++)
+           dev->dev_addr[i]=0;
+
+    for (i = 0; i < 6; i++)
+       printk(" %2.2x", dev->dev_addr[i] );
+
+    if (((chip_version + 1) & 0xfffe) == 0x2624) { /* Version 0x2623 or 0x2624 */
+       i = a->read_csr(ioaddr, 80) & 0x0C00;  /* Check tx_start_pt */
+       printk("\n" KERN_INFO "    tx_start_pt(0x%04x):",i);
+       switch(i>>10) {
+           case 0: printk("  20 bytes,"); break;
+           case 1: printk("  64 bytes,"); break;
+           case 2: printk(" 128 bytes,"); break;
+           case 3: printk("~220 bytes,"); break;
+       }
+       i = a->read_bcr(ioaddr, 18);  /* Check Burst/Bus control */
+       printk(" BCR18(%x):",i&0xffff);
+       if (i & (1<<5)) printk("BurstWrEn ");
+       if (i & (1<<6)) printk("BurstRdEn ");
+       if (i & (1<<7)) printk("DWordIO ");
+       if (i & (1<<11)) printk("NoUFlow ");
+       i = a->read_bcr(ioaddr, 25);
+       printk("\n" KERN_INFO "    SRAMSIZE=0x%04x,",i<<8);
+       i = a->read_bcr(ioaddr, 26);
+       printk(" SRAM_BND=0x%04x,",i<<8);
+       i = a->read_bcr(ioaddr, 27);
+       if (i & (1<<14)) printk("LowLatRx");
+    }
+
+    dev->base_addr = ioaddr;
+    res = request_region(ioaddr, PCNET32_TOTAL_SIZE, chipname);
+    if (res == NULL)
+       return -EBUSY;
+    
+    /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */
+    if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) {
+       release_resource(res);
+       return -ENOMEM;
+    }
+
+    memset(lp, 0, sizeof(*lp));
+    lp->dma_addr = lp_dma_addr;
+    lp->pci_dev = pdev;
+    printk("\n" KERN_INFO "pcnet32: pcnet32_private lp=%p lp_dma_addr=%#08x", lp, lp_dma_addr);
+
+    spin_lock_init(&lp->lock);
+    
+    dev->priv = lp;
+    lp->name = chipname;
+    lp->shared_irq = shared;
+    lp->full_duplex = fdx;
+#ifdef DO_DXSUFLO
+    lp->dxsuflo = dxsuflo;
+#endif
+    lp->ltint = ltint;
+    lp->mii = mii;
+    if (options[card_idx] >= sizeof (options_mapping)) /* out of range: autoselect */
+       lp->options = PORT_ASEL;
+    else
+       lp->options = options_mapping[options[card_idx]];
+    
+    if (fdx && !(lp->options & PORT_ASEL) && full_duplex[card_idx])
+       lp->options |= PORT_FD;
+    
+    if (a == NULL) {
+      printk(KERN_ERR "pcnet32: No access methods\n");
+      pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+      release_resource(res);
+      return -ENODEV;
+    }
+    lp->a = *a;
+    
+    /* detect special T1/E1 WAN card by checking for MAC address */
+    if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 && dev->dev_addr[2] == 0x75)
+       lp->options = PORT_FD | PORT_GPSI;
+
+    lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */
+    lp->init_block.tlen_rlen = le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); 
+    for (i = 0; i < 6; i++)
+       lp->init_block.phys_addr[i] = dev->dev_addr[i];
+    lp->init_block.filter[0] = 0x00000000;
+    lp->init_block.filter[1] = 0x00000000;
+    lp->init_block.rx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, rx_ring));
+    lp->init_block.tx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, tx_ring));
+    
+    /* switch pcnet32 to 32bit mode */
+    a->write_bcr (ioaddr, 20, 2);
+
+    a->write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & 0xffff);
+    a->write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16);
+    
+    if (irq_line) {
+       dev->irq = irq_line;
+    }
+    
+    if (dev->irq >= 2)
+       printk(" assigned IRQ %d.\n", dev->irq);
+    else {
+       unsigned long irq_mask = probe_irq_on();
+       
+       /*
+        * To auto-IRQ we enable the initialization-done and DMA error
+        * interrupts. For ISA boards we get a DMA error, but VLB and PCI
+        * boards will work.
+        */
+       /* Trigger an initialization just for the interrupt. */
+       a->write_csr (ioaddr, 0, 0x41);
+       mdelay (1);
+       
+       dev->irq = probe_irq_off (irq_mask);
+       if (dev->irq)
+           printk(", probed IRQ %d.\n", dev->irq);
+       else {
+           printk(", failed to detect IRQ line.\n");
+           pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+           release_resource(res);
+           return -ENODEV;
+       }
+    }
+
+    if (pcnet32_debug > 0)
+       printk(KERN_INFO "%s", version);
+    
+    /* The PCNET32-specific entries in the device structure. */
+    dev->open = &pcnet32_open;
+    dev->hard_start_xmit = &pcnet32_start_xmit;
+    dev->stop = &pcnet32_close;
+    dev->get_stats = &pcnet32_get_stats;
+    dev->set_multicast_list = &pcnet32_set_multicast_list;
+#ifdef HAVE_PRIVATE_IOCTL
+    dev->do_ioctl = &pcnet32_mii_ioctl;
+#endif
+    dev->tx_timeout = pcnet32_tx_timeout;
+    dev->watchdog_timeo = (HZ >> 1);
+
+    lp->next = pcnet32_dev;
+    pcnet32_dev = dev;
+
+    /* Fill in the generic fields of the device structure. */
+    ether_setup(dev);
+    return 0;
+}
+
+\f
+static int
+pcnet32_open(struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    unsigned long ioaddr = dev->base_addr;
+    u16 val;
+    int i;
+
+    if (dev->irq == 0 ||
+       request_irq(dev->irq, &pcnet32_interrupt,
+                   lp->shared_irq ? SA_SHIRQ : 0, lp->name, (void *)dev)) {
+       return -EAGAIN;
+    }
+
+    /* Check for a valid station address */
+    if( !is_valid_ether_addr(dev->dev_addr) )
+       return -EINVAL;
+
+    /* Reset the PCNET32 */
+    lp->a.reset (ioaddr);
+
+    /* switch pcnet32 to 32bit mode */
+    lp->a.write_bcr (ioaddr, 20, 2);
+
+    if (pcnet32_debug > 1)
+       printk(KERN_DEBUG "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n",
+              dev->name, dev->irq,
+              (u32) (lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)),
+              (u32) (lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)),
+              (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block)));
+    
+    /* set/reset autoselect bit */
+    val = lp->a.read_bcr (ioaddr, 2) & ~2;
+    if (lp->options & PORT_ASEL)
+       val |= 2;
+    lp->a.write_bcr (ioaddr, 2, val);
+    
+    /* handle full duplex setting */
+    if (lp->full_duplex) {
+       val = lp->a.read_bcr (ioaddr, 9) & ~3;
+       if (lp->options & PORT_FD) {
+           val |= 1;
+           if (lp->options == (PORT_FD | PORT_AUI))
+               val |= 2;
+       }
+       lp->a.write_bcr (ioaddr, 9, val);
+    }
+    
+    /* set/reset GPSI bit in test register */
+    val = lp->a.read_csr (ioaddr, 124) & ~0x10;
+    if ((lp->options & PORT_PORTSEL) == PORT_GPSI)
+       val |= 0x10;
+    lp->a.write_csr (ioaddr, 124, val);
+    
+    if (lp->mii && !(lp->options & PORT_ASEL)) {
+       val = lp->a.read_bcr (ioaddr, 32) & ~0x38; /* disable Auto Negotiation, set 10Mpbs, HD */
+       if (lp->options & PORT_FD)
+           val |= 0x10;
+       if (lp->options & PORT_100)
+           val |= 0x08;
+       lp->a.write_bcr (ioaddr, 32, val);
+    } else {
+       if (lp->options & PORT_ASEL) {  /* enable auto negotiate, setup, disable fd */
+               val = lp->a.read_bcr(ioaddr, 32) & ~0x98;
+               val |= 0x20;
+               lp->a.write_bcr(ioaddr, 32, val);
+       }
+    }
+
+#ifdef DO_DXSUFLO 
+    if (lp->dxsuflo) { /* Disable transmit stop on underflow */
+       val = lp->a.read_csr (ioaddr, 3);
+       val |= 0x40;
+       lp->a.write_csr (ioaddr, 3, val);
+    }
+#endif
+    if (lp->ltint) { /* Enable TxDone-intr inhibitor */
+       val = lp->a.read_csr (ioaddr, 5);
+       val |= (1<<14);
+       lp->a.write_csr (ioaddr, 5, val);
+    }
+   
+    lp->init_block.mode = le16_to_cpu((lp->options & PORT_PORTSEL) << 7);
+    lp->init_block.filter[0] = 0x00000000;
+    lp->init_block.filter[1] = 0x00000000;
+    if (pcnet32_init_ring(dev))
+       return -ENOMEM;
+    
+    /* Re-initialize the PCNET32, and start it when done. */
+    lp->a.write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) &0xffff);
+    lp->a.write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16);
+
+    lp->a.write_csr (ioaddr, 4, 0x0915);
+    lp->a.write_csr (ioaddr, 0, 0x0001);
+
+    netif_start_queue(dev);
+
+    i = 0;
+    while (i++ < 100)
+       if (lp->a.read_csr (ioaddr, 0) & 0x0100)
+           break;
+    /* 
+     * We used to clear the InitDone bit, 0x0100, here but Mark Stockton
+     * reports that doing so triggers a bug in the '974.
+     */
+    lp->a.write_csr (ioaddr, 0, 0x0042);
+
+    if (pcnet32_debug > 2)
+       printk(KERN_DEBUG "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n",
+              dev->name, i, (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block)),
+              lp->a.read_csr (ioaddr, 0));
+
+
+    MOD_INC_USE_COUNT;
+    
+    return 0;  /* Always succeed */
+}
+
+/*
+ * The LANCE has been halted for one reason or another (busmaster memory
+ * arbitration error, Tx FIFO underflow, driver stopped it to reconfigure,
+ * etc.).  Modern LANCE variants always reload their ring-buffer
+ * configuration when restarted, so we must reinitialize our ring
+ * context before restarting.  As part of this reinitialization,
+ * find all packets still on the Tx ring and pretend that they had been
+ * sent (in effect, drop the packets on the floor) - the higher-level
+ * protocols will time out and retransmit.  It'd be better to shuffle
+ * these skbs to a temp list and then actually re-Tx them after
+ * restarting the chip, but I'm too lazy to do so right now.  dplatt@3do.com
+ */
+
+static void 
+pcnet32_purge_tx_ring(struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    int i;
+
+    for (i = 0; i < TX_RING_SIZE; i++) {
+       if (lp->tx_skbuff[i]) {
+            pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE);
+           dev_kfree_skb(lp->tx_skbuff[i]); 
+           lp->tx_skbuff[i] = NULL;
+            lp->tx_dma_addr[i] = 0;
+       }
+    }
+}
+
+
+/* Initialize the PCNET32 Rx and Tx rings. */
+static int
+pcnet32_init_ring(struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    int i;
+
+    lp->tx_full = 0;
+    lp->cur_rx = lp->cur_tx = 0;
+    lp->dirty_rx = lp->dirty_tx = 0;
+
+    for (i = 0; i < RX_RING_SIZE; i++) {
+        struct sk_buff *rx_skbuff = lp->rx_skbuff[i];
+       if (rx_skbuff == NULL) {
+           if (!(rx_skbuff = lp->rx_skbuff[i] = dev_alloc_skb (PKT_BUF_SZ))) {
+               /* there is not much we can do at this point */
+               printk(KERN_ERR "%s: pcnet32_init_ring dev_alloc_skb failed.\n",dev->name);
+               return -1;
+           }
+           skb_reserve (rx_skbuff, 2);
+       }
+        lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE);
+       lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]);
+       lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ);
+       lp->rx_ring[i].status = le16_to_cpu(0x8000);
+    }
+    /* The Tx buffer address is filled in as needed, but we do need to clear
+       the upper ownership bit. */
+    for (i = 0; i < TX_RING_SIZE; i++) {
+       lp->tx_ring[i].base = 0;
+       lp->tx_ring[i].status = 0;
+        lp->tx_dma_addr[i] = 0;
+    }
+
+    lp->init_block.tlen_rlen = le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS);
+    for (i = 0; i < 6; i++)
+       lp->init_block.phys_addr[i] = dev->dev_addr[i];
+    lp->init_block.rx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, rx_ring));
+    lp->init_block.tx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, tx_ring));
+    return 0;
+}
+
+static void
+pcnet32_restart(struct net_device *dev, unsigned int csr0_bits)
+{
+    struct pcnet32_private *lp = dev->priv;
+    unsigned long ioaddr = dev->base_addr;
+    int i;
+    
+    pcnet32_purge_tx_ring(dev);
+    if (pcnet32_init_ring(dev))
+       return;
+    
+    /* ReInit Ring */
+    lp->a.write_csr (ioaddr, 0, 1);
+    i = 0;
+    while (i++ < 100)
+       if (lp->a.read_csr (ioaddr, 0) & 0x0100)
+           break;
+
+    lp->a.write_csr (ioaddr, 0, csr0_bits);
+}
+
+
+static void
+pcnet32_tx_timeout (struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    unsigned int ioaddr = dev->base_addr;
+
+    /* Transmitter timeout, serious problems. */
+       printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n",
+              dev->name, lp->a.read_csr (ioaddr, 0));
+       lp->a.write_csr (ioaddr, 0, 0x0004);
+       lp->stats.tx_errors++;
+       if (pcnet32_debug > 2) {
+           int i;
+           printk(KERN_DEBUG " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.",
+              lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "",
+              lp->cur_rx);
+           for (i = 0 ; i < RX_RING_SIZE; i++)
+           printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
+                  lp->rx_ring[i].base, -lp->rx_ring[i].buf_length,
+                  lp->rx_ring[i].msg_length, (unsigned)lp->rx_ring[i].status);
+           for (i = 0 ; i < TX_RING_SIZE; i++)
+           printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
+                  lp->tx_ring[i].base, -lp->tx_ring[i].length,
+                  lp->tx_ring[i].misc, (unsigned)lp->tx_ring[i].status);
+           printk("\n");
+       }
+       pcnet32_restart(dev, 0x0042);
+
+       dev->trans_start = jiffies;
+       netif_start_queue(dev);
+}
+
+
+static int
+pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    unsigned int ioaddr = dev->base_addr;
+    u16 status;
+    int entry;
+    unsigned long flags;
+
+    if (pcnet32_debug > 3) {
+       printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n",
+              dev->name, lp->a.read_csr (ioaddr, 0));
+    }
+
+    spin_lock_irqsave(&lp->lock, flags);
+
+    /* Default status -- will not enable Successful-TxDone
+     * interrupt when that option is available to us.
+     */
+    status = 0x8300;
+    if ((lp->ltint) &&
+       ((lp->cur_tx - lp->dirty_tx == TX_RING_SIZE/2) ||
+        (lp->cur_tx - lp->dirty_tx >= TX_RING_SIZE-2)))
+    {
+       /* Enable Successful-TxDone interrupt if we have
+        * 1/2 of, or nearly all of, our ring buffer Tx'd
+        * but not yet cleaned up.  Thus, most of the time,
+        * we will not enable Successful-TxDone interrupts.
+        */
+       status = 0x9300;
+    }
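+    /* 0x8300 sets the OWN, STP and ENP descriptor bits; 0x9300 also sets
+     * bit 12 (LTINT), so with the TxDone-intr inhibitor enabled in
+     * pcnet32_open() only these marked descriptors raise a Tx interrupt. */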
+  
+    /* Fill in a Tx ring entry */
+  
+    /* Mask to ring buffer boundary. */
+    entry = lp->cur_tx & TX_RING_MOD_MASK;
+  
+    /* Caution: the write order is important here, set the base address
+       with the "ownership" bits last. */
+
+    lp->tx_ring[entry].length = le16_to_cpu(-skb->len);
+
+    lp->tx_ring[entry].misc = 0x00000000;
+
+    lp->tx_skbuff[entry] = skb;
+    lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE);
+    lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]);
+    lp->tx_ring[entry].status = le16_to_cpu(status);
+
+    lp->cur_tx++;
+    lp->stats.tx_bytes += skb->len;
+
+    /* Trigger an immediate send poll. */
+    lp->a.write_csr (ioaddr, 0, 0x0048);
+
+    dev->trans_start = jiffies;
+
+    if (lp->tx_ring[(entry+1) & TX_RING_MOD_MASK].base == 0)
+       netif_start_queue(dev);
+    else {
+       lp->tx_full = 1;
+       netif_stop_queue(dev);
+    }
+    spin_unlock_irqrestore(&lp->lock, flags);
+    return 0;
+}
+
+/* The PCNET32 interrupt handler. */
+static void
+pcnet32_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+{
+    struct net_device *dev = dev_id;
+    struct pcnet32_private *lp;
+    unsigned long ioaddr;
+    u16 csr0,rap;
+    int boguscnt =  max_interrupt_work;
+    int must_restart;
+
+    if (dev == NULL) {
+       printk (KERN_DEBUG "pcnet32_interrupt(): irq %d for unknown device.\n", irq);
+       return;
+    }
+
+    ioaddr = dev->base_addr;
+    lp = dev->priv;
+    
+    spin_lock(&lp->lock);
+    
+    rap = lp->a.read_rap(ioaddr);
+    while ((csr0 = lp->a.read_csr (ioaddr, 0)) & 0x8600 && --boguscnt >= 0) {
+       /* Acknowledge all of the current interrupt sources ASAP. */
+       lp->a.write_csr (ioaddr, 0, csr0 & ~0x004f);
+
+       must_restart = 0;
+
+       if (pcnet32_debug > 5)
+           printk(KERN_DEBUG "%s: interrupt  csr0=%#2.2x new csr=%#2.2x.\n",
+                  dev->name, csr0, lp->a.read_csr (ioaddr, 0));
+
+       if (csr0 & 0x0400)              /* Rx interrupt */
+           pcnet32_rx(dev);
+
+       if (csr0 & 0x0200) {            /* Tx-done interrupt */
+           unsigned int dirty_tx = lp->dirty_tx;
+
+           while (dirty_tx < lp->cur_tx) {
+               int entry = dirty_tx & TX_RING_MOD_MASK;
+               int status = (short)le16_to_cpu(lp->tx_ring[entry].status);
+                       
+               if (status < 0)
+                   break;              /* It still hasn't been Txed */
+
+               lp->tx_ring[entry].base = 0;
+
+               if (status & 0x4000) {
+                   /* There was a major error; log it. */
+                   int err_status = le32_to_cpu(lp->tx_ring[entry].misc);
+                   lp->stats.tx_errors++;
+                   if (err_status & 0x04000000) lp->stats.tx_aborted_errors++;
+                   if (err_status & 0x08000000) lp->stats.tx_carrier_errors++;
+                   if (err_status & 0x10000000) lp->stats.tx_window_errors++;
+#ifndef DO_DXSUFLO
+                   if (err_status & 0x40000000) {
+                       lp->stats.tx_fifo_errors++;
+                       /* Ackk!  On FIFO errors the Tx unit is turned off! */
+                       /* Remove this verbosity later! */
+                       printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n",
+                              dev->name, csr0);
+                       must_restart = 1;
+                   }
+#else
+                   if (err_status & 0x40000000) {
+                       lp->stats.tx_fifo_errors++;
+                       if (! lp->dxsuflo) {  /* If controller doesn't recover ... */
+                           /* Ackk!  On FIFO errors the Tx unit is turned off! */
+                           /* Remove this verbosity later! */
+                           printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n",
+                                  dev->name, csr0);
+                           must_restart = 1;
+                       }
+                   }
+#endif
+               } else {
+                   if (status & 0x1800)
+                       lp->stats.collisions++;
+                   lp->stats.tx_packets++;
+               }
+
+               /* We must free the original skb */
+               if (lp->tx_skbuff[entry]) {
+                    pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[entry], lp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE);
+                   dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+                   lp->tx_skbuff[entry] = 0;
+                    lp->tx_dma_addr[entry] = 0;
+               }
+               dirty_tx++;
+           }
+
+#ifndef final_version
+           if (lp->cur_tx - dirty_tx >= TX_RING_SIZE) {
+               printk(KERN_ERR "out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+                      dirty_tx, lp->cur_tx, lp->tx_full);
+               dirty_tx += TX_RING_SIZE;
+           }
+#endif
+           if (lp->tx_full &&
+               netif_queue_stopped(dev) &&
+               dirty_tx > lp->cur_tx - TX_RING_SIZE + 2) {
+               /* The ring is no longer full, clear tbusy. */
+               lp->tx_full = 0;
+               netif_wake_queue (dev);
+           }
+           lp->dirty_tx = dirty_tx;
+       }
+
+       /* Log misc errors. */
+       if (csr0 & 0x4000) lp->stats.tx_errors++; /* Tx babble. */
+       if (csr0 & 0x1000) {
+           /*
+            * This happens when our receive ring is full. It shouldn't be a
+            * problem, as we will see normal rx interrupts for the frames in
+            * the receive ring. But there are some PCI chipsets (I can
+            * reproduce this on SP3G with the Intel Saturn chipset) which
+            * sometimes have problems and will fill up the receive ring with
+            * error descriptors. In this situation we don't get a rx
+            * interrupt, but a missed frame interrupt sooner or later, so we
+            * try to clean up our receive ring here.
+            */
+           pcnet32_rx(dev);
+           lp->stats.rx_errors++; /* Missed a Rx frame. */
+       }
+       if (csr0 & 0x0800) {
+           printk(KERN_ERR "%s: Bus master arbitration failure, status %4.4x.\n",
+                  dev->name, csr0);
+           /* unlike for the lance, there is no restart needed */
+       }
+
+       if (must_restart) {
+           /* stop the chip to clear the error condition, then restart */
+           lp->a.write_csr (ioaddr, 0, 0x0004);
+           pcnet32_restart(dev, 0x0002);
+       }
+    }
+
+    /* Clear any other interrupt, and set interrupt enable. */
+    lp->a.write_csr (ioaddr, 0, 0x7940);
+    lp->a.write_rap(ioaddr,rap);
+    
+    if (pcnet32_debug > 4)
+       printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n",
+              dev->name, lp->a.read_csr (ioaddr, 0));
+
+    spin_unlock(&lp->lock);
+}
+
+static int
+pcnet32_rx(struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    int entry = lp->cur_rx & RX_RING_MOD_MASK;
+
+    /* If we own the next entry, it's a new packet. Send it up. */
+    while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) {
+       int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8;
+
+       if (status != 0x03) {                   /* There was an error. */
+           /* 
+            * There is a tricky error noted by John Murphy,
+            * <murf@perftech.com> to Russ Nelson: Even with full-sized
+            * buffers it's possible for a jabber packet to use two
+            * buffers, with only the last correctly noting the error.
+            */
+           if (status & 0x01)  /* Only count a general error at the */
+               lp->stats.rx_errors++; /* end of a packet.*/
+           if (status & 0x20) lp->stats.rx_frame_errors++;
+           if (status & 0x10) lp->stats.rx_over_errors++;
+           if (status & 0x08) lp->stats.rx_crc_errors++;
+           if (status & 0x04) lp->stats.rx_fifo_errors++;
+           lp->rx_ring[entry].status &= le16_to_cpu(0x03ff);
+       } else {
+           /* Malloc up new buffer, compatible with net-2e. */
+           short pkt_len = (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)-4;
+           struct sk_buff *skb;
+                       
+           if(pkt_len < 60) {
+               printk(KERN_ERR "%s: Runt packet!\n",dev->name);
+               lp->stats.rx_errors++;
+           } else {
+               int rx_in_place = 0;
+
+               if (pkt_len > rx_copybreak) {
+                   struct sk_buff *newskb;
+                               
+                   if ((newskb = dev_alloc_skb (PKT_BUF_SZ))) {
+                       skb_reserve (newskb, 2);
+                       skb = lp->rx_skbuff[entry];
+                       skb_put (skb, pkt_len);
+                       lp->rx_skbuff[entry] = newskb;
+                       newskb->dev = dev;
+                        lp->rx_dma_addr[entry] = pci_map_single(lp->pci_dev, newskb->tail, newskb->len, PCI_DMA_FROMDEVICE);
+                       lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]);
+                       rx_in_place = 1;
+                   } else
+                       skb = NULL;
+               } else {
+                   skb = dev_alloc_skb(pkt_len+2);
+                }
+                           
+               if (skb == NULL) {
+                    int i;
+                   printk(KERN_ERR "%s: Memory squeeze, deferring packet.\n", dev->name);
+                   for (i = 0; i < RX_RING_SIZE; i++)
+                       if ((short)le16_to_cpu(lp->rx_ring[(entry+i) & RX_RING_MOD_MASK].status) < 0)
+                           break;
+
+                   if (i > RX_RING_SIZE -2) {
+                       lp->stats.rx_dropped++;
+                       lp->rx_ring[entry].status |= le16_to_cpu(0x8000);
+                       lp->cur_rx++;
+                   }
+                   break;
+               }
+               skb->dev = dev;
+               if (!rx_in_place) {
+                   skb_reserve(skb,2); /* 16 byte align */
+                   skb_put(skb,pkt_len);       /* Make room */
+                   eth_copy_and_sum(skb,
+                                    (unsigned char *)(lp->rx_skbuff[entry]->tail),
+                                    pkt_len,0);
+               }
+               lp->stats.rx_bytes += skb->len;
+               skb->protocol=eth_type_trans(skb,dev);
+               netif_rx(skb);
+               lp->stats.rx_packets++;
+           }
+       }
+       /*
+        * The docs say that the buffer length isn't touched, but Andrew Boyd
+        * of QNX reports that some revs of the 79C965 clear it.
+        */
+       lp->rx_ring[entry].buf_length = le16_to_cpu(-PKT_BUF_SZ);
+       lp->rx_ring[entry].status |= le16_to_cpu(0x8000);
+       entry = (++lp->cur_rx) & RX_RING_MOD_MASK;
+    }
+
+    return 0;
+}
+
+static int
+pcnet32_close(struct net_device *dev)
+{
+    unsigned long ioaddr = dev->base_addr;
+    struct pcnet32_private *lp = dev->priv;
+    int i;
+
+    netif_stop_queue(dev);
+
+    lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112);
+
+    if (pcnet32_debug > 1)
+       printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n",
+              dev->name, lp->a.read_csr (ioaddr, 0));
+
+    /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */
+    lp->a.write_csr (ioaddr, 0, 0x0004);
+
+    /*
+     * Switch back to 16bit mode to avoid problems with dumb 
+     * DOS packet driver after a warm reboot
+     */
+    lp->a.write_bcr (ioaddr, 20, 4);
+
+    free_irq(dev->irq, dev);
+    
+    /* free all allocated skbuffs */
+    for (i = 0; i < RX_RING_SIZE; i++) {
+       lp->rx_ring[i].status = 0;                          
+       if (lp->rx_skbuff[i]) {
+            pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], lp->rx_skbuff[i]->len, PCI_DMA_FROMDEVICE);
+           dev_kfree_skb(lp->rx_skbuff[i]);
+        }
+       lp->rx_skbuff[i] = NULL;
+        lp->rx_dma_addr[i] = 0;
+    }
+    
+    for (i = 0; i < TX_RING_SIZE; i++) {
+       if (lp->tx_skbuff[i]) {
+            pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE);
+           dev_kfree_skb(lp->tx_skbuff[i]);
+        }
+       lp->tx_skbuff[i] = NULL;
+        lp->tx_dma_addr[i] = 0;
+    }
+    
+    MOD_DEC_USE_COUNT;
+
+    return 0;
+}
+
+static struct net_device_stats *
+pcnet32_get_stats(struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    unsigned long ioaddr = dev->base_addr;
+    u16 saved_addr;
+    unsigned long flags;
+
+    spin_lock_irqsave(&lp->lock, flags);
+    saved_addr = lp->a.read_rap(ioaddr);
+    lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112);
+    lp->a.write_rap(ioaddr, saved_addr);
+    spin_unlock_irqrestore(&lp->lock, flags);
+
+    return &lp->stats;
+}
+
+/* taken from the sunlance driver, which it took from the depca driver */
+static void pcnet32_load_multicast (struct net_device *dev)
+{
+    struct pcnet32_private *lp = dev->priv;
+    volatile struct pcnet32_init_block *ib = &lp->init_block;
+    volatile u16 *mcast_table = (u16 *)&ib->filter;
+    struct dev_mc_list *dmi=dev->mc_list;
+    char *addrs;
+    int i, j, bit, byte;
+    u32 crc, poly = CRC_POLYNOMIAL_LE;
+       
+    /* set all multicast bits */
+    if (dev->flags & IFF_ALLMULTI){ 
+       ib->filter [0] = 0xffffffff;
+       ib->filter [1] = 0xffffffff;
+       return;
+    }
+    /* clear the multicast filter */
+    ib->filter [0] = 0;
+    ib->filter [1] = 0;
+
+    /* Add addresses */
+    for (i = 0; i < dev->mc_count; i++){
+       addrs = dmi->dmi_addr;
+       dmi   = dmi->next;
+       
+       /* multicast address? */
+       if (!(*addrs & 1))
+           continue;
+       
+       crc = 0xffffffff;
+       for (byte = 0; byte < 6; byte++)
+           for (bit = *addrs++, j = 0; j < 8; j++, bit >>= 1) {
+               int test;
+               
+               test = ((bit ^ crc) & 0x01);
+               crc >>= 1;
+               
+               if (test) {
+                   crc = crc ^ poly;
+               }
+           }
+       
+       crc = crc >> 26;
+       mcast_table [crc >> 4] |= 1 << (crc & 0xf);
+    }
+    return;
+}
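+
+/*
+ * Note: the loop above is the standard little-endian Ethernet CRC-32 over
+ * the 6-byte address; the top 6 bits of the result (after crc >> 26) pick
+ * one of the 64 logical-address-filter bits, stored as four 16-bit words
+ * (word index crc >> 4, bit index crc & 0xf).
+ */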
+
+
+/*
+ * Set or clear the multicast filter for this adaptor.
+ */
+static void pcnet32_set_multicast_list(struct net_device *dev)
+{
+    unsigned long ioaddr = dev->base_addr;
+    struct pcnet32_private *lp = dev->priv;     
+
+    if (dev->flags&IFF_PROMISC) {
+       /* Log any net taps. */
+       printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name);
+       lp->init_block.mode = le16_to_cpu(0x8000 | (lp->options & PORT_PORTSEL) << 7);
+    } else {
+       lp->init_block.mode = le16_to_cpu((lp->options & PORT_PORTSEL) << 7);
+       pcnet32_load_multicast (dev);
+    }
+    
+    lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */
+
+    pcnet32_restart(dev, 0x0042); /*  Resume normal operation */
+}
+
+#ifdef HAVE_PRIVATE_IOCTL
+static int pcnet32_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+    unsigned long ioaddr = dev->base_addr;
+    struct pcnet32_private *lp = dev->priv;     
+    u16 *data = (u16 *)&rq->ifr_data;
+    int phyaddr = lp->a.read_bcr (ioaddr, 33);
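+    /* BCR33 holds the MII address as (phy_addr << 5) | reg_addr and BCR34
+     * is the data port, so each access below programs BCR33 first and then
+     * reads or writes BCR34. */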
+
+    if (lp->mii) {
+       switch(cmd) {
+       case SIOCDEVPRIVATE:            /* Get the address of the PHY in use. */
+           data[0] = (phyaddr >> 5) & 0x1f;
+           /* Fall Through */
+       case SIOCDEVPRIVATE+1:          /* Read the specified MII register. */
+           lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f));
+           data[3] = lp->a.read_bcr (ioaddr, 34);
+           lp->a.write_bcr (ioaddr, 33, phyaddr);
+           return 0;
+       case SIOCDEVPRIVATE+2:          /* Write the specified MII register */
+           if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+           lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f));
+           lp->a.write_bcr (ioaddr, 34, data[2]);
+           lp->a.write_bcr (ioaddr, 33, phyaddr);
+           return 0;
+       default:
+           return -EOPNOTSUPP;
+       }
+    }
+    return -EOPNOTSUPP;
+}
+#endif /* HAVE_PRIVATE_IOCTL */
+                                           
+static struct pci_driver pcnet32_driver = {
+    name:  "pcnet32",
+    probe: pcnet32_probe_pci,
+    remove: NULL,
+    id_table: pcnet32_pci_tbl,
+};
+
+MODULE_PARM(debug, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(tx_start_pt, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");
+MODULE_AUTHOR("Thomas Bogendoerfer");
+MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards");
+MODULE_LICENSE("GPL");
+
+/* An additional parameter that may be passed in... */
+static int debug = -1;
+static int tx_start_pt = -1;
+
+static int __init pcnet32_init_module(void)
+{
+    int cards_found = 0;
+    int err;
+
+    if (debug > 0)
+       pcnet32_debug = debug;
+    if ((tx_start_pt >= 0) && (tx_start_pt <= 3))
+       tx_start = tx_start_pt;
+    
+    pcnet32_dev = NULL;
+    /* find the PCI devices */
+#define USE_PCI_REGISTER_DRIVER
+#ifdef USE_PCI_REGISTER_DRIVER
+    if ((err = pci_module_init(&pcnet32_driver)) < 0 )
+       return err;
+#else
+    {
+        struct pci_device_id *devid = pcnet32_pci_tbl;
+        for (devid = pcnet32_pci_tbl; devid != NULL && devid->vendor != 0; devid++) {
+            struct pci_dev *pdev = pci_find_subsys(devid->vendor, devid->device, devid->subvendor, devid->subdevice, NULL);
+            if (pdev != NULL) {
+                if (pcnet32_probe_pci(pdev, devid) >= 0) {
+                    cards_found++;
+                }
+            }
+        }
+    }
+#endif
+    /* VLbus probing is skipped in this build; the call below is
+       therefore unreachable and kept only for reference. */
+    return 0;
+    /* return pcnet32_probe_vlbus(cards_found); */
+}
+
+static void __exit pcnet32_cleanup_module(void)
+{
+    struct net_device *next_dev;
+
+    /* No need to check MOD_IN_USE, as sys_delete_module() checks. */
+    /* Unregister the PCI driver once, then walk the device list. */
+    pci_unregister_driver(&pcnet32_driver);
+    while (pcnet32_dev) {
+        struct pcnet32_private *lp = pcnet32_dev->priv;
+       next_dev = lp->next;
+       unregister_netdev(pcnet32_dev);
+       release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE);
+        pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+       kfree(pcnet32_dev);
+       pcnet32_dev = next_dev;
+    }
+}
+
+module_init(pcnet32_init_module);
+module_exit(pcnet32_cleanup_module);
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -m486 -c pcnet32.c"
+ *  c-indent-level: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/net/setup.c b/xen-2.4.16/drivers/net/setup.c
new file mode 100644 (file)
index 0000000..1352a1c
--- /dev/null
@@ -0,0 +1,173 @@
+
+/*
+ *     New style setup code for the network devices
+ */
+#include <linux/config.h>
+#include <linux/netdevice.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+//#include <linux/netlink.h>
+
+extern int slip_init_ctrl_dev(void);
+extern int x25_asy_init_ctrl_dev(void);
+  
+extern int dmascc_init(void);
+
+extern int awc4500_pci_probe(void);
+extern int awc4500_isa_probe(void);
+extern int awc4500_pnp_probe(void);
+extern int awc4500_365_probe(void);
+extern int arcnet_init(void); 
+extern int scc_enet_init(void); 
+extern int fec_enet_init(void); 
+extern int dlci_setup(void); 
+extern int sdla_setup(void); 
+extern int sdla_c_setup(void); 
+extern int comx_init(void);
+extern int lmc_setup(void);
+
+extern int madgemc_probe(void);
+extern int uml_net_probe(void);
+
+/* Pad device name to IFNAMSIZ=16. E.g. __PAD6 is a string of 9 zeros: 6 name chars + 9 pad bytes + NUL = 16. */
+#define __PAD6 "\0\0\0\0\0\0\0\0\0"
+#define __PAD5 __PAD6 "\0"
+#define __PAD4 __PAD5 "\0"
+#define __PAD3 __PAD4 "\0"
+#define __PAD2 __PAD3 "\0"
+
+
+/*
+ *     Devices in this list must do new style probing. That is, they must
+ *     allocate their own device objects and do their own bus scans.
+ */
+
+struct net_probe
+{
+       int (*probe)(void);
+       int status;     /* non-zero if autoprobe has failed */
+};
+static struct net_probe pci_probes[] __initdata = {
+       /*
+        *      Early setup devices
+        */
+
+#if defined(CONFIG_DMASCC)
+       {dmascc_init, 0},
+#endif 
+#if defined(CONFIG_DLCI)
+       {dlci_setup, 0},
+#endif
+#if defined(CONFIG_SDLA)
+       {sdla_c_setup, 0},
+#endif
+#if defined(CONFIG_ARCNET)
+       {arcnet_init, 0},
+#endif
+#if defined(CONFIG_SCC_ENET)
+        {scc_enet_init, 0},
+#endif
+#if defined(CONFIG_FEC_ENET)
+        {fec_enet_init, 0},
+#endif
+#if defined(CONFIG_COMX)
+       {comx_init, 0},
+#endif
+        
+#if defined(CONFIG_LANMEDIA)
+       {lmc_setup, 0},
+#endif
+        
+/*
+ *
+ *     Wireless non-HAM
+ *
+ */
+#ifdef CONFIG_AIRONET4500_NONCS
+
+#ifdef CONFIG_AIRONET4500_PCI
+       {awc4500_pci_probe,0},
+#endif
+
+#ifdef CONFIG_AIRONET4500_PNP
+       {awc4500_pnp_probe,0},
+#endif
+
+#endif
+
+/*
+ *     Token Ring Drivers
+ */  
+#ifdef CONFIG_MADGEMC
+       {madgemc_probe, 0},
+#endif
+#ifdef CONFIG_UML_NET
+       {uml_net_probe, 0},
+#endif
+       {NULL, 0},
+};
+
+
+/*
+ *     Run the updated device probes. These do not need a device passed
+ *     into them.
+ */
+static void __init network_probe(void)
+{
+       struct net_probe *p = pci_probes;
+
+       while (p->probe != NULL)
+       {
+               p->status = p->probe();
+               p++;
+       }
+}
+
+
+/*
+ *     Initialise the line discipline drivers
+ */
+static void __init network_ldisc_init(void)
+{
+#if defined(CONFIG_SLIP)
+       slip_init_ctrl_dev();
+#endif
+#if defined(CONFIG_X25_ASY)
+       x25_asy_init_ctrl_dev();
+#endif
+}
+
+
+static void __init special_device_init(void)
+{
+#ifdef CONFIG_NET_SB1000
+       {
+               extern int sb1000_probe(struct net_device *dev);
+               static struct net_device sb1000_dev = 
+               {
+                       "cm0" __PAD3, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, sb1000_probe 
+               };
+               register_netdev(&sb1000_dev);
+       }
+#endif
+}
+
+/*
+ *     Initialise network devices
+ */
+void __init net_device_init(void)
+{
+       /* Devices supporting the new probing API */
+       network_probe();
+       /* Line disciplines */
+       network_ldisc_init();
+       /* Special devices */
+       special_device_init();
+       /* That kicks off the legacy init functions */
+}
diff --git a/xen-2.4.16/drivers/net/tulip/.depend b/xen-2.4.16/drivers/net/tulip/.depend
new file mode 100644 (file)
index 0000000..d2cbd43
--- /dev/null
@@ -0,0 +1,58 @@
+21142.o: 21142.c \
+   tulip.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h
+eeprom.o: eeprom.c \
+   tulip.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/unaligned.h
+interrupt.o: interrupt.c \
+   tulip.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/etherdevice.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h)
+media.o: media.c \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/mii.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \
+   tulip.h
+pnic2.o: pnic2.c \
+   tulip.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h
+pnic.o: pnic.c \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \
+   tulip.h
+timer.o: timer.c \
+   tulip.h
+tulip_core.o: tulip_core.c \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/module.h \
+   tulip.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/etherdevice.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/mii.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/ethtool.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/unaligned.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/uaccess.h \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h) \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/tulip/mwi.h) \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/ddb5476.h) \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/ddb5477.h) \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/pm.h)
+tulip.h: \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/types.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/spinlock.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/netdevice.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/timer.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/io.h \
+   /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/irq.h \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/tulip/mmio.h) \
+   $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h)
+       @touch tulip.h
+.PRECIOUS:     tulip.h \
+
diff --git a/xen-2.4.16/drivers/net/tulip/21142.c b/xen-2.4.16/drivers/net/tulip/21142.c
new file mode 100644 (file)
index 0000000..3a88c44
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+       drivers/net/tulip/21142.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#include "tulip.h"
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+
+static u16 t21142_csr13[] = { 0x0001, 0x0009, 0x0009, 0x0000, 0x0001, };
+u16 t21142_csr14[] =       { 0xFFFF, 0x0705, 0x0705, 0x0000, 0x7F3D, };
+static u16 t21142_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, };
+
+
+/* Handle the 21143 uniquely: do autoselect with NWay, not the EEPROM list
+   of available transceivers.  */
+void t21142_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int csr12 = inl(ioaddr + CSR12);
+       int next_tick = 60*HZ;
+       int new_csr6 = 0;
+
+       if (tulip_debug > 2)
+               printk(KERN_INFO"%s: 21143 negotiation status %8.8x, %s.\n",
+                          dev->name, csr12, medianame[dev->if_port]);
+       if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+               tulip_check_duplex(dev);
+               next_tick = 60*HZ;
+       } else if (tp->nwayset) {
+               /* Don't screw up a negotiated session! */
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: Using NWay-set %s media, csr12 %8.8x.\n",
+                                  dev->name, medianame[dev->if_port], csr12);
+       } else if (tp->medialock) {
+                       ;
+       } else if (dev->if_port == 3) {
+               if (csr12 & 2) {        /* No 100mbps link beat, revert to 10mbps. */
+                       if (tulip_debug > 1)
+                               printk(KERN_INFO"%s: No 21143 100baseTx link beat, %8.8x, "
+                                          "trying NWay.\n", dev->name, csr12);
+                       t21142_start_nway(dev);
+                       next_tick = 3*HZ;
+               }
+       } else if ((csr12 & 0x7000) != 0x5000) {
+               /* Negotiation failed.  Search media types. */
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: 21143 negotiation failed, status %8.8x.\n",
+                                  dev->name, csr12);
+               if (!(csr12 & 4)) {             /* 10mbps link beat good. */
+                       new_csr6 = 0x82420000;
+                       dev->if_port = 0;
+                       outl(0, ioaddr + CSR13);
+                       outl(0x0003FFFF, ioaddr + CSR14);
+                       outw(t21142_csr15[dev->if_port], ioaddr + CSR15);
+                       outl(t21142_csr13[dev->if_port], ioaddr + CSR13);
+               } else {
+                       /* Select 100mbps port to check for link beat. */
+                       new_csr6 = 0x83860000;
+                       dev->if_port = 3;
+                       outl(0, ioaddr + CSR13);
+                       outl(0x0003FF7F, ioaddr + CSR14);
+                       outw(8, ioaddr + CSR15);
+                       outl(1, ioaddr + CSR13);
+               }
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: Testing new 21143 media %s.\n",
+                                  dev->name, medianame[dev->if_port]);
+               if (new_csr6 != (tp->csr6 & ~0x00D5)) {
+                       tp->csr6 &= 0x00D5;
+                       tp->csr6 |= new_csr6;
+                       outl(0x0301, ioaddr + CSR12);
+                       tulip_restart_rxtx(tp);
+               }
+               next_tick = 3*HZ;
+       }
+
+       /* mod_timer synchronizes us with potential add_timer calls
+        * from interrupts.
+        */
+       mod_timer(&tp->timer, RUN_AT(next_tick));
+}
+
+
+void t21142_start_nway(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int csr14 = ((tp->sym_advertise & 0x0780) << 9)  |
+               ((tp->sym_advertise & 0x0020) << 1) | 0xffbf;
+
+       dev->if_port = 0;
+       tp->nway = tp->mediasense = 1;
+       tp->nwayset = tp->lpar = 0;
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: Restarting 21143 autonegotiation, csr14=%8.8x.\n",
+                          dev->name, csr14);
+       outl(0x0001, ioaddr + CSR13);
+       udelay(100);
+       outl(csr14, ioaddr + CSR14);
+       tp->csr6 = 0x82420000 | (tp->sym_advertise & 0x0040 ? FullDuplex : 0);
+       outl(tp->csr6, ioaddr + CSR6);
+       if (tp->mtable  &&  tp->mtable->csr15dir) {
+               outl(tp->mtable->csr15dir, ioaddr + CSR15);
+               outl(tp->mtable->csr15val, ioaddr + CSR15);
+       } else
+               outw(0x0008, ioaddr + CSR15);
+       outl(0x1301, ioaddr + CSR12);           /* Trigger NWAY. */
+}
+
+
+
+void t21142_lnk_change(struct net_device *dev, int csr5)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int csr12 = inl(ioaddr + CSR12);
+
+       if (tulip_debug > 1)
+               printk(KERN_INFO"%s: 21143 link status interrupt %8.8x, CSR5 %x, "
+                          "%8.8x.\n", dev->name, csr12, csr5, inl(ioaddr + CSR14));
+
+       /* If NWay finished and we have a negotiated partner capability. */
+       if (tp->nway  &&  !tp->nwayset  &&  (csr12 & 0x7000) == 0x5000) {
+               int setup_done = 0;
+               int negotiated = tp->sym_advertise & (csr12 >> 16);
+               tp->lpar = csr12 >> 16;
+               tp->nwayset = 1;
+               if (negotiated & 0x0100)                dev->if_port = 5;
+               else if (negotiated & 0x0080)   dev->if_port = 3;
+               else if (negotiated & 0x0040)   dev->if_port = 4;
+               else if (negotiated & 0x0020)   dev->if_port = 0;
+               else {
+                       tp->nwayset = 0;
+                       if ((csr12 & 2) == 0  &&  (tp->sym_advertise & 0x0180))
+                               dev->if_port = 3;
+               }
+               tp->full_duplex = (tulip_media_cap[dev->if_port] & MediaAlwaysFD) ? 1:0;
+
+               if (tulip_debug > 1) {
+                       if (tp->nwayset)
+                               printk(KERN_INFO "%s: Switching to %s based on link "
+                                          "negotiation %4.4x & %4.4x = %4.4x.\n",
+                                          dev->name, medianame[dev->if_port], tp->sym_advertise,
+                                          tp->lpar, negotiated);
+                       else
+                               printk(KERN_INFO "%s: Autonegotiation failed, using %s,"
+                                          " link beat status %4.4x.\n",
+                                          dev->name, medianame[dev->if_port], csr12);
+               }
+
+               if (tp->mtable) {
+                       int i;
+                       for (i = 0; i < tp->mtable->leafcount; i++)
+                               if (tp->mtable->mleaf[i].media == dev->if_port) {
+                                       tp->cur_index = i;
+                                       tulip_select_media(dev, 1);
+                                       setup_done = 1;
+                                       break;
+                               }
+               }
+               if ( ! setup_done) {
+                       tp->csr6 = (dev->if_port & 1 ? 0x838E0000 : 0x82420000) | (tp->csr6 & 0x20ff);
+                       if (tp->full_duplex)
+                               tp->csr6 |= 0x0200;
+                       outl(1, ioaddr + CSR13);
+               }
+#if 0                                                  /* Restart shouldn't be needed. */
+               outl(tp->csr6 | RxOn, ioaddr + CSR6);
+               if (tulip_debug > 2)
+                       printk(KERN_DEBUG "%s:  Restarting Tx and Rx, CSR5 is %8.8x.\n",
+                                  dev->name, inl(ioaddr + CSR5));
+#endif
+               tulip_start_rxtx(tp);
+               if (tulip_debug > 2)
+                       printk(KERN_DEBUG "%s:  Setting CSR6 %8.8x/%x CSR12 %8.8x.\n",
+                                  dev->name, tp->csr6, inl(ioaddr + CSR6),
+                                  inl(ioaddr + CSR12));
+       } else if ((tp->nwayset  &&  (csr5 & 0x08000000)
+                               && (dev->if_port == 3  ||  dev->if_port == 5)
+                               && (csr12 & 2) == 2) ||
+                          (tp->nway && (csr5 & (TPLnkFail)))) {
+               /* Link blew? Maybe restart NWay. */
+               del_timer_sync(&tp->timer);
+               t21142_start_nway(dev);
+               tp->timer.expires = RUN_AT(3*HZ);
+               add_timer(&tp->timer);
+       } else if (dev->if_port == 3  ||  dev->if_port == 5) {
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: 21143 %s link beat %s.\n",
+                                  dev->name, medianame[dev->if_port],
+                                  (csr12 & 2) ? "failed" : "good");
+               if ((csr12 & 2)  &&  ! tp->medialock) {
+                       del_timer_sync(&tp->timer);
+                       t21142_start_nway(dev);
+                       tp->timer.expires = RUN_AT(3*HZ);
+                       add_timer(&tp->timer);
+               } else if (dev->if_port == 5)
+                       outl(inl(ioaddr + CSR14) & ~0x080, ioaddr + CSR14);
+       } else if (dev->if_port == 0  ||  dev->if_port == 4) {
+               if ((csr12 & 4) == 0)
+                       printk(KERN_INFO"%s: 21143 10baseT link beat good.\n",
+                                  dev->name);
+       } else if (!(csr12 & 4)) {              /* 10mbps link beat good. */
+               if (tulip_debug)
+                       printk(KERN_INFO"%s: 21143 10mbps sensed media.\n",
+                                  dev->name);
+               dev->if_port = 0;
+       } else if (tp->nwayset) {
+               if (tulip_debug)
+                       printk(KERN_INFO"%s: 21143 using NWay-set %s, csr6 %8.8x.\n",
+                                  dev->name, medianame[dev->if_port], tp->csr6);
+       } else {                /* 100mbps link beat good. */
+               if (tulip_debug)
+                       printk(KERN_INFO"%s: 21143 100baseTx sensed media.\n",
+                                  dev->name);
+               dev->if_port = 3;
+               tp->csr6 = 0x838E0000 | (tp->csr6 & 0x20ff);
+               outl(0x0003FF7F, ioaddr + CSR14);
+               outl(0x0301, ioaddr + CSR12);
+               tulip_restart_rxtx(tp);
+       }
+}
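
As a worked example of the NWay decode above, assume both link partners
advertise 100baseTx full duplex: negotiated = tp->sym_advertise & tp->lpar
then has bit 0x0100 set, so dev->if_port becomes 5, the full-duplex 100baseTx
entry in medianame[], and full_duplex is set because that medium carries the
MediaAlwaysFD capability flag.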
+
+
diff --git a/xen-2.4.16/drivers/net/tulip/ChangeLog b/xen-2.4.16/drivers/net/tulip/ChangeLog
new file mode 100644 (file)
index 0000000..a515efc
--- /dev/null
@@ -0,0 +1,520 @@
+2001-11-13  David S. Miller  <davem@redhat.com>
+
+       * tulip_core.c (tulip_mwi_config): Kill unused label early_out.
+
+2001-11-06  Richard Mortimer  <richm@oldelvet.netscapeonline.co.uk>
+
+       * tulip_core.c:  Correct set of values to mask out of csr0,
+       for DM9102A chips.  Limit burst/alignment of DM9102A chips
+       on Sparcs.
+
+2001-11-06  Jun Sun  <jsun@mvista.com>
+
+       * tulip_core.c:  Support finding MAC address on
+       two MIPS boards, DDB5476 and DDB5477.
+
+2001-11-06  Kevin B. Hendricks  <khendricks@ivey.uwo.ca>
+
+       * Makefile, tulip.h, tulip_core.c, pnic2.c, 21142.c:
+       Fixes for PNIC II support.
+
+2001-11-06  David S. Miller  <davem@redhat.com>
+
+       * tulip_core.c: Support reading MAC address from
+       Sparc OBP property local-mac-address.
+
+2001-07-17  Erik A. Hendriks  <hendriks@lanl.gov>
+
+       * 21142.c: Merge fix from tulip.c 0.92w which prevents the
+       overwriting of csr6 bits we want to preserve.
+
+2001-07-10  Jeff Golds  <jgolds@resilience.com>
+
+       * tulip_core.c: Fix two comments
+
+2001-07-06  Stephen Degler  <sdegler@degler.net>
+
+       * media.c:
+       The media selection process at the end of NWAY is busted
+       because for the case of MII/SYM it needs to be:
+
+        csr13 <- 0
+        csr14 <- 0
+        csr6 <-  the value calculated is okay.
+
+       In the other media cases csr14 is computed by
+       t21142_csr14val[dev->if_port], which seems ok.  The value of
+       zero as opposed to 3FFFFF comes straight from appendix D of the
+       21143 data book, and it makes logical sense because you're
+       bypassing the SIA interface entirely when you use MII or SYM (see
+       figure 1-1 in the data book if you're visually oriented)
+
+2001-07-03  Jeff Golds  <jgolds@resilience.com>
+
+       * tulip_core.c (tulip_clean_tx_ring):
+       Clear status for in-progress Tx's, and count
+       Tx errors for all packets being released.
+
+2001-06-16  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip.h, tulip_core.c:
+       Integrate MMIO support from devel branch, but default
+       it to off for stable kernel and driver series.
+
+2001-06-16  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_init_one):
+       Free descriptor rings on error.
+
+2001-06-16  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_mwi_config, tulip_init_one):
+       Large update to csr0 bus configuration code.  This is not stable
+       yet, so it is only conditionally enabled, via CONFIG_TULIP_MWI.
+
+2001-06-16  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c:
+       Initialize timer in tulip_init_one and tulip_down,
+       not in tulip_up.
+
+2001-06-14  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c:
+       - Update tulip_suspend, tulip_resume for new PCI PM API.
+       - Surround suspend/resume code with CONFIG_PM.
+
+2001-06-12  Jeff Golds  <jgolds@resilience.com>
+
+       * tulip_core.c:
+       - Reset sw ring ptrs in tulip_up.  Fixes PM resume case.
+       - Clean rx and tx rings on device down.
+
+2001-06-05  David Miller  <davem@redhat.com>
+
+       * tulip_core.c (set_rx_mode):  Do not use set_bit
+       on an integer variable.  Also fix endianness issue.
+
+2001-06-04  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * interrupt.c:
+       Simplify rx processing when CONFIG_NET_HW_FLOWCONTROL is
+       active, and in the process fix a bug where flow control
+       and low load caused rx not to be acknowledged properly.
+
+2001-06-01  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip.h:
+       - Remove tulip_outl_csr helper, redundant.
+       - Add tulip_start_rxtx inline helper.
+       - tulip_stop_rxtx helper: Add synchronization. Always use current
+         csr6 value, instead of tp->csr6 value or value passed as arg.
+       - tulip_restart_rxtx helper: Add synchronization. Always
+         use tp->csr6 for desired mode, not value passed as arg.
+       - New RxOn, TxOn, RxTx constants for csr6 modes.
+       - Remove now-redundant constants csr6_st, csr6_sr.
+
+       * 21142.c, interrupt.c, media.c, pnic.c, tulip_core.c:
+       Update for above rxtx helper changes.
+
+       * interrupt.c:
+       - whitespace cleanup around #ifdef CONFIG_NET_HW_FLOWCONTROL,
+         convert tabs to spaces.
+       - Move tp->stats.rx_missed_errors update outside the ifdef.
+
+2001-05-18  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Added ethtool support.
+         ETHTOOL_GDRVINFO ioctl only, for now.
+
+2001-05-14  Robert Olsson <Robert.Olsson@data.slu.se>
+
+       * Restored HW_FLOWCONTROL from Linux 2.1 series tulip (ANK)
+         plus Jamal's NETIF_RX_* feedback control.
+
+2001-05-14  Robert Olsson <Robert.Olsson@data.slu.se>
+
+       * Added support for 21143's Interrupt Mitigation.
+         Jamal was the original instigator.
+
+2001-05-14  Robert Olsson <Robert.Olsson@data.slu.se>
+
+       * tulip_refill_rx prototype added to tulip.h
+
+2001-05-13  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Remove HAS_PCI_MWI flag from Comet, untested.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c, tulip.h: Remove Conexant PCI id, no chip
+       docs are available to fix problems with support.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_init_one): Do not call
+       unregister_netdev in error cleanup.  Remnant of old
+       usage of init_etherdev.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c (tulip_find_mii): Simply write the updated BMCR
+       twice, as it seems the best thing to do for both broken and
+       sane chips.
+       If the mii_advert value, as read from MII_ADVERTISE, is zero,
+       then generate a value we should advertise from the capability
+       bits in BMSR.
+       Fill in tp->advertising for all cases.
+       Just to be safe, clear all unwanted bits.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (private_ioctl):  Fill in tp->advertising
+       when advertising value is changed by the user.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Mark Comet chips as needing the updated MWI
+       csr0 configuration.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c, tulip_core.c:  Move the MII scan from inline code
+       in tulip_init_one to the new function tulip_find_mii
+       in media.c.
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c (tulip_check_duplex):
+       Only restart Rx/Tx engines if they are active
+       (and csr6 changes)
+
+2001-05-12  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_mwi_config):
+       Clamp values read from PCI cache line size register to
+       values acceptable to tulip chip.  Done for safety and
+       -almost- certainly unneeded.
+
+2001-05-11  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_init_one):
+       Instead of unconditionally enabling autonegotiation, disable
+       autonegotiation if not using the default port.  Further,
+       flip the nway bit immediately, and then update the
+       speed/duplex in a separate MII transaction.  We do this
+       because some boards require that nway be disabled separately,
+       before media selection is forced.
+
+       TODO: Investigate if we can simply write the same value
+       to BMCR twice, to avoid unnecessarily changing phy
+       settings.
+
+2001-05-11  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip.h, tulip_core.c:  If HAS_PCI_MWI is set for a
+       given chip, adjust the csr0 values not according to
+       provided values but according to system cache line size.
+       Currently cache alignment is matched as closely to cache
+       line size as possible.  Currently programmable burst limit
+       is set (ie. never unlimited), and always equal to cache
+       alignment and system cache size.  Currently MWI bit is set
+       only if the MWI bit is present in the PCI command register.
+
+2001-05-11  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c (tulip_select_media):
+       For media types 1 and 3, only use the provided eeprom
+       advertising value if it is non-zero.
+       (tulip_check_duplex):
+       Do not exit ASAP if full_duplex_lock is set.  This
+       ensures that the csr6 value is written if an update
+       is needed.
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       Merge PNIC-II-specific stuff from Becker's tulip.c:
+
+       * tulip.h, 21142.c (pnic2_lnk_change): new function
+       * tulip_core.c (tulip_init_one): use it
+
+       * tulip_core.c (tulip_tx_timeout): Add specific
+       debugging for PNIC2.
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_init_one): Print out
+       tulip%d instead of PCI device number, for
+       consistency.
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * Merge changes from Becker's tulip.c:
+       Fix bugs in ioctl.
+       Fix several bugs by distinguishing between MII
+       and SYM advertising values.
+       Set CSR14 autonegotiation bit for media types 2 and 4,
+       where the SIA CSR setup values are not provided.
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c (tulip_select_media): Only update MII
+       advertising value if startup arg < 2.
+
+       * tulip.h: Do not enable CSR13/14/15 autoconfiguration
+       for 21041.
+
+       * tulip_core.c:
+       21041: add specific code for reset, and do not set CAC bit
+       When resetting media, for media table type 11 media, pass
+       value 2 as 'startup' arg to select_media, to avoid updating
+       MII advertising value.
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * pnic.c (pnic_check_duplex): remove
+       pnic.c (pnic_lnk_change, pnic_timer): use
+       tulip_check_duplex not pnic_check_duplex.
+
+       * media.c (tulip_check_duplex):
+       Clean up to use symbolic names instead of numeric constants.
+       Set TxThreshold mode as necessary as well as clearing it.
+       Update csr6 if csr6 changes, not simply if duplex changes.
+
+       (found by Manfred Spraul)
+
+2001-05-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * 21142.c, eeprom.c, tulip.h, tulip_core.c:
+       Remove DPRINTK as another, better method of
+       debug message printing is available.
+
+2001-05-09  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * 21142.c (t21142_lnk_change): Pass arg startup==1
+       to tulip_select_media, in order to force csr13 to be
+       zeroed out prior to going to full duplex mode.  Fixes
+       autonegotiation on a quad-port Znyx card.
+       (from Stephen Dengler)
+
+2001-05-09  Russell King  <rmk@arm.linux.org.uk>
+
+       * interrupt.c: Better PCI bus error reporting.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Now that dev->name is only available late
+         in the probe, insert a hack to replace a not-evaluated
+         "eth%d" string with an evaluated "tulip%d" string.
+         Also, remove an obvious comment and clean up indentation.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: If we are a module, always print out the
+         version string.  If we are built into the kernel, only print
+         the version string if at least one tulip is detected.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       Merged from Becker's tulip.c 0.92t:
+
+       * tulip_core.c: Add support for Conexant LANfinity.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Only suspend/resume if the interface
+         is up and running.  Use alloc_etherdev and pci_request_regions.
+         Spelling fix.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Remove code that existed when one or more of
+         the following defines existed.  These defines were never used
+         by normal users in practice: TULIP_FULL_DUPLEX,
+         TULIP_DEFAULT_MEDIA, and TULIP_NO_MEDIA_SWITCH.
+
+       * tulip.h, eeprom.c: Move EE_* constants from tulip.h to eeprom.c.
+       * tulip.h, media.c: Move MDIO_* constants from tulip.h to media.c.
+
+       * media.c: Add barrier() to mdio_read/write's PNIC status check
+         loops.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       Merged from Becker's tulip.c 0.92t:
+
+       * tulip.h: Add MEDIA_MASK constant for bounding medianame[]
+         array lookups.
+       * eeprom.c, media.c, timer.c, tulip_core.c: Use it.
+
+       * media.c, tulip_core.c: mdio_{read,write} cleanup.  Since this
+         is called [pretty much] directly from ioctl, we mask
+         read/write arguments to limit the values passed.
+         Added mii_lock.  Added comet_miireg2offset and better
+         Comet-specific mdio_read/write code.  Pay closer attention
+         to the bits we set in ioctl.  Remove spinlocks from ioctl,
+         they are in mdio_read/write now.  Use mask to limit
+         phy number in tulip_init_one's MII scan.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       Merged from Becker's tulip.c 0.92t:
+
+       * 21142.c, tulip_core.c: PNIC2 MAC address and NWay fixes.
+       * tulip.h: Add FullDuplex constant, used in above change.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * timer.c: Do not call netif_carrier_{on,off}, it is not used in
+         the main tree.  Leave code in, disabled, as markers for future
+         carrier notification.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       Merged from Becker's tulip.c 0.92t, except for the tulip.h
+       whitespace cleanup:
+
+       * interrupt.c: If Rx stops, make sure to update the
+         multicast filter before restarting.
+       * tulip.h: Add COMET_MAC_ADDR feature flag, clean up flags.
+         Add Accept* Rx mode bit constants.
+         Add mc_filter[] to driver private struct.
+       * tulip_core.c: Add new Comet PCI id 0x1113:0x9511.
+         Add COMET_MAC_ADDR feature flag to comet entry in board info array.
+         Prefer to test COMET_MAC_ADDR flag to testing chip_id for COMET,
+         when dealing with the Comet's MAC address.
+         Enable Tx underrun recovery for Comet chips.
+         Use new Accept* constants in set_rx_mode.
+         Prefer COMET_MAC_ADDR flag test to chip_id test in set_rx_mode.
+         Store built mc_filter for later use in intr handler by Comets.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: Use tp->cur_tx when building the
+         setup frame, instead of assuming that the setup
+         frame is always built in slot zero.  This case is
+         hit during PM resume.
+
+2001-04-03  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * *.c: Update file headers (copyright, urls, etc.)
+       * Makefile: re-order so that chip-specific modules get their own line
+       * eeprom.c: BSS/zero-init cleanup (Andrey Panin)
+       * tulip_core.c: merge medianame[] update from tulip.c.
+         Additional arch-specific rx_copybreak, csr0 values. (various)
+
+2001-02-20  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * media.c (tulip_select_media): No need to initialize
+       new_csr6, all cases initialize it properly.
+
+2001-02-18  Manfred Spraul  <manfred@colorfullife.com>
+
+       * interrupt.c (tulip_refill_rx): Make public.
+       If PNIC chip stops due to lack of Rx buffers, restart it.
+       (tulip_interrupt): PNIC doesn't have a h/w timer, emulate
+       with software timers.
+       * pnic.c (pnic_check_duplex): New function, PNIC-specific
+       version of tulip_check_duplex.
+       (pnic_lnk_change): Call pnic_check_duplex.  If we use an
+       external MII, then we mustn't use the internal negotiation.
+       (pnic_timer): Support Rx refilling on work overflow in
+       interrupt handler, as PNIC doesn't support a h/w timer.
+       * tulip_core.c (tulip_tbl[]): Modify default csr6
+
+2001-02-11  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_init_one): Call pci_enable_device
+       to ensure wakeup/resource assignment before checking those
+       values.
+       (tulip_init_one): Replace PCI ids with constants from pci_id.h.
+       (tulip_suspend, tulip_resume, tulip_remove_one): Call
+       pci_power_on/off (commented out for now).
+
+2001-02-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip.h: Add CFDD_xxx bits for Tulip power management
+       * tulip_core.c (tulip_set_power_state): New function,
+       manipulating Tulip chip power state where supported.
+       (tulip_up, tulip_down, tulip_init_one): Use it.
+
+2001-02-10  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_tx_timeout): Call netif_wake_queue
+       to ensure the next Tx is always sent to us.
+
+2001-01-27  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (tulip_remove_one): Fix mem leak by freeing
+       tp->media_tbl.  Add check for !dev, reformat code appropriately.
+
+2001-01-27  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_tbl[]: Comment all entries to make order and chip_id
+       relationship more clear.
+       * tulip_pci_tbl[]: Add new Accton PCI id (COMET chipset).
+
+2001-01-16  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: static vars no longer explicitly
+       initialized to zero.
+       * eeprom.c (tulip_read_eeprom): Make sure to delay between
+       EE_ENB and EE_ENB|EE_SHIFT_CLK.  Merged from becker tulip.c.
+
+2001-01-05  Peter De Schrijver  <p2@mind.be>
+
+       * eeprom.c (tulip_parse_eeprom): Interpret a bit more of 21142
+       extended format type 3 info blocks in a tulip SROM.
+
+2001-01-03  Matti Aarnio  <matti.aarnio@zmailer.org>
+
+       * media.c (tulip_select_media): Support media types 5 and 6
+
+2001-??-??  ??
+
+       * tulip_core.c: Add comment about LanMedia needing
+       a different driver.
+       Enable workarounds for early PCI chipsets.
+       Add IA64 csr0 support, update HPPA csr0 support.
+
+2000-12-17  Alan Cox  <alan@redhat.com>
+
+        * eeprom.c, timer.c, tulip.h, tulip_core.c: Merge support
+       for the Davicom's quirks into the main tulip.
+       Patch by Tobias Ringstrom
+
+2000-11-08  Jim Studt  <jim@federated.com>
+
+       * eeprom.c (tulip_parse_eeprom):  Check array bounds for
+       medianame[] and block_name[] arrays to avoid oops due
+       to bad values returned from hardware.
+
+2000-11-02  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c (set_rx_mode): This is synchronized via
+       dev->xmit_lock, so only the queueing of the setup frame needs to
+       be locked, against tulip_interrupt.
+
+2000-11-02  Alexey Kuznetov  <kuznet@ms2.inr.ac.ru>
+
+       * timer.c (tulip_timer): Call netif_carrier_{on,off} to report
+       link state to the rest of the kernel, and userspace.
+       * interrupt.c (tulip_interrupt): Remove tx_full.
+       * tulip.h: Likewise.
+       * tulip_core.c (tulip_init_ring, tulip_start_xmit, set_rx_mode):
+       Likewise.
+
+2000-10-18  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * tulip_core.c: (tulip_init_one) Print out ethernet interface
+       on error.  Print out a message when pci_enable_device fails.
+       Handle DMA alloc failure.
+
+2000-10-18  Jeff Garzik  <jgarzik@mandrakesoft.com>
+
+       * Makefile: New file.
+       * tulip_core.c (tulip_init_one): Correct error messages
+       on PIO/MMIO region reserve failure.
+       (tulip_init_one) Add new check to ensure that PIO region is
+       sufficient for our needs.
+
diff --git a/xen-2.4.16/drivers/net/tulip/Makefile b/xen-2.4.16/drivers/net/tulip/Makefile
new file mode 100644 (file)
index 0000000..a96b5b9
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o tulip.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/net/tulip/eeprom.c b/xen-2.4.16/drivers/net/tulip/eeprom.c
new file mode 100644 (file)
index 0000000..beb1430
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+       drivers/net/tulip/eeprom.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#include "tulip.h"
+#include <linux/init.h>
+#include <asm/unaligned.h>
+
+
+
+/* Serial EEPROM section. */
+/* The main routine to parse the very complicated SROM structure.
+   Search www.digital.com for "21X4 SROM" to get details.
+   This code is very complex, and will require changes to support
+   additional cards, so I'll be verbose about what is going on.
+   */
+
+/* Known cards that have old-style EEPROMs. */
+static struct eeprom_fixup eeprom_fixups[] __devinitdata = {
+  {"Asante", 0, 0, 0x94, {0x1e00, 0x0000, 0x0800, 0x0100, 0x018c,
+                         0x0000, 0x0000, 0xe078, 0x0001, 0x0050, 0x0018 }},
+  {"SMC9332DST", 0, 0, 0xC0, { 0x1e00, 0x0000, 0x0800, 0x041f,
+                          0x0000, 0x009E, /* 10baseT */
+                          0x0004, 0x009E, /* 10baseT-FD */
+                          0x0903, 0x006D, /* 100baseTx */
+                          0x0905, 0x006D, /* 100baseTx-FD */ }},
+  {"Cogent EM100", 0, 0, 0x92, { 0x1e00, 0x0000, 0x0800, 0x063f,
+                                0x0107, 0x8021, /* 100baseFx */
+                                0x0108, 0x8021, /* 100baseFx-FD */
+                                0x0100, 0x009E, /* 10baseT */
+                                0x0104, 0x009E, /* 10baseT-FD */
+                                0x0103, 0x006D, /* 100baseTx */
+                                0x0105, 0x006D, /* 100baseTx-FD */ }},
+  {"Maxtech NX-110", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x0513,
+                                  0x1001, 0x009E, /* 10base2, CSR12 0x10*/
+                                  0x0000, 0x009E, /* 10baseT */
+                                  0x0004, 0x009E, /* 10baseT-FD */
+                                  0x0303, 0x006D, /* 100baseTx, CSR12 0x03 */
+                                  0x0305, 0x006D, /* 100baseTx-FD CSR12 0x03 */}},
+  {"Accton EN1207", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x051F,
+                                 0x1B01, 0x0000, /* 10base2,   CSR12 0x1B */
+                                 0x0B00, 0x009E, /* 10baseT,   CSR12 0x0B */
+                                 0x0B04, 0x009E, /* 10baseT-FD,CSR12 0x0B */
+                                 0x1B03, 0x006D, /* 100baseTx, CSR12 0x1B */
+                                 0x1B05, 0x006D, /* 100baseTx-FD CSR12 0x1B */
+   }},
+  {"NetWinder", 0x00, 0x10, 0x57,
+       /* Default media = MII
+        * MII block, reset sequence (3) = 0x0821 0x0000 0x0001, capabilities 0x01e1
+        */
+       { 0x1e00, 0x0000, 0x000b, 0x8f01, 0x0103, 0x0300, 0x0821, 0x000, 0x0001, 0x0000, 0x01e1 }
+  },
+  {0, 0, 0, 0, {}}};
+
+
+static const char *block_name[] __devinitdata = {
+       "21140 non-MII",
+       "21140 MII PHY",
+       "21142 Serial PHY",
+       "21142 MII PHY",
+       "21143 SYM PHY",
+       "21143 reset method"
+};
+
+
+void __devinit tulip_parse_eeprom(struct net_device *dev)
+{
+       /* The last media info list parsed, for multiport boards.  */
+       static struct mediatable *last_mediatable;
+       static unsigned char *last_ee_data;
+       static int controller_index;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       unsigned char *ee_data = tp->eeprom;
+       int i;
+
+       tp->mtable = 0;
+       /* Detect an old-style (SA only) EEPROM layout:
+          memcmp(eedata, eedata+16, 8). */
+       for (i = 0; i < 8; i ++)
+               if (ee_data[i] != ee_data[16+i])
+                       break;
+       if (i >= 8) {
+               if (ee_data[0] == 0xff) {
+                       if (last_mediatable) {
+                               controller_index++;
+                               printk(KERN_INFO "%s:  Controller %d of multiport board.\n",
+                                          dev->name, controller_index);
+                               tp->mtable = last_mediatable;
+                               ee_data = last_ee_data;
+                               goto subsequent_board;
+                       } else
+                               printk(KERN_INFO "%s:  Missing EEPROM, this interface may "
+                                          "not work correctly!\n",
+                          dev->name);
+                       return;
+               }
+         /* Do a fix-up based on the vendor half of the station address prefix. */
+         for (i = 0; eeprom_fixups[i].name; i++) {
+               if (dev->dev_addr[0] == eeprom_fixups[i].addr0
+                       &&  dev->dev_addr[1] == eeprom_fixups[i].addr1
+                       &&  dev->dev_addr[2] == eeprom_fixups[i].addr2) {
+                 if (dev->dev_addr[2] == 0xE8  &&  ee_data[0x1a] == 0x55)
+                         i++;                  /* An Accton EN1207, not an outlaw Maxtech. */
+                 memcpy(ee_data + 26, eeprom_fixups[i].newtable,
+                                sizeof(eeprom_fixups[i].newtable));
+                 printk(KERN_INFO "%s: Old format EEPROM on '%s' board.  Using"
+                                " substitute media control info.\n",
+                                dev->name, eeprom_fixups[i].name);
+                 break;
+               }
+         }
+         if (eeprom_fixups[i].name == NULL) { /* No fixup found. */
+                 printk(KERN_INFO "%s: Old style EEPROM with no media selection "
+                                "information.\n",
+                          dev->name);
+               return;
+         }
+       }
+
+       controller_index = 0;
+       if (ee_data[19] > 1) {          /* Multiport board. */
+               last_ee_data = ee_data;
+       }
+subsequent_board:
+
+       if (ee_data[27] == 0) {         /* No valid media table. */
+       } else if (tp->chip_id == DC21041) {
+               unsigned char *p = (void *)ee_data + ee_data[27 + controller_index*3];
+               int media = get_u16(p);
+               int count = p[2];
+               p += 3;
+
+               printk(KERN_INFO "%s: 21041 Media table, default media %4.4x (%s).\n",
+                          dev->name, media,
+                          media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]);
+               for (i = 0; i < count; i++) {
+                       unsigned char media_block = *p++;
+                       int media_code = media_block & MEDIA_MASK;
+                       if (media_block & 0x40)
+                               p += 6;
+                       printk(KERN_INFO "%s:  21041 media #%d, %s.\n",
+                                  dev->name, media_code, medianame[media_code]);
+               }
+       } else {
+               unsigned char *p = (void *)ee_data + ee_data[27];
+               unsigned char csr12dir = 0;
+               int count, new_advertise = 0;
+               struct mediatable *mtable;
+               u16 media = get_u16(p);
+
+               p += 2;
+               if (tp->flags & CSR12_IN_SROM)
+                       csr12dir = *p++;
+               count = *p++;
+
+               /* there is no phy information, don't even try to build mtable */
+               if (count == 0) {
+                       if (tulip_debug > 0)
+                               printk(KERN_WARNING "%s: no phy info, aborting mtable build\n", dev->name);
+                       return;
+               }
+
+               mtable = (struct mediatable *)
+                       kmalloc(sizeof(struct mediatable) + count*sizeof(struct medialeaf),
+                                       GFP_KERNEL);
+               if (mtable == NULL)
+                       return;                         /* Horrible, impossible failure. */
+               last_mediatable = tp->mtable = mtable;
+               mtable->defaultmedia = media;
+               mtable->leafcount = count;
+               mtable->csr12dir = csr12dir;
+               mtable->has_nonmii = mtable->has_mii = mtable->has_reset = 0;
+               mtable->csr15dir = mtable->csr15val = 0;
+
+               printk(KERN_INFO "%s:  EEPROM default media type %s.\n", dev->name,
+                          media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]);
+               for (i = 0; i < count; i++) {
+                       struct medialeaf *leaf = &mtable->mleaf[i];
+
+                       if ((p[0] & 0x80) == 0) { /* 21140 Compact block. */
+                               leaf->type = 0;
+                               leaf->media = p[0] & 0x3f;
+                               leaf->leafdata = p;
+                               if ((p[2] & 0x61) == 0x01)      /* Bogus, but Znyx boards do it. */
+                                       mtable->has_mii = 1;
+                               p += 4;
+                       } else {
+                               leaf->type = p[1];
+                               if (p[1] == 0x05) {
+                                       mtable->has_reset = i;
+                                       leaf->media = p[2] & 0x0f;
+                               } else if (tp->chip_id == DM910X && p[1] == 0x80) {
+                                       /* Hack to ignore Davicom delay period block */
+                                       mtable->leafcount--;
+                                       count--;
+                                       i--;
+                                       leaf->leafdata = p + 2;
+                                       p += (p[0] & 0x3f) + 1;
+                                       continue;
+                               } else if (p[1] & 1) {
+                                       int gpr_len, reset_len;
+
+                                       mtable->has_mii = 1;
+                                       leaf->media = 11;
+                                       gpr_len=p[3]*2;
+                                       reset_len=p[4+gpr_len]*2;
+                                       new_advertise |= get_u16(&p[7+gpr_len+reset_len]);
+                               } else {
+                                       mtable->has_nonmii = 1;
+                                       leaf->media = p[2] & MEDIA_MASK;
+                                       /* Davicom's media number for 100BaseTX is strange */
+                                       if (tp->chip_id == DM910X && leaf->media == 1)
+                                               leaf->media = 3;
+                                       switch (leaf->media) {
+                                       case 0: new_advertise |= 0x0020; break;
+                                       case 4: new_advertise |= 0x0040; break;
+                                       case 3: new_advertise |= 0x0080; break;
+                                       case 5: new_advertise |= 0x0100; break;
+                                       case 6: new_advertise |= 0x0200; break;
+                                       }
+                                       if (p[1] == 2  &&  leaf->media == 0) {
+                                               if (p[2] & 0x40) {
+                                                       u32 base15 = get_unaligned((u16*)&p[7]);
+                                                       mtable->csr15dir =
+                                                               (get_unaligned((u16*)&p[9])<<16) + base15;
+                                                       mtable->csr15val =
+                                                               (get_unaligned((u16*)&p[11])<<16) + base15;
+                                               } else {
+                                                       mtable->csr15dir = get_unaligned((u16*)&p[3])<<16;
+                                                       mtable->csr15val = get_unaligned((u16*)&p[5])<<16;
+                                               }
+                                       }
+                               }
+                               leaf->leafdata = p + 2;
+                               p += (p[0] & 0x3f) + 1;
+                       }
+                       if (tulip_debug > 1  &&  leaf->media == 11) {
+                               unsigned char *bp = leaf->leafdata;
+                               printk(KERN_INFO "%s:  MII interface PHY %d, setup/reset "
+                                          "sequences %d/%d long, capabilities %2.2x %2.2x.\n",
+                                          dev->name, bp[0], bp[1], bp[2 + bp[1]*2],
+                                          bp[5 + bp[2 + bp[1]*2]*2], bp[4 + bp[2 + bp[1]*2]*2]);
+                       }
+                       printk(KERN_INFO "%s:  Index #%d - Media %s (#%d) described "
+                                  "by a %s (%d) block.\n",
+                                  dev->name, i, medianame[leaf->media & 15], leaf->media,
+                                  leaf->type < ARRAY_SIZE(block_name) ? block_name[leaf->type] : "<unknown>",
+                                  leaf->type);
+               }
+               if (new_advertise)
+                       tp->sym_advertise = new_advertise;
+       }
+}
+/* Reading a serial EEPROM is a "bit" grungy, but we work our way through:->.*/
+
+/*  EEPROM_Ctrl bits. */
+#define EE_SHIFT_CLK   0x02    /* EEPROM shift clock. */
+#define EE_CS                  0x01    /* EEPROM chip select. */
+#define EE_DATA_WRITE  0x04    /* Data from the Tulip to EEPROM. */
+#define EE_WRITE_0             0x01
+#define EE_WRITE_1             0x05
+#define EE_DATA_READ   0x08    /* Data from the EEPROM chip. */
+#define EE_ENB                 (0x4800 | EE_CS)
+
+/* Delay between EEPROM clock transitions.
+   Even at 33 MHz, current PCI implementations don't overrun the EEPROM clock.
+   We add a bus turn-around to ensure that this remains true. */
+#define eeprom_delay() inl(ee_addr)
+
+/* The EEPROM commands include the always-set leading bit. */
+#define EE_READ_CMD            (6)
+
+/* Note: this routine returns extra data bits for size detection. */
+int __devinit tulip_read_eeprom(long ioaddr, int location, int addr_len)
+{
+       int i;
+       unsigned retval = 0;
+       long ee_addr = ioaddr + CSR9;
+       int read_cmd = location | (EE_READ_CMD << addr_len);
+
+       outl(EE_ENB & ~EE_CS, ee_addr);
+       outl(EE_ENB, ee_addr);
+
+       /* Shift the read command bits out. */
+       for (i = 4 + addr_len; i >= 0; i--) {
+               short dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0;
+               outl(EE_ENB | dataval, ee_addr);
+               eeprom_delay();
+               outl(EE_ENB | dataval | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay();
+               retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 1 : 0);
+       }
+       outl(EE_ENB, ee_addr);
+       eeprom_delay();
+
+       for (i = 16; i > 0; i--) {
+               outl(EE_ENB | EE_SHIFT_CLK, ee_addr);
+               eeprom_delay();
+               retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 1 : 0);
+               outl(EE_ENB, ee_addr);
+               eeprom_delay();
+       }
+
+       /* Terminate the EEPROM access. */
+       outl(EE_ENB & ~EE_CS, ee_addr);
+       return retval;
+}
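
A typical caller loops over the word addresses to capture the whole SROM
image. A hedged sketch, assuming a 6-bit EEPROM address width and the
little-endian byte layout the parser above expects (buffer name illustrative):

    /* Illustrative only: read 64 16-bit words into a 128-byte image. */
    {
        unsigned char ee_data[128];
        int i;

        for (i = 0; i < 64; i++) {
            int w = tulip_read_eeprom(ioaddr, i, 6);
            ee_data[2*i]     = w & 0xff;
            ee_data[2*i + 1] = (w >> 8) & 0xff;
        }
    }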
+
diff --git a/xen-2.4.16/drivers/net/tulip/interrupt.c b/xen-2.4.16/drivers/net/tulip/interrupt.c
new file mode 100644 (file)
index 0000000..c92b12e
--- /dev/null
@@ -0,0 +1,559 @@
+/*
+       drivers/net/tulip/interrupt.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#include "tulip.h"
+#include <linux/config.h>
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+
+
+int tulip_rx_copybreak;
+unsigned int tulip_max_interrupt_work;
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+
+#define MIT_SIZE 15
+unsigned int mit_table[MIT_SIZE+1] =
+{
+        /*  CSR11: 21143 hardware Mitigation Control Interrupt.
+            We use only RX mitigation; other techniques are used
+            for TX interrupt mitigation.
+
+           31    Cycle Size (timer control)
+           30:27 TX timer in 16 * Cycle size
+           26:24 TX: number of pkts before interrupt
+           23:20 RX timer in Cycle size
+           19:17 RX: number of pkts before interrupt
+           16    Continuous Mode (CM)
+        */
+
+        0x0,             /* IM disabled */
+        0x80150000,      /* RX time = 1, RX pkts = 2, CM = 1 */
+        0x80150000,
+        0x80270000,
+        0x80370000,
+        0x80490000,
+        0x80590000,
+        0x80690000,
+        0x807B0000,
+        0x808B0000,
+        0x809D0000,
+        0x80AD0000,
+        0x80BD0000,
+        0x80CF0000,
+        0x80DF0000,
+//       0x80FF0000      /* RX time = 16, RX pkts = 7, CM = 1 */
+        0x80F10000      /* RX time = 16, RX pkts = 0, CM = 1 */
+};
+#endif
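For reference, the table entries pack into CSR11 exactly as the field list above describes. A small sketch (mit_build is a hypothetical helper, not driver code):

    /* Pack the CSR11 mitigation fields documented in the comment above. */
    static inline unsigned int mit_build(unsigned rx_timer, unsigned rx_pkts, unsigned cm)
    {
            return (1u << 31)                  /* cycle size (timer control) */
                 | ((rx_timer & 0xf) << 20)    /* RX timer, in cycle-size units */
                 | ((rx_pkts & 0x7) << 17)     /* RX packets before interrupt */
                 | ((cm & 1) << 16);           /* continuous mode */
    }
    /* e.g. mit_build(1, 2, 1) == 0x80150000, the table's second entry. */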
+
+
+int tulip_refill_rx(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int entry;
+       int refilled = 0;
+
+       /* Refill the Rx ring buffers. */
+       for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) {
+               entry = tp->dirty_rx % RX_RING_SIZE;
+               if (tp->rx_buffers[entry].skb == NULL) {
+                       struct sk_buff *skb;
+                       dma_addr_t mapping;
+
+                       skb = tp->rx_buffers[entry].skb = dev_alloc_skb(PKT_BUF_SZ);
+                       if (skb == NULL)
+                               break;
+
+                       mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ,
+                                                PCI_DMA_FROMDEVICE);
+                       tp->rx_buffers[entry].mapping = mapping;
+
+                       skb->dev = dev;                 /* Mark as being used by this device. */
+                       tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping);
+                       refilled++;
+               }
+               tp->rx_ring[entry].status = cpu_to_le32(DescOwned);
+       }
+       if(tp->chip_id == LC82C168) {
+               if(((inl(dev->base_addr + CSR5)>>17)&0x07) == 4) {
+                       /* Rx stopped due to out of buffers,
+                        * restart it
+                        */
+                       outl(0x01, dev->base_addr + CSR2);
+               }
+       }
+       return refilled;
+}
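The refill loop above relies on cur_rx and dirty_rx being free-running counters: they only ever increase, their difference is the number of descriptors consumed but not yet refilled, and "% RX_RING_SIZE" maps a counter onto a physical slot; unsigned arithmetic keeps the subtraction valid across wraparound. A toy illustration (use_slot and the ring size are hypothetical):

    #define RING_SIZE 128                     /* stands in for RX_RING_SIZE */
    extern void use_slot(unsigned int slot);  /* hypothetical per-slot work */

    static unsigned int cur_rx, dirty_rx;     /* free-running counters */

    static void refill_all(void)
    {
            while (cur_rx - dirty_rx > 0) {   /* consumed but not yet refilled */
                    use_slot(dirty_rx % RING_SIZE);
                    dirty_rx++;
            }
    }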
+
+
+static int tulip_rx(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int entry = tp->cur_rx % RX_RING_SIZE;
+       int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx;
+       int received = 0;
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+        int drop = 0, mit_sel = 0;
+
+/* that one buffer is needed for mit activation; or might be a
+   bug in the ring buffer code; check later -- JHS */
+
+        if (rx_work_limit >= RX_RING_SIZE) rx_work_limit--;
+#endif
+
+       if (tulip_debug > 4)
+               printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry,
+                          tp->rx_ring[entry].status);
+       /* If we own the next entry, it is a new packet. Send it up. */
+       while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) {
+               s32 status = le32_to_cpu(tp->rx_ring[entry].status);
+
+               if (tulip_debug > 5)
+                       printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n",
+                                  dev->name, entry, status);
+               if (--rx_work_limit < 0)
+                       break;
+               if ((status & 0x38008300) != 0x0300) {
+                       if ((status & 0x38000300) != 0x0300) {
+                               /* Ignore earlier buffers. */
+                               if ((status & 0xffff) != 0x7fff) {
+                                       if (tulip_debug > 1)
+                                               printk(KERN_WARNING "%s: Oversized Ethernet frame "
+                                                          "spanned multiple buffers, status %8.8x!\n",
+                                                          dev->name, status);
+                                       tp->stats.rx_length_errors++;
+                               }
+                       } else if (status & RxDescFatalErr) {
+                               /* There was a fatal error. */
+                               if (tulip_debug > 2)
+                                       printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n",
+                                                  dev->name, status);
+                               tp->stats.rx_errors++; /* end of a packet.*/
+                               if (status & 0x0890) tp->stats.rx_length_errors++;
+                               if (status & 0x0004) tp->stats.rx_frame_errors++;
+                               if (status & 0x0002) tp->stats.rx_crc_errors++;
+                               if (status & 0x0001) tp->stats.rx_fifo_errors++;
+                       }
+               } else {
+                       /* Omit the four octet CRC from the length. */
+                       short pkt_len = ((status >> 16) & 0x7ff) - 4;
+                       struct sk_buff *skb;
+
+#ifndef final_version
+                       if (pkt_len > 1518) {
+                               printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n",
+                                          dev->name, pkt_len, pkt_len);
+                               pkt_len = 1518;
+                               tp->stats.rx_length_errors++;
+                       }
+#endif
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                        drop = atomic_read(&netdev_dropping);
+                        if (drop)
+                                goto throttle;
+#endif
+                       /* Check if the packet is long enough to accept without copying
+                          to a minimally-sized skbuff. */
+                       if (pkt_len < tulip_rx_copybreak
+                               && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+                               skb->dev = dev;
+                               skb_reserve(skb, 2);    /* 16 byte align the IP header */
+                               pci_dma_sync_single(tp->pdev,
+                                                   tp->rx_buffers[entry].mapping,
+                                                   pkt_len, PCI_DMA_FROMDEVICE);
+#if ! defined(__alpha__)
+                               eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail,
+                                                pkt_len, 0);
+                               skb_put(skb, pkt_len);
+#else
+                               memcpy(skb_put(skb, pkt_len),
+                                      tp->rx_buffers[entry].skb->tail,
+                                      pkt_len);
+#endif
+                       } else {        /* Pass up the skb already on the Rx ring. */
+                               char *temp = skb_put(skb = tp->rx_buffers[entry].skb,
+                                                    pkt_len);
+
+#ifndef final_version
+                               if (tp->rx_buffers[entry].mapping !=
+                                   le32_to_cpu(tp->rx_ring[entry].buffer1)) {
+                                       printk(KERN_ERR "%s: Internal fault: The skbuff addresses "
+                                              "do not match in tulip_rx: %08x vs. %08x %p / %p.\n",
+                                              dev->name,
+                                              le32_to_cpu(tp->rx_ring[entry].buffer1),
+                                              tp->rx_buffers[entry].mapping,
+                                              skb->head, temp);
+                               }
+#endif
+
+                               pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping,
+                                                PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+
+                               tp->rx_buffers[entry].skb = NULL;
+                               tp->rx_buffers[entry].mapping = 0;
+                       }
+                       skb->protocol = eth_type_trans(skb, dev);
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                        mit_sel =
+#endif
+                       netif_rx(skb);
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                        switch (mit_sel) {
+                        case NET_RX_SUCCESS:
+                        case NET_RX_CN_LOW:
+                        case NET_RX_CN_MOD:
+                                break;
+
+                        case NET_RX_CN_HIGH:
+                                rx_work_limit -= NET_RX_CN_HIGH; /* additional*/
+                                break;
+                        case NET_RX_DROP:
+                                rx_work_limit = -1;
+                                break;
+                        default:
+                                printk("unknown feedback return code %d\n", mit_sel);
+                                break;
+                        }
+
+                        drop = atomic_read(&netdev_dropping);
+                        if (drop) {
+throttle:
+                                rx_work_limit = -1;
+                                mit_sel = NET_RX_DROP;
+
+                                if (tp->fc_bit) {
+                                        long ioaddr = dev->base_addr;
+
+                                        /* disable Rx & RxNoBuf ints. */
+                                        outl(tulip_tbl[tp->chip_id].valid_intrs&RX_A_NBF_STOP, ioaddr + CSR7);
+                                        set_bit(tp->fc_bit, &netdev_fc_xoff);
+                                }
+                        }
+#endif
+                       dev->last_rx = jiffies;
+                       tp->stats.rx_packets++;
+                       tp->stats.rx_bytes += pkt_len;
+               }
+               received++;
+               entry = (++tp->cur_rx) % RX_RING_SIZE;
+       }
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+
+        /* We use this simplistic scheme for IM; it has been proven in
+           real-life installations. We could keep IM enabled
+           continuously, but that would cause unnecessary latency.
+           Unfortunately we can't use all the NET_RX_* feedback here:
+           that would turn on IM for devices that are not contributing
+           to backlog congestion, at the cost of unnecessary latency.
+
+           We monitor the device RX-ring and have:
+
+           HW Interrupt Mitigation either ON or OFF.
+
+           ON:  More than 1 pkt received (per intr.) OR we are dropping
+           OFF: Only 1 pkt received
+
+           Note: we only use the min and max (0, 15) settings from mit_table. */
+
+
+        if (tp->flags & HAS_INTR_MITIGATION) {
+                if ((received > 1 || mit_sel == NET_RX_DROP)
+                    && tp->mit_sel != 15) {
+                        tp->mit_sel = 15;
+                        tp->mit_change = 1; /* Force IM change */
+                }
+                if ((received <= 1 && mit_sel != NET_RX_DROP) && tp->mit_sel != 0) {
+                        tp->mit_sel = 0;
+                        tp->mit_change = 1; /* Force IM change */
+                }
+        }
+
+        return RX_RING_SIZE+1; /* maxrx+1 */
+#else
+       return received;
+#endif
+}
+
+
+/* The interrupt handler does all of the Rx thread work and cleans up
+   after the Tx thread. */
+void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
+{
+       struct net_device *dev = (struct net_device *)dev_instance;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int csr5;
+       int entry;
+       int missed;
+       int rx = 0;
+       int tx = 0;
+       int oi = 0;
+       int maxrx = RX_RING_SIZE;
+       int maxtx = TX_RING_SIZE;
+       int maxoi = TX_RING_SIZE;
+       unsigned int work_count = tulip_max_interrupt_work;
+
+       /* Let's see whether the interrupt really is for us */
+       csr5 = inl(ioaddr + CSR5);
+
+       if ((csr5 & (NormalIntr|AbnormalIntr)) == 0)
+               return;
+
+       tp->nir++;
+
+       do {
+               /* Acknowledge all of the current interrupt sources ASAP. */
+               outl(csr5 & 0x0001ffff, ioaddr + CSR5);
+
+               if (tulip_debug > 4)
+                       printk(KERN_DEBUG "%s: interrupt  csr5=%#8.8x new csr5=%#8.8x.\n",
+                                  dev->name, csr5, inl(dev->base_addr + CSR5));
+
+               if (csr5 & (RxIntr | RxNoBuf)) {
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                        if ((!tp->fc_bit) ||
+                           (!test_bit(tp->fc_bit, &netdev_fc_xoff)))
+#endif
+                               rx += tulip_rx(dev);
+                       tulip_refill_rx(dev);
+               }
+
+               if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) {
+                       unsigned int dirty_tx;
+
+                       spin_lock(&tp->lock);
+
+                       for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0;
+                                dirty_tx++) {
+                               int entry = dirty_tx % TX_RING_SIZE;
+                               int status = le32_to_cpu(tp->tx_ring[entry].status);
+
+                               if (status < 0)
+                                       break;                  /* It still has not been Txed */
+
+                               /* Check for Rx filter setup frames. */
+                               if (tp->tx_buffers[entry].skb == NULL) {
+                                       /* test because dummy frames not mapped */
+                                       if (tp->tx_buffers[entry].mapping)
+                                               pci_unmap_single(tp->pdev,
+                                                        tp->tx_buffers[entry].mapping,
+                                                        sizeof(tp->setup_frame),
+                                                        PCI_DMA_TODEVICE);
+                                       continue;
+                               }
+
+                               if (status & 0x8000) {
+                                       /* There was a major error, log it. */
+#ifndef final_version
+                                       if (tulip_debug > 1)
+                                               printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n",
+                                                          dev->name, status);
+#endif
+                                       tp->stats.tx_errors++;
+                                       if (status & 0x4104) tp->stats.tx_aborted_errors++;
+                                       if (status & 0x0C00) tp->stats.tx_carrier_errors++;
+                                       if (status & 0x0200) tp->stats.tx_window_errors++;
+                                       if (status & 0x0002) tp->stats.tx_fifo_errors++;
+                                       if ((status & 0x0080) && tp->full_duplex == 0)
+                                               tp->stats.tx_heartbeat_errors++;
+                               } else {
+                                       tp->stats.tx_bytes +=
+                                               tp->tx_buffers[entry].skb->len;
+                                       tp->stats.collisions += (status >> 3) & 15;
+                                       tp->stats.tx_packets++;
+                               }
+
+                               pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping,
+                                                tp->tx_buffers[entry].skb->len,
+                                                PCI_DMA_TODEVICE);
+
+                               /* Free the original skb. */
+                               dev_kfree_skb_irq(tp->tx_buffers[entry].skb);
+                               tp->tx_buffers[entry].skb = NULL;
+                               tp->tx_buffers[entry].mapping = 0;
+                               tx++;
+                       }
+
+#ifndef final_version
+                       if (tp->cur_tx - dirty_tx > TX_RING_SIZE) {
+                               printk(KERN_ERR "%s: Out-of-sync dirty pointer, %d vs. %d.\n",
+                                          dev->name, dirty_tx, tp->cur_tx);
+                               dirty_tx += TX_RING_SIZE;
+                       }
+#endif
+
+                       if (tp->cur_tx - dirty_tx < TX_RING_SIZE - 2)
+                               netif_wake_queue(dev);
+
+                       tp->dirty_tx = dirty_tx;
+                       if (csr5 & TxDied) {
+                               if (tulip_debug > 2)
+                                       printk(KERN_WARNING "%s: The transmitter stopped."
+                                                  "  CSR5 is %x, CSR6 %x, new CSR6 %x.\n",
+                                                  dev->name, csr5, inl(ioaddr + CSR6), tp->csr6);
+                               tulip_restart_rxtx(tp);
+                       }
+                       spin_unlock(&tp->lock);
+               }
+
+               /* Log errors. */
+               if (csr5 & AbnormalIntr) {      /* Abnormal error summary bit. */
+                       if (csr5 == 0xffffffff)
+                               break;
+                       if (csr5 & TxJabber) tp->stats.tx_errors++;
+                       if (csr5 & TxFIFOUnderflow) {
+                               if ((tp->csr6 & 0xC000) != 0xC000)
+                                       tp->csr6 += 0x4000;     /* Bump up the Tx threshold */
+                               else
+                                       tp->csr6 |= 0x00200000;  /* Store-n-forward. */
+                               /* Restart the transmit process. */
+                               tulip_restart_rxtx(tp);
+                               outl(0, ioaddr + CSR1);
+                       }
+                       if (csr5 & (RxDied | RxNoBuf)) {
+                               if (tp->flags & COMET_MAC_ADDR) {
+                                       outl(tp->mc_filter[0], ioaddr + 0xAC);
+                                       outl(tp->mc_filter[1], ioaddr + 0xB0);
+                               }
+                       }
+                       if (csr5 & RxDied) {            /* Missed a Rx frame. */
+                                tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff;
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                               if (tp->fc_bit && !test_bit(tp->fc_bit, &netdev_fc_xoff)) {
+                                       tp->stats.rx_errors++;
+                                       tulip_start_rxtx(tp);
+                               }
+#else
+                               tp->stats.rx_errors++;
+                               tulip_start_rxtx(tp);
+#endif
+                       }
+                       /*
+                        * NB: t21142_lnk_change() does a del_timer_sync(), so be careful if this
+                        * call is ever done under the spinlock
+                        */
+                       if (csr5 & (TPLnkPass | TPLnkFail | 0x08000000)) {
+                               if (tp->link_change)
+                                       (tp->link_change)(dev, csr5);
+                       }
+                       if (csr5 & SytemError) {
+                               int error = (csr5 >> 23) & 7;
+                               /* oops, we hit a PCI error.  The code produced corresponds
+                                * to the reason:
+                                *  0 - parity error
+                                *  1 - master abort
+                                *  2 - target abort
+                                * Note that on parity error, we should do a software reset
+                                * of the chip to get it back into a sane state (according
+                                * to the 21142/3 docs that is).
+                                *   -- rmk
+                                */
+                               printk(KERN_ERR "%s: (%lu) System Error occurred (%d)\n",
+                                       dev->name, tp->nir, error);
+                       }
+                       /* Clear all error sources, including undocumented ones! */
+                       outl(0x0800f7ba, ioaddr + CSR5);
+                       oi++;
+               }
+               if (csr5 & TimerInt) {
+
+                       if (tulip_debug > 2)
+                               printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n",
+                                          dev->name, csr5);
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                        if (tp->fc_bit && (test_bit(tp->fc_bit, &netdev_fc_xoff)))
+                          if (net_ratelimit()) printk("BUG!! enabling interrupt when FC off (timerintr.)\n");
+#endif
+                       outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
+                       tp->ttimer = 0;
+                       oi++;
+               }
+               if (tx > maxtx || rx > maxrx || oi > maxoi) {
+                       if (tulip_debug > 1)
+                               printk(KERN_WARNING "%s: Too much work during an interrupt, "
+                                          "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", dev->name, csr5, tp->nir, tx, rx, oi);
+
+                       /* Acknowledge all interrupt sources. */
+                        outl(0x8001ffff, ioaddr + CSR5);
+                        if (tp->flags & HAS_INTR_MITIGATION) {
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+                                if(tp->mit_change) {
+                                        outl(mit_table[tp->mit_sel], ioaddr + CSR11);
+                                        tp->mit_change = 0;
+                                }
+#else
+                               /* Josip Loncaric at ICASE did extensive experimentation
+                                  to develop a good interrupt mitigation setting. */
+                                outl(0x8b240000, ioaddr + CSR11);
+#endif
+                        } else if (tp->chip_id == LC82C168) {
+                               /* the LC82C168 doesn't have a hw timer.*/
+                               outl(0x00, ioaddr + CSR7);
+                               mod_timer(&tp->timer, RUN_AT(HZ/50));
+                       } else {
+                               /* Mask all interrupting sources, set timer to
+                                  re-enable. */
+#ifndef CONFIG_NET_HW_FLOWCONTROL
+                                outl(((~csr5) & 0x0001ebef) | AbnormalIntr | TimerInt, ioaddr + CSR7);
+                                outl(0x0012, ioaddr + CSR11);
+#endif
+                        }
+                       break;
+               }
+
+               work_count--;
+               if (work_count == 0)
+                       break;
+
+               csr5 = inl(ioaddr + CSR5);
+       } while ((csr5 & (NormalIntr|AbnormalIntr)) != 0);
+
+       tulip_refill_rx(dev);
+
+       /* check if the card is in suspend mode */
+       entry = tp->dirty_rx % RX_RING_SIZE;
+       if (tp->rx_buffers[entry].skb == NULL) {
+               if (tulip_debug > 1)
+                       printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx);
+               if (tp->chip_id == LC82C168) {
+                       outl(0x00, ioaddr + CSR7);
+                       mod_timer(&tp->timer, RUN_AT(HZ/50));
+               } else {
+                       if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) {
+                               if (tulip_debug > 1)
+                                       printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", dev->name, tp->nir);
+                               outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt,
+                                       ioaddr + CSR7);
+                               outl(TimerInt, ioaddr + CSR5);
+                               outl(12, ioaddr + CSR11);
+                               tp->ttimer = 1;
+                       }
+               }
+       }
+
+       if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) {
+               tp->stats.rx_dropped += missed & 0x10000 ? 0x10000 : missed;
+       }
+
+       if (tulip_debug > 4)
+               printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n",
+                          dev->name, inl(ioaddr + CSR5));
+
+}
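Stripped of the device specifics, the handler above is an instance of the bounded-work interrupt pattern: acknowledge, do at most a budgeted amount of work, and if the budget runs out, mask the sources and let a timer re-enable them later. A sketch under hypothetical names (none of these helpers are real APIs):

    struct my_dev;                                  /* hypothetical device type */
    extern int  device_has_work(struct my_dev *d);
    extern void ack_sources(struct my_dev *d);
    extern void handle_one(struct my_dev *d);
    extern void mask_sources(struct my_dev *d);
    extern void arm_reenable_timer(struct my_dev *d);
    #define MAX_WORK 25                             /* cf. tulip_max_interrupt_work */

    void budgeted_isr(struct my_dev *d)
    {
            unsigned int budget = MAX_WORK;

            while (device_has_work(d)) {
                    ack_sources(d);                 /* cf. the CSR5 write-back */
                    handle_one(d);                  /* rx/tx/error work */
                    if (--budget == 0) {
                            mask_sources(d);        /* cf. the CSR7/CSR11 handling */
                            arm_reenable_timer(d);  /* finish later, off the hot path */
                            break;
                    }
            }
    }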
diff --git a/xen-2.4.16/drivers/net/tulip/media.c b/xen-2.4.16/drivers/net/tulip/media.c
new file mode 100644 (file)
index 0000000..e0bb197
--- /dev/null
@@ -0,0 +1,563 @@
+/*
+       drivers/net/tulip/media.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+//#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include "tulip.h"
+
+
+/* This is a mysterious value that can be written to CSR11 in the 21040 (only)
+   to support a pre-NWay full-duplex signaling mechanism using short frames.
+   No one knows what it should be, but if left at its default value some
+   10base2(!) packets trigger a full-duplex-request interrupt. */
+#define FULL_DUPLEX_MAGIC      0x6969
+
+/* The maximum data clock rate is 2.5 MHz.  The minimum timing is usually
+   met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+   "overclocking" issues or future 66MHz PCI. */
+#define mdio_delay() inl(mdio_addr)
+
+/* Read and write the MII registers using software-generated serial
+   MDIO protocol.  It is just different enough from the EEPROM protocol
+   to not share code.  The maximum data clock rate is 2.5 MHz. */
+#define MDIO_SHIFT_CLK         0x10000
+#define MDIO_DATA_WRITE0       0x00000
+#define MDIO_DATA_WRITE1       0x20000
+#define MDIO_ENB               0x00000 /* Ignore the 0x02000 databook setting. */
+#define MDIO_ENB_IN            0x40000
+#define MDIO_DATA_READ         0x80000
+
+static const unsigned char comet_miireg2offset[32] = {
+       0xB4, 0xB8, 0xBC, 0xC0,  0xC4, 0xC8, 0xCC, 0,  0,0,0,0,  0,0,0,0,
+       0,0xD0,0,0,  0,0,0,0,  0,0,0,0, 0, 0xD4, 0xD8, 0xDC, };
+
+
+/* MII transceiver control section.
+   Read and write the MII registers using software-generated serial
+   MDIO protocol.  See the MII specifications or DP83840A data sheet
+   for details. */
+
+int tulip_mdio_read(struct net_device *dev, int phy_id, int location)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int i;
+       int read_cmd = (0xf6 << 10) | ((phy_id & 0x1f) << 5) | location;
+       int retval = 0;
+       long ioaddr = dev->base_addr;
+       long mdio_addr = ioaddr + CSR9;
+       unsigned long flags;
+
+       if (location & ~0x1f)
+               return 0xffff;
+
+       if (tp->chip_id == COMET  &&  phy_id == 30) {
+               if (comet_miireg2offset[location])
+                       return inl(ioaddr + comet_miireg2offset[location]);
+               return 0xffff;
+       }
+
+       spin_lock_irqsave(&tp->mii_lock, flags);
+       if (tp->chip_id == LC82C168) {
+               int i = 1000;
+               outl(0x60020000 + (phy_id<<23) + (location<<18), ioaddr + 0xA0);
+               inl(ioaddr + 0xA0);
+               inl(ioaddr + 0xA0);
+               while (--i > 0) {
+                       barrier();
+                       if ( ! ((retval = inl(ioaddr + 0xA0)) & 0x80000000))
+                               break;
+               }
+               spin_unlock_irqrestore(&tp->mii_lock, flags);
+               return retval & 0xffff;
+       }
+
+       /* Establish sync by sending at least 32 logic ones. */
+       for (i = 32; i >= 0; i--) {
+               outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr);
+               mdio_delay();
+               outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Shift the read command bits out. */
+       for (i = 15; i >= 0; i--) {
+               int dataval = (read_cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0;
+
+               outl(MDIO_ENB | dataval, mdio_addr);
+               mdio_delay();
+               outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Read the two transition, 16 data, and wire-idle bits. */
+       for (i = 19; i > 0; i--) {
+               outl(MDIO_ENB_IN, mdio_addr);
+               mdio_delay();
+               retval = (retval << 1) | ((inl(mdio_addr) & MDIO_DATA_READ) ? 1 : 0);
+               outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+
+       spin_unlock_irqrestore(&tp->mii_lock, flags);
+       return (retval>>1) & 0xffff;
+}
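The 16 command bits shifted out above form a standard MII management read frame. A worked example for PHY 1, register 1 (the BMSR); only read_cmd is computed, the rest is annotation:

    void sketch_mdio_read_frame(void)
    {
            int phy_id = 1, location = 1;   /* e.g. PHY 1, register 1 (BMSR) */
            int read_cmd = (0xf6 << 10) | ((phy_id & 0x1f) << 5) | location;

            /* Low 16 bits of read_cmd, shifted out MSB first:
             *
             *   11 01 10 00001 00001
             *   |  |  |  |     `---- register address (1 = BMSR)
             *   |  |  |  `---------- PHY address (1)
             *   |  |  `------------- opcode: read (a write uses 01)
             *   |  `---------------- start-of-frame delimiter
             *   `------------------- last two preamble ones
             *
             * The two turnaround bits, 16 data bits and one idle bit are
             * then clocked in -- the 19 iterations of the read loop above. */
            (void)read_cmd;
    }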
+
+void tulip_mdio_write(struct net_device *dev, int phy_id, int location, int val)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int i;
+       int cmd = (0x5002 << 16) | ((phy_id & 0x1f) << 23) | (location<<18) | (val & 0xffff);
+       long ioaddr = dev->base_addr;
+       long mdio_addr = ioaddr + CSR9;
+       unsigned long flags;
+
+       if (location & ~0x1f)
+               return;
+
+       if (tp->chip_id == COMET && phy_id == 30) {
+               if (comet_miireg2offset[location])
+                       outl(val, ioaddr + comet_miireg2offset[location]);
+               return;
+       }
+
+       spin_lock_irqsave(&tp->mii_lock, flags);
+       if (tp->chip_id == LC82C168) {
+               int i = 1000;
+               outl(cmd, ioaddr + 0xA0);
+               do {
+                       barrier();
+                       if ( ! (inl(ioaddr + 0xA0) & 0x80000000))
+                               break;
+               } while (--i > 0);
+               spin_unlock_irqrestore(&tp->mii_lock, flags);
+               return;
+       }
+
+       /* Establish sync by sending 32 logic ones. */
+       for (i = 32; i >= 0; i--) {
+               outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr);
+               mdio_delay();
+               outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Shift the command bits out. */
+       for (i = 31; i >= 0; i--) {
+               int dataval = (cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0;
+               outl(MDIO_ENB | dataval, mdio_addr);
+               mdio_delay();
+               outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+       /* Clear out extra bits. */
+       for (i = 2; i > 0; i--) {
+               outl(MDIO_ENB_IN, mdio_addr);
+               mdio_delay();
+               outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+               mdio_delay();
+       }
+
+       spin_unlock_irqrestore(&tp->mii_lock, flags);
+}
+
+
+/* Set up the transceiver control registers for the selected media type. */
+void tulip_select_media(struct net_device *dev, int startup)
+{
+       long ioaddr = dev->base_addr;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       struct mediatable *mtable = tp->mtable;
+       u32 new_csr6;
+       int i;
+
+       if (mtable) {
+               struct medialeaf *mleaf = &mtable->mleaf[tp->cur_index];
+               unsigned char *p = mleaf->leafdata;
+               switch (mleaf->type) {
+               case 0:                                 /* 21140 non-MII xcvr. */
+                       if (tulip_debug > 1)
+                               printk(KERN_DEBUG "%s: Using a 21140 non-MII transceiver"
+                                          " with control setting %2.2x.\n",
+                                          dev->name, p[1]);
+                       dev->if_port = p[0];
+                       if (startup)
+                               outl(mtable->csr12dir | 0x100, ioaddr + CSR12);
+                       outl(p[1], ioaddr + CSR12);
+                       new_csr6 = 0x02000000 | ((p[2] & 0x71) << 18);
+                       break;
+               case 2: case 4: {
+                       u16 setup[5];
+                       u32 csr13val, csr14val, csr15dir, csr15val;
+                       for (i = 0; i < 5; i++)
+                               setup[i] = get_u16(&p[i*2 + 1]);
+
+                       dev->if_port = p[0] & MEDIA_MASK;
+                       if (tulip_media_cap[dev->if_port] & MediaAlwaysFD)
+                               tp->full_duplex = 1;
+
+                       if (startup && mtable->has_reset) {
+                               struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset];
+                               unsigned char *rst = rleaf->leafdata;
+                               if (tulip_debug > 1)
+                                       printk(KERN_DEBUG "%s: Resetting the transceiver.\n",
+                                                  dev->name);
+                               for (i = 0; i < rst[0]; i++)
+                                       outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15);
+                       }
+                       if (tulip_debug > 1)
+                               printk(KERN_DEBUG "%s: 21143 non-MII %s transceiver control "
+                                          "%4.4x/%4.4x.\n",
+                                          dev->name, medianame[dev->if_port], setup[0], setup[1]);
+                       if (p[0] & 0x40) {      /* SIA (CSR13-15) setup values are provided. */
+                               csr13val = setup[0];
+                               csr14val = setup[1];
+                               csr15dir = (setup[3]<<16) | setup[2];
+                               csr15val = (setup[4]<<16) | setup[2];
+                               outl(0, ioaddr + CSR13);
+                               outl(csr14val, ioaddr + CSR14);
+                               outl(csr15dir, ioaddr + CSR15); /* Direction */
+                               outl(csr15val, ioaddr + CSR15); /* Data */
+                               outl(csr13val, ioaddr + CSR13);
+                       } else {
+                               csr13val = 1;
+                               csr14val = 0;
+                               csr15dir = (setup[0]<<16) | 0x0008;
+                               csr15val = (setup[1]<<16) | 0x0008;
+                               if (dev->if_port <= 4)
+                                       csr14val = t21142_csr14[dev->if_port];
+                               if (startup) {
+                                       outl(0, ioaddr + CSR13);
+                                       outl(csr14val, ioaddr + CSR14);
+                               }
+                               outl(csr15dir, ioaddr + CSR15); /* Direction */
+                               outl(csr15val, ioaddr + CSR15); /* Data */
+                               if (startup) outl(csr13val, ioaddr + CSR13);
+                       }
+                       if (tulip_debug > 1)
+                               printk(KERN_DEBUG "%s:  Setting CSR15 to %8.8x/%8.8x.\n",
+                                          dev->name, csr15dir, csr15val);
+                       if (mleaf->type == 4)
+                               new_csr6 = 0x82020000 | ((setup[2] & 0x71) << 18);
+                       else
+                               new_csr6 = 0x82420000;
+                       break;
+               }
+               case 1: case 3: {
+                       int phy_num = p[0];
+                       int init_length = p[1];
+                       u16 *misc_info, tmp_info;
+
+                       dev->if_port = 11;
+                       new_csr6 = 0x020E0000;
+                       if (mleaf->type == 3) { /* 21142 */
+                               u16 *init_sequence = (u16*)(p+2);
+                               u16 *reset_sequence = &((u16*)(p+3))[init_length];
+                               int reset_length = p[2 + init_length*2];
+                               misc_info = reset_sequence + reset_length;
+                               if (startup)
+                                       for (i = 0; i < reset_length; i++)
+                                               outl(get_u16(&reset_sequence[i]) << 16, ioaddr + CSR15);
+                               for (i = 0; i < init_length; i++)
+                                       outl(get_u16(&init_sequence[i]) << 16, ioaddr + CSR15);
+                       } else {
+                               u8 *init_sequence = p + 2;
+                               u8 *reset_sequence = p + 3 + init_length;
+                               int reset_length = p[2 + init_length];
+                               misc_info = (u16*)(reset_sequence + reset_length);
+                               if (startup) {
+                                       outl(mtable->csr12dir | 0x100, ioaddr + CSR12);
+                                       for (i = 0; i < reset_length; i++)
+                                               outl(reset_sequence[i], ioaddr + CSR12);
+                               }
+                               for (i = 0; i < init_length; i++)
+                                       outl(init_sequence[i], ioaddr + CSR12);
+                       }
+                       tmp_info = get_u16(&misc_info[1]);
+                       if (tmp_info)
+                               tp->advertising[phy_num] = tmp_info | 1;
+                       if (tmp_info && startup < 2) {
+                               if (tp->mii_advertise == 0)
+                                       tp->mii_advertise = tp->advertising[phy_num];
+                               if (tulip_debug > 1)
+                                       printk(KERN_DEBUG "%s:  Advertising %4.4x on MII %d.\n",
+                                              dev->name, tp->mii_advertise, tp->phys[phy_num]);
+                               tulip_mdio_write(dev, tp->phys[phy_num], 4, tp->mii_advertise);
+                       }
+                       break;
+               }
+               case 5: case 6: {
+                       u16 setup[5];
+
+                       new_csr6 = 0; /* FIXME */
+
+                       for (i = 0; i < 5; i++)
+                               setup[i] = get_u16(&p[i*2 + 1]);
+
+                       if (startup && mtable->has_reset) {
+                               struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset];
+                               unsigned char *rst = rleaf->leafdata;
+                               if (tulip_debug > 1)
+                                       printk(KERN_DEBUG "%s: Resetting the transceiver.\n",
+                                                  dev->name);
+                               for (i = 0; i < rst[0]; i++)
+                                       outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15);
+                       }
+
+                       break;
+               }
+               default:
+                       printk(KERN_DEBUG "%s:  Invalid media table selection %d.\n",
+                                          dev->name, mleaf->type);
+                       new_csr6 = 0x020E0000;
+               }
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: Using media type %s, CSR12 is %2.2x.\n",
+                                  dev->name, medianame[dev->if_port],
+                                  inl(ioaddr + CSR12) & 0xff);
+       } else if (tp->chip_id == DC21041) {
+               int port = dev->if_port <= 4 ? dev->if_port : 0;
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: 21041 using media %s, CSR12 is %4.4x.\n",
+                                  dev->name, medianame[port == 3 ? 12: port],
+                                  inl(ioaddr + CSR12));
+               outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */
+               outl(t21041_csr14[port], ioaddr + CSR14);
+               outl(t21041_csr15[port], ioaddr + CSR15);
+               outl(t21041_csr13[port], ioaddr + CSR13);
+               new_csr6 = 0x80020000;
+       } else if (tp->chip_id == LC82C168) {
+               if (startup && ! tp->medialock)
+                       dev->if_port = tp->mii_cnt ? 11 : 0;
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: PNIC PHY status is %3.3x, media %s.\n",
+                                  dev->name, inl(ioaddr + 0xB8), medianame[dev->if_port]);
+               if (tp->mii_cnt) {
+                       new_csr6 = 0x810C0000;
+                       outl(0x0001, ioaddr + CSR15);
+                       outl(0x0201B07A, ioaddr + 0xB8);
+               } else if (startup) {
+                       /* Start with 10 Mbps to do autonegotiation. */
+                       outl(0x32, ioaddr + CSR12);
+                       new_csr6 = 0x00420000;
+                       outl(0x0001B078, ioaddr + 0xB8);
+                       outl(0x0201B078, ioaddr + 0xB8);
+               } else if (dev->if_port == 3  ||  dev->if_port == 5) {
+                       outl(0x33, ioaddr + CSR12);
+                       new_csr6 = 0x01860000;
+                       /* Trigger autonegotiation. */
+                       outl(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+               } else {
+                       outl(0x32, ioaddr + CSR12);
+                       new_csr6 = 0x00420000;
+                       outl(0x1F078, ioaddr + 0xB8);
+               }
+       } else if (tp->chip_id == DC21040) {                                    /* 21040 */
+               /* Turn on the xcvr interface. */
+               int csr12 = inl(ioaddr + CSR12);
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: 21040 media type is %s, CSR12 is %2.2x.\n",
+                                  dev->name, medianame[dev->if_port], csr12);
+               if (tulip_media_cap[dev->if_port] & MediaAlwaysFD)
+                       tp->full_duplex = 1;
+               new_csr6 = 0x20000;
+               /* Set the full duplex match frame. */
+               outl(FULL_DUPLEX_MAGIC, ioaddr + CSR11);
+               outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */
+               if (t21040_csr13[dev->if_port] & 8) {
+                       outl(0x0705, ioaddr + CSR14);
+                       outl(0x0006, ioaddr + CSR15);
+               } else {
+                       outl(0xffff, ioaddr + CSR14);
+                       outl(0x0000, ioaddr + CSR15);
+               }
+               outl(0x8f01 | t21040_csr13[dev->if_port], ioaddr + CSR13);
+       } else {                                        /* Unknown chip type with no media table. */
+               if (tp->default_port == 0)
+                       dev->if_port = tp->mii_cnt ? 11 : 3;
+               if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+                       new_csr6 = 0x020E0000;
+               } else if (tulip_media_cap[dev->if_port] & MediaIsFx) {
+                       new_csr6 = 0x028600000;
+               } else
+                       new_csr6 = 0x038600000;
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: No media description table, assuming "
+                                  "%s transceiver, CSR12 %2.2x.\n",
+                                  dev->name, medianame[dev->if_port],
+                                  inl(ioaddr + CSR12));
+       }
+
+       tp->csr6 = new_csr6 | (tp->csr6 & 0xfdff) | (tp->full_duplex ? 0x0200 : 0);
+       return;
+}
+
+/*
+  Check the MII negotiated duplex and change the CSR6 setting if
+  required.
+  Return 0 if everything is OK.
+  Return < 0 if the transceiver is missing or has no link beat.
+  */
+int tulip_check_duplex(struct net_device *dev)
+{
+       struct tulip_private *tp = dev->priv;
+       unsigned int bmsr, lpa, negotiated, new_csr6;
+
+       bmsr = tulip_mdio_read(dev, tp->phys[0], MII_BMSR);
+       lpa = tulip_mdio_read(dev, tp->phys[0], MII_LPA);
+       if (tulip_debug > 1)
+               printk(KERN_INFO "%s: MII status %4.4x, Link partner report "
+                          "%4.4x.\n", dev->name, bmsr, lpa);
+       if (bmsr == 0xffff)
+               return -2;
+       if ((bmsr & BMSR_LSTATUS) == 0) {
+               int new_bmsr = tulip_mdio_read(dev, tp->phys[0], MII_BMSR);
+               if ((new_bmsr & BMSR_LSTATUS) == 0) {
+                       if (tulip_debug  > 1)
+                               printk(KERN_INFO "%s: No link beat on the MII interface,"
+                                          " status %4.4x.\n", dev->name, new_bmsr);
+                       return -1;
+               }
+       }
+       negotiated = lpa & tp->advertising[0];
+       tp->full_duplex = mii_duplex(tp->full_duplex_lock, negotiated);
+
+       new_csr6 = tp->csr6;
+
+       if (negotiated & LPA_100) new_csr6 &= ~TxThreshold;
+       else                      new_csr6 |= TxThreshold;
+       if (tp->full_duplex) new_csr6 |= FullDuplex;
+       else                 new_csr6 &= ~FullDuplex;
+
+       if (new_csr6 != tp->csr6) {
+               tp->csr6 = new_csr6;
+               tulip_restart_rxtx(tp);
+
+               if (tulip_debug > 0)
+                       printk(KERN_INFO "%s: Setting %s-duplex based on MII"
+                                  "#%d link partner capability of %4.4x.\n",
+                                  dev->name, tp->full_duplex ? "full" : "half",
+                                  tp->phys[0], lpa);
+               return 1;
+       }
+
+       return 0;
+}
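Here "negotiated" is the AND of the local advertisement and the link partner's ability word, and mii_duplex() resolves it by priority. An illustrative resolver (not the driver's actual helper) using the LPA_* bits from <linux/mii.h>:

    #include <linux/mii.h>

    static int sketch_resolve_duplex(unsigned int negotiated)
    {
            if (negotiated & LPA_100FULL)  return 1;   /* best common: 100baseTx-FD */
            if (negotiated & LPA_100BASE4) return 0;   /* 100baseT4 has no full duplex */
            if (negotiated & LPA_100HALF)  return 0;
            return (negotiated & LPA_10FULL) ? 1 : 0;  /* else 10baseT, FD if agreed */
    }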
+
+void __devinit tulip_find_mii (struct net_device *dev, int board_idx)
+{
+       struct tulip_private *tp = dev->priv;
+       int phyn, phy_idx = 0;
+       int mii_reg0;
+       int mii_advert;
+       unsigned int to_advert, new_bmcr, ane_switch;
+
+       /* Find the connected MII xcvrs.
+          Doing this in open() would allow detecting external xcvrs later,
+          but takes much time. */
+       for (phyn = 1; phyn <= 32 && phy_idx < sizeof (tp->phys); phyn++) {
+               int phy = phyn & 0x1f;
+               int mii_status = tulip_mdio_read (dev, phy, MII_BMSR);
+               if ((mii_status & 0x8301) == 0x8001 ||
+                   ((mii_status & BMSR_100BASE4) == 0
+                    && (mii_status & 0x7800) != 0)) {
+                       /* preserve Becker logic, gain indentation level */
+               } else {
+                       continue;
+               }
+
+               mii_reg0 = tulip_mdio_read (dev, phy, MII_BMCR);
+               mii_advert = tulip_mdio_read (dev, phy, MII_ADVERTISE);
+               ane_switch = 0;
+
+               /* if not advertising at all, gen an
+                * advertising value from the capability
+                * bits in BMSR
+                */
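+               /* (BMSR capability bits 15:11 line up with
+                * ADVERTISE bits 9:5 after a shift of 6; the
+                * "| 1" sets the IEEE 802.3 selector field.)
+                */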
+               if ((mii_advert & ADVERTISE_ALL) == 0) {
+                       unsigned int tmpadv = tulip_mdio_read (dev, phy, MII_BMSR);
+                       mii_advert = ((tmpadv >> 6) & 0x3e0) | 1;
+               }
+
+               if (tp->mii_advertise) {
+                       tp->advertising[phy_idx] =
+                       to_advert = tp->mii_advertise;
+               } else if (tp->advertising[phy_idx]) {
+                       to_advert = tp->advertising[phy_idx];
+               } else {
+                       tp->advertising[phy_idx] =
+                       tp->mii_advertise =
+                       to_advert = mii_advert;
+               }
+
+               tp->phys[phy_idx++] = phy;
+
+               printk (KERN_INFO "tulip%d:  MII transceiver #%d "
+                       "config %4.4x status %4.4x advertising %4.4x.\n",
+                       board_idx, phy, mii_reg0, mii_status, mii_advert);
+
+               /* Fixup for DLink with miswired PHY. */
+               if (mii_advert != to_advert) {
+                       printk (KERN_DEBUG "tulip%d:  Advertising %4.4x on PHY %d,"
+                               " previously advertising %4.4x.\n",
+                               board_idx, to_advert, phy, mii_advert);
+                       tulip_mdio_write (dev, phy, 4, to_advert);
+               }
+
+               /* Enable autonegotiation: some boards default to off. */
+               if (tp->default_port == 0) {
+                       new_bmcr = mii_reg0 | BMCR_ANENABLE;
+                       if (new_bmcr != mii_reg0) {
+                               new_bmcr |= BMCR_ANRESTART;
+                               ane_switch = 1;
+                       }
+               }
+               /* ...or disable nway, if forcing media */
+               else {
+                       new_bmcr = mii_reg0 & ~BMCR_ANENABLE;
+                       if (new_bmcr != mii_reg0)
+                               ane_switch = 1;
+               }
+
+               /* clear out bits we never want at this point */
+               new_bmcr &= ~(BMCR_CTST | BMCR_FULLDPLX | BMCR_ISOLATE |
+                             BMCR_PDOWN | BMCR_SPEED100 | BMCR_LOOPBACK |
+                             BMCR_RESET);
+
+               if (tp->full_duplex)
+                       new_bmcr |= BMCR_FULLDPLX;
+               if (tulip_media_cap[tp->default_port] & MediaIs100)
+                       new_bmcr |= BMCR_SPEED100;
+
+               if (new_bmcr != mii_reg0) {
+                       /* some phys need the ANE switch to
+                        * happen before forced media settings
+                        * will "take."  However, we write the
+                        * same value twice in order not to
+                        * confuse the sane phys.
+                        */
+                       if (ane_switch) {
+                               tulip_mdio_write (dev, phy, MII_BMCR, new_bmcr);
+                               udelay (10);
+                       }
+                       tulip_mdio_write (dev, phy, MII_BMCR, new_bmcr);
+               }
+       }
+       tp->mii_cnt = phy_idx;
+       if (tp->mtable && tp->mtable->has_mii && phy_idx == 0) {
+               printk (KERN_INFO "tulip%d: ***WARNING***: No MII transceiver found!\n",
+                       board_idx);
+               tp->phys[0] = 1;
+       }
+}
diff --git a/xen-2.4.16/drivers/net/tulip/pnic.c b/xen-2.4.16/drivers/net/tulip/pnic.c
new file mode 100644 (file)
index 0000000..6739dd3
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+       drivers/net/tulip/pnic.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+//#include <linux/kernel.h>
+#include "tulip.h"
+
+
+void pnic_do_nway(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       u32 phy_reg = inl(ioaddr + 0xB8);
+       u32 new_csr6 = tp->csr6 & ~0x40C40200;
+
+       if (phy_reg & 0x78000000) { /* Ignore baseT4 */
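+               /* PNIC nway result bits, as decoded below:
+                  0x40000000 = 100baseTx,   0x20000000 = 100baseTx-FD,
+                  0x10000000 = 10baseT-FD,  0x08000000 = 10baseT. */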
+               if (phy_reg & 0x20000000)               dev->if_port = 5;
+               else if (phy_reg & 0x40000000)  dev->if_port = 3;
+               else if (phy_reg & 0x10000000)  dev->if_port = 4;
+               else if (phy_reg & 0x08000000)  dev->if_port = 0;
+               tp->nwayset = 1;
+               new_csr6 = (dev->if_port & 1) ? 0x01860000 : 0x00420000;
+               outl(0x32 | (dev->if_port & 1), ioaddr + CSR12);
+               if (dev->if_port & 1)
+                       outl(0x1F868, ioaddr + 0xB8);
+               if (phy_reg & 0x30000000) {
+                       tp->full_duplex = 1;
+                       new_csr6 |= 0x00000200;
+               }
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: PNIC autonegotiated status %8.8x, %s.\n",
+                                  dev->name, phy_reg, medianame[dev->if_port]);
+               if (tp->csr6 != new_csr6) {
+                       tp->csr6 = new_csr6;
+                       /* Restart Tx */
+                       tulip_restart_rxtx(tp);
+                       dev->trans_start = jiffies;
+               }
+       }
+}
+
+void pnic_lnk_change(struct net_device *dev, int csr5)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int phy_reg = inl(ioaddr + 0xB8);
+
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: PNIC link changed state %8.8x, CSR5 %8.8x.\n",
+                          dev->name, phy_reg, csr5);
+       if (inl(ioaddr + CSR5) & TPLnkFail) {
+               outl((inl(ioaddr + CSR7) & ~TPLnkFail) | TPLnkPass, ioaddr + CSR7);
+               /* If we use an external MII, then we mustn't use the
+                * internal negotiation.
+                */
+               if (tulip_media_cap[dev->if_port] & MediaIsMII)
+                       return;
+               if (! tp->nwayset  ||  jiffies - dev->trans_start > 1*HZ) {
+                       tp->csr6 = 0x00420000 | (tp->csr6 & 0x0000fdff);
+                       outl(tp->csr6, ioaddr + CSR6);
+                       outl(0x30, ioaddr + CSR12);
+                       outl(0x0201F078, ioaddr + 0xB8); /* Turn on autonegotiation. */
+                       dev->trans_start = jiffies;
+               }
+       } else if (inl(ioaddr + CSR5) & TPLnkPass) {
+               if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+                       spin_lock(&tp->lock);
+                       tulip_check_duplex(dev);
+                       spin_unlock(&tp->lock);
+               } else {
+                       pnic_do_nway(dev);
+               }
+               outl((inl(ioaddr + CSR7) & ~TPLnkPass) | TPLnkFail, ioaddr + CSR7);
+       }
+}
+
+void pnic_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 60*HZ;
+
+       if(!inl(ioaddr + CSR7)) {
+               /* the timer was called due to a work overflow
+                * in the interrupt handler. Skip the connection
+                * checks: the NIC is definitely talking to
+                * its link partner.
+                */
+               goto too_good_connection;
+       }
+
+       if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+               spin_lock_irq(&tp->lock);
+               if (tulip_check_duplex(dev) > 0)
+                       next_tick = 3*HZ;
+               spin_unlock_irq(&tp->lock);
+       } else {
+               int csr12 = inl(ioaddr + CSR12);
+               int new_csr6 = tp->csr6 & ~0x40C40200;
+               int phy_reg = inl(ioaddr + 0xB8);
+               int csr5 = inl(ioaddr + CSR5);
+
+               if (tulip_debug > 1)
+                       printk(KERN_DEBUG "%s: PNIC timer PHY status %8.8x, %s "
+                                  "CSR5 %8.8x.\n",
+                                  dev->name, phy_reg, medianame[dev->if_port], csr5);
+               if (phy_reg & 0x04000000) {     /* Remote link fault */
+                       outl(0x0201F078, ioaddr + 0xB8);
+                       next_tick = 1*HZ;
+                       tp->nwayset = 0;
+               } else if (phy_reg & 0x78000000) { /* Ignore baseT4 */
+                       pnic_do_nway(dev);
+                       next_tick = 60*HZ;
+               } else if (csr5 & TPLnkFail) { /* 100baseTx link beat */
+                       if (tulip_debug > 1)
+                               printk(KERN_DEBUG "%s: %s link beat failed, CSR12 %4.4x, "
+                                          "CSR5 %8.8x, PHY %3.3x.\n",
+                                          dev->name, medianame[dev->if_port], csr12,
+                                          inl(ioaddr + CSR5), inl(ioaddr + 0xB8));
+                       next_tick = 3*HZ;
+                       if (tp->medialock) {
+                       } else if (tp->nwayset  &&  (dev->if_port & 1)) {
+                               next_tick = 1*HZ;
+                       } else if (dev->if_port == 0) {
+                               dev->if_port = 3;
+                               outl(0x33, ioaddr + CSR12);
+                               new_csr6 = 0x01860000;
+                               outl(0x1F868, ioaddr + 0xB8);
+                       } else {
+                               dev->if_port = 0;
+                               outl(0x32, ioaddr + CSR12);
+                               new_csr6 = 0x00420000;
+                               outl(0x1F078, ioaddr + 0xB8);
+                       }
+                       if (tp->csr6 != new_csr6) {
+                               tp->csr6 = new_csr6;
+                               /* Restart Tx */
+                               tulip_restart_rxtx(tp);
+                               dev->trans_start = jiffies;
+                               if (tulip_debug > 1)
+                                       printk(KERN_INFO "%s: Changing PNIC configuration to %s "
+                                                  "%s-duplex, CSR6 %8.8x.\n",
+                                                  dev->name, medianame[dev->if_port],
+                                                  tp->full_duplex ? "full" : "half", new_csr6);
+                       }
+               }
+       }
+too_good_connection:
+       mod_timer(&tp->timer, RUN_AT(next_tick));
+       if(!inl(ioaddr + CSR7)) {
+               if (tulip_debug > 1)
+                       printk(KERN_INFO "%s: sw timer wakeup.\n", dev->name);
+               disable_irq(dev->irq);
+               tulip_refill_rx(dev);
+               enable_irq(dev->irq);
+               outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
+       }
+}
diff --git a/xen-2.4.16/drivers/net/tulip/pnic2.c b/xen-2.4.16/drivers/net/tulip/pnic2.c
new file mode 100644
index 0000000..9b209d2
--- /dev/null
@@ -0,0 +1,407 @@
+/*
+       drivers/net/tulip/pnic2.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+        Modified to help support PNIC_II by Kevin B. Hendricks
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+
+/* Understanding the PNIC_II - everything in this file is based
+ * on the PNIC_II.PDF datasheet, which is sorely lacking in detail
+ *
+ * As I understand things, here are the registers and bits that
+ * explain the masks and constants used in this file that are
+ * either different from the 21142/3 or important for basic operation.
+ *
+ *
+ * CSR 6  (mask = 0xfe3bd1fd of bits not to change)
+ * -----
+ * Bit 24    - SCR
+ * Bit 23    - PCS
+ * Bit 22    - TTM (Transmit Threshold Mode)
+ * Bit 18    - Port Select
+ * Bit 13    - Start - 1, Stop - 0 Transmissions
+ * Bit 11:10 - Loop Back Operation Mode
+ * Bit 9     - Full Duplex mode (Advertise 10BaseT-FD if CSR14<7> is set)
+ * Bit 1     - Start - 1, Stop - 0 Receive
+ *
+ *
+ * CSR 14  (mask = 0xfff0ee39 of bits not to change)
+ * ------
+ * Bit 19    - PAUSE-Pause
+ * Bit 18    - Advertise T4
+ * Bit 17    - Advertise 100baseTx-FD
+ * Bit 16    - Advertise 100baseTx-HD
+ * Bit 12    - LTE - Link Test Enable
+ * Bit 7     - ANE - Auto Negotiate Enable
+ * Bit 6     - HDE - Advertise 10baseT-HD
+ * Bit 2     - Reset to Power down - kept as 1 for normal operation
+ * Bit 1     -  Loop Back enable for 10baseT MCC
+ *
+ *
+ * CSR 12
+ * ------
+ * Bit 25    - Partner can do T4
+ * Bit 24    - Partner can do 100baseTx-FD
+ * Bit 23    - Partner can do 100baseTx-HD
+ * Bit 22    - Partner can do 10baseT-FD
+ * Bit 21    - Partner can do 10baseT-HD
+ * Bit 15    - LPN is 1 if all above bits are valid, otherwise 0
+ * Bit 14:12 - autonegotiation state (write 001 to start autonegotiate)
+ * Bit 3     - Autopolarity state
+ * Bit 2     - LS10B - link state of 10baseT 0 - good, 1 - failed
+ * Bit 1     - LS100B - link state of 100baseT 0 - good, 1 - failed
+ *
+ *
+ * Data Port Selection Info
+ *-------------------------
+ *
+ * CSR14<7>   CSR6<18>    CSR6<22>    CSR6<23>    CSR6<24>   MODE/PORT
+ *   1           0           0 (X)       0 (X)       1        NWAY
+ *   0           0           1           0 (X)       0        10baseT
+ *   0           1           0           1           1 (X)    100baseT
+ *
+ *
+ */
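+/* For illustration only (not part of the original driver): a sketch of
+ * how the CSR14 bits in the table above combine into an advertising
+ * value; the shifts match the constants used in pnic2_start_nway() below:
+ *
+ *     u32 csr14 = 0;
+ *     csr14 |= (1 << 17);     advertise 100baseTx-FD  (== 0x00020000)
+ *     csr14 |= (1 << 16);     advertise 100baseTx-HD  (== 0x00010000)
+ *     csr14 |= (1 << 6);      advertise 10baseT-HD    (== 0x00000040)
+ *
+ * Likewise, setting CSR6<24> (0x01000000) selects NWAY mode per the
+ * Data Port Selection table.
+ */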
+
+
+
+#include "tulip.h"
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+
+void pnic2_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 60*HZ;
+
+       if (tulip_debug > 3)
+               printk(KERN_INFO"%s: PNIC2 negotiation status %8.8x.\n",
+                    dev->name,inl(ioaddr + CSR12));
+
+       if (next_tick) {
+               mod_timer(&tp->timer, RUN_AT(next_tick));
+       }
+}
+
+
+void pnic2_start_nway(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+        int csr14;
+        int csr12;
+
+        /* set up what to advertise during the negotiation */
+
+        /* load in csr14  and mask off bits not to touch
+         * comment at top of file explains mask value
+         */
+       csr14 = (inl(ioaddr + CSR14) & 0xfff0ee39);
+
+        /* bit 17 - advertise 100baseTx-FD */
+        if (tp->sym_advertise & 0x0100) csr14 |= 0x00020000;
+
+        /* bit 16 - advertise 100baseTx-HD */
+        if (tp->sym_advertise & 0x0080) csr14 |= 0x00010000;
+
+        /* bit 6 - advertise 10baseT-HD */
+        if (tp->sym_advertise & 0x0020) csr14 |= 0x00000040;
+
+        /* Now set bit 12 Link Test Enable and bit 7 Autonegotiation
+         * Enable; 0x1184 also sets bit 8 and keeps bit 2 high so the
+         * 10baseT SIA is not powered down (see the CSR14 notes above)
+         */
+        csr14 |= 0x00001184;
+
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: Restarting PNIC2 autonegotiation, "
+                      "csr14=%8.8x.\n", dev->name, csr14);
+
+        /* tell pnic2_lnk_change we are doing an nway negotiation */
+       dev->if_port = 0;
+       tp->nway = tp->mediasense = 1;
+       tp->nwayset = tp->lpar = 0;
+
+        /* now we have to set up csr6 for NWAY state */
+
+       tp->csr6 = inl(ioaddr + CSR6);
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: On Entry to Nway, "
+                      "csr6=%8.8x.\n", dev->name, tp->csr6);
+
+        /* mask off any bits not to touch
+         * comment at top of file explains mask value
+         */
+       tp->csr6 = tp->csr6 & 0xfe3bd1fd;
+
+        /* don't forget that bit 9 is also used for advertising */
+        /* advertise 10baseT-FD for the negotiation (bit 9) */
+        if (tp->sym_advertise & 0x0040) tp->csr6 |= 0x00000200;
+
+        /* set bit 24 for nway negotiation mode ...
+         * see Data Port Selection comment at top of file
+         * and "Stop" - reset both Transmit (bit 13) and Receive (bit 1)
+         */
+        tp->csr6 |= 0x01000000;
+       outl(csr14, ioaddr + CSR14);
+       outl(tp->csr6, ioaddr + CSR6);
+        udelay(100);
+
+        /* all set up so now force the negotiation to begin */
+
+        /* read in current values and mask off all but the
+        * Autonegotiation bits 14:12.  Writing a 001 to those bits
+         * should start the autonegotiation
+         */
+        csr12 = (inl(ioaddr + CSR12) & 0xffff8fff);
+        csr12 |= 0x1000;
+       outl(csr12, ioaddr + CSR12);
+}
+
+
+
+void pnic2_lnk_change(struct net_device *dev, int csr5)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+        int csr14;
+
+        /* read the status register to find out what is up */
+       int csr12 = inl(ioaddr + CSR12);
+
+       if (tulip_debug > 1)
+               printk(KERN_INFO"%s: PNIC2 link status interrupt %8.8x, "
+                       " CSR5 %x, %8.8x.\n", dev->name, csr12,
+                       csr5, inl(ioaddr + CSR14));
+
+       /* If NWay finished and we have a negotiated partner capability,
+         * check bits 14:12 for bit pattern 101 - all is good
+         */
+       if (tp->nway  &&  !tp->nwayset) {
+
+               /* we did an auto negotiation */
+
+                if ((csr12 & 0x7000) == 0x5000) {
+
+                      /* negotiation ended successfully */
+
+                      /* get the link partner's reply and mask out all but
+                        * bits 24-21, which show the partner's capabilities,
+                        * and match those to what we advertised
+                        *
+                        * then begin to interpret the results of the negotiation.
+                        * Always go in this order: (we are ignoring T4 for now)
+                        *     100baseTx-FD, 100baseTx-HD, 10baseT-FD, 10baseT-HD
+                        */
+
+                       int negotiated = ((csr12 >> 16) & 0x01E0) & tp->sym_advertise;
+                       tp->lpar = (csr12 >> 16);
+                       tp->nwayset = 1;
+
+                        if (negotiated & 0x0100)        dev->if_port = 5;
+                       else if (negotiated & 0x0080)   dev->if_port = 3;
+                       else if (negotiated & 0x0040)   dev->if_port = 4;
+                       else if (negotiated & 0x0020)   dev->if_port = 0;
+                       else {
+                            if (tulip_debug > 1)
+                                  printk(KERN_INFO "%s: funny autonegotiate result "
+                                        "csr12 %8.8x advertising %4.4x\n",
+                                        dev->name, csr12, tp->sym_advertise);
+                            tp->nwayset = 0;
+                            /* so check  if 100baseTx link state is okay */
+                            if ((csr12 & 2) == 0  &&  (tp->sym_advertise & 0x0180))
+                              dev->if_port = 3;
+                       }
+
+                       /* now record the duplex that was negotiated */
+                       tp->full_duplex = 0;
+                       if ((dev->if_port == 4) || (dev->if_port == 5))
+                              tp->full_duplex = 1;
+
+                       if (tulip_debug > 1) {
+                              if (tp->nwayset)
+                                    printk(KERN_INFO "%s: Switching to %s based on link "
+                                   "negotiation %4.4x & %4.4x = %4.4x.\n",
+                                    dev->name, medianame[dev->if_port],
+                                     tp->sym_advertise, tp->lpar, negotiated);
+                       }
+
+                        /* remember to turn off bit 7 - autonegotiate
+                         * enable so we can properly end nway mode and
+                         * set duplex (ie. use csr6<9> again)
+                         */
+                       csr14 = (inl(ioaddr + CSR14) & 0xffffff7f);
+                        outl(csr14,ioaddr + CSR14);
+
+
+                        /* now set the data port and operating mode
+                        * (see the Data Port Selection comments at
+                        * the top of the file
+                        */
+
+                       /* get current csr6 and mask off bits not to touch */
+                       /* see comment at top of file */
+
+                       tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd);
+
+                       /* so if using if_port 3 or 5 then select the 100baseT
+                        * port else select the 10baseT port.
+                        * See the Data Port Selection table at the top
+                        * of the file which was taken from the PNIC_II.PDF
+                        * datasheet
+                        */
+                       if (dev->if_port & 1) tp->csr6 |= 0x01840000;
+                       else tp->csr6 |= 0x00400000;
+
+                       /* now set the full duplex bit appropriately */
+                       if (tp->full_duplex) tp->csr6 |= 0x00000200;
+
+                       outl(1, ioaddr + CSR13);
+
+                       if (tulip_debug > 2)
+                               printk(KERN_DEBUG "%s:  Setting CSR6 %8.8x/%x CSR12 "
+                                      "%8.8x.\n", dev->name, tp->csr6,
+                                      inl(ioaddr + CSR6), inl(ioaddr + CSR12));
+
+                       /* now the following actually writes out the
+                        * new csr6 values
+                        */
+                       tulip_start_rxtx(tp);
+
+                        return;
+
+               } else {
+                       printk(KERN_INFO "%s: Autonegotiation failed, "
+                                    "using %s, link beat status %4.4x.\n",
+                                    dev->name, medianame[dev->if_port], csr12);
+
+                        /* remember to turn off bit 7 - autonegotiate
+                         * enable so we don't forget
+                         */
+                       csr14 = (inl(ioaddr + CSR14) & 0xffffff7f);
+                        outl(csr14,ioaddr + CSR14);
+
+                        /* what should we do when autonegotiate fails?
+                         * should we try again or default to the baseline
+                         * case?  I just don't know.
+                         *
+                         * for now default to some baseline case
+                         */
+
+                        dev->if_port = 0;
+                         tp->nway = 0;
+                         tp->nwayset = 1;
+
+                         /* set to 10baseT-HD - see Data Port Selection
+                          * comment given at the top of the file
+                          */
+                        tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd);
+                         tp->csr6 |= 0x00400000;
+
+                        tulip_restart_rxtx(tp);
+
+                         return;
+
+               }
+       }
+
+       if ((tp->nwayset  &&  (csr5 & 0x08000000)
+                         && (dev->if_port == 3  ||  dev->if_port == 5)
+                         && (csr12 & 2) == 2) || (tp->nway && (csr5 & (TPLnkFail)))) {
+
+               /* Link blew? Maybe restart NWay. */
+
+               if (tulip_debug > 2)
+                       printk(KERN_DEBUG "%s: Ugh! Link blew?\n", dev->name);
+
+               del_timer_sync(&tp->timer);
+               pnic2_start_nway(dev);
+               tp->timer.expires = RUN_AT(3*HZ);
+               add_timer(&tp->timer);
+
+                return;
+       }
+
+
+        if (dev->if_port == 3  ||  dev->if_port == 5) {
+
+               /* we are at 100mb and a potential link change occurred */
+
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: PNIC2 %s link beat %s.\n",
+                                  dev->name, medianame[dev->if_port],
+                                  (csr12 & 2) ? "failed" : "good");
+
+                /* check 100 link beat */
+
+                tp->nway = 0;
+                tp->nwayset = 1;
+
+                /* if failed then try doing an nway to get in sync */
+               if ((csr12 & 2)  &&  ! tp->medialock) {
+                       del_timer_sync(&tp->timer);
+                       pnic2_start_nway(dev);
+                       tp->timer.expires = RUN_AT(3*HZ);
+                       add_timer(&tp->timer);
+                }
+
+                return;
+        }
+
+       if (dev->if_port == 0  ||  dev->if_port == 4) {
+
+               /* we are at 10mb and a potential link change occurred */
+
+               if (tulip_debug > 1)
+                       printk(KERN_INFO"%s: PNIC2 %s link beat %s.\n",
+                                  dev->name, medianame[dev->if_port],
+                                  (csr12 & 4) ? "failed" : "good");
+
+
+                tp->nway = 0;
+                tp->nwayset = 1;
+
+                /* if failed, try doing an nway to get in sync */
+               if ((csr12 & 4)  &&  ! tp->medialock) {
+                       del_timer_sync(&tp->timer);
+                       pnic2_start_nway(dev);
+                       tp->timer.expires = RUN_AT(3*HZ);
+                       add_timer(&tp->timer);
+                }
+
+                return;
+        }
+
+
+       if (tulip_debug > 1)
+               printk(KERN_INFO"%s: PNIC2 Link Change Default?\n",dev->name);
+
+        /* if all else fails default to trying 10baseT-HD */
+       dev->if_port = 0;
+
+        /* make sure autonegotiate enable is off */
+       csr14 = (inl(ioaddr + CSR14) & 0xffffff7f);
+        outl(csr14,ioaddr + CSR14);
+
+        /* set to 10baseT-HD - see Data Port Selection
+         * comment given at the top of the file
+         */
+       tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd);
+        tp->csr6 |= 0x00400000;
+
+       tulip_restart_rxtx(tp);
+}
+
diff --git a/xen-2.4.16/drivers/net/tulip/timer.c b/xen-2.4.16/drivers/net/tulip/timer.c
new file mode 100644
index 0000000..4079772
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+       drivers/net/tulip/timer.c
+
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#include "tulip.h"
+
+
+void tulip_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       u32 csr12 = inl(ioaddr + CSR12);
+       int next_tick = 2*HZ;
+
+       if (tulip_debug > 2) {
+               printk(KERN_DEBUG "%s: Media selection tick, %s, status %8.8x mode"
+                          " %8.8x SIA %8.8x %8.8x %8.8x %8.8x.\n",
+                          dev->name, medianame[dev->if_port], inl(ioaddr + CSR5),
+                          inl(ioaddr + CSR6), csr12, inl(ioaddr + CSR13),
+                          inl(ioaddr + CSR14), inl(ioaddr + CSR15));
+       }
+       switch (tp->chip_id) {
+       case DC21040:
+               if (!tp->medialock  &&  csr12 & 0x0002) { /* Network error */
+                       printk(KERN_INFO "%s: No link beat found.\n",
+                                  dev->name);
+                       dev->if_port = (dev->if_port == 2 ? 0 : 2);
+                       tulip_select_media(dev, 0);
+                       dev->trans_start = jiffies;
+               }
+               break;
+       case DC21041:
+               if (tulip_debug > 2)
+                       printk(KERN_DEBUG "%s: 21041 media tick  CSR12 %8.8x.\n",
+                                  dev->name, csr12);
+               if (tp->medialock) break;
+               switch (dev->if_port) {
+               case 0: case 3: case 4:
+                 if (csr12 & 0x0004) { /*LnkFail */
+                       /* 10baseT is dead.  Check for activity on alternate port. */
+                       tp->mediasense = 1;
+                       if (csr12 & 0x0200)
+                               dev->if_port = 2;
+                       else
+                               dev->if_port = 1;
+                       printk(KERN_INFO "%s: No 21041 10baseT link beat, Media switched to %s.\n",
+                                  dev->name, medianame[dev->if_port]);
+                       outl(0, ioaddr + CSR13); /* Reset */
+                       outl(t21041_csr14[dev->if_port], ioaddr + CSR14);
+                       outl(t21041_csr15[dev->if_port], ioaddr + CSR15);
+                       outl(t21041_csr13[dev->if_port], ioaddr + CSR13);
+                       next_tick = 10*HZ;                      /* 2.4 sec. */
+                 } else
+                       next_tick = 30*HZ;
+                 break;
+               case 1:                                 /* 10base2 */
+               case 2:                                 /* AUI */
+                       if (csr12 & 0x0100) {
+                               next_tick = (30*HZ);                    /* 30 sec. */
+                               tp->mediasense = 0;
+                       } else if ((csr12 & 0x0004) == 0) {
+                               printk(KERN_INFO "%s: 21041 media switched to 10baseT.\n",
+                                          dev->name);
+                               dev->if_port = 0;
+                               tulip_select_media(dev, 0);
+                               next_tick = (24*HZ)/10;                         /* 2.4 sec. */
+                       } else if (tp->mediasense || (csr12 & 0x0002)) {
+                               dev->if_port = 3 - dev->if_port; /* Swap ports. */
+                               tulip_select_media(dev, 0);
+                               next_tick = 20*HZ;
+                       } else {
+                               next_tick = 20*HZ;
+                       }
+                       break;
+               }
+               break;
+       case DC21140:
+       case DC21142:
+       case MX98713:
+       case COMPEX9881:
+       case DM910X:
+       default: {
+               struct medialeaf *mleaf;
+               unsigned char *p;
+               if (tp->mtable == NULL) {       /* No EEPROM info, use generic code. */
+                       /* Not much that can be done.
+                          Assume this is a generic MII or SYM transceiver. */
+                       next_tick = 60*HZ;
+                       if (tulip_debug > 2)
+                               printk(KERN_DEBUG "%s: network media monitor CSR6 %8.8x "
+                                          "CSR12 0x%2.2x.\n",
+                                          dev->name, inl(ioaddr + CSR6), csr12 & 0xff);
+                       break;
+               }
+               mleaf = &tp->mtable->mleaf[tp->cur_index];
+               p = mleaf->leafdata;
+               switch (mleaf->type) {
+               case 0: case 4: {
+                       /* Type 0 serial or 4 SYM transceiver.  Check the link beat bit. */
+                       int offset = mleaf->type == 4 ? 5 : 2;
+                       s8 bitnum = p[offset];
+                       if (p[offset+1] & 0x80) {
+                               if (tulip_debug > 1)
+                                       printk(KERN_DEBUG"%s: Transceiver monitor tick "
+                                                  "CSR12=%#2.2x, no media sense.\n",
+                                                  dev->name, csr12);
+                               if (mleaf->type == 4) {
+                                       if (mleaf->media == 3 && (csr12 & 0x02))
+                                               goto select_next_media;
+                               }
+                               break;
+                       }
+                       if (tulip_debug > 2)
+                               printk(KERN_DEBUG "%s: Transceiver monitor tick: CSR12=%#2.2x"
+                                          " bit %d is %d, expecting %d.\n",
+                                          dev->name, csr12, (bitnum >> 1) & 7,
+                                          (csr12 & (1 << ((bitnum >> 1) & 7))) != 0,
+                                          (bitnum >= 0));
+                       /* Check that the specified bit has the proper value. */
+                       if ((bitnum < 0) !=
+                               ((csr12 & (1 << ((bitnum >> 1) & 7))) != 0)) {
+                               if (tulip_debug > 2)
+                                       printk(KERN_DEBUG "%s: Link beat detected for %s.\n", dev->name,
+                                              medianame[mleaf->media & MEDIA_MASK]);
+                               if ((p[2] & 0x61) == 0x01)      /* Bogus Znyx board. */
+                                       goto actually_mii;
+                               /* netif_carrier_on(dev); */
+                               break;
+                       }
+                       /* netif_carrier_off(dev); */
+                       if (tp->medialock)
+                               break;
+         select_next_media:
+                       if (--tp->cur_index < 0) {
+                               /* We start again, but should instead look for default. */
+                               tp->cur_index = tp->mtable->leafcount - 1;
+                       }
+                       dev->if_port = tp->mtable->mleaf[tp->cur_index].media;
+                       if (tulip_media_cap[dev->if_port] & MediaIsFD)
+                               goto select_next_media; /* Skip FD entries. */
+                       if (tulip_debug > 1)
+                               printk(KERN_DEBUG "%s: No link beat on media %s,"
+                                      " trying transceiver type %s.\n",
+                                      dev->name, medianame[mleaf->media & MEDIA_MASK],
+                                      medianame[tp->mtable->mleaf[tp->cur_index].media]);
+                       tulip_select_media(dev, 0);
+                       /* Restart the transmit process. */
+                       tulip_restart_rxtx(tp);
+                       next_tick = (24*HZ)/10;
+                       break;
+               }
+               case 1:  case 3:                /* 21140, 21142 MII */
+               actually_mii:
+                       if (tulip_check_duplex(dev) < 0)
+                               { /* netif_carrier_off(dev); */ }
+                       else
+                               { /* netif_carrier_on(dev); */ }
+                       next_tick = 60*HZ;
+                       break;
+               case 2:                                 /* 21142 serial block has no link beat. */
+               default:
+                       break;
+               }
+       }
+       break;
+       }
+       /* mod_timer synchronizes us with potential add_timer calls
+        * from interrupts.
+        */
+       mod_timer(&tp->timer, RUN_AT(next_tick));
+}
+
+
+void mxic_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 60*HZ;
+
+       if (tulip_debug > 3) {
+               printk(KERN_INFO"%s: MXIC negotiation status %8.8x.\n", dev->name,
+                          inl(ioaddr + CSR12));
+       }
+       if (next_tick) {
+               mod_timer(&tp->timer, RUN_AT(next_tick));
+       }
+}
+
+
+void comet_timer(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 60*HZ;
+
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: Comet link status %4.4x partner capability "
+                          "%4.4x.\n",
+                          dev->name, inl(ioaddr + 0xB8), inl(ioaddr + 0xC8));
+       /* mod_timer synchronizes us with potential add_timer calls
+        * from interrupts.
+        */
+       mod_timer(&tp->timer, RUN_AT(next_tick));
+}
+
diff --git a/xen-2.4.16/drivers/net/tulip/tulip.h b/xen-2.4.16/drivers/net/tulip/tulip.h
new file mode 100644 (file)
index 0000000..716bc92
--- /dev/null
@@ -0,0 +1,499 @@
+/*
+       drivers/net/tulip/tulip.h
+
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#ifndef __NET_TULIP_H__
+#define __NET_TULIP_H__
+
+#include <linux/config.h>
+//#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+
+
+/* undefine, or define to various debugging levels (>4 == obscene levels) */
+#define TULIP_DEBUG 1
+
+/* undefine USE_IO_OPS for MMIO, define for PIO */
+#ifdef CONFIG_TULIP_MMIO
+# undef USE_IO_OPS
+#else
+# define USE_IO_OPS 1
+#endif
+
+
+
+struct tulip_chip_table {
+       char *chip_name;
+       int io_size;
+       int valid_intrs;        /* CSR7 interrupt enable settings */
+       int flags;
+       void (*media_timer) (unsigned long data);
+};
+
+
+enum tbl_flag {
+       HAS_MII                 = 0x0001,
+       HAS_MEDIA_TABLE         = 0x0002,
+       CSR12_IN_SROM           = 0x0004,
+       ALWAYS_CHECK_MII        = 0x0008,
+       HAS_ACPI                = 0x0010,
+       MC_HASH_ONLY            = 0x0020, /* Hash-only multicast filter. */
+       HAS_PNICNWAY            = 0x0080,
+       HAS_NWAY                = 0x0040, /* Uses internal NWay xcvr. */
+       HAS_INTR_MITIGATION     = 0x0100,
+       IS_ASIX                 = 0x0200,
+       HAS_8023X               = 0x0400,
+       COMET_MAC_ADDR          = 0x0800,
+       HAS_PCI_MWI             = 0x1000,
+};
+
+
+/* chip types.  careful!  order is VERY IMPORTANT here, as these
+ * are used throughout the driver as indices into arrays */
+/* Note 21142 == 21143. */
+enum chips {
+       DC21040 = 0,
+       DC21041 = 1,
+       DC21140 = 2,
+       DC21142 = 3, DC21143 = 3,
+       LC82C168,
+       MX98713,
+       MX98715,
+       MX98725,
+       AX88140,
+       PNIC2,
+       COMET,
+       COMPEX9881,
+       I21145,
+       DM910X,
+};
+
+
+enum MediaIs {
+       MediaIsFD = 1,
+       MediaAlwaysFD = 2,
+       MediaIsMII = 4,
+       MediaIsFx = 8,
+       MediaIs100 = 16
+};
+
+
+/* Offsets to the Command and Status Registers, "CSRs".  All accesses
+   must be longword instructions and quadword aligned. */
+enum tulip_offsets {
+       CSR0 = 0,
+       CSR1 = 0x08,
+       CSR2 = 0x10,
+       CSR3 = 0x18,
+       CSR4 = 0x20,
+       CSR5 = 0x28,
+       CSR6 = 0x30,
+       CSR7 = 0x38,
+       CSR8 = 0x40,
+       CSR9 = 0x48,
+       CSR10 = 0x50,
+       CSR11 = 0x58,
+       CSR12 = 0x60,
+       CSR13 = 0x68,
+       CSR14 = 0x70,
+       CSR15 = 0x78,
+};
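+/* (The offsets step by 8 bytes -- a quadword stride -- so CSRn == n * 8;
+ *  e.g. CSR5 == 5 * 8 == 0x28 and CSR12 == 12 * 8 == 0x60.) */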
+
+/* register offset and bits for CFDD PCI config reg */
+enum pci_cfg_driver_reg {
+       CFDD = 0x40,
+       CFDD_Sleep = (1 << 31),
+       CFDD_Snooze = (1 << 30),
+};
+
+
+/* The bits in the CSR5 status registers, mostly interrupt sources. */
+enum status_bits {
+       TimerInt = 0x800,
+       SytemError = 0x2000,
+       TPLnkFail = 0x1000,
+       TPLnkPass = 0x10,
+       NormalIntr = 0x10000,
+       AbnormalIntr = 0x8000,
+       RxJabber = 0x200,
+       RxDied = 0x100,
+       RxNoBuf = 0x80,
+       RxIntr = 0x40,
+       TxFIFOUnderflow = 0x20,
+       TxJabber = 0x08,
+       TxNoBuf = 0x04,
+       TxDied = 0x02,
+       TxIntr = 0x01,
+};
+
+
+enum tulip_mode_bits {
+       TxThreshold             = (1 << 22),
+       FullDuplex              = (1 << 9),
+       TxOn                    = 0x2000,
+       AcceptBroadcast         = 0x0100,
+       AcceptAllMulticast      = 0x0080,
+       AcceptAllPhys           = 0x0040,
+       AcceptRunt              = 0x0008,
+       RxOn                    = 0x0002,
+       RxTx                    = (TxOn | RxOn),
+};
+
+
+enum tulip_busconfig_bits {
+       MWI                     = (1 << 24),
+       MRL                     = (1 << 23),
+       MRM                     = (1 << 21),
+       CALShift                = 14,
+       BurstLenShift           = 8,
+};
+
+
+/* The Tulip Rx and Tx buffer descriptors. */
+struct tulip_rx_desc {
+       s32 status;
+       s32 length;
+       u32 buffer1;
+       u32 buffer2;
+};
+
+
+struct tulip_tx_desc {
+       s32 status;
+       s32 length;
+       u32 buffer1;
+       u32 buffer2;            /* We use only buffer 1.  */
+};
+
+
+enum desc_status_bits {
+       DescOwned = 0x80000000,
+       RxDescFatalErr = 0x8000,
+       RxWholePkt = 0x0300,
+};
+
+
+enum t21041_csr13_bits {
+       csr13_eng = (0xEF0<<4), /* for eng. purposes only, hardcode at EF0h */
+       csr13_aui = (1<<3), /* clear to force 10bT, set to force AUI/BNC */
+       csr13_cac = (1<<2), /* CSR13/14/15 autoconfiguration */
+       csr13_srl = (1<<0), /* When reset, resets all SIA functions, machines */
+
+       csr13_mask_auibnc = (csr13_eng | csr13_aui | csr13_srl),
+       csr13_mask_10bt = (csr13_eng | csr13_srl),
+};
+
+enum t21143_csr6_bits {
+       csr6_sc = (1<<31),
+       csr6_ra = (1<<30),
+       csr6_ign_dest_msb = (1<<26),
+       csr6_mbo = (1<<25),
+       csr6_scr = (1<<24),  /* scramble mode flag: can't be set */
+       csr6_pcs = (1<<23),  /* Enables PCS functions (symbol mode requires csr6_ps to be set); default is set */
+       csr6_ttm = (1<<22),  /* Transmit Threshold Mode, set for 10baseT, 0 for 100BaseTX */
+       csr6_sf = (1<<21),   /* Store and forward. If set ignores TR bits */
+       csr6_hbd = (1<<19),  /* Heart beat disable. Disables SQE function in 10baseT */
+       csr6_ps = (1<<18),   /* Port Select. 0 (default) = 10baseT, 1 = 100baseTX: can't be set */
+       csr6_ca = (1<<17),   /* Collision Offset Enable. If set uses special algorithm in low collision situations */
+       csr6_trh = (1<<15),  /* Transmit Threshold high bit */
+       csr6_trl = (1<<14),  /* Transmit Threshold low bit */
+
+       /***************************************************************
+        * This table shows transmit threshold values based on media   *
+        * and these two registers (from PNIC1 & 2 docs) Note: this is *
+        * all meaningless if sf is set.                               *
+        ***************************************************************/
+
+       /***********************************
+        * (trh,trl) * 100BaseTX * 10BaseT *
+        ***********************************
+        *   (0,0)   *     128   *    72   *
+        *   (0,1)   *     256   *    96   *
+        *   (1,0)   *     512   *   128   *
+        *   (1,1)   *    1024   *   160   *
+        ***********************************/
+
+       csr6_fc = (1<<12),   /* Forces a collision in next transmission (for testing in loopback mode) */
+       csr6_om_int_loop = (1<<10), /* internal (FIFO) loopback flag */
+       csr6_om_ext_loop = (1<<11), /* external (PMD) loopback flag */
+       /* set both and you get (PHY) loopback */
+       csr6_fd = (1<<9),    /* Full duplex mode, disables heartbeat, no loopback */
+       csr6_pm = (1<<7),    /* Pass All Multicast */
+       csr6_pr = (1<<6),    /* Promiscuous mode */
+       csr6_sb = (1<<5),    /* Start(1)/Stop(0) backoff counter */
+       csr6_if = (1<<4),    /* Inverse Filtering, rejects only addresses in address table: can't be set */
+       csr6_pb = (1<<3),    /* Pass Bad Frames, (1) causes even bad frames to be passed on */
+       csr6_ho = (1<<2),    /* Hash-only filtering mode: can't be set */
+       csr6_hp = (1<<0),    /* Hash/Perfect Receive Filtering Mode: can't be set */
+
+       csr6_mask_capture = (csr6_sc | csr6_ca),
+       csr6_mask_defstate = (csr6_mask_capture | csr6_mbo),
+       csr6_mask_hdcap = (csr6_mask_defstate | csr6_hbd | csr6_ps),
+       csr6_mask_hdcaptt = (csr6_mask_hdcap  | csr6_trh | csr6_trl),
+       csr6_mask_fullcap = (csr6_mask_hdcaptt | csr6_fd),
+       csr6_mask_fullpromisc = (csr6_pr | csr6_pm),
+       csr6_mask_filters = (csr6_hp | csr6_ho | csr6_if),
+       csr6_mask_100bt = (csr6_scr | csr6_pcs | csr6_hbd),
+};
+
+
+/* Keep the ring sizes a power of two for efficiency.
+   Making the Tx ring too large decreases the effectiveness of channel
+   bonding and packet priority.
+   There are no ill effects from too-large receive rings. */
+#undef TX_RING_SIZE
+#undef RX_RING_SIZE
+#define TX_RING_SIZE   16
+#define RX_RING_SIZE   32
+
+#define MEDIA_MASK     31
+
+#define PKT_BUF_SZ             1536    /* Size of each temporary Rx buffer. */
+
+#define TULIP_MIN_CACHE_LINE   8       /* in units of 32-bit words */
+
+#if defined(__sparc__) || defined(__hppa__)
+/* The UltraSparc PCI controllers will disconnect at every 64-byte
+ * crossing anyways so it makes no sense to tell Tulip to burst
+ * any more than that.
+ */
+#define TULIP_MAX_CACHE_LINE   16      /* in units of 32-bit words */
+#else
+#define TULIP_MAX_CACHE_LINE   32      /* in units of 32-bit words */
+#endif
+
+
+/* Ring-wrap flag in length field, use for last ring entry.
+       0x01000000 means chain on buffer2 address,
+       0x02000000 means use the ring start address in CSR2/3.
+   Note: Some work-alike chips do not function correctly in chained mode.
+   The ASIX chip works only in chained mode.
+   Thus we indicate ring mode, but always write the 'next' field for
+   chained mode as well.
+*/
+#define DESC_RING_WRAP 0x02000000
+
+
+#define EEPROM_SIZE 128        /* 2 << EEPROM_ADDRLEN */
+
+
+#define RUN_AT(x) (jiffies + (x))
+
+#if defined(__i386__)                  /* AKA get_unaligned() */
+#define get_u16(ptr) (*(u16 *)(ptr))
+#else
+#define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8))
+#endif
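+/* Illustrative usage (not part of the driver): get_u16() reads a possibly
+ * unaligned little-endian 16-bit value, e.g. out of the EEPROM image, and
+ * RUN_AT() turns a relative delay into the absolute jiffies value the
+ * timer API expects:
+ *
+ *     u16 w = get_u16(&tp->eeprom[0]);        unaligned-safe read
+ *     mod_timer(&tp->timer, RUN_AT(2*HZ));    fire roughly 2s from now
+ */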
+
+struct medialeaf {
+       u8 type;
+       u8 media;
+       unsigned char *leafdata;
+};
+
+
+struct mediatable {
+       u16 defaultmedia;
+       u8 leafcount;
+       u8 csr12dir;            /* General purpose pin directions. */
+       unsigned has_mii:1;
+       unsigned has_nonmii:1;
+       unsigned has_reset:6;
+       u32 csr15dir;
+       u32 csr15val;           /* 21143 NWay setting. */
+       struct medialeaf mleaf[0];
+};
+
+
+struct mediainfo {
+       struct mediainfo *next;
+       int info_type;
+       int index;
+       unsigned char *info;
+};
+
+struct ring_info {
+       struct sk_buff  *skb;
+       dma_addr_t      mapping;
+};
+
+
+struct tulip_private {
+       const char *product_name;
+       struct net_device *next_module;
+       struct tulip_rx_desc *rx_ring;
+       struct tulip_tx_desc *tx_ring;
+       dma_addr_t rx_ring_dma;
+       dma_addr_t tx_ring_dma;
+       /* The saved address of a sent-in-place packet/buffer, for skfree(). */
+       struct ring_info tx_buffers[TX_RING_SIZE];
+       /* The addresses of receive-in-place skbuffs. */
+       struct ring_info rx_buffers[RX_RING_SIZE];
+       u16 setup_frame[96];    /* Pseudo-Tx frame to init address table. */
+       int chip_id;
+       int revision;
+       int flags;
+       struct net_device_stats stats;
+       struct timer_list timer;        /* Media selection timer. */
+       u32 mc_filter[2];
+       spinlock_t lock;
+       spinlock_t mii_lock;
+       unsigned int cur_rx, cur_tx;    /* The next free ring entry */
+       unsigned int dirty_rx, dirty_tx;        /* The ring entries to be free()ed. */
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+#define RX_A_NBF_STOP 0xffffff3f /* To disable RX and RX-NOBUF ints. */
+        int fc_bit;
+        int mit_sel;
+        int mit_change; /* Signal for Interrupt Mitigation */
+#endif
+       unsigned int full_duplex:1;     /* Full-duplex operation requested. */
+       unsigned int full_duplex_lock:1;
+       unsigned int fake_addr:1;       /* Multiport board faked address. */
+       unsigned int default_port:4;    /* Last dev->if_port value. */
+       unsigned int media2:4;  /* Secondary monitored media port. */
+       unsigned int medialock:1;       /* Don't sense media type. */
+       unsigned int mediasense:1;      /* Media sensing in progress. */
+       unsigned int nway:1, nwayset:1;         /* 21143 internal NWay. */
+       unsigned int csr0;      /* CSR0 setting. */
+       unsigned int csr6;      /* Current CSR6 control settings. */
+       unsigned char eeprom[EEPROM_SIZE];      /* Serial EEPROM contents. */
+       void (*link_change) (struct net_device * dev, int csr5);
+       u16 sym_advertise, mii_advertise; /* NWay capabilities advertised.  */
+       u16 lpar;               /* 21143 Link partner ability. */
+       u16 advertising[4];
+       signed char phys[4], mii_cnt;   /* MII device addresses. */
+       struct mediatable *mtable;
+       int cur_index;          /* Current media index. */
+       int saved_if_port;
+       struct pci_dev *pdev;
+       int ttimer;
+       int susp_rx;
+       unsigned long nir;
+       unsigned long base_addr;
+       int pad0, pad1;         /* Used for 8-byte alignment */
+};
+
+
+struct eeprom_fixup {
+       char *name;
+       unsigned char addr0;
+       unsigned char addr1;
+       unsigned char addr2;
+       u16 newtable[32];       /* Max length below. */
+};
+
+
+/* 21142.c */
+extern u16 t21142_csr14[];
+void t21142_timer(unsigned long data);
+void t21142_start_nway(struct net_device *dev);
+void t21142_lnk_change(struct net_device *dev, int csr5);
+
+
+/* pnic2.c */
+void pnic2_lnk_change(struct net_device *dev, int csr5);
+void pnic2_timer(unsigned long data);
+void pnic2_start_nway(struct net_device *dev);
+
+/* eeprom.c */
+void tulip_parse_eeprom(struct net_device *dev);
+int tulip_read_eeprom(long ioaddr, int location, int addr_len);
+
+/* interrupt.c */
+extern unsigned int tulip_max_interrupt_work;
+extern int tulip_rx_copybreak;
+void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
+int tulip_refill_rx(struct net_device *dev);
+
+/* media.c */
+int tulip_mdio_read(struct net_device *dev, int phy_id, int location);
+void tulip_mdio_write(struct net_device *dev, int phy_id, int location, int value);
+void tulip_select_media(struct net_device *dev, int startup);
+int tulip_check_duplex(struct net_device *dev);
+void tulip_find_mii (struct net_device *dev, int board_idx);
+
+/* pnic.c */
+void pnic_do_nway(struct net_device *dev);
+void pnic_lnk_change(struct net_device *dev, int csr5);
+void pnic_timer(unsigned long data);
+
+/* timer.c */
+void tulip_timer(unsigned long data);
+void mxic_timer(unsigned long data);
+void comet_timer(unsigned long data);
+
+/* tulip_core.c */
+extern int tulip_debug;
+extern const char * const medianame[];
+extern const char tulip_media_cap[];
+extern struct tulip_chip_table tulip_tbl[];
+extern u8 t21040_csr13[];
+extern u16 t21041_csr13[];
+extern u16 t21041_csr14[];
+extern u16 t21041_csr15[];
+
+#ifndef USE_IO_OPS
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+#define inb(addr) readb((void*)(addr))
+#define inw(addr) readw((void*)(addr))
+#define inl(addr) readl((void*)(addr))
+#define outb(val,addr) writeb((val), (void*)(addr))
+#define outw(val,addr) writew((val), (void*)(addr))
+#define outl(val,addr) writel((val), (void*)(addr))
+#endif /* !USE_IO_OPS */
+
+
+
+static inline void tulip_start_rxtx(struct tulip_private *tp)
+{
+       long ioaddr = tp->base_addr;
+       outl(tp->csr6 | RxTx, ioaddr + CSR6);
+       barrier();
+       (void) inl(ioaddr + CSR6); /* mmio sync */
+}
+
+static inline void tulip_stop_rxtx(struct tulip_private *tp)
+{
+       long ioaddr = tp->base_addr;
+       u32 csr6 = inl(ioaddr + CSR6);
+
+       if (csr6 & RxTx) {
+               outl(csr6 & ~RxTx, ioaddr + CSR6);
+               barrier();
+               (void) inl(ioaddr + CSR6); /* mmio sync */
+       }
+}
+
+static inline void tulip_restart_rxtx(struct tulip_private *tp)
+{
+       tulip_stop_rxtx(tp);
+       udelay(5);
+       tulip_start_rxtx(tp);
+}
+
+#endif /* __NET_TULIP_H__ */
diff --git a/xen-2.4.16/drivers/net/tulip/tulip_core.c b/xen-2.4.16/drivers/net/tulip/tulip_core.c
new file mode 100644
index 0000000..601046c
--- /dev/null
@@ -0,0 +1,1922 @@
+/* tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */
+
+/*
+       Maintained by Jeff Garzik <jgarzik@mandrakesoft.com>
+       Copyright 2000,2001  The Linux Kernel Team
+       Written/copyright 1994-2001 by Donald Becker.
+
+       This software may be used and distributed according to the terms
+       of the GNU General Public License, incorporated herein by reference.
+
+       Please refer to Documentation/DocBook/tulip.{pdf,ps,html}
+       for more information on this driver, or visit the project
+       Web page at http://sourceforge.net/projects/tulip/
+
+*/
+
+#define DRV_NAME       "tulip"
+#define DRV_VERSION    "0.9.15-pre9"
+#define DRV_RELDATE    "Nov 6, 2001"
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include "tulip.h"
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <asm/unaligned.h>
+#include <asm/uaccess.h>
+
+#ifdef __sparc__
+#include <asm/pbm.h>
+#endif
+
+static char version[] __devinitdata =
+       "Linux Tulip driver version " DRV_VERSION " (" DRV_RELDATE ")\n";
+
+
+/* A few user-configurable values. */
+
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static unsigned int max_interrupt_work = 25;
+
+#define MAX_UNITS 8
+/* Used to pass the full-duplex flag, etc. */
+static int full_duplex[MAX_UNITS];
+static int options[MAX_UNITS];
+static int mtu[MAX_UNITS];                     /* Jumbo MTU for interfaces. */
+
+/*  The possible media types that can be set in options[] are: */
+const char * const medianame[32] = {
+       "10baseT", "10base2", "AUI", "100baseTx",
+       "10baseT-FDX", "100baseTx-FDX", "100baseT4", "100baseFx",
+       "100baseFx-FDX", "MII 10baseT", "MII 10baseT-FDX", "MII",
+       "10baseT(forced)", "MII 100baseTx", "MII 100baseTx-FDX", "MII 100baseT4",
+       "MII 100baseFx-HDX", "MII 100baseFx-FDX", "Home-PNA 1Mbps", "Invalid-19",
+       "","","","", "","","","",  "","","","Transceiver reset",
+};
+
+/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
+#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
+       || defined(__sparc__) || defined(__ia64__) \
+       || defined(__sh__) || defined(__mips__)
+static int rx_copybreak = 1518;
+#else
+static int rx_copybreak = 100;
+#endif
+
+/*
+  Set the bus performance register.
+       Typical: Set 16 longword cache alignment, no burst limit.
+
+       Cache alignment (bits 15:14)         Burst length (bits 13:8)
+               0000    no alignment            0x0000  unlimited
+               4000    8 longwords             0x0100  1 longword
+               8000    16 longwords            0x0200  2 longwords
+               C000    32 longwords            0x0400  4 longwords
+                                               0x0800  8 longwords
+                                               0x1000  16 longwords
+                                               0x2000  32 longwords
+
+       Warning: many older 486 systems are broken and require setting 0x00A04800
+          8 longword cache alignment, 8 longword burst.
+       ToDo: Non-Intel setting could be better.
+*/
+
+#if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__)
+static int csr0 = 0x01A00000 | 0xE000;
+#elif defined(__i386__) || defined(__powerpc__)
+static int csr0 = 0x01A00000 | 0x8000;
+#elif defined(__sparc__) || defined(__hppa__)
+/* The UltraSparc PCI controllers will disconnect at every 64-byte
+ * crossing anyways so it makes no sense to tell Tulip to burst
+ * any more than that.
+ */
+static int csr0 = 0x01A00000 | 0x9000;
+#elif defined(__arm__) || defined(__sh__)
+static int csr0 = 0x01A00000 | 0x4800;
+#else
+#warning Processor architecture undefined!
+static int csr0 = 0x00A00000 | 0x4800;
+#endif
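+/* Worked example (illustrative): on i386 csr0 is 0x01A00000 | 0x8000.
+ * Per the table above, bits 15:14 == 10b select 16-longword cache
+ * alignment and bits 13:8 == 0 leave the burst length unlimited, while
+ * 0x01A00000 sets the MWI/MRL/MRM bus-mode bits (see tulip_busconfig_bits
+ * in tulip.h).
+ */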
+
+/* Operational parameters that usually are not changed. */
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (4*HZ)
+
+
+MODULE_AUTHOR("The Linux Kernel Team");
+MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver");
+MODULE_LICENSE("GPL");
+MODULE_PARM(tulip_debug, "i");
+MODULE_PARM(max_interrupt_work, "i");
+MODULE_PARM(rx_copybreak, "i");
+MODULE_PARM(csr0, "i");
+MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i");
+MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i");
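+/* (Usage sketch, illustrative values only: the array parameters take a
+ *  per-card list at load time, e.g. "insmod tulip options=11,11
+ *  full_duplex=1,1" to force MII mode and full duplex on the first two
+ *  cards; see medianame[] above for the option codes.) */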
+
+#define PFX DRV_NAME ": "
+
+#ifdef TULIP_DEBUG
+int tulip_debug = TULIP_DEBUG;
+#else
+int tulip_debug = 1;
+#endif
+
+
+
+/*
+ * This table is used during operation for capabilities and the media timer.
+ *
+ * It is indexed via the values in 'enum chips'
+ */
+
+struct tulip_chip_table tulip_tbl[] = {
+  /* DC21040 */
+  { "Digital DC21040 Tulip", 128, 0x0001ebef, 0, tulip_timer },
+
+  /* DC21041 */
+  { "Digital DC21041 Tulip", 128, 0x0001ebef,
+       HAS_MEDIA_TABLE | HAS_NWAY, tulip_timer },
+
+  /* DC21140 */
+  { "Digital DS21140 Tulip", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_PCI_MWI, tulip_timer },
+
+  /* DC21142, DC21143 */
+  { "Digital DS21143 Tulip", 128, 0x0801fbff,
+       HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY
+       | HAS_INTR_MITIGATION | HAS_PCI_MWI, t21142_timer },
+
+  /* LC82C168 */
+  { "Lite-On 82c168 PNIC", 256, 0x0001fbef,
+       HAS_MII | HAS_PNICNWAY, pnic_timer },
+
+  /* MX98713 */
+  { "Macronix 98713 PMAC", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer },
+
+  /* MX98715 */
+  { "Macronix 98715 PMAC", 256, 0x0001ebef,
+       HAS_MEDIA_TABLE, mxic_timer },
+
+  /* MX98725 */
+  { "Macronix 98725 PMAC", 256, 0x0001ebef,
+       HAS_MEDIA_TABLE, mxic_timer },
+
+  /* AX88140 */
+  { "ASIX AX88140", 128, 0x0001fbff,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | MC_HASH_ONLY
+       | IS_ASIX, tulip_timer },
+
+  /* PNIC2 */
+  { "Lite-On PNIC-II", 256, 0x0801fbff,
+       HAS_MII | HAS_NWAY | HAS_8023X | HAS_PCI_MWI, pnic2_timer },
+
+  /* COMET */
+  { "ADMtek Comet", 256, 0x0001abef,
+       MC_HASH_ONLY | COMET_MAC_ADDR, comet_timer },
+
+  /* COMPEX9881 */
+  { "Compex 9881 PMAC", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer },
+
+  /* I21145 */
+  { "Intel DS21145 Tulip", 128, 0x0801fbff,
+       HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI
+       | HAS_NWAY | HAS_PCI_MWI, t21142_timer },
+
+  /* DM910X */
+  { "Davicom DM9102/DM9102A", 128, 0x0001ebef,
+       HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI,
+       tulip_timer },
+};
+
+
+static struct pci_device_id tulip_pci_tbl[] __devinitdata = {
+       { 0x1011, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21040 },
+       { 0x1011, 0x0014, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21041 },
+       { 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21140 },
+       { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21143 },
+       { 0x11AD, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, LC82C168 },
+       { 0x10d9, 0x0512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98713 },
+       { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 },
+/*     { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98725 },*/
+       { 0x125B, 0x1400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AX88140 },
+       { 0x11AD, 0xc115, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PNIC2 },
+       { 0x1317, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1317, 0x0985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1317, 0x1985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x13D1, 0xAB02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x13D1, 0xAB03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x104A, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x104A, 0x2774, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x11F6, 0x9881, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 },
+       { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 },
+       { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X },
+       { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X },
+       { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 },
+       { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET },
+       { } /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, tulip_pci_tbl);
+
+
+/* A full-duplex map for media types. */
+const char tulip_media_cap[32] =
+{0,0,0,16,  3,19,16,24,  27,4,7,5, 0,20,23,20,  28,31,0,0, };
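+/* (Each entry above is a MediaIs* bitmask from tulip.h; e.g. entry 5,
+ *  100baseTx-FDX, is 19 == MediaIsFD | MediaAlwaysFD | MediaIs100.) */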
+u8 t21040_csr13[] = {2,0x0C,8,4,  4,0,0,0, 0,0,0,0, 4,0,0,0};
+
+/* 21041 transceiver register settings: 10-T, 10-2, AUI, 10-T, 10T-FD*/
+u16 t21041_csr13[] = {
+       csr13_mask_10bt,                /* 10-T */
+       csr13_mask_auibnc,              /* 10-2 */
+       csr13_mask_auibnc,              /* AUI */
+       csr13_mask_10bt,                /* 10-T */
+       csr13_mask_10bt,                /* 10T-FD */
+};
+u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, };
+u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, };
+
+
+static void tulip_tx_timeout(struct net_device *dev);
+static void tulip_init_ring(struct net_device *dev);
+static int tulip_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int tulip_open(struct net_device *dev);
+static int tulip_close(struct net_device *dev);
+static void tulip_up(struct net_device *dev);
+static void tulip_down(struct net_device *dev);
+static struct net_device_stats *tulip_get_stats(struct net_device *dev);
+static int private_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static void set_rx_mode(struct net_device *dev);
+
+
+
+static void tulip_set_power_state (struct tulip_private *tp,
+                                  int sleep, int snooze)
+{
+       if (tp->flags & HAS_ACPI) {
+               u32 tmp, newtmp;
+               pci_read_config_dword (tp->pdev, CFDD, &tmp);
+               newtmp = tmp & ~(CFDD_Sleep | CFDD_Snooze);
+               if (sleep)
+                       newtmp |= CFDD_Sleep;
+               else if (snooze)
+                       newtmp |= CFDD_Snooze;
+               if (tmp != newtmp)
+                       pci_write_config_dword (tp->pdev, CFDD, newtmp);
+       }
+
+}
+
+
+static void tulip_up(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int next_tick = 3*HZ;
+       int i;
+
+       /* Wake the chip from sleep/snooze mode. */
+       tulip_set_power_state (tp, 0, 0);
+
+       /* On some chip revs we must set the MII/SYM port before the reset!? */
+       if (tp->mii_cnt  ||  (tp->mtable  &&  tp->mtable->has_mii))
+               outl(0x00040000, ioaddr + CSR6);
+
+       /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */
+       outl(0x00000001, ioaddr + CSR0);
+       udelay(100);
+
+       /* Deassert reset.
+          Wait the specified 50 PCI cycles after a reset by initializing
+          Tx and Rx queues and the address filter list. */
+       outl(tp->csr0, ioaddr + CSR0);
+       udelay(100);
+
+       if (tulip_debug > 1)
+               printk(KERN_DEBUG "%s: tulip_up(), irq==%d.\n", dev->name, dev->irq);
+
+       outl(tp->rx_ring_dma, ioaddr + CSR3);
+       outl(tp->tx_ring_dma, ioaddr + CSR4);
+       tp->cur_rx = tp->cur_tx = 0;
+       tp->dirty_rx = tp->dirty_tx = 0;
+
+       if (tp->flags & MC_HASH_ONLY) {
+               u32 addr_low = cpu_to_le32(get_unaligned((u32 *)dev->dev_addr));
+               u32 addr_high = cpu_to_le32(get_unaligned((u16 *)(dev->dev_addr+4)));
+               if (tp->chip_id == AX88140) {
+                       outl(0, ioaddr + CSR13);
+                       outl(addr_low,  ioaddr + CSR14);
+                       outl(1, ioaddr + CSR13);
+                       outl(addr_high, ioaddr + CSR14);
+               } else if (tp->flags & COMET_MAC_ADDR) {
+                       outl(addr_low,  ioaddr + 0xA4);
+                       outl(addr_high, ioaddr + 0xA8);
+                       outl(0, ioaddr + 0xAC);
+                       outl(0, ioaddr + 0xB0);
+               }
+       } else {
+               /* This is set_rx_mode(), but without starting the transmitter. */
+               u16 *eaddrs = (u16 *)dev->dev_addr;
+               u16 *setup_frm = &tp->setup_frame[15*6];
+               dma_addr_t mapping;
+
+               /* 21140 bug: you must add the broadcast address. */
+               memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame));
+               /* Fill the final entry of the table with our physical address. */
+               *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+               *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+               *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+
+               mapping = pci_map_single(tp->pdev, tp->setup_frame,
+                                        sizeof(tp->setup_frame),
+                                        PCI_DMA_TODEVICE);
+               tp->tx_buffers[tp->cur_tx].skb = NULL;
+               tp->tx_buffers[tp->cur_tx].mapping = mapping;
+
+               /* Put the setup frame on the Tx list. */
+               tp->tx_ring[tp->cur_tx].length = cpu_to_le32(0x08000000 | 192);
+               tp->tx_ring[tp->cur_tx].buffer1 = cpu_to_le32(mapping);
+               tp->tx_ring[tp->cur_tx].status = cpu_to_le32(DescOwned);
+
+               tp->cur_tx++;
+       }
+
+       tp->saved_if_port = dev->if_port;
+       if (dev->if_port == 0)
+               dev->if_port = tp->default_port;
+
+       /* Allow selecting a default media. */
+       i = 0;
+       if (tp->mtable == NULL)
+               goto media_picked;
+       if (dev->if_port) {
+               int looking_for = tulip_media_cap[dev->if_port] & MediaIsMII ? 11 :
+                       (dev->if_port == 12 ? 0 : dev->if_port);
+               for (i = 0; i < tp->mtable->leafcount; i++)
+                       if (tp->mtable->mleaf[i].media == looking_for) {
+                               printk(KERN_INFO "%s: Using user-specified media %s.\n",
+                                          dev->name, medianame[dev->if_port]);
+                               goto media_picked;
+                       }
+       }
+       if ((tp->mtable->defaultmedia & 0x0800) == 0) {
+               int looking_for = tp->mtable->defaultmedia & MEDIA_MASK;
+               for (i = 0; i < tp->mtable->leafcount; i++)
+                       if (tp->mtable->mleaf[i].media == looking_for) {
+                               printk(KERN_INFO "%s: Using EEPROM-set media %s.\n",
+                                          dev->name, medianame[looking_for]);
+                               goto media_picked;
+                       }
+       }
+       /* Start sensing first non-full-duplex media. */
+       for (i = tp->mtable->leafcount - 1;
+                (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--)
+               ;
+media_picked:
+
+       tp->csr6 = 0;
+       tp->cur_index = i;
+       tp->nwayset = 0;
+
+       if (dev->if_port) {
+               if (tp->chip_id == DC21143  &&
+                   (tulip_media_cap[dev->if_port] & MediaIsMII)) {
+                       /* We must reset the media CSRs when we force-select MII mode. */
+                       outl(0x0000, ioaddr + CSR13);
+                       outl(0x0000, ioaddr + CSR14);
+                       outl(0x0008, ioaddr + CSR15);
+               }
+               tulip_select_media(dev, 1);
+       } else if (tp->chip_id == DC21041) {
+               dev->if_port = 0;
+               tp->nway = tp->mediasense = 1;
+               tp->nwayset = tp->lpar = 0;
+               outl(0x00000000, ioaddr + CSR13);
+               outl(0xFFFFFFFF, ioaddr + CSR14);
+               outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */
+               tp->csr6 = 0x80020000;
+               if (tp->sym_advertise & 0x0040)
+                       tp->csr6 |= FullDuplex;
+               outl(tp->csr6, ioaddr + CSR6);
+               outl(0x0000EF01, ioaddr + CSR13);
+
+       } else if (tp->chip_id == DC21142) {
+               if (tp->mii_cnt) {
+                       tulip_select_media(dev, 1);
+                       if (tulip_debug > 1)
+                               printk(KERN_INFO "%s: Using MII transceiver %d, status "
+                                          "%4.4x.\n",
+                                          dev->name, tp->phys[0], tulip_mdio_read(dev, tp->phys[0], 1));
+                       outl(csr6_mask_defstate, ioaddr + CSR6);
+                       tp->csr6 = csr6_mask_hdcap;
+                       dev->if_port = 11;
+                       outl(0x0000, ioaddr + CSR13);
+                       outl(0x0000, ioaddr + CSR14);
+               } else
+                       t21142_start_nway(dev);
+       } else if (tp->chip_id == PNIC2) {
+               /* for initial startup advertise 10/100 Full and Half */
+               tp->sym_advertise = 0x01E0;
+                /* enable autonegotiate end interrupt */
+               outl(inl(ioaddr+CSR5)| 0x00008010, ioaddr + CSR5);
+               outl(inl(ioaddr+CSR7)| 0x00008010, ioaddr + CSR7);
+               pnic2_start_nway(dev);
+       } else if (tp->chip_id == LC82C168  &&  ! tp->medialock) {
+               if (tp->mii_cnt) {
+                       dev->if_port = 11;
+                       tp->csr6 = 0x814C0000 | (tp->full_duplex ? 0x0200 : 0);
+                       outl(0x0001, ioaddr + CSR15);
+               } else if (inl(ioaddr + CSR5) & TPLnkPass)
+                       pnic_do_nway(dev);
+               else {
+                       /* Start with 10mbps to do autonegotiation. */
+                       outl(0x32, ioaddr + CSR12);
+                       tp->csr6 = 0x00420000;
+                       outl(0x0001B078, ioaddr + 0xB8);
+                       outl(0x0201B078, ioaddr + 0xB8);
+                       next_tick = 1*HZ;
+               }
+       } else if ((tp->chip_id == MX98713 || tp->chip_id == COMPEX9881)
+                          && ! tp->medialock) {
+               dev->if_port = 0;
+               tp->csr6 = 0x01880000 | (tp->full_duplex ? 0x0200 : 0);
+               outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80);
+       } else if (tp->chip_id == MX98715 || tp->chip_id == MX98725) {
+               /* Provided by BOLO, Macronix - 12/10/1998. */
+               dev->if_port = 0;
+               tp->csr6 = 0x01a80200;
+               outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80);
+               outl(0x11000 | inw(ioaddr + 0xa0), ioaddr + 0xa0);
+       } else if (tp->chip_id == COMET) {
+               /* Enable automatic Tx underrun recovery. */
+               outl(inl(ioaddr + 0x88) | 1, ioaddr + 0x88);
+               dev->if_port = tp->mii_cnt ? 11 : 0;
+               tp->csr6 = 0x00040000;
+       } else if (tp->chip_id == AX88140) {
+               tp->csr6 = tp->mii_cnt ? 0x00040100 : 0x00000100;
+       } else
+               tulip_select_media(dev, 1);
+
+       /* Start the chip's Tx to process setup frame. */
+       tulip_stop_rxtx(tp);
+       barrier();
+       udelay(5);
+       outl(tp->csr6 | TxOn, ioaddr + CSR6);
+
+       /* Enable interrupts by setting the interrupt mask. */
+       outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5);
+       outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7);
+       tulip_start_rxtx(tp);
+       outl(0, ioaddr + CSR2);         /* Rx poll demand */
+
+       if (tulip_debug > 2) {
+               printk(KERN_DEBUG "%s: Done tulip_up(), CSR0 %8.8x, CSR5 %8.8x CSR6 %8.8x.\n",
+                          dev->name, inl(ioaddr + CSR0), inl(ioaddr + CSR5),
+                          inl(ioaddr + CSR6));
+       }
+
+       /* Set the timer to check for link beat and perhaps switch
+          to an alternate media type. */
+       tp->timer.expires = RUN_AT(next_tick);
+       add_timer(&tp->timer);
+}
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+/* Enable receiver */
+void tulip_xon(struct net_device *dev)
+{
+        struct tulip_private *tp = (struct tulip_private *)dev->priv;
+
+        clear_bit(tp->fc_bit, &netdev_fc_xoff);
+        if (netif_running(dev)) {
+                tulip_refill_rx(dev);
+                outl(tulip_tbl[tp->chip_id].valid_intrs,  dev->base_addr+CSR7);
+        }
+}
+#endif
+
+static int
+tulip_open(struct net_device *dev)
+{
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+        struct tulip_private *tp = (struct tulip_private *)dev->priv;
+#endif
+       int retval;
+       MOD_INC_USE_COUNT;
+
+       if ((retval = request_irq(dev->irq, &tulip_interrupt, SA_SHIRQ, dev->name, dev))) {
+               MOD_DEC_USE_COUNT;
+               return retval;
+       }
+
+       tulip_init_ring (dev);
+
+       tulip_up (dev);
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+        tp->fc_bit = netdev_register_fc(dev, tulip_xon);
+#endif
+
+       netif_start_queue (dev);
+
+       return 0;
+}
+
+
+static void tulip_tx_timeout(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       unsigned long flags;
+
+       spin_lock_irqsave (&tp->lock, flags);
+
+       if (tulip_media_cap[dev->if_port] & MediaIsMII) {
+               /* Do nothing -- the media monitor should handle this. */
+               if (tulip_debug > 1)
+                       printk(KERN_WARNING "%s: Transmit timeout using MII device.\n",
+                                  dev->name);
+       } else if (tp->chip_id == DC21040) {
+               if ( !tp->medialock  &&  inl(ioaddr + CSR12) & 0x0002) {
+                       dev->if_port = (dev->if_port == 2 ? 0 : 2);
+                       printk(KERN_INFO "%s: 21040 transmit timed out, switching to "
+                                  "%s.\n",
+                                  dev->name, medianame[dev->if_port]);
+                       tulip_select_media(dev, 0);
+               }
+               goto out;
+       } else if (tp->chip_id == DC21041) {
+               int csr12 = inl(ioaddr + CSR12);
+
+               printk(KERN_WARNING "%s: 21041 transmit timed out, status %8.8x, "
+                          "CSR12 %8.8x, CSR13 %8.8x, CSR14 %8.8x, resetting...\n",
+                          dev->name, inl(ioaddr + CSR5), csr12,
+                          inl(ioaddr + CSR13), inl(ioaddr + CSR14));
+               tp->mediasense = 1;
+               if ( ! tp->medialock) {
+                       if (dev->if_port == 1 || dev->if_port == 2) {
+                               if (csr12 & 0x0004)
+                                       dev->if_port = 2 - dev->if_port;
+                               else
+                                       dev->if_port = 0;
+                       } else
+                               dev->if_port = 1;
+                       tulip_select_media(dev, 0);
+               }
+       } else if (tp->chip_id == DC21140 || tp->chip_id == DC21142
+                          || tp->chip_id == MX98713 || tp->chip_id == COMPEX9881
+                          || tp->chip_id == DM910X) {
+               printk(KERN_WARNING "%s: 21140 transmit timed out, status %8.8x, "
+                          "SIA %8.8x %8.8x %8.8x %8.8x, resetting...\n",
+                          dev->name, inl(ioaddr + CSR5), inl(ioaddr + CSR12),
+                          inl(ioaddr + CSR13), inl(ioaddr + CSR14), inl(ioaddr + CSR15));
+               if ( ! tp->medialock  &&  tp->mtable) {
+                       do
+                               --tp->cur_index;
+                       while (tp->cur_index >= 0
+                                  && (tulip_media_cap[tp->mtable->mleaf[tp->cur_index].media]
+                                          & MediaIsFD));
+                       if (tp->cur_index < 0) {
+                               /* We start again, but should instead look for default. */
+                               tp->cur_index = tp->mtable->leafcount - 1;
+                       }
+                       tulip_select_media(dev, 0);
+                       printk(KERN_WARNING "%s: transmit timed out, switching to %s "
+                                  "media.\n", dev->name, medianame[dev->if_port]);
+               }
+       } else if (tp->chip_id == PNIC2) {
+               printk(KERN_WARNING "%s: PNIC2 transmit timed out, status %8.8x, "
+                      "CSR6/7 %8.8x / %8.8x CSR12 %8.8x, resetting...\n",
+                      dev->name, (int)inl(ioaddr + CSR5), (int)inl(ioaddr + CSR6),
+                      (int)inl(ioaddr + CSR7), (int)inl(ioaddr + CSR12));
+       } else {
+               printk(KERN_WARNING "%s: Transmit timed out, status %8.8x, CSR12 "
+                          "%8.8x, resetting...\n",
+                          dev->name, inl(ioaddr + CSR5), inl(ioaddr + CSR12));
+               dev->if_port = 0;
+       }
+
+#if defined(way_too_many_messages)
+       if (tulip_debug > 3) {
+               int i;
+               for (i = 0; i < RX_RING_SIZE; i++) {
+                       u8 *buf = (u8 *)(tp->rx_ring[i].buffer1);
+                       int j;
+                       printk(KERN_DEBUG "%2d: %8.8x %8.8x %8.8x %8.8x  "
+                                  "%2.2x %2.2x %2.2x.\n",
+                                  i, (unsigned int)tp->rx_ring[i].status,
+                                  (unsigned int)tp->rx_ring[i].length,
+                                  (unsigned int)tp->rx_ring[i].buffer1,
+                                  (unsigned int)tp->rx_ring[i].buffer2,
+                                  buf[0], buf[1], buf[2]);
+                       for (j = 0; j < 1600 && buf[j] != 0xee; j++)
+                               if (j < 100) printk(" %2.2x", buf[j]);
+                       printk(" j=%d.\n", j);
+               }
+               printk(KERN_DEBUG "  Rx ring %8.8x: ", (int)tp->rx_ring);
+               for (i = 0; i < RX_RING_SIZE; i++)
+                       printk(" %8.8x", (unsigned int)tp->rx_ring[i].status);
+               printk("\n" KERN_DEBUG "  Tx ring %8.8x: ", (int)tp->tx_ring);
+               for (i = 0; i < TX_RING_SIZE; i++)
+                       printk(" %8.8x", (unsigned int)tp->tx_ring[i].status);
+               printk("\n");
+       }
+#endif
+
+       /* Stop and restart the chip's Tx processes . */
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+        if (tp->fc_bit && test_bit(tp->fc_bit,&netdev_fc_xoff))
+                printk("BUG tx_timeout restarting rx when fc on\n");
+#endif
+       tulip_restart_rxtx(tp);
+       /* Trigger an immediate transmit demand. */
+       outl(0, ioaddr + CSR1);
+
+       tp->stats.tx_errors++;
+
+out:
+       spin_unlock_irqrestore (&tp->lock, flags);
+       dev->trans_start = jiffies;
+       netif_wake_queue (dev);
+}
+
+
+/* Initialize the Rx and Tx rings, along with various 'dev' bits. */
+static void tulip_init_ring(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int i;
+
+       tp->susp_rx = 0;
+       tp->ttimer = 0;
+       tp->nir = 0;
+
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               tp->rx_ring[i].status = 0x00000000;
+               tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ);
+               tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1));
+               tp->rx_buffers[i].skb = NULL;
+               tp->rx_buffers[i].mapping = 0;
+       }
+       /* Mark the last entry as wrapping the ring. */
+       tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP);
+       tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma);
+
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               dma_addr_t mapping;
+
+               /* Note the receive buffer must be longword aligned.
+                  dev_alloc_skb() provides 16 byte alignment.  But do *not*
+                  use skb_reserve() to align the IP header! */
+               struct sk_buff *skb = dev_alloc_skb(PKT_BUF_SZ);
+               tp->rx_buffers[i].skb = skb;
+               if (skb == NULL)
+                       break;
+               mapping = pci_map_single(tp->pdev, skb->tail,
+                                        PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+               tp->rx_buffers[i].mapping = mapping;
+               skb->dev = dev;                 /* Mark as being used by this device. */
+               tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */
+               tp->rx_ring[i].buffer1 = cpu_to_le32(mapping);
+       }
+       tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
+
+       /* The Tx buffer descriptor is filled in as needed, but we
+          do need to clear the ownership bit. */
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               tp->tx_buffers[i].skb = NULL;
+               tp->tx_buffers[i].mapping = 0;
+               tp->tx_ring[i].status = 0x00000000;
+               tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1));
+       }
+       tp->tx_ring[i-1].buffer2 = cpu_to_le32(tp->tx_ring_dma);
+}
+
+static int
+tulip_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       int entry;
+       u32 flag;
+       dma_addr_t mapping;
+
+       spin_lock_irq(&tp->lock);
+
+       /* Calculate the next Tx descriptor entry. */
+       entry = tp->cur_tx % TX_RING_SIZE;
+
+       tp->tx_buffers[entry].skb = skb;
+       mapping = pci_map_single(tp->pdev, skb->data,
+                                skb->len, PCI_DMA_TODEVICE);
+       tp->tx_buffers[entry].mapping = mapping;
+       tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping);
+
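+       /* Interrupt mitigation: ask for a Tx-done interrupt only at the
+          ring's half-way point and when it is nearly full, so bursts of
+          packets are reclaimed with few interrupts. */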
+       if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */
+               flag = 0x60000000; /* No interrupt */
+       } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) {
+               flag = 0xe0000000; /* Tx-done intr. */
+       } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) {
+               flag = 0x60000000; /* No Tx-done intr. */
+       } else {                /* Leave room for set_rx_mode() to fill entries. */
+               flag = 0xe0000000; /* Tx-done intr. */
+               netif_stop_queue(dev);
+       }
+       if (entry == TX_RING_SIZE-1)
+               flag = 0xe0000000 | DESC_RING_WRAP;
+
+       tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag);
+       /* if we were using Transmit Automatic Polling, we would need a
+        * wmb() here. */
+       tp->tx_ring[entry].status = cpu_to_le32(DescOwned);
+       wmb();
+
+       tp->cur_tx++;
+
+       /* Trigger an immediate transmit demand. */
+       outl(0, dev->base_addr + CSR1);
+
+       spin_unlock_irq(&tp->lock);
+
+       dev->trans_start = jiffies;
+
+       return 0;
+}
+
+static void tulip_clean_tx_ring(struct tulip_private *tp)
+{
+       unsigned int dirty_tx;
+
+       for (dirty_tx = tp->dirty_tx ; tp->cur_tx - dirty_tx > 0;
+               dirty_tx++) {
+               int entry = dirty_tx % TX_RING_SIZE;
+               int status = le32_to_cpu(tp->tx_ring[entry].status);
+
+               if (status < 0) {
+                       tp->stats.tx_errors++;  /* It wasn't Txed */
+                       tp->tx_ring[entry].status = 0;
+               }
+
+               /* Check for Tx filter setup frames. */
+               if (tp->tx_buffers[entry].skb == NULL) {
+                       /* test because dummy frames not mapped */
+                       if (tp->tx_buffers[entry].mapping)
+                               pci_unmap_single(tp->pdev,
+                                       tp->tx_buffers[entry].mapping,
+                                       sizeof(tp->setup_frame),
+                                       PCI_DMA_TODEVICE);
+                       continue;
+               }
+
+               pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping,
+                               tp->tx_buffers[entry].skb->len,
+                               PCI_DMA_TODEVICE);
+
+               /* Free the original skb. */
+               dev_kfree_skb_irq(tp->tx_buffers[entry].skb);
+               tp->tx_buffers[entry].skb = NULL;
+               tp->tx_buffers[entry].mapping = 0;
+       }
+}
+
+static void tulip_down (struct net_device *dev)
+{
+       long ioaddr = dev->base_addr;
+       struct tulip_private *tp = (struct tulip_private *) dev->priv;
+       unsigned long flags;
+
+       del_timer_sync (&tp->timer);
+
+       spin_lock_irqsave (&tp->lock, flags);
+
+       /* Disable interrupts by clearing the interrupt mask. */
+       outl (0x00000000, ioaddr + CSR7);
+
+       /* Stop the Tx and Rx processes. */
+       tulip_stop_rxtx(tp);
+
+       /* prepare receive buffers */
+       tulip_refill_rx(dev);
+
+       /* release any unconsumed transmit buffers */
+       tulip_clean_tx_ring(tp);
+
+       /* 21040 -- Leave the card in 10baseT state. */
+       if (tp->chip_id == DC21040)
+               outl (0x00000004, ioaddr + CSR13);
+
+       if (inl (ioaddr + CSR6) != 0xffffffff)
+               tp->stats.rx_missed_errors += inl (ioaddr + CSR8) & 0xffff;
+
+       spin_unlock_irqrestore (&tp->lock, flags);
+
+       init_timer(&tp->timer);
+       tp->timer.data = (unsigned long)dev;
+       tp->timer.function = tulip_tbl[tp->chip_id].media_timer;
+
+       dev->if_port = tp->saved_if_port;
+
+       /* Leave the driver in snooze, not sleep, mode. */
+       tulip_set_power_state (tp, 0, 1);
+}
+
+
+static int tulip_close (struct net_device *dev)
+{
+       long ioaddr = dev->base_addr;
+       struct tulip_private *tp = (struct tulip_private *) dev->priv;
+       int i;
+
+       netif_stop_queue (dev);
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+        if (tp->fc_bit) {
+                int bit = tp->fc_bit;
+                tp->fc_bit = 0;
+                netdev_unregister_fc(bit);
+        }
+#endif
+       tulip_down (dev);
+
+       if (tulip_debug > 1)
+               printk (KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n",
+                       dev->name, inl (ioaddr + CSR5));
+
+       free_irq (dev->irq, dev);
+
+       /* Free all the skbuffs in the Rx queue. */
+       for (i = 0; i < RX_RING_SIZE; i++) {
+               struct sk_buff *skb = tp->rx_buffers[i].skb;
+               dma_addr_t mapping = tp->rx_buffers[i].mapping;
+
+               tp->rx_buffers[i].skb = NULL;
+               tp->rx_buffers[i].mapping = 0;
+
+               tp->rx_ring[i].status = 0;      /* Not owned by Tulip chip. */
+               tp->rx_ring[i].length = 0;
+               tp->rx_ring[i].buffer1 = 0xBADF00D0;    /* An invalid address. */
+               if (skb) {
+                       pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ,
+                                        PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb (skb);
+               }
+       }
+       for (i = 0; i < TX_RING_SIZE; i++) {
+               struct sk_buff *skb = tp->tx_buffers[i].skb;
+
+               if (skb != NULL) {
+                       pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping,
+                                        skb->len, PCI_DMA_TODEVICE);
+                       dev_kfree_skb (skb);
+               }
+               tp->tx_buffers[i].skb = NULL;
+               tp->tx_buffers[i].mapping = 0;
+       }
+
+       MOD_DEC_USE_COUNT;
+
+       return 0;
+}
+
+static struct net_device_stats *tulip_get_stats(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+
+       if (netif_running(dev)) {
+               unsigned long flags;
+
+               spin_lock_irqsave (&tp->lock, flags);
+
+               tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff;
+
+               spin_unlock_irqrestore(&tp->lock, flags);
+       }
+
+       return &tp->stats;
+}
+
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+       struct tulip_private *np = dev->priv;
+       u32 ethcmd;
+
+       if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+               return -EFAULT;
+
+       switch (ethcmd) {
+       case ETHTOOL_GDRVINFO: {
+               struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+               strcpy(info.driver, DRV_NAME);
+               strcpy(info.version, DRV_VERSION);
+               strcpy(info.bus_info, np->pdev->slot_name);
+               if (copy_to_user(useraddr, &info, sizeof(info)))
+                       return -EFAULT;
+               return 0;
+       }
+
+       }
+
+       return -EOPNOTSUPP;
+}
+
+/* Provide ioctl() calls to examine the MII xcvr state. */
+static int private_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct tulip_private *tp = dev->priv;
+       long ioaddr = dev->base_addr;
+       struct mii_ioctl_data *data = (struct mii_ioctl_data *) & rq->ifr_data;
+       const unsigned int phy_idx = 0;
+       int phy = tp->phys[phy_idx] & 0x1f;
+       unsigned int regnum = data->reg_num;
+
+       switch (cmd) {
+       case SIOCETHTOOL:
+               return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+       case SIOCGMIIPHY:               /* Get address of MII PHY in use. */
+       case SIOCDEVPRIVATE:            /* for binary compat, remove in 2.5 */
+               if (tp->mii_cnt)
+                       data->phy_id = phy;
+               else if (tp->flags & HAS_NWAY)
+                       data->phy_id = 32;
+               else if (tp->chip_id == COMET)
+                       data->phy_id = 1;
+               else
+                       return -ENODEV;
+
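+       /* Falls through: the requested register is read as well. */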
+       case SIOCGMIIREG:               /* Read MII PHY register. */
+       case SIOCDEVPRIVATE+1:          /* for binary compat, remove in 2.5 */
+               if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) {
+                       int csr12 = inl (ioaddr + CSR12);
+                       int csr14 = inl (ioaddr + CSR14);
+                       switch (regnum) {
+                       case 0:
+                               if (((csr14<<5) & 0x1000) ||
+                                   (dev->if_port == 5 && tp->nwayset))
+                                       data->val_out = 0x1000;
+                               else
+                                       data->val_out =
+                                               (tulip_media_cap[dev->if_port] & MediaIs100 ? 0x2000 : 0)
+                                               | (tulip_media_cap[dev->if_port] & MediaIsFD ? 0x0100 : 0);
+                               break;
+                       case 1:
+                               data->val_out =
+                                       0x1848 +
+                                       ((csr12 & 0x7000) == 0x5000 ? 0x20 : 0) +
+                                       ((csr12 & 0x06) == 6 ? 0 : 4);
+                               if (tp->chip_id != DC21041)
+                                       data->val_out |= 0x6048;
+                               break;
+                       case 4:
+                               /* Advertised value, bogus 10baseTx-FD value from CSR6. */
+                               data->val_out =
+                                       ((inl(ioaddr + CSR6) >> 3) & 0x0040) +
+                                       ((csr14 >> 1) & 0x20) + 1;
+                               if (tp->chip_id != DC21041)
+                                       data->val_out |= ((csr14 >> 9) & 0x03C0);
+                               break;
+                       case 5: data->val_out = tp->lpar; break;
+                       default: data->val_out = 0; break;
+                       }
+               } else {
+                       data->val_out = tulip_mdio_read (dev, data->phy_id & 0x1f, regnum);
+               }
+               return 0;
+
+       case SIOCSMIIREG:               /* Write MII PHY register. */
+       case SIOCDEVPRIVATE+2:          /* for binary compat, remove in 2.5 */
+               if (!capable (CAP_NET_ADMIN))
+                       return -EPERM;
+               if (regnum & ~0x1f)
+                       return -EINVAL;
+               if (data->phy_id == phy) {
+                       u16 value = data->val_in;
+                       switch (regnum) {
+                       case 0: /* Check for autonegotiation on or reset. */
+                               tp->full_duplex_lock = (value & 0x9000) ? 0 : 1;
+                               if (tp->full_duplex_lock)
+                                       tp->full_duplex = (value & 0x0100) ? 1 : 0;
+                               break;
+                       case 4:
+                               tp->advertising[phy_idx] =
+                               tp->mii_advertise = data->val_in;
+                               break;
+                       }
+               }
+               if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) {
+                       u16 value = data->val_in;
+                       if (regnum == 0) {
+                               if ((value & 0x1200) == 0x1200) {
+                                       if (tp->chip_id == PNIC2)
+                                               pnic2_start_nway (dev);
+                                       else
+                                               t21142_start_nway (dev);
+                               }
+                       } else if (regnum == 4)
+                               tp->sym_advertise = value;
+               } else {
+                       tulip_mdio_write (dev, data->phy_id & 0x1f, regnum, data->val_in);
+               }
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+
+/* Set or clear the multicast filter for this adaptor.
+   Note that we only use exclusion around actually queueing the
+   new frame, not around filling tp->setup_frame.  This is non-deterministic
+   when re-entered but still correct. */
+
+/* The little-endian AUTODIN32 ethernet CRC calculation.
+   N.B. Do not use for bulk data, use a table-based routine instead.
+   This is common code and should be moved to net/core/crc.c */
+static unsigned const ethernet_polynomial_le = 0xedb88320U;
+static inline u32 ether_crc_le(int length, unsigned char *data)
+{
+       u32 crc = 0xffffffff;   /* Initial value. */
+       while(--length >= 0) {
+               unsigned char current_octet = *data++;
+               int bit;
+               for (bit = 8; --bit >= 0; current_octet >>= 1) {
+                       if ((crc ^ current_octet) & 1) {
+                               crc >>= 1;
+                               crc ^= ethernet_polynomial_le;
+                       } else
+                               crc >>= 1;
+               }
+       }
+       return crc;
+}
+static unsigned const ethernet_polynomial = 0x04c11db7U;
+static inline u32 ether_crc(int length, unsigned char *data)
+{
+    int crc = -1;
+
+    while(--length >= 0) {
+               unsigned char current_octet = *data++;
+               int bit;
+               for (bit = 0; bit < 8; bit++, current_octet >>= 1)
+                       crc = (crc << 1) ^
+                               ((crc < 0) ^ (current_octet & 1) ? ethernet_polynomial : 0);
+    }
+    return crc;
+}
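+
+/* Editor's sketch, not part of the original driver: the table-based routine
+   the comment above recommends for bulk data.  It trades a 1 KB table for
+   one lookup per octet and produces the same values as ether_crc_le().
+   The names crc32_le_table, crc32_le_init and ether_crc_le_by_table are
+   hypothetical. */
+#if 0
+static u32 crc32_le_table[256];
+
+static void crc32_le_init(void)
+{
+       int n, k;
+       for (n = 0; n < 256; n++) {
+               u32 c = (u32)n;
+               for (k = 0; k < 8; k++)
+                       c = (c & 1) ? (c >> 1) ^ ethernet_polynomial_le : (c >> 1);
+               crc32_le_table[n] = c;
+       }
+}
+
+static inline u32 ether_crc_le_by_table(int length, unsigned char *data)
+{
+       u32 crc = 0xffffffff;   /* Same initial value as ether_crc_le(). */
+       while (--length >= 0)
+               crc = (crc >> 8) ^ crc32_le_table[(crc ^ *data++) & 0xff];
+       return crc;
+}
+#endif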
+
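+/* set_bit_le() sets bit 'i' in the little-endian bit array at 'p'; it is
+   used below to build the 512-bit multicast hash table in the byte order
+   the setup frame expects. */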
+#undef set_bit_le
+#define set_bit_le(i,p) do { ((char *)(p))[(i)/8] |= (1<<((i)%8)); } while(0)
+
+static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       u16 hash_table[32];
+       struct dev_mc_list *mclist;
+       int i;
+       u16 *eaddrs;
+
+       memset(hash_table, 0, sizeof(hash_table));
+       set_bit_le(255, hash_table);                    /* Broadcast entry */
+       /* This should work on big-endian machines as well. */
+       for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+            i++, mclist = mclist->next) {
+               int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+
+               set_bit_le(index, hash_table);
+       }
+
+       /* Copy the hash table into the setup frame only after every address
+          has been hashed; inside the loop above this copy would clobber 'i'
+          and cut the list walk short. */
+       for (i = 0; i < 32; i++) {
+               *setup_frm++ = hash_table[i];
+               *setup_frm++ = hash_table[i];
+       }
+       setup_frm = &tp->setup_frame[13*6];
+
+       /* Fill the final entry with our physical address. */
+       eaddrs = (u16 *)dev->dev_addr;
+       *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+       *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+       *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+}
+
+static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       struct dev_mc_list *mclist;
+       int i;
+       u16 *eaddrs;
+
+       /* We have <= 14 addresses so we can use the wonderful
+          16 address perfect filtering of the Tulip.  Unused entries are
+          padded with the broadcast address and the final entry holds our
+          own station address. */
+       for (i = 0, mclist = dev->mc_list; i < dev->mc_count;
+            i++, mclist = mclist->next) {
+               eaddrs = (u16 *)mclist->dmi_addr;
+               *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+               *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+               *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+       }
+       /* Fill the unused entries with the broadcast address. */
+       memset(setup_frm, 0xff, (15-i)*12);
+       setup_frm = &tp->setup_frame[15*6];
+
+       /* Fill the final entry with our physical address. */
+       eaddrs = (u16 *)dev->dev_addr;
+       *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+       *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+       *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+}
+
+
+static void set_rx_mode(struct net_device *dev)
+{
+       struct tulip_private *tp = (struct tulip_private *)dev->priv;
+       long ioaddr = dev->base_addr;
+       int csr6;
+
+       csr6 = inl(ioaddr + CSR6) & ~0x00D5;
+
+       tp->csr6 &= ~0x00D5;
+       if (dev->flags & IFF_PROMISC) {                 /* Set promiscuous. */
+               tp->csr6 |= AcceptAllMulticast | AcceptAllPhys;
+               csr6 |= AcceptAllMulticast | AcceptAllPhys;
+               /* Unconditionally log net taps. */
+               printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name);
+       } else if ((dev->mc_count > 1000)  ||  (dev->flags & IFF_ALLMULTI)) {
+               /* Too many to filter well -- accept all multicasts. */
+               tp->csr6 |= AcceptAllMulticast;
+               csr6 |= AcceptAllMulticast;
+       } else  if (tp->flags & MC_HASH_ONLY) {
+               /* Some work-alikes have only a 64-entry hash filter table. */
+               /* Should verify correctness on big-endian/__powerpc__ */
+               struct dev_mc_list *mclist;
+               int i;
+               if (dev->mc_count > 64) {               /* Arbitrary non-effective limit. */
+                       tp->csr6 |= AcceptAllMulticast;
+                       csr6 |= AcceptAllMulticast;
+               } else {
+                       u32 mc_filter[2] = {0, 0};               /* Multicast hash filter */
+                       int filterbit;
+                       for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+                                i++, mclist = mclist->next) {
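+                               /* COMET parts hash with the low bits of the
+                                  little-endian CRC; other chips use the top
+                                  6 bits of the big-endian CRC. */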
+                               if (tp->flags & COMET_MAC_ADDR)
+                                       filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
+                               else
+                                       filterbit = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+                               filterbit &= 0x3f;
+                               mc_filter[filterbit >> 5] |= cpu_to_le32(1 << (filterbit & 31));
+                               if (tulip_debug > 2) {
+                                       printk(KERN_INFO "%s: Added filter for %2.2x:%2.2x:%2.2x:"
+                                                  "%2.2x:%2.2x:%2.2x  %8.8x bit %d.\n", dev->name,
+                                                  mclist->dmi_addr[0], mclist->dmi_addr[1],
+                                                  mclist->dmi_addr[2], mclist->dmi_addr[3],
+                                                  mclist->dmi_addr[4], mclist->dmi_addr[5],
+                                                  ether_crc(ETH_ALEN, mclist->dmi_addr), filterbit);
+                               }
+                       }
+                       if (mc_filter[0] == tp->mc_filter[0]  &&
+                               mc_filter[1] == tp->mc_filter[1])
+                               ;                               /* No change. */
+                       else if (tp->flags & IS_ASIX) {
+                               outl(2, ioaddr + CSR13);
+                               outl(mc_filter[0], ioaddr + CSR14);
+                               outl(3, ioaddr + CSR13);
+                               outl(mc_filter[1], ioaddr + CSR14);
+                       } else if (tp->flags & COMET_MAC_ADDR) {
+                               outl(mc_filter[0], ioaddr + 0xAC);
+                               outl(mc_filter[1], ioaddr + 0xB0);
+                       }
+                       tp->mc_filter[0] = mc_filter[0];
+                       tp->mc_filter[1] = mc_filter[1];
+               }
+       } else {
+               unsigned long flags;
+
+               /* Note that only the low-address shortword of setup_frame is valid!
+                  The values are doubled for big-endian architectures. */
+               if (dev->mc_count > 14) { /* Must use a multicast hash table. */
+                       build_setup_frame_hash(tp->setup_frame, dev);
+               } else {
+                       build_setup_frame_perfect(tp->setup_frame, dev);
+               }
+
+               spin_lock_irqsave(&tp->lock, flags);
+
+               if (tp->cur_tx - tp->dirty_tx > TX_RING_SIZE - 2) {
+                       /* Same setup recently queued, we need not add it. */
+               } else {
+                       u32 tx_flags = 0x08000000 | 192;
+                       unsigned int entry;
+                       int dummy = -1;
+
+                       /* Now add this frame to the Tx list. */
+
+                       entry = tp->cur_tx++ % TX_RING_SIZE;
+
+                       if (entry != 0) {
+                               /* Avoid a chip erratum by prefixing a dummy entry. */
+                               tp->tx_buffers[entry].skb = NULL;
+                               tp->tx_buffers[entry].mapping = 0;
+                               tp->tx_ring[entry].length =
+                                       (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0;
+                               tp->tx_ring[entry].buffer1 = 0;
+                               /* Must set DescOwned later to avoid race with chip */
+                               dummy = entry;
+                               entry = tp->cur_tx++ % TX_RING_SIZE;
+                       }
+
+                       tp->tx_buffers[entry].skb = NULL;
+                       tp->tx_buffers[entry].mapping =
+                               pci_map_single(tp->pdev, tp->setup_frame,
+                                              sizeof(tp->setup_frame),
+                                              PCI_DMA_TODEVICE);
+                       /* Put the setup frame on the Tx list. */
+                       if (entry == TX_RING_SIZE-1)
+                               tx_flags |= DESC_RING_WRAP;             /* Wrap ring. */
+                       tp->tx_ring[entry].length = cpu_to_le32(tx_flags);
+                       tp->tx_ring[entry].buffer1 =
+                               cpu_to_le32(tp->tx_buffers[entry].mapping);
+                       tp->tx_ring[entry].status = cpu_to_le32(DescOwned);
+                       if (dummy >= 0)
+                               tp->tx_ring[dummy].status = cpu_to_le32(DescOwned);
+                       if (tp->cur_tx - tp->dirty_tx >= TX_RING_SIZE - 2)
+                               netif_stop_queue(dev);
+
+                       /* Trigger an immediate transmit demand. */
+                       outl(0, ioaddr + CSR1);
+               }
+
+               spin_unlock_irqrestore(&tp->lock, flags);
+       }
+
+       outl(csr6, ioaddr + CSR6);
+}
+
+#ifdef CONFIG_TULIP_MWI
+static void __devinit tulip_mwi_config (struct pci_dev *pdev,
+                                       struct net_device *dev)
+{
+       struct tulip_private *tp = dev->priv;
+       u8 cache;
+       u16 pci_command, new_command;
+       u32 csr0;
+
+       if (tulip_debug > 3)
+               printk(KERN_DEBUG "%s: tulip_mwi_config()\n", pdev->slot_name);
+
+       tp->csr0 = csr0 = 0;
+
+       /* Check for a sane cache line size (borrowed from acenic.c). */
+       pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache);
+       if ((cache << 2) != SMP_CACHE_BYTES) {
+               printk(KERN_WARNING "%s: PCI cache line size set incorrectly "
+                      "(%i bytes) by BIOS/FW, correcting to %i\n",
+                      pdev->slot_name, (cache << 2), SMP_CACHE_BYTES);
+               pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE,
+                                     SMP_CACHE_BYTES >> 2);
+               udelay(5);
+       }
+
+       /* read cache line size again, hardware may not have accepted
+        * our cache line size change
+        */
+       pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache);
+       if (!cache)
+               goto out;
+
+       /* if we have any cache line size at all, we can do MRM */
+       csr0 |= MRM;
+
+       /* ...and barring hardware bugs, MWI */
+       if (!(tp->chip_id == DC21143 && tp->revision == 65))
+               csr0 |= MWI;
+
+       /* Set or clear the MWI bit in the standard PCI command register,
+        * and check for the case where MWI is desired but not available.
+        */
+       pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+       if (csr0 & MWI) new_command = pci_command | PCI_COMMAND_INVALIDATE;
+       else            new_command = pci_command & ~PCI_COMMAND_INVALIDATE;
+       if (new_command != pci_command) {
+               pci_write_config_word(pdev, PCI_COMMAND, new_command);
+               udelay(5);
+               pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+               if ((csr0 & MWI) && (!(pci_command & PCI_COMMAND_INVALIDATE)))
+                       csr0 &= ~MWI;
+       }
+
+       /* assign per-cacheline-size cache alignment and
+        * burst length values
+        */
+       switch (cache) {
+       case 8:
+               csr0 |= MRL | (1 << CALShift) | (16 << BurstLenShift);
+               break;
+       case 16:
+               csr0 |= MRL | (2 << CALShift) | (16 << BurstLenShift);
+               break;
+       case 32:
+               csr0 |= MRL | (3 << CALShift) | (32 << BurstLenShift);
+               break;
+       default:
+               goto bad_cacheline;
+       }
+
+       tp->csr0 = csr0;
+       goto out;
+
+bad_cacheline:
+       /* No usable cache line size: disable MWI and fall back to
+          conservative burst-length and cache-alignment defaults. */
+       if (csr0 & MWI) {
+               pci_command &= ~PCI_COMMAND_INVALIDATE;
+               pci_write_config_word(pdev, PCI_COMMAND, pci_command);
+               csr0 &= ~MWI;
+       }
+       tp->csr0 = csr0 | (8 << BurstLenShift) | (1 << CALShift);
+
+out:
+       if (tulip_debug > 2)
+               printk(KERN_DEBUG "%s: MWI config cacheline=%d, csr0=%08x\n",
+                      pdev->slot_name, cache, csr0);
+}
+#endif
+
+static int __devinit tulip_init_one (struct pci_dev *pdev,
+                                    const struct pci_device_id *ent)
+{
+       struct tulip_private *tp;
+       /* See note below on the multiport cards. */
+       static unsigned char last_phys_addr[6] = {0x00, 'L', 'i', 'n', 'u', 'x'};
+       static int last_irq;
+       static int multiport_cnt;       /* For four-port boards w/one EEPROM */
+       u8 chip_rev;
+       int i, irq;
+       unsigned short sum;
+       u8 ee_data[EEPROM_SIZE];
+       struct net_device *dev;
+       long ioaddr;
+       static int board_idx = -1;
+       int chip_idx = ent->driver_data;
+       unsigned int t2104x_mode = 0;
+       unsigned int eeprom_missing = 0;
+       unsigned int force_csr0 = 0;
+
+#ifndef MODULE
+       static int did_version;         /* Already printed version info. */
+       if (tulip_debug > 0  &&  did_version++ == 0)
+               printk (KERN_INFO "%s", version);
+#endif
+
+       board_idx++;
+
+       /*
+        *      LAN Media boards wire a tulip chip to a WAN interface and
+        *      need a very different driver (the lmc driver).
+        */
+
+       if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) {
+               printk (KERN_ERR PFX "skipping LMC card.\n");
+               return -ENODEV;
+       }
+
+       /*
+        *      Early DM9100s need software CRC and the dmfe driver.
+        */
+
+       if (pdev->vendor == 0x1282 && pdev->device == 0x9100)
+       {
+               u32 dev_rev;
+               /* Read Chip revision */
+               pci_read_config_dword(pdev, PCI_REVISION_ID, &dev_rev);
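+               /* The config dword at PCI_REVISION_ID is (class code << 8) |
+                  revision, so only Ethernet-class parts at revision 0x30 or
+                  later pass this test. */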
+               if(dev_rev < 0x02000030)
+               {
+                       printk(KERN_ERR PFX "skipping early DM9100 with Crc bug (use dmfe)\n");
+                       return -ENODEV;
+               }
+       }
+
+       /*
+        *      Look for early PCI chipsets where people have reported hangs
+        *      unless these workarounds are in place.
+        */
+
+       /* Intel Saturn: switch to an 8-longword burst with 8-longword cache
+          alignment.  Aries might need this too.  The Saturn errata are not
+          pretty reading, but thankfully it's an old 486 chipset.
+       */
+
+       if (pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424, NULL)) {
+               csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift);
+               force_csr0 = 1;
+       }
+       /* The dreaded SiS496 486 chipset. Same workaround as above. */
+       if (pci_find_device(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, NULL)) {
+               csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift);
+               force_csr0 = 1;
+       }
+
+       /* bugfix: the ASIX must have a burst limit or horrible things happen. */
+       if (chip_idx == AX88140) {
+               if ((csr0 & 0x3f00) == 0)
+                       csr0 |= 0x2000;
+       }
+
+       /* PNIC doesn't have MWI/MRL/MRM... */
+       if (chip_idx == LC82C168)
+               csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */
+
+       /* DM9102A has troubles with MRM; also clear reserved bits 24:22, 20, 16, 7:1 */
+       if (pdev->vendor == 0x1282 && pdev->device == 0x9102)
+               csr0 &= ~0x01f100ff;
+
+#if defined(__sparc__)
+        /* DM9102A needs 32-dword alignment/burst length on sparc - chip bug? */
+        if (pdev->vendor == 0x1282 && pdev->device == 0x9102)
+                csr0 = (csr0 & ~0xff00) | 0xe000;
+#endif
+
+       /*
+        *      And back to business
+        */
+
+       i = pci_enable_device(pdev);
+       if (i) {
+               printk (KERN_ERR PFX
+                       "Cannot enable tulip board #%d, aborting\n",
+                       board_idx);
+               return i;
+       }
+
+       ioaddr = pci_resource_start (pdev, 0);
+       irq = pdev->irq;
+
+       /* alloc_etherdev ensures aligned and zeroed private structures */
+       dev = alloc_etherdev (sizeof (*tp));
+       if (!dev) {
+               printk (KERN_ERR PFX "ether device alloc failed, aborting\n");
+               return -ENOMEM;
+       }
+
+       if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) {
+               printk (KERN_ERR PFX "%s: I/O region (0x%lx@0x%lx) too small, "
+                       "aborting\n", pdev->slot_name,
+                       pci_resource_len (pdev, 0),
+                       pci_resource_start (pdev, 0));
+               goto err_out_free_netdev;
+       }
+
+       /* grab all resources from both PIO and MMIO regions, as we
+        * don't want anyone else messing around with our hardware */
+       if (pci_request_regions (pdev, "tulip"))
+               goto err_out_free_netdev;
+
+#ifndef USE_IO_OPS
+       ioaddr = (unsigned long) ioremap (pci_resource_start (pdev, 1),
+                                         tulip_tbl[chip_idx].io_size);
+       if (!ioaddr)
+               goto err_out_free_res;
+#endif
+
+       pci_read_config_byte (pdev, PCI_REVISION_ID, &chip_rev);
+
+       /*
+        * initialize private data structure 'tp'
+        * it is zeroed and aligned in alloc_etherdev
+        */
+       tp = dev->priv;
+
+       tp->rx_ring = pci_alloc_consistent(pdev,
+                                          sizeof(struct tulip_rx_desc) * RX_RING_SIZE +
+                                          sizeof(struct tulip_tx_desc) * TX_RING_SIZE,
+                                          &tp->rx_ring_dma);
+       if (!tp->rx_ring)
+               goto err_out_mtable;
+       tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE);
+       tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE;
+
+       tp->chip_id = chip_idx;
+       tp->flags = tulip_tbl[chip_idx].flags;
+       tp->pdev = pdev;
+       tp->base_addr = ioaddr;
+       tp->revision = chip_rev;
+       tp->csr0 = csr0;
+       spin_lock_init(&tp->lock);
+       spin_lock_init(&tp->mii_lock);
+       init_timer(&tp->timer);
+       tp->timer.data = (unsigned long)dev;
+       tp->timer.function = tulip_tbl[tp->chip_id].media_timer;
+
+       dev->base_addr = ioaddr;
+       dev->irq = irq;
+
+#ifdef CONFIG_TULIP_MWI
+       if (!force_csr0 && (tp->flags & HAS_PCI_MWI))
+               tulip_mwi_config (pdev, dev);
+#else
+       /* MWI is broken for DC21143 rev 65... */
+       if (chip_idx == DC21143 && chip_rev == 65)
+               tp->csr0 &= ~MWI;
+#endif
+
+       /* Stop the chip's Tx and Rx processes. */
+       tulip_stop_rxtx(tp);
+
+       pci_set_master(pdev);
+
+       /* Clear the missed-packet counter. */
+       inl(ioaddr + CSR8);
+
+       if (chip_idx == DC21041) {
+               if (inl(ioaddr + CSR9) & 0x8000) {
+                       chip_idx = DC21040;
+                       t2104x_mode = 1;
+               } else {
+                       t2104x_mode = 2;
+               }
+       }
+
+       /* The station address ROM is read byte serially.  The register must
+          be polled, waiting for the value to be read bit serially from the
+          EEPROM.
+          */
+       sum = 0;
+       if (chip_idx == DC21040) {
+               outl(0, ioaddr + CSR9);         /* Reset the pointer with a dummy write. */
+               for (i = 0; i < 6; i++) {
+                       int value, boguscnt = 100000;
+                       do
+                               value = inl(ioaddr + CSR9);
+                       while (value < 0  && --boguscnt > 0);
+                       dev->dev_addr[i] = value;
+                       sum += value & 0xff;
+               }
+       } else if (chip_idx == LC82C168) {
+               for (i = 0; i < 3; i++) {
+                       int value, boguscnt = 100000;
+                       outl(0x600 | i, ioaddr + 0x98);
+                       do
+                               value = inl(ioaddr + CSR9);
+                       while (value < 0  && --boguscnt > 0);
+                       put_unaligned(le16_to_cpu(value), ((u16*)dev->dev_addr) + i);
+                       sum += value & 0xffff;
+               }
+       } else if (chip_idx == COMET) {
+               /* No need to read the EEPROM. */
+               put_unaligned(inl(ioaddr + 0xA4), (u32 *)dev->dev_addr);
+               put_unaligned(inl(ioaddr + 0xA8), (u16 *)(dev->dev_addr + 4));
+               for (i = 0; i < 6; i ++)
+                       sum += dev->dev_addr[i];
+       } else {
+               /* A serial EEPROM interface: read it now, sort it out later. */
+               int sa_offset = 0;
+               int ee_addr_size = tulip_read_eeprom(ioaddr, 0xff, 8) & 0x40000 ? 8 : 6;
+
+               for (i = 0; i < sizeof(ee_data)/2; i++)
+                       ((u16 *)ee_data)[i] =
+                               le16_to_cpu(tulip_read_eeprom(ioaddr, i, ee_addr_size));
+
+               /* DEC now has a specification (see Notes) but early board makers
+                  just put the address in the first EEPROM locations. */
+               /* This does memcmp(ee_data, ee_data+16, 8). */
+               for (i = 0; i < 8; i ++)
+                       if (ee_data[i] != ee_data[16+i])
+                               sa_offset = 20;
+               if (ee_data[0] == 0xff  &&  ee_data[1] == 0xff &&  ee_data[2] == 0) {
+                       sa_offset = 2;          /* Grrr, damn Matrox boards. */
+                       multiport_cnt = 4;
+               }
+#ifdef CONFIG_DDB5476
+               if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 6)) {
+                       /* DDB5476 MAC address in first EEPROM locations. */
+                       sa_offset = 0;
+                       /* No media table either */
+                       tp->flags &= ~HAS_MEDIA_TABLE;
+               }
+#endif
+#ifdef CONFIG_DDB5477
+               if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 4)) {
+                       /* DDB5477 MAC address in first EEPROM locations. */
+                       sa_offset = 0;
+                       /* No media table either */
+                       tp->flags &= ~HAS_MEDIA_TABLE;
+               }
+#endif
+               for (i = 0; i < 6; i ++) {
+                       dev->dev_addr[i] = ee_data[i + sa_offset];
+                       sum += ee_data[i + sa_offset];
+               }
+       }
+       /* Lite-On boards have the address byte-swapped. */
+       if ((dev->dev_addr[0] == 0xA0  ||  dev->dev_addr[0] == 0xC0)
+               &&  dev->dev_addr[1] == 0x00)
+               for (i = 0; i < 6; i+=2) {
+                       char tmp = dev->dev_addr[i];
+                       dev->dev_addr[i] = dev->dev_addr[i+1];
+                       dev->dev_addr[i+1] = tmp;
+               }
+       /* On the Zynx 315 Etherarray and other multiport boards only the
+          first Tulip has an EEPROM.
+          On Sparc systems the mac address is held in the OBP property
+          "local-mac-address".
+          The addresses of the subsequent ports are derived from the first.
+          Many PCI BIOSes also incorrectly report the IRQ line, so we correct
+          that here as well. */
+       if (sum == 0  || sum == 6*0xff) {
+#if defined(__sparc__)
+               struct pcidev_cookie *pcp = pdev->sysdata;
+#endif
+               eeprom_missing = 1;
+               for (i = 0; i < 5; i++)
+                       dev->dev_addr[i] = last_phys_addr[i];
+               dev->dev_addr[i] = last_phys_addr[i] + 1;
+#if defined(__sparc__)
+               if ((pcp != NULL) && prom_getproplen(pcp->prom_node,
+                       "local-mac-address") == 6) {
+                       prom_getproperty(pcp->prom_node, "local-mac-address",
+                           dev->dev_addr, 6);
+               }
+#endif
+#if defined(__i386__)          /* Patch up x86 BIOS bug. */
+               if (last_irq)
+                       irq = last_irq;
+#endif
+       }
+
+       for (i = 0; i < 6; i++)
+               last_phys_addr[i] = dev->dev_addr[i];
+       last_irq = irq;
+
+       /* The lower four bits are the media type. */
+       if (board_idx >= 0  &&  board_idx < MAX_UNITS) {
+               if (options[board_idx] & MEDIA_MASK)
+                       tp->default_port = options[board_idx] & MEDIA_MASK;
+               if ((options[board_idx] & FullDuplex) || full_duplex[board_idx] > 0)
+                       tp->full_duplex = 1;
+               if (mtu[board_idx] > 0)
+                       dev->mtu = mtu[board_idx];
+       }
+       if (dev->mem_start & MEDIA_MASK)
+               tp->default_port = dev->mem_start & MEDIA_MASK;
+       if (tp->default_port) {
+               printk(KERN_INFO "tulip%d: Transceiver selection forced to %s.\n",
+                      board_idx, medianame[tp->default_port & MEDIA_MASK]);
+               tp->medialock = 1;
+               if (tulip_media_cap[tp->default_port] & MediaAlwaysFD)
+                       tp->full_duplex = 1;
+       }
+       if (tp->full_duplex)
+               tp->full_duplex_lock = 1;
+
+       if (tulip_media_cap[tp->default_port] & MediaIsMII) {
+               u16 media2advert[] = { 0x20, 0x40, 0x03e0, 0x60, 0x80, 0x100, 0x200 };
+               tp->mii_advertise = media2advert[tp->default_port - 9];
+               tp->mii_advertise |= (tp->flags & HAS_8023X); /* Matching bits! */
+       }
+
+       if (tp->flags & HAS_MEDIA_TABLE) {
+               memcpy(tp->eeprom, ee_data, sizeof(tp->eeprom));
+
+               sprintf(dev->name, "tulip%d", board_idx);       /* hack */
+               tulip_parse_eeprom(dev);
+               strcpy(dev->name, "eth%d");                     /* un-hack */
+       }
+
+       if ((tp->flags & ALWAYS_CHECK_MII) ||
+               (tp->mtable  &&  tp->mtable->has_mii) ||
+               ( ! tp->mtable  &&  (tp->flags & HAS_MII))) {
+               if (tp->mtable  &&  tp->mtable->has_mii) {
+                       for (i = 0; i < tp->mtable->leafcount; i++)
+                               if (tp->mtable->mleaf[i].media == 11) {
+                                       tp->cur_index = i;
+                                       tp->saved_if_port = dev->if_port;
+                                       tulip_select_media(dev, 2);
+                                       dev->if_port = tp->saved_if_port;
+                                       break;
+                               }
+               }
+
+		/* Find the connected MII xcvrs.
+		   Doing this in open() would allow detecting external xcvrs
+		   later, but it takes too much time. */
+               tulip_find_mii (dev, board_idx);
+       }
+
+       /* The Tulip-specific entries in the device structure. */
+       dev->open = tulip_open;
+       dev->hard_start_xmit = tulip_start_xmit;
+       dev->tx_timeout = tulip_tx_timeout;
+       dev->watchdog_timeo = TX_TIMEOUT;
+       dev->stop = tulip_close;
+       dev->get_stats = tulip_get_stats;
+       dev->do_ioctl = private_ioctl;
+       dev->set_multicast_list = set_rx_mode;
+
+       if (register_netdev(dev))
+               goto err_out_free_ring;
+
+       printk(KERN_INFO "%s: %s rev %d at %#3lx,",
+              dev->name, tulip_tbl[chip_idx].chip_name, chip_rev, ioaddr);
+       pci_set_drvdata(pdev, dev);
+
+       if (t2104x_mode == 1)
+               printk(" 21040 compatible mode,");
+       else if (t2104x_mode == 2)
+               printk(" 21041 mode,");
+       if (eeprom_missing)
+               printk(" EEPROM not present,");
+       for (i = 0; i < 6; i++)
+               printk("%c%2.2X", i ? ':' : ' ', dev->dev_addr[i]);
+       printk(", IRQ %d.\n", irq);
+
+	if (tp->chip_id == PNIC2)
+               tp->link_change = pnic2_lnk_change;
+       else if ((tp->flags & HAS_NWAY)  || tp->chip_id == DC21041)
+               tp->link_change = t21142_lnk_change;
+       else if (tp->flags & HAS_PNICNWAY)
+               tp->link_change = pnic_lnk_change;
+
+       /* Reset the xcvr interface and turn on heartbeat. */
+       switch (chip_idx) {
+       case DC21041:
+               if (tp->sym_advertise == 0)
+                       tp->sym_advertise = 0x0061;
+               outl(0x00000000, ioaddr + CSR13);
+               outl(0xFFFFFFFF, ioaddr + CSR14);
+               outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */
+               outl(inl(ioaddr + CSR6) | csr6_fd, ioaddr + CSR6);
+               outl(0x0000EF01, ioaddr + CSR13);
+               break;
+       case DC21040:
+               outl(0x00000000, ioaddr + CSR13);
+               outl(0x00000004, ioaddr + CSR13);
+               break;
+       case DC21140:
+       case DM910X:
+       default:
+               if (tp->mtable)
+                       outl(tp->mtable->csr12dir | 0x100, ioaddr + CSR12);
+               break;
+       case DC21142:
+               if (tp->mii_cnt  ||  tulip_media_cap[dev->if_port] & MediaIsMII) {
+                       outl(csr6_mask_defstate, ioaddr + CSR6);
+                       outl(0x0000, ioaddr + CSR13);
+                       outl(0x0000, ioaddr + CSR14);
+                       outl(csr6_mask_hdcap, ioaddr + CSR6);
+               } else
+                       t21142_start_nway(dev);
+               break;
+       case PNIC2:
+		/* just do a reset for sanity's sake */
+               outl(0x0000, ioaddr + CSR13);
+               outl(0x0000, ioaddr + CSR14);
+               break;
+       case LC82C168:
+               if ( ! tp->mii_cnt) {
+                       tp->nway = 1;
+                       tp->nwayset = 0;
+                       outl(csr6_ttm | csr6_ca, ioaddr + CSR6);
+                       outl(0x30, ioaddr + CSR12);
+                       outl(0x0001F078, ioaddr + CSR6);
+                       outl(0x0201F078, ioaddr + CSR6); /* Turn on autonegotiation. */
+               }
+               break;
+       case MX98713:
+       case COMPEX9881:
+               outl(0x00000000, ioaddr + CSR6);
+               outl(0x000711C0, ioaddr + CSR14); /* Turn on NWay. */
+               outl(0x00000001, ioaddr + CSR13);
+               break;
+       case MX98715:
+       case MX98725:
+               outl(0x01a80000, ioaddr + CSR6);
+               outl(0xFFFFFFFF, ioaddr + CSR14);
+               outl(0x00001000, ioaddr + CSR12);
+               break;
+       case COMET:
+               /* No initialization necessary. */
+               break;
+       }
+
+       /* put the chip in snooze mode until opened */
+       tulip_set_power_state (tp, 0, 1);
+
+       return 0;
+
+err_out_free_ring:
+       pci_free_consistent (pdev,
+                            sizeof (struct tulip_rx_desc) * RX_RING_SIZE +
+                            sizeof (struct tulip_tx_desc) * TX_RING_SIZE,
+                            tp->rx_ring, tp->rx_ring_dma);
+
+err_out_mtable:
+       if (tp->mtable)
+               kfree (tp->mtable);
+#ifndef USE_IO_OPS
+       iounmap((void *)ioaddr);
+
+err_out_free_res:
+#endif
+       pci_release_regions (pdev);
+
+err_out_free_netdev:
+       kfree (dev);
+       return -ENODEV;
+}
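+
+/*
+ * Editorial note, illustration only: the err_out_* labels above form a
+ * fall-through unwind ladder.  Each label releases one resource acquired
+ * earlier in tulip_init_one(), so a failure jumps to the label matching
+ * the last successful step and everything acquired before it is released
+ * in reverse order, e.g.
+ *
+ *	if (register_netdev(dev))
+ *		goto err_out_free_ring;
+ *
+ * frees the descriptor rings, the media table, the I/O mapping, the PCI
+ * regions and finally the net_device itself.
+ */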
+
+
+#ifdef CONFIG_PM
+
+static int tulip_suspend (struct pci_dev *pdev, u32 state)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       if (dev && netif_running (dev) && netif_device_present (dev)) {
+               netif_device_detach (dev);
+               tulip_down (dev);
+               /* pci_power_off(pdev, -1); */
+       }
+       return 0;
+}
+
+
+static int tulip_resume(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+
+       if (dev && netif_running (dev) && !netif_device_present (dev)) {
+#if 1
+               pci_enable_device (pdev);
+#endif
+               /* pci_power_on(pdev); */
+               tulip_up (dev);
+               netif_device_attach (dev);
+       }
+       return 0;
+}
+
+#endif /* CONFIG_PM */
+
+
+static void __devexit tulip_remove_one (struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata (pdev);
+       struct tulip_private *tp;
+
+       if (!dev)
+               return;
+
+       tp = dev->priv;
+       pci_free_consistent (pdev,
+                            sizeof (struct tulip_rx_desc) * RX_RING_SIZE +
+                            sizeof (struct tulip_tx_desc) * TX_RING_SIZE,
+                            tp->rx_ring, tp->rx_ring_dma);
+       unregister_netdev (dev);
+       if (tp->mtable)
+               kfree (tp->mtable);
+#ifndef USE_IO_OPS
+       iounmap((void *)dev->base_addr);
+#endif
+       kfree (dev);
+       pci_release_regions (pdev);
+       pci_set_drvdata (pdev, NULL);
+
+       /* pci_power_off (pdev, -1); */
+}
+
+
+static struct pci_driver tulip_driver = {
+       name:           DRV_NAME,
+       id_table:       tulip_pci_tbl,
+       probe:          tulip_init_one,
+       remove:         __devexit_p(tulip_remove_one),
+#ifdef CONFIG_PM
+       suspend:        tulip_suspend,
+       resume:         tulip_resume,
+#endif /* CONFIG_PM */
+};
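+
+/*
+ * Editorial note: the "name:" initializers above use the old GNU C
+ * labelled-element extension.  A sketch of the same table in the C99
+ * designated-initializer form that later kernels adopted:
+ *
+ *	static struct pci_driver tulip_driver = {
+ *		.name     = DRV_NAME,
+ *		.id_table = tulip_pci_tbl,
+ *		.probe    = tulip_init_one,
+ *		.remove   = __devexit_p(tulip_remove_one),
+ *	};
+ */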
+
+
+static int __init tulip_init (void)
+{
+#ifdef MODULE
+       printk (KERN_INFO "%s", version);
+#endif
+
+       /* copy module parms into globals */
+       tulip_rx_copybreak = rx_copybreak;
+       tulip_max_interrupt_work = max_interrupt_work;
+
+       /* probe for and init boards */
+       return pci_module_init (&tulip_driver);
+}
+
+
+static void __exit tulip_cleanup (void)
+{
+       pci_unregister_driver (&tulip_driver);
+}
+
+
+module_init(tulip_init);
+module_exit(tulip_cleanup);
diff --git a/xen-2.4.16/drivers/pci/Makefile b/xen-2.4.16/drivers/pci/Makefile
new file mode 100644 (file)
index 0000000..36972ab
--- /dev/null
@@ -0,0 +1,43 @@
+#
+# Makefile for the PCI bus specific drivers.
+#
+
+include $(BASEDIR)/Rules.mk
+
+OBJS := pci.o quirks.o compat.o names.o setup-res.o
+
+#obj-$(CONFIG_PCI) += pci.o quirks.o compat.o names.o
+#obj-$(CONFIG_PROC_FS) += proc.o
+
+#ifndef CONFIG_SPARC64
+#obj-$(CONFIG_PCI) += setup-res.o
+#endif
+
+#
+# Some architectures use the generic PCI setup functions
+#
+#obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_PARISC) += setup-bus.o
+#obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ALL_PPC) += setup-bus.o
+#obj-$(CONFIG_DDB5476) += setup-bus.o
+#obj-$(CONFIG_SGI_IP27) += setup-irq.o
+
+#ifndef CONFIG_X86
+#obj-y += syscall.o
+#endif
+
+default: $(OBJS)
+       $(LD) -r -o driver.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core gen-devlist classlist.h devlist.h
+
+names.o: names.c devlist.h classlist.h
+
+devlist.h classlist.h: pci.ids gen-devlist
+       ./gen-devlist <pci.ids
+
+gen-devlist: gen-devlist.c
+       $(CC) $(CFLAGS) -o gen-devlist gen-devlist.c
diff --git a/xen-2.4.16/drivers/pci/compat.c b/xen-2.4.16/drivers/pci/compat.c
new file mode 100644 (file)
index 0000000..e035f86
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ *     $Id: compat.c,v 1.1 1998/02/16 10:35:50 mj Exp $
+ *
+ *	PCI Bus Services -- Functions For Backward Compatibility
+ *
+ *     Copyright 1998--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+
+int
+pcibios_present(void)
+{
+       return !list_empty(&pci_devices);
+}
+
+int
+pcibios_find_class(unsigned int class, unsigned short index, unsigned char *bus, unsigned char *devfn)
+{
+       const struct pci_dev *dev = NULL;
+       int cnt = 0;
+
+       while ((dev = pci_find_class(class, dev)))
+               if (index == cnt++) {
+                       *bus = dev->bus->number;
+                       *devfn = dev->devfn;
+                       return PCIBIOS_SUCCESSFUL;
+               }
+       return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+
+int
+pcibios_find_device(unsigned short vendor, unsigned short device, unsigned short index,
+                   unsigned char *bus, unsigned char *devfn)
+{
+       const struct pci_dev *dev = NULL;
+       int cnt = 0;
+
+       while ((dev = pci_find_device(vendor, device, dev)))
+               if (index == cnt++) {
+                       *bus = dev->bus->number;
+                       *devfn = dev->devfn;
+                       return PCIBIOS_SUCCESSFUL;
+               }
+       return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+#define PCI_OP(rw,size,type)                                                   \
+int pcibios_##rw##_config_##size (unsigned char bus, unsigned char dev_fn,     \
+                                 unsigned char where, unsigned type val)       \
+{                                                                              \
+       struct pci_dev *dev = pci_find_slot(bus, dev_fn);                       \
+       if (!dev) return PCIBIOS_DEVICE_NOT_FOUND;                              \
+       return pci_##rw##_config_##size(dev, where, val);                       \
+}
+
+PCI_OP(read, byte, char *)
+PCI_OP(read, word, short *)
+PCI_OP(read, dword, int *)
+PCI_OP(write, byte, char)
+PCI_OP(write, word, short)
+PCI_OP(write, dword, int)
diff --git a/xen-2.4.16/drivers/pci/gen-devlist.c b/xen-2.4.16/drivers/pci/gen-devlist.c
new file mode 100644 (file)
index 0000000..7c71808
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ *     Generate devlist.h and classlist.h from the PCI ID file.
+ *
+ *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
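+
+/*
+ * Input sketch (inferred from the parser below; real pci.ids entries may
+ * differ): a vendor line is four hex digits, a device line is a tab plus
+ * four hex digits, a class line is "C xx", and a subclass line is a tab
+ * plus two hex digits:
+ *
+ *	1011  Digital Equipment Corporation
+ *	<tab>0009  DECchip 21140
+ *	C 02  Network controller
+ *	<tab>00  Ethernet controller
+ *
+ * producing, in devlist.h and classlist.h respectively:
+ *
+ *	VENDOR(1011,"Digital Equipment Corporation")
+ *	DEVICE(1011,0009,"DECchip 21140")
+ *	ENDVENDOR()
+ *	CLASS(0200, "Ethernet controller")
+ */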
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_NAME_SIZE 79
+
+static void
+pq(FILE *f, const char *c)
+{
+       while (*c) {
+               if (*c == '"')
+                       fprintf(f, "\\\"");
+               else
+                       fputc(*c, f);
+               c++;
+       }
+}
+
+int
+main(void)
+{
+       char line[1024], *c, *bra, vend[8];
+       int vendors = 0;
+       int mode = 0;
+       int lino = 0;
+       int vendor_len = 0;
+       FILE *devf, *clsf;
+
+       devf = fopen("devlist.h", "w");
+       clsf = fopen("classlist.h", "w");
+       if (!devf || !clsf) {
+               fprintf(stderr, "Cannot create output file!\n");
+               return 1;
+       }
+
+       while (fgets(line, sizeof(line)-1, stdin)) {
+               lino++;
+               if ((c = strchr(line, '\n')))
+                       *c = 0;
+               if (!line[0] || line[0] == '#')
+                       continue;
+               if (line[1] == ' ') {
+                       if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') {
+                               vend[0] = line[2];
+                               vend[1] = line[3];
+                               vend[2] = 0;
+                               mode = 2;
+                       } else goto err;
+               }
+               else if (line[0] == '\t') {
+                       if (line[1] == '\t')
+                               continue;
+                       switch (mode) {
+                       case 1:
+                               if (strlen(line) > 5 && line[5] == ' ') {
+                                       c = line + 5;
+                                       while (*c == ' ')
+                                               *c++ = 0;
+                                       if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+                                               /* Too long, try cutting off long description */
+                                               bra = strchr(c, '[');
+                                               if (bra && bra > c && bra[-1] == ' ')
+                                                       bra[-1] = 0;
+                                               if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+                                                       fprintf(stderr, "Line %d: Device name too long\n", lino);
+                                                       fprintf(stderr, "%s\n", c);
+                                                       return 1;
+                                               }
+                                       }
+                                       fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1);
+                                       pq(devf, c);
+                                       fputs("\")\n", devf);
+                               } else goto err;
+                               break;
+                       case 2:
+                               if (strlen(line) > 3 && line[3] == ' ') {
+                                       c = line + 3;
+                                       while (*c == ' ')
+                                               *c++ = 0;
+                                       fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c);
+                               } else goto err;
+                               break;
+                       default:
+                               goto err;
+                       }
+               } else if (strlen(line) > 4 && line[4] == ' ') {
+                       c = line + 4;
+                       while (*c == ' ')
+                               *c++ = 0;
+                       if (vendors)
+                               fputs("ENDVENDOR()\n\n", devf);
+                       vendors++;
+                       strcpy(vend, line);
+                       vendor_len = strlen(c);
+                       if (vendor_len + 24 > MAX_NAME_SIZE) {
+                               fprintf(stderr, "Line %d: Vendor name too long\n", lino);
+                               return 1;
+                       }
+                       fprintf(devf, "VENDOR(%s,\"", vend);
+                       pq(devf, c);
+                       fputs("\")\n", devf);
+                       mode = 1;
+               } else {
+               err:
+                       fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line);
+                       return 1;
+               }
+       }
+       fputs("ENDVENDOR()\n\
+\n\
+#undef VENDOR\n\
+#undef DEVICE\n\
+#undef ENDVENDOR\n", devf);
+       fputs("\n#undef CLASS\n", clsf);
+
+       fclose(devf);
+       fclose(clsf);
+
+       return 0;
+}
diff --git a/xen-2.4.16/drivers/pci/names.c b/xen-2.4.16/drivers/pci/names.c
new file mode 100644 (file)
index 0000000..90bb37e
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ *     PCI Class and Device Name Tables
+ *
+ *     Copyright 1993--1999 Drew Eckhardt, Frederic Potter,
+ *     David Mosberger-Tang, Martin Mares
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_PCI_NAMES
+
+struct pci_device_info {
+       unsigned short device;
+       unsigned short seen;
+       const char *name;
+};
+
+struct pci_vendor_info {
+       unsigned short vendor;
+       unsigned short nr;
+       const char *name;
+       struct pci_device_info *devices;
+};
+
+/*
+ * This is ridiculous, but we want the strings in
+ * the .init section so that they don't take up
+ * real memory.  Parse the same file multiple times
+ * to get all the info.
+ */
+#define VENDOR( vendor, name )         static char __vendorstr_##vendor[] __initdata = name;
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name )         static char __devicestr_##vendor##device[] __initdata = name;
+#include "devlist.h"
+
+
+#define VENDOR( vendor, name )         static struct pci_device_info __devices_##vendor[] __initdata = {
+#define ENDVENDOR()                    };
+#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device },
+#include "devlist.h"
+
+static struct pci_vendor_info __initdata pci_vendor_list[] = {
+#define VENDOR( vendor, name )         { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor },
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name )
+#include "devlist.h"
+};
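+
+/*
+ * Expansion sketch (editorial): for a devlist.h fragment such as
+ *
+ *	VENDOR(1011,"Digital Equipment Corporation")
+ *	DEVICE(1011,0009,"DECchip 21140")
+ *	ENDVENDOR()
+ *
+ * the three inclusions above generate, in turn, the string
+ * __vendorstr_1011, the device string __devicestr_10110009, the table
+ * __devices_1011[] holding { 0x0009, 0, __devicestr_10110009 }, and the
+ * pci_vendor_list entry { 0x1011, 1, __vendorstr_1011, __devices_1011 }.
+ */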
+
+#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info))
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+       const struct pci_vendor_info *vendor_p = pci_vendor_list;
+       int i = VENDORS;
+       char *name = dev->name;
+
+       do {
+               if (vendor_p->vendor == dev->vendor)
+                       goto match_vendor;
+               vendor_p++;
+       } while (--i);
+
+	/* Couldn't find either the vendor or the device */
+       sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device);
+       return;
+
+       match_vendor: {
+               struct pci_device_info *device_p = vendor_p->devices;
+               int i = vendor_p->nr;
+
+               while (i > 0) {
+                       if (device_p->device == dev->device)
+                               goto match_device;
+                       device_p++;
+                       i--;
+               }
+
+               /* Ok, found the vendor, but unknown device */
+               sprintf(name, "PCI device %04x:%04x (%s)", dev->vendor, dev->device, vendor_p->name);
+               return;
+
+               /* Full match */
+               match_device: {
+                       char *n = name + sprintf(name, "%s %s", vendor_p->name, device_p->name);
+                       int nr = device_p->seen + 1;
+                       device_p->seen = nr;
+                       if (nr > 1)
+                               sprintf(n, " (#%d)", nr);
+               }
+       }
+}
+
+/*
+ *  Class names. Not in .init section as they are needed in runtime.
+ */
+
+static u16 pci_class_numbers[] = {
+#define CLASS(x,y) 0x##x,
+#include "classlist.h"
+};
+
+static char *pci_class_names[] = {
+#define CLASS(x,y) y,
+#include "classlist.h"
+};
+
+char *
+pci_class_name(u32 class)
+{
+       int i;
+
+       for(i=0; i<sizeof(pci_class_numbers)/sizeof(pci_class_numbers[0]); i++)
+               if (pci_class_numbers[i] == class)
+                       return pci_class_names[i];
+       return NULL;
+}
+
+#else
+
+void __init pci_name_device(struct pci_dev *dev)
+{
+}
+
+char *
+pci_class_name(u32 class)
+{
+       return NULL;
+}
+
+#endif /* CONFIG_PCI_NAMES */
+
diff --git a/xen-2.4.16/drivers/pci/pci.c b/xen-2.4.16/drivers/pci/pci.c
new file mode 100644 (file)
index 0000000..3c09f98
--- /dev/null
@@ -0,0 +1,2040 @@
+/*
+ *     $Id: pci.c,v 1.91 1999/01/21 13:34:01 davem Exp $
+ *
+ *     PCI Bus Services, see include/linux/pci.h for further explanation.
+ *
+ *     Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ *     David Mosberger-Tang
+ *
+ *     Copyright 1997 -- 2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+//#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/spinlock.h>
+//#include <linux/pm.h>
+//#include <linux/kmod.h>              /* for hotplug_path */
+//#include <linux/bitops.h>
+#include <linux/delay.h>
+
+#include <asm/page.h>
+//#include <asm/dma.h> /* isa_dma_bridge_buggy */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+LIST_HEAD(pci_root_buses);
+LIST_HEAD(pci_devices);
+
+/**
+ * pci_find_slot - locate PCI device from a given PCI slot
+ * @bus: number of PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI 
+ * device resides and the logical device number within that slot 
+ * in case of multi-function devices.
+ *
+ * Given a PCI bus and slot/function number, the desired PCI device 
+ * is located in the system's global list of PCI devices.  If the device
+ * is found, a pointer to its data structure is returned.  If no 
+ * device is found, %NULL is returned.
+ */
+struct pci_dev *
+pci_find_slot(unsigned int bus, unsigned int devfn)
+{
+       struct pci_dev *dev;
+
+       pci_for_each_dev(dev) {
+               if (dev->bus->number == bus && dev->devfn == devfn)
+                       return dev;
+       }
+       return NULL;
+}
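+
+/*
+ * Usage sketch (hypothetical caller, illustration only):
+ *
+ *	struct pci_dev *dev = pci_find_slot(0, PCI_DEVFN(4, 0));
+ *	if (dev)
+ *		printk(KERN_INFO "PCI: found %s\n", dev->slot_name);
+ */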
+
+/**
+ * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is
+ * found with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its
+ * device structure is returned.  Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from the next device on the global list.
+ */
+struct pci_dev *
+pci_find_subsys(unsigned int vendor, unsigned int device,
+               unsigned int ss_vendor, unsigned int ss_device,
+               const struct pci_dev *from)
+{
+       struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+       while (n != &pci_devices) {
+               struct pci_dev *dev = pci_dev_g(n);
+               if ((vendor == PCI_ANY_ID || dev->vendor == vendor) &&
+                   (device == PCI_ANY_ID || dev->device == device) &&
+                   (ss_vendor == PCI_ANY_ID || dev->subsystem_vendor == ss_vendor) &&
+                   (ss_device == PCI_ANY_ID || dev->subsystem_device == ss_device))
+                       return dev;
+               n = n->next;
+       }
+       return NULL;
+}
+
+
+/**
+ * pci_find_device - begin or continue searching for a PCI device by vendor/device id
+ * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids
+ * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is
+ * found with a matching @vendor and @device, a pointer to its device structure is
+ * returned.  Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from the next device on the global list.
+ */
+struct pci_dev *
+pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{
+       return pci_find_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from);
+}
+
+
+/**
+ * pci_find_class - begin or continue searching for a PCI device by class
+ * @class: search for a PCI device with this class designation
+ * @from: Previous PCI device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known PCI devices.  If a PCI device is
+ * found with a matching @class, a pointer to its device structure is
+ * returned.  Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * Otherwise if @from is not %NULL, searches continue from the next device
+ * on the global list.
+ */
+struct pci_dev *
+pci_find_class(unsigned int class, const struct pci_dev *from)
+{
+       struct list_head *n = from ? from->global_list.next : pci_devices.next;
+
+       while (n != &pci_devices) {
+               struct pci_dev *dev = pci_dev_g(n);
+               if (dev->class == class)
+                       return dev;
+               n = n->next;
+       }
+       return NULL;
+}
+
+/**
+ * pci_find_capability - query for devices' capabilities 
+ * @dev: PCI device to query
+ * @cap: capability code
+ *
+ * Tell if a device supports a given PCI capability.
+ * Returns the address of the requested capability structure within the
+ * device's PCI configuration space or 0 in case the device does not
+ * support it.  Possible values for @cap:
+ *
+ *  %PCI_CAP_ID_PM           Power Management 
+ *
+ *  %PCI_CAP_ID_AGP          Accelerated Graphics Port 
+ *
+ *  %PCI_CAP_ID_VPD          Vital Product Data 
+ *
+ *  %PCI_CAP_ID_SLOTID       Slot Identification 
+ *
+ *  %PCI_CAP_ID_MSI          Message Signalled Interrupts
+ *
+ *  %PCI_CAP_ID_CHSWP        CompactPCI HotSwap 
+ */
+int
+pci_find_capability(struct pci_dev *dev, int cap)
+{
+       u16 status;
+       u8 pos, id;
+       int ttl = 48;
+
+       pci_read_config_word(dev, PCI_STATUS, &status);
+       if (!(status & PCI_STATUS_CAP_LIST))
+               return 0;
+       switch (dev->hdr_type) {
+       case PCI_HEADER_TYPE_NORMAL:
+       case PCI_HEADER_TYPE_BRIDGE:
+               pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos);
+               break;
+       case PCI_HEADER_TYPE_CARDBUS:
+               pci_read_config_byte(dev, PCI_CB_CAPABILITY_LIST, &pos);
+               break;
+       default:
+               return 0;
+       }
+       while (ttl-- && pos >= 0x40) {
+               pos &= ~3;
+               pci_read_config_byte(dev, pos + PCI_CAP_LIST_ID, &id);
+               if (id == 0xff)
+                       break;
+               if (id == cap)
+                       return pos;
+               pci_read_config_byte(dev, pos + PCI_CAP_LIST_NEXT, &pos);
+       }
+       return 0;
+}
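+
+/*
+ * Usage sketch (illustration only): locating the Power Management
+ * capability before touching its registers, as pci_set_power_state()
+ * below does:
+ *
+ *	int pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+ *	u16 pmcsr;
+ *	if (pm)
+ *		pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
+ */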
+
+
+/**
+ * pci_find_parent_resource - return resource region of parent bus of given region
+ * @dev: PCI device structure contains resources to be searched
+ * @res: child resource record for which parent is sought
+ *
+ *  For given resource region of given device, return the resource
+ *  region of parent bus the given region is contained in or where
+ *  it should be allocated from.
+ */
+struct resource *
+pci_find_parent_resource(const struct pci_dev *dev, struct resource *res)
+{
+       const struct pci_bus *bus = dev->bus;
+       int i;
+       struct resource *best = NULL;
+
+       for(i=0; i<4; i++) {
+               struct resource *r = bus->resource[i];
+               if (!r)
+                       continue;
+               if (res->start && !(res->start >= r->start && res->end <= r->end))
+                       continue;       /* Not contained */
+               if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM))
+                       continue;       /* Wrong type */
+               if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH))
+                       return r;       /* Exact match */
+               if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH))
+                       best = r;       /* Approximating prefetchable by non-prefetchable */
+       }
+       return best;
+}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to be suspended
+ * @state: Power state we're entering
+ *
+ * Transition a device to a new power state, using the Power Management 
+ * Capabilities in the device's config space.
+ *
+ * RETURN VALUE:
+ * -EINVAL if trying to enter a shallower sleep state than the one we
+ * are already in (other than D0).
+ * 0 if we're already in the requested state.
+ * -EIO if the device does not support PCI PM.
+ * 0 if we can successfully change the power state.
+ */
+
+int
+pci_set_power_state(struct pci_dev *dev, int state)
+{
+       int pm;
+       u16 pmcsr;
+
+       /* bound the state we're entering */
+       if (state > 3) state = 3;
+
+	/* Validate current state:
+	 * we can enter D0 from any state, but otherwise we may only
+	 * move to a sleep state at least as deep as the current one.
+	 */
+       if (state > 0 && dev->current_state > state)
+               return -EINVAL;
+       else if (dev->current_state == state) 
+               return 0;        /* we're already there */
+
+       /* find PCI PM capability in list */
+       pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+       
+       /* abort if the device doesn't support PM capabilities */
+       if (!pm) return -EIO; 
+
+       /* check if this device supports the desired state */
+       if (state == 1 || state == 2) {
+               u16 pmc;
+               pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc);
+               if (state == 1 && !(pmc & PCI_PM_CAP_D1)) return -EIO;
+               else if (state == 2 && !(pmc & PCI_PM_CAP_D2)) return -EIO;
+       }
+
+       /* If we're in D3, force entire word to 0.
+        * This doesn't affect PME_Status, disables PME_En, and
+        * sets PowerState to 0.
+        */
+       if (dev->current_state >= 3)
+               pmcsr = 0;
+       else {
+               pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr);
+               pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+               pmcsr |= state;
+       }
+
+       /* enter specified state */
+       pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr);
+
+       /* Mandatory power management transition delays */
+       /* see PCI PM 1.1 5.6.1 table 18 */
+       if(state == 3 || dev->current_state == 3)
+       {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               schedule_timeout(HZ/100);
+       }
+       else if(state == 2 || dev->current_state == 2)
+               udelay(200);
+       dev->current_state = state;
+
+       return 0;
+}
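+
+/*
+ * Usage sketch (illustration only): a driver that has quiesced its
+ * device can drop it into D3hot with
+ *
+ *	pci_set_power_state(dev, 3);
+ *
+ * and must return through pci_set_power_state(dev, 0), which
+ * pci_enable_device() below does implicitly, before touching the
+ * device again.
+ */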
+
+/**
+ * pci_save_state - save the PCI configuration space of a device before suspending
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - buffer to hold config space context
+ *
+ * @buffer must be large enough to hold the entire PCI 2.2 config space 
+ * (>= 64 bytes).
+ */
+int
+pci_save_state(struct pci_dev *dev, u32 *buffer)
+{
+       int i;
+       if (buffer) {
+               /* XXX: 100% dword access ok here? */
+               for (i = 0; i < 16; i++)
+                       pci_read_config_dword(dev, i * 4,&buffer[i]);
+       }
+       return 0;
+}
+
+/** 
+ * pci_restore_state - Restore the saved state of a PCI device
+ * @dev: - PCI device that we're dealing with
+ * @buffer: - saved PCI config space
+ *
+ */
+int 
+pci_restore_state(struct pci_dev *dev, u32 *buffer)
+{
+       int i;
+
+       if (buffer) {
+               for (i = 0; i < 16; i++)
+                       pci_write_config_dword(dev,i * 4, buffer[i]);
+       }
+       /*
+        * otherwise, write the context information we know from bootup.
+        * This works around a problem where warm-booting from Windows
+        * combined with a D3(hot)->D0 transition causes PCI config
+        * header data to be forgotten.
+        */     
+       else {
+               for (i = 0; i < 6; i ++)
+                       pci_write_config_dword(dev,
+                                              PCI_BASE_ADDRESS_0 + (i * 4),
+                                              dev->resource[i].start);
+               pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+       }
+       return 0;
+}
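+
+/*
+ * Usage sketch (hypothetical suspend/resume pair): the buffer is 16
+ * dwords, i.e. the 64-byte PCI 2.2 header mentioned above:
+ *
+ *	static u32 saved_config[16];
+ *
+ *	pci_save_state(pdev, saved_config);	(on suspend)
+ *	pci_restore_state(pdev, saved_config);	(on resume)
+ */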
+
+/**
+ * pci_enable_device - Initialize device before it's used by a driver.
+ * @dev: PCI device to be initialized
+ *
+ *  Initialize device before it's used by a driver. Ask low-level code
+ *  to enable I/O and memory. Wake up the device if it was suspended.
+ *  Beware, this function can fail.
+ */
+int
+pci_enable_device(struct pci_dev *dev)
+{
+       int err;
+
+       pci_set_power_state(dev, 0);
+       if ((err = pcibios_enable_device(dev)) < 0)
+               return err;
+       return 0;
+}
+
+/**
+ * pci_disable_device - Disable PCI device after use
+ * @dev: PCI device to be disabled
+ *
+ * Signal to the system that the PCI device is not in use by the system
+ * anymore.  This only involves disabling PCI bus-mastering, if active.
+ */
+void
+pci_disable_device(struct pci_dev *dev)
+{
+       u16 pci_command;
+
+       pci_read_config_word(dev, PCI_COMMAND, &pci_command);
+       if (pci_command & PCI_COMMAND_MASTER) {
+               pci_command &= ~PCI_COMMAND_MASTER;
+               pci_write_config_word(dev, PCI_COMMAND, pci_command);
+       }
+}
+
+/**
+ * pci_enable_wake - enable device to generate PME# when suspended
+ * @dev: - PCI device to operate on
+ * @state: - Current state of device.
+ * @enable: - Flag to enable or disable generation
+ * 
+ * Set the bits in the device's PM Capabilities to generate PME# when
+ * the system is suspended. 
+ *
+ * -EIO is returned if device doesn't have PM Capabilities. 
+ * -EINVAL is returned if device supports it, but can't generate wake events.
+ * 0 if operation is successful.
+ * 
+ */
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable)
+{
+       int pm;
+       u16 value;
+
+       /* find PCI PM capability in list */
+       pm = pci_find_capability(dev, PCI_CAP_ID_PM);
+
+       /* If device doesn't support PM Capabilities, but request is to disable
+        * wake events, it's a nop; otherwise fail */
+       if (!pm) 
+               return enable ? -EIO : 0; 
+
+       /* Check device's ability to generate PME# */
+       pci_read_config_word(dev,pm+PCI_PM_PMC,&value);
+
+       value &= PCI_PM_CAP_PME_MASK;
+       value >>= ffs(value);   /* First bit of mask */
+
+       /* Check if it can generate PME# from requested state. */
+       if (!value || !(value & (1 << state))) 
+               return enable ? -EINVAL : 0;
+
+       pci_read_config_word(dev, pm + PCI_PM_CTRL, &value);
+
+       /* Clear PME_Status by writing 1 to it and enable PME# */
+       value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE;
+
+       if (!enable)
+               value &= ~PCI_PM_CTRL_PME_ENABLE;
+
+       pci_write_config_word(dev, pm + PCI_PM_CTRL, value);
+       
+       return 0;
+}
+
+int
+pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
+{
+       u8 pin;
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       if (!pin)
+               return -1;
+       pin--;
+       while (dev->bus->self) {
+               pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+               dev = dev->bus->self;
+       }
+       *bridge = dev;
+       return pin;
+}
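+
+/*
+ * Worked example (editorial): the loop above applies the standard PCI
+ * bridge swizzle pin = (pin + slot) % 4 at every bridge crossed.  A
+ * device in slot 3 using INTA (pin 0 after the decrement) behind a
+ * single bridge therefore appears as INTD (3) on the parent bus, and
+ * *bridge is set to the root-bus device the interrupt routes through.
+ */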
+
+/**
+ *     pci_release_regions - Release reserved PCI I/O and memory resources
+ *     @pdev: PCI device whose resources were previously reserved by pci_request_regions
+ *
+ *     Releases all PCI I/O and memory resources previously reserved by a
+ *     successful call to pci_request_regions.  Call this function only
+ *     after all use of the PCI regions has ceased.
+ */
+void pci_release_regions(struct pci_dev *pdev)
+{
+       int i;
+       
+       for (i = 0; i < 6; i++) {
+               if (pci_resource_len(pdev, i) == 0)
+                       continue;
+
+               if (pci_resource_flags(pdev, i) & IORESOURCE_IO)
+                       release_region(pci_resource_start(pdev, i),
+                                      pci_resource_len(pdev, i));
+
+               else if (pci_resource_flags(pdev, i) & IORESOURCE_MEM)
+                       release_mem_region(pci_resource_start(pdev, i),
+                                          pci_resource_len(pdev, i));
+       }
+}
+
+/**
+ *	pci_request_regions - Reserve PCI I/O and memory resources
+ *     @pdev: PCI device whose resources are to be reserved
+ *     @res_name: Name to be associated with resource.
+ *
+ *     Mark all PCI regions associated with PCI device @pdev as
+ *     being reserved by owner @res_name.  Do not access any
+ *     address inside the PCI regions unless this call returns
+ *     successfully.
+ *
+ *     Returns 0 on success, or %EBUSY on error.  A warning
+ *     message is also printed on failure.
+ */
+int pci_request_regions(struct pci_dev *pdev, char *res_name)
+{
+       int i;
+       
+       for (i = 0; i < 6; i++) {
+               if (pci_resource_len(pdev, i) == 0)
+                       continue;
+
+               if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
+                       if (!request_region(pci_resource_start(pdev, i),
+                                           pci_resource_len(pdev, i), res_name))
+                               goto err_out;
+               }
+               
+               else if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) {
+                       if (!request_mem_region(pci_resource_start(pdev, i),
+                                               pci_resource_len(pdev, i), res_name))
+                               goto err_out;
+               }
+       }
+       
+       return 0;
+
+err_out:
+       printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n",
+               pci_resource_flags(pdev, i) & IORESOURCE_IO ? "I/O" : "mem",
+               i + 1, /* PCI BAR # */
+               pci_resource_len(pdev, i), pci_resource_start(pdev, i),
+               pdev->slot_name);
+       pci_release_regions(pdev);
+       return -EBUSY;
+}
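+
+/*
+ * Usage sketch (hypothetical driver, illustration only): request in
+ * probe(), release in remove(), as the tulip driver in this changeset
+ * does:
+ *
+ *	if (pci_request_regions(pdev, "mydrv"))
+ *		return -EBUSY;
+ *	...
+ *	pci_release_regions(pdev);
+ */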
+
+
+/*
+ *  Registration of PCI drivers and handling of hot-pluggable devices.
+ */
+
+static LIST_HEAD(pci_drivers);
+
+/**
+ * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure
+ * @ids: array of PCI device id structures to search in
+ * @dev: the PCI device structure to match against
+ * 
+ * Used by a driver to check whether a PCI device present in the
+ * system is in its list of supported devices.  Returns the matching
+ * pci_device_id structure or %NULL if there is no match.
+ */
+const struct pci_device_id *
+pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev)
+{
+       while (ids->vendor || ids->subvendor || ids->class_mask) {
+               if ((ids->vendor == PCI_ANY_ID || ids->vendor == dev->vendor) &&
+                   (ids->device == PCI_ANY_ID || ids->device == dev->device) &&
+                   (ids->subvendor == PCI_ANY_ID || ids->subvendor == dev->subsystem_vendor) &&
+                   (ids->subdevice == PCI_ANY_ID || ids->subdevice == dev->subsystem_device) &&
+                   !((ids->class ^ dev->class) & ids->class_mask))
+                       return ids;
+               ids++;
+       }
+       return NULL;
+}
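+
+/*
+ * Sketch of a matching id table (hypothetical driver): the loop above
+ * stops at the first entry whose vendor, subvendor and class_mask are
+ * all zero, so every table must end with such a terminator:
+ *
+ *	static struct pci_device_id mydrv_pci_tbl[] = {
+ *		{ 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+ *		{ 0, }
+ *	};
+ */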
+
+static int
+pci_announce_device(struct pci_driver *drv, struct pci_dev *dev)
+{
+       const struct pci_device_id *id;
+       int ret = 0;
+
+       if (drv->id_table) {
+               id = pci_match_device(drv->id_table, dev);
+               if (!id) {
+                       ret = 0;
+                       goto out;
+               }
+       } else
+               id = NULL;
+
+       dev_probe_lock();
+       if (drv->probe(dev, id) >= 0) {
+               dev->driver = drv;
+               ret = 1;
+       }
+       dev_probe_unlock();
+out:
+       return ret;
+}
+
+/**
+ * pci_register_driver - register a new pci driver
+ * @drv: the driver structure to register
+ * 
+ * Adds the driver structure to the list of registered drivers
+ * Returns the number of pci devices which were claimed by the driver
+ * during registration.  The driver remains registered even if the
+ * return value is zero.
+ */
+int
+pci_register_driver(struct pci_driver *drv)
+{
+       struct pci_dev *dev;
+       int count = 0;
+
+       list_add_tail(&drv->node, &pci_drivers);
+       pci_for_each_dev(dev) {
+               if (!pci_dev_driver(dev))
+                       count += pci_announce_device(drv, dev);
+       }
+       return count;
+}
+
+/**
+ * pci_unregister_driver - unregister a pci driver
+ * @drv: the driver structure to unregister
+ * 
+ * Deletes the driver structure from the list of registered PCI drivers,
+ * gives it a chance to clean up by calling its remove() function for
+ * each device it was responsible for, and marks those devices as
+ * driverless.
+ */
+
+void
+pci_unregister_driver(struct pci_driver *drv)
+{
+       struct pci_dev *dev;
+
+       list_del(&drv->node);
+       pci_for_each_dev(dev) {
+               if (dev->driver == drv) {
+                       if (drv->remove)
+                               drv->remove(dev);
+                       dev->driver = NULL;
+               }
+       }
+}
+
+#ifdef CONFIG_HOTPLUG
+
+#ifndef FALSE
+#define FALSE  (0)
+#define TRUE   (!FALSE)
+#endif
+
+static void
+run_sbin_hotplug(struct pci_dev *pdev, int insert)
+{
+       int i;
+       char *argv[3], *envp[8];
+       char id[20], sub_id[24], bus_id[24], class_id[20];
+
+       if (!hotplug_path[0])
+               return;
+
+       sprintf(class_id, "PCI_CLASS=%04X", pdev->class);
+       sprintf(id, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device);
+       sprintf(sub_id, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device);
+       sprintf(bus_id, "PCI_SLOT_NAME=%s", pdev->slot_name);
+
+       i = 0;
+       argv[i++] = hotplug_path;
+       argv[i++] = "pci";
+       argv[i] = 0;
+
+       i = 0;
+       /* minimal command environment */
+       envp[i++] = "HOME=/";
+       envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+       
+       /* other stuff we want to pass to /sbin/hotplug */
+       envp[i++] = class_id;
+       envp[i++] = id;
+       envp[i++] = sub_id;
+       envp[i++] = bus_id;
+       if (insert)
+               envp[i++] = "ACTION=add";
+       else
+               envp[i++] = "ACTION=remove";
+       envp[i] = 0;
+
+       call_usermodehelper (argv [0], argv, envp);
+}
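+
+/*
+ * Illustration (values hypothetical): for a DEC 21140 being inserted,
+ * this runs "/sbin/hotplug pci" with an environment along the lines of
+ *
+ *	ACTION=add PCI_CLASS=20000 PCI_ID=1011:0009
+ *	PCI_SUBSYS_ID=1011:0009 PCI_SLOT_NAME=00:04.0
+ */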
+
+/**
+ * pci_announce_device_to_drivers - tell the drivers a new device has appeared
+ * @dev: the device that has shown up
+ *
+ * Notifies the drivers that a new device has appeared, and also notifies
+ * userspace through /sbin/hotplug.
+ */
+void
+pci_announce_device_to_drivers(struct pci_dev *dev)
+{
+       struct list_head *ln;
+
+       for(ln=pci_drivers.next; ln != &pci_drivers; ln=ln->next) {
+               struct pci_driver *drv = list_entry(ln, struct pci_driver, node);
+               if (drv->remove && pci_announce_device(drv, dev))
+                       break;
+       }
+
+       /* notify userspace of new hotplug device */
+       run_sbin_hotplug(dev, TRUE);
+}
+
+/**
+ * pci_insert_device - insert a hotplug device
+ * @dev: the device to insert
+ * @bus: where to insert it
+ *
+ * Add a new device to the device lists and notify userspace (/sbin/hotplug).
+ */
+void
+pci_insert_device(struct pci_dev *dev, struct pci_bus *bus)
+{
+       list_add_tail(&dev->bus_list, &bus->devices);
+       list_add_tail(&dev->global_list, &pci_devices);
+#ifdef CONFIG_PROC_FS
+       pci_proc_attach_device(dev);
+#endif
+       pci_announce_device_to_drivers(dev);
+}
+
+static void
+pci_free_resources(struct pci_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+               struct resource *res = dev->resource + i;
+               if (res->parent)
+                       release_resource(res);
+       }
+}
+
+/**
+ * pci_remove_device - remove a hotplug device
+ * @dev: the device to remove
+ *
+ * Delete the device structure from the device lists and 
+ * notify userspace (/sbin/hotplug).
+ */
+void
+pci_remove_device(struct pci_dev *dev)
+{
+       if (dev->driver) {
+               if (dev->driver->remove)
+                       dev->driver->remove(dev);
+               dev->driver = NULL;
+       }
+       list_del(&dev->bus_list);
+       list_del(&dev->global_list);
+       pci_free_resources(dev);
+#ifdef CONFIG_PROC_FS
+       pci_proc_detach_device(dev);
+#endif
+
+       /* notify userspace of hotplug device removal */
+       run_sbin_hotplug(dev, FALSE);
+}
+
+#endif
+
+static struct pci_driver pci_compat_driver = {
+       name: "compat"
+};
+
+/**
+ * pci_dev_driver - get the pci_driver of a device
+ * @dev: the device to query
+ *
+ * Returns the appropriate pci_driver structure or %NULL if there is no 
+ * registered driver for the device.
+ */
+struct pci_driver *
+pci_dev_driver(const struct pci_dev *dev)
+{
+       if (dev->driver)
+               return dev->driver;
+       else {
+               int i;
+               for(i=0; i<=PCI_ROM_RESOURCE; i++)
+                       if (dev->resource[i].flags & IORESOURCE_BUSY)
+                               return &pci_compat_driver;
+       }
+       return NULL;
+}
+
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ *  Wrappers for all PCI configuration access functions.  They just check
+ *  alignment, do locking and call the low-level functions pointed to
+ *  by pci_dev->ops.
+ */
+
+#define PCI_byte_BAD 0
+#define PCI_word_BAD (pos & 1)
+#define PCI_dword_BAD (pos & 3)
+
+#define PCI_OP(rw,size,type) \
+int pci_##rw##_config_##size (struct pci_dev *dev, int pos, type value) \
+{                                                                      \
+       int res;                                                        \
+       unsigned long flags;                                            \
+       if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER;       \
+       spin_lock_irqsave(&pci_lock, flags);                            \
+       res = dev->bus->ops->rw##_##size(dev, pos, value);              \
+       spin_unlock_irqrestore(&pci_lock, flags);                       \
+       return res;                                                     \
+}
+
+PCI_OP(read, byte, u8 *)
+PCI_OP(read, word, u16 *)
+PCI_OP(read, dword, u32 *)
+PCI_OP(write, byte, u8)
+PCI_OP(write, word, u16)
+PCI_OP(write, dword, u32)
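+
+/*
+ * Expansion sketch (editorial): PCI_OP(read, byte, u8 *) above produces
+ *
+ *	int pci_read_config_byte(struct pci_dev *dev, int pos, u8 *value)
+ *
+ * which checks alignment (a no-op for bytes), takes pci_lock, and
+ * forwards to dev->bus->ops->read_byte(dev, pos, value).
+ */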
+
+/**
+ * pci_set_master - enables bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Enables bus-mastering on the device and calls pcibios_set_master()
+ * to do the needed arch specific settings.
+ */
+void
+pci_set_master(struct pci_dev *dev)
+{
+       u16 cmd;
+
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+       if (! (cmd & PCI_COMMAND_MASTER)) {
+               DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name);
+               cmd |= PCI_COMMAND_MASTER;
+               pci_write_config_word(dev, PCI_COMMAND, cmd);
+       }
+       pcibios_set_master(dev);
+}
+
+int
+pci_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+       if (!pci_dma_supported(dev, mask))
+               return -EIO;
+
+       dev->dma_mask = mask;
+
+       return 0;
+}
+    
+int
+pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask)
+{
+       if (!pci_dac_dma_supported(dev, mask))
+               return -EIO;
+
+       dev->dma_mask = mask;
+
+       return 0;
+}
+    
+/*
+ * Translate the low bits of the PCI base
+ * to the resource type
+ */
+static inline unsigned int pci_calc_resource_flags(unsigned int flags)
+{
+       if (flags & PCI_BASE_ADDRESS_SPACE_IO)
+               return IORESOURCE_IO;
+
+       if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+               return IORESOURCE_MEM | IORESOURCE_PREFETCH;
+
+       return IORESOURCE_MEM;
+}
+
+/*
+ * Find the extent of a PCI decode..
+ */
+static u32 pci_size(u32 base, unsigned long mask)
+{
+       u32 size = mask & base;         /* Find the significant bits */
+       size = size & ~(size-1);        /* Get the lowest of them to find the decode size */
+       return size-1;                  /* extent = size - 1 */
+}
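+
+/*
+ * Worked example (editorial): sizing a 4 KB memory BAR.  After ~0 is
+ * written to the BAR, the device reads back 0xFFFFF000, and
+ * pci_size(0xFFFFF000, PCI_BASE_ADDRESS_MEM_MASK) keeps the significant
+ * bits (0xFFFFF000), isolates the lowest of them (0x00001000, the 4 KB
+ * decode size) and returns the extent 0x00000FFF used for res->end in
+ * pci_read_bases() below.
+ */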
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+       unsigned int pos, reg, next;
+       u32 l, sz;
+       struct resource *res;
+
+       for(pos=0; pos<howmany; pos = next) {
+               next = pos+1;
+               res = &dev->resource[pos];
+               res->name = dev->name;
+               reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+               pci_read_config_dword(dev, reg, &l);
+               pci_write_config_dword(dev, reg, ~0);
+               pci_read_config_dword(dev, reg, &sz);
+               pci_write_config_dword(dev, reg, l);
+               if (!sz || sz == 0xffffffff)
+                       continue;
+               if (l == 0xffffffff)
+                       l = 0;
+               if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+                       res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
+                       sz = pci_size(sz, PCI_BASE_ADDRESS_MEM_MASK);
+               } else {
+                       res->start = l & PCI_BASE_ADDRESS_IO_MASK;
+                       sz = pci_size(sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
+               }
+               res->end = res->start + (unsigned long) sz;
+               res->flags |= (l & 0xf) | pci_calc_resource_flags(l);
+               if ((l & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK))
+                   == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)) {
+                       pci_read_config_dword(dev, reg+4, &l);
+                       next++;
+#if BITS_PER_LONG == 64
+                       res->start |= ((unsigned long) l) << 32;
+                       res->end = res->start + sz;
+                       pci_write_config_dword(dev, reg+4, ~0);
+                       pci_read_config_dword(dev, reg+4, &sz);
+                       pci_write_config_dword(dev, reg+4, l);
+                       if (~sz)
+                               res->end = res->start + 0xffffffff +
+                                               (((unsigned long) ~sz) << 32);
+#else
+                       if (l) {
+                               printk(KERN_ERR "PCI: Unable to handle 64-bit address for device %s\n", dev->slot_name);
+                               res->start = 0;
+                               res->flags = 0;
+                               continue;
+                       }
+#endif
+               }
+       }
+       if (rom) {
+               dev->rom_base_reg = rom;
+               res = &dev->resource[PCI_ROM_RESOURCE];
+               pci_read_config_dword(dev, rom, &l);
+               pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
+               pci_read_config_dword(dev, rom, &sz);
+               pci_write_config_dword(dev, rom, l);
+               if (l == 0xffffffff)
+                       l = 0;
+               if (sz && sz != 0xffffffff) {
+                       res->flags = (l & PCI_ROM_ADDRESS_ENABLE) |
+                         IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+                       res->start = l & PCI_ROM_ADDRESS_MASK;
+                       sz = pci_size(sz, PCI_ROM_ADDRESS_MASK);
+                       res->end = res->start + (unsigned long) sz;
+               }
+               res->name = dev->name;
+       }
+}
+
+void __devinit  pci_read_bridge_bases(struct pci_bus *child)
+{
+       struct pci_dev *dev = child->self;
+       u8 io_base_lo, io_limit_lo;
+       u16 mem_base_lo, mem_limit_lo;
+       unsigned long base, limit;
+       struct resource *res;
+       int i;
+
+       if (!dev)               /* It's a host bus, nothing to read */
+               return;
+
+       for(i=0; i<3; i++)
+               child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+       res = child->resource[0];
+       pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+       pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+       base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+       limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+       if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+               u16 io_base_hi, io_limit_hi;
+               pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+               pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+               base |= (io_base_hi << 16);
+               limit |= (io_limit_hi << 16);
+       }
+
+       if (base && base <= limit) {
+               res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+               res->start = base;
+               res->end = limit + 0xfff;
+               res->name = child->name;
+       } else {
+               /*
+                * Ugh. We don't know enough about this bridge. Just assume
+                * that it's entirely transparent.
+                */
+               printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 0);
+               child->resource[0] = child->parent->resource[0];
+       }
+
+       res = child->resource[1];
+       pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
+       pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
+       base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
+       limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
+       if (base && base <= limit) {
+               res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
+               res->start = base;
+               res->end = limit + 0xfffff;
+               res->name = child->name;
+       } else {
+               /* See comment above. Same thing */
+               printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 1);
+               child->resource[1] = child->parent->resource[1];
+       }
+
+       res = child->resource[2];
+       pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
+       pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
+       base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
+       limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
+
+       if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+               u32 mem_base_hi, mem_limit_hi;
+               pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
+               pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
+#if BITS_PER_LONG == 64
+               base |= ((long) mem_base_hi) << 32;
+               limit |= ((long) mem_limit_hi) << 32;
+#else
+               if (mem_base_hi || mem_limit_hi) {
+                       printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name);
+                       return;
+               }
+#endif
+       }
+       if (base && base <= limit) {
+               res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+               res->start = base;
+               res->end = limit + 0xfffff;
+               res->name = child->name;
+       } else {
+               /* See comments above */
+               printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 2);
+               child->resource[2] = child->parent->resource[2];
+       }
+}
+
+static struct pci_bus * __devinit  pci_alloc_bus(void)
+{
+       struct pci_bus *b;
+
+       b = kmalloc(sizeof(*b), GFP_KERNEL);
+       if (b) {
+               memset(b, 0, sizeof(*b));
+               INIT_LIST_HEAD(&b->children);
+               INIT_LIST_HEAD(&b->devices);
+       }
+       return b;
+}
+
+struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
+{
+       struct pci_bus *child;
+       int i;
+
+       /*
+        * Allocate a new bus, and inherit stuff from the parent..
+        */
+       child = pci_alloc_bus();
+       if (!child)
+               return NULL;
+
+       list_add_tail(&child->node, &parent->children);
+       child->self = dev;
+       dev->subordinate = child;
+       child->parent = parent;
+       child->ops = parent->ops;
+       child->sysdata = parent->sysdata;
+
+       /*
+        * Set up the primary, secondary and subordinate
+        * bus numbers.
+        */
+       child->number = child->secondary = busnr;
+       child->primary = parent->secondary;
+       child->subordinate = 0xff;
+
+       /* Set up default resource pointers.. */
+       for (i = 0; i < 4; i++)
+               child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+       return child;
+}
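+
+/*
+ * Note (added for illustration): the four resource pointers set up
+ * above alias the bridge device's own PCI_BRIDGE_RESOURCES entries,
+ * so when pci_read_bridge_bases() fills child->resource[0..2] it is
+ * describing the windows decoded by the bridge itself.
+ */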
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus);
+
+/*
+ * If it's a bridge, configure it and scan the bus behind it.
+ * For CardBus bridges, we don't scan behind as the devices will
+ * be handled by the bridge driver itself.
+ *
+ * We need to process bridges in two passes -- first we scan those
+ * already configured by the BIOS and after we are done with all of
+ * them, we proceed to assigning numbers to the remaining buses in
+ * order to avoid overlaps between old and new bus numbers.
+ */
+static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
+{
+       unsigned int buses;
+       unsigned short cr;
+       struct pci_bus *child;
+       int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
+
+       pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
+       DBG("Scanning behind PCI bridge %s, config %06x, pass %d\n", dev->slot_name, buses & 0xffffff, pass);
+       if ((buses & 0xffff00) && !pcibios_assign_all_busses()) {
+               /*
+                * Bus already configured by firmware, process it in the first
+                * pass and just note the configuration.
+                */
+               if (pass)
+                       return max;
+               child = pci_add_new_bus(bus, dev, 0);
+               child->primary = buses & 0xFF;
+               child->secondary = (buses >> 8) & 0xFF;
+               child->subordinate = (buses >> 16) & 0xFF;
+               child->number = child->secondary;
+               if (!is_cardbus) {
+                       unsigned int cmax = pci_do_scan_bus(child);
+                       if (cmax > max) max = cmax;
+               } else {
+                       unsigned int cmax = child->subordinate;
+                       if (cmax > max) max = cmax;
+               }
+       } else {
+               /*
+                * We need to assign a number to this bus which we always
+                * do in the second pass. We also keep all address decoders
+                * on the bridge disabled during scanning.  FIXME: Why?
+                */
+               if (!pass)
+                       return max;
+               pci_read_config_word(dev, PCI_COMMAND, &cr);
+               pci_write_config_word(dev, PCI_COMMAND, 0x0000);
+               pci_write_config_word(dev, PCI_STATUS, 0xffff);
+
+               child = pci_add_new_bus(bus, dev, ++max);
+               buses = (buses & 0xff000000)
+                     | ((unsigned int)(child->primary)     <<  0)
+                     | ((unsigned int)(child->secondary)   <<  8)
+                     | ((unsigned int)(child->subordinate) << 16);
+               /*
+                * We need to blast all three values with a single write.
+                */
+               pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
+               if (!is_cardbus) {
+                       /* Now we can scan all subordinate buses... */
+                       max = pci_do_scan_bus(child);
+               } else {
+                       /*
+                        * For CardBus bridges, we leave 4 bus numbers
+                        * as cards with a PCI-to-PCI bridge can be
+                        * inserted later.
+                        */
+                       max += 3;
+               }
+               /*
+                * Set the subordinate bus number to its real value.
+                */
+               child->subordinate = max;
+               pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
+               pci_write_config_word(dev, PCI_COMMAND, cr);
+       }
+       sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number);
+       return max;
+}
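+
+/*
+ * Worked example of the PCI_PRIMARY_BUS packing above (illustrative
+ * only). The dword holds the primary bus in bits 7:0, secondary in
+ * bits 15:8 and subordinate in bits 23:16; the '& 0xff000000'
+ * preserves the secondary latency timer in bits 31:24. With
+ * parent->secondary == 0x00 and ++max == 0x01 we write
+ *
+ *   buses = <latency byte> | 0x00ff0100
+ *
+ * i.e. subordinate is temporarily 0xff ("all buses behind me"), and
+ * once the subtree has been scanned PCI_SUBORDINATE_BUS is rewritten
+ * with the real maximum.
+ */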
+
+/*
+ * Read interrupt line and base address registers.
+ * The architecture-dependent code can tweak these, of course.
+ */
+static void pci_read_irq(struct pci_dev *dev)
+{
+       unsigned char irq;
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
+       if (irq)
+               pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+       dev->irq = irq;
+}
+
+/**
+ * pci_setup_device - fill in class and map information of a device
+ * @dev: the device structure to fill
+ *
+ * Initialize the device structure with information about the device's 
+ * vendor, class, memory and I/O-space addresses, IRQ lines, etc.
+ * Called at initialisation of the PCI subsystem and by CardBus services.
+ * Returns 0 on success and -1 if the device is of an unknown type (not
+ * normal, bridge or CardBus).
+ */
+int pci_setup_device(struct pci_dev * dev)
+{
+       u32 class;
+
+       sprintf(dev->slot_name, "%02x:%02x.%d", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+       sprintf(dev->name, "PCI device %04x:%04x", dev->vendor, dev->device);
+       
+       pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+       class >>= 8;                                /* upper 3 bytes */
+       dev->class = class;
+       class >>= 8;
+
+       DBG("Found %02x:%02x [%04x/%04x] %06x %02x\n", dev->bus->number, dev->devfn, dev->vendor, dev->device, class, dev->hdr_type);
+
+       /* "Unknown power state" */
+       dev->current_state = 4;
+
+       switch (dev->hdr_type) {                    /* header type */
+       case PCI_HEADER_TYPE_NORMAL:                /* standard header */
+               if (class == PCI_CLASS_BRIDGE_PCI)
+                       goto bad;
+               pci_read_irq(dev);
+               pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
+               pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+               pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
+               break;
+
+       case PCI_HEADER_TYPE_BRIDGE:                /* bridge header */
+               if (class != PCI_CLASS_BRIDGE_PCI)
+                       goto bad;
+               pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
+               break;
+
+       case PCI_HEADER_TYPE_CARDBUS:               /* CardBus bridge header */
+               if (class != PCI_CLASS_BRIDGE_CARDBUS)
+                       goto bad;
+               pci_read_irq(dev);
+               pci_read_bases(dev, 1, 0);
+               pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
+               pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
+               break;
+
+       default:                                    /* unknown header */
+               printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
+                       dev->slot_name, dev->hdr_type);
+               return -1;
+
+       bad:
+               printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
+                      dev->slot_name, class, dev->hdr_type);
+               dev->class = PCI_CLASS_NOT_DEFINED;
+       }
+
+       /* We found a fine healthy device, go go go... */
+       return 0;
+}
+
+/*
+ * Read the config data for a PCI device, sanity-check it
+ * and fill in the dev structure...
+ */
+struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp)
+{
+       struct pci_dev *dev;
+       u32 l;
+
+       if (pci_read_config_dword(temp, PCI_VENDOR_ID, &l))
+               return NULL;
+
+       /* some broken boards return 0 or ~0 if a slot is empty: */
+       if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000)
+               return NULL;
+
+       dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return NULL;
+
+       memcpy(dev, temp, sizeof(*dev));
+       dev->vendor = l & 0xffff;
+       dev->device = (l >> 16) & 0xffff;
+
+       /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
+          set this higher, assuming the system even supports it.  */
+       dev->dma_mask = 0xffffffff;
+       if (pci_setup_device(dev) < 0) {
+               kfree(dev);
+               dev = NULL;
+       }
+       return dev;
+}
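+
+/*
+ * For illustration: the single dword read at PCI_VENDOR_ID above covers
+ * both 16-bit IDs, e.g. l == 0x71138086 splits into vendor 0x8086
+ * (Intel) and device 0x7113 (PIIX4 ACPI).
+ */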
+
+struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp)
+{
+       struct pci_bus *bus = temp->bus;
+       struct pci_dev *dev;
+       struct pci_dev *first_dev = NULL;
+       int func = 0;
+       int is_multi = 0;
+       u8 hdr_type;
+
+       for (func = 0; func < 8; func++, temp->devfn++) {
+               if (func && !is_multi)          /* not a multi-function device */
+                       continue;
+               if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type))
+                       continue;
+               temp->hdr_type = hdr_type & 0x7f;
+
+               dev = pci_scan_device(temp);
+               if (!dev)
+                       continue;
+               pci_name_device(dev);
+               if (!func) {
+                       is_multi = hdr_type & 0x80;
+                       first_dev = dev;
+               }
+
+               /*
+                * Link the device to both the global PCI device chain and
+                * the per-bus list of devices.
+                */
+               list_add_tail(&dev->global_list, &pci_devices);
+               list_add_tail(&dev->bus_list, &bus->devices);
+
+               /* Fix up broken headers */
+               pci_fixup_device(PCI_FIXUP_HEADER, dev);
+       }
+       return first_dev;
+}
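+
+/*
+ * Illustrative note: devfn packs slot and function as (slot << 3) | func,
+ * which is why pci_scan_slot() walks eight consecutive devfn values and
+ * why pci_do_scan_bus() below steps devfn by 8. Bit 7 of the header type
+ * register marks a multi-function device; on a single-function device
+ * only function 0 is probed. E.g. devfn 0x48 is slot 0x09, function 0.
+ */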
+
+unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus)
+{
+       unsigned int devfn, max, pass;
+       struct list_head *ln;
+       struct pci_dev *dev, dev0;
+
+       DBG("Scanning bus %02x\n", bus->number);
+       max = bus->secondary;
+
+       /* Create a device template */
+       memset(&dev0, 0, sizeof(dev0));
+       dev0.bus = bus;
+       dev0.sysdata = bus->sysdata;
+
+       /* Go find them, Rover! */
+       for (devfn = 0; devfn < 0x100; devfn += 8) {
+               dev0.devfn = devfn;
+               pci_scan_slot(&dev0);
+       }
+
+       /*
+        * After performing arch-dependent fixup of the bus, look behind
+        * all PCI-to-PCI bridges on this bus.
+        */
+       DBG("Fixups for bus %02x\n", bus->number);
+       pcibios_fixup_bus(bus);
+       for (pass=0; pass < 2; pass++)
+               for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+                       dev = pci_dev_b(ln);
+                       if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+                               max = pci_scan_bridge(bus, dev, max, pass);
+               }
+
+       /*
+        * We've scanned the bus and so we know all about what's on
+        * the other side of any bridges that may be on this bus plus
+        * any devices.
+        *
+        * Return how far we've got finding sub-buses.
+        */
+       DBG("Bus scan for %02x returning with max=%02x\n", bus->number, max);
+       return max;
+}
+
+int __devinit  pci_bus_exists(const struct list_head *list, int nr)
+{
+       const struct list_head *l;
+
+       for(l=list->next; l != list; l = l->next) {
+               const struct pci_bus *b = pci_bus_b(l);
+               if (b->number == nr || pci_bus_exists(&b->children, nr))
+                       return 1;
+       }
+       return 0;
+}
+
+struct pci_bus * __devinit  pci_alloc_primary_bus(int bus)
+{
+       struct pci_bus *b;
+
+       if (pci_bus_exists(&pci_root_buses, bus)) {
+               /* If we already got to this bus through a different bridge, ignore it */
+               DBG("PCI: Bus %02x already known\n", bus);
+               return NULL;
+       }
+
+       b = pci_alloc_bus();
+       if (!b)
+               return NULL;
+       list_add_tail(&b->node, &pci_root_buses);
+
+       b->number = b->secondary = bus;
+       b->resource[0] = &ioport_resource;
+       b->resource[1] = &iomem_resource;
+       return b;
+}
+
+struct pci_bus * __devinit  pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata)
+{
+       struct pci_bus *b = pci_alloc_primary_bus(bus);
+       if (b) {
+               b->sysdata = sysdata;
+               b->ops = ops;
+               b->subordinate = pci_do_scan_bus(b);
+       }
+       return b;
+}
+
+#ifdef CONFIG_PM
+
+/*
+ * PCI Power management..
+ *
+ * This needs to be done centrally, so that we power manage PCI
+ * devices in the right order: we should not shut down PCI bridges
+ * before we've shut down the devices behind them, and we should
+ * not wake up devices before we've woken up the bridge leading to
+ * each device.
+ *
+ * We do not touch devices that don't have a driver that exports
+ * a suspend/resume function. That is just too dangerous. If the default
+ * PCI suspend/resume functions work for a device, the driver can
+ * easily implement them (i.e. just have a suspend function that calls
+ * the pci_set_power_state() function).
+ */
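+
+/*
+ * A minimal sketch (kept out of the build) of the kind of driver hook
+ * the comment above has in mind; 'example_pci_suspend' and its
+ * pci_driver are hypothetical names, not part of this file:
+ */
+#if 0
+static int example_pci_suspend(struct pci_dev *dev, u32 state)
+{
+       /* Quiesce the device here, then let the PCI core do the rest. */
+       return pci_set_power_state(dev, state);
+}
+
+static struct pci_driver example_pci_driver = {
+       name:           "example",
+       suspend:        example_pci_suspend,
+       /* probe, remove, resume etc. omitted */
+};
+#endif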
+
+static int pci_pm_save_state_device(struct pci_dev *dev, u32 state)
+{
+       int error = 0;
+       if (dev) {
+               struct pci_driver *driver = dev->driver;
+               if (driver && driver->save_state) 
+                       error = driver->save_state(dev,state);
+       }
+       return error;
+}
+
+static int pci_pm_suspend_device(struct pci_dev *dev, u32 state)
+{
+       int error = 0;
+       if (dev) {
+               struct pci_driver *driver = dev->driver;
+               if (driver && driver->suspend)
+                       error = driver->suspend(dev,state);
+       }
+       return error;
+}
+
+static int pci_pm_resume_device(struct pci_dev *dev)
+{
+       int error = 0;
+       if (dev) {
+               struct pci_driver *driver = dev->driver;
+               if (driver && driver->resume)
+                       error = driver->resume(dev);
+       }
+       return error;
+}
+
+static int pci_pm_save_state_bus(struct pci_bus *bus, u32 state)
+{
+       struct list_head *list;
+       int error = 0;
+
+       list_for_each(list, &bus->children) {
+               error = pci_pm_save_state_bus(pci_bus_b(list),state);
+               if (error) return error;
+       }
+       list_for_each(list, &bus->devices) {
+               error = pci_pm_save_state_device(pci_dev_b(list),state);
+               if (error) return error;
+       }
+       return 0;
+}
+
+static int pci_pm_suspend_bus(struct pci_bus *bus, u32 state)
+{
+       struct list_head *list;
+
+       /* Walk the bus children list */
+       list_for_each(list, &bus->children) 
+               pci_pm_suspend_bus(pci_bus_b(list),state);
+
+       /* Walk the device children list */
+       list_for_each(list, &bus->devices)
+               pci_pm_suspend_device(pci_dev_b(list),state);
+       return 0;
+}
+
+static int pci_pm_resume_bus(struct pci_bus *bus)
+{
+       struct list_head *list;
+
+       /* Walk the device children list */
+       list_for_each(list, &bus->devices)
+               pci_pm_resume_device(pci_dev_b(list));
+
+       /* And then walk the bus children */
+       list_for_each(list, &bus->children)
+               pci_pm_resume_bus(pci_bus_b(list));
+       return 0;
+}
+
+static int pci_pm_save_state(u32 state)
+{
+       struct list_head *list;
+       struct pci_bus *bus;
+       int error = 0;
+
+       list_for_each(list, &pci_root_buses) {
+               bus = pci_bus_b(list);
+               error = pci_pm_save_state_bus(bus,state);
+               if (!error)
+                       error = pci_pm_save_state_device(bus->self,state);
+       }
+       return error;
+}
+
+static int pci_pm_suspend(u32 state)
+{
+       struct list_head *list;
+       struct pci_bus *bus;
+
+       list_for_each(list, &pci_root_buses) {
+               bus = pci_bus_b(list);
+               pci_pm_suspend_bus(bus,state);
+               pci_pm_suspend_device(bus->self,state);
+       }
+       return 0;
+}
+
+static int pci_pm_resume(void)
+{
+       struct list_head *list;
+       struct pci_bus *bus;
+
+       list_for_each(list, &pci_root_buses) {
+               bus = pci_bus_b(list);
+               pci_pm_resume_device(bus->self);
+               pci_pm_resume_bus(bus);
+       }
+       return 0;
+}
+
+static int 
+pci_pm_callback(struct pm_dev *pm_device, pm_request_t rqst, void *data)
+{
+       int error = 0;
+
+       switch (rqst) {
+       case PM_SAVE_STATE:
+               error = pci_pm_save_state((u32)data);
+               break;
+       case PM_SUSPEND:
+               error = pci_pm_suspend((u32)data);
+               break;
+       case PM_RESUME:
+               error = pci_pm_resume();
+               break;
+       default: break;
+       }
+       return error;
+}
+
+#endif
+
+#if 0 /* XXX KAF: Only USB uses this stuff -- I think we'll just bin it. */
+
+/*
+ * Pool allocator ... wraps the pci_alloc_consistent page allocator, so
+ * small blocks are easily used by drivers for bus mastering controllers.
+ * This should probably be sharing the guts of the slab allocator.
+ */
+
+struct pci_pool {      /* the pool */
+       struct list_head        page_list;
+       spinlock_t              lock;
+       size_t                  blocks_per_page;
+       size_t                  size;
+       int                     flags;
+       struct pci_dev          *dev;
+       size_t                  allocation;
+       char                    name [32];
+       wait_queue_head_t       waitq;
+};
+
+struct pci_page {      /* cacheable header for 'allocation' bytes */
+       struct list_head        page_list;
+       void                    *vaddr;
+       dma_addr_t              dma;
+       unsigned long           bitmap [0];
+};
+
+#define        POOL_TIMEOUT_JIFFIES    ((100 /* msec */ * HZ) / 1000)
+#define        POOL_POISON_BYTE        0xa7
+
+// #define CONFIG_PCIPOOL_DEBUG
+
+
+/**
+ * pci_pool_create - Creates a pool of pci consistent memory blocks, for dma.
+ * @name: name of pool, for diagnostics
+ * @pdev: pci device that will be doing the DMA
+ * @size: size of the blocks in this pool.
+ * @align: alignment requirement for blocks; must be a power of two
+ * @allocation: returned blocks won't cross this boundary (or zero)
+ * @flags: SLAB_* flags (not all are supported).
+ *
+ * Returns a pci allocation pool with the requested characteristics, or
+ * null if one can't be created.  Given one of these pools, pci_pool_alloc()
+ * may be used to allocate memory.  Such memory will all have "consistent"
+ * DMA mappings, accessible by the device and its driver without using
+ * cache flushing primitives.  The actual size of blocks allocated may be
+ * larger than requested because of alignment.
+ *
+ * If allocation is nonzero, objects returned from pci_pool_alloc() won't
+ * cross that size boundary.  This is useful for devices which have
+ * addressing restrictions on individual DMA transfers, such as not crossing
+ * boundaries of 4KBytes.
+ */
+struct pci_pool *
+pci_pool_create (const char *name, struct pci_dev *pdev,
+       size_t size, size_t align, size_t allocation, int flags)
+{
+       struct pci_pool         *retval;
+
+       if (align == 0)
+               align = 1;
+       if (size == 0)
+               return 0;
+       else if (size < align)
+               size = align;
+       else if ((size % align) != 0) {
+               size += align - 1;
+               size &= ~(align - 1);
+       }
+
+       if (allocation == 0) {
+               if (PAGE_SIZE < size)
+                       allocation = size;
+               else
+                       allocation = PAGE_SIZE;
+               // FIXME: round up for less fragmentation
+       } else if (allocation < size)
+               return 0;
+
+       if (!(retval = kmalloc (sizeof *retval, flags)))
+               return retval;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+       flags |= SLAB_POISON;
+#endif
+
+       strncpy (retval->name, name, sizeof retval->name);
+       retval->name [sizeof retval->name - 1] = 0;
+
+       retval->dev = pdev;
+       INIT_LIST_HEAD (&retval->page_list);
+       spin_lock_init (&retval->lock);
+       retval->size = size;
+       retval->flags = flags;
+       retval->allocation = allocation;
+       retval->blocks_per_page = allocation / size;
+       init_waitqueue_head (&retval->waitq);
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+       printk (KERN_DEBUG "pcipool create %s/%s size %d, %d/page (%d alloc)\n",
+               pdev ? pdev->slot_name : NULL, retval->name, size,
+               retval->blocks_per_page, allocation);
+#endif
+
+       return retval;
+}
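+
+/*
+ * Usage sketch (illustrative only; 'example_pool_usage', 'mydev' and
+ * the sizes are invented):
+ */
+#if 0
+static void example_pool_usage (struct pci_dev *mydev)
+{
+       struct pci_pool *pool;
+       dma_addr_t      dma;
+       void            *vaddr;
+
+       /* 64-byte blocks, 64-byte aligned, no boundary restriction */
+       pool = pci_pool_create ("example", mydev, 64, 64, 0, SLAB_KERNEL);
+       if (!pool)
+               return;
+       vaddr = pci_pool_alloc (pool, SLAB_KERNEL, &dma);
+       if (vaddr)
+               pci_pool_free (pool, vaddr, dma);
+       pci_pool_destroy (pool);
+}
+#endif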
+
+
+static struct pci_page *
+pool_alloc_page (struct pci_pool *pool, int mem_flags)
+{
+       struct pci_page *page;
+       int             mapsize;
+
+       mapsize = pool->blocks_per_page;
+       mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG;
+       mapsize *= sizeof (long);
+
+       page = (struct pci_page *) kmalloc (mapsize + sizeof *page, mem_flags);
+       if (!page)
+               return 0;
+       page->vaddr = pci_alloc_consistent (pool->dev,
+                                           pool->allocation,
+                                           &page->dma);
+       if (page->vaddr) {
+               memset (page->bitmap, 0xff, mapsize);   // bit set == free
+               if (pool->flags & SLAB_POISON)
+                       memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+               list_add (&page->page_list, &pool->page_list);
+       } else {
+               kfree (page);
+               page = 0;
+       }
+       return page;
+}
+
+
+static inline int
+is_page_busy (int blocks, unsigned long *bitmap)
+{
+       while (blocks > 0) {
+               if (*bitmap++ != ~0UL)
+                       return 1;
+               blocks -= BITS_PER_LONG;
+       }
+       return 0;
+}
+
+static void
+pool_free_page (struct pci_pool *pool, struct pci_page *page)
+{
+       dma_addr_t      dma = page->dma;
+
+       if (pool->flags & SLAB_POISON)
+               memset (page->vaddr, POOL_POISON_BYTE, pool->allocation);
+       pci_free_consistent (pool->dev, pool->allocation, page->vaddr, dma);
+       list_del (&page->page_list);
+       kfree (page);
+}
+
+
+/**
+ * pci_pool_destroy - destroys a pool of pci memory blocks.
+ * @pool: pci pool that will be destroyed
+ *
+ * Caller guarantees that no more memory from the pool is in use,
+ * and that nothing will try to use the pool after this call.
+ */
+void
+pci_pool_destroy (struct pci_pool *pool)
+{
+       unsigned long           flags;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+       printk (KERN_DEBUG "pcipool destroy %s/%s\n",
+               pool->dev ? pool->dev->slot_name : NULL,
+               pool->name);
+#endif
+
+       spin_lock_irqsave (&pool->lock, flags);
+       while (!list_empty (&pool->page_list)) {
+               struct pci_page         *page;
+               page = list_entry (pool->page_list.next,
+                               struct pci_page, page_list);
+               if (is_page_busy (pool->blocks_per_page, page->bitmap)) {
+                       printk (KERN_ERR "pci_pool_destroy %s/%s, %p busy\n",
+                               pool->dev ? pool->dev->slot_name : NULL,
+                               pool->name, page->vaddr);
+                       /* leak the still-in-use consistent memory */
+                       list_del (&page->page_list);
+                       kfree (page);
+               } else
+                       pool_free_page (pool, page);
+       }
+       spin_unlock_irqrestore (&pool->lock, flags);
+       kfree (pool);
+}
+
+
+/**
+ * pci_pool_alloc - get a block of consistent memory
+ * @pool: pci pool that will produce the block
+ * @mem_flags: SLAB_KERNEL or SLAB_ATOMIC
+ * @handle: pointer to dma address of block
+ *
+ * This returns the kernel virtual address of a currently unused block,
+ * and reports its dma address through the handle.
+ * If such a memory block can't be allocated, null is returned.
+ */
+void *
+pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle)
+{
+       unsigned long           flags;
+       struct list_head        *entry;
+       struct pci_page         *page;
+       int                     map, block;
+       size_t                  offset;
+       void                    *retval;
+
+restart:
+       spin_lock_irqsave (&pool->lock, flags);
+       list_for_each (entry, &pool->page_list) {
+               int             i;
+               page = list_entry (entry, struct pci_page, page_list);
+               /* only cachable accesses here ... */
+               for (map = 0, i = 0;
+                               i < pool->blocks_per_page;
+                               i += BITS_PER_LONG, map++) {
+                       if (page->bitmap [map] == 0)
+                               continue;
+                       block = ffz (~ page->bitmap [map]);
+                       if ((i + block) < pool->blocks_per_page) {
+                               clear_bit (block, &page->bitmap [map]);
+                               offset = (BITS_PER_LONG * map) + block;
+                               offset *= pool->size;
+                               goto ready;
+                       }
+               }
+       }
+       if (!(page = pool_alloc_page (pool, mem_flags))) {
+               if (mem_flags == SLAB_KERNEL) {
+                       DECLARE_WAITQUEUE (wait, current);
+
+                       current->state = TASK_INTERRUPTIBLE;
+                       add_wait_queue (&pool->waitq, &wait);
+                       spin_unlock_irqrestore (&pool->lock, flags);
+
+                       schedule_timeout (POOL_TIMEOUT_JIFFIES);
+
+                       current->state = TASK_RUNNING;
+                       remove_wait_queue (&pool->waitq, &wait);
+                       goto restart;
+               }
+               retval = 0;
+               goto done;
+       }
+
+       clear_bit (0, &page->bitmap [0]);
+       offset = 0;
+ready:
+       retval = offset + page->vaddr;
+       *handle = offset + page->dma;
+done:
+       spin_unlock_irqrestore (&pool->lock, flags);
+       return retval;
+}
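+
+/*
+ * Worked example of the bitmap arithmetic above (assuming BITS_PER_LONG
+ * == 32): with bitmap[0] == 0xfffffff0 the low four blocks are taken
+ * (a set bit means free), ffz (~bitmap[0]) == 4 picks block 4, and the
+ * returned block sits at offset 4 * pool->size into the page.
+ */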
+
+
+static struct pci_page *
+pool_find_page (struct pci_pool *pool, dma_addr_t dma)
+{
+       unsigned long           flags;
+       struct list_head        *entry;
+       struct pci_page         *page;
+
+       spin_lock_irqsave (&pool->lock, flags);
+       list_for_each (entry, &pool->page_list) {
+               page = list_entry (entry, struct pci_page, page_list);
+               if (dma < page->dma)
+                       continue;
+               if (dma < (page->dma + pool->allocation))
+                       goto done;
+       }
+       page = 0;
+done:
+       spin_unlock_irqrestore (&pool->lock, flags);
+       return page;
+}
+
+
+/**
+ * pci_pool_free - put block back into pci pool
+ * @pool: the pci pool holding the block
+ * @vaddr: virtual address of block
+ * @dma: dma address of block
+ *
+ * Caller promises neither device nor driver will again touch this block
+ * unless it is first re-allocated.
+ */
+void
+pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma)
+{
+       struct pci_page         *page;
+       unsigned long           flags;
+       int                     map, block;
+
+       if ((page = pool_find_page (pool, dma)) == 0) {
+               printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n",
+                       pool->dev ? pool->dev->slot_name : NULL,
+                       pool->name, vaddr, (int) (dma & 0xffffffff));
+               return;
+       }
+#ifdef CONFIG_PCIPOOL_DEBUG
+       if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
+               printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n",
+                       pool->dev ? pool->dev->slot_name : NULL,
+                       pool->name, vaddr, (int) (dma & 0xffffffff));
+               return;
+       }
+#endif
+
+       block = dma - page->dma;
+       block /= pool->size;
+       map = block / BITS_PER_LONG;
+       block %= BITS_PER_LONG;
+
+#ifdef CONFIG_PCIPOOL_DEBUG
+       if (page->bitmap [map] & (1UL << block)) {
+               printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n",
+                       pool->dev ? pool->dev->slot_name : NULL,
+                       pool->name, dma);
+               return;
+       }
+#endif
+       if (pool->flags & SLAB_POISON)
+               memset (vaddr, POOL_POISON_BYTE, pool->size);
+
+       spin_lock_irqsave (&pool->lock, flags);
+       set_bit (block, &page->bitmap [map]);
+       if (waitqueue_active (&pool->waitq))
+               wake_up (&pool->waitq);
+       /*
+        * Resist a temptation to do
+        *    if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page);
+        * it is not interrupt safe. Better have empty pages hang around.
+        */
+       spin_unlock_irqrestore (&pool->lock, flags);
+}
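+
+/*
+ * Worked example of the index arithmetic above (assuming BITS_PER_LONG
+ * == 32): with pool->size == 64 and dma - page->dma == 0x140, block ==
+ * 0x140 / 64 == 5, map == 0, so set_bit marks bit 5 of bitmap[0] and
+ * the block is free again.
+ */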
+
+#endif /* XXX End of PCI pool allocator stuff. */
+
+
+void __devinit  pci_init(void)
+{
+       struct pci_dev *dev;
+
+       pcibios_init();
+
+       pci_for_each_dev(dev) {
+               pci_fixup_device(PCI_FIXUP_FINAL, dev);
+       }
+
+#ifdef CONFIG_PM
+       pm_register(PM_PCI_DEV, 0, pci_pm_callback);
+#endif
+}
+
+static int __devinit  pci_setup(char *str)
+{
+       while (str) {
+               char *k = strchr(str, ',');
+               if (k)
+                       *k++ = 0;
+               if (*str && (str = pcibios_setup(str)) && *str) {
+                       /* PCI layer options should be handled here */
+                       printk(KERN_ERR "PCI: Unknown option `%s'\n", str);
+               }
+               str = k;
+       }
+       return 1;
+}
+
+__setup("pci=", pci_setup);
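+
+/*
+ * Illustrative note: a boot command line such as "pci=rom,lastbus=1"
+ * is split on commas above and each token is offered to
+ * pcibios_setup(); which option names exist (e.g. "rom", "lastbus=")
+ * depends on the architecture back end, so treat these as examples
+ * only.
+ */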
+
+EXPORT_SYMBOL(pci_read_config_byte);
+EXPORT_SYMBOL(pci_read_config_word);
+EXPORT_SYMBOL(pci_read_config_dword);
+EXPORT_SYMBOL(pci_write_config_byte);
+EXPORT_SYMBOL(pci_write_config_word);
+EXPORT_SYMBOL(pci_write_config_dword);
+EXPORT_SYMBOL(pci_devices);
+EXPORT_SYMBOL(pci_root_buses);
+EXPORT_SYMBOL(pci_enable_device);
+EXPORT_SYMBOL(pci_disable_device);
+EXPORT_SYMBOL(pci_find_capability);
+EXPORT_SYMBOL(pci_release_regions);
+EXPORT_SYMBOL(pci_request_regions);
+EXPORT_SYMBOL(pci_find_class);
+EXPORT_SYMBOL(pci_find_device);
+EXPORT_SYMBOL(pci_find_slot);
+EXPORT_SYMBOL(pci_find_subsys);
+EXPORT_SYMBOL(pci_set_master);
+EXPORT_SYMBOL(pci_set_dma_mask);
+EXPORT_SYMBOL(pci_dac_set_dma_mask);
+EXPORT_SYMBOL(pci_assign_resource);
+EXPORT_SYMBOL(pci_register_driver);
+EXPORT_SYMBOL(pci_unregister_driver);
+EXPORT_SYMBOL(pci_dev_driver);
+EXPORT_SYMBOL(pci_match_device);
+EXPORT_SYMBOL(pci_find_parent_resource);
+
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pci_setup_device);
+EXPORT_SYMBOL(pci_insert_device);
+EXPORT_SYMBOL(pci_remove_device);
+EXPORT_SYMBOL(pci_announce_device_to_drivers);
+EXPORT_SYMBOL(pci_add_new_bus);
+EXPORT_SYMBOL(pci_do_scan_bus);
+EXPORT_SYMBOL(pci_scan_slot);
+EXPORT_SYMBOL(pci_proc_attach_device);
+EXPORT_SYMBOL(pci_proc_detach_device);
+EXPORT_SYMBOL(pci_proc_attach_bus);
+EXPORT_SYMBOL(pci_proc_detach_bus);
+#endif
+
+EXPORT_SYMBOL(pci_set_power_state);
+EXPORT_SYMBOL(pci_save_state);
+EXPORT_SYMBOL(pci_restore_state);
+EXPORT_SYMBOL(pci_enable_wake);
+
+/* Obsolete functions */
+
+EXPORT_SYMBOL(pcibios_present);
+EXPORT_SYMBOL(pcibios_read_config_byte);
+EXPORT_SYMBOL(pcibios_read_config_word);
+EXPORT_SYMBOL(pcibios_read_config_dword);
+EXPORT_SYMBOL(pcibios_write_config_byte);
+EXPORT_SYMBOL(pcibios_write_config_word);
+EXPORT_SYMBOL(pcibios_write_config_dword);
+EXPORT_SYMBOL(pcibios_find_class);
+EXPORT_SYMBOL(pcibios_find_device);
+
+/* Quirk info */
+
+EXPORT_SYMBOL(isa_dma_bridge_buggy);
+EXPORT_SYMBOL(pci_pci_problems);
+
+#if 0
+/* Pool allocator */
+
+EXPORT_SYMBOL (pci_pool_create);
+EXPORT_SYMBOL (pci_pool_destroy);
+EXPORT_SYMBOL (pci_pool_alloc);
+EXPORT_SYMBOL (pci_pool_free);
+
+#endif
diff --git a/xen-2.4.16/drivers/pci/pci.ids b/xen-2.4.16/drivers/pci/pci.ids
new file mode 100644 (file)
index 0000000..c73f0bd
--- /dev/null
@@ -0,0 +1,5445 @@
+#
+#      List of PCI ID's
+#
+#      Maintained by Martin Mares <mj@ucw.cz> and other volunteers from the
+#      Linux PCI ID's Project at http://pciids.sf.net/. New data are always
+#      welcome (if they are accurate), we're eagerly expecting new entries,
+#      so if you have anything to contribute, please visit the home page or
+#      send a diff -u against the most recent pci.ids to pci-ids@ucw.cz.
+#
+#      $Id: pci.ids,v 1.24 2001/10/28 21:55:26 mares Exp $
+#
+
+# Vendors, devices and subsystems. Please keep sorted.
+
+# Syntax:
+# vendor  vendor_name
+#      device  device_name                             <-- single tab
+#              subvendor subdevice  subsystem_name     <-- two tabs
+
+0000  Gammagraphx, Inc.
+001a  Ascend Communications, Inc.
+0033  Paradyne corp.
+003d  Lockheed Martin-Marietta Corp
+0070  Hauppauge computer works Inc.
+0100  Ncipher Corp Ltd
+0675  Dynalink
+       1700  IS64PH ISDN Adapter
+       1702  IS64PH ISDN Adapter
+0a89  BREA Technologies Inc
+0e11  Compaq Computer Corporation
+       0001  PCI to EISA Bridge
+       0002  PCI to ISA Bridge
+       0049  NC7132 Gigabit Upgrade Module
+       004a  NC6136 Gigabit Server Adapter
+       0508  Netelligent 4/16 Token Ring
+       1000  Triflex/Pentium Bridge, Model 1000
+       2000  Triflex/Pentium Bridge, Model 2000
+       3032  QVision 1280/p
+       3033  QVision 1280/p
+       3034  QVision 1280/p
+       4000  4000 [Triflex]
+       6010  HotPlug PCI Bridge 6010
+       7020  USB Controller
+       a0ec  Fibre Channel Host Controller
+       a0f0  Advanced System Management Controller
+       a0f3  Triflex PCI to ISA Bridge
+       a0f7  PCI Hotplug Controller
+               8086 002a  PCI Hotplug Controller A
+               8086 002b  PCI Hotplug Controller B
+       a0f8  USB Open Host Controller
+       a0fc  Fibre Channel Host Controller
+       ae10  Smart-2/P RAID Controller
+               0e11 4030  Smart-2/P Array Controller
+               0e11 4031  Smart-2SL Array Controller
+               0e11 4032  Smart Array Controller
+               0e11 4033  Smart 3100ES Array Controller
+       ae29  MIS-L
+       ae2a  MPC
+       ae2b  MIS-E
+       ae31  System Management Controller
+       ae32  Netelligent 10/100
+       ae33  Triflex Dual EIDE Controller
+       ae34  Netelligent 10
+       ae35  Integrated NetFlex-3/P
+       ae40  Netelligent 10/100 Dual
+       ae43  ProLiant Integrated Netelligent 10/100
+       ae69  CETUS-L
+       ae6c  Northstar
+       ae6d  NorthStar CPU to PCI Bridge
+       b011  Integrated Netelligent 10/100
+       b012  Netelligent 10 T/2
+       b01e  NC3120 Fast Ethernet NIC
+       b01f  NC3122 Fast Ethernet NIC
+       b02f  NC1120 Ethernet NIC
+       b030  Netelligent WS 5100
+       b04a  10/100 TX PCI Intel WOL UTP Controller
+       b060  Smart Array 5300 Controller
+       b0c6  NC3161 Fast Ethernet NIC
+       b0c7  NC3160 Fast Ethernet NIC
+       b0d7  NC3121 Fast Ethernet NIC
+       b0dd  NC3131 Fast Ethernet NIC
+       b0de  NC3132 Fast Ethernet Module
+       b0df  NC6132 Gigabit Module
+       b0e0  NC6133 Gigabit Module
+       b0e1  NC3133 Fast Ethernet Module
+       b123  NC6134 Gigabit NIC
+       b134  NC3163 Fast Ethernet NIC
+       b13c  NC3162 Fast Ethernet NIC
+       b144  NC3123 Fast Ethernet NIC
+       b163  NC3134 Fast Ethernet NIC
+       b164  NC3135 Fast Ethernet Upgrade Module
+       b178  Smart Array 5i/532
+       b1a4  NC7131 Gigabit Server Adapter
+       f130  NetFlex-3/P ThunderLAN 1.0
+       f150  NetFlex-3/P ThunderLAN 2.3
+1000  LSI Logic / Symbios Logic (formerly NCR)
+       0001  53c810
+               1000 1000  8100S
+       0002  53c820
+       0003  53c825
+       0004  53c815
+       0005  53c810AP
+       0006  53c860
+       000a  53c1510
+       000b  53c896
+       000c  53c895
+       000d  53c885
+       000f  53c875
+               0e11 7004  Embedded Ultra Wide SCSI Controller
+               1092 8760  FirePort 40 Dual SCSI Controller
+               1de1 3904  DC390F Ultra Wide SCSI Controller
+       0012  53c895a
+       0020  53c1010 Ultra3 SCSI Adapter
+       0021  53c1010 66MHz  Ultra3 SCSI Adapter
+       0030  53c1030
+       0040  53c1035
+       008f  53c875J
+               1092 8000  FirePort 40 SCSI Controller
+               1092 8760  FirePort 40 Dual SCSI Host Adapter
+       0621  FC909
+       0622  FC929
+       0623  FC929 LAN
+       0624  FC919
+       0625  FC919 LAN
+       0701  83C885
+       0702  Yellowfin G-NIC gigabit ethernet
+               1318 0000  PEI100X
+       0901  61C102
+       1000  63C815
+1001  Initio
+       0010  PCI 1616 Measurement card with 32 digital I/O lines
+       0011  OPTO-PCI Opto-Isolated digital I/O board
+       0012  PCI-AD/DA Analogue I/O board
+       0013  PCI-OPTO-RELAIS Digital I/O board with relay outputs
+       0014  PCI-Counter/Timer Counter Timer board
+       0015  PCI-DAC416 Analogue output board
+       0016  PCI-MFB Analogue I/O board
+       0017  PROTO-3 PCI Prototyping board
+       9100  INI-9100/9100W SCSI Host
+1002  ATI Technologies Inc
+       4158  68800AX [Mach32]
+       4354  215CT [Mach64 CT]
+       4358  210888CX [Mach64 CX]
+       4554  210888ET [Mach64 ET]
+       4654  Mach64 VT
+       4742  3D Rage Pro AGP 1X/2X
+               1028 4082  Optiplex GX1 Onboard Display Adapter
+               8086 4152  Rage 3D Pro AGP
+       4744  3D Rage Pro AGP 1X
+       4747  3D Rage Pro
+       4749  3D Rage Pro
+       474c  Rage XC
+       474d  Rage XL AGP
+       474e  Rage XC AGP
+       474f  Rage XL
+       4750  3D Rage Pro 215GP
+       4751  3D Rage Pro 215GQ
+       4752  Rage XL
+       4753  Rage XC
+       4754  3D Rage I/II 215GT [Mach64 GT]
+       4755  3D Rage II+ 215GTB [Mach64 GTB]
+       4756  3D Rage IIC 215IIC [Mach64 GT IIC]
+       4757  3D Rage IIC AGP
+               1028 0089  Rage 3D IIC
+               1028 4082  Rage 3D IIC
+               1028 8082  Rage 3D IIC
+               1028 c082  Rage 3D IIC
+       4758  210888GX [Mach64 GX]
+       4759  3D Rage IIC
+       475a  3D Rage IIC AGP
+               1002 0087  Rage 3D IIC
+       4c42  3D Rage LT Pro AGP-133
+               0e11 b0e8  Rage 3D LT Pro
+               0e11 b10e  3D Rage LT Pro (Compaq Armada 1750)
+               1028 0085  Rage 3D LT Pro
+       4c44  3D Rage LT Pro AGP-66
+       4c45  Rage Mobility M3 AGP
+       4c46  Rage Mobility M3 AGP 2x
+       4c47  3D Rage LT-G 215LG
+       4c49  3D Rage LT Pro
+       4c4d  Rage Mobility P/M AGP 2x
+       4c4e  Rage Mobility L AGP 2x
+       4c50  3D Rage LT Pro
+       4c51  3D Rage LT Pro
+       4c52  Rage Mobility P/M
+       4c53  Rage Mobility L
+       4c54  264LT [Mach64 LT]
+       4c57  Radeon Mobility M6 LW
+       4c59  Radeon Mobility M6 LY
+       4c5a  Radeon Mobility M6 LZ
+       4d46  Rage Mobility M4 AGP
+       4d4c  Rage Mobility M4 AGP
+       5041  Rage 128 PA
+       5042  Rage 128 PB
+       5043  Rage 128 PC
+       5044  Rage 128 PD
+       5045  Rage 128 PE
+       5046  Rage 128 PF
+               1002 2000  Rage Fury MAXX AGP 4x (TMDS) (VGA device)
+               1002 2001  Rage Fury MAXX AGP 4x (TMDS) (Extra device?!)
+       5047  Rage 128 PG
+       5048  Rage 128 PH
+       5049  Rage 128 PI
+       504a  Rage 128 PJ
+       504b  Rage 128 PK
+       504c  Rage 128 PL
+       504d  Rage 128 PM
+       504e  Rage 128 PN
+       504f  Rage 128 PO
+       5050  Rage 128 PP
+       5051  Rage 128 PQ
+       5052  Rage 128 PR
+       5053  Rage 128 PS
+       5054  Rage 128 PT
+       5055  Rage 128 PU
+       5056  Rage 128 PV
+       5057  Rage 128 PW
+       5058  Rage 128 PX
+       5144  Radeon QD
+       5145  Radeon QE
+       5146  Radeon QF
+       5147  Radeon QG
+       5159  Radeon VE QY
+       515a  Radeon VE QZ
+       5245  Rage 128 RE
+       5246  Rage 128 RF
+       5247  Rage 128 RG
+       524b  Rage 128 RK
+       524c  Rage 128 RL
+       5345  Rage 128 SE
+       5346  Rage 128 SF
+       5347  Rage 128 SG
+       5348  Rage 128 4x
+       534b  Rage 128 SK
+       534c  Rage 128 SL
+       534d  Rage 128 SM
+       534e  Rage 128 SN
+       5354  Mach 64 VT
+               1002 5654  Mach 64 reference
+       5446  Rage 128 Pro TF
+       544c  Rage 128 Pro TL
+       5452  Rage 128 Pro TR
+       5654  264VT [Mach64 VT]
+               1002 5654  Mach64VT Reference
+       5655  264VT3 [Mach64 VT3]
+       5656  264VT4 [Mach64 VT4]
+1003  ULSI Systems
+       0201  US201
+1004  VLSI Technology Inc
+       0005  82C592-FC1
+       0006  82C593-FC1
+       0007  82C594-AFC2
+       0008  82C596/7 [Wildcat]
+       0009  82C597-AFC2
+       000c  82C541 [Lynx]
+       000d  82C543 [Lynx]
+       0101  82C532
+       0102  82C534
+       0103  82C538
+       0104  82C535
+       0105  82C147
+       0200  82C975
+       0280  82C925
+       0304  QSound ThunderBird PCI Audio
+               1004 0304  QSound ThunderBird PCI Audio
+               122d 1206  DSP368 Audio
+               1483 5020  XWave Thunder 3D Audio
+       0305  QSound ThunderBird PCI Audio Gameport
+               1004 0305  QSound ThunderBird PCI Audio Gameport
+               122d 1207  DSP368 Audio Gameport
+               1483 5021  XWave Thunder 3D Audio Gameport
+       0306  QSound ThunderBird PCI Audio Support Registers
+               1004 0306  QSound ThunderBird PCI Audio Support Registers
+               122d 1208  DSP368 Audio Support Registers
+               1483 5022  XWave Thunder 3D Audio Support Registers
+       0702  VAS96011 [Golden Gate II]
+1005  Avance Logic Inc. [ALI]
+       2064  ALG2032/2064
+       2128  ALG2364A
+       2301  ALG2301
+       2302  ALG2302
+       2364  ALG2364
+       2464  ALG2364A
+       2501  ALG2564A/25128A
+1006  Reply Group
+1007  NetFrame Systems Inc
+1008  Epson
+100a  Phoenix Technologies
+100b  National Semiconductor Corporation
+       0001  DP83810
+       0002  87415/87560 IDE
+       000e  87560 Legacy I/O
+       000f  OHCI Compliant FireWire Controller
+       0011  National PCI System I/O
+       0012  USB Controller
+       0020  DP83815 (MacPhyter) Ethernet Controller
+       0022  DP83820 10/100/1000 Ethernet Controller
+       d001  87410 IDE
+100c  Tseng Labs Inc
+       3202  ET4000/W32p rev A
+       3205  ET4000/W32p rev B
+       3206  ET4000/W32p rev C
+       3207  ET4000/W32p rev D
+       3208  ET6000
+       4702  ET6300
+100d  AST Research Inc
+100e  Weitek
+       9000  P9000
+       9001  P9000
+       9100  P9100
+1010  Video Logic, Ltd.
+1011  Digital Equipment Corporation
+       0001  DECchip 21050
+       0002  DECchip 21040 [Tulip]
+       0004  DECchip 21030 [TGA]
+       0007  NVRAM [Zephyr NVRAM]
+       0008  KZPSA [KZPSA]
+       0009  DECchip 21140 [FasterNet]
+               10b8 2001  SMC9332BDT EtherPower 10/100
+               10b8 2002  SMC9332BVT EtherPower T4 10/100
+               10b8 2003  SMC9334BDT EtherPower 10/100 (1-port)
+               1109 2400  ANA-6944A/TX Fast Ethernet
+               1112 2300  RNS2300 Fast Ethernet
+               1112 2320  RNS2320 Fast Ethernet
+               1112 2340  RNS2340 Fast Ethernet
+               1113 1207  EN-1207-TX Fast Ethernet
+               1186 1100  DFE-500TX Fast Ethernet
+               1186 1112  DFE-570TX Fast Ethernet
+               1186 1140  DFE-660 Cardbus Ethernet 10/100
+               1186 1142  DFE-660 Cardbus Ethernet 10/100
+               1282 9100  AEF-380TXD Fast Ethernet
+               1385 1100  FA310TX Fast Ethernet
+               2646 0001  KNE100TX Fast Ethernet
+       000a  21230 Video Codec
+       000d  PBXGB [TGA2]
+       000f  DEFPA
+       0014  DECchip 21041 [Tulip Pass 3]
+               1186 0100  DE-530+
+       0016  DGLPB [OPPO]
+       0019  DECchip 21142/43
+               1011 500b  DE500 Fast Ethernet
+               1014 0001  10/100 EtherJet Cardbus
+               1025 0315  ALN315 Fast Ethernet
+               108d 0016  Rapidfire 2327 10/100 Ethernet
+               10b8 2005  SMC8032DT Extreme Ethernet 10/100
+               10ef 8169  Cardbus Fast Ethernet
+               1109 2a00  ANA-6911A/TX Fast Ethernet
+               1109 2b00  ANA-6911A/TXC Fast Ethernet
+               1109 3000  ANA-6922/TX Fast Ethernet
+               1113 1207  Cheetah Fast Ethernet
+               1113 2220  Cardbus Fast Ethernet
+               115d 0002  Cardbus Ethernet 10/100
+               1179 0203  Fast Ethernet
+               1179 0204  Cardbus Fast Ethernet
+               1186 1100  DFE-500TX Fast Ethernet
+               1186 1101  DFE-500TX Fast Ethernet
+               1186 1102  DFE-500TX Fast Ethernet
+               1266 0004  Eagle Fast EtherMAX
+               12af 0019  NetFlyer Cardbus Fast Ethernet
+               1374 0001  Cardbus Ethernet Card 10/100
+               1395 0001  10/100 Ethernet CardBus PC Card
+               8086 0001  EtherExpress PRO/100 Mobile CardBus 32
+       0021  DECchip 21052
+       0022  DECchip 21150
+       0023  DECchip 21150
+       0024  DECchip 21152
+       0025  DECchip 21153
+       0026  DECchip 21154
+       0045  DECchip 21553
+       0046  DECchip 21554
+               103c 10c2  Hewlett-Packard NetRAID-4M
+               9005 0365  Adaptec 5400S
+               9005 1364  Dell PowerEdge RAID Controller 2
+               9005 1365  Dell PowerEdge RAID Controller 2
+       1065  StrongARM DC21285
+               1069 0020  DAC960P
+1012  Micronics Computers Inc
+1013  Cirrus Logic
+       0038  GD 7548
+       0040  GD 7555 Flat Panel GUI Accelerator
+       004c  GD 7556 Video/Graphics LCD/CRT Ctrlr
+       00a0  GD 5430/40 [Alpine]
+       00a2  GD 5432 [Alpine]
+       00a4  GD 5434-4 [Alpine]
+       00a8  GD 5434-8 [Alpine]
+       00ac  GD 5436 [Alpine]
+       00b0  GD 5440
+       00b8  GD 5446
+       00bc  GD 5480
+               1013 00bc  CL-GD5480
+       00d0  GD 5462
+       00d2  GD 5462 [Laguna I]
+       00d4  GD 5464 [Laguna]
+       00d6  GD 5465 [Laguna]
+       00e8  GD 5436U
+       1100  CL 6729
+       1110  PD 6832
+       1112  PD 6834 PCMCIA/CardBus Ctrlr
+       1113  PD 6833 PCMCIA/CardBus Ctrlr
+       1200  GD 7542 [Nordic]
+       1202  GD 7543 [Viking]
+       1204  GD 7541 [Nordic Light]
+       4400  CD 4400
+       6001  CS 4610/11 [CrystalClear SoundFusion Audio Accelerator]
+               1014 1010  CS4610 SoundFusion Audio Accelerator
+       6003  CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator]
+               1013 4280  Crystal SoundFusion PCI Audio Accelerator
+       6005  Crystal CS4281 PCI Audio
+               1013 4281  Crystal CS4281 PCI Audio
+               10cf 10a8  Crystal CS4281 PCI Audio
+               10cf 10a9  Crystal CS4281 PCI Audio
+               10cf 10aa  Crystal CS4281 PCI Audio
+               10cf 10ab  Crystal CS4281 PCI Audio
+               10cf 10ac  Crystal CS4281 PCI Audio
+               10cf 10ad  Crystal CS4281 PCI Audio
+               10cf 10b4  Crystal CS4281 PCI Audio
+               1179 0001  Crystal CS4281 PCI Audio
+               14c0 000c  Crystal CS4281 PCI Audio
+1014  IBM
+       0002  PCI to MCA Bridge
+       0005  Alta Lite
+       0007  Alta MP
+       000a  Fire Coral
+       0017  CPU to PCI Bridge
+       0018  TR Auto LANstreamer
+       001b  GXT-150P
+       001d  82G2675
+       0020  MCA
+       0022  IBM27-82351
+       002d  Python
+       002e  ServeRAID-3x
+       0036  Miami
+       003a  CPU to PCI Bridge
+       003e  16/4 Token ring UTP/STP controller
+               1014 003e  Token-Ring Adapter
+               1014 00cd  Token-Ring Adapter + Wake-On-LAN
+               1014 00ce  16/4 Token-Ring Adapter 2
+               1014 00cf  16/4 Token-Ring Adapter Special
+               1014 00e4  High-Speed 100/16/4 Token-Ring Adapter
+               1014 00e5  16/4 Token-Ring Adapter 2 + Wake-On-LAN
+               1014 016d  iSeries 2744 Card
+       0045  SSA Adapter
+       0046  MPIC interrupt controller
+       0047  PCI to PCI Bridge
+       0048  PCI to PCI Bridge
+       0049  Warhead SCSI Controller
+       004e  ATM Controller (14104e00)
+       004f  ATM Controller (14104f00)
+       0050  ATM Controller (14105000)
+       0053  25 MBit ATM Controller
+       0057  MPEG PCI Bridge
+       005c  i82557B 10/100
+       007c  ATM Controller (14107c00)
+       007d  3780IDSP [MWave]
+       0090  GXT 3000P
+               1014 008e  GXT-3000P
+       0095  20H2999 PCI Docking Bridge
+       0096  Chukar chipset SCSI controller
+               1014 0097  iSeries 2778 DASD IOA
+               1014 0098  iSeries 2763 DASD IOA
+               1014 0099  iSeries 2748 DASD IOA
+       00a5  ATM Controller (1410a500)
+       00a6  ATM 155MBPS MM Controller (1410a600)
+       00b7  256-bit Graphics Rasterizer [Fire GL1]
+               1902 00b8  Fire GL1
+       00be  ATM 622MBPS Controller (1410be00)
+       0142  Yotta Video Compositor Input
+               1014 0143  Yotta Input Controller (ytin)
+       0144  Yotta Video Compositor Output
+               1014 0145  Yotta Output Controller (ytout)
+       0156  405GP PLB to PCI Bridge
+       01bd  Netfinity ServeRAID controller
+       01be  ServeRAID-4M
+       01bf  ServeRAID-4L
+       022e  ServeRAID-4H
+       ffff  MPIC-2 interrupt controller
+1015  LSI Logic Corp of Canada
+1016  ICL Personal Systems
+1017  SPEA Software AG
+       5343  SPEA 3D Accelerator
+1018  Unisys Systems
+1019  Elitegroup Computer Systems
+101a  AT&T GIS (NCR)
+       0005  100VG ethernet
+101b  Vitesse Semiconductor
+101c  Western Digital
+       0193  33C193A
+       0196  33C196A
+       0197  33C197A
+       0296  33C296A
+       3193  7193
+       3197  7197
+       3296  33C296A
+       4296  34C296
+       9710  Pipeline 9710
+       9712  Pipeline 9712
+       c24a  90C
+101e  American Megatrends Inc.
+       1960  MegaRAID
+               1028 0471  PowerEdge RAID Controller 3/QC
+               1028 0493  PowerEdge RAID Controller 3/DC
+       9010  MegaRAID
+       9030  EIDE Controller
+       9031  EIDE Controller
+       9032  EIDE & SCSI Controller
+       9033  SCSI Controller
+       9040  Multimedia card
+       9060  MegaRAID
+101f  PictureTel
+1020  Hitachi Computer Products
+1021  OKI Electric Industry Co. Ltd.
+1022  Advanced Micro Devices [AMD]
+       2000  79c970 [PCnet LANCE]
+               103c 104c  Ethernet with LAN remote power Adapter
+               103c 1064  Ethernet with LAN remote power Adapter
+               103c 1065  Ethernet with LAN remote power Adapter
+               103c 106c  Ethernet with LAN remote power Adapter
+               103c 106e  Ethernet with LAN remote power Adapter
+               103c 10ea  Ethernet with LAN remote power Adapter
+               1113 1220  EN1220 10/100 Fast Ethernet
+               1259 2450  AT-2450 10/100 Fast Ethernet
+               1259 2700  AT-2700TX 10/100 Fast Ethernet
+               1259 2701  AT-2700FX 100Mb Ethernet
+       2001  79c978 [HomePNA]
+       2020  53c974 [PCscsi]
+       2040  79c974
+       7006  AMD-751 [Irongate] System Controller
+       7007  AMD-751 [Irongate] AGP Bridge
+       700e  AMD-760 [Irongate] System Controller
+       700f  AMD-760 [Irongate] AGP Bridge
+       7400  AMD-755 [Cobra] ISA
+       7401  AMD-755 [Cobra] IDE
+       7403  AMD-755 [Cobra] ACPI
+       7404  AMD-755 [Cobra] USB
+       7408  AMD-756 [Viper] ISA
+       7409  AMD-756 [Viper] IDE
+       740b  AMD-756 [Viper] ACPI
+       740c  AMD-756 [Viper] USB
+       7410  AMD-765 [Viper] ISA
+       7411  AMD-765 [Viper] IDE
+       7413  AMD-765 [Viper] ACPI
+       7414  AMD-765 [Viper] USB
+       7440  AMD-768 [??] ISA
+       7441  AMD-768 [??] IDE
+       7443  AMD-768 [??] ACPI
+       7448  AMD-768 [??] PCI
+       7449  AMD-768 [??] USB
+1023  Trident Microsystems
+       0194  82C194
+       2000  4DWave DX
+       2001  4DWave NX
+       8400  CyberBlade/i7
+               1023 8400  CyberBlade i7 AGP
+       8420  CyberBlade/i7d
+               0e11 b15a  CyberBlade i7 AGP
+       8500  CyberBlade/i1
+       8520  CyberBlade i1
+               0e11 b16e  CyberBlade i1 AGP
+               1023 8520  CyberBlade i1 AGP
+       9320  TGUI 9320
+       9350  GUI Accelerator
+       9360  Flat panel GUI Accelerator
+       9382  Cyber 9382 [Reference design]
+       9383  Cyber 9383 [Reference design]
+       9385  Cyber 9385 [Reference design]
+       9386  Cyber 9386
+       9388  Cyber 9388
+       9397  Cyber 9397
+       939a  Cyber 9397DVD
+       9420  TGUI 9420
+       9430  TGUI 9430
+       9440  TGUI 9440
+       9460  TGUI 9460
+       9470  TGUI 9470
+       9520  Cyber 9520
+       9525  Cyber 9525
+       9540  Cyber 9540
+       9660  TGUI 9660/968x/968x
+       9680  TGUI 9680
+       9682  TGUI 9682
+       9683  TGUI 9683
+       9685  ProVIDIA 9685
+       9750  3DImage 975
+               1014 9750  3DImage 9750
+               1023 9750  3DImage 9750
+       9753  TGUI 9753
+       9754  TGUI 9754
+       9759  TGUI 975
+       9783  TGUI 9783
+       9785  TGUI 9785
+       9850  3DImage 9850
+       9880  Blade 3D PCI/AGP
+               1023 9880  Blade 3D
+       9910  CyberBlade/XP
+       9930  CyberBlade/XPm
+1024  Zenith Data Systems
+1025  Acer Incorporated [ALI]
+       1435  M1435
+       1445  M1445
+       1449  M1449
+       1451  M1451
+       1461  M1461
+       1489  M1489
+       1511  M1511
+       1512  ALI M1512 Aladdin
+       1513  M1513
+       1521  ALI M1521 Aladdin III CPU Bridge
+               10b9 1521  ALI M1521 Aladdin III CPU Bridge
+       1523  ALI M1523 ISA Bridge
+               10b9 1523  ALI M1523 ISA Bridge
+       1531  M1531 Northbridge [Aladdin IV/IV+]
+       1533  M1533 PCI-to-ISA Bridge
+               10b9 1533  ALI M1533 Aladdin IV/V ISA South Bridge
+       1535  M1535 PCI Bridge + Super I/O + FIR
+       1541  M1541 Northbridge [Aladdin V]
+               10b9 1541  ALI M1541 Aladdin V/V+ AGP+PCI North Bridge
+       1542  M1542 Northbridge [Aladdin V]
+       1543  M1543 PCI-to-ISA Bridge + Super I/O + FIR
+       1561  M1561 Northbridge [Aladdin 7]
+       1621  M1621 Northbridge [Aladdin-Pro II]
+       1631  M1631 Northbridge+3D Graphics [Aladdin TNT2]
+       1641  M1641 Northbridge [Aladdin-Pro IV]
+       3141  M3141
+       3143  M3143
+       3145  M3145
+       3147  M3147
+       3149  M3149
+       3151  M3151
+       3307  M3307 MPEG-I Video Controller
+       3309  M3309 MPEG-II Video w/ Software Audio Decoder
+       3321  M3321 MPEG-II Audio/Video Decoder
+       5212  ALI M4803
+       5215  ALI PCI EIDE Controller
+       5217  M5217H
+       5219  M5219
+       5225  M5225
+       5229  M5229
+       5235  M5235
+       5237  ALI M5237 PCI USB Host Controller
+       5240  EIDE Controller
+       5241  PCMCIA Bridge
+       5242  General Purpose Controller
+       5243  PCI to PCI Bridge Controller
+       5244  Floppy Disk Controller
+       5247  ALI M1541 PCI to PCI Bridge
+       5251  M5251 P1394 OHCI Controller
+       5427  ALI PCI to AGP Bridge
+       5451  ALI M5451 PCI AC-Link Controller Audio Device
+       5453  ALI M5453 PCI AC-Link Controller Modem Device
+       7101  ALI M7101 PCI PMU Power Management Controller
+               10b9 7101  ALI M7101 PCI PMU Power Management Controller
+1028  Dell Computer Corporation
+       0001  PowerEdge Expandable RAID Controller 2/Si
+       0002  PowerEdge Expandable RAID Controller 3/Di
+       0003  PowerEdge Expandable RAID Controller 3/Si
+       0004  PowerEdge Expandable RAID Controller 3/Si
+       0005  PowerEdge Expandable RAID Controller 3/Di
+       0006  PowerEdge Expandable RAID Controller 3/Di
+       0008  PowerEdge Expandable RAID Controller 3/Di
+       000a  PowerEdge Expandable RAID Controller 3/Di
+1029  Siemens Nixdorf IS
+102a  LSI Logic
+       0000  HYDRA
+       0010  ASPEN
+102b  Matrox Graphics, Inc.
+# DJ: I've a suspicion that 0010 is a duplicate of 0d10.
+       0010  MGA-I [Impression?]
+       0518  MGA-II [Athena]
+       0519  MGA 2064W [Millennium]
+       051a  MGA 1064SG [Mystique]
+               102b 1100  MGA-1084SG Mystique
+               102b 1200  MGA-1084SG Mystique
+               1100 102b  MGA-1084SG Mystique
+               110a 0018  Scenic Pro C5 (D1025)
+       051b  MGA 2164W [Millennium II]
+               102b 051b  MGA-2164W Millennium II
+               102b 1100  MGA-2164W Millennium II
+               102b 1200  MGA-2164W Millennium II
+       051e  MGA 1064SG [Mystique] AGP
+       051f  MGA 2164W [Millennium II] AGP
+       0520  MGA G200
+               102b dbc2  G200 Multi-Monitor
+               102b dbc8  G200 Multi-Monitor
+               102b dbe2  G200 Multi-Monitor
+               102b dbe8  G200 Multi-Monitor
+               102b ff03  Millennium G200 SD
+               102b ff04  Marvel G200
+       0521  MGA G200 AGP
+               1014 ff03  Millennium G200 AGP
+               102b 48e9  Mystique G200 AGP
+               102b 48f8  Millennium G200 SD AGP
+               102b 4a60  Millennium G200 LE AGP
+               102b 4a64  Millennium G200 AGP
+               102b c93c  Millennium G200 AGP
+               102b c9b0  Millennium G200 AGP
+               102b c9bc  Millennium G200 AGP
+               102b ca60  Millennium G250 LE AGP
+               102b ca6c  Millennium G250 AGP
+               102b dbbc  Millennium G200 AGP
+               102b dbc2  Millennium G200 MMS (Dual G200)
+               102b dbc8  Millennium G200 MMS (Dual G200)
+               102b dbe2  Millennium G200 MMS (Quad G200)
+               102b dbe8  Millennium G200 MMS (Quad G200)
+               102b f806  Mystique G200 Video AGP
+               102b ff00  MGA-G200 AGP
+               102b ff02  Mystique G200 AGP
+               102b ff03  Millennium G200 AGP
+               102b ff04  Marvel G200 AGP
+               110a 0032  MGA-G200 AGP
+       0525  MGA G400 AGP
+               0e11 b16f  Matrox MGA-G400 AGP
+               102b 0328  Millennium G400 16Mb SDRAM
+               102b 0338  Millennium G400 16Mb SDRAM
+               102b 0378  Millennium G400 32Mb SDRAM
+               102b 0541  Millennium G450 Dual Head
+               102b 0542  Millennium G450 Dual Head LX
+               102b 0641  Millennium G450 32Mb SDRAM
+               102b 0642  Millennium G450 32Mb SDRAM Dual Head LX
+               102b 07c0  Millennium G450 Dual Head LE
+               102b 07c1  Millennium G450 SDR Dual Head
+               102b 0d41  Millennium G450 Dual Head PCI
+               102b 0d42  Millennium G450 Dual Head LX PCI
+               102b 0e00  Marvel G450 eTV
+               102b 0e01  Marvel G450 eTV
+               102b 0e02  Marvel G450 eTV
+               102b 0e03  Marvel G450 eTV
+               102b 0f80  Millennium G450 Low Profile
+               102b 0f81  Millennium G450 Low Profile
+               102b 0f82  Millennium G450 Low Profile DVI
+               102b 0f83  Millennium G450 Low Profile DVI
+               102b 19d8  Millennium G400 16Mb SGRAM
+               102b 19f8  Millennium G400 32Mb SGRAM
+               102b 2159  Millennium G400 Dual Head 16Mb
+               102b 2179  Millennium G400 MAX/Dual Head 32Mb
+               102b 217d  Millennium G400 Dual Head Max
+               102b 2f58  Millennium G400
+               102b 2f78  Millennium G400
+               102b 3693  Marvel G400 AGP
+               1705 0001  Millennium G450 32MB SGRAM
+               b16f 0e11  Matrox MGA-G400 AGP
+       0d10  MGA Ultima/Impression
+       1000  MGA G100 [Productiva]
+               102b ff01  Productiva G100
+               102b ff05  Productiva G100 Multi-Monitor
+       1001  MGA G100 [Productiva] AGP
+               102b 1001  MGA-G100 AGP
+               102b ff00  MGA-G100 AGP
+               102b ff01  MGA-G100 Productiva AGP
+               102b ff03  Millennium G100 AGP
+               102b ff04  MGA-G100 AGP
+               102b ff05  MGA-G100 Productiva AGP Multi-Monitor
+               110a 001e  MGA-G100 AGP
+       2007  MGA Mistral
+       2527  MGA G550 AGP
+               102b 0f84  Millennium G550 Dual Head DDR 32Mb
+       4536  VIA Framegrabber
+       6573  Shark 10/100 Multiport SwitchNIC
+102c  Chips and Technologies
+       00b8  F64310
+       00c0  F69000 HiQVideo
+       00d0  F65545
+       00d8  F65545
+       00dc  F65548
+       00e0  F65550
+       00e4  F65554
+       00e5  F65555 HiQVPro
+               0e11 b049  Armada 1700 Laptop Display Controller
+       00f0  F68554
+       00f4  F68554 HiQVision
+       00f5  F68555
+102d  Wyse Technology Inc.
+       50dc  3328 Audio
+102e  Olivetti Advanced Technology
+102f  Toshiba America
+       0009  r4x00
+       0020  ATM Meteor 155
+1030  TMC Research
+1031  Miro Computer Products AG
+       5601  DC20 ASIC
+       5607  Video I/O & motion JPEG compressor
+       5631  Media 3D
+       6057  MiroVideo DC10/DC30+
+1032  Compaq
+1033  NEC Corporation
+       0001  PCI to 486-like bus Bridge
+       0002  PCI to VL98 Bridge
+       0003  ATM Controller
+       0004  R4000 PCI Bridge
+       0005  PCI to 486-like bus Bridge
+       0006  GUI Accelerator
+       0007  PCI to UX-Bus Bridge
+       0008  GUI Accelerator
+       0009  GUI Accelerator for W98
+       001a  [Nile II]
+       0021  Vrc4373 [Nile I]
+       0029  PowerVR PCX1
+       002a  PowerVR 3D
+       0035  USB
+       003e  NAPCCARD Cardbus Controller
+       0046  PowerVR PCX2 [midas]
+       005a  Vrc5074 [Nile 4]
+       0063  Firewarden
+       0067  PowerVR Neon 250 Chipset
+               1010 0020  PowerVR Neon 250 AGP 32Mb
+               1010 0080  PowerVR Neon 250 AGP 16Mb
+               1010 0088  PowerVR Neon 250 16Mb
+               1010 0090  PowerVR Neon 250 AGP 16Mb
+               1010 0098  PowerVR Neon 250 16Mb
+               1010 00a0  PowerVR Neon 250 AGP 32Mb
+               1010 00a8  PowerVR Neon 250 32Mb
+               1010 0120  PowerVR Neon 250 AGP 32Mb
+       0074  56k Voice Modem
+               1033 8014  RCV56ACF 56k Voice Modem
+       009b  Vrc5476
+1034  Framatome Connectors USA Inc.
+1035  Comp. & Comm. Research Lab
+1036  Future Domain Corp.
+       0000  TMC-18C30 [36C70]
+1037  Hitachi Micro Systems
+1038  AMP, Inc
+1039  Silicon Integrated Systems [SiS]
+       0001  5591/5592 AGP
+       0002  SG86C202
+       0006  85C501/2/3
+       0008  85C503/5513
+       0009  ACPI
+       0018  SiS85C503/5513 (LPC Bridge)
+       0200  5597/5598 VGA
+               1039 0000  SiS5597 SVGA (Shared RAM)
+       0204  82C204
+       0205  SG86C205
+       0406  85C501/2
+       0496  85C496
+       0530  530 Host
+       0540  540 Host
+       0597  5513C
+       0601  85C601
+       0620  620 Host
+       0630  630 Host
+       0730  730 Host
+       0735  735 Host
+       0900  SiS900 10/100 Ethernet
+               1039 0900  SiS900 10/100 Ethernet Adapter
+       3602  83C602
+       5107  5107
+       5300  SiS540 PCI Display Adapter
+       5401  486 PCI Chipset
+       5511  5511/5512
+       5513  5513 [IDE]
+               1039 5513  SiS5513 EIDE Controller (A,B step)
+       5517  5517
+       5571  5571
+       5581  5581 Pentium Chipset
+       5582  5582
+       5591  5591/5592 Host
+       5596  5596 Pentium Chipset
+       5597  5597 [SiS5582]
+       5600  5600 Host
+       6204  Video decoder & MPEG interface
+       6205  VGA Controller
+       6236  6236 3D-AGP
+       6300  SiS630 GUI Accelerator+3D
+       6306  6306 3D-AGP
+               1039 6306  SiS530,620 GUI Accelerator+3D
+       6326  86C326
+               1039 6326  SiS6326 GUI Accelerator
+               1092 0a50  SpeedStar A50
+               1092 0a70  SpeedStar A70
+               1092 4910  SpeedStar A70
+               1092 4920  SpeedStar A70
+               1569 6326  SiS6326 GUI Accelerator
+       7001  7001
+       7007  OHCI Compliant FireWire Controller
+       7012  SiS7012 PCI Audio Accelerator
+       7016  SiS7016 10/100 Ethernet Adapter
+               1039 7016  SiS7016 10/100 Ethernet Adapter
+       7018  SiS PCI Audio Accelerator
+               1014 01b6  SiS PCI Audio Accelerator
+               1014 01b7  SiS PCI Audio Accelerator
+               1019 7018  SiS PCI Audio Accelerator
+               1025 000e  SiS PCI Audio Accelerator
+               1025 0018  SiS PCI Audio Accelerator
+               1039 7018  SiS PCI Audio Accelerator
+               1043 800b  SiS PCI Audio Accelerator
+               1054 7018  SiS PCI Audio Accelerator
+               107d 5330  SiS PCI Audio Accelerator
+               107d 5350  SiS PCI Audio Accelerator
+               1170 3209  SiS PCI Audio Accelerator
+               1462 400a  SiS PCI Audio Accelerator
+               14a4 2089  SiS PCI Audio Accelerator
+               14cd 2194  SiS PCI Audio Accelerator
+               14ff 1100  SiS PCI Audio Accelerator
+               152d 8808  SiS PCI Audio Accelerator
+               1558 1103  SiS PCI Audio Accelerator
+               1558 2200  SiS PCI Audio Accelerator
+               1563 7018  SiS PCI Audio Accelerator
+               15c5 0111  SiS PCI Audio Accelerator
+               270f a171  SiS PCI Audio Accelerator
+               a0a0 0022  SiS PCI Audio Accelerator
+103a  Seiko Epson Corporation
+103b  Tatung Co. of America
+103c  Hewlett-Packard Company
+       1005  A4977A Visualize EG
+       1030  J2585A
+       1031  J2585B
+               103c 1040  J2973A DeskDirect 10BaseT NIC
+               103c 1041  J2585B DeskDirect 10/100VG NIC
+               103c 1042  J2970A DeskDirect 10BaseT/2 NIC
+       1040  J2973A DeskDirect 10BaseT NIC
+       1041  J2585B DeskDirect 10/100 NIC
+       1042  J2970A DeskDirect 10BaseT/2 NIC
+       1064  79C970 PCnet Ethernet Controller
+       10c1  NetServer Smart IRQ Router
+       10ed  TopTools Remote Control
+       1200  82557B 10/100 NIC
+       1219  NetServer PCI Hot-Plug Controller
+       121a  NetServer SMIC Controller
+       121b  NetServer Legacy COM Port Decoder
+       121c  NetServer PCI COM Port Decoder
+       2910  E2910A
+       2925  E2925A
+103e  Solliday Engineering
+103f  Synopsys/Logic Modeling Group
+1040  Accelgraphics Inc.
+1041  Computrend
+1042  Micron
+       1000  FDC 37C665
+       1001  37C922
+       3000  Samurai_0
+       3010  Samurai_1
+       3020  Samurai_IDE
+1043  Asustek Computer, Inc.
+1044  Distributed Processing Technology
+       1012  Domino RAID Engine
+       a400  SmartCache/Raid I-IV Controller
+       a500  PCI Bridge
+       a501  SmartRAID V Controller
+1045  OPTi Inc.
+       a0f8  82C750 [Vendetta] USB Controller
+       c101  92C264
+       c178  92C178
+       c556  82X556 [Viper]
+       c557  82C557 [Viper-M]
+       c558  82C558 [Viper-M ISA+IDE]
+       c567  82C750 [Vendetta], device 0
+       c568  82C750 [Vendetta], device 1
+       c569  82C579 [Viper XPress+ Chipset]
+       c621  82C621
+       c700  82C700
+       c701  82C701 [FireStar Plus]
+       c814  82C814 [Firebridge 1]
+       c822  82C822
+       c824  82C824
+       c825  82C825 [Firebridge 2]
+       c832  82C832
+       c861  82C861
+       c895  82C895
+       c935  EV1935 ECTIVA MachOne PCI Audio
+       d568  82C825 [Firebridge 2]
+1046  IPC Corporation, Ltd.
+1047  Genoa Systems Corp
+1048  Elsa AG
+       1000  QuickStep 1000
+       3000  QuickStep 3000
+1049  Fountain Technologies, Inc.
+104a  SGS Thomson Microelectronics
+       0008  STG 2000X
+       0009  STG 1764X
+       1746  STG 1764X
+       3520  MPEG-II decoder card
+104b  BusLogic
+       0140  BT-946C (old) [multimaster 01]
+       1040  BT-946C (BA80C30) [MultiMaster 10]
+       8130  Flashpoint LT
+104c  Texas Instruments
+       0500  100 MBit LAN Controller
+       0508  TMS380C2X Compressor Interface
+       1000  Eagle i/f AS
+       3d04  TVP4010 [Permedia]
+       3d07  TVP4020 [Permedia 2]
+               1092 0127  FIRE GL 1000 PRO
+               1092 0136  FIRE GL 1000 PRO
+               1092 0141  FIRE GL 1000 PRO
+               1092 0146  FIRE GL 1000 PRO
+               1092 0148  FIRE GL 1000 PRO
+               1092 0149  FIRE GL 1000 PRO
+               1092 0152  FIRE GL 1000 PRO
+               1092 0154  FIRE GL 1000 PRO
+               1092 0155  FIRE GL 1000 PRO
+               1092 0156  FIRE GL 1000 PRO
+               1092 0157  FIRE GL 1000 PRO
+               1097 3d01  Jeronimo Pro
+               3d3d 0100  Reference Permedia 2 3D
+       8000  PCILynx/PCILynx2 IEEE 1394 Link Layer Controller
+               e4bf 1010  CF1-1-SNARE
+               e4bf 1020  CF1-2-SNARE
+       8009  OHCI Compliant FireWire Controller
+       8019  TSB12LV23 OHCI Compliant IEEE-1394 Controller
+               11bd 000a  Studio DV500-1394
+               11bd 000e  Studio DV
+               e4bf 1010  CF2-1-CYMBAL
+       a001  TDC1570
+       a100  TDC1561
+       ac10  PCI1050
+       ac11  PCI1053
+       ac12  PCI1130
+       ac13  PCI1031
+       ac15  PCI1131
+       ac16  PCI1250
+       ac17  PCI1220
+       ac18  PCI1260
+       ac19  PCI1221
+       ac1a  PCI1210
+       ac1b  PCI1450
+       ac1c  PCI1225
+       ac1d  PCI1251A
+       ac1e  PCI1211
+       ac1f  PCI1251B
+       ac20  TI 2030
+       ac30  PCI1260 PC card Cardbus Controller
+       ac40  PCI4450 PC card Cardbus Controller
+       ac41  PCI4410 PC card Cardbus Controller
+       ac42  PCI4451 PC card Cardbus Controller
+       ac50  PCI1410 PC card Cardbus Controller
+       ac51  PCI1420
+       ac52  PCI1451 PC card Cardbus Controller
+       ac53  PCI1421 PC card Cardbus Controller
+       fe00  FireWire Host Controller
+       fe03  12C01A FireWire Host Controller
+104d  Sony Corporation
+       8009  CXD1947Q i.LINK Controller
+       8039  CXD3222 i.LINK Controller
+       8056  Rockwell HCF 56K modem
+       808a  Memory Stick Controller
+104e  Oak Technology, Inc
+       0017  OTI-64017
+       0107  OTI-107 [Spitfire]
+       0109  Video Adapter
+       0111  OTI-64111 [Spitfire]
+       0217  OTI-64217
+       0317  OTI-64317
+104f  Co-time Computer Ltd
+1050  Winbond Electronics Corp
+       0000  NE2000
+       0001  W83769F
+       0105  W82C105
+       0840  W89C840
+               1050 0001  W89C840 Ethernet Adapter
+               1050 0840  W89C840 Ethernet Adapter
+       0940  W89C940
+       5a5a  W89C940F
+       9970  W9970CF
+1051  Anigma, Inc.
+1052  ?Young Micro Systems
+1053  Young Micro Systems
+1054  Hitachi, Ltd
+1055  EFAR Microsystems
+       9130  EIDE Controller
+       9460  PCI to ISA Bridge
+       9462  USB Universal Host Controller [OHCI]
+       9463  Power Management Controller [Bridge]
+1056  ICL
+# Motorola made a mistake and used 1507 instead of 1057 in some chips. Please look at the 1507 entry as well when updating this.
+1057  Motorola
+       0001  MPC105 [Eagle]
+       0002  MPC106 [Grackle]
+       0100  MC145575 [HFC-PCI]
+       0431  KTI829c 100VG
+       1801  Audio I/O Controller (MIDI)
+               ecc0 0030  Layla
+       4801  Raven
+       4802  Falcon
+       4803  Hawk
+       4806  CPX8216
+       4d68  20268
+       5600  SM56 PCI Modem
+               1057 0300  SM56 PCI Speakerphone Modem
+               1057 0301  SM56 PCI Voice Modem
+               1057 0302  SM56 PCI Fax Modem
+               1057 5600  SM56 PCI Voice modem
+               13d2 0300  SM56 PCI Speakerphone Modem
+               13d2 0301  SM56 PCI Voice modem
+               13d2 0302  SM56 PCI Fax Modem
+               1436 0300  SM56 PCI Speakerphone Modem
+               1436 0301  SM56 PCI Voice modem
+               1436 0302  SM56 PCI Fax Modem
+               144f 100c  SM56 PCI Fax Modem
+               1494 0300  SM56 PCI Speakerphone Modem
+               1494 0301  SM56 PCI Voice modem
+               14c8 0300  SM56 PCI Speakerphone Modem
+               14c8 0302  SM56 PCI Fax Modem
+               1668 0300  SM56 PCI Speakerphone Modem
+               1668 0302  SM56 PCI Fax Modem
+1058  Electronics & Telecommunications RSH
+1059  Teknor Industrial Computers Inc
+105a  Promise Technology, Inc.
+       0d30  20265
+       4d30  20267
+       4d33  20246
+       4d38  20262
+       4d68  20268
+       4d69  20269
+       5300  DC5300
+       6268  20268R
+105b  Foxconn International, Inc.
+105c  Wipro Infotech Limited
+105d  Number 9 Computer Company
+       2309  Imagine 128
+       2339  Imagine 128-II
+               105d 0000  Imagine 128 series 2 4Mb VRAM
+               105d 0001  Imagine 128 series 2 4Mb VRAM
+               105d 0002  Imagine 128 series 2 4Mb VRAM
+               105d 0003  Imagine 128 series 2 4Mb VRAM
+               105d 0004  Imagine 128 series 2 4Mb VRAM
+               105d 0005  Imagine 128 series 2 4Mb VRAM
+               105d 0006  Imagine 128 series 2 4Mb VRAM
+               105d 0007  Imagine 128 series 2 4Mb VRAM
+               105d 0008  Imagine 128 series 2e 4Mb DRAM
+               105d 0009  Imagine 128 series 2e 4Mb DRAM
+               105d 000a  Imagine 128 series 2 8Mb VRAM
+               105d 000b  Imagine 128 series 2 8Mb H-VRAM
+       493d  Imagine 128 T2R [Ticket to Ride]
+       5348  Revolution 4
+105e  Vtech Computers Ltd
+105f  Infotronic America Inc
+1060  United Microelectronics [UMC]
+       0001  UM82C881
+       0002  UM82C886
+       0101  UM8673F
+       0881  UM8881
+       0886  UM8886F
+       0891  UM8891A
+       1001  UM886A
+       673a  UM8886BF
+       673b  EIDE Master/DMA
+       8710  UM8710
+       886a  UM8886A
+       8881  UM8881F
+       8886  UM8886F
+       888a  UM8886A
+       8891  UM8891A
+       9017  UM9017F
+       9018  UM9018
+       9026  UM9026
+       e881  UM8881N
+       e886  UM8886N
+       e88a  UM8886N
+       e891  UM8891N
+1061  I.I.T.
+       0001  AGX016
+       0002  IIT3204/3501
+1062  Maspar Computer Corp
+1063  Ocean Office Automation
+1064  Alcatel
+1065  Texas Microsystems
+1066  PicoPower Technology
+       0000  PT80C826
+       0001  PT86C52x [Vesuvius]
+       0002  PT80C524 [Nile]
+       0005  National PC87550 System Controller
+       8002  PT80C524 [Nile]
+1067  Mitsubishi Electric
+       1002  VG500 [VolumePro Volume Rendering Accelerator]
+1068  Diversified Technology
+1069  Mylex Corporation
+       0001  DAC960P
+       0002  DAC960PD
+       0010  DAC960PX
+       ba55  eXtremeRAID support Device
+106a  Aten Research Inc
+106b  Apple Computer Inc.
+       0001  Bandit PowerPC host bridge
+       0002  Grand Central I/O
+       0003  Control Video
+       0004  PlanB Video-In
+       0007  O'Hare I/O
+       000e  Hydra Mac I/O
+       0010  Heathrow Mac I/O
+       0017  Paddington Mac I/O
+       0018  UniNorth FireWire
+       0019  KeyLargo USB
+       001e  UniNorth PCI
+       001f  UniNorth PCI
+       0020  UniNorth AGP
+       0021  UniNorth GMAC
+       0022  KeyLargo Mac I/O
+106c  Hyundai Electronics America
+       8801  Dual Pentium ISA/PCI Motherboard
+       8802  PowerPC ISA/PCI Motherboard
+       8803  Dual Window Graphics Accelerator
+       8804  LAN Controller
+       8805  100-BaseT LAN
+106d  Sequent Computer Systems
+106e  DFI, Inc
+106f  City Gate Development Ltd
+1070  Daewoo Telecom Ltd
+1071  Mitac
+1072  GIT Co Ltd
+1073  Yamaha Corporation
+       0001  3D GUI Accelerator
+       0002  YGV615 [RPA3 3D-Graphics Controller]
+       0003  YMF-740
+       0004  YMF-724
+               1073 0004  YMF724-Based PCI Audio Adapter
+       0005  DS1 Audio
+               1073 0005  DS-XG PCI Audio CODEC
+       0006  DS1 Audio
+       0008  DS1 Audio
+               1073 0008  DS-XG PCI Audio CODEC
+       000a  DS1L Audio
+               1073 0004  DS-XG PCI Audio CODEC
+               1073 000a  DS-XG PCI Audio CODEC
+       000c  YMF-740C [DS-1L Audio Controller]
+               107a 000c  DS-XG PCI Audio CODEC
+       000d  YMF-724F [DS-1 Audio Controller]
+               1073 000d  DS-XG PCI Audio CODEC
+       0010  YMF-744B [DS-1S Audio Controller]
+               1073 0006  DS-XG PCI Audio CODEC
+               1073 0010  DS-XG PCI Audio CODEC
+       0012  YMF-754 [DS-1E Audio Controller]
+               1073 0012  DS-XG PCI Audio Codec
+       0020  DS-1 Audio
+       2000  DS2416 Digital Mixing Card
+               1073 2000  DS2416 Digital Mixing Card
+1074  NexGen Microsystems
+       4e78  82c500/1
+1075  Advanced Integrations Research
+1076  Chaintech Computer Co. Ltd
+1077  QLogic Corp.
+       1016  QLA10160
+       1020  ISP1020
+       1022  ISP1022
+       1080  QLA1080
+       1216  QLA12160
+               101e 8471  QLA12160 on AMI MegaRAID
+               101e 8493  QLA12160 on AMI MegaRAID
+       1240  QLA1240
+       1280  QLA1280
+       2020  ISP2020A
+       2100  QLA2100
+       2200  QLA2200
+       2300  QLA2300
+1078  Cyrix Corporation
+       0000  5510 [Grappa]
+       0001  PCI Master
+       0002  5520 [Cognac]
+       0100  5530 Legacy [Kahlua]
+       0101  5530 SMI [Kahlua]
+       0102  5530 IDE [Kahlua]
+       0103  5530 Audio [Kahlua]
+       0104  5530 Video [Kahlua]
+1079  I-Bus
+107a  NetWorth
+107b  Gateway 2000
+107c  LG Electronics [Lucky Goldstar Co. Ltd]
+107d  LeadTek Research Inc.
+       0000  P86C850
+107e  Interphase Corporation
+       0001  ATM Interface Card
+       0002  100 VG AnyLan Controller
+       0008  155 Mbit ATM Controller
+107f  Data Technology Corporation
+       0802  SL82C105
+1080  Contaq Microsystems
+       0600  82C599
+       c691  Cypress CY82C691
+       c693  82c693
+1081  Supermac Technology
+       0d47  Radius PCI to NuBUS Bridge
+1082  EFA Corporation of America
+1083  Forex Computer Corporation
+       0001  FR710
+1084  Parador
+1085  Tulip Computers Int.B.V.
+1086  J. Bond Computer Systems
+1087  Cache Computer
+1088  Microcomputer Systems (M) Son
+1089  Data General Corporation
+108a  Bit3 Computer Corp.
+       0001  VME Bridge Model 617
+       0010  VME Bridge Model 618
+       3000  VME Bridge Model 2706
+108c  Oakleigh Systems Inc.
+108d  Olicom
+       0001  Token-Ring 16/4 PCI Adapter (3136/3137)
+       0002  16/4 Token Ring
+       0004  RapidFire 3139 Token-Ring 16/4 PCI Adapter
+               108d 0004  OC-3139/3140 RapidFire Token-Ring 16/4 Adapter
+       0005  GoCard 3250 Token-Ring 16/4 CardBus PC Card
+       0006  OC-3530 RapidFire Token-Ring 100
+       0007  RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter
+               108d 0007  OC-3141 RapidFire Token-Ring 16/4 Adapter
+       0008  RapidFire 3540 HSTR 100/16/4 PCI Adapter
+               108d 0008  OC-3540 RapidFire HSTR 100/16/4 Adapter
+       0011  OC-2315
+       0012  OC-2325
+       0013  OC-2183/2185
+       0014  OC-2326
+       0019  OC-2327/2250 10/100 Ethernet Adapter
+               108d 0016  OC-2327 Rapidfire 10/100 Ethernet Adapter
+               108d 0017  OC-2250 GoCard 10/100 Ethernet Adapter
+       0021  OC-6151/6152 [RapidFire ATM 155]
+       0022  ATM Adapter
+108e  Sun Microsystems Computer Corp.
+       0001  EBUS
+       1000  EBUS
+       1001  Happy Meal
+       1100  RIO EBUS
+       1101  RIO GEM
+       1102  RIO 1394
+       1103  RIO USB
+       2bad  GEM
+       5000  Simba Advanced PCI Bridge
+       5043  SunPCI Co-processor
+       8000  Psycho PCI Bus Module
+       8001  Schizo PCI Bus Module
+       a000  Ultra IIi
+       a001  Ultra IIe
+108f  Systemsoft
+1090  Encore Computer Corporation
+1091  Intergraph Corporation
+       0020  3D graphics processor
+       0021  3D graphics processor w/Texturing
+       0040  3D graphics frame buffer
+       0041  3D graphics frame buffer
+       0060  Proprietary bus bridge
+       00e4  Powerstorm 4D50T
+       0720  Motion JPEG codec
+1092  Diamond Multimedia Systems
+       00a0  Speedstar Pro SE
+       00a8  Speedstar 64
+       0550  Viper V550
+       08d4  Supra 2260 Modem
+       1092  Viper V330
+       6120  Maximum DVD
+       8810  Stealth SE
+       8811  Stealth 64/SE
+       8880  Stealth
+       8881  Stealth
+       88b0  Stealth 64
+       88b1  Stealth 64
+       88c0  Stealth 64
+       88c1  Stealth 64
+       88d0  Stealth 64
+       88d1  Stealth 64
+       88f0  Stealth 64
+       88f1  Stealth 64
+       9999  DMD-I0928-1 "Monster sound" sound chip
+1093  National Instruments
+       0160  PCI-DIO-96
+       0162  PCI-MIO-16XE-50
+       1170  PCI-MIO-16XE-10
+       1180  PCI-MIO-16E-1
+       1190  PCI-MIO-16E-4
+       1330  PCI-6031E
+       1350  PCI-6071E
+       2a60  PCI-6023E
+       b001  IMAQ-PCI-1408
+       b011  IMAQ-PXI-1408
+       b021  IMAQ-PCI-1424
+       b031  IMAQ-PCI-1413
+       b041  IMAQ-PCI-1407
+       b051  IMAQ-PXI-1407
+       b061  IMAQ-PCI-1411
+       b071  IMAQ-PCI-1422
+       b081  IMAQ-PXI-1422
+       b091  IMAQ-PXI-1411
+       c801  PCI-GPIB
+1094  First International Computers [FIC]
+1095  CMD Technology Inc
+       0640  PCI0640
+       0643  PCI0643
+       0646  PCI0646
+       0647  PCI0647
+       0648  PCI0648
+       0649  PCI0649
+       0650  PBC0650A
+       0670  USB0670
+       0673  USB0673
+       0680  PCI0680
+1096  Alacron
+1097  Appian Technology
+1098  Quantum Designs (H.K.) Ltd
+       0001  QD-8500
+       0002  QD-8580
+1099  Samsung Electronics Co., Ltd
+109a  Packard Bell
+109b  Gemlight Computer Ltd.
+109c  Megachips Corporation
+109d  Zida Technologies Ltd.
+109e  Brooktree Corporation
+       0350  Bt848 TV with DMA push
+       0351  Bt849A Video capture
+       036c  Bt879(??) Video Capture
+               13e9 0070  Win/TV (Video Section)
+       036e  Bt878
+               0070 13eb  WinTV/GO
+               127a 0001  Bt878 Mediastream Controller NTSC
+               127a 0002  Bt878 Mediastream Controller PAL BG
+               127a 0003  Bt878a Mediastream Controller PAL BG
+               127a 0048  Bt878/832 Mediastream Controller
+               144f 3000  MagicTView CPH060 - Video
+               14f1 0001  Bt878 Mediastream Controller NTSC
+               14f1 0002  Bt878 Mediastream Controller PAL BG
+               14f1 0003  Bt878a Mediastream Controller PAL BG
+               14f1 0048  Bt878/832 Mediastream Controller
+               1851 1850  FlyVideo'98 - Video
+               1851 1851  FlyVideo II
+               1852 1852  FlyVideo'98 - Video (with FM Tuner)
+       036f  Bt879
+               127a 0044  Bt879 Video Capture NTSC
+               127a 0122  Bt879 Video Capture PAL I
+               127a 0144  Bt879 Video Capture NTSC
+               127a 0222  Bt879 Video Capture PAL BG
+               127a 0244  Bt879a Video Capture NTSC
+               127a 0322  Bt879 Video Capture NTSC
+               127a 0422  Bt879 Video Capture NTSC
+               127a 1122  Bt879 Video Capture PAL I
+               127a 1222  Bt879 Video Capture PAL BG
+               127a 1322  Bt879 Video Capture NTSC
+               127a 1522  Bt879a Video Capture PAL I
+               127a 1622  Bt879a Video Capture PAL BG
+               127a 1722  Bt879a Video Capture NTSC
+               14f1 0044  Bt879 Video Capture NTSC
+               14f1 0122  Bt879 Video Capture PAL I
+               14f1 0144  Bt879 Video Capture NTSC
+               14f1 0222  Bt879 Video Capture PAL BG
+               14f1 0244  Bt879a Video Capture NTSC
+               14f1 0322  Bt879 Video Capture NTSC
+               14f1 0422  Bt879 Video Capture NTSC
+               14f1 1122  Bt879 Video Capture PAL I
+               14f1 1222  Bt879 Video Capture PAL BG
+               14f1 1322  Bt879 Video Capture NTSC
+               14f1 1522  Bt879a Video Capture PAL I
+               14f1 1622  Bt879a Video Capture PAL BG
+               14f1 1722  Bt879a Video Capture NTSC
+               1851 1850  FlyVideo'98 - Video
+               1851 1851  FlyVideo II
+               1852 1852  FlyVideo'98 - Video (with FM Tuner)
+       0370  Bt880 Video Capture
+               1851 1850  FlyVideo'98
+               1851 1851  FlyVideo'98 EZ - video
+               1852 1852  FlyVideo'98 (with FM Tuner)
+       0878  Bt878
+               0070 13eb  WinTV/GO
+               127a 0001  Bt878 Video Capture (Audio Section)
+               127a 0002  Bt878 Video Capture (Audio Section)
+               127a 0003  Bt878 Video Capture (Audio Section)
+               127a 0048  Bt878 Video Capture (Audio Section)
+               13e9 0070  Win/TV (Audio Section)
+               144f 3000  MagicTView CPH060 - Audio
+               14f1 0001  Bt878 Video Capture (Audio Section)
+               14f1 0002  Bt878 Video Capture (Audio Section)
+               14f1 0003  Bt878 Video Capture (Audio Section)
+               14f1 0048  Bt878 Video Capture (Audio Section)
+       0879  Bt879 Video Capture (Audio Section)
+               127a 0044  Bt879 Video Capture (Audio Section)
+               127a 0122  Bt879 Video Capture (Audio Section)
+               127a 0144  Bt879 Video Capture (Audio Section)
+               127a 0222  Bt879 Video Capture (Audio Section)
+               127a 0244  Bt879 Video Capture (Audio Section)
+               127a 0322  Bt879 Video Capture (Audio Section)
+               127a 0422  Bt879 Video Capture (Audio Section)
+               127a 1122  Bt879 Video Capture (Audio Section)
+               127a 1222  Bt879 Video Capture (Audio Section)
+               127a 1322  Bt879 Video Capture (Audio Section)
+               127a 1522  Bt879 Video Capture (Audio Section)
+               127a 1622  Bt879 Video Capture (Audio Section)
+               127a 1722  Bt879 Video Capture (Audio Section)
+               14f1 0044  Bt879 Video Capture (Audio Section)
+               14f1 0122  Bt879 Video Capture (Audio Section)
+               14f1 0144  Bt879 Video Capture (Audio Section)
+               14f1 0222  Bt879 Video Capture (Audio Section)
+               14f1 0244  Bt879 Video Capture (Audio Section)
+               14f1 0322  Bt879 Video Capture (Audio Section)
+               14f1 0422  Bt879 Video Capture (Audio Section)
+               14f1 1122  Bt879 Video Capture (Audio Section)
+               14f1 1222  Bt879 Video Capture (Audio Section)
+               14f1 1322  Bt879 Video Capture (Audio Section)
+               14f1 1522  Bt879 Video Capture (Audio Section)
+               14f1 1622  Bt879 Video Capture (Audio Section)
+               14f1 1722  Bt879 Video Capture (Audio Section)
+       0880  Bt880 Video Capture (Audio Section)
+       2115  BtV 2115 Mediastream controller
+       2125  BtV 2125 Mediastream controller
+       2164  BtV 2164
+       2165  BtV 2165
+       8230  Bt8230 ATM Segment/Reassembly Ctrlr (SRC)
+       8472  Bt8472
+       8474  Bt8474
+109f  Trigem Computer Inc.
+10a0  Meidensha Corporation
+10a1  Juko Electronics Ind. Co. Ltd
+10a2  Quantum Corporation
+10a3  Everex Systems Inc
+10a4  Globe Manufacturing Sales
+10a5  Racal Interlan
+10a6  Informtech Industrial Ltd.
+10a7  Benchmarq Microelectronics
+10a8  Sierra Semiconductor
+       0000  STB Horizon 64
+10a9  Silicon Graphics, Inc.
+       0001  Crosstalk to PCI Bridge
+       0002  Linc I/O controller
+       0003  IOC3 I/O controller
+       0004  O2 MACE
+       0005  RAD Audio
+       0006  HPCEX
+       0007  RPCEX
+       0008  DiVO VIP
+       0009  Alteon Gigabit Ethernet
+       0010  AMP Video I/O
+       0011  GRIP
+       0012  SGH PSHAC GSN
+       1001  Magic Carpet
+       1002  Lithium
+       1003  Dual JPEG 1
+       1004  Dual JPEG 2
+       1005  Dual JPEG 3
+       1006  Dual JPEG 4
+       1007  Dual JPEG 5
+       1008  Cesium
+       2001  Fibre Channel
+       2002  ASDE
+       8001  O2 1394
+       8002  G-net NT
+10aa  ACC Microelectronics
+       0000  ACCM 2188
+10ab  Digicom
+10ac  Honeywell IAC
+10ad  Symphony Labs
+       0001  W83769F
+       0003  SL82C103
+       0005  SL82C105
+       0103  SL82c103
+       0105  SL82c105
+       0565  W83C553
+10ae  Cornerstone Technology
+10af  Micro Computer Systems Inc
+10b0  CardExpert Technology
+10b1  Cabletron Systems Inc
+10b2  Raytheon Company
+10b3  Databook Inc
+       3106  DB87144
+       b106  DB87144
+10b4  STB Systems Inc
+       1b1d  Velocity 128 3D
+               10b4 237e  Velocity 4400
+10b5  PLX Technology, Inc.
+       0001  i960 PCI bus interface
+       1076  VScom 800 8 port serial adaptor
+       1077  VScom 400 4 port serial adaptor
+       9036  9036
+       9050  PCI <-> IOBus Bridge
+               10b5 2273  SH-ARC SoHard ARCnet card
+               d84d 4006  EX-4006 1P
+               d84d 4008  EX-4008 1P EPP/ECP
+               d84d 4014  EX-4014 2P
+               d84d 4018  EX-4018 3P EPP/ECP
+               d84d 4025  EX-4025 1S(16C550) RS-232
+               d84d 4027  EX-4027 1S(16C650) RS-232
+               d84d 4028  EX-4028 1S(16C850) RS-232
+               d84d 4036  EX-4036 2S(16C650) RS-232
+               d84d 4037  EX-4037 2S(16C650) RS-232
+               d84d 4038  EX-4038 2S(16C850) RS-232
+               d84d 4052  EX-4052 1S(16C550) RS-422/485
+               d84d 4053  EX-4053 2S(16C550) RS-422/485
+               d84d 4055  EX-4055 4S(16C550) RS-232
+               d84d 4058  EX-4058 4S(16C650) RS-232
+               d84d 4065  EX-4065 8S(16C550) RS-232
+               d84d 4068  EX-4068 8S(16C650) RS-232
+               d84d 4078  EX-4078 2S(16C552) RS-232+1P
+       9060  9060
+       906d  9060SD
+               125c 0640  Aries 16000P
+       906e  9060ES
+       9080  9080
+               10b5 9080  9080 [real subsystem ID not set]
+       a001  GTEK Jetport II 2 port serial adaptor
+       c001  GTEK Cyclone 16/32 port serial adaptor
+10b6  Madge Networks
+       0001  Smart 16/4 PCI Ringnode
+       0002  Smart 16/4 PCI Ringnode Mk2
+               10b6 0002  Smart 16/4 PCI Ringnode Mk2
+               10b6 0006  16/4 CardBus Adapter
+       0003  Smart 16/4 PCI Ringnode Mk3
+               0e11 b0fd  Compaq NC4621 PCI, 4/16, WOL
+               10b6 0003  Smart 16/4 PCI Ringnode Mk3
+               10b6 0007  Presto PCI Plus Adapter
+       0004  Smart 16/4 PCI Ringnode Mk1
+       0006  16/4 Cardbus Adapter
+               10b6 0006  16/4 CardBus Adapter
+       0007  Presto PCI Adapter
+               10b6 0007  Presto PCI
+       0009  Smart 100/16/4 PCI-HS Ringnode
+               10b6 0009  Smart 100/16/4 PCI-HS Ringnode
+       000a  Smart 100/16/4 PCI Ringnode
+               10b6 000a  Smart 100/16/4 PCI Ringnode
+       000b  16/4 CardBus Adapter Mk2
+               10b6 000b  16/4 Cardbus Adapter Mk2
+       1000  Collage 25 ATM Adapter
+       1001  Collage 155 ATM Server Adapter
+10b7  3Com Corporation
+       0001  3c985 1000BaseSX
+       3390  Token Link Velocity
+       3590  3c359 TokenLink Velocity XL
+               10b7 3590  TokenLink Velocity XL Adapter
+       4500  3c450 Cyclone/unknown
+       5055  3c555 Laptop Hurricane
+       5057  3c575 [Megahertz] 10/100 LAN CardBus
+               10b7 5a57  3C575 Megahertz 10/100 LAN Cardbus PC Card
+       5157  3c575 [Megahertz] 10/100 LAN CardBus
+               10b7 5b57  3C575 Megahertz 10/100 LAN Cardbus PC Card
+       5257  3CCFE575CT Cyclone CardBus
+       5900  3c590 10BaseT [Vortex]
+       5920  3c592 EISA 10mbps Demon/Vortex
+       5950  3c595 100BaseTX [Vortex]
+       5951  3c595 100BaseT4 [Vortex]
+       5952  3c595 100Base-MII [Vortex]
+       5970  3c597 EISA Fast Demon/Vortex
+       5b57  3c595 [Megahertz] 10/100 LAN CardBus
+               10b7 5b57  3C575 Megahertz 10/100 LAN Cardbus PC Card
+       6055  3c556 Hurricane CardBus
+       6056  3c556B Hurricane CardBus
+       6560  3CCFE656 Cyclone CardBus
+       6562  3CCFEM656 [id 6562] Cyclone CardBus
+       6564  3CCFEM656 [id 6564] Cyclone CardBus
+       7646  3cSOHO100-TX Hurricane
+       8811  Token ring
+       9000  3c900 10BaseT [Boomerang]
+       9001  3c900 Combo [Boomerang]
+       9004  3c900B-TPO [Etherlink XL TPO]
+               10b7 9004  3C900B-TPO Etherlink XL TPO 10Mb
+       9005  3c900B-Combo [Etherlink XL Combo]
+               10b7 9005  3C900B-Combo Etherlink XL Combo
+       9006  3c900B-TPC [Etherlink XL TPC]
+       900a  3c900B-FL [Etherlink XL FL]
+       9050  3c905 100BaseTX [Boomerang]
+       9051  3c905 100BaseT4
+       9055  3c905B 100BaseTX [Cyclone]
+               1028 0080  3C905B Fast Etherlink XL 10/100
+               1028 0081  3C905B Fast Etherlink XL 10/100
+               1028 0082  3C905B Fast Etherlink XL 10/100
+               1028 0083  3C905B Fast Etherlink XL 10/100
+               1028 0084  3C905B Fast Etherlink XL 10/100
+               1028 0085  3C905B Fast Etherlink XL 10/100
+               1028 0086  3C905B Fast Etherlink XL 10/100
+               1028 0087  3C905B Fast Etherlink XL 10/100
+               1028 0088  3C905B Fast Etherlink XL 10/100
+               1028 0089  3C905B Fast Etherlink XL 10/100
+               1028 0090  3C905B Fast Etherlink XL 10/100
+               1028 0091  3C905B Fast Etherlink XL 10/100
+               1028 0092  3C905B Fast Etherlink XL 10/100
+               1028 0093  3C905B Fast Etherlink XL 10/100
+               1028 0094  3C905B Fast Etherlink XL 10/100
+               1028 0095  3C905B Fast Etherlink XL 10/100
+               1028 0096  3C905B Fast Etherlink XL 10/100
+               1028 0097  3C905B Fast Etherlink XL 10/100
+               1028 0098  3C905B Fast Etherlink XL 10/100
+               1028 0099  3C905B Fast Etherlink XL 10/100
+               10b7 9055  3C905B Fast Etherlink XL 10/100
+       9056  3c905B-T4
+       9058  3c905B-Combo [Deluxe Etherlink XL 10/100]
+       905a  3c905B-FX [Fast Etherlink XL FX 10/100]
+       9200  3c905C-TX [Fast Etherlink]
+               10b7 1000  3C905C-TX Fast Etherlink for PC Management NIC
+       9800  3c980-TX [Fast Etherlink XL Server Adapter]
+               10b7 9800  3c980-TX Fast Etherlink XL Server Adapter
+       9805  3c980-TX 10/100baseTX NIC [Python-T]
+               10b7 9805  3c980 10/100baseTX NIC [Python-T]
+10b8  Standard Microsystems Corp [SMC]
+       0005  83C170QF
+               1055 e000  LANEPIC
+               1055 e002  LANEPIC
+               10b8 a011  EtherPower II 10/100
+               10b8 a014  EtherPower II 10/100
+               10b8 a015  EtherPower II 10/100
+               10b8 a016  EtherPower II 10/100
+               10b8 a017  EtherPower II 10/100
+       0006  LANEPIC
+               1055 e100  LANEPIC Cardbus Fast Ethernet Adapter
+               1055 e102  LANEPIC Cardbus Fast Ethernet Adapter
+               1055 e300  LANEPIC Cardbus Fast Ethernet Adapter
+               1055 e302  LANEPIC Cardbus Fast Ethernet Adapter
+               10b8 a012  LANEPIC Cardbus Fast Ethernet Adapter
+               13a2 8002  LANEPIC Cardbus Fast Ethernet Adapter
+               13a2 8006  LANEPIC Cardbus Fast Ethernet Adapter
+       1000  FDC 37c665
+       1001  FDC 37C922
+       a011  83C170QF
+       b106  SMC34C90
+10b9  Acer Laboratories Inc. [ALi]
+       0111  C-Media CMI8738/C3DX Audio Device (OEM)
+               10b9 0111  C-Media CMI8738/C3DX Audio Device (OEM)
+       1435  M1435
+       1445  M1445
+       1449  M1449
+       1451  M1451
+       1461  M1461
+       1489  M1489
+       1511  M1511 [Aladdin]
+       1512  M1512 [Aladdin]
+       1513  M1513 [Aladdin]
+       1521  M1521 [Aladdin III]
+               10b9 1521  ALI M1521 Aladdin III CPU Bridge
+       1523  M1523
+               10b9 1523  ALI M1523 ISA Bridge
+       1531  M1531 [Aladdin IV]
+       1533  M1533 PCI to ISA Bridge [Aladdin IV]
+               10b9 1533  ALI M1533 Aladdin IV ISA Bridge
+       1541  M1541
+               10b9 1541  ALI M1541 Aladdin V/V+ AGP System Controller
+       1543  M1543
+       1621  M1621
+       1631  ALI M1631 PCI North Bridge Aladdin Pro III
+       1641  ALI M1641 PCI North Bridge Aladdin Pro IV
+       3141  M3141
+       3143  M3143
+       3145  M3145
+       3147  M3147
+       3149  M3149
+       3151  M3151
+       3307  M3307
+       3309  M3309
+       5212  M4803
+       5215  MS4803
+       5217  M5217H
+       5219  M5219
+       5225  M5225
+       5229  M5229 IDE
+       5235  M5225
+       5237  M5237 USB
+       5243  M5243
+       5247  M5247
+       5451  M5451 PCI South Bridge Audio
+       7101  M7101 PMU
+               10b9 7101  ALI M7101 Power Management Controller
+10ba  Mitsubishi Electric Corp.
+       0301  AccelGraphics AccelECLIPSE
+10bb  Dapha Electronics Corporation
+10bc  Advanced Logic Research
+10bd  Surecom Technology
+       0e34  NE-34
+10be  Tseng Labs International Co.
+10bf  Most Inc
+10c0  Boca Research Inc.
+10c1  ICM Co., Ltd.
+10c2  Auspex Systems Inc.
+10c3  Samsung Semiconductors, Inc.
+       1100  Smartether100 SC1100 LAN Adapter (i82557B)
+10c4  Award Software International Inc.
+10c5  Xerox Corporation
+10c6  Rambus Inc.
+10c7  Media Vision
+10c8  Neomagic Corporation
+       0001  NM2070 [MagicGraph NM2070]
+       0002  NM2090 [MagicGraph 128V]
+       0003  NM2093 [MagicGraph 128ZV]
+       0004  NM2160 [MagicGraph 128XD]
+               1014 00ba  MagicGraph 128XD
+               1025 1007  MagicGraph 128XD
+               1028 0074  MagicGraph 128XD
+               1028 0075  MagicGraph 128XD
+               1028 007d  MagicGraph 128XD
+               1028 007e  MagicGraph 128XD
+               1033 802f  MagicGraph 128XD
+               104d 801b  MagicGraph 128XD
+               104d 802f  MagicGraph 128XD
+               104d 830b  MagicGraph 128XD
+               10ba 0e00  MagicGraph 128XD
+               10c8 0004  MagicGraph 128XD
+               10cf 1029  MagicGraph 128XD
+               10f7 8308  MagicGraph 128XD
+               10f7 8309  MagicGraph 128XD
+               10f7 830b  MagicGraph 128XD
+               10f7 830d  MagicGraph 128XD
+               10f7 8312  MagicGraph 128XD
+       0005  [MagicMedia 256AV]
+       0006  NM2360 [MagicMedia 256ZX]
+       0016  NM2380 [MagicMedia 256XL+]
+               10c8 0016  MagicMedia 256XL+
+       0025  [MagicMedia 256AV+]
+       0083  [MagicGraph 128ZV Plus]
+       8005  [MagicMedia 256AV Audio]
+               0e11 b0d1  MagicMedia 256AV Audio Device on Discovery
+               0e11 b126  MagicMedia 256AV Audio Device on Durango
+               1014 00dd  MagicMedia 256AV Audio Device on BlackTip Thinkpad
+               1025 1003  MagicMedia 256AV Audio Device on TravelMate 720
+               1028 008f  MagicMedia 256AV Audio Device on Colorado Inspiron
+               103c 0007  MagicMedia 256AV Audio Device on Voyager II
+               103c 0008  MagicMedia 256AV Audio Device on Voyager III
+               103c 000d  MagicMedia 256AV Audio Device on Omnibook 900
+               10c8 8005  MagicMedia 256AV Audio Device on FireAnt
+               110a 8005  MagicMedia 256AV Audio Device
+               14c0 0004  MagicMedia 256AV Audio Device
+       8006  NM2360 [MagicMedia 256ZX Audio]
+10c9  Dataexpert Corporation
+10ca  Fujitsu Microelectr., Inc.
+10cb  Omron Corporation
+10cc  Mentor ARC Inc
+10cd  Advanced System Products, Inc
+       1100  ASC1100
+       1200  ASC1200 [(abp940) Fast SCSI-II]
+       1300  ABP940-U / ABP960-U
+               10cd 1310  ASC1300 SCSI Adapter
+       2300  ABP940-UW
+       2500  ABP940-U2W
+10ce  Radius
+10cf  Citicorp TTI
+       2001  mb86605
+10d0  Fujitsu Limited
+10d1  FuturePlus Systems Corp.
+10d2  Molex Incorporated
+10d3  Jabil Circuit Inc
+10d4  Hualon Microelectronics
+10d5  Autologic Inc.
+10d6  Cetia
+10d7  BCM Advanced Research
+10d8  Advanced Peripherals Labs
+10d9  Macronix, Inc. [MXIC]
+       0512  MX98713
+       0531  MX987x5
+       8625  MX86250
+       8888  MX86200
+10da  Compaq IPG-Austin
+       0508  TC4048 Token Ring 4/16
+       3390  Tl3c3x9
+10db  Rohm LSI Systems, Inc.
+10dc  CERN/ECP/EDU
+       0001  STAR/RD24 SCI-PCI (PMC)
+       0002  TAR/RD24 SCI-PCI (PMC)
+       0021  HIPPI destination
+       0022  HIPPI source
+       10dc  ATT2C15-3 FPGA
+10dd  Evans & Sutherland
+10de  nVidia Corporation
+       0008  EDGE 3D [NV1]
+       0009  EDGE 3D [NV1]
+       0010  Mutara V08 [NV2]
+       0020  Riva TnT [NV04]
+               1043 0200  V3400 TNT
+               1048 0c18  Erazor II SGRAM
+               1092 0550  Viper V550
+               1092 0552  Viper V550
+               1092 4804  Viper V550
+               1092 4808  Viper V550
+               1092 4810  Viper V550
+               1092 4812  Viper V550
+               1092 4815  Viper V550
+               1092 4820  Viper V550 with TV out
+               1092 4822  Viper V550
+               1092 4904  Viper V550
+               1092 4914  Viper V550
+               1092 8225  Viper V550
+               10de 0020  Riva TNT
+               1102 1015  Graphics Blaster CT6710
+               1102 1016  Graphics Blaster RIVA TNT
+       0028  Riva TnT2 [NV5]
+               1043 0200  AGP-V3800 SGRAM
+               1043 0201  AGP-V3800 SDRAM
+               1043 0205  PCI-V3800
+               1043 4000  AGP-V3800PRO
+               1092 4804  Viper V770
+               1092 4a00  Viper V770
+               1092 4a02  Viper V770 Ultra
+               1092 6a02  Viper V770 Ultra
+               1092 7a02  Viper V770 Ultra
+               10de 0005  RIVA TNT2 Pro
+               1102 1020  3D Blaster RIVA TNT2
+               1102 1026  3D Blaster RIVA TNT2 Digital
+               14af 5810  Maxi Gamer Xentor
+       0029  Riva TnT2 Ultra [NV5]
+               1043 0200  AGP-V3800 Deluxe
+               1043 0201  AGP-V3800 Ultra SDRAM
+               1043 0205  PCI-V3800 Ultra
+               1102 1021  3D Blaster RIVA TNT2 Ultra
+               1102 1029  3D Blaster RIVA TNT2 Ultra
+               1102 102f  3D Blaster RIVA TNT2 Ultra
+               14af 5820  Maxi Gamer Xentor 32
+       002a  Riva TnT2 [NV5]
+       002b  Riva TnT2 [NV5]
+       002c  Vanta [NV6]
+               1043 0200  AGP-V3800 Combat SDRAM
+               1043 0201  AGP-V3800 Combat
+               1092 6820  Viper V730
+               1102 1031  CT6938 VANTA 8MB
+               1102 1034  CT6894 VANTA 16MB
+               14af 5008  Maxi Gamer Phoenix 2
+       002d  Vanta [NV6]
+               1043 0200  AGP-V3800M
+               1043 0201  AGP-V3800M
+               1102 1023  CT6892 RIVA TNT2 Value
+               1102 1024  CT6932 RIVA TNT2 Value 32Mb
+               1102 102c  CT6931 RIVA TNT2 Value (Jumper)
+               1462 8808  MSI-8808
+       002e  Vanta [NV6]
+       002f  Vanta [NV6]
+       00a0  Riva TNT2
+               14af 5810  Maxi Gamer Xentor
+       0100  GeForce 256
+               1043 0200  AGP-V6600 SGRAM
+               1043 0201  AGP-V6600 SDRAM
+               1043 4008  AGP-V6600 SGRAM
+               1043 4009  AGP-V6600 SDRAM
+               1102 102d  CT6941 GeForce 256
+       0101  GeForce 256 DDR
+               1043 0202  AGP-V6800 DDR
+               1043 400a  AGP-V6800 DDR SGRAM
+               1043 400b  AGP-V6800 DDR SDRAM
+               1102 102e  CT6971 GeForce 256 DDR
+               14af 5021  3D Prophet DDR-DVI
+       0103  Quadro (GeForce 256 GL)
+       0110  NV11 (GeForce2 MX)
+       0111  NV11 (GeForce2 MX DDR)
+       0112  GeForce2 Go
+       0113  NV11 (GeForce2 MXR)
+       0150  NV15 (GeForce2 Pro)
+               107d 2840  WinFast GeForce2 GTS with TV output
+       0151  NV15 DDR (GeForce2 GTS)
+       0152  NV15 Bladerunner (GeForce2 Ultra)
+       0153  NV15 GL (Quadro2 Pro)
+       0200  NV20 (GeForce3)
+       0203  Quadro DCC
+10df  Emulex Corporation
+       10df  Light Pulse Fibre Channel Adapter
+       1ae5  LP6000 Fibre Channel Host Adapter
+       f700  LP7000 Fibre Channel Host Adapter
+10e0  Integrated Micro Solutions Inc.
+       5026  IMS5026/27/28
+       5027  IMS5027
+       5028  IMS5028
+       8849  IMS8849
+       8853  IMS8853
+       9128  IMS9129 [Twin turbo 128]
+10e1  Tekram Technology Co.,Ltd.
+       0391  TRM-S1040
+               10e1 0391  DC-315U SCSI-3 Host Adapter
+       690c  DC-690c
+       dc29  DC-290
+10e2  Aptix Corporation
+10e3  Tundra Semiconductor Corp.
+       0000  CA91C042 [Universe]
+       0860  CA91C860 [QSpan]
+10e4  Tandem Computers
+10e5  Micro Industries Corporation
+10e6  Gainbery Computer Products Inc.
+10e7  Vadem
+10e8  Applied Micro Circuits Corp.
+       2011  Q-Motion Video Capture/Edit board
+       4750  S5930 [Matchmaker]
+       5920  S5920
+       8043  LANai4.x [Myrinet LANai interface chip]
+       8062  S5933_PARASTATION
+       807d  S5933 [Matchmaker]
+       8088  Kongsberg Spacetec Format Synchronizer
+       8089  Kongsberg Spacetec Serial Output Board
+       809c  S5933_HEPC3
+       80d7  PCI-9112
+       80d9  PCI-9118
+       80da  PCI-9812
+       811a  PCI-IEEE1355-DS-DE Interface
+       8170  S5933 [Matchmaker] (Chipset Development Tool)
+10e9  Alps Electric Co., Ltd.
+10ea  Intergraphics Systems
+       1680  IGA-1680
+       1682  IGA-1682
+       1683  IGA-1683
+       2000  CyberPro 2000
+       2010  CyberPro 2000A
+       5000  CyberPro 5000
+       5050  CyberPro 5050
+10eb  Artists Graphics
+       0101  3GA
+       8111  Twist3 Frame Grabber
+10ec  Realtek Semiconductor Co., Ltd.
+       8029  RTL-8029(AS)
+               10b8 2011  EZ-Card
+               10ec 8029  RT8029(AS)
+               1113 1208  EN1208
+               1186 0300  DE-528
+               1259 2400  AT-2400
+       8129  RTL-8129
+               10ec 8129  RT8129 Fast Ethernet Adapter
+       8138  RT8139 (B/C) Cardbus Fast Ethernet Adapter
+               10ec 8138  RT8139 (B/C) Fast Ethernet Adapter
+       8139  RTL-8139
+               1025 8920  ALN-325
+               1025 8921  ALN-325
+               10bd 0320  EP-320X-R
+               10ec 8139  RT8139
+               1186 1300  DFE-538TX
+               1186 1320  SN5200
+               1259 2500  AT-2500TX
+               1429 d010  ND010
+               1432 9130  EN-9130TX
+               1436 8139  RT8139
+               146c 1439  FE-1439TX
+               1489 6001  GF100TXRII
+               1489 6002  GF100TXRA
+               149c 139a  LFE-8139ATX
+               149c 8139  LFE-8139TX
+               2646 0001  EtheRx
+               8e2e 7000  KF-230TX
+               8e2e 7100  KF-230TX/2
+               a0a0 0007  ALN-325C
+10ed  Ascii Corporation
+       7310  V7310
+10ee  Xilinx, Inc.
+       3fc0  RME Digi96
+       3fc1  RME Digi96/8
+       3fc2  RME Digi96/8 Pro
+       3fc3  RME Digi96/8 Pad
+10ef  Racore Computer Products, Inc.
+       8154  M815x Token Ring Adapter
+10f0  Peritek Corporation
+10f1  Tyan Computer
+10f2  Achme Computer, Inc.
+10f3  Alaris, Inc.
+10f4  S-MOS Systems, Inc.
+10f5  NKK Corporation
+       a001  NDR4000 [NR4600 Bridge]
+10f6  Creative Electronic Systems SA
+10f7  Matsushita Electric Industrial Co., Ltd.
+10f8  Altos India Ltd
+10f9  PC Direct
+10fa  Truevision
+       000c  TARGA 1000
+10fb  Thesys Gesellschaft für Mikroelektronik mbH
+10fc  I-O Data Device, Inc.
+10fd  Soyo Computer, Inc
+10fe  Fast Multimedia AG
+10ff  NCube
+1100  Jazz Multimedia
+1101  Initio Corporation
+       1060  INI-A100U2W
+       9100  INI-9100/9100W
+       9400  INI-940
+       9401  INI-950
+       9500  360P
+1102  Creative Labs
+       0002  SB Live! EMU10k1
+               1102 0020  CT4850 SBLive! Value
+               1102 0021  CT4620 SBLive!
+               1102 002f  SBLive! mainboard implementation
+               1102 4001  E-mu APS
+               1102 8022  CT4780 SBLive! Value
+               1102 8023  CT4790 SoundBlaster PCI512
+               1102 8024  CT4760 SBLive!
+               1102 8025  SBLive! Mainboard Implementation
+               1102 8026  CT4830 SBLive! Value
+               1102 8027  CT4832 SBLive! Value
+               1102 8031  CT4831 SBLive! Value
+               1102 8040  CT4760 SBLive!
+               1102 8051  CT4850 SBLive! Value
+       7002  SB Live!
+               1102 0020  Gameport Joystick
+       8938  ES1371
+1103  Triones Technologies, Inc.
+       0003  HPT343
+       0004  HPT366 / HPT370
+               1103 0005  HPT370 UDMA100
+1104  RasterOps Corp.
+1105  Sigma Designs, Inc.
+       8300  REALmagic Hollywood Plus DVD Decoder
+1106  VIA Technologies, Inc.
+       0305  VT8363/8365 [KT133/KM133]
+       0391  VT8371 [KX133]
+       0501  VT8501 [Apollo MVP4]
+       0505  VT82C505
+       0561  VT82C561
+       0571  Bus Master IDE
+       0576  VT82C576 3V [Apollo Master]
+       0585  VT82C585VP [Apollo VP1/VPX]
+       0586  VT82C586/A/B PCI-to-ISA [Apollo VP]
+               1106 0000  MVP3 ISA Bridge
+       0595  VT82C595 [Apollo VP2]
+       0596  VT82C596 ISA [Mobile South]
+               1106 0000  VT82C596/A/B PCI to ISA Bridge
+               1458 0596  VT82C596/A/B PCI to ISA Bridge
+       0597  VT82C597 [Apollo VP3]
+       0598  VT82C598 [Apollo MVP3]
+       0601  VT8601 [Apollo ProMedia]
+       0605  VT8605 [ProSavage PM133]
+       0680  VT82C680 [Apollo P6]
+       0686  VT82C686 [Apollo Super South]
+               1106 0000  VT82C686/A PCI to ISA Bridge
+               1106 0686  VT82C686/A PCI to ISA Bridge
+       0691  VT82C693A/694x [Apollo PRO133x]
+               1458 0691  VT82C691 Apollo Pro System Controller
+       0693  VT82C693 [Apollo Pro Plus]
+       0698  VT82C693A [Apollo Pro133 AGP]
+       0926  VT82C926 [Amazon]
+       1000  VT82C570MV
+       1106  VT82C570MV
+       1571  VT82C416MV
+       1595  VT82C595/97 [Apollo VP2/97]
+       3038  UHCI USB
+               1234 0925  MVP3 USB Controller
+       3040  VT82C586B ACPI
+       3043  VT86C100A [Rhine 10/100]
+               10bd 0000  VT86C100A Fast Ethernet Adapter
+               1106 0100  VT86C100A Fast Ethernet Adapter
+               1186 1400  DFE-530TX
+       3044  OHCI Compliant IEEE 1394 Host Controller
+       3050  VT82C596 Power Management
+       3051  VT82C596 Power Management
+       3057  VT82C686 [Apollo Super ACPI]
+       3058  AC97 Audio Controller
+               1462 3091  MS-6309 Onboard Audio
+       3059  AC97 Audio Controller
+       3065  Ethernet Controller
+       3068  AC97 Modem Controller
+       3074  VT8233 PCI to ISA Bridge
+       3091  VT8633 [Apollo Pro266]
+       3099  VT8367 [KT266]
+       3109  VT8233C PCI to ISA Bridge
+       5030  VT82C596 ACPI [Apollo PRO]
+       6100  VT85C100A [Rhine II]
+       8231  VT8231 [PCI-to-ISA Bridge]
+       8235  VT8235 Power Management
+       8305  VT8363/8365 [KT133/KM133 AGP]
+       8391  VT8371 [KX133 AGP]
+       8501  VT8501 [Apollo MVP4 AGP]
+       8596  VT82C596 [Apollo PRO AGP]
+       8597  VT82C597 [Apollo VP3 AGP]
+       8598  VT82C598/694x [Apollo MVP3/Pro133x AGP]
+       8601  VT8601 [Apollo ProMedia AGP]
+       8605  VT8605 [PM133 AGP]
+       8691  VT82C691 [Apollo Pro]
+       b091  VT8633 [Apollo Pro266 AGP]
+       b099  VT8367 [KT266 AGP]
+1107  Stratus Computers
+       0576  VIA VT82C570MV [Apollo] (Wrong vendor ID!)
+1108  Proteon, Inc.
+       0100  p1690plus_AA
+       0101  p1690plus_AB
+       0105  P1690Plus
+       0108  P1690Plus
+       0138  P1690Plus
+       0139  P1690Plus
+       013c  P1690Plus
+       013d  P1690Plus
+1109  Cogent Data Technologies, Inc.
+       1400  EM110TX [EX110TX]
+110a  Siemens Nixdorf AG
+       0002  Pirahna 2-port
+       0005  Tulip controller, power management, switch extender
+       2102  DSCC4 WAN adapter
+       4942  FPGA I-Bus Tracer for MBD
+       6120  SZB6120
+110b  Chromatic Research Inc.
+       0001  Mpact Media Processor
+110c  Mini-Max Technology, Inc.
+110d  Znyx Advanced Systems
+110e  CPU Technology
+110f  Ross Technology
+1110  Powerhouse Systems
+       6037  Firepower Powerized SMP I/O ASIC
+       6073  Firepower Powerized SMP I/O ASIC
+1111  Santa Cruz Operation
+# DJ: Some people say that 0x1112 is Rockwell International
+1112  RNS - Div. of Meret Communications Inc
+       2200  FDDI Adapter
+       2300  Fast Ethernet Adapter
+       2340  4 Port Fast Ethernet Adapter
+       2400  ATM Adapter
+1113  Accton Technology Corporation
+       1211  SMC2-1211TX
+               103c 1207  EN-1207D Fast Ethernet Adapter
+               1113 1211  EN-1207D Fast Ethernet Adapter
+       1216  EN-1216 Ethernet Adapter
+       1217  EN-1217 Ethernet Adapter
+       5105  10Mbps Network card
+       9211  EN-1207D Fast Ethernet Adapter
+               1113 9211  EN-1207D Fast Ethernet Adapter
+1114  Atmel Corporation
+1115  3D Labs
+1116  Data Translation
+       0022  DT3001
+       0023  DT3002
+       0024  DT3003
+       0025  DT3004
+       0026  DT3005
+       0027  DT3001-PGL
+       0028  DT3003-PGL
+1117  Datacube, Inc
+       9500  Max-1C SVGA card
+       9501  Max-1C image processing
+1118  Berg Electronics
+1119  ICP Vortex Computersysteme GmbH
+       0000  GDT 6000/6020/6050
+       0001  GDT 6000B/6010
+       0002  GDT 6110/6510
+       0003  GDT 6120/6520
+       0004  GDT 6530
+       0005  GDT 6550
+       0006  GDT 6x17
+       0007  GDT 6x27
+       0008  GDT 6537
+       0009  GDT 6557
+       000a  GDT 6115/6515
+       000b  GDT 6125/6525
+       000c  GDT 6535
+       000d  GDT 6555
+       0100  GDT 6117RP/6517RP
+       0101  GDT 6127RP/6527RP
+       0102  GDT 6537RP
+       0103  GDT 6557RP
+       0104  GDT 6111RP/6511RP
+       0105  GDT 6121RP/6521RP
+       0110  GDT 6117RD/6517RD
+       0111  GDT 6127RD/6527RD
+       0112  GDT 6537RD
+       0113  GDT 6557RD
+       0114  GDT 6111RD/6511RD
+       0115  GDT 6121RD/6521RD
+       0118  GDT 6118RD/6518RD/6618RD
+       0119  GDT 6128RD/6528RD/6628RD
+       011a  GDT 6538RD/6638RD
+       011b  GDT 6558RD/6658RD
+       0120  GDT 6117RP2/6517RP2
+       0121  GDT 6127RP2/6527RP2
+       0122  GDT 6537RP2
+       0123  GDT 6557RP2
+       0124  GDT 6111RP2/6511RP2
+       0125  GDT 6121RP2/6521RP2
+       0136  GDT 6113RS/6513RS
+       0137  GDT 6123RS/6523RS
+       0138  GDT 6118RS/6518RS/6618RS
+       0139  GDT 6128RS/6528RS/6628RS
+       013a  GDT 6538RS/6638RS
+       013b  GDT 6558RS/6658RS
+       013c  GDT 6533RS/6633RS
+       013d  GDT 6543RS/6643RS
+       013e  GDT 6553RS/6653RS
+       013f  GDT 6563RS/6663RS
+       0166  GDT 7113RN/7513RN/7613RN
+       0167  GDT 7123RN/7523RN/7623RN
+       0168  GDT 7118RN/7518RN/7618RN
+       0169  GDT 7128RN/7528RN/7628RN
+       016a  GDT 7538RN/7638RN
+       016b  GDT 7558RN/7658RN
+       016c  GDT 7533RN/7633RN
+       016d  GDT 7543RN/7643RN
+       016e  GDT 7553RN/7653RN
+       016f  GDT 7563RN/7663RN
+       01d6  GDT 4x13RZ
+       01d7  GDT 4x23RZ
+       01f6  GDT 8x13RZ
+       01f7  GDT 8x23RZ
+       01fc  GDT 8x33RZ
+       01fd  GDT 8x43RZ
+       01fe  GDT 8x53RZ
+       01ff  GDT 8x63RZ
+       0210  GDT 6519RD/6619RD
+       0211  GDT 6529RD/6629RD
+       0260  GDT 7519RN/7619RN
+       0261  GDT 7529RN/7629RN
+       0300  GDT Raid Controller
+111a  Efficient Networks, Inc
+       0000  155P-MF1 (FPGA)
+       0002  155P-MF1 (ASIC)
+       0003  ENI-25P ATM
+               111a 0000  ENI-25p Miniport ATM Adapter
+       0005  SpeedStream (LANAI)
+               111a 0001  ENI-3010 ATM
+               111a 0009  ENI-3060 ADSL (VPI=0)
+               111a 0101  ENI-3010 ATM
+               111a 0109  ENI-3060CO ADSL (VPI=0)
+               111a 0809  ENI-3060 ADSL (VPI=0 or 8)
+               111a 0909  ENI-3060CO ADSL (VPI=0 or 8)
+               111a 0a09  ENI-3060 ADSL (VPI=<0..15>)
+       0007  SpeedStream ADSL
+               111a 1001  ENI-3061 ADSL [ASIC]
+111b  Teledyne Electronic Systems
+111c  Tricord Systems Inc.
+       0001  Powerbis Bridge
+111d  Integrated Device Tech
+       0001  IDT77211 ATM Adapter
+111e  Eldec
+111f  Precision Digital Images
+       4a47  Precision MX Video engine interface
+       5243  Frame capture bus interface
+1120  EMC Corporation
+1121  Zilog
+1122  Multi-tech Systems, Inc.
+1123  Excellent Design, Inc.
+1124  Leutron Vision AG
+1125  Eurocore
+1126  Vigra
+1127  FORE Systems Inc
+       0200  ForeRunner PCA-200 ATM
+       0210  PCA-200PC
+       0250  ATM
+       0300  PCA-200E
+       0310  ATM
+       0400  ForeRunnerHE ATM Adapter
+1129  Firmworks
+112a  Hermes Electronics Company, Ltd.
+112b  Linotype - Hell AG
+112c  Zenith Data Systems
+112d  Ravicad
+112e  Infomedia Microelectronics Inc.
+112f  Imaging Technology Inc
+       0000  MVC IC-PCI
+       0001  MVC IM-PCI Video frame grabber/processor
+1130  Computervision
+1131  Philips Semiconductors
+       7145  SAA7145
+       7146  SAA7146
+               114b 2003  DVRaptor Video Edit/Capture Card
+               11bd 0006  DV500 Overlay
+               11bd 000a  DV500 Overlay
+1132  Mitel Corp.
+1133  Eicon Technology Corporation
+       7901  EiconCard S90
+       7902  EiconCard S90
+       7911  EiconCard S91
+       7912  EiconCard S91
+       7941  EiconCard S94
+       7942  EiconCard S94
+       b921  EiconCard P92
+       b922  EiconCard P92
+       e001  DIVA 20PRO
+               1133 e001  DIVA Pro 2.0 S/T
+       e002  DIVA 20
+               1133 e002  DIVA 2.0 S/T
+       e003  DIVA 20PRO_U
+               1133 e003  DIVA Pro 2.0 U
+       e004  DIVA 20_U
+               1133 e004  DIVA 2.0 U
+       e010  DIVA Server BRI-2M
+               1133 e010  DIVA Server BRI-2M
+       e014  DIVA Server PRI-30M
+               1133 e014  DIVA Server PRI-30M
+1134  Mercury Computer Systems
+       0001  Raceway Bridge
+1135  Fuji Xerox Co Ltd
+       0001  Printer controller
+1136  Momentum Data Systems
+1137  Cisco Systems Inc
+1138  Ziatech Corporation
+       8905  8905 [STD 32 Bridge]
+1139  Dynamic Pictures, Inc
+       0001  VGA Compatible 3D Graphics
+113a  FWB Inc
+113b  Network Computing Devices
+113c  Cyclone Microsystems, Inc.
+       0000  PCI-9060 i960 Bridge
+       0001  PCI-SDK [PCI i960 Evaluation Platform]
+       0911  PCI-911 [i960Jx-based Intelligent I/O Controller]
+       0912  PCI-912 [i960CF-based Intelligent I/O Controller]
+       0913  PCI-913
+       0914  PCI-914 [I/O Controller w/ secondary PCI bus]
+113d  Leading Edge Products Inc
+113e  Sanyo Electric Co - Computer Engineering Dept
+113f  Equinox Systems, Inc.
+       0808  SST-64P Adapter
+       1010  SST-128P Adapter
+       80c0  SST-16P Adapter
+       80c4  SST-16P Adapter
+       80c8  SST-16P Adapter
+       8888  SST-4P Adapter
+       9090  SST-8P Adapter
+1140  Intervoice Inc
+1141  Crest Microsystem Inc
+1142  Alliance Semiconductor Corporation
+       3210  AP6410
+       6422  ProVideo 6422
+       6424  ProVideo 6424
+       6425  ProMotion AT25
+       643d  ProMotion AT3D
+1143  NetPower, Inc
+1144  Cincinnati Milacron
+       0001  Noservo controller
+1145  Workbit Corporation
+1146  Force Computers
+1147  Interface Corp
+1148  Syskonnect (Schneider & Koch)
+       4000  FDDI Adapter
+               0e11 b03b  Netelligent 100 FDDI DAS Fibre SC
+               0e11 b03c  Netelligent 100 FDDI SAS Fibre SC
+               0e11 b03d  Netelligent 100 FDDI DAS UTP
+               0e11 b03e  Netelligent 100 FDDI SAS UTP
+               0e11 b03f  Netelligent 100 FDDI SAS Fibre MIC
+               1148 5521  FDDI SK-5521 (SK-NET FDDI-UP)
+               1148 5522  FDDI SK-5522 (SK-NET FDDI-UP DAS)
+               1148 5541  FDDI SK-5541 (SK-NET FDDI-FP)
+               1148 5543  FDDI SK-5543 (SK-NET FDDI-LP)
+               1148 5544  FDDI SK-5544 (SK-NET FDDI-LP DAS)
+               1148 5821  FDDI SK-5821 (SK-NET FDDI-UP64)
+               1148 5822  FDDI SK-5822 (SK-NET FDDI-UP64 DAS)
+               1148 5841  FDDI SK-5841 (SK-NET FDDI-FP64)
+               1148 5843  FDDI SK-5843 (SK-NET FDDI-LP64)
+               1148 5844  FDDI SK-5844 (SK-NET FDDI-LP64 DAS)
+       4200  Token Ring adapter
+       4300  Gigabit Ethernet
+               1148 9821  SK-9821 (1000Base-T single link)
+               1148 9822  SK-9822 (1000Base-T dual link)
+               1148 9841  SK-9841 (1000Base-LX single link)
+               1148 9842  SK-9842 (1000Base-LX dual link)
+               1148 9843  SK-9843 (1000Base-SX single link)
+               1148 9844  SK-9844 (1000Base-SX dual link)
+               1148 9861  SK-9861 (1000Base-SX VF45 single link)
+               1148 9862  SK-9862 (1000Base-SX VF45 dual link)
+1149  Win System Corporation
+114a  VMIC
+       7587  VMIVME-7587
+114b  Canopus Co., Ltd
+114c  Annabooks
+114d  IC Corporation
+114e  Nikon Systems Inc
+114f  Digi International
+       0002  AccelePort EPC
+       0003  RightSwitch SE-6
+       0004  AccelePort Xem
+       0005  AccelePort Xr
+       0006  AccelePort Xr,C/X
+       0009  AccelePort Xr/J
+       000a  AccelePort EPC/J
+       000c  DataFirePRIme T1 (1-port)
+       000d  SyncPort 2-Port (x.25/FR)
+       0011  AccelePort 8r EIA-232 (IBM)
+       0012  AccelePort 8r EIA-422
+       0013  AccelePort Xr
+       0014  AccelePort 8r EIA-422
+       0015  AccelePort Xem
+       0016  AccelePort EPC/X
+       0017  AccelePort C/X
+       001a  DataFirePRIme E1 (1-port)
+       001b  AccelePort C/X (IBM)
+       001d  DataFire RAS T1/E1/PRI
+       0023  AccelePort RAS
+       0024  DataFire RAS B4 ST/U
+       0026  AccelePort 4r 920
+       0027  AccelePort Xr 920
+       0034  AccelePort 2r 920
+       0035  DataFire DSP T1/E1/PRI cPCI
+       6001  Avanstar
+1150  Thinking Machines Corp
+1151  JAE Electronics Inc.
+1152  Megatek
+1153  Land Win Electronic Corp
+1154  Melco Inc
+1155  Pine Technology Ltd
+1156  Periscope Engineering
+1157  Avsys Corporation
+1158  Voarx R & D Inc
+       3011  Tokenet/vg 1001/10m anylan
+       9050  Lanfleet/Truevalue
+       9051  Lanfleet/Truevalue
+1159  Mutech Corp
+       0001  MV-1000
+115a  Harlequin Ltd
+115b  Parallax Graphics
+115c  Photron Ltd.
+115d  Xircom
+       0003  Cardbus Ethernet 10/100
+               1014 0181  10/100 EtherJet Cardbus Adapter
+               1014 1181  10/100 EtherJet Cardbus Adapter
+               115d 0181  Cardbus Ethernet 10/100
+               115d 1181  Cardbus Ethernet 10/100
+               8086 8181  EtherExpress PRO/100 Mobile CardBus 32 Adapter
+               8086 9181  EtherExpress PRO/100 Mobile CardBus 32 Adapter
+       0005  Cardbus Ethernet 10/100
+               1014 0182  10/100 EtherJet Cardbus Adapter
+               1014 1182  10/100 EtherJet Cardbus Adapter
+               115d 0182  Cardbus Ethernet 10/100
+               115d 1182  Cardbus Ethernet 10/100
+       0007  Cardbus Ethernet 10/100
+               1014 0182  10/100 EtherJet Cardbus Adapter
+               1014 1182  10/100 EtherJet Cardbus Adapter
+               115d 0182  Cardbus Ethernet 10/100
+               115d 1182  Cardbus Ethernet 10/100
+       000b  Cardbus Ethernet 10/100
+               1014 0183  10/100 EtherJet Cardbus Adapter
+               115d 0183  Cardbus Ethernet 10/100
+       000f  Cardbus Ethernet 10/100
+               1014 0183  10/100 EtherJet Cardbus Adapter
+               115d 0183  Cardbus Ethernet 10/100
+       0101  Cardbus 56k modem
+               115d 1081  Cardbus 56k Modem
+       0103  Cardbus Ethernet + 56k Modem
+               115d 1181  CBEM56G-100 Ethernet + 56k Modem
+               8086 9181  PRO/100 LAN + Modem56 CardBus
+115e  Peer Protocols Inc
+115f  Maxtor Corporation
+1160  Megasoft Inc
+1161  PFU Limited
+1162  OA Laboratory Co Ltd
+1163  Rendition
+       0001  Verite 1000
+       2000  Verite V2000/V2100/V2200
+               1092 2000  Stealth II S220
+1164  Advanced Peripherals Technologies
+1165  Imagraph Corporation
+       0001  Motion TPEG Recorder/Player with audio
+1166  ServerWorks
+       0007  CNB20-LE Host Bridge
+       0008  CNB20HE Host Bridge
+       0009  CNB20LE Host Bridge
+       0010  CIOB30
+       0011  CMIC-HE
+       0200  OSB4 South Bridge
+       0201  CSB5 South Bridge
+       0211  OSB4 IDE Controller
+       0212  CSB5 IDE Controller
+       0220  OSB4/CSB5 OHCI USB Controller
+1167  Mutoh Industries Inc
+1168  Thine Electronics Inc
+1169  Centre for Development of Advanced Computing
+116a  Polaris Communications
+       6100  Bus/Tag Channel
+       6800  Escon Channel
+       7100  Bus/Tag Channel
+       7800  Escon Channel
+116b  Connectware Inc
+116c  Intelligent Resources Integrated Systems
+116d  Martin-Marietta
+116e  Electronics for Imaging
+116f  Workstation Technology
+1170  Inventec Corporation
+1171  Loughborough Sound Images Plc
+1172  Altera Corporation
+1173  Adobe Systems, Inc
+1174  Bridgeport Machines
+1175  Mitron Computer Inc.
+1176  SBE Incorporated
+1177  Silicon Engineering
+1178  Alfa, Inc.
+       afa1  Fast Ethernet Adapter
+1179  Toshiba America Info Systems
+       0404  DVD Decoder card
+       0406  Tecra Video Capture device
+       0407  DVD Decoder card (Version 2)
+       0601  601
+       0603  ToPIC95 PCI to CardBus Bridge for Notebooks
+       060a  ToPIC95
+       060f  ToPIC97
+       0617  ToPIC95 PCI to Cardbus Bridge with ZV Support
+       0618  CPU to PCI and PCI to ISA bridge
+# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID?
+       0701  FIR Port
+       0d01  FIR Port Type-DO
+               1179 0001  FIR Port Type-DO
+117a  A-Trend Technology
+117b  L G Electronics, Inc.
+117c  Atto Technology
+117d  Becton & Dickinson
+117e  T/R Systems
+117f  Integrated Circuit Systems
+1180  Ricoh Co Ltd
+       0465  RL5c465
+       0466  RL5c466
+       0475  RL5c475
+       0476  RL5c476 II
+       0477  RL5c477
+       0478  RL5c478
+1181  Telmatics International
+1183  Fujikura Ltd
+1184  Forks Inc
+1185  Dataworld International Ltd
+1186  D-Link System Inc
+       0100  DC21041
+       1002  Sundance Ethernet
+       1300  RTL8139 Ethernet
+       4000  DL2K Ethernet
+1187  Advanced Technology Laboratories, Inc.
+1188  Shima Seiki Manufacturing Ltd.
+1189  Matsushita Electronics Co Ltd
+118a  Hilevel Technology
+118b  Hypertec Pty Limited
+118c  Corollary, Inc
+       0014  PCIB [C-bus II to PCI bus host bridge chip]
+       1117  Intel 8-way XEON Profusion Chipset [Cache Coherency Filter]
+118d  BitFlow Inc
+       0001  Raptor-PCI framegrabber
+       0012  Model 12 Road Runner Frame Grabber
+       0014  Model 14 Road Runner Frame Grabber
+       0024  Model 24 Road Runner Frame Grabber
+       0044  Model 44 Road Runner Frame Grabber
+       0112  Model 12 Road Runner Frame Grabber
+       0114  Model 14 Road Runner Frame Grabber
+       0124  Model 24 Road Runner Frame Grabber
+       0144  Model 44 Road Runner Frame Grabber
+       0212  Model 12 Road Runner Frame Grabber
+       0214  Model 14 Road Runner Frame Grabber
+       0224  Model 24 Road Runner Frame Grabber
+       0244  Model 44 Road Runner Frame Grabber
+       0312  Model 12 Road Runner Frame Grabber
+       0314  Model 14 Road Runner Frame Grabber
+       0324  Model 24 Road Runner Frame Grabber
+       0344  Model 44 Road Runner Frame Grabber
+118e  Hermstedt GmbH
+118f  Green Logic
+1190  Tripace
+       c731  TP-910/920/940 PCI Ultra(Wide) SCSI Adapter
+1191  Artop Electronic Corp
+       0003  SCSI Cache Host Adapter
+       0004  ATP8400
+       0005  ATP850UF
+       0006  ATP860 NO-BIOS
+       0007  ATP860
+       8002  AEC6710 SCSI-2 Host Adapter
+       8010  AEC6712UW SCSI
+       8020  AEC6712U SCSI
+       8030  AEC6712S SCSI
+       8040  AEC6712D SCSI
+       8050  AEC6712SUW SCSI
+1192  Densan Company Ltd
+1193  Zeitnet Inc.
+       0001  1221
+       0002  1225
+1194  Toucan Technology
+1195  Ratoc System Inc
+1196  Hytec Electronics Ltd
+1197  Gage Applied Sciences, Inc.
+1198  Lambda Systems Inc
+1199  Attachmate Corporation
+119a  Mind Share, Inc.
+119b  Omega Micro Inc.
+       1221  82C092G
+119c  Information Technology Inst.
+119d  Bug, Inc. Sapporo Japan
+119e  Fujitsu Microelectronics Ltd.
+       0001  FireStream 155
+       0003  FireStream 50
+119f  Bull HN Information Systems
+11a0  Convex Computer Corporation
+11a1  Hamamatsu Photonics K.K.
+11a2  Sierra Research and Technology
+11a3  Deuretzbacher GmbH & Co. Eng. KG
+11a4  Barco Graphics NV
+11a5  Microunity Systems Eng. Inc
+11a6  Pure Data Ltd.
+11a7  Power Computing Corp.
+11a8  Systech Corp.
+11a9  InnoSys Inc.
+       4240  AMCC S933Q Intelligent Serial Card
+11aa  Actel
+11ab  Galileo Technology Ltd.
+       0146  GT-64010
+       4801  GT-48001
+       f003  GT-64010 Primary Image Piranha Image Generator
+11ac  Canon Information Systems Research Aust.
+11ad  Lite-On Communications Inc
+       0002  LNE100TX
+               11ad 0002  LNE100TX
+               11ad 0003  LNE100TX
+               11ad f003  LNE100TX
+               11ad ffff  LNE100TX
+               1385 f004  FA310TX
+       c115  LNE100TX [Linksys EtherFast 10/100]
+11ae  Aztech System Ltd
+11af  Avid Technology Inc.
+11b0  V3 Semiconductor Inc.
+       0002  V300PSC
+       0292  V292PBC [Am29030/40 Bridge]
+       0960  V96xPBC
+       c960  V96DPC
+11b1  Apricot Computers
+11b2  Eastman Kodak
+11b3  Barr Systems Inc.
+11b4  Leitch Technology International
+11b5  Radstone Technology Plc
+11b6  United Video Corp
+11b7  Motorola
+11b8  XPoint Technologies, Inc
+       0001  Quad PeerMaster
+11b9  Pathlight Technology Inc.
+       c0ed  SSA Controller
+11ba  Videotron Corp
+11bb  Pyramid Technology
+11bc  Network Peripherals Inc
+       0001  NP-PCI
+11bd  Pinnacle Systems Inc.
+11be  International Microcircuits Inc
+11bf  Astrodesign, Inc.
+11c0  Hewlett Packard
+11c1  Lucent Microelectronics
+       0440  56k WinModem
+               0001 0440  LT WinModem 56k Data+Fax+Voice+Dsvd
+               1033 8015  LT WinModem 56k Data+Fax+Voice+Dsvd
+               1033 804f  LT WinModem 56k Data+Fax+Voice+Dsvd
+               10cf 102c  LB LT Modem V.90 56k
+               10cf 104a  BIBLO LT Modem 56k
+               10cf 105f  LB2 LT Modem V.90 56k
+               1179 0001  Internal V.90 Modem
+               11c1 0440  LT WinModem 56k Data+Fax+Voice+Dsvd
+               122d 4101  MDP7800-U Modem
+               13e0 0040  LT WinModem 56k Data+Fax+Voice+Dsvd
+               13e0 0440  LT WinModem 56k Data+Fax+Voice+Dsvd
+               13e0 0441  LT WinModem 56k Data+Fax+Voice+Dsvd
+               13e0 f100  LT WinModem 56k Data+Fax+Voice+Dsvd
+               13e0 f101  LT WinModem 56k Data+Fax+Voice+Dsvd
+               144d 2101  LT56PV Modem
+               149f 0440  LT WinModem 56k Data+Fax+Voice+Dsvd
+       0441  56k WinModem
+               1033 804d  LT WinModem 56k Data+Fax
+               1092 0440  Supra 56i
+               1179 0001  Internal V.90 Modem
+               11c1 0440  LT WinModem 56k Data+Fax
+               11c1 0441  LT WinModem 56k Data+Fax
+               122d 4100  MDP7800-U Modem
+               13e0 0040  LT WinModem 56k Data+Fax
+               13e0 0100  LT WinModem 56k Data+Fax
+               13e0 0410  LT WinModem 56k Data+Fax
+               13e0 0420  TelePath Internet 56k WinModem
+               13e0 0443  LT WinModem 56k Data+Fax
+               1416 9804  CommWave 56k Modem
+               141d 0440  LT WinModem 56k Data+Fax
+               144f 0441  Lucent 56k V.90 DF Modem
+               1468 0441  Presario 56k V.90 DF Modem
+       0442  56k WinModem
+               0001 0440  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               11c1 0440  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               11c1 0442  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               13e0 0412  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               13e0 0442  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               13fc 2471  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               144d 2104  LT56PT Modem
+               149f 0440  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               1668 0440  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+       0443  LT WinModem
+       0444  LT WinModem
+       0445  LT WinModem
+       0446  LT WinModem
+       0447  LT WinModem
+       0448  WinModem 56k
+               13e0 0040  LT WinModem 56k Data+Fax+Voice+Dsvd
+       0449  WinModem 56k
+               0e11 b14d  56k V.90 Modem
+               13e0 0020  LT WinModem 56k Data+Fax
+               13e0 0041  TelePath Internet 56k WinModem
+               144f 0449  Lucent 56k V.90 DFi Modem
+               1468 0449  Presario 56k V.90 DFi Modem
+       044a  F-1156IV WinModem (V90, 56KFlex)
+               13e0 0012  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               13e0 0042  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+               144f 1005  LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+       044b  LT WinModem
+       044c  LT WinModem
+       044d  LT WinModem
+       044e  LT WinModem
+       0450  LT WinModem
+       0451  LT WinModem
+       0452  LT WinModem
+       0453  LT WinModem
+       0454  LT WinModem
+       0455  LT WinModem
+       0456  LT WinModem
+       0457  LT WinModem
+       0458  LT WinModem
+       0459  LT WinModem
+       045a  LT WinModem
+       0480  Venus Modem (V90, 56KFlex)
+11c2  Sand Microelectronics
+11c3  NEC Corp
+11c4  Document Technologies, Inc
+11c5  Shiva Corporation
+11c6  Dainippon Screen Mfg. Co. Ltd
+11c7  D.C.M. Data Systems
+11c8  Dolphin Interconnect Solutions AS
+       0658  PSB32 SCI-Adapter D31x
+       d665  PSB64 SCI-Adapter D32x
+       d667  PSB66 SCI-Adapter D33x
+11c9  Magma
+       0010  16-line serial port w/- DMA
+       0011  4-line serial port w/- DMA
+11ca  LSI Systems, Inc
+11cb  Specialix Research Ltd.
+       2000  PCI_9050
+               11cb 0200  SX
+               11cb b008  I/O8+
+       4000  SUPI_1
+       8000  T225
+11cc  Michels & Kleberhoff Computer GmbH
+11cd  HAL Computer Systems, Inc.
+11ce  Netaccess
+11cf  Pioneer Electronic Corporation
+11d0  Lockheed Martin Federal Systems-Manassas
+11d1  Auravision
+       01f7  VxP524
+11d2  Intercom Inc.
+11d3  Trancell Systems Inc
+11d4  Analog Devices
+       1889  AD1889 sound chip
+11d5  Ikon Corporation
+       0115  10115
+       0117  10117
+11d6  Tekelec Telecom
+11d7  Trenton Technology, Inc.
+11d8  Image Technologies Development
+11d9  TEC Corporation
+11da  Novell
+11db  Sega Enterprises Ltd
+11dc  Questra Corporation
+11dd  Crosfield Electronics Limited
+11de  Zoran Corporation
+       6057  ZR36057PQC Video cutting chipset
+               1031 7efe  DC10 Plus
+               1031 fc00  MiroVIDEO DC50, Motion JPEG Capture/CODEC Board
+               13ca 4231  JPEG/TV Card
+       6120  ZR36120
+               1328 f001  Cinemaster C DVD Decoder
+11df  New Wave PDG
+11e0  Cray Communications A/S
+11e1  GEC Plessey Semi Inc.
+11e2  Samsung Information Systems America
+11e3  Quicklogic Corporation
+11e4  Second Wave Inc
+11e5  IIX Consulting
+11e6  Mitsui-Zosen System Research
+11e7  Toshiba America, Elec. Company
+11e8  Digital Processing Systems Inc.
+11e9  Highwater Designs Ltd.
+11ea  Elsag Bailey
+11eb  Formation Inc.
+11ec  Coreco Inc
+11ed  Mediamatics
+11ee  Dome Imaging Systems Inc
+11ef  Nicolet Technologies B.V.
+11f0  Compu-Shack
+       4231  FDDI
+       4232  FASTline UTP Quattro
+       4233  FASTline FO
+       4234  FASTline UTP
+       4235  FASTline-II UTP
+       4236  FASTline-II FO
+       4731  GIGAline
+11f1  Symbios Logic Inc
+11f2  Picture Tel Japan K.K.
+11f3  Keithley Metrabyte
+11f4  Kinetic Systems Corporation
+       2915  CAMAC controller
+11f5  Computing Devices International
+11f6  Compex
+       0112  ENet100VG4
+       0113  FreedomLine 100
+       1401  ReadyLink 2000
+       2011  RL100-ATX 10/100
+       2201  ReadyLink 100TX (Winbond W89C840)
+               11f6 2011  ReadyLink 100TX
+       9881  RL100TX
+11f7  Scientific Atlanta
+11f8  PMC-Sierra Inc.
+       7375  PM7375 [LASAR-155 ATM SAR]
+11f9  I-Cube Inc
+11fa  Kasan Electronics Company, Ltd.
+11fb  Datel Inc
+11fc  Silicon Magic
+11fd  High Street Consultants
+11fe  Comtrol Corporation
+       0001  RocketPort 8 Oct
+       0002  RocketPort 8 Intf
+       0003  RocketPort 16 Intf
+       0004  RocketPort 32 Intf
+       0005  RocketPort Octacable
+       0006  RocketPort 8J
+       0008  RocketPort 8-port
+       0009  RocketPort 16-port
+       000a  RocketPort Plus Quadcable
+       000b  RocketPort Plus Octacable
+       000c  RocketPort 8-port Modem
+11ff  Scion Corporation
+1200  CSS Corporation
+1201  Vista Controls Corp
+1202  Network General Corp.
+1203  Bayer Corporation, Agfa Division
+1204  Lattice Semiconductor Corporation
+1205  Array Corporation
+1206  Amdahl Corporation
+1208  Parsytec GmbH
+       4853  HS-Link Device
+1209  SCI Systems Inc
+120a  Synaptel
+120b  Adaptive Solutions
+120c  Technical Corp.
+120d  Compression Labs, Inc.
+120e  Cyclades Corporation
+       0100  Cyclom_Y below first megabyte
+       0101  Cyclom_Y above first megabyte
+       0102  Cyclom_4Y below first megabyte
+       0103  Cyclom_4Y above first megabyte
+       0104  Cyclom_8Y below first megabyte
+       0105  Cyclom_8Y above first megabyte
+       0200  Cyclom_Z below first megabyte
+       0201  Cyclom_Z above first megabyte
+       0300  PC300 RX 2
+       0301  PC300 RX 1
+       0310  PC300 TE 2
+       0311  PC300 TE 1
+120f  Essential Communications
+       0001  Roadrunner serial HIPPI
+1210  Hyperparallel Technologies
+1211  Braintech Inc
+1212  Kingston Technology Corp.
+1213  Applied Intelligent Systems, Inc.
+1214  Performance Technologies, Inc.
+1215  Interware Co., Ltd
+1216  Purup Prepress A/S
+1217  O2 Micro, Inc.
+       6729  6729
+       673a  6730
+       6832  6832
+       6836  6836
+       6872  OZ6812 Cardbus Controller
+       6933  OZ6933 Cardbus Controller
+1218  Hybricon Corp.
+1219  First Virtual Corporation
+121a  3Dfx Interactive, Inc.
+       0001  Voodoo
+       0002  Voodoo 2
+       0003  Voodoo Banshee
+               1092 0003  Monster Fusion
+               1092 4000  Monster Fusion
+               1092 4002  Monster Fusion
+               1092 4801  Monster Fusion AGP
+               1092 4803  Monster Fusion AGP
+               1092 8030  Monster Fusion
+               1092 8035  Monster Fusion AGP
+               10b0 0001  Dragon 4000
+               1102 1017  CT6760 3D Blaster Banshee
+               121a 0001  Voodoo Banshee AGP
+               121a 0003  Voodoo Banshee AGP SGRAM
+               121a 0004  Voodoo Banshee
+               139c 0016  Raven
+               139c 0017  Raven
+               14af 0002  Maxi Gamer Phoenix
+               3030 3030  Skywell Magic TwinPower
+       0004  Voodoo Banshee [Velocity 100]
+       0005  Voodoo 3
+               121a 0004  Voodoo3 AGP
+               121a 0030  Voodoo3 AGP
+               121a 0031  Voodoo3 AGP
+               121a 0034  Voodoo3 AGP
+               121a 0036  Voodoo3
+               121a 0037  Voodoo3 AGP
+               121a 0038  Voodoo3 AGP
+               121a 003a  Voodoo3 AGP
+               121a 0044  Voodoo3
+               121a 004b  Velocity 100
+               121a 004c  Velocity 200
+               121a 004d  Voodoo3 AGP
+               121a 004e  Voodoo3 AGP
+               121a 0051  Voodoo3 AGP
+               121a 0052  Voodoo3 AGP
+               121a 0060  Voodoo3 3500 TV (NTSC)
+               121a 0061  Voodoo3 3500 TV (PAL)
+               121a 0062  Voodoo3 3500 TV (SECAM)
+       0009  Voodoo 4 / Voodoo 5
+               121a 0009  Voodoo5 AGP 5500/6000
+121b  Advanced Telecommunications Modules
+121c  Nippon Texaco., Ltd
+121d  Lippert Automationstechnik GmbH
+121e  CSPI
+121f  Arcus Technology, Inc.
+1220  Ariel Corporation
+       1220  AMCC 5933 TMS320C80 DSP/Imaging board
+1221  Contec Co., Ltd
+1222  Ancor Communications, Inc.
+1223  Artesyn Communication Products
+       0003  PM/Link
+       0004  PM/T1
+       0005  PM/E1
+       0008  PM/SLS
+       0009  BajaSpan Resource Target
+       000a  BajaSpan Section 0
+       000b  BajaSpan Section 1
+       000c  BajaSpan Section 2
+       000d  BajaSpan Section 3
+       000e  PM/PPC
+1224  Interactive Images
+1225  Power I/O, Inc.
+1227  Tech-Source
+1228  Norsk Elektro Optikk A/S
+1229  Data Kinesis Inc.
+122a  Integrated Telecom
+122b  LG Industrial Systems Co., Ltd
+122c  Sican GmbH
+122d  Aztech System Ltd
+       1206  368DSP
+       50dc  3328 Audio
+               122d 0001  3328 Audio
+       80da  3328 Audio
+               122d 0001  3328 Audio
+122e  Xyratex
+122f  Andrew Corporation
+1230  Fishcamp Engineering
+1231  Woodward McCoach, Inc.
+1232  GPT Limited
+1233  Bus-Tech, Inc.
+1234  Technical Corp.
+1235  Risq Modular Systems, Inc.
+1236  Sigma Designs Corporation
+       0000  RealMagic64/GX
+       6401  REALmagic 64/GX (SD 6425)
+1237  Alta Technology Corporation
+1238  Adtran
+1239  3DO Company
+123a  Visicom Laboratories, Inc.
+123b  Seeq Technology, Inc.
+123c  Century Systems, Inc.
+123d  Engineering Design Team, Inc.
+       0000  EasyConnect 8/32
+       0002  EasyConnect 8/64
+       0003  EasyIO
+123e  Simutech, Inc.
+123f  C-Cube Microsystems
+       00e4  MPEG
+       8120  E4?
+               11bd 0006  DV500 E4
+               11bd 000a  DV500 E4
+       8888  Cinemaster C 3.0 DVD Decoder
+               1002 0001  Cinemaster C 3.0 DVD Decoder
+               1002 0002  Cinemaster C 3.0 DVD Decoder
+               1328 0001  Cinemaster C 3.0 DVD Decoder
+1240  Marathon Technologies Corp.
+1241  DSC Communications
+1242  Jaycor Networks, Inc.
+       4643  FCI-1063 Fibre Channel Adapter
+1243  Delphax
+1244  AVM Audiovisuelles MKTG & Computer System GmbH
+       0700  B1 ISDN
+       0a00  A1 ISDN [Fritz]
+               1244 0a00  FRITZ!Card ISDN Controller
+1245  A.P.D., S.A.
+1246  Dipix Technologies, Inc.
+1247  Xylon Research, Inc.
+1248  Central Data Corporation
+1249  Samsung Electronics Co., Ltd.
+124a  AEG Electrocom GmbH
+124b  SBS/Greenspring Modular I/O
+124c  Solitron Technologies, Inc.
+124d  Stallion Technologies, Inc.
+       0000  EasyConnection 8/32
+       0002  EasyConnection 8/64
+       0003  EasyIO
+124e  Cylink
+124f  Infotrend Technology, Inc.
+       0041  IFT-2000 Series RAID Controller
+1250  Hitachi Microcomputer System Ltd
+1251  VLSI Solutions Oy
+1253  Guzik Technical Enterprises
+1254  Linear Systems Ltd.
+1255  Optibase Ltd
+       1110  MPEG Forge
+       1210  MPEG Fusion
+       2110  VideoPlex
+       2120  VideoPlex CC
+       2130  VideoQuest
+1256  Perceptive Solutions, Inc.
+       4201  PCI-2220I
+       4401  PCI-2240I
+       5201  PCI-2000
+1257  Vertex Networks, Inc.
+1258  Gilbarco, Inc.
+1259  Allied Telesyn International
+       2560  AT-2560 Fast Ethernet Adapter (i82557B)
+125a  ABB Power Systems
+125b  Asix Electronics Corporation
+       1400  ALFA GFC2204
+125c  Aurora Technologies, Inc.
+125d  ESS Technology
+       0000  ES336H Fax Modem (Early Model)
+       1948  Solo?
+       1968  ES1968 Maestro 2
+               1028 0085  ES1968 Maestro-2 PCI
+               1033 8051  ES1968 Maestro-2 Audiodrive
+       1969  ES1969 Solo-1 Audiodrive
+               1014 0166  ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
+               125d 8888  Solo-1 Audio Adapter
+               525f c888  ES1969 SOLO-1 AudioDrive (+ES1938)
+       1978  ES1978 Maestro 2E
+               1033 803c  ES1978 Maestro-2E Audiodrive
+               1033 8058  ES1978 Maestro-2E Audiodrive
+               1092 4000  Monster Sound MX400
+               1179 0001  ES1978 Maestro-2E Audiodrive
+       1988  ES1988 Allegro-1
+               1092 4100  Sonic Impact S100
+               125d 1988  ESS Allegro-1 Audiodrive
+       1989  ESS Modem
+               125d 1989  ESS Modem
+       1998  ES1983S Maestro-3i PCI Audio Accelerator
+       1999  ES1983S Maestro-3i PCI Modem Accelerator
+       2808  ES336H Fax Modem (Later Model)
+       2838  ES2838/2839 SuperLink Modem
+       2898  ES2898 Modem
+125e  Specialvideo Engineering SRL
+125f  Concurrent Technologies, Inc.
+1260  Harris Semiconductor
+       8130  HMP8130 NTSC/PAL Video Decoder
+       8131  HMP8131 NTSC/PAL Video Decoder
+1261  Matsushita-Kotobuki Electronics Industries, Ltd.
+1262  ES Computer Company, Ltd.
+1263  Sonic Solutions
+1264  Aval Nagasaki Corporation
+1265  Casio Computer Co., Ltd.
+1266  Microdyne Corporation
+       0001  NE10/100 Adapter (i82557B)
+       1910  NE2000Plus (RT8029) Ethernet Adapter
+               1266 1910  NE2000Plus Ethernet Adapter
+1267  S. A. Telecommunications
+       5352  PCR2101
+       5a4b  Telsat Turbo
+1268  Tektronix
+1269  Thomson-CSF/TTM
+126a  Lexmark International, Inc.
+126b  Adax, Inc.
+126c  Northern Telecom
+126d  Splash Technology, Inc.
+126e  Sumitomo Metal Industries, Ltd.
+126f  Silicon Motion, Inc.
+       0710  SM710 LynxEM
+       0712  SM712 LynxEM+
+       0720  SM720 Lynx3DM
+       0810  SM810 LynxE
+       0811  SM811 LynxE
+       0820  SM820 Lynx3D
+       0910  SM910
+1270  Olympus Optical Co., Ltd.
+1271  GW Instruments
+1272  Telematics International
+1273  Hughes Network Systems
+       0002  DirecPC
+1274  Ensoniq
+       1371  ES1371 [AudioPCI-97]
+               0e11 b1a7  ES1371, ES1373 AudioPCI
+               1033 80ac  ES1371, ES1373 AudioPCI
+               1042 1854  Tazer
+               107b 8054  Tabor2
+               1274 1371  Creative Sound Blaster AudioPCI64V, AudioPCI128
+               1462 6470  ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A
+               1462 6560  ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10
+               1462 6630  ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A
+               1462 6631  ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A
+               1462 6632  ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A
+               1462 6633  ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A
+               1462 6820  ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00
+               1462 6822  ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A
+               1462 6830  ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00
+               1462 6880  ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00
+               1462 6900  ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00
+               1462 6910  ES1371, ES1373 AudioPCI On Motherboard MS-6191
+               1462 6930  ES1371, ES1373 AudioPCI On Motherboard MS-6193
+               1462 6990  ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A
+               1462 6991  ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A
+               14a4 2077  ES1371, ES1373 AudioPCI On Motherboard KR639
+               14a4 2105  ES1371, ES1373 AudioPCI On Motherboard MR800
+               14a4 2107  ES1371, ES1373 AudioPCI On Motherboard MR801
+               14a4 2172  ES1371, ES1373 AudioPCI On Motherboard DR739
+               1509 9902  ES1371, ES1373 AudioPCI On Motherboard KW11
+               1509 9903  ES1371, ES1373 AudioPCI On Motherboard KW31
+               1509 9904  ES1371, ES1373 AudioPCI On Motherboard KA11
+               1509 9905  ES1371, ES1373 AudioPCI On Motherboard KC13
+               152d 8801  ES1371, ES1373 AudioPCI On Motherboard CP810E
+               152d 8802  ES1371, ES1373 AudioPCI On Motherboard CP810
+               152d 8803  ES1371, ES1373 AudioPCI On Motherboard P3810E
+               152d 8804  ES1371, ES1373 AudioPCI On Motherboard P3810-S
+               152d 8805  ES1371, ES1373 AudioPCI On Motherboard P3820-S
+               270f 2001  ES1371, ES1373 AudioPCI On Motherboard 6CTR
+               270f 2200  ES1371, ES1373 AudioPCI On Motherboard 6WTX
+               270f 3000  ES1371, ES1373 AudioPCI On Motherboard 6WSV
+               270f 3100  ES1371, ES1373 AudioPCI On Motherboard 6WIV2
+               270f 3102  ES1371, ES1373 AudioPCI On Motherboard 6WIV
+               270f 7060  ES1371, ES1373 AudioPCI On Motherboard 6ASA2
+               8086 4249  ES1371, ES1373 AudioPCI On Motherboard BI440ZX
+               8086 424c  ES1371, ES1373 AudioPCI On Motherboard BL440ZX
+               8086 425a  ES1371, ES1373 AudioPCI On Motherboard BZ440ZX
+               8086 4341  ES1371, ES1373 AudioPCI On Motherboard Cayman
+               8086 4343  ES1371, ES1373 AudioPCI On Motherboard Cape Cod
+               8086 4649  ES1371, ES1373 AudioPCI On Motherboard Fire Island
+               8086 464a  ES1371, ES1373 AudioPCI On Motherboard FJ440ZX
+               8086 4d4f  ES1371, ES1373 AudioPCI On Motherboard Montreal
+               8086 4f43  ES1371, ES1373 AudioPCI On Motherboard OC440LX
+               8086 5243  ES1371, ES1373 AudioPCI On Motherboard RC440BX
+               8086 5352  ES1371, ES1373 AudioPCI On Motherboard SunRiver
+               8086 5643  ES1371, ES1373 AudioPCI On Motherboard Vancouver
+               8086 5753  ES1371, ES1373 AudioPCI On Motherboard WS440BX
+       5000  ES1370 [AudioPCI]
+               4942 4c4c  Creative Sound Blaster AudioPCI128
+       5880  5880 AudioPCI
+               1274 2000  Creative Sound Blaster AudioPCI128
+               1274 5880  Creative Sound Blaster AudioPCI128
+               1462 6880  5880 AudioPCI On Motherboard MS-6188 1.00
+               270f 2001  5880 AudioPCI On Motherboard 6CTR
+               270f 2200  5880 AudioPCI On Motherboard 6WTX
+               270f 7040  5880 AudioPCI On Motherboard 6ATA4
+1275  Network Appliance Corporation
+1276  Switched Network Technologies, Inc.
+1277  Comstream
+1278  Transtech Parallel Systems Ltd.
+1279  Transmeta Corporation
+       0295  Northbridge
+       0395  LongRun Northbridge
+       0396  SDRAM controller
+       0397  BIOS scratchpad
+127a  Rockwell International
+       1002  HCF 56k Data/Fax Modem
+               122d 4002  HPG / MDP3858-U # Aztech
+               122d 4005  MDP3858-E # Aztech
+               122d 4007  MDP3858-A/-NZ # Aztech
+               122d 4012  MDP3858-SA # Aztech
+               122d 4017  MDP3858-W # Aztech
+               122d 4018  MDP3858-W # Aztech
+       1003  HCF 56k Data/Fax Modem
+               0e11 b0bc  229-DF Zephyr # Compaq
+               0e11 b114  229-DF Cheetah # Compaq
+               1033 802b  229-DF # NEC
+               13df 1003  PCI56RX Modem # E-Tech Inc
+               13e0 0117  IBM # GVC
+               13e0 0147  IBM # GVC
+               13e0 0197  IBM # GVC
+               13e0 01c7  IBM # GVC
+               13e0 01f7  IBM # GVC
+               1436 1003  IBM # CIS
+               1436 1103  IBM # CIS
+               1436 1602  Compaq 229-DF Ducati
+       1004  HCF 56k Data/Fax/Voice Modem
+               10cf 1059  Fujitsu 229-DFRT
+       1005  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+               1033 8029  229-DFSV # NEC
+               1033 8054  Modem # NEC
+               10cf 103c  Fujitsu
+               10cf 1055  Fujitsu 229-DFSV
+               10cf 1056  Fujitsu 229-DFSV
+               122d 4003  MDP3858SP-U # Aztech
+               122d 4006  Packard Bell MDP3858V-E # Aztech
+               122d 4008  MDP3858SP-A/SP-NZ # Aztech
+               122d 4009  MDP3858SP-E # Aztech
+               122d 4010  MDP3858V-U # Aztech
+               122d 4011  MDP3858SP-SA # Aztech
+               122d 4013  MDP3858V-A/V-NZ # Aztech
+               122d 4015  MDP3858SP-W # Aztech
+               122d 4016  MDP3858V-W # Aztech
+               122d 4019  MDP3858V-SA # Aztech
+               13df 1005  PCI56RVP Modem  # E-Tech Inc
+               13e0 0187  IBM # GVC
+               13e0 01a7  IBM # GVC
+               13e0 01b7  IBM # GVC
+               13e0 01d7  IBM # GVC
+               1436 1005  IBM # CIS
+               1436 1105  IBM # CIS
+       1023  HCF 56k Data/Fax Modem
+               122d 4020  Packard Bell MDP3858-WE # Aztech
+               122d 4023  MDP3858-UE # Aztech
+               13e0 0247  IBM # GVC
+               13e0 0297  IBM # GVC
+               13e0 02c7  IBM # GVC
+               1436 1203  IBM # CIS
+               1436 1303  IBM # CIS
+       1024  HCF 56k Data/Fax/Voice Modem
+       1025  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+               10cf 106a  Fujitsu 235-DFSV
+               122d 4021  Packard Bell MDP3858V-WE # Aztech
+               122d 4022  MDP3858SP-WE # Aztech
+               122d 4024  MDP3858V-UE # Aztech
+               122d 4025  MDP3858SP-UE # Aztech
+       1026  HCF 56k PCI Speakerphone Modem
+       1035  HCF 56k PCI Speakerphone Modem
+       1085  HCF 56k Volcano PCI Modem
+       2005  HCF 56k Data/Fax Modem
+               104d 8044  229-DFSV # Sony
+               104d 8045  229-DFSV # Sony
+               104d 8055  PBE/Aztech 235W-DFSV # Sony
+               104d 8056  235-DFSV # Sony
+               104d 805a  Modem # Sony
+               104d 805f  Modem # Sony
+               104d 8074  Modem # Sony
+       2013  HSF 56k Data/Fax Modem
+               1179 0001  Modem # Toshiba
+               1179 ff00  Modem # Toshiba
+       2014  HSF 56k Data/Fax/Voice Modem
+               10cf 1057  Fujitsu Citicorp III
+               122d 4050  MSP3880-U # Aztech
+               122d 4055  MSP3880-W # Aztech
+       2015  HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+               10cf 1063  Fujitsu
+               10cf 1064  Fujitsu
+               1468 2015  Fujitsu
+       2016  HSF 56k Data/Fax/Voice/Spkp Modem
+               122d 4051  MSP3880V-W # Aztech
+               122d 4052  MSP3880SP-W # Aztech
+               122d 4054  MSP3880V-U # Aztech
+               122d 4056  MSP3880SP-U # Aztech
+               122d 4057  MSP3880SP-A # Aztech
+       4311  Riptide HSF 56k PCI Modem
+               127a 4311  Ring Modular? Riptide HSF RT HP Dom
+               13e0 0210  HP-GVC
+       4320  Riptide PCI Audio Controller
+               1235 4320  Riptide PCI Audio Controller
+       4321  Riptide HCF 56k PCI Modem
+               1235 4321  Hewlett Packard DF
+               1235 4324  Hewlett Packard DF
+               13e0 0210  Hewlett Packard DF
+               144d 2321  Riptide # Samsung
+       4322  Riptide PCI Game Controller
+               1235 4322  Riptide PCI Game Controller
+       8234  RapidFire 616X ATM155 Adapter
+               108d 0027  RapidFire 616X ATM155 Adapter
+127b  Pixera Corporation
+127c  Crosspoint Solutions, Inc.
+127d  Vela Research
+127e  Winnov, L.P.
+127f  Fujifilm
+1280  Photoscript Group Ltd.
+1281  Yokogawa Electric Corporation
+1282  Davicom Semiconductor, Inc.
+       9009  Ethernet 100/10 MBit
+       9100  Ethernet 100/10 MBit
+       9102  Ethernet 100/10 MBit
+       9132  Ethernet 100/10 MBit
+1283  Integrated Technology Express, Inc.
+       673a  IT8330G
+       8330  IT8330G
+       8888  IT8888F PCI to ISA Bridge with SMB
+       8889  IT8889F PCI to ISA Bridge
+       e886  IT8330G
+1284  Sahara Networks, Inc.
+1285  Platform Technologies, Inc.
+       0100  AGOGO sound chip (aka ESS Maestro 1)
+1286  Mazet GmbH
+1287  M-Pact, Inc.
+       001e  LS220D DVD Decoder
+       001f  LS220C DVD Decoder
+1288  Timestep Corporation
+1289  AVC Technology, Inc.
+128a  Asante Technologies, Inc.
+128b  Transwitch Corporation
+128c  Retix Corporation
+128d  G2 Networks, Inc.
+       0021  ATM155 Adapter
+128e  Hoontech Corporation/Samho Multi Tech Ltd.
+       0008  ST128 WSS/SB
+       0009  ST128 SAM9407
+       000a  ST128 Game Port
+       000b  ST128 MPU Port
+       000c  ST128 Ctrl Port
+128f  Tateno Dennou, Inc.
+1290  Sord Computer Corporation
+1291  NCS Computer Italia
+1292  Tritech Microelectronics Inc
+1293  Media Reality Technology
+1294  Rhetorex, Inc.
+1295  Imagenation Corporation
+1296  Kofax Image Products
+1297  Holco Enterprise Co, Ltd/Shuttle Computer
+1298  Spellcaster Telecommunications Inc.
+1299  Knowledge Technology Lab.
+129a  VMetro, inc.
+129b  Image Access
+129c  Jaycor
+129d  Compcore Multimedia, Inc.
+129e  Victor Company of Japan, Ltd.
+129f  OEC Medical Systems, Inc.
+12a0  Allen-Bradley Company
+12a1  Simpact Associates, Inc.
+12a2  Newgen Systems Corporation
+12a3  Lucent Technologies
+12a4  NTT Electronics Technology Company
+12a5  Vision Dynamics Ltd.
+12a6  Scalable Networks, Inc.
+12a7  AMO GmbH
+12a8  News Datacom
+12a9  Xiotech Corporation
+12aa  SDL Communications, Inc.
+12ab  Yuan Yuan Enterprise Co., Ltd.
+       3000  MPG-200C PCI DVD Decoder Card
+12ac  Measurex Corporation
+12ad  Multidata GmbH
+12ae  Alteon Networks Inc.
+       0001  AceNIC Gigabit Ethernet (Fibre)
+               1410 0104  Gigabit Ethernet-SX PCI Adapter (14100401)
+       0002  AceNIC Gigabit Ethernet (Copper)
+12af  TDK USA Corp
+12b0  Jorge Scientific Corp
+12b1  GammaLink
+12b2  General Signal Networks
+12b3  Inter-Face Co Ltd
+12b4  FutureTel Inc
+12b5  Granite Systems Inc.
+12b6  Natural Microsystems
+12b7  Cognex Modular Vision Systems Div. - Acumen Inc.
+12b8  Korg
+12b9  US Robotics/3Com
+       1006  WinModem
+               12b9 005c  USR 56k Internal Voice WinModem (Model 3472)
+               12b9 005e  USR 56k Internal WinModem (Models 662975)
+               12b9 0062  USR 56k Internal Voice WinModem (Model 662978)
+               12b9 0068  USR 56k Internal Voice WinModem (Model 5690)
+               12b9 007a  USR 56k Internal Voice WinModem (Model 662974)
+               12b9 007f  USR 56k Internal WinModem (Models 5698, 5699)
+               12b9 0080  USR 56k Internal WinModem (Models 2975, 3528)
+               12b9 0081  USR 56k Internal Voice WinModem (Models 2974, 3529)
+               12b9 0091  USR 56k Internal Voice WinModem (Model 2978)
+       1007  USR 56k Internal WinModem
+               12b9 00a3  USR 56k Internal WinModem (Model 3595)
+       1008  56K FaxModem Model 5610
+               12b9 00a2  USR 56k Internal FAX Modem (Model 2977)
+               12b9 00aa  USR 56k Internal Voice Modem (Model 2976)
+               12b9 00ab  USR 56k Internal Voice Modem (Model 5609)
+               12b9 00ac  USR 56k Internal Voice Modem (Model 3298)
+               12b9 00ad  USR 56k Internal FAX Modem (Model 5610)
+12ba  PMC Sierra
+12bb  Nippon Unisoft Corporation
+12bc  Array Microsystems
+12bd  Computerm Corp.
+12be  Anchor Chips Inc.
+       3041  AN3041Q CO-MEM
+       3042  AN3042Q CO-MEM Lite
+               12be 3042  Anchor Chips Lite Evaluation Board
+12bf  Fujifilm Microdevices
+12c0  Infimed
+12c1  GMM Research Corp
+12c2  Mentec Limited
+12c3  Holtek Microelectronics Inc
+       0058  PCI NE2K Ethernet
+       5598  PCI NE2K Ethernet
+12c4  Connect Tech Inc
+12c5  Picture Elements Incorporated
+       0081  PCIVST [Grayscale Thresholding Engine]
+       0085  Video Simulator/Sender
+       0086  THR2 Multi-scale Thresholder
+12c6  Mitani Corporation
+12c7  Dialogic Corp
+12c8  G Force Co, Ltd
+12c9  Gigi Operations
+12ca  Integrated Computing Engines
+12cb  Antex Electronics Corporation
+12cc  Pluto Technologies International
+12cd  Aims Lab
+12ce  Netspeed Inc.
+12cf  Prophet Systems, Inc.
+12d0  GDE Systems, Inc.
+12d1  PSITech
+12d2  NVidia / SGS Thomson (Joint Venture)
+       0008  NV1
+       0009  DAC64
+       0018  Riva128
+               107b 8030  STB Velocity 128
+               1092 0350  Viper V330
+               1092 1092  Viper V330
+               10b4 1b1b  STB Velocity 128
+               10b4 1b20  STB Velocity 128
+               10b4 1b21  STB Velocity 128
+               10b4 1b22  STB Velocity 128 AGP, NTSC TV-Out
+               10b4 1b23  STB Velocity 128 AGP, PAL TV-Out
+               10b4 1b27  STB Velocity 128 DVD
+               10b4 222a  STB Velocity 128 AGP
+               10b4 2230  STB Velocity 128
+               10b4 2235  STB Velocity 128 AGP
+               2a15 54a3  3DVision-SAGP
+       0019  Riva128ZX
+       0020  TNT
+       0028  TNT2
+       0029  UTNT2
+       002c  VTNT2
+       00a0  ITNT2
+12d3  Vingmed Sound A/S
+12d4  DGM&S
+12d5  Equator Technologies
+12d6  Analogic Corp
+12d7  Biotronic SRL
+12d8  Pericom Semiconductor
+12d9  Aculab PLC
+12da  True Time Inc.
+12db  Annapolis Micro Systems, Inc
+12dc  Symicron Computer Communication Ltd.
+12dd  Management Graphics
+12de  Rainbow Technologies
+12df  SBS Technologies Inc
+12e0  Chase Research
+       0010  ST16C654 Quad UART
+       0020  ST16C654 Quad UART
+       0030  ST16C654 Quad UART
+12e1  Nintendo Co, Ltd
+12e2  Datum Inc. Bancomm-Timing Division
+12e3  Imation Corp - Medical Imaging Systems
+12e4  Brooktrout Technology Inc
+12e5  Apex Semiconductor Inc
+12e6  Cirel Systems
+12e7  Sunsgroup Corporation
+12e8  Crisc Corp
+12e9  GE Spacenet
+12ea  Zuken
+12eb  Aureal Semiconductor
+       0001  Vortex 1
+               104d 8036  AU8820 Vortex Digital Audio Processor
+               1092 2000  Sonic Impact A3D
+               1092 2100  Sonic Impact A3D
+               1092 2110  Sonic Impact A3D
+               1092 2200  Sonic Impact A3D
+               12eb 0001  AU8820 Vortex Digital Audio Processor
+               5053 3355  Montego
+       0002  Vortex 2
+               104d 8049  AU8830 Vortex 3D Digital Audio Processor
+               104d 807b  AU8830 Vortex 3D Digital Audio Processor
+               1092 3000  Monster Sound II
+               1092 3001  Monster Sound II
+               1092 3002  Monster Sound II
+               1092 3003  Monster Sound II
+               1092 3004  Monster Sound II
+               12eb 0001  AU8830 Vortex 3D Digital Audio Processor
+               12eb 0002  AU8830 Vortex 3D Digital Audio Processor
+               12eb 0088  AU8830 Vortex 3D Digital Audio Processor
+               144d 3510  AU8830 Vortex 3D Digital Audio Processor
+               5053 3356  Montego II
+       0003  AU8810 Vortex Digital Audio Processor
+               104d 8049  AU8810 Vortex Digital Audio Processor
+               104d 8077  AU8810 Vortex Digital Audio Processor
+               109f 1000  AU8810 Vortex Digital Audio Processor
+               12eb 0003  AU8810 Vortex Digital Audio Processor
+               1462 6780  AU8810 Vortex Digital Audio Processor
+               14a4 2073  AU8810 Vortex Digital Audio Processor
+               14a4 2091  AU8810 Vortex Digital Audio Processor
+               14a4 2104  AU8810 Vortex Digital Audio Processor
+               14a4 2106  AU8810 Vortex Digital Audio Processor
+       8803  Vortex 56k Software Modem
+               12eb 8803  Vortex 56k Software Modem
+12ec  3A International, Inc.
+12ed  Optivision Inc.
+12ee  Orange Micro
+12ef  Vienna Systems
+12f0  Pentek
+12f1  Sorenson Vision Inc
+12f2  Gammagraphx, Inc.
+12f3  Radstone Technology
+12f4  Megatel
+12f5  Forks
+12f6  Dawson France
+12f7  Cognex
+12f8  Electronic Design GmbH
+       0002  VideoMaker
+12f9  Four Fold Ltd
+12fb  Spectrum Signal Processing
+12fc  Capital Equipment Corp
+12fd  I2S
+12fe  ESD Electronic System Design GmbH
+12ff  Lexicon
+1300  Harman International Industries Inc
+1302  Computer Sciences Corp
+1303  Innovative Integration
+1304  Juniper Networks
+1305  Netphone, Inc
+1306  Duet Technologies
+1307  Computer Boards
+       0001  PCI-DAS1602/16
+       000b  PCI-DIO48H
+       000c  PCI-PDISO8
+       000d  PCI-PDISO16
+       000f  PCI-DAS1200
+       0010  PCI-DAS1602/12
+       0014  PCI-DIO24H
+       0015  PCI-DIO24H/CTR3
+       0016  PCI-DIO48H/CTR15
+       0017  PCI-DIO96H
+       0018  PCI-CTR05
+       0019  PCI-DAS1200/JR
+       001a  PCI-DAS1001
+       001b  PCI-DAS1002
+       001c  PCI-DAS1602JR/16
+       001d  PCI-DAS6402/16
+       001e  PCI-DAS6402/12
+       001f  PCI-DAS16/M1
+       0020  PCI-DDA02/12
+       0021  PCI-DDA04/12
+       0022  PCI-DDA08/12
+       0023  PCI-DDA02/16
+       0024  PCI-DDA04/16
+       0025  PCI-DDA08/16
+       0026  PCI-DAC04/12-HS
+       0027  PCI-DAC04/16-HS
+       0028  PCI-DIO24
+       0029  PCI-DAS08
+       002c  PCI-INT32
+       0033  PCI-DUAL-AC5
+       0034  PCI-DAS-TC
+       0035  PCI-DAS64/M1/16
+       0036  PCI-DAS64/M2/16
+       0037  PCI-DAS64/M3/16
+       004c  PCI-DAS1000
+1308  Jato Technologies Inc.
+       0001  NetCelerator Adapter
+               1308 0001  NetCelerator Adapter
+1309  AB Semiconductor Ltd
+130a  Mitsubishi Electric Microcomputer
+130b  Colorgraphic Communications Corp
+130c  Ambex Technologies, Inc
+130d  Accelerix Inc
+130e  Yamatake-Honeywell Co. Ltd
+130f  Advanet Inc
+1310  Gespac
+1311  Videoserver, Inc
+1312  Acuity Imaging, Inc
+1313  Yaskawa Electric Co.
+1316  Teradyne Inc
+1317  Linksys
+       0981  Fast Ethernet 10/100
+       0985  Network Everywhere Fast Ethernet 10/100 model NC100
+       1985  Fast Ethernet 10/100
+1318  Packet Engines Inc.
+       0911  PCI Ethernet Adapter
+1319  Fortemedia, Inc
+       0801  Xwave QS3000A [FM801]
+       0802  Xwave QS3000A [FM801 game port]
+       1000  FM801 PCI Audio
+       1001  FM801 PCI Joystick
+131a  Finisar Corp.
+131c  Nippon Electro-Sensory Devices Corp
+131d  Sysmic, Inc.
+131e  Xinex Networks Inc
+131f  Siig Inc
+       1000  CyberSerial (1-port) 16550
+       1001  CyberSerial (1-port) 16650
+       1002  CyberSerial (1-port) 16850
+       1010  Duet 1S(16550)+1P
+       1011  Duet 1S(16650)+1P
+       1012  Duet 1S(16850)+1P
+       1020  CyberParallel (1-port)
+       1021  CyberParallel (2-port)
+       1030  CyberSerial (2-port) 16550
+       1031  CyberSerial (2-port) 16650
+       1032  CyberSerial (2-port) 16850
+       1034  Trio 2S(16550)+1P
+       1035  Trio 2S(16650)+1P
+       1036  Trio 2S(16850)+1P
+       1050  CyberSerial (4-port) 16550
+       1051  CyberSerial (4-port) 16650
+       1052  CyberSerial (4-port) 16850
+       2000  CyberSerial (1-port) 16550
+       2001  CyberSerial (1-port) 16650
+       2002  CyberSerial (1-port) 16850
+       2010  Duet 1S(16550)+1P
+       2011  Duet 1S(16650)+1P
+       2012  Duet 1S(16850)+1P
+       2020  CyberParallel (1-port)
+       2021  CyberParallel (2-port)
+       2030  CyberSerial (2-port) 16550
+               131f 2030  PCI Serial Card
+       2031  CyberSerial (2-port) 16650
+       2032  CyberSerial (2-port) 16850
+       2040  Trio 1S(16550)+2P
+       2041  Trio 1S(16650)+2P
+       2042  Trio 1S(16850)+2P
+       2050  CyberSerial (4-port) 16550
+       2051  CyberSerial (4-port) 16650
+       2052  CyberSerial (4-port) 16850
+       2060  Trio 2S(16550)+1P
+       2061  Trio 2S(16650)+1P
+       2062  Trio 2S(16850)+1P
+1320  Crypto AG
+1321  Arcobel Graphics BV
+1322  MTT Co., Ltd
+1323  Dome Inc
+1324  Sphere Communications
+1325  Salix Technologies, Inc
+1326  Seachange international
+1327  Voss scientific
+1328  quadrant international
+1329  Productivity Enhancement
+132a  Microcom Inc.
+132b  Broadband Technologies
+132c  Micrel Inc
+132d  Integrated Silicon Solution, Inc.
+1330  MMC Networks
+1331  Radisys Corp.
+1332  Micro Memory
+1334  Redcreek Communications, Inc
+1335  Videomail, Inc
+1337  Third Planet Publishing
+1338  BT Electronics
+133a  Vtel Corp
+133b  Softcom Microsystems
+133c  Holontech Corp
+133d  SS Technologies
+133e  Virtual Computer Corp
+133f  SCM Microsystems
+1340  Atalla Corp
+1341  Kyoto Microcomputer Co
+1342  Promax Systems Inc
+1343  Phylon Communications Inc
+1344  Crucial Technology
+1345  Arescom Inc
+1347  Odetics
+1349  Sumitomo Electric Industries, Ltd.
+134a  DTC Technology Corp.
+       0001  Domex 536
+       0002  Domex DMX3194UP SCSI Adapter
+134b  ARK Research Corp.
+134c  Chori Joho System Co. Ltd
+134d  PCTel Inc
+       7890  HSP MicroModem 56
+       7891  HSP MicroModem 56
+               134d 0001  HSP MicroModem 56
+       7892  HSP MicroModem 56
+       7893  HSP MicroModem 56
+       7894  HSP MicroModem 56
+       7895  HSP MicroModem 56
+       7896  HSP MicroModem 56
+       7897  HSP MicroModem 56
+134e  CSTI
+134f  Algo System Co Ltd
+1350  Systec Co. Ltd
+1351  Sonix Inc
+1353  Dassault A.T.
+1354  Dwave System Inc
+1355  Kratos Analytical Ltd
+1356  The Logical Co
+1359  Prisa Networks
+135a  Brain Boxes
+135b  Giganet Inc
+135c  Quatech Inc
+135d  ABB Network Partner AB
+135e  Sealevel Systems Inc
+       7101  Single Port RS-232/422/485/530
+       7201  Dual Port RS-232/422/485 Interface
+       7202  Dual Port RS-232 Interface
+       7401  Four Port RS-232 Interface
+       7402  Four Port RS-422/485 Interface
+       7801  Eight Port RS-232 Interface
+       8001  8001 Digital I/O Adapter
+135f  I-Data International A-S
+1360  Meinberg Funkuhren
+1361  Soliton Systems K.K.
+1362  Fujifacom Corporation
+1363  Phoenix Technology Ltd
+1364  ATM Communications Inc
+1365  Hypercope GmbH
+1366  Teijin Seiki Co. Ltd
+1367  Hitachi Zosen Corporation
+1368  Skyware Corporation
+1369  Digigram
+136a  High Soft Tech
+136b  Kawasaki Steel Corporation
+136c  Adtek System Science Co Ltd
+136d  Gigalabs Inc
+136f  Applied Magic Inc
+1370  ATL Products
+1371  CNet Technology Inc
+1373  Silicon Vision Inc
+1374  Silicom Ltd
+1375  Argosystems Inc
+1376  LMC
+1377  Electronic Equipment Production & Distribution GmbH
+1378  Telemann Co. Ltd
+1379  Asahi Kasei Microsystems Co Ltd
+137a  Mark of the Unicorn Inc
+137b  PPT Vision
+137c  Iwatsu Electric Co Ltd
+137d  Dynachip Corporation
+137e  Patriot Scientific Corporation
+137f  Japan Satellite Systems Inc
+1380  Sanritz Automation Co Ltd
+1381  Brains Co. Ltd
+1382  Marian - Electronic & Software
+1383  Controlnet Inc
+1384  Reality Simulation Systems Inc
+1385  Netgear
+       620a  GA620
+       622a  GA622
+       630a  GA630
+       f311  FA311
+1386  Video Domain Technologies
+1387  Systran Corp
+1388  Hitachi Information Technology Co Ltd
+1389  Applicom International
+       0001  PCI1500PFB [Intelligent fieldbus adaptor]
+138a  Fusion Micromedia Corp
+138b  Tokimec Inc
+138c  Silicon Reality
+138d  Future Techno Designs pte Ltd
+138e  Basler GmbH
+138f  Patapsco Designs Inc
+1390  Concept Development Inc
+1391  Development Concepts Inc
+1392  Medialight Inc
+1393  Moxa Technologies Co Ltd
+       1040  Smartio C104H/PCI
+       1680  Smartio C168H/PCI
+       2040  Intellio CP-204J
+       2180  Intellio C218 Turbo PCI
+       3200  Intellio C320 Turbo PCI
+1394  Level One Communications
+1395  Ambicom Inc
+1396  Cipher Systems Inc
+1397  Cologne Chip Designs GmbH
+       2bd0  ISDN network controller [HFC-PCI]
+               1397 2bd0  ISDN Board
+               e4bf 1000  CI1-1-Harp
+1398  Clarion co. Ltd
+1399  Rios systems Co Ltd
+139a  Alacritech Inc
+139b  Mediasonic Multimedia Systems Ltd
+139c  Quantum 3d Inc
+139d  EPL limited
+139e  Media4
+139f  Aethra s.r.l.
+13a0  Crystal Group Inc
+13a1  Kawasaki Heavy Industries Ltd
+13a2  Ositech Communications Inc
+13a3  Hi-Fn
+13a4  Rascom Inc
+13a5  Audio Digital Imaging Inc
+13a6  Videonics Inc
+13a7  Teles AG
+13a8  Exar Corp.
+13a9  Siemens Medical Systems, Ultrasound Group
+13aa  Broadband Networks Inc
+13ab  Arcom Control Systems Ltd
+13ac  Motion Media Technology Ltd
+13ad  Nexus Inc
+13ae  ALD Technology Ltd
+13af  T.Sqware
+13b0  Maxspeed Corp
+13b1  Tamura corporation
+13b2  Techno Chips Co. Ltd
+13b3  Lanart Corporation
+13b4  Wellbean Co Inc
+13b5  ARM
+13b6  Dlog GmbH
+13b7  Logic Devices Inc
+13b8  Nokia Telecommunications oy
+13b9  Elecom Co Ltd
+13ba  Oxford Instruments
+13bb  Sanyo Technosound Co Ltd
+13bc  Bitran Corporation
+13bd  Sharp corporation
+13be  Miroku Jyoho Service Co. Ltd
+13bf  Sharewave Inc
+13c0  Microgate Corporation
+       0010  SyncLink WAN Adapter
+13c1  3ware Inc
+       1000  3ware ATA-RAID
+       1001  3ware 7000-series ATA-RAID
+13c2  Technotrend Systemtechnik GmbH
+13c3  Janz Computer AG
+13c4  Phase Metrics
+13c5  Alphi Technology Corp
+13c6  Condor Engineering Inc
+13c7  Blue Chip Technology Ltd
+13c8  Apptech Inc
+13c9  Eaton Corporation
+13ca  Iomega Corporation
+13cb  Yano Electric Co Ltd
+13cc  Metheus Corporation
+13cd  Compatible Systems Corporation
+13ce  Cocom A/S
+13cf  Studio Audio & Video Ltd
+13d0  Techsan Electronics Co Ltd
+13d1  Abocom Systems Inc
+13d2  Shark Multimedia Inc
+13d3  IMC Networks
+13d4  Graphics Microsystems Inc
+13d5  Media 100 Inc
+13d6  K.I. Technology Co Ltd
+13d7  Toshiba Engineering Corporation
+13d8  Phobos corporation
+13d9  Apex PC Solutions Inc
+13da  Intresource Systems pte Ltd
+13db  Janich & Klass Computertechnik GmbH
+13dc  Netboost Corporation
+13dd  Multimedia Bundle Inc
+13de  ABB Robotics Products AB
+13df  E-Tech Inc
+       0001  PCI56RVP Modem
+               13df 0001  PCI56RVP Modem
+13e0  GVC Corporation
+13e1  Silicom Multimedia Systems Inc
+13e2  Dynamics Research Corporation
+13e3  Nest Inc
+13e4  Calculex Inc
+13e5  Telesoft Design Ltd
+13e6  Argosy research Inc
+13e7  NAC Incorporated
+13e8  Chip Express Corporation
+13e9  Chip Express Corporation
+13ea  Dallas Semiconductor
+13eb  Hauppauge Computer Works Inc
+13ec  Zydacron Inc
+13ed  Raytheon E-Systems
+13ee  Hayes Microcomputer Products Inc
+13ef  Coppercom Inc
+13f0  Sundance Technology Inc
+       0201  Sundance Ethernet
+13f1  Oce' - Technologies B.V.
+13f2  Ford Microelectronics Inc
+13f3  Mcdata Corporation
+13f4  Troika Design Inc
+13f5  Kansai Electric Co. Ltd
+13f6  C-Media Electronics Inc
+       0100  CM8338A
+               13f6 ffff  CMI8338/C3DX PCI Audio Device
+       0101  CM8338B
+               13f6 0101  CMI8338-031 PCI Audio Device
+       0111  CM8738
+               13f6 0111  CMI8738/C3DX PCI Audio Device
+       0211  CM8738
+13f7  Wildfire Communications
+13f8  Ad Lib Multimedia Inc
+13f9  NTT Advanced Technology Corp.
+13fa  Pentland Systems Ltd
+13fb  Aydin Corp
+13fc  Computer Peripherals International
+13fd  Micro Science Inc
+13fe  Advantech Co. Ltd
+13ff  Silicon Spice Inc
+1400  Artx Inc
+       1401  9432 TX
+1401  CR-Systems A/S
+1402  Meilhaus Electronic GmbH
+1403  Ascor Inc
+1404  Fundamental Software Inc
+1405  Excalibur Systems Inc
+1406  Oce' Printing Systems GmbH
+1407  Lava Computer mfg Inc
+       0100  Lava Dual Serial
+       0101  Lava Quatro A
+       0102  Lava Quatro B
+       0200  Lava Port Plus
+       0201  Lava Quad A
+       0202  Lava Quad B
+       0500  Lava Single Serial
+       0600  Lava Port 650
+       8000  Lava Parallel
+       8002  Lava Dual Parallel port A
+       8003  Lava Dual Parallel port B
+       8800  BOCA Research IOPPAR
+1408  Aloka Co. Ltd
+1409  Timedia Technology Co Ltd
+140a  DSP Research Inc
+140b  Ramix Inc
+140c  Elmic Systems Inc
+140d  Matsushita Electric Works Ltd
+140e  Goepel Electronic GmbH
+140f  Salient Systems Corp
+1410  Midas lab Inc
+1411  Ikos Systems Inc
+1412  IC Ensemble Inc
+       1712  ICE1712 [Envy24]
+1413  Addonics
+1414  Microsoft Corporation
+1415  Oxford Semiconductor Ltd
+1416  Multiwave Innovation pte Ltd
+1417  Convergenet Technologies Inc
+1418  Kyushu electronics systems Inc
+1419  Excel Switching Corp
+141a  Apache Micro Peripherals Inc
+141b  Zoom Telephonics Inc
+141d  Digitan Systems Inc
+141e  Fanuc Ltd
+141f  Visiontech Ltd
+1420  Psion Dacom plc
+1421  Ads Technologies Inc
+1422  Ygrec Systems Co Ltd
+1423  Custom Technology Corp.
+1424  Videoserver Connections
+1425  ASIC Designers Inc
+1426  Storage Technology Corp.
+1427  Better On-Line Solutions
+1428  Edec Co Ltd
+1429  Unex Technology Corp.
+142a  Kingmax Technology Inc
+142b  Radiolan
+142c  Minton Optic Industry Co Ltd
+142d  Pix stream Inc
+142e  Vitec Multimedia
+142f  Radicom Research Inc
+1430  ITT Aerospace/Communications Division
+1431  Gilat Satellite Networks
+1432  Edimax Computer Co.
+1433  Eltec Elektronik GmbH
+1435  Real Time Devices US Inc.
+1436  CIS Technology Inc
+1437  Nissin Inc Co
+1438  Atmel-dream
+1439  Outsource Engineering & Mfg. Inc
+143a  Stargate Solutions Inc
+143b  Canon Research Center, America
+143c  Amlogic Inc
+143d  Tamarack Microelectronics Inc
+143e  Jones Futurex Inc
+143f  Lightwell Co Ltd - Zax Division
+1440  ALGOL Corp.
+1441  AGIE Ltd
+1442  Phoenix Contact GmbH & Co.
+1443  Unibrain S.A.
+1444  TRW
+1445  Logical DO Ltd
+1446  Graphin Co Ltd
+1447  AIM GmBH
+1448  Alesis Studio Electronics
+1449  TUT Systems Inc
+144a  Adlink Technology
+       7296  PCI-7296
+       7432  PCI-7432
+       7433  PCI-7433
+       7434  PCI-7434
+       7841  PCI-7841
+       8133  PCI-8133
+       8554  PCI-8554
+       9111  PCI-9111
+       9113  PCI-9113
+       9114  PCI-9114
+144b  Loronix Information Systems Inc
+144c  Catalina Research Inc
+144d  Samsung Electronics Co Ltd
+144e  OLITEC
+144f  Askey Computer Corp.
+1450  Octave Communications Ind.
+1451  SP3D Chip Design GmbH
+1453  MYCOM Inc
+1454  Altiga Networks
+1455  Logic Plus Plus Inc
+1456  Advanced Hardware Architectures
+1457  Nuera Communications Inc
+1458  Giga-byte Technology
+1459  DOOIN Electronics
+145a  Escalate Networks Inc
+145b  PRAIM SRL
+145c  Cryptek
+145d  Gallant Computer Inc
+145e  Aashima Technology B.V.
+145f  Baldor Electric Company
+       0001  NextMove PCI
+1460  DYNARC INC
+1461  Avermedia Technologies Inc
+1462  Micro-star International Co Ltd
+1463  Fast Corporation
+1464  Interactive Circuits & Systems Ltd
+1465  GN NETTEST Telecom DIV.
+1466  Designpro Inc.
+1467  DIGICOM SPA
+1468  AMBIT Microsystem Corp.
+1469  Cleveland Motion Controls
+146a  IFR
+146b  Parascan Technologies Ltd
+146c  Ruby Tech Corp.
+146d  Tachyon, INC.
+146e  Williams Electronics Games, Inc.
+146f  Multi Dimensional Consulting Inc
+1470  Bay Networks
+1471  Integrated Telecom Express Inc
+1472  DAIKIN Industries, Ltd
+1473  ZAPEX Technologies Inc
+1474  Doug Carson & Associates
+1475  PICAZO Communications
+1476  MORTARA Instrument Inc
+1477  Net Insight
+1478  DIATREND Corporation
+1479  TORAY Industries Inc
+147a  FORMOSA Industrial Computing
+147b  ABIT Computer Corp.
+147c  AWARE, Inc.
+147d  Interworks Computer Products
+147e  Matsushita Graphic Communication Systems, Inc.
+147f  NIHON UNISYS, Ltd.
+1480  SCII Telecom
+1481  BIOPAC Systems Inc
+1482  ISYTEC - Integrierte Systemtechnik GmbH
+1483  LABWAY Corporation
+1484  Logic Corporation
+1485  ERMA - Electronic GmbH
+1486  L3 Communications Telemetry & Instrumentation
+1487  MARQUETTE Medical Systems
+1488  KONTRON Electronik GmbH
+1489  KYE Systems Corporation
+148a  OPTO
+148b  INNOMEDIALOGIC Inc.
+148c  C.P. Technology Co. Ltd
+148d  DIGICOM Systems, Inc.
+       1003  HCF 56k Data/Fax Modem
+148e  OSI Plus Corporation
+148f  Plant Equipment, Inc.
+1490  Stone Microsystems PTY Ltd.
+1491  ZEAL Corporation
+1492  Time Logic Corporation
+1493  MAKER Communications
+1494  WINTOP Technology, Inc.
+1495  TOKAI Communications Industry Co. Ltd
+1496  JOYTECH Computer Co., Ltd.
+1497  SMA Regelsysteme GmbH
+1498  TEWS Datentechnik GmbH
+1499  EMTEC CO., Ltd
+149a  ANDOR Technology Ltd
+149b  SEIKO Instruments Inc
+149c  OVISLINK Corp.
+149d  NEWTEK Inc
+149e  Mapletree Networks Inc.
+149f  LECTRON Co Ltd
+14a0  SOFTING GmbH
+14a1  Systembase Co Ltd
+14a2  Millennium Engineering Inc
+14a3  Maverick Networks
+14a4  GVC/BCM Advanced Research
+14a5  XIONICS Document Technologies Inc
+14a6  INOVA Computers GmbH & Co KG
+14a7  MYTHOS Systems Inc
+14a8  FEATRON Technologies Corporation
+14a9  HIVERTEC Inc
+14aa  Advanced MOS Technology Inc
+14ab  Mentor Graphics Corp.
+14ac  Novaweb Technologies Inc
+14ad  Time Space Radio AB
+14ae  CTI, Inc
+14af  Guillemot Corporation
+14b0  BST Communication Technology Ltd
+14b1  Nextcom K.K.
+14b2  ENNOVATE Networks Inc
+14b3  XPEED Inc
+       0000  DSL NIC
+14b4  PHILIPS Business Electronics B.V.
+14b5  Creamware GmbH
+14b6  Quantum Data Corp.
+14b7  PROXIM Inc
+       0001  Symphony 4110
+14b8  Techsoft Technology Co Ltd
+14b9  AIRONET Wireless Communications
+       0001  PC4800
+14ba  INTERNIX Inc.
+14bb  SEMTECH Corporation
+14bc  Globespan Semiconductor Inc.
+14bd  CARDIO Control N.V.
+14be  L3 Communications
+14bf  SPIDER Communications Inc.
+14c0  COMPAL Electronics Inc
+14c1  MYRICOM Inc.
+14c2  DTK Computer
+14c3  MEDIATEK Corp.
+14c4  IWASAKI Information Systems Co Ltd
+14c5  Automation Products AB
+14c6  Data Race Inc
+14c7  Modular Technology Holdings Ltd
+14c8  Turbocomm Tech. Inc.
+14c9  ODIN Telesystems Inc
+14ca  PE Logic Corp.
+14cb  Billionton Systems Inc
+14cc  NAKAYO Telecommunications Inc
+14cd  Universal Scientific Ind.
+14ce  Whistle Communications
+14cf  TEK Microsystems Inc.
+14d0  Ericsson Axe R & D
+14d1  Computer Hi-Tech Co Ltd
+14d2  Titan Electronics Inc
+14d3  CIRTECH (UK) Ltd
+14d4  Panacom Technology Corp
+14d5  Nitsuko Corporation
+14d6  Accusys Inc
+14d7  Hirakawa Hewtech Corp
+14d8  HOPF Elektronik GmbH
+14d9  Alpha Processor Inc
+14da  National Aerospace Laboratories
+14db  AFAVLAB Technology Inc
+       2120  TK9902
+14dc  Amplicon Liveline Ltd
+       0000  PCI230
+       0001  PCI242
+       0002  PCI244
+       0003  PCI247
+       0004  PCI248
+       0005  PCI249
+       0006  PCI260
+       0007  PCI224
+       0008  PCI234
+       0009  PCI236
+14dd  Boulder Design Labs Inc
+14de  Applied Integration Corporation
+14df  ASIC Communications Corp
+14e1  INVERTEX
+14e2  INFOLIBRIA
+14e3  AMTELCO
+14e4  BROADCOM Corporation
+       1644  NetXtreme BCM5700 Gigabit Ethernet
+               10b7 1000  3C996-T 1000BaseTX
+               10b7 1001  3C996B-T 1000BaseTX
+               10b7 1002  3C996C-T 1000BaseTX
+               10b7 1003  3C997-T 1000BaseTX
+               10b7 1004  3C996-SX 1000BaseSX
+               10b7 1005  3C997-SX 1000BaseSX
+               14e4 0002  NetXtreme 1000BaseSX
+               14e4 0003  NetXtreme 1000BaseSX
+               14e4 0004  NetXtreme 1000BaseTX
+               14e4 1644  NetXtreme BCM5700 1000BaseTX
+       1645  NetXtreme BCM5701 Gigabit Ethernet
+               0e11 007c  NC7770 1000BaseTX
+               0e11 007d  NC6770 1000BaseSX
+               0e11 0085  NC7780 1000BaseTX
+               10b7 1004  3C996-SX 1000BaseSX
+               10b7 1006  3C996B-T 1000BaseTX
+               10b7 1007  3C1000-T 1000BaseTX
+               10b7 1008  3C940-BR01 1000BaseTX
+               14e4 0001  NetXtreme BCM5701 1000BaseTX
+               14e4 0005  NetXtreme BCM5701 1000BaseTX
+               14e4 0006  NetXtreme BCM5701 1000BaseTX
+               14e4 0007  NetXtreme BCM5701 1000BaseSX
+               14e4 0008  NetXtreme BCM5701 1000BaseTX
+               14e4 8008  NetXtreme BCM5701 1000BaseTX
+       1647  NetXtreme BCM5703 Gigabit Ethernet
+       5820  BCM5820 Crypto Accelerator
+14e5  Pixelfusion Ltd
+14e6  SHINING Technology Inc
+14e7  3CX
+14e8  RAYCER Inc
+14e9  GARNETS System CO Ltd
+14ea  PLANEX COMMUNICATIONS Inc
+14eb  SEIKO EPSON Corp
+14ec  ACQIRIS
+14ed  DATAKINETICS Ltd
+14ee  MASPRO DENKOH Corp
+14ef  CARRY Computer ENG. CO Ltd
+14f0  CANON RESEARCH CENTRE FRANCE
+14f1  Conexant
+       1033  HCF 56k Data/Fax Modem
+               122d 4027  Dell Zeus - MDP3880-W(B) Data Fax Modem
+               122d 4030  Dell Mercury - MDP3880-U(B) Data Fax Modem
+               122d 4034  Dell Thor - MDP3880-W(U) Data Fax Modem
+               13e0 020d  Dell Copper
+               13e0 020e  Dell Silver
+               13e0 0290  Compaq Goldwing
+               13e0 02c0  Compaq Scooter
+               144f 1500  IBM P85-DF (1)
+               144f 1501  IBM P85-DF (2)
+               144f 150a  IBM P85-DF (3)
+               144f 150b  IBM P85-DF Low Profile (1)
+               144f 1510  IBM P85-DF Low Profile (2)
+       1034  HCF 56k Data/Fax/Voice Modem
+       1035  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+               10cf 1098  Fujitsu P85-DFSV
+       1036  HCF 56k Data/Fax/Voice/Spkp Modem
+               122d 4029  MDP3880SP-W
+               122d 4031  MDP3880SP-U
+               13e0 0209  Dell Titanium
+               13e0 020a  Dell Graphite
+               13e0 0260  Gateway Red Owl
+               13e0 0270  Gateway White Horse
+       1052  HCF 56k Data/Fax Modem (Worldwide)
+       1053  HCF 56k Data/Fax Modem (Worldwide)
+       1054  HCF 56k Data/Fax/Voice Modem (Worldwide)
+       1055  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide)
+       1056  HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+       1057  HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide)
+       1059  HCF 56k Data/Fax/Voice Modem (Worldwide)
+       1063  HCF 56k Data/Fax Modem
+       1064  HCF 56k Data/Fax/Voice Modem
+       1065  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+       1066  HCF 56k Data/Fax/Voice/Spkp Modem
+               122d 4033  Dell Athena - MDP3900V-U
+       1433  HCF 56k Data/Fax Modem
+       1434  HCF 56k Data/Fax/Voice Modem
+       1435  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+       1436  HCF 56k Data/Fax Modem
+       1453  HCF 56k Data/Fax Modem
+               144f 1502  IBM P95-DF (1)
+               144f 1503  IBM P95-DF (2)
+       1454  HCF 56k Data/Fax/Voice Modem
+       1455  HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+       1456  HCF 56k Data/Fax/Voice/Spkp Modem
+               122d 4035  Dell Europa - MDP3900V-W
+               122d 4302  Dell MP3930V-W(C) MiniPCI
+       1803  HCF 56k Modem
+               0e11 0023  623-LAN Grizzly
+               0e11 0043  623-LAN Yogi
+       1815  HCF 56k Modem
+               0e11 0022  Grizzly
+               0e11 0042  Yogi
+       2003  HSF 56k Data/Fax Modem
+       2004  HSF 56k Data/Fax/Voice Modem
+       2005  HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+       2006  HSF 56k Data/Fax/Voice/Spkp Modem
+       2013  HSF 56k Data/Fax Modem
+               0e11 b195  Bear
+               0e11 b196  Seminole 1
+               0e11 b1be  Seminole 2
+               155d 6793  HP
+               155d 8850  E Machines
+       2014  HSF 56k Data/Fax/Voice Modem
+       2015  HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+       2016  HSF 56k Data/Fax/Voice/Spkp Modem
+       2043  HSF 56k Data/Fax Modem (WorldW SmartDAA)
+       2044  HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA)
+       2045  HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA)
+       2046  HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA)
+       2063  HSF 56k Data/Fax Modem (SmartDAA)
+       2064  HSF 56k Data/Fax/Voice Modem (SmartDAA)
+       2065  HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA)
+       2066  HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA)
+       2093  HSF 56k Modem
+               155d 2f07  Legend
+       2143  HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA)
+       2144  HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA)
+       2145  HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA)
+       2146  HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA)
+       2163  HSF 56k Data/Fax/Cell Modem (Mob SmartDAA)
+       2164  HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA)
+       2165  HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA)
+       2166  HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA)
+       2343  HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA)
+       2344  HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA)
+       2345  HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA)
+       2346  HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA)
+       2363  HSF 56k Data/Fax CardBus Modem (Mob SmartDAA)
+       2364  HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA)
+       2365  HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA)
+       2366  HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA)
+       2443  HSF 56k Data/Fax Modem (Mob WorldW SmartDAA)
+       2444  HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA)
+       2445  HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA)
+       2446  HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA)
+       2463  HSF 56k Data/Fax Modem (Mob SmartDAA)
+       2464  HSF 56k Data/Fax/Voice Modem (Mob SmartDAA)
+       2465  HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA)
+       2466  HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA)
+       2f00  HSF 56k HSFi Modem
+               13e0 8d84  IBM HSFi V.90
+               13e0 8d85  Compaq Stinger
+14f2  MOBILITY Electronics
+14f3  BROADLOGIC
+14f4  TOKYO Electronic Industry CO Ltd
+14f5  SOPAC Ltd
+14f6  COYOTE Technologies LLC
+14f7  WOLF Technology Inc
+14f8  AUDIOCODES Inc
+14f9  AG COMMUNICATIONS
+14fa  WANDEL & GOLTERMANN
+14fb  TRANSAS MARINE (UK) Ltd
+14fc  QUADRICS Supercomputers World
+14fd  JAPAN Computer Industry Inc
+14fe  ARCHTEK TELECOM Corp
+14ff  TWINHEAD INTERNATIONAL Corp
+1500  DELTA Electronics, Inc
+1501  BANKSOFT CANADA Ltd
+1502  MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd
+1503  KAWASAKI LSI USA Inc
+1504  KAISER Electronics
+1505  ITA INGENIEURBURO FUR TESTAUFGABEN GmbH
+1506  CHAMELEON Systems Inc
+# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057).
+1507  Motorola ?? / HTEC
+       0001  MPC105 [Eagle]
+       0002  MPC106 [Grackle]
+       0003  MPC8240 [Kahlua]
+       0100  MC145575 [HFC-PCI]
+       0431  KTI829c 100VG
+       4801  Raven
+       4802  Falcon
+       4803  Hawk
+       4806  CPX8216
+1508  HONDA CONNECTORS/MHOTRONICS Inc
+1509  FIRST INTERNATIONAL Computer Inc
+150a  FORVUS RESEARCH Inc
+150b  YAMASHITA Systems Corp
+150c  KYOPAL CO Ltd
+150d  WARPSPEED Inc
+150e  C-PORT Corp
+150f  INTEC GmbH
+1510  BEHAVIOR TECH Computer Corp
+1511  CENTILLIUM Technology Corp
+1512  ROSUN Technologies Inc
+1513  Raychem
+1514  TFL LAN Inc
+1515  Advent design
+1516  MYSON Technology Inc
+1517  ECHOTEK Corp
+1518  PEP MODULAR Computers GmbH
+1519  TELEFON AKTIEBOLAGET LM Ericsson
+151a  Globetek
+       1002  PCI-1002
+       1004  PCI-1004
+       1008  PCI-1008
+151b  COMBOX Ltd
+151c  DIGITAL AUDIO LABS Inc
+151d  Fujitsu Computer Products Of America
+151e  MATRIX Corp
+151f  TOPIC SEMICONDUCTOR Corp
+1520  CHAPLET System Inc
+1521  BELL Corp
+1522  MainPine Ltd
+1523  MUSIC Semiconductors
+1524  ENE Technology Inc
+1525  IMPACT Technologies
+1526  ISS, Inc
+1527  SOLECTRON
+1528  ACKSYS
+1529  AMERICAN MICROSystems Inc
+152a  QUICKTURN DESIGN Systems
+152b  FLYTECH Technology CO Ltd
+152c  MACRAIGOR Systems LLC
+152d  QUANTA Computer Inc
+152e  MELEC Inc
+152f  PHILIPS - CRYPTO
+1530  ACQIS Technology Inc
+1531  CHYRON Corp
+1532  ECHELON Corp
+1533  BALTIMORE
+1534  ROAD Corp
+1535  EVERGREEN Technologies Inc
+1537  DATALEX COMMUNICATIONS
+1538  ARALION Inc
+1539  ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A.
+153a  ONO SOKKI
+153b  TERRATEC Electronic GmbH
+153c  ANTAL Electronic
+153d  FILANET Corp
+153e  TECHWELL Inc
+153f  MIPS DENMARK
+1540  PROVIDEO MULTIMEDIA Co Ltd
+1541  MACHONE Communications
+1542  VIVID Technology Inc
+1543  SILICON Laboratories
+1544  DCM DATA Systems
+1545  VISIONTEK
+1546  IOI Technology Corp
+1547  MITUTOYO Corp
+1548  JET PROPULSION Laboratory
+1549  INTERCONNECT Systems Solutions
+154a  MAX Technologies Inc
+154b  COMPUTEX Co Ltd
+154c  VISUAL Technology Inc
+154d  PAN INTERNATIONAL Industrial Corp
+154e  SERVOTEST Ltd
+154f  STRATABEAM Technology
+1550  OPEN NETWORK Co Ltd
+1551  SMART Electronic DEVELOPMENT GmbH
+1552  RACAL AIRTECH Ltd
+1553  CHICONY Electronics Co Ltd
+1554  PROLINK Microsystems Corp
+1555  GESYTEC GmbH
+1556  PLD APPLICATIONS
+1557  MEDIASTAR Co Ltd
+1558  CLEVO/KAPOK Computer
+1559  SI LOGIC Ltd
+155a  INNOMEDIA Inc
+155b  PROTAC INTERNATIONAL Corp
+155c  Cemax-Icon Inc
+155d  Mac System Co Ltd
+155e  LP Elektronik GmbH
+155f  Perle Systems Ltd
+1560  Terayon Communications Systems
+1561  Viewgraphics Inc
+1562  Symbol Technologies
+1563  A-Trend Technology Co Ltd
+1564  Yamakatsu Electronics Industry Co Ltd
+1565  Biostar Microtech Int'l Corp
+1566  Ardent Technologies Inc
+1567  Jungsoft
+1568  DDK Electronics Inc
+1569  Palit Microsystems Inc.
+156a  Avtec Systems
+156b  2wire Inc
+156c  Vidac Electronics GmbH
+156d  Alpha-Top Corp
+156e  Alfa Inc
+156f  M-Systems Flash Disk Pioneers Ltd
+1570  Lecroy Corp
+1571  Contemporary Controls
+       a001  CCSI PCI20-485 ARCnet
+       a002  CCSI PCI20-485D ARCnet
+       a003  CCSI PCI20-485X ARCnet
+       a004  CCSI PCI20-CXB ARCnet
+       a005  CCSI PCI20-CXS ARCnet
+       a006  CCSI PCI20-FOG-SMA ARCnet
+       a007  CCSI PCI20-FOG-ST ARCnet
+       a008  CCSI PCI20-TB5 ARCnet
+       a009  CCSI PCI20-5-485 5Mbit ARCnet
+       a00a  CCSI PCI20-5-485D 5Mbit ARCnet
+       a00b  CCSI PCI20-5-485X 5Mbit ARCnet
+       a00c  CCSI PCI20-5-FOG-ST 5Mbit ARCnet
+       a00d  CCSI PCI20-5-FOG-SMA 5Mbit ARCnet
+       a201  CCSI PCI22-485 10Mbit ARCnet
+       a202  CCSI PCI22-485D 10Mbit ARCnet
+       a203  CCSI PCI22-485X 10Mbit ARCnet
+       a204  CCSI PCI22-CHB 10Mbit ARCnet
+       a205  CCSI PCI22-FOG_ST 10Mbit ARCnet
+       a206  CCSI PCI22-THB 10Mbit ARCnet
+1572  Otis Elevator Company
+1573  Lattice - Vantis
+1574  Fairchild Semiconductor
+1575  Voltaire Advanced Data Security Ltd
+1576  Viewcast COM
+1578  HITT
+1579  Dual Technology Corp
+157a  Japan Electronics Ind Inc
+157b  Star Multimedia Corp
+157c  Eurosoft (UK)
+       8001  Fix2000 PCI Y2K Compliance Card
+157d  Gemflex Networks
+157e  Transition Networks
+157f  PX Instruments Technology Ltd
+1580  Primex Aerospace Co
+1581  SEH Computertechnik GmbH
+1582  Cytec Corp
+1583  Inet Technologies Inc
+1584  Uniwill Computer Corp
+1585  Logitron
+1586  Lancast Inc
+1587  Konica Corp
+1588  Solidum Systems Corp
+1589  Atlantek Microsystems Pty Ltd
+158a  Digalog Systems Inc
+158b  Allied Data Technologies
+158c  Hitachi Semiconductor & Devices Sales Co Ltd
+158d  Point Multimedia Systems
+158e  Lara Technology Inc
+158f  Ditect Coop
+1590  3pardata Inc
+1591  ARN
+1592  Syba Tech Ltd
+       0781  Multi-IO Card
+       0782  Parallel Port Card 2xEPP
+       0783  Multi-IO Card
+       0785  Multi-IO Card
+       0786  Multi-IO Card
+       0787  Multi-IO Card
+       0788  Multi-IO Card
+       078a  Multi-IO Card
+1593  Bops Inc
+1594  Netgame Ltd
+1595  Diva Systems Corp
+1596  Folsom Research Inc
+1597  Memec Design Services
+1598  Granite Microsystems
+1599  Delta Electronics Inc
+159a  General Instrument
+159b  Faraday Technology Corp
+159c  Stratus Computer Systems
+159d  Ningbo Harrison Electronics Co Ltd
+159e  A-Max Technology Co Ltd
+159f  Galea Network Security
+15a0  Compumaster SRL
+15a1  Geocast Network Systems
+15a2  Catalyst Enterprises Inc
+15a3  Italtel
+15a4  X-Net OY
+15a5  Toyota Macs Inc
+15a6  Sunlight Ultrasound Technologies Ltd
+15a7  SSE Telecom Inc
+15a8  Shanghai Communications Technologies Center
+15aa  Moreton Bay
+15ab  Bluesteel Networks Inc
+15ac  North Atlantic Instruments
+15ad  VMWare Inc
+       0710  Virtual SVGA
+15ae  Amersham Pharmacia Biotech
+15b0  Zoltrix International Ltd
+15b1  Source Technology Inc
+15b2  Mosaid Technologies Inc
+15b3  Mellanox Technology
+15b4  CCI/TRIAD
+15b5  Cimetrics Inc
+15b6  Texas Memory Systems Inc
+15b7  Sandisk Corp
+15b8  ADDI-DATA GmbH
+15b9  Maestro Digital Communications
+15ba  Impacct Technology Corp
+15bb  Portwell Inc
+15bc  Agilent Technologies
+15bd  DFI Inc
+15be  Sola Electronics
+15bf  High Tech Computer Corp (HTC)
+15c0  BVM Ltd
+15c1  Quantel
+15c2  Newer Technology Inc
+15c3  Taiwan Mycomp Co Ltd
+15c4  EVSX Inc
+15c5  Procomp Informatics Ltd
+15c6  Technical University of Budapest
+15c7  Tateyama System Laboratory Co Ltd
+15c8  Penta Media Co Ltd
+15c9  Serome Technology Inc
+15ca  Bitboys OY
+15cb  AG Electronics Ltd
+15cc  Hotrail Inc
+15cd  Dreamtech Co Ltd
+15ce  Genrad Inc
+15cf  Hilscher GmbH
+15d1  Infineon Technologies AG
+15d2  FIC (First International Computer Inc)
+15d3  NDS Technologies Israel Ltd
+15d4  Iwill Corp
+15d5  Tatung Co
+15d6  Entridia Corp
+15d7  Rockwell-Collins Inc
+15d8  Cybernetics Technology Co Ltd
+15d9  Super Micro Computer Inc
+15da  Cyberfirm Inc
+15db  Applied Computing Systems Inc
+15dc  Litronic Inc
+       0001  Argus 300 PCI Cryptography Module
+15dd  Sigmatel Inc
+15de  Malleable Technologies Inc
+15df  Infinilink Corp
+15e0  Cacheflow Inc
+15e1  Voice Technologies Group Inc
+15e2  Quicknet Technologies Inc
+15e3  Networth Technologies Inc
+15e4  VSN Systemen BV
+15e5  Valley technologies Inc
+15e6  Agere Inc
+15e7  Get Engineering Corp
+15e8  National Datacomm Corp
+15e9  Pacific Digital Corp
+15ea  Tokyo Denshi Sekei K.K.
+15eb  Drsearch GmbH
+15ec  Beckhoff GmbH
+15ed  Macrolink Inc
+15ee  In Win Development Inc
+15ef  Intelligent Paradigm Inc
+15f0  B-Tree Systems Inc
+15f1  Times N Systems Inc
+15f2  Diagnostic Instruments Inc
+15f3  Digitmedia Corp
+15f4  Valuesoft
+15f5  Power Micro Research
+15f6  Extreme Packet Device Inc
+15f7  Banctec
+15f8  Koga Electronics Co
+15f9  Zenith Electronics Corp
+15fa  J.P. Axzam Corp
+15fb  Zilog Inc
+15fc  Techsan Electronics Co Ltd
+15fd  N-CUBED.NET
+15fe  Kinpo Electronics Inc
+15ff  Fastpoint Technologies Inc
+1600  Northrop Grumman - Canada Ltd
+1601  Tenta Technology
+1602  Prosys-tec Inc
+1603  Nokia Wireless Communications
+1604  Central System Research Co Ltd
+1605  Pairgain Technologies
+1606  Europop AG
+1607  Lava Semiconductor Manufacturing Inc
+1608  Automated Wagering International
+1609  Scimetric Instruments Inc
+1619  FarSite Communications Ltd
+       0400  FarSync T2P (2 port X.21/V.35/V.24)
+       0440  FarSync T4P (4 port X.21/V.35/V.24)
+1668  Action Tec Electronics Inc
+1813  Ambient Technologies Inc
+1a08  Sierra Semiconductor
+       0000  SC15064
+1b13  Jaton Corp
+1c1c  Symphony
+       0001  82C101
+1d44  DPT
+       a400  PM2x24/PM3224
+1de1  Tekram Technology Co.,Ltd.
+       0391  TRM-S1040
+       2020  DC-390
+       690c  690c
+       dc29  DC290
+2001  Temporal Research Ltd
+21c3  21st Century Computer Corp.
+2348  Racore
+       2010  8142 100VG/AnyLAN
+2646  Kingston Technologies
+270b  Xantel Corporation
+270f  Chaintech Computer Co. Ltd
+2711  AVID Technology Inc.
+2a15  3D Vision(???)
+3000  Hansol Electronics Inc.
+3142  Post Impression Systems.
+3388  Hint Corp
+       8011  VXPro II Chipset
+               3388 8011  VXPro II Chipset CPU to PCI Bridge
+       8012  VXPro II Chipset
+               3388 8012  VXPro II Chipset PCI to ISA Bridge
+       8013  VXPro II Chipset
+               3388 8013  VXPro II Chipset EIDE Controller
+3411  Quantum Designs (H.K.) Inc
+3513  ARCOM Control Systems Ltd
+38ef  4Links
+3d3d  3DLabs
+       0001  GLINT 300SX
+       0002  GLINT 500TX
+       0003  GLINT Delta
+       0004  Permedia
+       0005  Permedia
+       0006  GLINT MX
+       0007  3D Extreme
+       0008  GLINT Gamma G1
+       0009  Permedia II 2D+3D
+               3d3d 0100  AccelStar II 3D Accelerator
+               3d3d 0111  Permedia 3:16
+               3d3d 0114  Santa Ana
+               3d3d 0116  Oxygen GVX1
+               3d3d 0119  Scirocco
+               3d3d 0120  Santa Ana PCL
+               3d3d 0125  Oxygen VX1
+               3d3d 0127  Permedia3 Create!
+       000a  GLINT R3
+               3d3d 0121  Oxygen VX1
+       0100  Permedia II 2D+3D
+       1004  Permedia
+       3d04  Permedia
+       ffff  Glint VGA
+4005  Avance Logic Inc.
+       0300  ALS300 PCI Audio Device
+       0308  ALS300+ PCI Audio Device
+       0309  PCI Input Controller
+       1064  ALG-2064
+       2064  ALG-2064i
+       2128  ALG-2364A GUI Accelerator
+       2301  ALG-2301
+       2302  ALG-2302
+       2303  AVG-2302 GUI Accelerator
+       2364  ALG-2364A
+       2464  ALG-2464
+       2501  ALG-2564A/25128A
+       4000  ALS4000 Audio Chipset
+               4005 4000  ALS4000 Audio Chipset
+4033  Addtron Technology Co, Inc.
+       1360  RTL8139 Ethernet
+4143  Digital Equipment Corp
+416c  Aladdin Knowledge Systems
+       0100  AladdinCARD
+       0200  CPC
+4444  Internext Compression Inc
+4468  Bridgeport machines
+4594  Cogetec Informatique Inc
+45fb  Baldor Electric Company
+4680  Umax Computer Corp
+4843  Hercules Computer Technology Inc
+4916  RedCreek Communications Inc
+       1960  RedCreek PCI adapter
+4943  Growth Networks
+4978  Axil Computer Inc
+4a14  NetVin
+       5000  NV5000SC
+               4a14 5000  RT8029-Based Ethernet Adapter
+4b10  Buslogic Inc.
+4c48  LUNG HWA Electronics
+4ca1  Seanix Technology Inc
+4d51  MediaQ Inc.
+       0200  MQ-200
+4d54  Microtechnica Co Ltd
+4ddc  ILC Data Device Corp
+5053  Voyetra Technologies
+       2010  Daytona Audio Adapter
+5136  S S Technologies
+5143  Qualcomm Inc
+5145  Ensoniq (Old)
+       3031  Concert AudioPCI
+5301  Alliance Semiconductor Corp.
+       0001  ProMotion aT3D
+5333  S3 Inc.
+       0551  Plato/PX (system)
+       5631  86c325 [ViRGE]
+       8800  86c866 [Vision 866]
+       8801  86c964 [Vision 964]
+       8810  86c764_0 [Trio 32 vers 0]
+       8811  86c764/765 [Trio32/64/64V+]
+       8812  86cM65 [Aurora64V+]
+       8813  86c764_3 [Trio 32/64 vers 3]
+       8814  86c767 [Trio 64UV+]
+       8815  86cM65 [Aurora 128]
+       883d  86c988 [ViRGE/VX]
+       8870  FireGL
+       8880  86c868 [Vision 868 VRAM] vers 0
+       8881  86c868 [Vision 868 VRAM] vers 1
+       8882  86c868 [Vision 868 VRAM] vers 2
+       8883  86c868 [Vision 868 VRAM] vers 3
+       88b0  86c928 [Vision 928 VRAM] vers 0
+       88b1  86c928 [Vision 928 VRAM] vers 1
+       88b2  86c928 [Vision 928 VRAM] vers 2
+       88b3  86c928 [Vision 928 VRAM] vers 3
+       88c0  86c864 [Vision 864 DRAM] vers 0
+       88c1  86c864 [Vision 864 DRAM] vers 1
+       88c2  86c864 [Vision 864-P DRAM] vers 2
+       88c3  86c864 [Vision 864-P DRAM] vers 3
+       88d0  86c964 [Vision 964 VRAM] vers 0
+       88d1  86c964 [Vision 964 VRAM] vers 1
+       88d2  86c964 [Vision 964-P VRAM] vers 2
+       88d3  86c964 [Vision 964-P VRAM] vers 3
+       88f0  86c968 [Vision 968 VRAM] rev 0
+       88f1  86c968 [Vision 968 VRAM] rev 1
+       88f2  86c968 [Vision 968 VRAM] rev 2
+       88f3  86c968 [Vision 968 VRAM] rev 3
+       8900  86c755 [Trio 64V2/DX]
+               5333 8900  86C775 Trio64V2/DX
+       8901  Trio 64V2/DX or /GX
+               5333 8901  86C775 Trio64V2/DX, 86C785 Trio64V2/GX
+       8902  Plato/PX
+       8903  Trio 3D business multimedia
+       8904  Trio 64 3D
+               1014 00db  Integrated Trio3D
+               5333 8904  86C365 Trio3D AGP
+       8905  Trio 64V+ family
+       8906  Trio 64V+ family
+       8907  Trio 64V+ family
+       8908  Trio 64V+ family
+       8909  Trio 64V+ family
+       890a  Trio 64V+ family
+       890b  Trio 64V+ family
+       890c  Trio 64V+ family
+       890d  Trio 64V+ family
+       890e  Trio 64V+ family
+       890f  Trio 64V+ family
+       8a01  ViRGE/DX or /GX
+               0e11 b032  ViRGE/GX
+               10b4 1617  Nitro 3D
+               10b4 1717  Nitro 3D
+               5333 8a01  ViRGE/DX
+       8a10  ViRGE/GX2
+               1092 8a10  Stealth 3D 4000
+       8a13  86c368 [Trio 3D/2X]
+               5333 8a13  Trio3D/2X
+       8a20  86c794 [Savage 3D]
+               5333 8a20  86C391 Savage3D
+       8a21  86c795 [Savage 3D/MV]
+               5333 8a21  86C390 Savage3D/MV
+       8a22  Savage 4
+               105d 0018  SR9 8Mb SDRAM
+               105d 002a  SR9 Pro 16Mb SDRAM
+               105d 003a  SR9 Pro 32Mb SDRAM
+               105d 092f  SR9 Pro+ 16Mb SGRAM
+               1092 4207  Stealth III S540
+               1092 4800  Stealth III S540
+               1092 4807  SpeedStar A90
+               1092 4808  Stealth III S540
+               1092 4809  Stealth III S540
+               1092 480e  Stealth III S540
+               1092 4904  Stealth III S520
+               1092 4905  SpeedStar A200
+               1092 4a09  Stealth III S540
+               1092 4a0b  Stealth III S540 Xtreme
+               1092 4a0f  Stealth III S540
+               1092 4e01  Stealth III S540
+               1102 101d  3d Blaster Savage 4
+               1102 101e  3d Blaster Savage 4
+               5333 8100  86C394-397 Savage4 SDRAM 100
+               5333 8110  86C394-397 Savage4 SDRAM 110
+               5333 8125  86C394-397 Savage4 SDRAM 125
+               5333 8143  86C394-397 Savage4 SDRAM 143
+               5333 8a22  86C394-397 Savage4
+               5333 8a2e  86C394-397 Savage4 32bit
+               5333 9125  86C394-397 Savage4 SGRAM 125
+               5333 9143  86C394-397 Savage4 SGRAM 143
+       8a23  Savage 4
+       8a25  ProSavage PM133
+       8a26  ProSavage KM133
+       8c00  ViRGE/M3
+       8c01  ViRGE/MX
+       8c02  ViRGE/MX+
+       8c03  ViRGE/MX+MV
+       8c10  86C270-294 Savage/MX-MV
+       8c11  82C270-294 Savage/MX
+       8c12  86C270-294 Savage/IX-MV
+       8c13  86C270-294 Savage/IX
+       9102  86C410 Savage 2000
+               1092 5932  Viper II Z200
+               1092 5934  Viper II Z200
+               1092 5952  Viper II Z200
+               1092 5954  Viper II Z200
+               1092 5a35  Viper II Z200
+               1092 5a37  Viper II Z200
+               1092 5a55  Viper II Z200
+               1092 5a57  Viper II Z200
+       ca00  SonicVibes
+544c  Teralogic Inc
+5455  Technische Universität Berlin
+       4458  S5933
+5519  Cnet Technologies, Inc.
+5544  Dunord Technologies
+       0001  I-30xx Scanner Interface
+5555  Genroco, Inc
+       0003  TURBOstor HFP-832 [HiPPI NIC]
+5700  Netpower
+6356  UltraStor
+6374  c't Magazin für Computertechnik
+       6773  GPPCI
+6409  Logitec Corp.
+6666  Decision Computer International Co.
+       0001  PCCOM4
+       0002  PCCOM8
+7604  O.N. Electronic Co Ltd.
+7bde  MIDAC Corporation
+7fed  PowerTV
+8008  Quancom Electronic GmbH
+       0010  WDOG1 [PCI-Watchdog 1]
+       0011  PWDOG2 [PCI-Watchdog 2]
+8086  Intel Corp.
+       0007  82379AB
+       0039  21145
+       0122  82437FX
+       0482  82375EB
+       0483  82424ZX [Saturn]
+       0484  82378IB [SIO ISA Bridge]
+       0486  82430ZX [Aries]
+       04a3  82434LX [Mercury/Neptune]
+       04d0  82437FX [Triton FX]
+       0600  RAID Controller
+       0960  80960RP [i960 RP Microprocessor/Bridge]
+       0964  80960RP [i960 RP Microprocessor/Bridge]
+       1000  82542 Gigabit Ethernet Controller
+               0e11 b0df  NC1632 Gigabit Ethernet Adapter
+               0e11 b0e0  NC1633 Gigabit Ethernet Adapter
+               0e11 b123  NC1634 Gigabit Ethernet Adapter
+               1014 0119  Netfinity Gigabit Ethernet SX Adapter
+       1001  82543GC Gigabit Ethernet Controller
+       1004  82543GC Gigabit Ethernet Controller
+       1008  82544EI Gigabit Ethernet Controller
+       1009  82544EI Gigabit Ethernet Controller
+       100c  82544GC Gigabit Ethernet Controller
+       100d  82544GC Gigabit Ethernet Controller
+       1029  82559 Ethernet Controller
+       1030  82559 InBusiness 10/100
+       1031  82801CAM (ICH3) Chipset Ethernet Controller
+       1032  82801CAM (ICH3) Chipset Ethernet Controller
+       1033  82801CAM (ICH3) Chipset Ethernet Controller
+       1034  82801CAM (ICH3) Chipset Ethernet Controller
+       1035  82801CAM (ICH3) Chipset Ethernet Controller
+       1036  82801CAM (ICH3) Chipset Ethernet Controller
+       1037  82801CAM (ICH3) Chipset Ethernet Controller
+       1038  82801CAM (ICH3) Chipset Ethernet Controller
+       1130  82815 815 Chipset Host Bridge and Memory Controller Hub
+       1132  82815 CGC [Chipset Graphics Controller]
+       1161  82806AA PCI64 Hub Advanced Programmable Interrupt Controller
+       1209  82559ER
+       1221  82092AA_0
+       1222  82092AA_1
+       1223  SAA7116
+       1225  82452KX/GX [Orion]
+       1226  82596 PRO/10 PCI
+       1227  82865 EtherExpress PRO/100A
+       1228  82556 EtherExpress PRO/100 Smart
+       1229  82557 [Ethernet Pro 100]
+               0e11 b01e  NC3120
+               0e11 b01f  NC3122
+               0e11 b02f  NC1120
+               0e11 b04a  Netelligent 10/100TX NIC with Wake on LAN
+               0e11 b0c6  Embedded NC3120 with Wake on LAN
+               0e11 b0c7  Embedded NC3121
+               0e11 b0d7  NC3121 with Wake on LAN
+               0e11 b0dd  NC3131 (82558B)
+               0e11 b0de  NC3132
+               0e11 b0e1  NC3133
+               0e11 b144  NC3123 (82559)
+               1014 005c  82558B Ethernet Pro 10/100
+               1014 105c  Netfinity 10/100
+               1033 8000  PC-9821X-B06
+               1033 8016  PK-UG-X006
+               1033 801f  PK-UG-X006
+               103c 10c0  Ethernet Pro 10/100TX
+               103c 10c3  Ethernet Pro 10/100TX
+               103c 1200  Ethernet Pro 10/100TX
+               10c3 1100  SmartEther100 SC1100
+               1179 0002  PCI FastEther LAN on Docker
+               1259 2560  AT-2560 100
+               1259 2561  AT-2560 100 FX Ethernet Adapter
+               1266 0001  NE10/100 Adapter
+               8086 0001  EtherExpress PRO/100B (TX)
+               8086 0002  EtherExpress PRO/100B (T4)
+               8086 0003  EtherExpress PRO/10+
+               8086 0004  EtherExpress PRO/100 WfM
+               8086 0005  82557 10/100
+               8086 0006  82557 10/100 with Wake on LAN
+               8086 0007  82558 10/100 Adapter
+               8086 0008  82558 10/100 with Wake on LAN
+               8086 0009  EtherExpress PRO/100+
+               8086 000a  EtherExpress PRO/100+ Management Adapter
+               8086 000b  EtherExpress PRO/100+
+               8086 000c  EtherExpress PRO/100+ Management Adapter
+               8086 000d  EtherExpress PRO/100+ Alert On LAN II* Adapter
+               8086 000e  EtherExpress PRO/100+ Management Adapter with Alert On LAN*
+               8086 1009  EtherExpress PRO/100+ Server Adapter
+               8086 100c  EtherExpress PRO/100+ Server Adapter (PILA8470B)
+               8086 10f0  EtherExpress PRO/100+ Dual Port Adapter
+               8086 200d  EtherExpress PRO/100 Cardbus
+               8086 200e  EtherExpress PRO/100 LAN+V90 Cardbus Modem
+               8086 3000  82559 Fast Ethernet LAN on Motherboard
+               8086 3001  82559 Fast Ethernet LOM with Basic Alert on LAN*
+               8086 3002  82559 Fast Ethernet LOM with Alert on LAN II*
+       122d  430FX - 82437FX TSC [Triton I]
+       122e  82371FB PIIX ISA [Triton I]
+       1230  82371FB PIIX IDE [Triton I]
+       1231  DSVD Modem
+       1234  430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX)
+       1235  430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP)
+       1237  440FX - 82441FX PMC [Natoma]
+       1239  82371FB
+       123b  82380PB
+       123c  82380AB
+       123d  683053 Programmable Interrupt Device
+       1240  752 AGP
+       124b  82380FB
+       1250  430HX - 82439HX TXC [Triton II]
+       1360  82806AA PCI64 Hub PCI Bridge
+       1361  82806AA PCI64 Hub Controller (HRes)
+       1960  80960RP [i960RP Microprocessor]
+               101e 0438  MegaRaid 438
+               101e 0466  MegaRaid 466
+               101e 0467  MegaRaid 467
+               101e 09a0  PowerEdge Expandable RAID Controller 2/SC
+               1028 0467  PowerEdge Expandable RAID Controller 2/DC
+               1028 1111  PowerEdge Expandable RAID Controller 2/SC
+               103c 03a2  MegaRaid
+               103c 10c6  MegaRaid 438
+               103c 10c7  MegaRaid T5
+               103c 10cc  MegaRaid
+               1111 1111  MegaRaid 466
+               113c 03a2  MegaRaid
+       1a21  82840 840 (Carmel) Chipset Host Bridge (Hub A)
+       1a23  82840 840 (Carmel) Chipset AGP Bridge
+       1a24  82840 840 (Carmel) Chipset PCI Bridge (Hub B)
+       1a30  82845 845 (Brookdale) Chipset Host Bridge
+       1a31  82845 845 (Brookdale) Chipset AGP Bridge
+       2410  82801AA ISA Bridge (LPC)
+       2411  82801AA IDE
+       2412  82801AA USB
+       2413  82801AA SMBus
+       2415  82801AA AC'97 Audio
+               11d4 0040  SoundMAX Integrated Digital Audio
+               11d4 0048  SoundMAX Integrated Digital Audio
+               11d4 5340  SoundMAX Integrated Digital Audio
+       2416  82801AA AC'97 Modem
+       2418  82801AA PCI Bridge
+       2420  82801AB ISA Bridge (LPC)
+       2421  82801AB IDE
+       2422  82801AB USB
+       2423  82801AB SMBus
+       2425  82801AB AC'97 Audio
+               11d4 0040  SoundMAX Integrated Digital Audio
+               11d4 0048  SoundMAX Integrated Digital Audio
+       2426  82801AB AC'97 Modem
+       2428  82801AB PCI Bridge
+       2440  82820 820 (Camino 2) Chipset ISA Bridge (ICH2)
+       2442  82820 820 (Camino 2) Chipset USB (Hub A)
+       2443  82820 820 (Camino 2) Chipset SMBus
+       2444  82820 820 (Camino 2) Chipset USB (Hub B)
+       2445  82820 820 (Camino 2) Chipset AC'97 Audio Controller
+       2446  82820 820 (Camino 2) Chipset AC'97 Modem Controller
+       2448  82820 820 (Camino 2) Chipset PCI (-M)
+       2449  82820 (ICH2) Chipset Ethernet Controller
+       244a  82820 820 (Camino 2) Chipset IDE U100 (-M)
+       244b  82820 820 (Camino 2) Chipset IDE U100
+       244c  82820 820 (Camino 2) Chipset ISA Bridge (ICH2-M)
+       244e  82820 820 (Camino 2) Chipset PCI
+       2485  AC'97 Audio Controller
+       2500  82820 820 (Camino) Chipset Host Bridge (MCH)
+               1043 801c  P3C-2000 system chipset
+       2501  82820 820 (Camino) Chipset Host Bridge (MCH)
+               1043 801c  P3C-2000 system chipset
+       250b  82820 820 (Camino) Chipset Host Bridge
+       250f  82820 820 (Camino) Chipset PCI to AGP Bridge
+       2520  82805AA MTH Memory Translator Hub
+       2521  82804AA MRH-S Memory Repeater Hub for SDRAM
+       2530  82850 850 (Tehama) Chipset Host Bridge (MCH)
+       2531  82860 860 (Wombat) Chipset Host Bridge (MCH)
+       2532  82850 850 (Tehama) Chipset AGP Bridge
+       2533  82860 860 (Wombat) Chipset AGP Bridge
+       3092  Integrated RAID
+       5200  EtherExpress PRO/100 Intelligent Server
+       5201  EtherExpress PRO/100 Intelligent Server
+               8086 0001  EtherExpress PRO/100 Server Ethernet Adapter
+       530d  80310 IOP [IO Processor]
+       7000  82371SB PIIX3 ISA [Natoma/Triton II]
+       7010  82371SB PIIX3 IDE [Natoma/Triton II]
+       7020  82371SB PIIX3 USB [Natoma/Triton II]
+       7030  430VX - 82437VX TVX [Triton VX]
+       7100  430TX - 82439TX MTXC
+       7110  82371AB PIIX4 ISA
+       7111  82371AB PIIX4 IDE
+       7112  82371AB PIIX4 USB
+       7113  82371AB PIIX4 ACPI
+       7120  82810 GMCH [Graphics Memory Controller Hub]
+       7121  82810 CGC [Chipset Graphics Controller]
+       7122  82810-DC100 GMCH [Graphics Memory Controller Hub]
+       7123  82810-DC100 CGC [Chipset Graphics Controller]
+       7124  82810E GMCH [Graphics Memory Controller Hub]
+       7125  82810E CGC [Chipset Graphics Controller]
+       7126  82810 810 Chipset Host Bridge and Memory Controller Hub
+       7180  440LX/EX - 82443LX/EX Host bridge
+       7181  440LX/EX - 82443LX/EX AGP bridge
+       7190  440BX/ZX - 82443BX/ZX Host bridge
+               0e11 0500  Armada 1750 Laptop System Chipset
+       7191  440BX/ZX - 82443BX/ZX AGP bridge
+       7192  440BX/ZX - 82443BX/ZX Host bridge (AGP disabled)
+               0e11 0460  Armada 1700 Laptop System Chipset
+       7194  82440MX I/O Controller
+       7195  82440MX AC'97 Audio Controller
+               10cf 1099  QSound_SigmaTel Stac97 PCI Audio
+               11d4 0040  SoundMAX Integrated Digital Audio
+               11d4 0048  SoundMAX Integrated Digital Audio
+       7198  82440MX PCI to ISA Bridge
+       7199  82440MX EIDE Controller
+       719a  82440MX USB Universal Host Controller
+       719b  82440MX Power Management Controller
+       71a0  440GX - 82443GX Host bridge
+       71a1  440GX - 82443GX AGP bridge
+       71a2  440GX - 82443GX Host bridge (AGP disabled)
+       7600  82372FB PCI to ISA Bridge
+       7601  82372FB PIIX4 IDE
+       7602  82372FB [PCI-to-USB UHCI]
+       7603  82372FB System Management Bus Controller
+       7800  i740
+               1092 0100  Stealth II G460
+               8086 0100  Intel740 Graphics Accelerator
+       84c4  450KX/GX [Orion] - 82454KX/GX PCI bridge
+       84c5  450KX/GX [Orion] - 82453KX/GX Memory controller
+       84ca  450NX - 82451NX Memory & I/O Controller
+       84cb  450NX - 82454NX/84460GX PCI Expander Bridge
+       84e0  460GX - 84460GX System Address Controller (SAC)
+       84e1  460GX - 84460GX System Data Controller (SDC)
+       84e2  460GX - 84460GX AGP Bridge (GXB)
+       84e3  460GX - 84460GX Memory Address Controller (MAC)
+       84e4  460GX - 84460GX Memory Data Controller (MDC)
+       9621  Integrated RAID
+       9622  Integrated RAID
+       9641  Integrated RAID
+       96a1  Integrated RAID
+       ffff  450NX/GX [Orion] - 82453KX/GX Memory controller [BUG]
+8800  Trigem Computer Inc.
+       2008  Video assistant component
+8866  T-Square Design Inc.
+8888  Silicon Magic
+8e0e  Computone Corporation
+8e2e  KTI
+       3000  ET32P2
+9004  Adaptec
+       1078  AIC-7810
+       1160  AIC-1160 [Family Fibre Channel Adapter]
+       2178  AIC-7821
+       3860  AHA-2930CU
+       3b78  AHA-4844W/4844UW
+       5075  AIC-755x
+       5078  AHA-7850
+               9004 7850  AHA-2904/Integrated AIC-7850
+       5175  AIC-755x
+       5178  AIC-7851
+       5275  AIC-755x
+       5278  AIC-7852
+       5375  AIC-755x
+       5378  AIC-7850
+       5475  AIC-2930
+       5478  AIC-7850
+       5575  AVA-2930
+       5578  AIC-7855
+       5675  AIC-755x
+       5678  AIC-7850
+       5775  AIC-755x
+       5778  AIC-7850
+       5800  AIC-5800
+       5900  ANA-5910/5930/5940 ATM155 & 25 LAN Adapter
+       5905  ANA-5910A/5930A/5940A ATM Adapter
+       6038  AIC-3860
+       6075  AIC-1480 / APA-1480
+       6078  AIC-7860
+       6178  AIC-7861
+               9004 7861  AHA-2940AU Single
+       6278  AIC-7860
+       6378  AIC-7860
+       6478  AIC-786
+       6578  AIC-786x
+       6678  AIC-786
+       6778  AIC-786x
+       6915  ANA620xx/ANA69011A
+               9004 0008  ANA69011A/TX 10/100
+               9004 0009  ANA69011A/TX 10/100
+               9004 0010  ANA62022 2-port 10/100
+               9004 0018  ANA62044 4-port 10/100
+               9004 0020  ANA62022 2-port 10/100
+               9004 0028  ANA69011A/TX 10/100
+               9004 8008  ANA69011A/TX 64 bit 10/100
+               9004 8009  ANA69011A/TX 64 bit 10/100
+               9004 8010  ANA62022 2-port 64 bit 10/100
+               9004 8018  ANA62044 4-port 64 bit 10/100
+               9004 8020  ANA62022 2-port 64 bit 10/100
+               9004 8028  ANA69011A/TX 64 bit 10/100
+       7078  AHA-294x / AIC-7870
+       7178  AHA-294x / AIC-7871
+       7278  AHA-3940 / AIC-7872
+       7378  AHA-3985 / AIC-7873
+       7478  AHA-2944 / AIC-7874
+# DJ: Where did the 3rd number come from?
+       7578  AHA-3944 / AHA-3944W / 7875
+# DJ: Where did the 3rd number come from?
+       7678  AHA-4944W/UW / 7876
+       7778  AIC-787x
+       7810  AIC-7810
+       7815  AIC-7815 RAID+Memory Controller IC
+               9004 7815  ARO-1130U2 RAID Controller
+               9004 7840  AIC-7815 RAID+Memory Controller IC
+       7850  AIC-7850
+       7855  AHA-2930
+       7860  AIC-7860
+       7870  AIC-7870
+       7871  AHA-2940
+       7872  AHA-3940
+       7873  AHA-3980
+       7874  AHA-2944
+       7880  AIC-7880P
+       7890  AIC-7890
+       7891  AIC-789x
+       7892  AIC-789x
+       7893  AIC-789x
+       7894  AIC-789x
+       7895  AHA-2940U/UW / AHA-39xx / AIC-7895
+               9004 7895  AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B
+       7896  AIC-789x
+       7897  AIC-789x
+       8078  AIC-7880U
+               9004 7880  AIC-7880P Ultra/Ultra Wide SCSI Chipset
+       8178  AIC-7881U
+               9004 7881  AHA-2940UW SCSI Host Adapter
+       8278  AHA-3940U/UW / AIC-7882U
+       8378  AHA-3940U/UW / AIC-7883U
+       8478  AHA-294x / AIC-7884U
+       8578  AHA-3944U / AHA-3944UWD / 7885
+       8678  AHA-4944UW / 7886
+       8778  AIC-788x
+               9004 7887  2940UW Pro Ultra-Wide SCSI Controller
+       8878  7888
+       8b78  ABA-1030
+       ec78  AHA-4944W/UW
+9005  Adaptec
+       0010  AHA-2940U2/W
+       0011  2930U2
+       0013  78902
+               9005 0003  AAA-131U2 Array1000 1 Channel RAID Controller
+       001f  AHA-2940U2/W / 7890
+               9005 000f  2940U2W SCSI Controller
+               9005 a180  2940U2W SCSI Controller
+       0020  AIC-7890
+       002f  AIC-7890
+       0030  AIC-7890
+       003f  AIC-7890
+       0050  3940U2
+       0051  3950U2D
+       0053  AIC-7896 SCSI Controller
+               9005 ffff  AIC-7896 SCSI Controller mainboard implementation
+       005f  7896
+       0080  7892A
+       0081  7892B
+       0083  7892D
+       008f  7892P
+       00c0  7899A
+       00c1  7899B
+       00c3  7899D
+       00c5  RAID subsystem HBA
+       00cf  7899P
+907f  Atronics
+       2015  IDE-2015PL
+919a  Gigapixel Corp
+9412  Holtek
+       6565  6565
+9699  Omni Media Technology Inc
+       6565  6565
+a0a0  AOPEN Inc.
+a0f1  UNISYS Corporation
+a200  NEC Corporation
+a259  Hewlett Packard
+a25b  Hewlett Packard GmbH PL24-MKT
+a304  Sony
+a727  3Com Corporation
+aa42  Scitex Digital Video
+ac1e  Digital Receiver Technology Inc
+b1b3  Shiva Europe Limited
+c001  TSI Telsys
+c0a9  Micron/Crucial Technology
+c0de  Motorola
+c0fe  Motion Engineering, Inc.
+ca50  Varian Australia Pty Ltd
+cafe  Chrysalis-ITS
+cccc  Catapult Communications
+d4d4  Dy4 Systems Inc
+       0601  PCI Mezzanine Card
+d84d  Exsys
+e000  Winbond
+       e000  W89C940
+e159  Tiger Jet Network Inc.
+       0001  Model 300 128k
+               0059 0001  128k ISDN-S/T Adapter
+               0059 0003  128k ISDN-U Adapter
+e4bf  EKF Elektronik GmbH
+ea01  Eagle Technology
+eabb  Aashima Technology B.V.
+ecc0  Echo Corporation
+edd8  ARK Logic Inc
+       a091  1000PV [Stingray]
+       a099  2000PV [Stingray]
+       a0a1  2000MT
+       a0a9  2000MI
+fa57  Fast Search & Transfer ASA
+feda  Epigram Inc
+fffe  VMWare Inc
+       0710  Virtual SVGA
+ffff  Illegal Vendor ID
+
+
+# List of known device classes, subclasses and programming interfaces
+
+# Syntax:
+# C class      class_name
+#      subclass        subclass_name           <-- single tab
+#              prog-if  prog-if_name   <-- two tabs
+
+C 00  Unclassified device
+       00  Non-VGA unclassified device
+       01  VGA compatible unclassified device
+C 01  Mass storage controller
+       00  SCSI storage controller
+       01  IDE interface
+       02  Floppy disk controller
+       03  IPI bus controller
+       04  RAID bus controller
+       80  Unknown mass storage controller
+C 02  Network controller
+       00  Ethernet controller
+       01  Token ring network controller
+       02  FDDI network controller
+       03  ATM network controller
+       04  ISDN controller
+       80  Network controller
+C 03  Display controller
+       00  VGA compatible controller
+               00  VGA
+               01  8514
+       01  XGA compatible controller
+       02  3D controller
+       80  Display controller
+C 04  Multimedia controller
+       00  Multimedia video controller
+       01  Multimedia audio controller
+       02  Computer telephony device
+       80  Multimedia controller
+C 05  Memory controller
+       00  RAM memory
+       01  FLASH memory
+       80  Memory controller
+C 06  Bridge
+       00  Host bridge
+       01  ISA bridge
+       02  EISA bridge
+       03  MicroChannel bridge
+       04  PCI bridge
+               00  Normal decode
+               01  Subtractive decode
+       05  PCMCIA bridge
+       06  NuBus bridge
+       07  CardBus bridge
+       08  RACEway bridge
+               00  Transparent mode
+               01  Endpoint mode
+       09  Semi-transparent PCI-to-PCI bridge
+               40  Primary bus towards host CPU
+               80  Secondary bus towards host CPU
+       0a  InfiniBand to PCI host bridge
+       80  Bridge
+C 07  Communication controller
+       00  Serial controller
+               00  8250
+               01  16450
+               02  16550
+               03  16650
+               04  16750
+               05  16850
+               06  16950
+       01  Parallel controller
+               00  SPP
+               01  BiDir
+               02  ECP
+               03  IEEE1284
+               fe  IEEE1284 Target
+       02  Multiport serial controller
+       03  Modem
+               00  Generic
+               01  Hayes/16450
+               02  Hayes/16550
+               03  Hayes/16650
+               04  Hayes/16750
+       80  Communication controller
+C 08  Generic system peripheral
+       00  PIC
+               00  8259
+               01  ISA PIC
+               02  EISA PIC
+               10  IO-APIC
+               20  IO(X)-APIC
+       01  DMA controller
+               00  8237
+               01  ISA DMA
+               02  EISA DMA
+       02  Timer
+               00  8254
+               01  ISA Timer
+               02  EISA Timers
+       03  RTC
+               00  Generic
+               01  ISA RTC
+       04  PCI Hot-plug controller
+       80  System peripheral
+C 09  Input device controller
+       00  Keyboard controller
+       01  Digitizer Pen
+       02  Mouse controller
+       03  Scanner controller
+       04  Gameport controller
+               00  Generic
+               10  Extended
+       80  Input device controller
+C 0a  Docking station
+       00  Generic Docking Station
+       80  Docking Station
+C 0b  Processor
+       00  386
+       01  486
+       02  Pentium
+       10  Alpha
+       20  Power PC
+       30  MIPS
+       40  Co-processor
+C 0c  Serial bus controller
+       00  FireWire (IEEE 1394)
+               00  Generic
+               10  OHCI
+       01  ACCESS Bus
+       02  SSA
+       03  USB Controller
+               00  UHCI
+               10  OHCI
+               80  Unspecified
+               fe  USB Device
+       04  Fibre Channel
+       05  SMBus
+       06  InfiniBand
+C 0d  Wireless controller
+       00  IRDA controller
+       01  Consumer IR controller
+       10  RF controller
+       80  Wireless controller
+C 0e  Intelligent controller
+       00  I2O
+C 0f  Satellite communications controller
+       00  Satellite TV controller
+       01  Satellite audio communication controller
+       03  Satellite voice communication controller
+       04  Satellite data communication controller
+C 10  Encryption controller
+       00  Network and computing encryption device
+       10  Entertainment encryption device
+       80  Encryption controller
+C 11  Signal processing controller
+       00  DPIO module
+       01  Performance counters
+       10  Communication synchronizer
+       80  Signal processing controller
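
The vendor table and the class table above share the tab-indented syntax given in the comment block before the class list: an unindented line opens a vendor (or, prefixed with "C", a class), a single tab introduces a device or subclass, and two tabs introduce a subsystem ID pair or a programming interface. A minimal reader for that layout might look like the sketch below; it is illustrative only, not part of the imported tree, and the default file name "pci.ids" is an assumption.

    /*
     * Minimal pci.ids walker (illustrative sketch, not imported code).
     * Leading tabs carry the hierarchy: 0 = vendor or "C xx" class,
     * 1 = device or subclass, 2 = subsystem pair or prog-if.
     */
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        const char *path = argc > 1 ? argv[1] : "pci.ids"; /* assumed name */
        FILE *f = fopen(path, "r");
        char line[512];
        int tabs;

        if (!f) {
            perror(path);
            return 1;
        }
        while (fgets(line, sizeof(line), f)) {
            if (line[0] == '#' || line[0] == '\n')
                continue;                     /* comment or blank line */
            for (tabs = 0; line[tabs] == '\t'; tabs++)
                ;                             /* count indentation depth */
            if (tabs == 0 && line[0] == 'C')
                printf("class      %s", line + 2);
            else if (tabs == 0)
                printf("vendor     %s", line);
            else if (tabs == 1)
                printf("  device   %s", line + 1);
            else
                printf("    sub    %s", line + 2);
        }
        fclose(f);
        return 0;
    }

Fed the list above saved to a file, it prints one flattened entry per line, with the hex IDs still leading each name.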
diff --git a/xen-2.4.16/drivers/pci/proc.c b/xen-2.4.16/drivers/pci/proc.c
new file mode 100644 (file)
index 0000000..3516d44
--- /dev/null
@@ -0,0 +1,572 @@
+/*
+ *     $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $
+ *
+ *     Procfs interface for the PCI bus.
+ *
+ *     Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+
+#define PCI_CFG_SPACE_SIZE 256
+
+static loff_t
+proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
+{
+       loff_t new;
+
+       switch (whence) {
+       case 0:
+               new = off;
+               break;
+       case 1:
+               new = file->f_pos + off;
+               break;
+       case 2:
+               new = PCI_CFG_SPACE_SIZE + off;
+               break;
+       default:
+               return -EINVAL;
+       }
+       if (new < 0 || new > PCI_CFG_SPACE_SIZE)
+               return -EINVAL;
+       return (file->f_pos = new);
+}
+
+static ssize_t
+proc_bus_pci_read(struct file *file, char *buf, size_t nbytes, loff_t *ppos)
+{
+       const struct inode *ino = file->f_dentry->d_inode;
+       const struct proc_dir_entry *dp = ino->u.generic_ip;
+       struct pci_dev *dev = dp->data;
+       unsigned int pos = *ppos;
+       unsigned int cnt, size;
+
+       /*
+        * Normal users can read only the standardized portion of the
+        * configuration space as several chips lock up when trying to read
+        * undefined locations (think of Intel PIIX4 as a typical example).
+        */
+
+       if (capable(CAP_SYS_ADMIN))
+               size = PCI_CFG_SPACE_SIZE;
+       else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
+               size = 128;
+       else
+               size = 64;
+
+       if (pos >= size)
+               return 0;
+       if (nbytes >= size)
+               nbytes = size;
+       if (pos + nbytes > size)
+               nbytes = size - pos;
+       cnt = nbytes;
+
+       if (!access_ok(VERIFY_WRITE, buf, cnt))
+               return -EINVAL;
+
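+       /* Copy in up to five steps -- a leading byte and a leading word
+          to reach dword alignment, whole dwords in bulk, then a trailing
+          word and byte -- so every config-space access below is
+          naturally aligned. */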
+       if ((pos & 1) && cnt) {
+               unsigned char val;
+               pci_read_config_byte(dev, pos, &val);
+               __put_user(val, buf);
+               buf++;
+               pos++;
+               cnt--;
+       }
+
+       if ((pos & 3) && cnt > 2) {
+               unsigned short val;
+               pci_read_config_word(dev, pos, &val);
+               __put_user(cpu_to_le16(val), (unsigned short *) buf);
+               buf += 2;
+               pos += 2;
+               cnt -= 2;
+       }
+
+       while (cnt >= 4) {
+               unsigned int val;
+               pci_read_config_dword(dev, pos, &val);
+               __put_user(cpu_to_le32(val), (unsigned int *) buf);
+               buf += 4;
+               pos += 4;
+               cnt -= 4;
+       }
+
+       if (cnt >= 2) {
+               unsigned short val;
+               pci_read_config_word(dev, pos, &val);
+               __put_user(cpu_to_le16(val), (unsigned short *) buf);
+               buf += 2;
+               pos += 2;
+               cnt -= 2;
+       }
+
+       if (cnt) {
+               unsigned char val;
+               pci_read_config_byte(dev, pos, &val);
+               __put_user(val, buf);
+               buf++;
+               pos++;
+               cnt--;
+       }
+
+       *ppos = pos;
+       return nbytes;
+}
+
+static ssize_t
+proc_bus_pci_write(struct file *file, const char *buf, size_t nbytes, loff_t *ppos)
+{
+       const struct inode *ino = file->f_dentry->d_inode;
+       const struct proc_dir_entry *dp = ino->u.generic_ip;
+       struct pci_dev *dev = dp->data;
+       int pos = *ppos;
+       int cnt;
+
+       if (pos >= PCI_CFG_SPACE_SIZE)
+               return 0;
+       if (nbytes >= PCI_CFG_SPACE_SIZE)
+               nbytes = PCI_CFG_SPACE_SIZE;
+       if (pos + nbytes > PCI_CFG_SPACE_SIZE)
+               nbytes = PCI_CFG_SPACE_SIZE - pos;
+       cnt = nbytes;
+
+       if (!access_ok(VERIFY_READ, buf, cnt))
+               return -EINVAL;
+
+       if ((pos & 1) && cnt) {
+               unsigned char val;
+               __get_user(val, buf);
+               pci_write_config_byte(dev, pos, val);
+               buf++;
+               pos++;
+               cnt--;
+       }
+
+       if ((pos & 3) && cnt > 2) {
+               unsigned short val;
+               __get_user(val, (unsigned short *) buf);
+               pci_write_config_word(dev, pos, le16_to_cpu(val));
+               buf += 2;
+               pos += 2;
+               cnt -= 2;
+       }
+
+       while (cnt >= 4) {
+               unsigned int val;
+               __get_user(val, (unsigned int *) buf);
+               pci_write_config_dword(dev, pos, le32_to_cpu(val));
+               buf += 4;
+               pos += 4;
+               cnt -= 4;
+       }
+
+       if (cnt >= 2) {
+               unsigned short val;
+               __get_user(val, (unsigned short *) buf);
+               pci_write_config_word(dev, pos, le16_to_cpu(val));
+               buf += 2;
+               pos += 2;
+               cnt -= 2;
+       }
+
+       if (cnt) {
+               unsigned char val;
+               __get_user(val, buf);
+               pci_write_config_byte(dev, pos, val);
+               buf++;
+               pos++;
+               cnt--;
+       }
+
+       *ppos = pos;
+       return nbytes;
+}
+
+struct pci_filp_private {
+       enum pci_mmap_state mmap_state;
+       int write_combine;
+};
+
+static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+       const struct proc_dir_entry *dp = inode->u.generic_ip;
+       struct pci_dev *dev = dp->data;
+#ifdef HAVE_PCI_MMAP
+       struct pci_filp_private *fpriv = file->private_data;
+#endif /* HAVE_PCI_MMAP */
+       int ret = 0;
+
+       switch (cmd) {
+       case PCIIOC_CONTROLLER:
+               ret = pci_controller_num(dev);
+               break;
+
+#ifdef HAVE_PCI_MMAP
+       case PCIIOC_MMAP_IS_IO:
+               fpriv->mmap_state = pci_mmap_io;
+               break;
+
+       case PCIIOC_MMAP_IS_MEM:
+               fpriv->mmap_state = pci_mmap_mem;
+               break;
+
+       case PCIIOC_WRITE_COMBINE:
+               if (arg)
+                       fpriv->write_combine = 1;
+               else
+                       fpriv->write_combine = 0;
+               break;
+
+#endif /* HAVE_PCI_MMAP */
+
+       default:
+               ret = -EINVAL;
+               break;
+       };
+
+       return ret;
+}
+
+#ifdef HAVE_PCI_MMAP
+static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+       const struct proc_dir_entry *dp = inode->u.generic_ip;
+       struct pci_dev *dev = dp->data;
+       struct pci_filp_private *fpriv = file->private_data;
+       int ret;
+
+       if (!capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       ret = pci_mmap_page_range(dev, vma,
+                                 fpriv->mmap_state,
+                                 fpriv->write_combine);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static int proc_bus_pci_open(struct inode *inode, struct file *file)
+{
+       struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
+
+       if (!fpriv)
+               return -ENOMEM;
+
+       fpriv->mmap_state = pci_mmap_io;
+       fpriv->write_combine = 0;
+
+       file->private_data = fpriv;
+
+       return 0;
+}
+
+static int proc_bus_pci_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       file->private_data = NULL;
+
+       return 0;
+}
+#endif /* HAVE_PCI_MMAP */
+
+static struct file_operations proc_bus_pci_operations = {
+       llseek:         proc_bus_pci_lseek,
+       read:           proc_bus_pci_read,
+       write:          proc_bus_pci_write,
+       ioctl:          proc_bus_pci_ioctl,
+#ifdef HAVE_PCI_MMAP
+       open:           proc_bus_pci_open,
+       release:        proc_bus_pci_release,
+       mmap:           proc_bus_pci_mmap,
+#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+       get_unmapped_area: get_pci_unmapped_area,
+#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
+#endif /* HAVE_PCI_MMAP */
+};
+
+#if BITS_PER_LONG == 32
+#define LONG_FORMAT "\t%08lx"
+#else
+#define LONG_FORMAT "\t%16lx"
+#endif
+
+/* iterator */
+static void *pci_seq_start(struct seq_file *m, loff_t *pos)
+{
+       struct list_head *p = &pci_devices;
+       loff_t n = *pos;
+
+       /* XXX: surely we need some locking for traversing the list? */
+       while (n--) {
+               p = p->next;
+               if (p == &pci_devices)
+                       return NULL;
+       }
+       return p;
+}
+static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct list_head *p = v;
+       (*pos)++;
+       return p->next != &pci_devices ? p->next : NULL;
+}
+static void pci_seq_stop(struct seq_file *m, void *v)
+{
+       /* release whatever locks we need */
+}
+
+static int show_device(struct seq_file *m, void *v)
+{
+       struct list_head *p = v;
+       const struct pci_dev *dev;
+       const struct pci_driver *drv;
+       int i;
+
+       if (p == &pci_devices)
+               return 0;
+
+       dev = pci_dev_g(p);
+       drv = pci_dev_driver(dev);
+       seq_printf(m, "%02x%02x\t%04x%04x\t%x",
+                       dev->bus->number,
+                       dev->devfn,
+                       dev->vendor,
+                       dev->device,
+                       dev->irq);
+       /* This should be 7, not PCI_NUM_RESOURCES, to preserve compatibility. */
+       for(i=0; i<7; i++)
+               seq_printf(m, LONG_FORMAT,
+                       dev->resource[i].start |
+                       (dev->resource[i].flags & PCI_REGION_FLAG_MASK));
+       for(i=0; i<7; i++)
+               seq_printf(m, LONG_FORMAT,
+                       dev->resource[i].start < dev->resource[i].end ?
+                       dev->resource[i].end - dev->resource[i].start + 1 : 0);
+       seq_putc(m, '\t');
+       if (drv)
+               seq_printf(m, "%s", drv->name);
+       seq_putc(m, '\n');
+       return 0;
+}
+
+static struct seq_operations proc_bus_pci_devices_op = {
+       start:  pci_seq_start,
+       next:   pci_seq_next,
+       stop:   pci_seq_stop,
+       show:   show_device
+};
+
+static struct proc_dir_entry *proc_bus_pci_dir;
+
+int pci_proc_attach_device(struct pci_dev *dev)
+{
+       struct pci_bus *bus = dev->bus;
+       struct proc_dir_entry *de, *e;
+       char name[16];
+
+       if (!(de = bus->procdir)) {
+               sprintf(name, "%02x", bus->number);
+               de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+               if (!de)
+                       return -ENOMEM;
+       }
+       sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+       e = dev->procent = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, de);
+       if (!e)
+               return -ENOMEM;
+       e->proc_fops = &proc_bus_pci_operations;
+       e->data = dev;
+       e->size = PCI_CFG_SPACE_SIZE;
+       return 0;
+}
+
+int pci_proc_detach_device(struct pci_dev *dev)
+{
+       struct proc_dir_entry *e;
+
+       if ((e = dev->procent)) {
+               if (atomic_read(&e->count))
+                       return -EBUSY;
+               remove_proc_entry(e->name, dev->bus->procdir);
+               dev->procent = NULL;
+       }
+       return 0;
+}
+
+int pci_proc_attach_bus(struct pci_bus* bus)
+{
+       struct proc_dir_entry *de = bus->procdir;
+
+       if (!de) {
+               char name[16];
+               sprintf(name, "%02x", bus->number);
+               de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
+               if (!de)
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
+int pci_proc_detach_bus(struct pci_bus* bus)
+{
+       struct proc_dir_entry *de = bus->procdir;
+       if (de)
+               remove_proc_entry(de->name, proc_bus_pci_dir);
+       return 0;
+}
+
+
+/*
+ *  Backward compatible /proc/pci interface.
+ */
+
+/*
+ * Convert some of the configuration space registers of the given
+ * device into a human-readable description (possibly several lines
+ * per device) on the seq_file.
+ */
+static int show_dev_config(struct seq_file *m, void *v)
+{
+       struct list_head *p = v;
+       struct pci_dev *dev;
+       struct pci_driver *drv;
+       u32 class_rev;
+       unsigned char latency, min_gnt, max_lat, *class;
+       int reg;
+
+       if (p == &pci_devices) {
+               seq_puts(m, "PCI devices found:\n");
+               return 0;
+       }
+
+       dev = pci_dev_g(p);
+       drv = pci_dev_driver(dev);
+
+       pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev);
+       pci_read_config_byte (dev, PCI_LATENCY_TIMER, &latency);
+       pci_read_config_byte (dev, PCI_MIN_GNT, &min_gnt);
+       pci_read_config_byte (dev, PCI_MAX_LAT, &max_lat);
+       seq_printf(m, "  Bus %2d, device %3d, function %2d:\n",
+              dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+       class = pci_class_name(class_rev >> 16);
+       if (class)
+               seq_printf(m, "    %s", class);
+       else
+               seq_printf(m, "    Class %04x", class_rev >> 16);
+       seq_printf(m, ": %s (rev %d).\n", dev->name, class_rev & 0xff);
+
+       if (dev->irq)
+               seq_printf(m, "      IRQ %d.\n", dev->irq);
+
+       if (latency || min_gnt || max_lat) {
+               seq_printf(m, "      Master Capable.  ");
+               if (latency)
+                       seq_printf(m, "Latency=%d.  ", latency);
+               else
+                       seq_puts(m, "No bursts.  ");
+               if (min_gnt)
+                       seq_printf(m, "Min Gnt=%d.", min_gnt);
+               if (max_lat)
+                       seq_printf(m, "Max Lat=%d.", max_lat);
+               seq_putc(m, '\n');
+       }
+
+       for (reg = 0; reg < 6; reg++) {
+               struct resource *res = dev->resource + reg;
+               unsigned long base, end, flags;
+
+               base = res->start;
+               end = res->end;
+               flags = res->flags;
+               if (!end)
+                       continue;
+
+               if (flags & PCI_BASE_ADDRESS_SPACE_IO) {
+                       seq_printf(m, "      I/O at 0x%lx [0x%lx].\n",
+                               base, end);
+               } else {
+                       const char *pref, *type = "unknown";
+
+                       if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
+                               pref = "P";
+                       else
+                               pref = "Non-p";
+                       switch (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) {
+                             case PCI_BASE_ADDRESS_MEM_TYPE_32:
+                               type = "32 bit"; break;
+                             case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+                               type = "20 bit"; break;
+                             case PCI_BASE_ADDRESS_MEM_TYPE_64:
+                               type = "64 bit"; break;
+                       }
+                       seq_printf(m, "      %srefetchable %s memory at "
+                                      "0x%lx [0x%lx].\n", pref, type,
+                                      base,
+                                      end);
+               }
+       }
+       return 0;
+}
+
+static struct seq_operations proc_pci_op = {
+       start:  pci_seq_start,
+       next:   pci_seq_next,
+       stop:   pci_seq_stop,
+       show:   show_dev_config
+};
+
+static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &proc_bus_pci_devices_op);
+}
+static struct file_operations proc_bus_pci_dev_operations = {
+       open:           proc_bus_pci_dev_open,
+       read:           seq_read,
+       llseek:         seq_lseek,
+       release:        seq_release,
+};
+static int proc_pci_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &proc_pci_op);
+}
+static struct file_operations proc_pci_operations = {
+       open:           proc_pci_open,
+       read:           seq_read,
+       llseek:         seq_lseek,
+       release:        seq_release,
+};
+
+static int __init pci_proc_init(void)
+{
+       if (pci_present()) {
+               struct proc_dir_entry *entry;
+               struct pci_dev *dev;
+               proc_bus_pci_dir = proc_mkdir("pci", proc_bus);
+               entry = create_proc_entry("devices", 0, proc_bus_pci_dir);
+               if (entry)
+                       entry->proc_fops = &proc_bus_pci_dev_operations;
+               pci_for_each_dev(dev) {
+                       pci_proc_attach_device(dev);
+               }
+               entry = create_proc_entry("pci", 0, NULL);
+               if (entry)
+                       entry->proc_fops = &proc_pci_operations;
+       }
+       return 0;
+}
+
+__initcall(pci_proc_init);
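pci_proc_init() above creates one file per device under /proc/bus/pci/<bus>/<slot>.<fn>, sized at the 256-byte configuration space and served by proc_bus_pci_read(), which limits unprivileged readers to the first 64 bytes. A hedged userspace sketch of reading one of these files; the 00/00.0 path is only an example and depends on the machine:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned char cfg[64];
        int i, fd;

        fd = open("/proc/bus/pci/00/00.0", O_RDONLY);   /* example path */
        if (fd < 0 || read(fd, cfg, sizeof(cfg)) != sizeof(cfg))
                return 1;
        /* Values come back little-endian: bytes 0-1 are the vendor ID,
           bytes 2-3 the device ID. */
        printf("vendor %02x%02x device %02x%02x\n",
               cfg[1], cfg[0], cfg[3], cfg[2]);
        for (i = 0; i < 64; i++)
                printf("%02x%c", cfg[i], (i % 16 == 15) ? '\n' : ' ');
        close(fd);
        return 0;
    }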
diff --git a/xen-2.4.16/drivers/pci/quirks.c b/xen-2.4.16/drivers/pci/quirks.c
new file mode 100644 (file)
index 0000000..5c06bb5
--- /dev/null
@@ -0,0 +1,534 @@
+/*
+ * $Id: quirks.c,v 1.5 1998/05/02 19:24:14 mj Exp $
+ *
+ *  This file contains work-arounds for many known PCI hardware
+ *  bugs.  Devices present only on certain architectures (host
+ *  bridges et cetera) should be handled in arch-specific code.
+ *
+ *  Copyright (c) 1999 Martin Mares <mj@ucw.cz>
+ *
+ *  The bridge optimization stuff has been removed. If you really
+ *  have a silly BIOS which is unable to set your host bridge right,
+ *  use the PowerTweak utility (see http://powertweak.sourceforge.net).
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+
+#undef DEBUG
+
+/* Deal with broken BIOSes that neglect to enable passive release,
+   which can cause problems in combination with the 82441FX/PPro MTRRs */
+static void __init quirk_passive_release(struct pci_dev *dev)
+{
+       struct pci_dev *d = NULL;
+       unsigned char dlc;
+
+       /* We have to make sure a particular bit is set in the PIIX3
+          ISA bridge, so we have to go out and find it. */
+       while ((d = pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) {
+               pci_read_config_byte(d, 0x82, &dlc);
+               if (!(dlc & 1<<1)) {
+                       printk(KERN_ERR "PCI: PIIX3: Enabling Passive Release on %s\n", d->slot_name);
+                       dlc |= 1<<1;
+                       pci_write_config_byte(d, 0x82, dlc);
+               }
+       }
+}
+
+/*  The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a
+    workaround but VIA don't answer queries. If you happen to have good
+    contacts at VIA, ask them for me please -- Alan
+
+    This appears to be BIOS-, not BIOS-version-, dependent. So presumably
+    there is a chipset-level fix. */
+    
+
+int isa_dma_bridge_buggy;              /* Exported */
+    
+static void __init quirk_isa_dma_hangs(struct pci_dev *dev)
+{
+       if (!isa_dma_bridge_buggy) {
+               isa_dma_bridge_buggy=1;
+               printk(KERN_INFO "Activating ISA DMA hang workarounds.\n");
+       }
+}
+
+int pci_pci_problems;
+
+/*
+ *     Chipsets where PCI->PCI transfers vanish or hang
+ */
+
+static void __init quirk_nopcipci(struct pci_dev *dev)
+{
+       if((pci_pci_problems&PCIPCI_FAIL)==0)
+       {
+               printk(KERN_INFO "Disabling direct PCI/PCI transfers.\n");
+               pci_pci_problems|=PCIPCI_FAIL;
+       }
+}
+
+/*
+ *     Triton requires workarounds to be used by the drivers
+ */
+static void __init quirk_triton(struct pci_dev *dev)
+{
+       if((pci_pci_problems&PCIPCI_TRITON)==0)
+       {
+               printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+               pci_pci_problems|=PCIPCI_TRITON;
+       }
+}
+
+/*
+ *     VIA Apollo KT133 needs PCI latency patch
+ *     Made according to a Windows-driver-based patch by George E. Breese;
+ *     see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm
+ *      Also see http://home.tiscalinet.de/au-ja/review-kt133a-1-en.html for
+ *      the info on which Mr Breese based his work.
+ *
+ *     Updated based on further information from the site and also on
+ *     information provided by VIA 
+ */
+static void __init quirk_vialatency(struct pci_dev *dev)
+{
+       struct pci_dev *p;
+       u8 rev;
+       u8 busarb;
+       /* Ok we have a potential problem chipset here. Now see if we have
+          a buggy southbridge */
+          
+       p=pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL);
+       if(p!=NULL)
+       {
+               pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+               /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */
+               /* Check for buggy part revisions */
+               if (rev < 0x40 || rev > 0x42) 
+                       return;
+       }
+       else
+       {
+               p = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+               if(p==NULL)     /* No problem parts */
+                       return;
+               pci_read_config_byte(p, PCI_CLASS_REVISION, &rev);
+               /* Check for buggy part revisions */
+               if (rev < 0x10 || rev > 0x12) 
+                       return;
+       }
+       
+       /*
+        *      OK, we have the problem chipset. Now set master priority
+        *      rotation to occur on every PCI master grant. The apparent
+        *      bug is that under high PCI load (quite common in Linux, of
+        *      course) you can get data loss when the CPU is held off the
+        *      bus for 3 bus master requests. This happens to include the
+        *      IDE controllers....
+        *
+        *      VIA only apply this fix when an SB Live! is present, but
+        *      under both Linux and Windows this isn't enough, and we have
+        *      seen corruption without SB Live! but with things like 3
+        *      UDMA IDE controllers. So we ignore that bit of the VIA
+        *      recommendation.
+        */
+
+       pci_read_config_byte(dev, 0x76, &busarb);
+       /* Clear bit 5 and set bit 4 of register 0x76:
+          "Master priority rotation on every PCI master grant" */
+       busarb &= ~(1<<5);
+       busarb |= (1<<4);
+       pci_write_config_byte(dev, 0x76, busarb);
+       printk(KERN_INFO "Applying VIA southbridge workaround.\n");
+}
+
+/*
+ *     VIA Apollo VP3 needs ETBF on BT848/878
+ */
+static void __init quirk_viaetbf(struct pci_dev *dev)
+{
+       if((pci_pci_problems&PCIPCI_VIAETBF)==0)
+       {
+               printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+               pci_pci_problems|=PCIPCI_VIAETBF;
+       }
+}
+static void __init quirk_vsfx(struct pci_dev *dev)
+{
+       if((pci_pci_problems&PCIPCI_VSFX)==0)
+       {
+               printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+               pci_pci_problems|=PCIPCI_VSFX;
+       }
+}
+
+
+/*
+ *     Natoma has some interesting boundary conditions with Zoran stuff
+ *     at least
+ */
+static void __init quirk_natoma(struct pci_dev *dev)
+{
+       if((pci_pci_problems&PCIPCI_NATOMA)==0)
+       {
+               printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n");
+               pci_pci_problems|=PCIPCI_NATOMA;
+       }
+}
+
+/*
+ *  S3 868 and 968 chips report region size equal to 32M, but they decode 64M.
+ *  If it's needed, re-allocate the region.
+ */
+
+static void __init quirk_s3_64M(struct pci_dev *dev)
+{
+       struct resource *r = &dev->resource[0];
+
+       if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) {
+               r->start = 0;
+               r->end = 0x3ffffff;
+       }
+}
+
+static void __init quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr)
+{
+       region &= ~(size-1);
+       if (region) {
+               struct resource *res = dev->resource + nr;
+
+               res->name = dev->name;
+               res->start = region;
+               res->end = region + size - 1;
+               res->flags = IORESOURCE_IO;
+               pci_claim_resource(dev, nr);
+       }
+}      
+
+/*
+ * Let's make the southbridge information explicit instead
+ * of having to worry about people probing the ACPI areas,
+ * for example.. (Yes, it happens, and if you read the wrong
+ * ACPI register it will put the machine to sleep with no
+ * way of waking it up again. Bummer).
+ *
+ * ALI M7101: Two IO regions pointed to by words at
+ *     0xE0 (64 bytes of ACPI registers)
+ *     0xE2 (32 bytes of SMB registers)
+ */
+static void __init quirk_ali7101_acpi(struct pci_dev *dev)
+{
+       u16 region;
+
+       pci_read_config_word(dev, 0xE0, &region);
+       quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+       pci_read_config_word(dev, 0xE2, &region);
+       quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * PIIX4 ACPI: Two IO regions pointed to by longwords at
+ *     0x40 (64 bytes of ACPI registers)
+ *     0x90 (32 bytes of SMB registers)
+ */
+static void __init quirk_piix4_acpi(struct pci_dev *dev)
+{
+       u32 region;
+
+       pci_read_config_dword(dev, 0x40, &region);
+       quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES);
+       pci_read_config_dword(dev, 0x90, &region);
+       quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1);
+}
+
+/*
+ * VIA ACPI: One IO region pointed to by longword at
+ *     0x48 or 0x20 (256 bytes of ACPI registers)
+ */
+static void __init quirk_vt82c586_acpi(struct pci_dev *dev)
+{
+       u8 rev;
+       u32 region;
+
+       pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+       if (rev & 0x10) {
+               pci_read_config_dword(dev, 0x48, &region);
+               region &= PCI_BASE_ADDRESS_IO_MASK;
+               quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES);
+       }
+}
+
+/*
+ * VIA VT82C686 ACPI: Three IO regions pointed to by (long)words at
+ *     0x48 (256 bytes of ACPI registers)
+ *     0x70 (128 bytes of hardware monitoring registers)
+ *     0x90 (16 bytes of SMB registers)
+ */
+static void __init quirk_vt82c686_acpi(struct pci_dev *dev)
+{
+       u16 hm;
+       u32 smb;
+
+       quirk_vt82c586_acpi(dev);
+
+       pci_read_config_word(dev, 0x70, &hm);
+       hm &= PCI_BASE_ADDRESS_IO_MASK;
+       quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1);
+
+       pci_read_config_dword(dev, 0x90, &smb);
+       smb &= PCI_BASE_ADDRESS_IO_MASK;
+       quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2);
+}
+
+
+#ifdef CONFIG_X86_IO_APIC 
+extern int nr_ioapics;
+
+/*
+ * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip
+ * devices to the external APIC.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * this code will go away from quirks.
+ */
+static void __init quirk_via_ioapic(struct pci_dev *dev)
+{
+       u8 tmp;
+       
+       if (nr_ioapics < 1)
+               tmp = 0;    /* nothing routed to external APIC */
+       else
+               tmp = 0x1f; /* all known bits (4-0) routed to external APIC */
+               
+       printk(KERN_INFO "PCI: %sbling Via external APIC routing\n",
+              tmp == 0 ? "Disa" : "Ena");
+
+       /* Offset 0x58: External APIC IRQ output control */
+       pci_write_config_byte (dev, 0x58, tmp);
+}
+
+#endif /* CONFIG_X86_IO_APIC */
+
+
+/*
+ * Via 686A/B:  The PCI_INTERRUPT_LINE register for the on-chip
+ * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature:
+ * when written, it makes an internal connection to the PIC.
+ * For these devices, this register is defined to be 4 bits wide.
+ * Normally this is fine.  However for IO-APIC motherboards, or
+ * non-x86 architectures (yes Via exists on PPC among other places),
+ * we must mask the PCI_INTERRUPT_LINE value with 0xf to get
+ * interrupts delivered properly.
+ *
+ * TODO: When we have device-specific interrupt routers,
+ * quirk_via_irqpic will go away from quirks.
+ */
+
+/*
+ * FIXME: it is questionable that quirk_via_acpi
+ * is needed.  It shows up as an ISA bridge, and does not
+ * support the PCI_INTERRUPT_LINE register at all.  Therefore
+ * it seems like setting the pci_dev's 'irq' to the
+ * value of the ACPI SCI interrupt is only done for convenience.
+ *     -jgarzik
+ */
+static void __init quirk_via_acpi(struct pci_dev *d)
+{
+       /*
+        * VIA ACPI device: SCI IRQ line in PCI config byte 0x42
+        */
+       u8 irq;
+       pci_read_config_byte(d, 0x42, &irq);
+       irq &= 0xf;
+       if (irq && (irq != 2))
+               d->irq = irq;
+}
+
+static void __init quirk_via_irqpic(struct pci_dev *dev)
+{
+       u8 irq, new_irq = dev->irq & 0xf;
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+
+       if (new_irq != irq) {
+               printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n",
+                      dev->slot_name, irq, new_irq);
+
+               udelay(15);
+               pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq);
+       }
+}
+
+
+/*
+ * PIIX3 USB: We have to disable USB interrupts that are
+ * hardwired to PIRQD# and may be shared with an
+ * external device.
+ *
+ * Legacy Support Register (LEGSUP):
+ *     bit13:  USB PIRQ Enable (USBPIRQDEN),
+ *     bit4:   Trap/SMI On IRQ Enable (USBSMIEN).
+ *
+ * We mask out all r/wc bits, too.
+ */
+static void __init quirk_piix3_usb(struct pci_dev *dev)
+{
+       u16 legsup;
+
+       pci_read_config_word(dev, 0xc0, &legsup);
+       legsup &= 0x50ef;
+       pci_write_config_word(dev, 0xc0, legsup);
+}
+
+/*
+ * VIA VT82C598 has its device ID settable and many BIOSes
+ * set it to the ID of VT82C597 for backward compatibility.
+ * We need to switch it off to be able to recognize the real
+ * type of the chip.
+ */
+static void __init quirk_vt82c598_id(struct pci_dev *dev)
+{
+       pci_write_config_byte(dev, 0xfc, 0);
+       pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device);
+}
+
+/*
+ * CardBus controllers have a legacy base address that enables them
+ * to respond as i82365 pcmcia controllers.  We don't want them to
+ * do this even if the Linux CardBus driver is not loaded, because
+ * the Linux i82365 driver does not (and should not) handle CardBus.
+ */
+static void __init quirk_cardbus_legacy(struct pci_dev *dev)
+{
+       if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class)
+               return;
+       pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0);
+}
+
+/*
+ * The AMD io apic can hang the box when an apic irq is masked.
+ * We check all revs >= B0 (but not pre-production parts!) as the bug
+ * is currently marked NoFix.
+ *
+ * We have multiple reports of hangs with this chipset that went away with
+ * noapic specified. For the moment we assume it's the errata. We may be
+ * wrong of course. However, the advice is demonstrably good even if so.
+ */
+static void __init quirk_amd_ioapic(struct pci_dev *dev)
+{
+       u8 rev;
+
+       pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
+       if(rev >= 0x02)
+       {
+               printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n");
+               printk(KERN_WARNING "        : booting with the \"noapic\" option.\n");
+       }
+}
+
+/*
+ * Following the PCI ordering rules is optional on the AMD762. I'm not
+ * sure what the designers were smoking but let's not inhale...
+ *
+ * To be fair to AMD, it follows the spec by default; it's the BIOS
+ * people who turn it off!
+ */
+static void __init quirk_amd_ordering(struct pci_dev *dev)
+{
+       u32 pcic;
+       
+       pci_read_config_dword(dev, 0x42, &pcic);
+       if((pcic&2)==0)
+       {
+               pcic |= 2;
+               printk(KERN_WARNING "BIOS disabled PCI ordering compliance, so we enabled it again.\n");
+               pci_write_config_dword(dev, 0x42, pcic);                
+       }
+}
+
+/*
+ *  The main table of quirks.
+ */
+
+static struct pci_fixup pci_fixups[] __initdata = {
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82441,      quirk_passive_release },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82441,      quirk_passive_release },
+       /*
+        * It's not totally clear which chipsets are the problematic ones.
+        * We know the 82C586 and 82C596 variants are affected.
+        */
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C586_0,     quirk_isa_dma_hangs },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C596,       quirk_isa_dma_hangs },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371SB_0,  quirk_isa_dma_hangs },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_S3,       PCI_DEVICE_ID_S3_868,           quirk_s3_64M },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_S3,       PCI_DEVICE_ID_S3_968,           quirk_s3_64M },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82437,      quirk_triton }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82437VX,    quirk_triton }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82439,      quirk_triton }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82439TX,    quirk_triton }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82441,      quirk_natoma }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82443LX_0,  quirk_natoma }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82443LX_1,  quirk_natoma }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82443BX_0,  quirk_natoma }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82443BX_1,  quirk_natoma }, 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82443BX_2,  quirk_natoma },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_5597,          quirk_nopcipci },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_SI,       PCI_DEVICE_ID_SI_496,           quirk_nopcipci },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_8363_0,       quirk_vialatency },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_8371_1,       quirk_vialatency },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      0x3112  /* Not out yet ? */,    quirk_vialatency },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C576,       quirk_vsfx },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C597_0,     quirk_viaetbf },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C597_0,     quirk_vt82c598_id },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C586_3,     quirk_vt82c586_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686_4,     quirk_vt82c686_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371AB_3,  quirk_piix4_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_AL,       PCI_DEVICE_ID_AL_M7101,         quirk_ali7101_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371SB_2,  quirk_piix3_usb },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_INTEL,    PCI_DEVICE_ID_INTEL_82371AB_2,  quirk_piix3_usb },
+       { PCI_FIXUP_FINAL,      PCI_ANY_ID,             PCI_ANY_ID,                     quirk_cardbus_legacy },
+
+#ifdef CONFIG_X86_IO_APIC 
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686,       quirk_via_ioapic },
+#endif
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C586_3,     quirk_via_acpi },
+       { PCI_FIXUP_HEADER,     PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686_4,     quirk_via_acpi },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C586_2,     quirk_via_irqpic },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686_5,     quirk_via_irqpic },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_VIA,      PCI_DEVICE_ID_VIA_82C686_6,     quirk_via_irqpic },
+
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_AMD,      PCI_DEVICE_ID_AMD_VIPER_7410,   quirk_amd_ioapic },
+       { PCI_FIXUP_FINAL,      PCI_VENDOR_ID_AMD,      PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering },
+
+       { 0 }
+};
+
+
+static void pci_do_fixups(struct pci_dev *dev, int pass, struct pci_fixup *f)
+{
+       while (f->pass) {
+               if (f->pass == pass &&
+                   (f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) &&
+                   (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) {
+#ifdef DEBUG
+                       printk(KERN_INFO "PCI: Calling quirk %p for %s\n", f->hook, dev->slot_name);
+#endif
+                       f->hook(dev);
+               }
+               f++;
+       }
+}
+
+void pci_fixup_device(int pass, struct pci_dev *dev)
+{
+       pci_do_fixups(dev, pass, pcibios_fixups);
+       pci_do_fixups(dev, pass, pci_fixups);
+}
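pci_do_fixups() above walks pci_fixups[] once per pass, calling the hook of every entry whose pass, vendor and device match (PCI_ANY_ID wildcards either ID). A hedged sketch of what a new entry in that style looks like; the register offset and the vendor/device IDs are placeholders, not a real workaround in this tree:

    static void __init quirk_example(struct pci_dev *dev)
    {
        u8 tmp;

        /* 0x40 is a placeholder vendor-specific register. */
        pci_read_config_byte(dev, 0x40, &tmp);
        if (!(tmp & 0x01)) {
                tmp |= 0x01;
                pci_write_config_byte(dev, 0x40, tmp);
                printk(KERN_INFO "PCI: example quirk applied to %s\n",
                       dev->slot_name);
        }
    }

    /* Added before the { 0 } terminator of pci_fixups[]:
       { PCI_FIXUP_HEADER, 0x1234, 0x5678, quirk_example },      */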
diff --git a/xen-2.4.16/drivers/pci/setup-bus.c b/xen-2.4.16/drivers/pci/setup-bus.c
new file mode 100644 (file)
index 0000000..6a26a4a
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ *     drivers/pci/setup-bus.c
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *     David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ *          PCI-PCI bridges cleanup, sorted resource allocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args)     printk args
+#else
+# define DBGC(args)
+#endif
+
+#define ROUND_UP(x, a)         (((x) + (a) - 1) & ~((a) - 1))
+
+static int __init
+pbus_assign_resources_sorted(struct pci_bus *bus,
+                            struct pbus_set_ranges_data *ranges)
+{
+       struct list_head *ln;
+       struct resource *res;
+       struct resource_list head_io, head_mem, *list, *tmp;
+       unsigned long io_reserved = 0, mem_reserved = 0;
+       int idx, found_vga = 0;
+
+       head_io.next = head_mem.next = NULL;
+       for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) {
+               struct pci_dev *dev = pci_dev_b(ln);
+               u16 class = dev->class >> 8;
+               u16 cmd;
+
+               /* First, disable the device to avoid side
+                  effects of possibly overlapping I/O and
+                  memory ranges.
+                  Leave VGA enabled - for obvious reason. :-)
+                  Same with all sorts of bridges - they may
+                  have VGA behind them.  */
+               if (class == PCI_CLASS_DISPLAY_VGA
+                               || class == PCI_CLASS_NOT_DEFINED_VGA)
+                       found_vga = 1;
+               else if (class >> 8 != PCI_BASE_CLASS_BRIDGE) {
+                       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+                       cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                                               | PCI_COMMAND_MASTER);
+                       pci_write_config_word(dev, PCI_COMMAND, cmd);
+               }
+
+               /* Reserve some resources for CardBus.
+                  Are these values reasonable? */
+               if (class == PCI_CLASS_BRIDGE_CARDBUS) {
+                       io_reserved += 8*1024;
+                       mem_reserved += 32*1024*1024;
+                       continue;
+               }
+
+               pdev_sort_resources(dev, &head_io, IORESOURCE_IO);
+               pdev_sort_resources(dev, &head_mem, IORESOURCE_MEM);
+       }
+
+       for (list = head_io.next; list;) {
+               res = list->res;
+               idx = res - &list->dev->resource[0];
+               if (pci_assign_resource(list->dev, idx) == 0
+                   && ranges->io_end < res->end)
+                       ranges->io_end = res->end;
+               tmp = list;
+               list = list->next;
+               kfree(tmp);
+       }
+       for (list = head_mem.next; list;) {
+               res = list->res;
+               idx = res - &list->dev->resource[0];
+               if (pci_assign_resource(list->dev, idx) == 0
+                   && ranges->mem_end < res->end)
+                       ranges->mem_end = res->end;
+               tmp = list;
+               list = list->next;
+               kfree(tmp);
+       }
+
+       ranges->io_end += io_reserved;
+       ranges->mem_end += mem_reserved;
+
+       /* The PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998)
+          requires that if there are no I/O ports or memory behind the
+          bridge, the corresponding range must be turned off by writing a
+          base value greater than the limit to the bridge's base/limit
+          registers.  */
+#if 1
+       /* But assuming that some hardware designed before 1998 might
+          not support this (very unlikely - at least all DEC bridges
+          are OK, and I believe that was the de facto standard. -ink),
+          we must allow for at least one unit.  */
+       if (ranges->io_end == ranges->io_start)
+               ranges->io_end += 1;
+       if (ranges->mem_end == ranges->mem_start)
+               ranges->mem_end += 1;
+#endif
+       ranges->io_end = ROUND_UP(ranges->io_end, 4*1024);
+       ranges->mem_end = ROUND_UP(ranges->mem_end, 1024*1024);
+
+       return found_vga;
+}
+
+/* Initialize bridges with base/limit values we have collected */
+static void __init
+pci_setup_bridge(struct pci_bus *bus)
+{
+       struct pbus_set_ranges_data ranges;
+       struct pci_dev *bridge = bus->self;
+       u32 l;
+
+       if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+               return;
+       ranges.io_start = bus->resource[0]->start;
+       ranges.io_end = bus->resource[0]->end;
+       ranges.mem_start = bus->resource[1]->start;
+       ranges.mem_end = bus->resource[1]->end;
+       pcibios_fixup_pbus_ranges(bus, &ranges);
+
+       DBGC((KERN_ERR "PCI: Bus %d, bridge: %s\n", bus->number, bridge->name));
+       DBGC((KERN_ERR "  IO window: %04lx-%04lx\n", ranges.io_start, ranges.io_end));
+       DBGC((KERN_ERR "  MEM window: %08lx-%08lx\n", ranges.mem_start, ranges.mem_end));
+
+       /* Set up the top and bottom of the PCI I/O segment for this bus. */
+       pci_read_config_dword(bridge, PCI_IO_BASE, &l);
+       l &= 0xffff0000;
+       l |= (ranges.io_start >> 8) & 0x00f0;
+       l |= ranges.io_end & 0xf000;
+       pci_write_config_dword(bridge, PCI_IO_BASE, l);
+
+       /* Clear upper 16 bits of I/O base/limit. */
+       pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0);
+
+       /* Clear out the upper 32 bits of PREF base/limit. */
+       pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0);
+       pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);
+
+       /* Set up the top and bottom of the PCI Memory segment
+          for this bus. */
+       l = (ranges.mem_start >> 16) & 0xfff0;
+       l |= ranges.mem_end & 0xfff00000;
+       pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);
+
+       /* Set up PREF base/limit. */
+       l = (bus->resource[2]->start >> 16) & 0xfff0;
+       l |= bus->resource[2]->end & 0xfff00000;
+       pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);
+
+       /* Check if we have VGA behind the bridge.
+          Enable ISA in either case. */
+       l = (bus->resource[0]->flags & IORESOURCE_BUS_HAS_VGA) ? 0x0c : 0x04;
+       pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, l);
+}
+
+static void __init
+pbus_assign_resources(struct pci_bus *bus, struct pbus_set_ranges_data *ranges)
+{
+       struct list_head *ln;
+       int found_vga = pbus_assign_resources_sorted(bus, ranges);
+
+       if (!ranges->found_vga && found_vga) {
+               struct pci_bus *b;
+
+               ranges->found_vga = 1;
+               /* Propagate the presence of VGA to upstream bridges */
+               for (b = bus; b->parent; b = b->parent) {
+#if 0
+                       /* ? Do we actually need to enable PF memory? */
+                       b->resource[2]->start = 0;
+#endif
+                       b->resource[0]->flags |= IORESOURCE_BUS_HAS_VGA;
+               }
+       }
+       for (ln=bus->children.next; ln != &bus->children; ln=ln->next) {
+               struct pci_bus *b = pci_bus_b(ln);
+
+               b->resource[0]->start = ranges->io_start = ranges->io_end;
+               b->resource[1]->start = ranges->mem_start = ranges->mem_end;
+
+               pbus_assign_resources(b, ranges);
+
+               b->resource[0]->end = ranges->io_end - 1;
+               b->resource[1]->end = ranges->mem_end - 1;
+
+               pci_setup_bridge(b);
+       }
+}
+
+void __init
+pci_assign_unassigned_resources(void)
+{
+       struct pbus_set_ranges_data ranges;
+       struct list_head *ln;
+       struct pci_dev *dev;
+
+       for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next) {
+               struct pci_bus *b = pci_bus_b(ln);
+
+               ranges.io_start = b->resource[0]->start + PCIBIOS_MIN_IO;
+               ranges.mem_start = b->resource[1]->start + PCIBIOS_MIN_MEM;
+               ranges.io_end = ranges.io_start;
+               ranges.mem_end = ranges.mem_start;
+               ranges.found_vga = 0;
+               pbus_assign_resources(b, &ranges);
+       }
+       pci_for_each_dev(dev) {
+               pdev_enable_device(dev);
+       }
+}
+
+/* Check whether the bridge supports I/O forwarding.
+   If not, its I/O base/limit register must be
+   read-only and read as 0. */
+unsigned long __init
+pci_bridge_check_io(struct pci_dev *bridge)
+{
+       u16 io;
+
+       pci_read_config_word(bridge, PCI_IO_BASE, &io);
+       if (!io) {
+               pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
+               pci_read_config_word(bridge, PCI_IO_BASE, &io);
+               pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+       }
+       if (io)
+               return IORESOURCE_IO;
+       printk(KERN_WARNING "PCI: bridge %s does not support I/O forwarding!\n",
+                               bridge->name);
+       return 0;
+}
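The PCI_IO_BASE packing in pci_setup_bridge() above stores only bits 15:12 of the 4K-aligned I/O window: the base nibble lands in register bits 7:4 and the limit nibble in bits 15:12, with the limit's low twelve bits implied to be 0xfff by the bridge. A standalone sketch of the same arithmetic; the function name is illustrative:

    /* Pack a 4K-aligned I/O window into the low 16 bits of PCI_IO_BASE.
       For io_start = 0x2000 and io_end = 0x2fff this returns 0x2020. */
    unsigned int pack_io_window(unsigned long io_start, unsigned long io_end)
    {
        unsigned int l = 0;

        l |= (io_start >> 8) & 0x00f0;  /* base[15:12]  -> bits 7:4   */
        l |= io_end & 0xf000;           /* limit[15:12] -> bits 15:12 */
        return l;
    }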
diff --git a/xen-2.4.16/drivers/pci/setup-irq.c b/xen-2.4.16/drivers/pci/setup-irq.c
new file mode 100644 (file)
index 0000000..4c65b2e
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ *     drivers/pci/setup-irq.c
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *     David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args)     printk args
+#else
+# define DBGC(args)
+#endif
+
+
+static void __init
+pdev_fixup_irq(struct pci_dev *dev,
+              u8 (*swizzle)(struct pci_dev *, u8 *),
+              int (*map_irq)(struct pci_dev *, u8, u8))
+{
+       u8 pin, slot;
+       int irq;
+
+       /* If this device is not on the primary bus, we need to figure out
+          which interrupt pin it will come in on.  We know which slot it
+          will come in on, because that slot is where the bridge is.  Each
+          time the interrupt line passes through a PCI-PCI bridge we must
+          apply the swizzle function.  */
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       /* Cope with 0 and illegal. */
+       if (pin == 0 || pin > 4)
+               pin = 1;
+
+       /* Follow the chain of bridges, swizzling as we go.  */
+       slot = (*swizzle)(dev, &pin);
+
+       irq = (*map_irq)(dev, slot, pin);
+       if (irq == -1)
+               irq = 0;
+       dev->irq = irq;
+
+       DBGC((KERN_ERR "PCI fixup irq: (%s) got %d\n", dev->name, dev->irq));
+
+       /* Always tell the device, so the driver knows the real IRQ
+          to use; the device itself does not use it. */
+       pcibios_update_irq(dev, irq);
+}
+
+void __init
+pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
+              int (*map_irq)(struct pci_dev *, u8, u8))
+{
+       struct pci_dev *dev;
+       pci_for_each_dev(dev) {
+               pdev_fixup_irq(dev, swizzle, map_irq);
+       }
+}
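A swizzle callback for pci_fixup_irqs() conventionally walks from the device up to the root bus, rotating the interrupt pin by the slot number at each PCI-PCI bridge, and returns the slot on the primary bus. A hedged sketch of that convention; architectures supply their own variant, and the name here is illustrative:

    static u8 __init example_swizzle(struct pci_dev *dev, u8 *pinp)
    {
        u8 pin = *pinp;

        while (dev->bus->self) {
                /* Pins are 1-based (INTA..INTD): rotate by the slot. */
                pin = ((pin - 1 + PCI_SLOT(dev->devfn)) % 4) + 1;
                dev = dev->bus->self;
        }
        *pinp = pin;
        return PCI_SLOT(dev->devfn);
    }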
diff --git a/xen-2.4.16/drivers/pci/setup-res.c b/xen-2.4.16/drivers/pci/setup-res.c
new file mode 100644 (file)
index 0000000..57932fb
--- /dev/null
@@ -0,0 +1,234 @@
+/*
+ *     drivers/pci/setup-res.c
+ *
+ * Extruded from code written by
+ *      Dave Rusling (david.rusling@reo.mts.dec.com)
+ *      David Mosberger (davidm@cs.arizona.edu)
+ *     David Miller (davem@redhat.com)
+ *
+ * Support routines for initializing a PCI subsystem.
+ */
+
+/* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
+
+/*
+ * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ *          Resource sorting
+ */
+
+#include <linux/init.h>
+//#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+
+
+#define DEBUG_CONFIG 0
+#if DEBUG_CONFIG
+# define DBGC(args)     printk args
+#else
+# define DBGC(args)
+#endif
+
+
+int __init
+pci_claim_resource(struct pci_dev *dev, int resource)
+{
+        struct resource *res = &dev->resource[resource];
+       struct resource *root = pci_find_parent_resource(dev, res);
+       int err;
+
+       err = -EINVAL;
+       if (root != NULL) {
+               err = request_resource(root, res);
+               if (err) {
+                       printk(KERN_ERR "PCI: Address space collision on "
+                              "region %d of device %s [%lx:%lx]\n",
+                              resource, dev->name, res->start, res->end);
+               }
+       } else {
+               printk(KERN_ERR "PCI: No parent found for region %d "
+                      "of device %s\n", resource, dev->name);
+       }
+
+       return err;
+}
+
+/*
+ * Given the PCI bus a device resides on, try to
+ * find an acceptable resource allocation for a
+ * specific device resource..
+ */
+static int pci_assign_bus_resource(const struct pci_bus *bus,
+       struct pci_dev *dev,
+       struct resource *res,
+       unsigned long size,
+       unsigned long min,
+       unsigned int type_mask,
+       int resno)
+{
+       int i;
+
+       type_mask |= IORESOURCE_IO | IORESOURCE_MEM;
+       for (i = 0 ; i < 4; i++) {
+               struct resource *r = bus->resource[i];
+               if (!r)
+                       continue;
+
+               /* type_mask must match */
+               if ((res->flags ^ r->flags) & type_mask)
+                       continue;
+
+               /* We cannot allocate a non-prefetching resource from a prefetching area */
+               if ((r->flags & IORESOURCE_PREFETCH) && !(res->flags & IORESOURCE_PREFETCH))
+                       continue;
+
+               /* Ok, try it out.. */
+               if (allocate_resource(r, res, size, min, -1, size, pcibios_align_resource, dev) < 0)
+                       continue;
+
+               /* Update PCI config space.  */
+               pcibios_update_resource(dev, r, res, resno);
+               return 0;
+       }
+       return -EBUSY;
+}
+
+int 
+pci_assign_resource(struct pci_dev *dev, int i)
+{
+       const struct pci_bus *bus = dev->bus;
+       struct resource *res = dev->resource + i;
+       unsigned long size, min;
+
+       size = res->end - res->start + 1;
+       min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
+
+       /* First, try exact prefetching match.. */
+       if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) {
+               /*
+                * That failed.
+                *
+                * But a prefetching area can handle a non-prefetching
+                * window (it will just not perform as well).
+                */
+               if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) {
+                       printk(KERN_ERR "PCI: Failed to allocate resource %d(%lx-%lx) for %s\n",
+                              i, res->start, res->end, dev->slot_name);
+                       return -EBUSY;
+               }
+       }
+
+       DBGC((KERN_ERR "  got res[%lx:%lx] for resource %d of %s\n", res->start,
+                                               res->end, i, dev->name));
+
+       return 0;
+}
+
+/* Sort resources of a given type by alignment */
+void __init
+pdev_sort_resources(struct pci_dev *dev,
+                   struct resource_list *head, u32 type_mask)
+{
+       int i;
+
+       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+               struct resource *r;
+               struct resource_list *list, *tmp;
+               unsigned long r_size;
+
+               /* PCI-PCI bridges may have I/O ports or
+                  memory on the primary bus */
+               if (dev->class >> 8 == PCI_CLASS_BRIDGE_PCI &&
+                                               i >= PCI_BRIDGE_RESOURCES)
+                       continue;
+
+               r = &dev->resource[i];
+               r_size = r->end - r->start;
+               
+               if (!(r->flags & type_mask) || r->parent)
+                       continue;
+               if (!r_size) {
+                       printk(KERN_WARNING "PCI: Ignore bogus resource %d "
+                                        "[%lx:%lx] of %s\n",
+                                         i, r->start, r->end, dev->name);
+                       continue;
+               }
+               for (list = head; ; list = list->next) {
+                       unsigned long size = 0;
+                       struct resource_list *ln = list->next;
+
+                       if (ln)
+                               size = ln->res->end - ln->res->start;
+                       if (r_size > size) {
+                               tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+                               if (!tmp) {
+                                       printk(KERN_ERR "pdev_sort_resources(): kmalloc() failed!\n");
+                                       continue;
+                               }
+                               tmp->next = ln;
+                               tmp->res = r;
+                               tmp->dev = dev;
+                               list->next = tmp;
+                               break;
+                       }
+               }
+       }
+}
+
+void __init
+pdev_enable_device(struct pci_dev *dev)
+{
+       u32 reg;
+       u16 cmd;
+       int i;
+
+       DBGC((KERN_ERR "PCI enable device: (%s)\n", dev->name));
+
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+       for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+               struct resource *res = &dev->resource[i];
+
+               if (res->flags & IORESOURCE_IO)
+                       cmd |= PCI_COMMAND_IO;
+               else if (res->flags & IORESOURCE_MEM)
+                       cmd |= PCI_COMMAND_MEMORY;
+       }
+
+       /* Special case: disable the ROM.  Several devices act funny
+          (i.e. do not respond to memory space writes) when it is left
+          enabled.  Good examples are QlogicISP adapters.  */
+
+       if (dev->rom_base_reg) {
+               pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+               reg &= ~PCI_ROM_ADDRESS_ENABLE;
+               pci_write_config_dword(dev, dev->rom_base_reg, reg);
+               dev->resource[PCI_ROM_RESOURCE].flags &= ~PCI_ROM_ADDRESS_ENABLE;
+       }
+
+       /* All of these (may) have I/O scattered all around and may not
+          use I/O base address registers at all.  So we just have to
+          always enable IO to these devices.  */
+       if ((dev->class >> 8) == PCI_CLASS_NOT_DEFINED
+           || (dev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA
+           || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE
+           || (dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
+               cmd |= PCI_COMMAND_IO;
+       }
+
+       /* ??? Always turn on bus mastering.  If the device doesn't support
+          it, the bit will go into the bucket. */
+       cmd |= PCI_COMMAND_MASTER;
+
+       /* Set the cache line and default latency (32).  */
+       pci_write_config_word(dev, PCI_CACHE_LINE_SIZE,
+                       (32 << 8) | (L1_CACHE_BYTES / sizeof(u32)));
+
+       /* Enable the appropriate bits in the PCI command register.  */
+       pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+       DBGC((KERN_ERR "  cmd reg 0x%x\n", cmd));
+}
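+
+/*
+ * Example (sketch): for a hypothetical device with one I/O BAR and one
+ * memory BAR, the loop above leaves the command register as roughly
+ *
+ *	cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+ *
+ * which the final pci_write_config_word() then commits to the device.
+ */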
diff --git a/xen-2.4.16/drivers/pci/syscall.c b/xen-2.4.16/drivers/pci/syscall.c
new file mode 100644 (file)
index 0000000..c935efd
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ *     pci_syscall.c
+ *
+ * For architectures where we want to allow direct access
+ * to PCI configuration space - it would probably be preferable
+ * on PCs too, but there people just do it by hand with the
+ * magic northbridge registers.
+ */
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+
+
+asmlinkage long
+sys_pciconfig_read(unsigned long bus, unsigned long dfn,
+                  unsigned long off, unsigned long len, void *buf)
+{
+       struct pci_dev *dev;
+       u8 byte;
+       u16 word;
+       u32 dword;
+       long err, cfg_ret;
+
+       err = -EPERM;
+       if (!capable(CAP_SYS_ADMIN))
+               goto error;
+
+       err = -ENODEV;
+       dev = pci_find_slot(bus, dfn);
+       if (!dev)
+               goto error;
+
+       lock_kernel();
+       switch (len) {
+       case 1:
+               cfg_ret = pci_read_config_byte(dev, off, &byte);
+               break;
+       case 2:
+               cfg_ret = pci_read_config_word(dev, off, &word);
+               break;
+       case 4:
+               cfg_ret = pci_read_config_dword(dev, off, &dword);
+               break;
+       default:
+               err = -EINVAL;
+               unlock_kernel();
+               goto error;
+	}
+       unlock_kernel();
+
+       err = -EIO;
+       if (cfg_ret != PCIBIOS_SUCCESSFUL)
+               goto error;
+
+       switch (len) {
+       case 1:
+               err = put_user(byte, (unsigned char *)buf);
+               break;
+       case 2:
+               err = put_user(word, (unsigned short *)buf);
+               break;
+       case 4:
+               err = put_user(dword, (unsigned int *)buf);
+               break;
+	}
+       return err;
+
+error:
+       /* ??? XFree86 doesn't even check the return value.  They
+          just look for 0xffffffff in the output, since that's what
+          they get instead of a machine check on x86.  */
+       switch (len) {
+       case 1:
+               put_user(-1, (unsigned char *)buf);
+               break;
+       case 2:
+               put_user(-1, (unsigned short *)buf);
+               break;
+       case 4:
+               put_user(-1, (unsigned int *)buf);
+               break;
+	}
+       return err;
+}
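+
+/*
+ * Usage sketch (hypothetical userspace caller): reading the 16-bit
+ * vendor ID of the device at bus 0, devfn 0 through this syscall:
+ *
+ *	unsigned short vendor;
+ *	long err = sys_pciconfig_read(0, 0, PCI_VENDOR_ID, 2, &vendor);
+ *
+ * On success err is 0 and vendor holds the config word at offset 0; on
+ * failure the buffer is filled with all-ones, as the comment above notes.
+ */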
+
+asmlinkage long
+sys_pciconfig_write(unsigned long bus, unsigned long dfn,
+                   unsigned long off, unsigned long len, void *buf)
+{
+       struct pci_dev *dev;
+       u8 byte;
+       u16 word;
+       u32 dword;
+       int err = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (!pcibios_present())
+               return -ENOSYS;
+
+       dev = pci_find_slot(bus, dfn);
+       if (!dev)
+               return -ENODEV;
+
+       lock_kernel();
+       switch(len) {
+       case 1:
+               err = get_user(byte, (u8 *)buf);
+               if (err)
+                       break;
+               err = pci_write_config_byte(dev, off, byte);
+               if (err != PCIBIOS_SUCCESSFUL)
+                       err = -EIO;
+               break;
+
+       case 2:
+               err = get_user(word, (u16 *)buf);
+               if (err)
+                       break;
+               err = pci_write_config_word(dev, off, word);
+               if (err != PCIBIOS_SUCCESSFUL)
+                       err = -EIO;
+               break;
+
+       case 4:
+               err = get_user(dword, (u32 *)buf);
+               if (err)
+                       break;
+               err = pci_write_config_dword(dev, off, dword);
+               if (err != PCIBIOS_SUCCESSFUL)
+                       err = -EIO;
+               break;
+
+       default:
+               err = -EINVAL;
+               break;
+	}
+       unlock_kernel();
+
+       return err;
+}
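+
+/*
+ * Usage sketch (hypothetical userspace caller): setting the latency
+ * timer of the device at bus 0, devfn 0 to 64 with a one-byte write:
+ *
+ *	unsigned char lat = 64;
+ *	long err = sys_pciconfig_write(0, 0, PCI_LATENCY_TIMER, 1, &lat);
+ *
+ * err is 0 on success, -EPERM without CAP_SYS_ADMIN, -ENODEV if no such
+ * slot exists, and -EINVAL for a length other than 1, 2 or 4.
+ */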
diff --git a/xen-2.4.16/drivers/scsi/Makefile b/xen-2.4.16/drivers/scsi/Makefile
new file mode 100644 (file)
index 0000000..574b7d2
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o driver.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/drivers/scsi/constants.h b/xen-2.4.16/drivers/scsi/constants.h
new file mode 100644 (file)
index 0000000..e10527e
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _CONSTANTS_H
+#define _CONSTANTS_H
+extern int print_msg(unsigned char *);
+extern void print_status(int);
+extern void print_Scsi_Cmnd (Scsi_Cmnd *);
+#endif /* def _CONSTANTS_H */
diff --git a/xen-2.4.16/drivers/scsi/hosts.h b/xen-2.4.16/drivers/scsi/hosts.h
new file mode 100644 (file)
index 0000000..d222fc6
--- /dev/null
@@ -0,0 +1,585 @@
+/*
+ *  hosts.h Copyright (C) 1992 Drew Eckhardt
+ *          Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ *
+ *  mid to low-level SCSI driver interface header
+ *      Initial versions: Drew Eckhardt
+ *      Subsequent revisions: Eric Youngdale
+ *
+ *  <drew@colorado.edu>
+ *
+ *      Modified by Eric Youngdale eric@andante.org to
+ *      add scatter-gather, multiple outstanding request, and other
+ *      enhancements.
+ *
+ *  Further modified by Eric Youngdale to support multiple host adapters
+ *  of the same type.
+ *
+ *  Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ */
+
+#ifndef _HOSTS_H
+#define _HOSTS_H
+
+/*
+    $Header: /vger/u4/cvs/linux/drivers/scsi/hosts.h,v 1.6 1997/01/19 23:07:13 davem Exp $
+*/
+
+#include <xeno/config.h>
+/*#include <xeno/proc_fs.h>*/
+#include <xeno/pci.h>
+
+/* It is senseless to set SG_ALL any higher than this - the performance
+ *  does not get any better, and it wastes memory
+ */
+#define SG_NONE 0
+#define SG_ALL 0xff
+
+#define DISABLE_CLUSTERING 0
+#define ENABLE_CLUSTERING 1
+
+/* The various choices mean:
+ * NONE: Self evident. Host adapter is not capable of scatter-gather.
+ * ALL:         Means that the host adapter module can do scatter-gather,
+ *      and that there is no limit to the size of the table to which
+ *      we scatter/gather data.
+ * Anything else:  Indicates the maximum number of chains that can be
+ *      used in one scatter-gather request.
+ */
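+
+/*
+ * For example (sketch): an adapter limited to 16 segments per request
+ * would set sg_tablesize = 16 in its host template, while
+ * sg_tablesize = SG_ALL declares an effectively unbounded table and
+ * SG_NONE disables scatter-gather entirely.
+ */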
+
+/*
+ * The Scsi_Host_Template type has all that is needed to interface with a SCSI
+ * host in a device independent manner.  There is one entry for each different
+ * type of host adapter that is supported on the system.
+ */
+
+typedef struct scsi_disk Disk;
+
+typedef struct SHT
+{
+
+    /* Used with loadable modules so we can construct a linked list. */
+    struct SHT * next;
+
+    /* Used with loadable modules so that we know when it is safe to unload */
+    struct module * module;
+
+    /* The pointer to the /proc/scsi directory entry */
+    struct proc_dir_entry *proc_dir;
+
+    /* proc-fs info function.
+     * Can be used to export driver statistics and other information to the
+     * world outside the kernel, i.e. userspace; it also provides an interface
+     * to feed the driver with information.  Check eata_dma_proc.c for reference.
+     */
+    int (*proc_info)(char *, char **, off_t, int, int, int);
+
+    /*
+     * The name pointer is a pointer to the name of the SCSI
+     * device detected.
+     */
+    const char *name;
+
+    /*
+     * The detect function shall return nonzero on detection,
+     * indicating the number of host adapters of this particular
+     * type that were found.  It should also
+     * initialize all data necessary for this particular
+     * SCSI driver.  It is passed the host number, so this host
+     * knows where the first entry is in the scsi_hosts[] array.
+     *
+     * Note that the detect routine MUST not call any of the mid level
+     * functions to queue commands because things are not guaranteed
+     * to be set up yet.  The detect routine can send commands to
+     * the host adapter as long as the program control will not be
+     * passed to scsi.c in the processing of the command.  Note
+     * especially that scsi_malloc/scsi_free must not be called.
+     */
+    int (* detect)(struct SHT *);
+
+    int (*revoke)(Scsi_Device *);
+
+    /* Used with loadable modules to unload the host structures.  Note:
+     * there is a default action built into the modules code which may
+     * be sufficient for most host adapters.  Thus you may not have to supply
+     * this at all.
+     */
+    int (*release)(struct Scsi_Host *);
+
+    /*
+     * The info function will return whatever useful
+     * information the developer sees fit.  If not provided, then
+     * the name field will be used instead.
+     */
+    const char *(* info)(struct Scsi_Host *);
+
+    /*
+     * ioctl interface
+     */
+    int (*ioctl)(Scsi_Device *dev, int cmd, void *arg);
+
+    /*
+     * The command function takes a target, a command (this is a SCSI
+     * command formatted as per the SCSI spec, nothing strange), a
+     * data buffer pointer, and data buffer length pointer.  The return
+     * is a status int, bit fielded as follows :
+     * Byte What
+     * 0    SCSI status code
+     * 1    SCSI 1 byte message
+     * 2    host error return.
+     * 3    mid level error return
+     */
+    int (* command)(Scsi_Cmnd *);
+
+    /*
+     * The QueueCommand function works in a similar manner
+     * to the command function.         It takes an additional parameter,
+     * void (* done)(int host, int code) which is passed the host
+     * # and exit result when the command is complete.
+     * Host number is the POSITION IN THE hosts array of THIS
+     * host adapter.
+     *
+     * The done() function must only be called after QueueCommand() 
+     * has returned.
+     */
+    int (* queuecommand)(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+
+    /*
+     * This is an error handling strategy routine.  You don't need to
+     * define one of these if you don't want to - there is a default
+     * routine that is present that should work in most cases.  For those
+     * driver authors that have the inclination and ability to write their
+     * own strategy routine, this is where it is specified.  Note - the
+     * strategy routine is *ALWAYS* run in the context of the kernel eh
+     * thread.  Thus you are guaranteed to *NOT* be in an interrupt handler
+     * when you execute this, and you are also guaranteed to *NOT* have any
+     * other commands being queued while you are in the strategy routine.
+     * When you return from this function, operations return to normal.
+     *
+     * See scsi_error.c scsi_unjam_host for additional comments about what
+     * this function should and should not be attempting to do.
+     */
+     int (*eh_strategy_handler)(struct Scsi_Host *);
+     int (*eh_abort_handler)(Scsi_Cmnd *);
+     int (*eh_device_reset_handler)(Scsi_Cmnd *);
+     int (*eh_bus_reset_handler)(Scsi_Cmnd *);
+     int (*eh_host_reset_handler)(Scsi_Cmnd *);
+
+    /*
+     * Since the mid level driver handles time outs, etc, we want to
+     * be able to abort the current command.  Abort returns 0 if the
+     * abort was successful.  The field SCpnt->abort_reason
+     * can be filled in with the appropriate reason why we wanted
+     * the abort in the first place, and this will be used
+     * in the mid-level code instead of the host_byte().
+     * If non-zero, the code passed to it
+     * will be used as the return code, otherwise
+     * DID_ABORT  should be returned.
+     *
+     * Note that the scsi driver should "clean up" after itself,
+     * resetting the bus, etc. if necessary.
+     *
+     * NOTE - this interface is deprecated, and will go away.  Use
+     * the eh_ routines instead.
+     */
+    int (* abort)(Scsi_Cmnd *);
+
+    /*
+     * The reset function will reset the SCSI bus.  Any executing
+     * commands should fail with a DID_RESET in the host byte.
+     * The Scsi_Cmnd  is passed so that the reset routine can figure
+     * out which host adapter should be reset, and also which command
+     * within the command block was responsible for the reset in
+     * the first place.         Some hosts do not implement a reset function,
+     * and these hosts must call scsi_request_sense(SCpnt) to keep
+     * the command alive.
+     *
+     * NOTE - this interface is deprecated, and will go away.  Use
+     * the eh_ routines instead.
+     */
+    int (* reset)(Scsi_Cmnd *, unsigned int);
+
+    /*
+     * This function is used to select synchronous communications,
+     * which will result in a higher data throughput.  Not implemented
+     * yet.
+     */
+    int (* slave_attach)(int, int);
+
+    /*
+     * This function determines the BIOS parameters for a given
+     * hard disk.  These tend to be numbers that are made up by
+     * the host adapter.  Parameters:
+     * size, device number, list (heads, sectors, cylinders)
+     */
+    int (* bios_param)(Disk *, kdev_t, int []);
+
+
+    /*
+     * Used to set the queue depth for a specific device.
+     */
+    void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+    /*
+     * This determines if we will use a non-interrupt driven
+     * or an interrupt driven scheme.  It is set to the maximum number
+     * of simultaneous commands a given host adapter will accept.
+     */
+    int can_queue;
+
+    /*
+     * In many instances, especially where disconnect / reconnect are
+     * supported, our host also has an ID on the SCSI bus.  If this is
+     * the case, then it must be reserved.  Please set this_id to -1 if
+     * your setup is in single initiator mode, and the host lacks an
+     * ID.
+     */
+    int this_id;
+
+    /*
+     * This determines the degree to which the host adapter is capable
+     * of scatter-gather.
+     */
+    short unsigned int sg_tablesize;
+
+    /*
+     * if the host adapter has limitations besides the segment count
+     */
+    short unsigned int max_sectors;
+
+    /*
+     * True if this host adapter can make good use of linked commands.
+     * This will allow more than one command to be queued to a given
+     * unit on a given host.  Set this to the maximum number of command
+     * blocks to be provided for each device.  Set this to 1 for one
+     * command block per lun, 2 for two, etc.  Do not set this to 0.
+     * You should make sure that the host adapter will do the right thing
+     * before you try setting this above 1.
+     */
+    short cmd_per_lun;
+
+    /*
+     * present contains a counter indicating how many boards of this
+     * type were found when we did the scan.
+     */
+    unsigned char present;
+
+    /*
+     * true if this host adapter uses unchecked DMA onto an ISA bus.
+     */
+    unsigned unchecked_isa_dma:1;
+
+    /*
+     * true if this host adapter can make good use of clustering.
+     * I originally thought that if the tablesize was large it would
+     * be a waste of CPU cycles to prepare a cluster list, but
+     * it works out that the BusLogic is faster if you use a smaller
+     * number of segments (i.e. use clustering).  Apparently the large
+     * tables are what is inefficient.
+     */
+    unsigned use_clustering:1;
+
+    /*
+     * True if this driver uses the new error handling code.  This flag is
+     * really only temporary until all of the other drivers get converted
+     * to use the new error handling code.
+     */
+    unsigned use_new_eh_code:1;
+
+    /*
+     * True for emulated SCSI host adapters (e.g. ATAPI)
+     */
+    unsigned emulated:1;
+
+    /*
+     * Name of proc directory
+     */
+    char *proc_name;
+
+} Scsi_Host_Template;
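+
+/*
+ * Sketch of how a low-level driver might fill in one of these templates
+ * (the "foo" names are hypothetical; only commonly required fields are
+ * shown, in the usual 2.4-era labelled-initializer style):
+ *
+ *	static Scsi_Host_Template foo_template = {
+ *		name:		"foo",
+ *		detect:		foo_detect,
+ *		release:	foo_release,
+ *		queuecommand:	foo_queuecommand,
+ *		can_queue:	16,
+ *		this_id:	7,
+ *		sg_tablesize:	SG_ALL,
+ *		cmd_per_lun:	1,
+ *		use_clustering:	ENABLE_CLUSTERING,
+ *		use_new_eh_code: 1,
+ *	};
+ */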
+
+/*
+ * The scsi_hosts array is the array containing the data for all
+ * possible <supported> scsi hosts.   This is similar to the
+ * Scsi_Host_Template, except that we have one entry for each
+ * actual physical host adapter on the system, stored as a linked
+ * list.  Note that if there are 2 aha1542 boards, then there will
+ * be two Scsi_Host entries, but only 1 Scsi_Host_Template entry.
+ */
+
+struct Scsi_Host
+{
+/* private: */
+    /*
+     * This information is private to the scsi mid-layer.  The "private:"
+     * label above marks it off, in a sort of C++ fashion.
+     */
+    struct Scsi_Host      * next;
+    Scsi_Device           * host_queue;
+
+
+    struct task_struct    * ehandler;  /* Error recovery thread. */
+    struct semaphore      * eh_wait;   /* The error recovery thread waits on
+                                          this. */
+    struct semaphore      * eh_notify; /* wait for eh to begin */
+    struct semaphore      * eh_action; /* Wait for specific actions on the
+                                          host. */
+    unsigned int            eh_active:1; /* Indicates the eh thread is awake and active if
+                                          this is true. */
+    wait_queue_head_t       host_wait;
+    Scsi_Host_Template    * hostt;
+    atomic_t                host_active; /* commands checked out */
+    volatile unsigned short host_busy;   /* commands actually active on low-level */
+    volatile unsigned short host_failed; /* commands that failed. */
+    
+/* public: */
+    unsigned short extra_bytes;
+    unsigned short host_no;  /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */
+    int resetting; /* if set, it means that last_reset is a valid value */
+    unsigned long last_reset;
+
+
+    /*
+     * These three parameters can be used to allow for wide scsi,
+     * and for host adapters that support multiple busses.
+     * The first two should be set to 1 more than the actual max id
+     * or lun (i.e. 8 for normal systems).
+     */
+    unsigned int max_id;
+    unsigned int max_lun;
+    unsigned int max_channel;
+
+    /* These parameters should be set by the detect routine */
+    unsigned long base;
+    unsigned long io_port;
+    unsigned char n_io_port;
+    unsigned char dma_channel;
+    unsigned int  irq;
+
+    /*
+     * This is a unique identifier that must be assigned so that we
+     * have some way of identifying each detected host adapter properly
+     * and uniquely.  For hosts that do not support more than one card
+     * in the system at one time, this does not need to be set.  It is
+     * initialized to 0 in scsi_register.
+     */
+    unsigned int unique_id;
+
+    /*
+     * The rest can be copied from the template, or specifically
+     * initialized, as required.
+     */
+
+    /*
+     * The maximum length of SCSI commands that this host can accept.
+     * Probably 12 for most host adapters, but could be 16 for others.
+     * For drivers that don't set this field, a value of 12 is
+     * assumed.  I am leaving this as a number rather than a bit
+     * because you never know what subsequent SCSI standards might do
+     * (i.e. could there be a 20 byte or a 24-byte command a few years
+     * down the road?).  
+     */
+    unsigned char max_cmd_len;
+
+    int this_id;
+    int can_queue;
+    short cmd_per_lun;
+    short unsigned int sg_tablesize;
+    short unsigned int max_sectors;
+
+    unsigned in_recovery:1;
+    unsigned unchecked_isa_dma:1;
+    unsigned use_clustering:1;
+    /*
+     * True if this host was loaded as a loadable module
+     */
+    unsigned loaded_as_module:1;
+
+    /*
+     * Host has rejected a command because it was busy.
+     */
+    unsigned host_blocked:1;
+
+    /*
+     * Host has requested that no further requests come through for the
+     * time being.
+     */
+    unsigned host_self_blocked:1;
+    
+    /*
+     * Host uses correct SCSI ordering not PC ordering. The bit is
+     * set for the minority of drivers whose authors actually read the spec ;)
+     */
+    unsigned reverse_ordering:1;
+
+    /*
+     * Indicates that one or more devices on this host were starved;
+     * when the host becomes less busy, they need to be fed.
+     */
+    unsigned some_device_starved:1;
+   
+    void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
+
+    /*
+     * For SCSI hosts which are PCI devices, set pci_dev so that
+     * we can do BIOS EDD 3.0 mappings
+     */
+    struct pci_dev *pci_dev;
+
+    /*
+     * We should ensure that this is aligned, both for better performance
+     * and also because some compilers (m68k) don't automatically force
+     * alignment to a long boundary.
+     */
+    unsigned long hostdata[0]  /* Used for storage of host specific stuff */
+        __attribute__ ((aligned (sizeof(unsigned long))));
+};
+
+/*
+ * These two functions are used to allocate and free a pseudo device
+ * which will connect to the host adapter itself rather than any
+ * physical device.  You must deallocate it when you are done.  This
+ * pseudo-device isn't a real physical device and won't be available
+ * from any high-level drivers.
+ */
+extern void scsi_free_host_dev(Scsi_Device * SDpnt);
+extern Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt);
+
+extern void scsi_unblock_requests(struct Scsi_Host * SHpnt);
+extern void scsi_block_requests(struct Scsi_Host * SHpnt);
+extern void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel);
+
+typedef struct SHN
+    {
+    struct SHN * next;
+    char * name;
+    unsigned short host_no;
+    unsigned short host_registered;
+    unsigned loaded_as_module;
+    } Scsi_Host_Name;
+       
+extern Scsi_Host_Name * scsi_host_no_list;
+extern struct Scsi_Host * scsi_hostlist;
+extern struct Scsi_Device_Template * scsi_devicelist;
+
+extern Scsi_Host_Template * scsi_hosts;
+
+extern void build_proc_dir_entries(Scsi_Host_Template  *);
+
+/*
+ *  scsi_init initializes the scsi hosts.
+ */
+
+extern int next_scsi_host;
+
+unsigned int scsi_init(void);
+extern struct Scsi_Host * scsi_register(Scsi_Host_Template *, int j);
+extern void scsi_unregister(struct Scsi_Host * i);
+
+extern void scsi_register_blocked_host(struct Scsi_Host * SHpnt);
+extern void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt);
+
+static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt,
+                                       struct pci_dev *pdev)
+{
+       SHpnt->pci_dev = pdev;
+}
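+
+/*
+ * Example (sketch, hypothetical foo driver): a PCI SCSI driver's detect
+ * routine would typically call
+ *
+ *	scsi_set_pci_device(SHpnt, pdev);
+ *
+ * right after scsi_register(), so the pci_dev is available later for
+ * the BIOS EDD 3.0 mappings mentioned above.
+ */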
+
+
+/*
+ * Prototypes for functions/data in scsi_scan.c
+ */
+extern void scan_scsis(struct Scsi_Host *shpnt,
+                      uint hardcoded,
+                      uint hchannel,
+                      uint hid,
+                       uint hlun);
+
+extern void scsi_mark_host_reset(struct Scsi_Host *Host);
+
+#define BLANK_HOST {"", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+struct Scsi_Device_Template
+{
+    struct Scsi_Device_Template * next;
+    const char * name;
+    const char * tag;
+    struct module * module;      /* Used for loadable modules */
+    unsigned char scsi_type;
+    unsigned int major;
+    unsigned int min_major;      /* Minimum major in range. */ 
+    unsigned int max_major;      /* Maximum major in range. */
+    unsigned int nr_dev;         /* Number currently attached */
+    unsigned int dev_noticed;    /* Number of devices detected. */
+    unsigned int dev_max;        /* Current size of arrays */
+    unsigned blk:1;              /* 0 if character device */
+    int (*detect)(Scsi_Device *); /* Returns 1 if we can attach this device */
+    int (*init)(void);           /* Sizes arrays based upon number of devices
+                  *  detected */
+    void (*finish)(void);        /* Perform initialization after attachment */
+    int (*attach)(Scsi_Device *); /* Attach devices to arrays */
+    void (*detach)(Scsi_Device *);
+    int (*init_command)(Scsi_Cmnd *);     /* Used by new queueing code. 
+                                           Selects command for blkdevs */
+};
+
+void  scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt);
+
+int scsi_register_device(struct Scsi_Device_Template * sdpnt);
+void scsi_deregister_device(struct Scsi_Device_Template * tpnt);
+
+/* These are used by loadable modules */
+extern int scsi_register_module(int, void *);
+extern int scsi_unregister_module(int, void *);
+
+/* The different types of modules that we can load and unload */
+#define MODULE_SCSI_HA 1
+#define MODULE_SCSI_CONST 2
+#define MODULE_SCSI_IOCTL 3
+#define MODULE_SCSI_DEV 4
+
+
+/*
+ * This is an ugly hack.  If we expect to be able to load devices at run time,
+ * we need to leave extra room in some of the data structures. Doing a
+ * realloc to enlarge the structures would be riddled with race conditions,
+ * so until a better solution is discovered, we use this crude approach.
+ *
+ * Even bigger hack for SparcSTORAGE arrays. Those are at least 6 disks, but
+ * usually up to 30 disks, so everyone would need to change this. -jj
+ *
+ * Note: These things are all evil and all need to go away.  My plan is to
+ * tackle the character devices first, as there aren't any locking implications
+ * in the block device layer.   The block devices will require more work.
+ *
+ * The generic driver has been updated to resize as required.  So has the tape
+ * driver.  Two down, two more to go.
+ */
+#ifndef CONFIG_SD_EXTRA_DEVS
+#define CONFIG_SD_EXTRA_DEVS 2
+#endif
+#ifndef CONFIG_SR_EXTRA_DEVS
+#define CONFIG_SR_EXTRA_DEVS 2
+#endif
+#define SD_EXTRA_DEVS CONFIG_SD_EXTRA_DEVS
+#define SR_EXTRA_DEVS CONFIG_SR_EXTRA_DEVS
+
+#endif
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi.c b/xen-2.4.16/drivers/scsi/scsi.c
new file mode 100644 (file)
index 0000000..320b7cd
--- /dev/null
@@ -0,0 +1,2845 @@
+/*
+ *  scsi.c Copyright (C) 1992 Drew Eckhardt
+ *         Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ *  generic mid-level SCSI driver
+ *      Initial versions: Drew Eckhardt
+ *      Subsequent revisions: Eric Youngdale
+ *
+ *  <drew@colorado.edu>
+ *
+ *  Bug correction thanks go to :
+ *      Rik Faith <faith@cs.unc.edu>
+ *      Tommy Thorn <tthorn>
+ *      Thomas Wuensche <tw@fgb1.fgb.mw.tu-muenchen.de>
+ *
+ *  Modified by Eric Youngdale eric@andante.org or ericy@gnu.ai.mit.edu to
+ *  add scatter-gather, multiple outstanding request, and other
+ *  enhancements.
+ *
+ *  Native multichannel, wide scsi, /proc/scsi and hot plugging
+ *  support added by Michael Neuffer <mike@i-connect.net>
+ *
+ *  Added request_module("scsi_hostadapter") for kerneld:
+ *  (Put an "alias scsi_hostadapter your_hostadapter" in /etc/modules.conf)
+ *  Bjorn Ekwall  <bj0rn@blox.se>
+ *  (changed to kmod)
+ *
+ *  Major improvements to the timeout, abort, and reset processing,
+ *  as well as performance modifications for large queue depths by
+ *  Leonard N. Zubkoff <lnz@dandelion.com>
+ *
+ *  Converted cli() code to spinlocks, Ingo Molnar
+ *
+ *  Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli
+ *
+ *  out_of_space hacks, D. Gilbert (dpg) 990608
+ */
+
+#define REVISION       "Revision: 1.00"
+#define VERSION                "Id: scsi.c 1.00 2000/09/26"
+
+#include <xeno/config.h>
+#include <xeno/module.h>
+
+#include <xeno/sched.h>
+#include <xeno/timer.h>
+#include <xeno/lib.h>
+#include <xeno/slab.h>
+#include <xeno/ioport.h>
+/*#include <xeno/stat.h>*/
+#include <xeno/blk.h>
+#include <xeno/interrupt.h>
+#include <xeno/delay.h>
+#include <xeno/init.h>
+/*#include <xeno/smp_lock.h>*/
+/*#include <xeno/completion.h>*/
+
+#define __KERNEL_SYSCALLS__
+
+/*#include <xeno/unistd.h>*/
+#include <xeno/spinlock.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <xeno/kmod.h>
+#endif
+
+#undef USE_STATIC_SCSI_MEMORY
+
+struct proc_dir_entry *proc_scsi;
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length);
+static void scsi_dump_status(int level);
+#endif
+
+/*
+   static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/scsi.c,v 1.38 1997/01/19 23:07:18 davem Exp $";
+ */
+
+/*
+ * Definitions and constants.
+ */
+
+#define MIN_RESET_DELAY (2*HZ)
+
+/* Do not call reset on error if we just did a reset within 15 sec. */
+#define MIN_RESET_PERIOD (15*HZ)
+
+/*
+ * Macro to determine the size of a SCSI command. This macro takes vendor
+ * unique commands into account. SCSI commands in groups 6 and 7 are
+ * vendor unique and we will depend upon the command length being
+ * supplied correctly in cmd_len.
+ */
+#define CDB_SIZE(SCpnt)        ((((SCpnt->cmnd[0] >> 5) & 7) < 6) ? \
+                               COMMAND_SIZE(SCpnt->cmnd[0]) : SCpnt->cmd_len)
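+
+/*
+ * Worked example: READ(6) has opcode 0x08, so its group is
+ * (0x08 >> 5) & 7 == 0 and CDB_SIZE() yields scsi_command_size[0] == 6
+ * bytes.  A vendor-unique group 6 or 7 opcode (top three bits 110 or
+ * 111) falls through to the driver-supplied SCpnt->cmd_len instead.
+ */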
+
+/*
+ * Data declarations.
+ */
+unsigned long scsi_pid;
+Scsi_Cmnd *last_cmnd;
+/* Command group 3 is reserved and should never be used.  */
+const unsigned char scsi_command_size[8] =
+{
+       6, 10, 10, 12,
+       16, 12, 10, 10
+};
+static unsigned long serial_number;
+static Scsi_Cmnd *scsi_bh_queue_head;
+static Scsi_Cmnd *scsi_bh_queue_tail;
+
+/*
+ * Note - the initial logging level can be set here to log events at boot time.
+ * After the system is up, you may enable logging via the /proc interface.
+ */
+unsigned int scsi_logging_level;
+
+const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] =
+{
+       "Direct-Access    ",
+       "Sequential-Access",
+       "Printer          ",
+       "Processor        ",
+       "WORM             ",
+       "CD-ROM           ",
+       "Scanner          ",
+       "Optical Device   ",
+       "Medium Changer   ",
+       "Communications   ",
+       "Unknown          ",
+       "Unknown          ",
+       "Unknown          ",
+       "Enclosure        ",
+};
+
+/* 
+ * Function prototypes.
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+void scsi_build_commandblocks(Scsi_Device * SDpnt);
+
+/*
+ * These are the interface to the old error handling code.  It should go away
+ * someday soon.
+ */
+extern void scsi_old_done(Scsi_Cmnd * SCpnt);
+extern void scsi_old_times_out(Scsi_Cmnd * SCpnt);
+extern int scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+
+/* 
+ * Private interface into the new error handling code.
+ */
+extern int scsi_new_reset(Scsi_Cmnd *SCpnt, unsigned int flag);
+
+/*
+ * Function:    scsi_initialize_queue()
+ *
+ * Purpose:     Selects queue handler function for a device.
+ *
+ * Arguments:   SDpnt   - device for which we need a handler function.
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:       Most devices will end up using scsi_request_fn for the
+ *              handler function (at least as things are done now).
+ *              The "block" feature basically ensures that only one of
+ *              the blocked hosts is active at one time, mainly to work around
+ *              buggy DMA chipsets where the memory gets starved.
+ *              For this case, we have a special handler function, which
+ *              does some checks and ultimately calls scsi_request_fn.
+ *
+ *              The single_lun feature is a similar special case.
+ *
+ *              We handle these things by stacking the handlers.  The
+ *              special case handlers simply check a few conditions,
+ *              and return if they are not supposed to do anything.
+ *              In the event that things are OK, then they call the next
+ *              handler in the list - ultimately they call scsi_request_fn
+ *              to do the dirty deed.
+ */
+void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt)
+{
+	blk_init_queue(&SDpnt->request_queue, scsi_request_fn);
+	blk_queue_headactive(&SDpnt->request_queue, 0);
+	SDpnt->request_queue.queuedata = (void *) SDpnt;
+}
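+
+/*
+ * Sketch of the handler stacking described above (the foo_* and
+ * another_blocked_host_is_active() names are hypothetical): a
+ * special-case handler checks its condition and, when everything is
+ * OK, falls through to the real worker:
+ *
+ *	static void foo_blocked_request_fn(request_queue_t *q)
+ *	{
+ *		if (another_blocked_host_is_active())
+ *			return;
+ *		scsi_request_fn(q);
+ *	}
+ */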
+
+#ifdef MODULE
+MODULE_PARM(scsi_logging_level, "i");
+MODULE_PARM_DESC(scsi_logging_level, "SCSI logging level; should be zero or nonzero");
+
+#else
+
+static int __init scsi_logging_setup(char *str)
+{
+       int tmp;
+
+       if (get_option(&str, &tmp) == 1) {
+               scsi_logging_level = (tmp ? ~0 : 0);
+               return 1;
+       } else {
+		printk(KERN_INFO "scsi_logging_setup : usage scsi_logging=n "
+                      "(n should be 0 or non-zero)\n");
+               return 0;
+       }
+}
+
+__setup("scsi_logging=", scsi_logging_setup);
+
+#endif
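+
+/*
+ * Example: booting with "scsi_logging=1" on the kernel command line
+ * turns every logging bit on (the level becomes ~0), while
+ * "scsi_logging=0" leaves logging off.  When built as a module, the
+ * scsi_logging_level parameter serves the same purpose.
+ */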
+
+/*
+ *     Issue a command and wait for it to complete
+ */
+static void scsi_wait_done(Scsi_Cmnd * SCpnt)
+{
+       struct request *req;
+
+       req = &SCpnt->request;
+       req->rq_status = RQ_SCSI_DONE;  /* Busy, but indicate request done */
+
+       if (req->waiting != NULL) {
+               complete(req->waiting);
+       }
+}
+
+/*
+ * This lock protects the freelist for all devices on the system.
+ * We could make this finer grained by having a single lock per
+ * device if it is ever found that there is excessive contention
+ * on this lock.
+ */
+static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Used to protect insertion into and removal from the queue of
+ * commands to be processed by the bottom half handler.
+ */
+static spinlock_t scsi_bhqueue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Function:    scsi_allocate_request
+ *
+ * Purpose:     Allocate a request descriptor.
+ *
+ * Arguments:   device    - device for which we want a request
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Pointer to request block.
+ *
+ * Notes:       With the new queueing code, it becomes important
+ *              to track the difference between a command and a
+ *              request.  A request is a pending item in the queue that
+ *              has not yet reached the top of the queue.
+ */
+
+Scsi_Request *scsi_allocate_request(Scsi_Device * device)
+{
+       Scsi_Request *SRpnt = NULL;
+  
+       if (!device)
+               panic("No device passed to scsi_allocate_request().\n");
+  
+       SRpnt = (Scsi_Request *) kmalloc(sizeof(Scsi_Request), GFP_ATOMIC);
+       if( SRpnt == NULL )
+       {
+               return NULL;
+       }
+
+       memset(SRpnt, 0, sizeof(Scsi_Request));
+       SRpnt->sr_device = device;
+       SRpnt->sr_host = device->host;
+       SRpnt->sr_magic = SCSI_REQ_MAGIC;
+       SRpnt->sr_data_direction = SCSI_DATA_UNKNOWN;
+
+       return SRpnt;
+}
+
+/*
+ * Function:    scsi_release_request
+ *
+ * Purpose:     Release a request descriptor.
+ *
+ * Arguments:   req       - request descriptor to be released
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       With the new queueing code, it becomes important
+ *              to track the difference between a command and a
+ *              request.  A request is a pending item in the queue that
+ *              has not yet reached the top of the queue.  We still need
+ *              to free a request when we are done with it, of course.
+ */
+void scsi_release_request(Scsi_Request * req)
+{
+       if( req->sr_command != NULL )
+       {
+               scsi_release_command(req->sr_command);
+               req->sr_command = NULL;
+       }
+
+       kfree(req);
+}
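+
+/*
+ * Typical pairing (sketch): every successful scsi_allocate_request()
+ * is balanced by a scsi_release_request() once the caller is done:
+ *
+ *	Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
+ *	if (SRpnt != NULL) {
+ *		... issue the request, e.g. via scsi_wait_req() ...
+ *		scsi_release_request(SRpnt);
+ *	}
+ */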
+
+/*
+ * Function:    scsi_allocate_device
+ *
+ * Purpose:     Allocate a command descriptor.
+ *
+ * Arguments:   device    - device for which we want a command descriptor
+ *              wait      - 1 if we should wait in the event that none
+ *                          are available.
+ *              interruptible - 1 if we should unblock and return NULL
+ *                          in the event that we must wait, and a signal
+ *                          arrives.
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Pointer to command descriptor.
+ *
+ * Notes:       Prior to the new queue code, this function was not SMP-safe.
+ *
+ *              If the wait flag is true, and we are waiting for a free
+ *              command block, this function will interrupt and return
+ *              NULL in the event that a signal arrives that needs to
+ *              be handled.
+ *
+ *              This function is deprecated, and drivers should be
+ *              rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+
+Scsi_Cmnd *scsi_allocate_device(Scsi_Device * device, int wait, 
+                                int interruptable)
+{
+       struct Scsi_Host *host;
+       Scsi_Cmnd *SCpnt = NULL;
+       Scsi_Device *SDpnt;
+       unsigned long flags;
+  
+       if (!device)
+               panic("No device passed to scsi_allocate_device().\n");
+  
+       host = device->host;
+  
+       spin_lock_irqsave(&device_request_lock, flags);
+       while (1 == 1) {
+               SCpnt = NULL;
+               if (!device->device_blocked) {
+                       if (device->single_lun) {
+                               /*
+                                * FIXME(eric) - this is not at all optimal.  Given that
+                                * single lun devices are rare and usually slow
+                                * (i.e. CD changers), this is good enough for now, but
+                                * we may want to come back and optimize this later.
+                                *
+                                * Scan through all of the devices attached to this
+                                * host, and see if any are active or not.  If so,
+                                * we need to defer this command.
+                                *
+                                * We really need a busy counter per device.  This would
+                                * allow us to more easily figure out whether we should
+                                * do anything here or not.
+                                */
+                               for (SDpnt = host->host_queue;
+                                    SDpnt;
+                                    SDpnt = SDpnt->next) {
+                                       /*
+                                        * Only look for other devices on the same bus
+                                        * with the same target ID.
+                                        */
+                                       if (SDpnt->channel != device->channel
+                                           || SDpnt->id != device->id
+                                           || SDpnt == device) {
+                                               continue;
+                                       }
+                                        if( atomic_read(&SDpnt->device_active) != 0)
+                                        {
+                                                break;
+                                        }
+                               }
+                               if (SDpnt) {
+                                       /*
+                                        * Some other device in this cluster is busy.
+                                        * If asked to wait, we need to wait, otherwise
+                                        * return NULL.
+                                        */
+                                       SCpnt = NULL;
+                                       goto busy;
+                               }
+                       }
+                       /*
+                        * Now we can check for a free command block for this device.
+                        */
+                       for (SCpnt = device->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                               if (SCpnt->request.rq_status == RQ_INACTIVE)
+                                       break;
+                       }
+               }
+               /*
+                * If we couldn't find a free command block, and we have been
+                * asked to wait, then do so.
+                */
+               if (SCpnt) {
+                       break;
+               }
+      busy:
+               /*
+                * If we have been asked to wait for a free block, then
+                * wait here.
+                */
+               if (wait) {
+                        DECLARE_WAITQUEUE(wait, current);
+
+                        /*
+                         * We need to wait for a free command block.  We need to
+                         * insert ourselves into the list before we release the
+                         * lock.  This way if a block were released the same
+                         * microsecond that we released the lock, the call
+                         * to schedule() wouldn't block (well, it might switch,
+                         * but the current task will still be schedulable).
+                         */
+                        add_wait_queue(&device->scpnt_wait, &wait);
+                        if( interruptable ) {
+                                set_current_state(TASK_INTERRUPTIBLE);
+                        } else {
+                                set_current_state(TASK_UNINTERRUPTIBLE);
+                        }
+
+                        spin_unlock_irqrestore(&device_request_lock, flags);
+
+                       /*
+                        * This should block until a device command block
+                        * becomes available.
+                        */
+                        schedule();
+
+                       spin_lock_irqsave(&device_request_lock, flags);
+
+                        remove_wait_queue(&device->scpnt_wait, &wait);
+                        /*
+                         * FIXME - Isn't this redundant??  Someone
+                         * else will have forced the state back to running.
+                         */
+                        set_current_state(TASK_RUNNING);
+                        /*
+                         * In the event that a signal has arrived that we need
+                         * to consider, then simply return NULL.  Everyone
+                         * that calls us should be prepared for this
+                         * possibility, and pass the appropriate code back
+                         * to the user.
+                         */
+                        if( interruptable ) {
+                                if (signal_pending(current)) {
+                                        spin_unlock_irqrestore(&device_request_lock, flags);
+                                        return NULL;
+                                }
+                        }
+               } else {
+                        spin_unlock_irqrestore(&device_request_lock, flags);
+                       return NULL;
+               }
+       }
+
+       SCpnt->request.rq_status = RQ_SCSI_BUSY;
+       SCpnt->request.waiting = NULL;  /* And no one is waiting for this
+                                        * to complete */
+       atomic_inc(&SCpnt->host->host_active);
+       atomic_inc(&SCpnt->device->device_active);
+
+       SCpnt->buffer  = NULL;
+       SCpnt->bufflen = 0;
+       SCpnt->request_buffer = NULL;
+       SCpnt->request_bufflen = 0;
+
+       SCpnt->use_sg = 0;      /* Reset the scatter-gather flag */
+       SCpnt->old_use_sg = 0;
+       SCpnt->transfersize = 0;        /* No default transfer size */
+       SCpnt->cmd_len = 0;
+
+       SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+       SCpnt->sc_request = NULL;
+       SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+        SCpnt->result = 0;
+       SCpnt->underflow = 0;   /* Do not flag underflow conditions */
+       SCpnt->old_underflow = 0;
+       SCpnt->resid = 0;
+       SCpnt->state = SCSI_STATE_INITIALIZING;
+       SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+
+       spin_unlock_irqrestore(&device_request_lock, flags);
+
+       SCSI_LOG_MLQUEUE(5, printk("Activating command for device %d (%d)\n",
+                                  SCpnt->target,
+                               atomic_read(&SCpnt->host->host_active)));
+
+       return SCpnt;
+}
+
+inline void __scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+       unsigned long flags;
+        Scsi_Device * SDpnt;
+
+       spin_lock_irqsave(&device_request_lock, flags);
+
+        SDpnt = SCpnt->device;
+
+       SCpnt->request.rq_status = RQ_INACTIVE;
+       SCpnt->state = SCSI_STATE_UNUSED;
+       SCpnt->owner = SCSI_OWNER_NOBODY;
+       atomic_dec(&SCpnt->host->host_active);
+       atomic_dec(&SDpnt->device_active);
+
+       SCSI_LOG_MLQUEUE(5, printk("Deactivating command for device %d (active=%d, failed=%d)\n",
+                                  SCpnt->target,
+                                  atomic_read(&SCpnt->host->host_active),
+                                  SCpnt->host->host_failed));
+       if (SCpnt->host->host_failed != 0) {
+               SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n",
+                                               SCpnt->host->in_recovery,
+                                               SCpnt->host->eh_active));
+       }
+       /*
+        * If the host is having troubles, then look to see if this was the last
+        * command that might have failed.  If so, wake up the error handler.
+        */
+       if (SCpnt->host->in_recovery
+           && !SCpnt->host->eh_active
+           && SCpnt->host->host_busy == SCpnt->host->host_failed) {
+               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
+                            atomic_read(&SCpnt->host->eh_wait->count)));
+               up(SCpnt->host->eh_wait);
+       }
+
+       spin_unlock_irqrestore(&device_request_lock, flags);
+
+        /*
+         * Wake up anyone waiting for this device.  Do this after we
+         * have released the lock, as they will need it as soon as
+         * they wake up.  
+         */
+       wake_up(&SDpnt->scpnt_wait);
+}
+
+/*
+ * Function:    scsi_release_command
+ *
+ * Purpose:     Release a command block.
+ *
+ * Arguments:   SCpnt - command block we are releasing.
+ *
+ * Notes:       The command block can no longer be used by the caller once
+ *              this function is called.  This is in effect the inverse
+ *              of scsi_allocate_device.  Note that we also must perform
+ *              a couple of additional tasks.  We must first wake up any
+ *              processes that might have blocked waiting for a command
+ *              block, and secondly we must hit the queue handler function
+ *              to make sure that the device is busy.  Note - there is an
+ *              option to not do this - there were instances where we could
+ *              recurse too deeply and blow the stack if this happened
+ *              when we were indirectly called from the request function
+ *              itself.
+ *
+ *              The idea is that a lot of the mid-level internals gunk
+ *              gets hidden in this function.  Upper level drivers don't
+ *              have any chickens to wave in the air to get things to
+ *              work reliably.
+ *
+ *              This function is deprecated, and drivers should be
+ *              rewritten to use Scsi_Request instead of Scsi_Cmnd.
+ */
+void scsi_release_command(Scsi_Cmnd * SCpnt)
+{
+        request_queue_t *q;
+        Scsi_Device * SDpnt;
+
+        SDpnt = SCpnt->device;
+
+        __scsi_release_command(SCpnt);
+
+        /*
+         * Finally, hit the queue request function to make sure that
+         * the device is actually busy if there are requests present.
+         * This won't block - if the device cannot take any more, life
+         * will go on.  
+         */
+        q = &SDpnt->request_queue;
+        scsi_queue_next_request(q, NULL);                
+}
+
+/*
+ * Function:    scsi_dispatch_command
+ *
+ * Purpose:     Dispatch a command to the low-level driver.
+ *
+ * Arguments:   SCpnt - command block we are dispatching.
+ *
+ * Notes:
+ */
+int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt)
+{
+#ifdef DEBUG_DELAY
+       unsigned long clock;
+#endif
+       struct Scsi_Host *host;
+       int rtn = 0;
+       unsigned long flags = 0;
+       unsigned long timeout;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+#if DEBUG
+       unsigned long *ret = 0;
+#ifdef __mips__
+       __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+       ret = __builtin_return_address(0);
+#endif
+#endif
+
+       host = SCpnt->host;
+
+       /* Assign a unique nonzero serial_number. */
+       if (++serial_number == 0)
+               serial_number = 1;
+       SCpnt->serial_number = serial_number;
+       SCpnt->pid = scsi_pid++;
+
+       /*
+        * We will wait MIN_RESET_DELAY clock ticks after the last reset so
+        * we can avoid the drive not being ready.
+        */
+       timeout = host->last_reset + MIN_RESET_DELAY;
+
+       if (host->resetting && time_before(jiffies, timeout)) {
+               int ticks_remaining = timeout - jiffies;
+               /*
+                * NOTE: This may be executed from within an interrupt
+                * handler!  This is bad, but for now, it'll do.  The irq
+                * level of the interrupt handler has been masked out by the
+                * platform dependent interrupt handling code already, so the
+                * sti() here will not cause another call to the SCSI host's
+                * interrupt handler (assuming there is one irq-level per
+                * host).
+                */
+               while (--ticks_remaining >= 0)
+                       mdelay(1 + 999 / HZ);
+               host->resetting = 0;
+       }
+       if (host->hostt->use_new_eh_code) {
+               scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out);
+       } else {
+               scsi_add_timer(SCpnt, SCpnt->timeout_per_command,
+                              scsi_old_times_out);
+       }
+
+       /*
+        * We will use a queued command if possible, otherwise we will emulate the
+        * queuing and calling of the completion function ourselves.
+        */
+       SCSI_LOG_MLQUEUE(3, printk("scsi_dispatch_cmnd (host = %d, channel = %d, target = %d, "
+              "command = %p, buffer = %p, \nbufflen = %d, done = %p)\n",
+       SCpnt->host->host_no, SCpnt->channel, SCpnt->target, SCpnt->cmnd,
+                           SCpnt->buffer, SCpnt->bufflen, SCpnt->done));
+
+       SCpnt->state = SCSI_STATE_QUEUED;
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+       if (host->can_queue) {
+               SCSI_LOG_MLQUEUE(3, printk("queuecommand : routine at %p\n",
+                                          host->hostt->queuecommand));
+               /*
+                * Use the old error handling code if we haven't converted the driver
+                * to use the new one yet.  Note - only the new queuecommand variant
+                * passes a meaningful return value.
+                */
+               if (host->hostt->use_new_eh_code) {
+                       /*
+                        * Before we queue this command, check if the command
+                        * length exceeds what the host adapter can handle.
+                        */
+                       if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+                               spin_lock_irqsave(&io_request_lock, flags);
+                               rtn = host->hostt->queuecommand(SCpnt, scsi_done);
+                               spin_unlock_irqrestore(&io_request_lock, flags);
+                               if (rtn != 0) {
+                                       scsi_delete_timer(SCpnt);
+                                       scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY);
+                                       SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n"));                                
+                               }
+                       } else {
+                               SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+                               SCpnt->result = (DID_ABORT << 16);
+                               spin_lock_irqsave(&io_request_lock, flags);
+                               scsi_done(SCpnt);
+                               spin_unlock_irqrestore(&io_request_lock, flags);
+                               rtn = 1;
+                       }
+               } else {
+                       /*
+                        * Before we queue this command, check if the command
+                        * length exceeds what the host adapter can handle.
+                        */
+                       if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) {
+                               spin_lock_irqsave(&io_request_lock, flags);
+                               host->hostt->queuecommand(SCpnt, scsi_old_done);
+                               spin_unlock_irqrestore(&io_request_lock, flags);
+                       } else {
+                               SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n"));
+                               SCpnt->result = (DID_ABORT << 16);
+                               spin_lock_irqsave(&io_request_lock, flags);
+                               scsi_old_done(SCpnt);
+                               spin_unlock_irqrestore(&io_request_lock, flags);
+                               rtn = 1;
+                       }
+               }
+       } else {
+               int temp;
+
+               SCSI_LOG_MLQUEUE(3, printk("command() :  routine at %p\n", host->hostt->command));
+                spin_lock_irqsave(&io_request_lock, flags);
+               temp = host->hostt->command(SCpnt);
+               SCpnt->result = temp;
+#ifdef DEBUG_DELAY
+                spin_unlock_irqrestore(&io_request_lock, flags);
+               clock = jiffies + 4 * HZ;
+               while (time_before(jiffies, clock)) {
+                       barrier();
+                       cpu_relax();
+               }
+               printk("done(host = %d, result = %04x) : routine at %p\n",
+                      host->host_no, temp, host->hostt->command);
+                spin_lock_irqsave(&io_request_lock, flags);
+#endif
+               if (host->hostt->use_new_eh_code) {
+                       scsi_done(SCpnt);
+               } else {
+                       scsi_old_done(SCpnt);
+               }
+                spin_unlock_irqrestore(&io_request_lock, flags);
+       }
+       SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
+       return rtn;
+}
+
+devfs_handle_t scsi_devfs_handle;
+
+/*
+ * scsi_do_cmd sends all the commands out to the low-level driver.  It
+ * handles the specifics required for each low level driver - i.e. queued
+ * or non-queued.  It also prevents conflicts when different high level
+ * drivers go for the same host at the same time.
+ */
+
+void scsi_wait_req(Scsi_Request * SRpnt, const void *cmnd,
+                 void *buffer, unsigned bufflen, 
+                 int timeout, int retries)
+{
+       DECLARE_COMPLETION(wait);
+       request_queue_t *q = &SRpnt->sr_device->request_queue;
+       
+       SRpnt->sr_request.waiting = &wait;
+       SRpnt->sr_request.rq_status = RQ_SCSI_BUSY;
+       scsi_do_req (SRpnt, (void *) cmnd,
+               buffer, bufflen, scsi_wait_done, timeout, retries);
+       generic_unplug_device(q);
+       wait_for_completion(&wait);
+       SRpnt->sr_request.waiting = NULL;
+       if( SRpnt->sr_command != NULL )
+       {
+               scsi_release_command(SRpnt->sr_command);
+               SRpnt->sr_command = NULL;
+       }
+
+}
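+
+/*
+ * Example (sketch): issuing a synchronous TEST UNIT READY the way an
+ * upper-level driver typically probes a device:
+ *
+ *	unsigned char cmd[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+ *	Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
+ *
+ *	if (SRpnt != NULL) {
+ *		SRpnt->sr_data_direction = SCSI_DATA_NONE;
+ *		scsi_wait_req(SRpnt, cmd, NULL, 0, 30 * HZ, 3);
+ *		... inspect SRpnt->sr_result here ...
+ *		scsi_release_request(SRpnt);
+ *	}
+ */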
+/*
+ * Function:    scsi_do_req
+ *
+ * Purpose:     Queue a SCSI request
+ *
+ * Arguments:   SRpnt     - command descriptor.
+ *              cmnd      - actual SCSI command to be performed.
+ *              buffer    - data buffer.
+ *              bufflen   - size of data buffer.
+ *              done      - completion function to be run.
+ *              timeout   - how long to let it run before timeout.
+ *              retries   - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ *              need be held upon entry.  With the old queueing code the lock was
+ *              assumed to be held upon entry.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       Prior to the new queue code, this function was not SMP-safe.
+ *              Also, this function is now only used for queueing requests
+ *              for things like ioctls and character device requests - this
+ *              is because we essentially just inject a request into the
+ *              queue for the device. Normal block device handling manipulates
+ *              the queue directly.
+ */
+void scsi_do_req(Scsi_Request * SRpnt, const void *cmnd,
+             void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+                int timeout, int retries)
+{
+       Scsi_Device * SDpnt = SRpnt->sr_device;
+       struct Scsi_Host *host = SDpnt->host;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       SCSI_LOG_MLQUEUE(4,
+                        {
+                        int i;
+                        int target = SDpnt->id;
+                        int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+                        printk("scsi_do_req (host = %d, channel = %d target = %d, "
+                   "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+                               "retries = %d)\n"
+                               "command : ", host->host_no, SDpnt->channel, target, buffer,
+                               bufflen, done, timeout, retries);
+                        for (i = 0; i < size; ++i)
+                               printk("%02x  ", ((unsigned char *) cmnd)[i]);
+                        printk("\n");
+                        });
+
+       if (!host) {
+               panic("Invalid or not present host.\n");
+       }
+
+       /*
+        * If the upper level driver is reusing these things, then
+        * we should release the low-level block now.  Another one will
+        * be allocated later when this request is getting queued.
+        */
+       if( SRpnt->sr_command != NULL )
+       {
+               scsi_release_command(SRpnt->sr_command);
+               SRpnt->sr_command = NULL;
+       }
+
+       /*
+        * We must prevent reentrancy into the low-level host driver: we loop
+        * until the host we want to talk to is not busy.  Race conditions are
+        * prevented because interrupts are disabled between the time we check
+        * that the host is not busy and the time we mark it busy ourselves.
+        */
+
+
+       /*
+        * Our own function scsi_done (which marks the host as not busy, disables
+        * the timeout counter, etc) will be called by us or by the
+        * scsi_hosts[host].queuecommand() function; it in turn calls the
+        * completion function for the high-level driver.
+        */
+
+       memcpy((void *) SRpnt->sr_cmnd, (const void *) cmnd, 
+              sizeof(SRpnt->sr_cmnd));
+       SRpnt->sr_bufflen = bufflen;
+       SRpnt->sr_buffer = buffer;
+       SRpnt->sr_allowed = retries;
+       SRpnt->sr_done = done;
+       SRpnt->sr_timeout_per_command = timeout;
+
+       if (SRpnt->sr_cmd_len == 0)
+               SRpnt->sr_cmd_len = COMMAND_SIZE(SRpnt->sr_cmnd[0]);
+
+       /*
+        * At this point, we merely set up the command, stick it in the normal
+        * request queue, and return.  Eventually that request will come to the
+        * top of the list, and will be dispatched.
+        */
+       scsi_insert_special_req(SRpnt, 0);
+
+       SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_req()\n"));
+}
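+
+/*
+ * Example (sketch only): the asynchronous flavour of the same interface.
+ * A hypothetical caller queues a one-block READ(10) and is notified via
+ * its own done routine instead of sleeping; the "example_" names are
+ * invented.
+ */
+#if 0  /* illustrative only */
+static void example_done(Scsi_Cmnd *SCpnt)
+{
+       /* called from the bottom half once the command has finished */
+       printk("example: pid %lu done, result 0x%x\n",
+              SCpnt->pid, SCpnt->result);
+}
+
+static void example_queue_read(Scsi_Request *SRpnt, void *buf, unsigned len)
+{
+       unsigned char cmd[10] = { READ_10, 0, 0, 0, 0, 0, 0, 0, 1, 0 };
+
+       SRpnt->sr_data_direction = SCSI_DATA_READ;
+       scsi_do_req(SRpnt, cmd, buf, len, example_done, 30 * HZ, 3);
+       /* returns at once; completion happens asynchronously */
+}
+#endif
+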
+/*
+ * Function:    scsi_init_cmd_from_req
+ *
+ * Purpose:     Initialize a Scsi_Cmnd from a Scsi_Request
+ *
+ * Arguments:   SCpnt     - command descriptor.
+ *              SRpnt     - Request from the queue.
+ *
+ * Lock status: None needed.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       Mainly transfer data from the request structure to the
+ *              command structure.  The request structure is allocated
+ *              using the normal memory allocator, and requests can pile
+ *              up to more or less any depth.  The command structure represents
+ *              a consumable resource, as these are allocated into a pool
+ *              when the SCSI subsystem initializes.  The preallocation is
+ *              required so that in low-memory situations a disk I/O request
+ *              won't cause the memory manager to try and write out a page.
+ *              The request structure is generally used by ioctls and character
+ *              devices.
+ */
+void scsi_init_cmd_from_req(Scsi_Cmnd * SCpnt, Scsi_Request * SRpnt)
+{
+       struct Scsi_Host *host = SCpnt->host;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+       SRpnt->sr_command = SCpnt;
+
+       if (!host) {
+               panic("Invalid or not present host.\n");
+       }
+
+       SCpnt->cmd_len = SRpnt->sr_cmd_len;
+       SCpnt->use_sg = SRpnt->sr_use_sg;
+
+       memcpy((void *) &SCpnt->request, (const void *) &SRpnt->sr_request,
+              sizeof(SRpnt->sr_request));
+       memcpy((void *) SCpnt->data_cmnd, (const void *) SRpnt->sr_cmnd, 
+              sizeof(SCpnt->data_cmnd));
+       SCpnt->reset_chain = NULL;
+       SCpnt->serial_number = 0;
+       SCpnt->serial_number_at_timeout = 0;
+       SCpnt->bufflen = SRpnt->sr_bufflen;
+       SCpnt->buffer = SRpnt->sr_buffer;
+       SCpnt->flags = 0;
+       SCpnt->retries = 0;
+       SCpnt->allowed = SRpnt->sr_allowed;
+       SCpnt->done = SRpnt->sr_done;
+       SCpnt->timeout_per_command = SRpnt->sr_timeout_per_command;
+
+       SCpnt->sc_data_direction = SRpnt->sr_data_direction;
+
+       SCpnt->sglist_len = SRpnt->sr_sglist_len;
+       SCpnt->underflow = SRpnt->sr_underflow;
+
+       SCpnt->sc_request = SRpnt;
+
+       memcpy((void *) SCpnt->cmnd, (const void *) SRpnt->sr_cmnd, 
+              sizeof(SCpnt->cmnd));
+       /* Zero the sense buffer.  Some host adapters automatically request
+        * sense on error.  0 is not a valid sense code.
+        */
+       memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+       SCpnt->request_buffer = SRpnt->sr_buffer;
+       SCpnt->request_bufflen = SRpnt->sr_bufflen;
+       SCpnt->old_use_sg = SCpnt->use_sg;
+       if (SCpnt->cmd_len == 0)
+               SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+       SCpnt->old_cmd_len = SCpnt->cmd_len;
+       SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+       SCpnt->old_underflow = SCpnt->underflow;
+
+       /* Start the timer ticking.  */
+
+       SCpnt->internal_timeout = NORMAL_TIMEOUT;
+       SCpnt->abort_reason = 0;
+       SCpnt->result = 0;
+
+       SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n"));
+}
+
+/*
+ * Function:    scsi_do_cmd
+ *
+ * Purpose:     Queue a SCSI command
+ *
+ * Arguments:   SCpnt     - command descriptor.
+ *              cmnd      - actual SCSI command to be performed.
+ *              buffer    - data buffer.
+ *              bufflen   - size of data buffer.
+ *              done      - completion function to be run.
+ *              timeout   - how long to let it run before timeout.
+ *              retries   - number of retries we allow.
+ *
+ * Lock status: With the new queueing code, this is SMP-safe, and no locks
+ *              need be held upon entry.  With the old queueing code the
+ *              lock was assumed to be held upon entry.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       Prior to the new queue code, this function was not SMP-safe.
+ *              Also, this function is now only used for queueing requests
+ *              for things like ioctls and character device requests - this
+ *              is because we essentially just inject a request into the
+ *              queue for the device. Normal block device handling manipulates
+ *              the queue directly.
+ */
+void scsi_do_cmd(Scsi_Cmnd * SCpnt, const void *cmnd,
+             void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *),
+                int timeout, int retries)
+{
+       struct Scsi_Host *host = SCpnt->host;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       SCpnt->pid = scsi_pid++;
+       SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+       SCSI_LOG_MLQUEUE(4,
+                        {
+                        int i;
+                        int target = SCpnt->target;
+                        int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]);
+                        printk("scsi_do_cmd (host = %d, channel = %d target = %d, "
+                   "buffer =%p, bufflen = %d, done = %p, timeout = %d, "
+                               "retries = %d)\n"
+                               "command : ", host->host_no, SCpnt->channel, target, buffer,
+                               bufflen, done, timeout, retries);
+                        for (i = 0; i < size; ++i)
+                               printk("%02x  ", ((unsigned char *) cmnd)[i]);
+                               printk("\n");
+                        });
+
+       if (!host) {
+               panic("Invalid or not present host.\n");
+       }
+       /*
+        * We must prevent reentrancy into the low-level host driver: we loop
+        * until the host we want to talk to is not busy.  Race conditions are
+        * prevented because interrupts are disabled between the time we check
+        * that the host is not busy and the time we mark it busy ourselves.
+        */
+
+
+       /*
+        * Our own function scsi_done (which marks the host as not busy, disables
+        * the timeout counter, etc) will be called by us or by the
+        * scsi_hosts[host].queuecommand() function; it in turn calls the
+        * completion function for the high-level driver.
+        */
+
+       memcpy((void *) SCpnt->data_cmnd, (const void *) cmnd, 
+               sizeof(SCpnt->data_cmnd));
+       SCpnt->reset_chain = NULL;
+       SCpnt->serial_number = 0;
+       SCpnt->serial_number_at_timeout = 0;
+       SCpnt->bufflen = bufflen;
+       SCpnt->buffer = buffer;
+       SCpnt->flags = 0;
+       SCpnt->retries = 0;
+       SCpnt->allowed = retries;
+       SCpnt->done = done;
+       SCpnt->timeout_per_command = timeout;
+
+       memcpy((void *) SCpnt->cmnd, (const void *) cmnd, 
+               sizeof(SCpnt->cmnd));
+       /* Zero the sense buffer.  Some host adapters automatically request
+        * sense on error.  0 is not a valid sense code.
+        */
+       memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+       SCpnt->request_buffer = buffer;
+       SCpnt->request_bufflen = bufflen;
+       SCpnt->old_use_sg = SCpnt->use_sg;
+       if (SCpnt->cmd_len == 0)
+               SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+       SCpnt->old_cmd_len = SCpnt->cmd_len;
+       SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+       SCpnt->old_underflow = SCpnt->underflow;
+
+       /* Start the timer ticking.  */
+
+       SCpnt->internal_timeout = NORMAL_TIMEOUT;
+       SCpnt->abort_reason = 0;
+       SCpnt->result = 0;
+
+       /*
+        * At this point, we merely set up the command, stick it in the normal
+        * request queue, and return.  Eventually that request will come to the
+        * top of the list, and will be dispatched.
+        */
+       scsi_insert_special_cmd(SCpnt, 0);
+
+       SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n"));
+}
+
+/*
+ * This function is the mid-level interrupt routine, which decides how
+ *  to handle error conditions.  Each invocation of this function must
+ *  do one and *only* one of the following:
+ *
+ *      1) Insert command in BH queue.
+ *      2) Activate error handler for host.
+ *
+ * FIXME(eric) - I am concerned about stack overflow (still).  An
+ * interrupt could come while we are processing the bottom queue,
+ * which would cause another command to be stuffed onto the bottom
+ * queue, and it would in turn be processed as that interrupt handler
+ * is returning.  Given a sufficiently steady rate of returning
+ * commands, this could cause the stack to overflow.  I am not sure
+ * what is the most appropriate solution here - we should probably
+ * keep a depth count, and not process any commands while we still
+ * have a bottom handler active higher in the stack.
+ *
+ * There is currently code in the bottom half handler to monitor
+ * recursion in the bottom handler and report if it ever happens.  If
+ * this becomes a problem, it won't be hard to engineer something to
+ * deal with it so that only the outer layer ever does any real
+ * processing.  
+ */
+void scsi_done(Scsi_Cmnd * SCpnt)
+{
+       unsigned long flags;
+       int tstatus;
+
+       /*
+        * We don't have to worry about this one timing out any more.
+        */
+       tstatus = scsi_delete_timer(SCpnt);
+
+       /*
+        * If we are unable to remove the timer, it means that the command
+        * has already timed out.  In this case, we have no choice but to
+        * let the timeout function run, as we have no idea where in fact
+        * that function could really be.  It might be on another processor,
+        * etc, etc.
+        */
+       if (!tstatus) {
+               SCpnt->done_late = 1;
+               return;
+       }
+       /* Set the serial numbers back to zero */
+       SCpnt->serial_number = 0;
+
+       /*
+        * First, see whether this command already timed out.  If so, we ignore
+        * the response.  We treat it as if the command never finished.
+        *
+        * Since serial_number is now 0, the error handler could detect this
+        * situation and avoid calling the low level driver abort routine.
+        * (DB)
+         *
+         * FIXME(eric) - I believe that this test is now redundant, due to
+         * the test of the return status of del_timer().
+        */
+       if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+               SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt));
+               return;
+       }
+       spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+
+       SCpnt->serial_number_at_timeout = 0;
+       SCpnt->state = SCSI_STATE_BHQUEUE;
+       SCpnt->owner = SCSI_OWNER_BH_HANDLER;
+       SCpnt->bh_next = NULL;
+
+       /*
+        * Next, put this command in the BH queue.
+        * 
+        * We need a spinlock here (or compare-and-exchange if we can reorder
+        * incoming Scsi_Cmnds), as it happens pretty often that scsi_done is
+        * called multiple times before the bh is serviced. -jj
+        *
+        * We already have the io_request_lock here, since we are called from the
+        * interrupt handler or the error handler. (DB)
+        *
+        * This may be true at the moment, but I would like to wean all of the low
+        * level drivers away from using io_request_lock.   Technically they should
+        * all use their own locking.  I am adding a small spinlock to protect
+        * this datastructure to make it safe for that day.  (ERY)
+        */
+       if (!scsi_bh_queue_head) {
+               scsi_bh_queue_head = SCpnt;
+               scsi_bh_queue_tail = SCpnt;
+       } else {
+               scsi_bh_queue_tail->bh_next = SCpnt;
+               scsi_bh_queue_tail = SCpnt;
+       }
+
+       spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+       /*
+        * Mark the bottom half handler to be run.
+        */
+       mark_bh(SCSI_BH);
+}
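+
+/*
+ * Example (sketch only): how a low-level driver's interrupt handler hands
+ * a finished command back to the midlayer.  This assumes, as most 2.4-era
+ * drivers do, that queuecommand() stashed its done argument in
+ * SCpnt->scsi_done; the "example_" name is invented.
+ */
+#if 0  /* illustrative only */
+static void example_intr_complete(Scsi_Cmnd *SCpnt, unsigned char scsi_status)
+{
+       /*
+        * Bits 16-23 of the result word carry the host (DID_*) code and
+        * bits 0-7 the raw SCSI status byte; they are decoded with the
+        * host_byte()/status_byte() macros used elsewhere in this file.
+        * DRIVER_SENSE << 24 is ORed in later by scsi_finish_command()
+        * if the sense data turns out to be valid.
+        */
+       SCpnt->result = (DID_OK << 16) | scsi_status;
+
+       /* scsi_done() deletes the timer and defers the rest to the BH */
+       SCpnt->scsi_done(SCpnt);
+}
+#endif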
+
+/*
+ * Procedure:   scsi_bottom_half_handler
+ *
+ * Purpose:     Called after we have finished processing interrupts, it
+ *              performs post-interrupt handling for commands that may
+ *              have completed.
+ *
+ * Notes:       This is called with all interrupts enabled.  This should reduce
+ *              interrupt latency, stack depth, and reentrancy of the low-level
+ *              drivers.
+ *
+ * The io_request_lock is required throughout this routine. There was a subtle
+ * race condition when scsi_done is called after a command has already
+ * timed out but before the time out is processed by the error handler.
+ * (DB)
+ *
+ * I believe I have corrected this.  We simply monitor the return status of
+ * del_timer() - if this comes back as 0, it means that the timer has fired
+ * and that a timeout is in progress.   I have modified scsi_done() such
+ * that in this instance the command is never inserted in the bottom
+ * half queue.  Thus the only time we hold the lock here is when
+ * we wish to atomically remove the contents of the queue.
+ */
+void scsi_bottom_half_handler(void)
+{
+       Scsi_Cmnd *SCpnt;
+       Scsi_Cmnd *SCnext;
+       unsigned long flags;
+
+
+       while (1) {
+               spin_lock_irqsave(&scsi_bhqueue_lock, flags);
+               SCpnt = scsi_bh_queue_head;
+               scsi_bh_queue_head = NULL;
+               spin_unlock_irqrestore(&scsi_bhqueue_lock, flags);
+
+               if (SCpnt == NULL) {
+                       return;
+               }
+               SCnext = SCpnt->bh_next;
+
+               for (; SCpnt; SCpnt = SCnext) {
+                       SCnext = SCpnt->bh_next;
+
+                       switch (scsi_decide_disposition(SCpnt)) {
+                       case SUCCESS:
+                               /*
+                                * Command completed successfully: hand it
+                                * off to the upper layers.
+                                */
+                               SCSI_LOG_MLCOMPLETE(3, printk("Command finished %d %d 0x%x\n", SCpnt->host->host_busy,
+                                               SCpnt->host->host_failed,
+                                                        SCpnt->result));
+
+                               scsi_finish_command(SCpnt);
+                               break;
+                       case NEEDS_RETRY:
+                               /*
+                                * We only come in here if we want to retry a command.  The
+                                * code that decides whether a command should be retried keeps
+                                * track of the number of tries, so we don't end up looping,
+                                * of course.
+                                */
+                               SCSI_LOG_MLCOMPLETE(3, printk("Command needs retry %d %d 0x%x\n", SCpnt->host->host_busy,
+                               SCpnt->host->host_failed, SCpnt->result));
+
+                               scsi_retry_command(SCpnt);
+                               break;
+                       case ADD_TO_MLQUEUE:
+                               /* 
+                                * This typically happens for a QUEUE_FULL message -
+                                * usually because the queue depth is only
+                                * approximate for a given device.  Adding a command
+                                * to the queue for the device will prevent further commands
+                                * from being sent to the device, so we shouldn't end up
+                                * with tons of things being sent down that shouldn't be.
+                                */
+                               SCSI_LOG_MLCOMPLETE(3, printk("Command rejected as device queue full, put on ml queue %p\n",
+                                                              SCpnt));
+                               scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY);
+                               break;
+                       default:
+                               /*
+                                * Here we have a fatal error of some sort.  Turn it over to
+                                * the error handler.
+                                */
+                               SCSI_LOG_MLCOMPLETE(3, printk("Command failed %p %x active=%d busy=%d failed=%d\n",
+                                                   SCpnt, SCpnt->result,
+                                 atomic_read(&SCpnt->host->host_active),
+                                                 SCpnt->host->host_busy,
+                                             SCpnt->host->host_failed));
+
+                               /*
+                                * Dump the sense information too.
+                                */
+                               if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) {
+                                       SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt));
+                               }
+                               if (SCpnt->host->eh_wait != NULL) {
+                                       SCpnt->host->host_failed++;
+                                       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+                                       SCpnt->state = SCSI_STATE_FAILED;
+                                       SCpnt->host->in_recovery = 1;
+                                       /*
+                                        * If the host is having troubles, then look to see if this was the last
+                                        * command that might have failed.  If so, wake up the error handler.
+                                        */
+                                       if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+                                               SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n",
+                                                                                 atomic_read(&SCpnt->host->eh_wait->count)));
+                                               up(SCpnt->host->eh_wait);
+                                       }
+                               } else {
+                                       /*
+                                        * We only get here if the error recovery thread has died.
+                                        */
+                                       scsi_finish_command(SCpnt);
+                               }
+                       }
+               }               /* for(; SCpnt...) */
+
+       }                       /* while (1) */
+
+}
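+
+/*
+ * Summary of the scsi_decide_disposition() contract relied on above:
+ *
+ *     SUCCESS        -> scsi_finish_command()   (hand off to upper layer)
+ *     NEEDS_RETRY    -> scsi_retry_command()    (resend to the driver)
+ *     ADD_TO_MLQUEUE -> scsi_mlqueue_insert()   (device queue was full)
+ *     anything else  -> wake the error handler thread, or fall back to
+ *                       scsi_finish_command() if that thread has died
+ */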
+
+/*
+ * Function:    scsi_retry_command
+ *
+ * Purpose:     Send a command back to the low level to be retried.
+ *
+ * Notes:       This command is always executed in the context of the
+ *              bottom half handler, or the error handler thread. Low
+ *              level drivers should not become re-entrant as a result of
+ *              this.
+ */
+int scsi_retry_command(Scsi_Cmnd * SCpnt)
+{
+       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+              sizeof(SCpnt->data_cmnd));
+       SCpnt->request_buffer = SCpnt->buffer;
+       SCpnt->request_bufflen = SCpnt->bufflen;
+       SCpnt->use_sg = SCpnt->old_use_sg;
+       SCpnt->cmd_len = SCpnt->old_cmd_len;
+       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+       SCpnt->underflow = SCpnt->old_underflow;
+
+        /*
+         * Zero the sense information from the last time we tried
+         * this command.
+         */
+       memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+       return scsi_dispatch_cmd(SCpnt);
+}
+
+/*
+ * Function:    scsi_finish_command
+ *
+ * Purpose:     Pass command off to upper layer for finishing of I/O
+ *              request, waking processes that are waiting on results,
+ *              etc.
+ */
+void scsi_finish_command(Scsi_Cmnd * SCpnt)
+{
+       struct Scsi_Host *host;
+       Scsi_Device *device;
+       Scsi_Request * SRpnt;
+       unsigned long flags;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       host = SCpnt->host;
+       device = SCpnt->device;
+
+        /*
+         * We need to protect the decrement, as otherwise a race condition
+         * would exist.  Fiddling with SCpnt isn't a problem as the
+         * design allows a single SCpnt to be active in only one
+         * execution context at a time, but the device and host structures are
+         * shared.
+         */
+       spin_lock_irqsave(&io_request_lock, flags);
+       host->host_busy--;      /* Indicate that we are free */
+       device->device_busy--;  /* Decrement device usage counter. */
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+        /*
+         * Clear the flags which mark the device/host as temporarily unable
+         * to accept new commands.  These are set in scsi_queue.c
+         * for both the queue full condition on a device, and for a
+         * host full condition on the host.
+         */
+        host->host_blocked = FALSE;
+        device->device_blocked = FALSE;
+
+       /*
+        * If we have valid sense information, then some kind of recovery
+        * must have taken place.  Make a note of this.
+        */
+       if (scsi_sense_valid(SCpnt)) {
+               SCpnt->result |= (DRIVER_SENSE << 24);
+       }
+       SCSI_LOG_MLCOMPLETE(3, printk("Notifying upper driver of completion for device %d %x\n",
+                                     SCpnt->device->id, SCpnt->result));
+
+       SCpnt->owner = SCSI_OWNER_HIGHLEVEL;
+       SCpnt->state = SCSI_STATE_FINISHED;
+
+       /* We can get here with use_sg=0, causing a panic in the upper level (DB) */
+       SCpnt->use_sg = SCpnt->old_use_sg;
+
+       /*
+       * If there is an associated request structure, copy the data over before we call the
+       * completion function.
+       */
+       SRpnt = SCpnt->sc_request;
+       if( SRpnt != NULL ) {
+              SRpnt->sr_result = SRpnt->sr_command->result;
+              if( SRpnt->sr_result != 0 ) {
+                      memcpy(SRpnt->sr_sense_buffer,
+                             SRpnt->sr_command->sense_buffer,
+                             sizeof(SRpnt->sr_sense_buffer));
+              }
+       }
+
+       SCpnt->done(SCpnt);
+}
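+
+/*
+ * Example (sketch only): the shape of the done routine that
+ * scsi_finish_command() ultimately invokes through SCpnt->done.  The
+ * "example_" name is invented; real upper-level drivers complete the
+ * block request or wake a waiter here.
+ */
+#if 0  /* illustrative only */
+static void example_hl_done(Scsi_Cmnd *SCpnt)
+{
+       if (host_byte(SCpnt->result) != DID_OK ||
+           status_byte(SCpnt->result) == CHECK_CONDITION)
+               printk("example: command failed, result 0x%x\n",
+                      SCpnt->result);
+       /* ... finish the I/O and release/requeue the command ... */
+}
+#endif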
+
+static int scsi_register_host(Scsi_Host_Template *);
+static int scsi_unregister_host(Scsi_Host_Template *);
+
+/*
+ * Function:    scsi_release_commandblocks()
+ *
+ * Purpose:     Release command blocks associated with a device.
+ *
+ * Arguments:   SDpnt   - device
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_release_commandblocks(Scsi_Device * SDpnt)
+{
+       Scsi_Cmnd *SCpnt, *SCnext;
+       unsigned long flags;
+
+       spin_lock_irqsave(&device_request_lock, flags);
+       for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) {
+               SDpnt->device_queue = SCnext = SCpnt->next;
+               kfree((char *) SCpnt);
+       }
+       SDpnt->has_cmdblocks = 0;
+       SDpnt->queue_depth = 0;
+       spin_unlock_irqrestore(&device_request_lock, flags);
+}
+
+/*
+ * Function:    scsi_build_commandblocks()
+ *
+ * Purpose:     Allocate command blocks associated with a device.
+ *
+ * Arguments:   SDpnt   - device
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locking assumed or required.
+ *
+ * Notes:
+ */
+void scsi_build_commandblocks(Scsi_Device * SDpnt)
+{
+       unsigned long flags;
+       struct Scsi_Host *host = SDpnt->host;
+       int j;
+       Scsi_Cmnd *SCpnt;
+
+       spin_lock_irqsave(&device_request_lock, flags);
+
+       if (SDpnt->queue_depth == 0)
+       {
+               SDpnt->queue_depth = host->cmd_per_lun;
+               if (SDpnt->queue_depth == 0)
+                       SDpnt->queue_depth = 1; /* live to fight another day */
+       }
+       SDpnt->device_queue = NULL;
+
+       for (j = 0; j < SDpnt->queue_depth; j++) {
+               SCpnt = (Scsi_Cmnd *)
+                   kmalloc(sizeof(Scsi_Cmnd),
+                                    GFP_ATOMIC |
+                               (host->unchecked_isa_dma ? GFP_DMA : 0));
+               if (NULL == SCpnt)
+                       break;  /* out of memory - without this break, the memset below would oops */
+               memset(SCpnt, 0, sizeof(Scsi_Cmnd));
+               SCpnt->host = host;
+               SCpnt->device = SDpnt;
+               SCpnt->target = SDpnt->id;
+               SCpnt->lun = SDpnt->lun;
+               SCpnt->channel = SDpnt->channel;
+               SCpnt->request.rq_status = RQ_INACTIVE;
+               SCpnt->use_sg = 0;
+               SCpnt->old_use_sg = 0;
+               SCpnt->old_cmd_len = 0;
+               SCpnt->underflow = 0;
+               SCpnt->old_underflow = 0;
+               SCpnt->transfersize = 0;
+               SCpnt->resid = 0;
+               SCpnt->serial_number = 0;
+               SCpnt->serial_number_at_timeout = 0;
+               SCpnt->host_scribble = NULL;
+               SCpnt->next = SDpnt->device_queue;
+               SDpnt->device_queue = SCpnt;
+               SCpnt->state = SCSI_STATE_UNUSED;
+               SCpnt->owner = SCSI_OWNER_NOBODY;
+       }
+       if (j < SDpnt->queue_depth) {   /* low on space (D.Gilbert 990424) */
+               printk(KERN_WARNING "scsi_build_commandblocks: want=%d, space for=%d blocks\n",
+                      SDpnt->queue_depth, j);
+               SDpnt->queue_depth = j;
+               SDpnt->has_cmdblocks = (0 != j);
+       } else {
+               SDpnt->has_cmdblocks = 1;
+       }
+       spin_unlock_irqrestore(&device_request_lock, flags);
+}
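+
+/*
+ * Example (sketch only): a host driver can override the default
+ * cmd_per_lun depth used above by setting select_queue_depths on its
+ * Scsi_Host (typically from its detect routine); scsi_register_host()
+ * below calls the hook right after scan_scsis() and before the command
+ * blocks are built.  The depths chosen here are invented.
+ */
+#if 0  /* illustrative only */
+static void example_select_queue_depths(struct Scsi_Host *host,
+                                       Scsi_Device *devlist)
+{
+       Scsi_Device *SDpnt;
+
+       for (SDpnt = devlist; SDpnt; SDpnt = SDpnt->next)
+               SDpnt->queue_depth = SDpnt->tagged_supported ? 16 : 2;
+}
+#endif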
+
+void __init scsi_host_no_insert(char *str, int n)
+{
+    Scsi_Host_Name *shn, *shn2;
+    int len;
+    
+    len = strlen(str);
+    if (len && (shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC))) {
+       if ((shn->name = kmalloc(len+1, GFP_ATOMIC))) {
+           strncpy(shn->name, str, len);
+           shn->name[len] = 0;
+           shn->host_no = n;
+           shn->host_registered = 0;
+           shn->loaded_as_module = 1; /* numbers shouldn't be freed in any case */
+           shn->next = NULL;
+           if (scsi_host_no_list) {
+               for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next)
+                   ;
+               shn2->next = shn;
+           }
+           else
+               scsi_host_no_list = shn;
+           max_scsi_hosts = n+1;
+       }
+       else
+           kfree((char *) shn);
+    }
+}
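+
+/*
+ * The list built above backs the "scsihosts=" boot option, which pins
+ * scsi host numbers to driver names.  For example (driver names chosen
+ * for illustration):
+ *
+ *     scsihosts=aha1542::ncr53c8xx
+ *
+ * reserves host number 0 for aha1542 and host number 2 for ncr53c8xx;
+ * the empty field skips number 1.
+ */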
+
+#ifdef CONFIG_PROC_FS
+static int scsi_proc_info(char *buffer, char **start, off_t offset, int length)
+{
+       Scsi_Device *scd;
+       struct Scsi_Host *HBA_ptr;
+       int size, len = 0;
+       off_t begin = 0;
+       off_t pos = 0;
+
+       /*
+        * First, see if there are any attached devices or not.
+        */
+       for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+               if (HBA_ptr->host_queue != NULL) {
+                       break;
+               }
+       }
+       size = sprintf(buffer + len, "Attached devices: %s\n", (HBA_ptr) ? "" : "none");
+       len += size;
+       pos = begin + len;
+       for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+#if 0
+               size += sprintf(buffer + len, "scsi%2d: %s\n", (int) HBA_ptr->host_no,
+                               HBA_ptr->hostt->procname);
+               len += size;
+               pos = begin + len;
+#endif
+               for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+                       proc_print_scsidevice(scd, buffer, &size, len);
+                       len += size;
+                       pos = begin + len;
+
+                       if (pos < offset) {
+                               len = 0;
+                               begin = pos;
+                       }
+                       if (pos > offset + length)
+                               goto stop_output;
+               }
+       }
+
+stop_output:
+       *start = buffer + (offset - begin);     /* Start of wanted data */
+       len -= (offset - begin);        /* Start slop */
+       if (len > length)
+               len = length;   /* Ending slop */
+       return (len);
+}
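+
+/*
+ * For reference, the read side above yields output of roughly this shape
+ * (one block per device, formatted by proc_print_scsidevice(); the
+ * device shown is an invented example):
+ *
+ *   Attached devices: 
+ *   Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ *     Vendor: EXAMPLE  Model: DISK     Rev: 1.0
+ *     Type:   Direct-Access            ANSI SCSI revision: 02
+ */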
+
+static int proc_scsi_gen_write(struct file * file, const char * buf,
+                              unsigned long length, void *data)
+{
+       struct Scsi_Device_Template *SDTpnt;
+       Scsi_Device *scd;
+       struct Scsi_Host *HBA_ptr;
+       char *p;
+       int host, channel, id, lun;
+       char * buffer;
+       int err;
+
+       if (!buf || length>PAGE_SIZE)
+               return -EINVAL;
+
+       if (!(buffer = (char *) __get_free_page(GFP_KERNEL)))
+               return -ENOMEM;
+       if(copy_from_user(buffer, buf, length))
+       {
+               err =-EFAULT;
+               goto out;
+       }
+
+       err = -EINVAL;
+
+       if (length < PAGE_SIZE)
+               buffer[length] = '\0';
+       else if (buffer[PAGE_SIZE-1])
+               goto out;
+
+       if (length < 11 || strncmp("scsi", buffer, 4))
+               goto out;
+
+       /*
+        * Usage: echo "scsi dump #N" > /proc/scsi/scsi
+        * to dump status of all scsi commands.  The number is used to specify the level
+        * of detail in the dump.
+        */
+       if (!strncmp("dump", buffer + 5, 4)) {
+               unsigned int level;
+
+               p = buffer + 10;
+
+               if (*p == '\0')
+                       goto out;
+
+               level = simple_strtoul(p, NULL, 0);
+               scsi_dump_status(level);
+       }
+       /*
+        * Usage: echo "scsi log token #N" > /proc/scsi/scsi
+        * where token is one of [error,timeout,scan,mlqueue,mlcomplete,
+        * llqueue,llcomplete,hlqueue,hlcomplete,ioctl]
+        */
+#ifdef CONFIG_SCSI_LOGGING             /* { */
+
+       if (!strncmp("log", buffer + 5, 3)) {
+               char *token;
+               unsigned int level;
+
+               p = buffer + 9;
+               token = p;
+               while (*p != ' ' && *p != '\t' && *p != '\0') {
+                       p++;
+               }
+
+               if (*p == '\0') {
+                       if (strncmp(token, "all", 3) == 0) {
+                               /*
+                                * Turn on absolutely everything.
+                                */
+                               scsi_logging_level = ~0;
+                       } else if (strncmp(token, "none", 4) == 0) {
+                               /*
+                                * Turn off absolutely everything.
+                                */
+                               scsi_logging_level = 0;
+                       } else {
+                               goto out;
+                       }
+               } else {
+                       *p++ = '\0';
+
+                       level = simple_strtoul(p, NULL, 0);
+
+                       /*
+                        * Now figure out what to do with it.
+                        */
+                       if (strcmp(token, "error") == 0) {
+                               SCSI_SET_ERROR_RECOVERY_LOGGING(level);
+                       } else if (strcmp(token, "timeout") == 0) {
+                               SCSI_SET_TIMEOUT_LOGGING(level);
+                       } else if (strcmp(token, "scan") == 0) {
+                               SCSI_SET_SCAN_BUS_LOGGING(level);
+                       } else if (strcmp(token, "mlqueue") == 0) {
+                               SCSI_SET_MLQUEUE_LOGGING(level);
+                       } else if (strcmp(token, "mlcomplete") == 0) {
+                               SCSI_SET_MLCOMPLETE_LOGGING(level);
+                       } else if (strcmp(token, "llqueue") == 0) {
+                               SCSI_SET_LLQUEUE_LOGGING(level);
+                       } else if (strcmp(token, "llcomplete") == 0) {
+                               SCSI_SET_LLCOMPLETE_LOGGING(level);
+                       } else if (strcmp(token, "hlqueue") == 0) {
+                               SCSI_SET_HLQUEUE_LOGGING(level);
+                       } else if (strcmp(token, "hlcomplete") == 0) {
+                               SCSI_SET_HLCOMPLETE_LOGGING(level);
+                       } else if (strcmp(token, "ioctl") == 0) {
+                               SCSI_SET_IOCTL_LOGGING(level);
+                       } else {
+                               goto out;
+                       }
+               }
+
+               printk(KERN_INFO "scsi logging level set to 0x%8.8x\n", scsi_logging_level);
+       }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+
+       /*
+        * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
+        * with  "0 1 2 3" replaced by your "Host Channel Id Lun".
+        * Consider this feature BETA.
+        *     CAUTION: This is not for hotplugging your peripherals. As
+        *     SCSI was not designed for this, you could damage your
+        *     hardware!
+        * However, it is perhaps legal to switch on an already
+        * connected device; it is just not guaranteed that this won't
+        * corrupt an ongoing data transfer.
+        */
+       if (!strncmp("add-single-device", buffer + 5, 17)) {
+               p = buffer + 23;
+
+               host = simple_strtoul(p, &p, 0);
+               channel = simple_strtoul(p + 1, &p, 0);
+               id = simple_strtoul(p + 1, &p, 0);
+               lun = simple_strtoul(p + 1, &p, 0);
+
+               printk(KERN_INFO "scsi singledevice %d %d %d %d\n", host, channel,
+                      id, lun);
+
+               for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+                       if (HBA_ptr->host_no == host) {
+                               break;
+                       }
+               }
+               err = -ENXIO;
+               if (!HBA_ptr)
+                       goto out;
+
+               for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+                       if ((scd->channel == channel
+                            && scd->id == id
+                            && scd->lun == lun)) {
+                               break;
+                       }
+               }
+
+               err = -ENOSYS;
+               if (scd)
+                       goto out;       /* We do not yet support unplugging */
+
+               scan_scsis(HBA_ptr, 1, channel, id, lun);
+
+               /* FIXME (DB) This assumes that the queue_depth routines can be used
+                  in this context as well, while they were all designed to be
+                  called only once after the detect routine. (DB) */
+               /* queue_depth routine moved to inside scan_scsis(,1,,,) so
+                  it is called before build_commandblocks() */
+
+               err = length;
+               goto out;
+       }
+       /*
+        * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi
+        * with  "0 1 2 3" replaced by your "Host Channel Id Lun".
+        *
+        * Consider this feature pre-BETA.
+        *
+        *     CAUTION: This is not for hotplugging your peripherals. As
+        *     SCSI was not designed for this you could damage your
+        *     hardware and thoroughly confuse the SCSI subsystem.
+        *
+        */
+       else if (!strncmp("remove-single-device", buffer + 5, 20)) {
+               p = buffer + 26;
+
+               host = simple_strtoul(p, &p, 0);
+               channel = simple_strtoul(p + 1, &p, 0);
+               id = simple_strtoul(p + 1, &p, 0);
+               lun = simple_strtoul(p + 1, &p, 0);
+
+
+               for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+                       if (HBA_ptr->host_no == host) {
+                               break;
+                       }
+               }
+               err = -ENODEV;
+               if (!HBA_ptr)
+                       goto out;
+
+               for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+                       if ((scd->channel == channel
+                            && scd->id == id
+                            && scd->lun == lun)) {
+                               break;
+                       }
+               }
+
+               if (scd == NULL)
+                       goto out;       /* there is no such device attached */
+
+               err = -EBUSY;
+               if (scd->access_count)
+                       goto out;
+
+               SDTpnt = scsi_devicelist;
+               while (SDTpnt != NULL) {
+                       if (SDTpnt->detach)
+                               (*SDTpnt->detach) (scd);
+                       SDTpnt = SDTpnt->next;
+               }
+
+               if (scd->attached == 0) {
+                       /*
+                        * Nobody is using this device any more.
+                        * Free all of the command structures.
+                        */
+                        if (HBA_ptr->hostt->revoke)
+                                HBA_ptr->hostt->revoke(scd);
+                       devfs_unregister (scd->de);
+                       scsi_release_commandblocks(scd);
+
+                       /* Now we can remove the device structure */
+                       if (scd->next != NULL)
+                               scd->next->prev = scd->prev;
+
+                       if (scd->prev != NULL)
+                               scd->prev->next = scd->next;
+
+                       if (HBA_ptr->host_queue == scd) {
+                               HBA_ptr->host_queue = scd->next;
+                       }
+                       blk_cleanup_queue(&scd->request_queue);
+                       kfree((char *) scd);
+               } else {
+                       goto out;
+               }
+               err = 0;
+       }
+out:
+       
+       free_page((unsigned long) buffer);
+       return err;
+}
+#endif
+
+/*
+ * This entry point should be called by a driver if it is trying
+ * to add a low level scsi driver to the system.
+ */
+static int scsi_register_host(Scsi_Host_Template * tpnt)
+{
+       int pcount;
+       struct Scsi_Host *shpnt;
+       Scsi_Device *SDpnt;
+       struct Scsi_Device_Template *sdtpnt;
+       const char *name;
+       unsigned long flags;
+       int out_of_space = 0;
+
+       if (tpnt->next || !tpnt->detect)
+               return 1;       /* Must be already loaded, or
+                                * no detect routine available
+                                */
+
+       /* If max_sectors isn't set, default to max */
+       if (!tpnt->max_sectors)
+               tpnt->max_sectors = MAX_SECTORS;
+
+       pcount = next_scsi_host;
+
+       MOD_INC_USE_COUNT;
+
+       /* The detect routine must carefully spinunlock/spinlock if 
+          it enables interrupts, since all interrupt handlers do 
+          spinlock as well.
+          All lame drivers are going to fail due to the following 
+          spinlock. For the time being let's use it only for drivers
+          using the new scsi code. NOTE: the detect routine could
+          redefine the value tpnt->use_new_eh_code. (DB, 13 May 1998) */
+
+       if (tpnt->use_new_eh_code) {
+               spin_lock_irqsave(&io_request_lock, flags);
+               tpnt->present = tpnt->detect(tpnt);
+               spin_unlock_irqrestore(&io_request_lock, flags);
+       } else
+               tpnt->present = tpnt->detect(tpnt);
+
+       if (tpnt->present) {
+               if (pcount == next_scsi_host) {
+                       if (tpnt->present > 1) {
+                               printk(KERN_ERR "scsi: Failure to register low-level scsi driver");
+                               scsi_unregister_host(tpnt);
+                               return 1;
+                       }
+                       /* 
+                        * The low-level driver reported a host but did not
+                        * register it itself, so register one on its behalf now.
+                        */
+                       if(scsi_register(tpnt, 0)==NULL)
+                       {
+                               printk(KERN_ERR "scsi: register failed.\n");
+                               scsi_unregister_host(tpnt);
+                               return 1;
+                       }
+               }
+               tpnt->next = scsi_hosts;        /* Add to the linked list */
+               scsi_hosts = tpnt;
+
+               /* Add the new driver to /proc/scsi */
+#ifdef CONFIG_PROC_FS
+               build_proc_dir_entries(tpnt);
+#endif
+
+
+               /*
+                * Add the kernel threads for each host adapter that will
+                * handle error correction.
+                */
+               for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+                       if (shpnt->hostt == tpnt && shpnt->hostt->use_new_eh_code) {
+                               DECLARE_MUTEX_LOCKED(sem);
+
+                               shpnt->eh_notify = &sem;
+                               kernel_thread((int (*)(void *)) scsi_error_handler,
+                                             (void *) shpnt, 0);
+
+                               /*
+                                * Now wait for the kernel error thread to initialize itself
+                                * as it might be needed when we scan the bus.
+                                */
+                               down(&sem);
+                               shpnt->eh_notify = NULL;
+                       }
+               }
+
+               for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+                       if (shpnt->hostt == tpnt) {
+                               if (tpnt->info) {
+                                       name = tpnt->info(shpnt);
+                               } else {
+                                       name = tpnt->name;
+                               }
+                               printk(KERN_INFO "scsi%d : %s\n",               /* And print a little message */
+                                      shpnt->host_no, name);
+                       }
+               }
+
+               /* The next step is to call scan_scsis here.  This generates the
+                * Scsi_Device entries
+                */
+               for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+                       if (shpnt->hostt == tpnt) {
+                               scan_scsis(shpnt, 0, 0, 0, 0);
+                               if (shpnt->select_queue_depths != NULL) {
+                                       (shpnt->select_queue_depths) (shpnt, shpnt->host_queue);
+                               }
+                       }
+               }
+
+               for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+                       if (sdtpnt->init && sdtpnt->dev_noticed)
+                               (*sdtpnt->init) ();
+               }
+
+               /*
+                * Next we create the Scsi_Cmnd structures for this host 
+                */
+               for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+                       for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+                               if (SDpnt->host->hostt == tpnt) {
+                                       for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+                                               if (sdtpnt->attach)
+                                                       (*sdtpnt->attach) (SDpnt);
+                                       if (SDpnt->attached) {
+                                               scsi_build_commandblocks(SDpnt);
+                                               if (0 == SDpnt->has_cmdblocks)
+                                                       out_of_space = 1;
+                                       }
+                               }
+               }
+
+               /*
+                * Now that we have all of the devices, resize the DMA pool,
+                * as required.  */
+               if (!out_of_space)
+                       scsi_resize_dma_pool();
+
+
+               /* This does any final handling that is required. */
+               for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+                       if (sdtpnt->finish && sdtpnt->nr_dev) {
+                               (*sdtpnt->finish) ();
+                       }
+               }
+       }
+#if defined(USE_STATIC_SCSI_MEMORY)
+       printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+              (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+              (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+              (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+       if (out_of_space) {
+               scsi_unregister_host(tpnt);     /* easiest way to clean up?? */
+               return 1;
+       } else
+               return 0;
+}
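+
+/*
+ * Example (sketch only): the bare minimum a low-level driver supplies to
+ * reach scsi_register_host().  Registration normally goes through
+ * scsi_register_module(MODULE_SCSI_HA, ...), which lands here.  All
+ * "example_" names and the numbers are invented; a real driver with
+ * use_new_eh_code set would also supply eh_abort_handler and friends.
+ */
+#if 0  /* illustrative only */
+static int example_detect(Scsi_Host_Template *tpnt)
+{
+       struct Scsi_Host *shpnt = scsi_register(tpnt, 0);
+
+       /* ... probe and claim the hardware here ... */
+       return shpnt ? 1 : 0;           /* number of hosts found */
+}
+
+static int example_queuecommand(Scsi_Cmnd *SCpnt,
+                               void (*done)(Scsi_Cmnd *))
+{
+       SCpnt->scsi_done = done;        /* saved for the interrupt path */
+       /* ... start the command on the hardware ... */
+       return 0;
+}
+
+static Scsi_Host_Template example_template = {
+       name:            "example",
+       proc_name:       "example",
+       detect:          example_detect,
+       queuecommand:    example_queuecommand,
+       can_queue:       16,
+       this_id:         7,
+       cmd_per_lun:     2,
+       sg_tablesize:    SG_ALL,
+       use_new_eh_code: 1,
+};
+
+static int __init example_init(void)
+{
+       return scsi_register_module(MODULE_SCSI_HA, &example_template);
+}
+#endif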
+
+/*
+ * Similarly, this entry point should be called by a loadable module if it
+ * is trying to remove a low level scsi driver from the system.
+ */
+static int scsi_unregister_host(Scsi_Host_Template * tpnt)
+{
+       int online_status;
+       int pcount0, pcount;
+       Scsi_Cmnd *SCpnt;
+       Scsi_Device *SDpnt;
+       Scsi_Device *SDpnt1;
+       struct Scsi_Device_Template *sdtpnt;
+       struct Scsi_Host *sh1;
+       struct Scsi_Host *shpnt;
+       char name[10];  /* host_no>=10^9? I don't think so. */
+
+       /* get the big kernel lock, so we don't race with open() */
+       lock_kernel();
+
+       /*
+        * First verify that this host adapter is completely free with no pending
+        * commands 
+        */
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       if (SDpnt->host->hostt == tpnt
+                           && SDpnt->host->hostt->module
+                           && GET_USE_COUNT(SDpnt->host->hostt->module))
+                               goto err_out;
+                       /* 
+                        * FIXME(eric) - We need to find a way to notify the
+                        * low level driver that we are shutting down - via the
+                        * special device entry that still needs to get added. 
+                        *
+                        * Is detach interface below good enough for this?
+                        */
+               }
+       }
+
+       /*
+        * FIXME(eric) put a spinlock on this.  We force all of the devices offline
+        * to help prevent race conditions where other hosts/processors could try and
+        * get in and queue a command.
+        */
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       if (SDpnt->host->hostt == tpnt)
+                               SDpnt->online = FALSE;
+
+               }
+       }
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               if (shpnt->hostt != tpnt) {
+                       continue;
+               }
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       /*
+                        * Loop over all of the commands associated with the device.  If any of
+                        * them are busy, then set the state back to inactive and bail.
+                        */
+                       for (SCpnt = SDpnt->device_queue; SCpnt;
+                            SCpnt = SCpnt->next) {
+                               online_status = SDpnt->online;
+                               SDpnt->online = FALSE;
+                               if (SCpnt->request.rq_status != RQ_INACTIVE) {
+                                       printk(KERN_ERR "SCSI device not inactive - rq_status=%d, target=%d, pid=%ld, state=%d, owner=%d.\n",
+                                              SCpnt->request.rq_status, SCpnt->target, SCpnt->pid,
+                                            SCpnt->state, SCpnt->owner);
+                                       for (SDpnt1 = shpnt->host_queue; SDpnt1;
+                                            SDpnt1 = SDpnt1->next) {
+                                               for (SCpnt = SDpnt1->device_queue; SCpnt;
+                                                    SCpnt = SCpnt->next)
+                                                       if (SCpnt->request.rq_status == RQ_SCSI_DISCONNECTING)
+                                                               SCpnt->request.rq_status = RQ_INACTIVE;
+                                       }
+                                       SDpnt->online = online_status;
+                                       printk(KERN_ERR "Device busy???\n");
+                                       goto err_out;
+                               }
+                               /*
+                                * No, this device is really free.  Mark it as such, and
+                                * continue on.
+                                */
+                               SCpnt->state = SCSI_STATE_DISCONNECTING;
+                               SCpnt->request.rq_status = RQ_SCSI_DISCONNECTING;       /* Mark as busy */
+                       }
+               }
+       }
+       /* Next we detach the high level drivers from the Scsi_Device structures */
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               if (shpnt->hostt != tpnt) {
+                       continue;
+               }
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+                               if (sdtpnt->detach)
+                                       (*sdtpnt->detach) (SDpnt);
+
+                       /* If something still attached, punt */
+                       if (SDpnt->attached) {
+                               printk(KERN_ERR "Attached usage count = %d\n", SDpnt->attached);
+                               goto err_out;
+                       }
+                       devfs_unregister (SDpnt->de);
+               }
+       }
+
+       /*
+        * Next, kill the kernel error recovery thread for this host.
+        */
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               if (shpnt->hostt == tpnt
+                   && shpnt->hostt->use_new_eh_code
+                   && shpnt->ehandler != NULL) {
+                       DECLARE_MUTEX_LOCKED(sem);
+
+                       shpnt->eh_notify = &sem;
+                       send_sig(SIGHUP, shpnt->ehandler, 1);
+                       down(&sem);
+                       shpnt->eh_notify = NULL;
+               }
+       }
+
+       /* Next we free up the Scsi_Cmnd structures for this host */
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               if (shpnt->hostt != tpnt) {
+                       continue;
+               }
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = shpnt->host_queue) {
+                       scsi_release_commandblocks(SDpnt);
+
+                       blk_cleanup_queue(&SDpnt->request_queue);
+                       /* Next free up the Scsi_Device structures for this host */
+                       shpnt->host_queue = SDpnt->next;
+                       kfree((char *) SDpnt);
+
+               }
+       }
+
+       /* Next we go through and remove the instances of the individual hosts
+        * that were detected */
+
+       pcount0 = next_scsi_host;
+       for (shpnt = scsi_hostlist; shpnt; shpnt = sh1) {
+               sh1 = shpnt->next;
+               if (shpnt->hostt != tpnt)
+                       continue;
+               pcount = next_scsi_host;
+               /* Remove the /proc/scsi directory entry */
+               sprintf(name,"%d",shpnt->host_no);
+               remove_proc_entry(name, tpnt->proc_dir);
+               if (tpnt->release)
+                       (*tpnt->release) (shpnt);
+               else {
+                       /* This is the default case for the release function.
+                        * It should do the right thing for most correctly
+                        * written host adapters.
+                        */
+                       if (shpnt->irq)
+                               free_irq(shpnt->irq, NULL);
+                       if (shpnt->dma_channel != 0xff)
+                               free_dma(shpnt->dma_channel);
+                       if (shpnt->io_port && shpnt->n_io_port)
+                               release_region(shpnt->io_port, shpnt->n_io_port);
+               }
+               if (pcount == next_scsi_host)
+                       scsi_unregister(shpnt);
+               tpnt->present--;
+       }
+
+       /*
+        * If there are absolutely no more hosts left, it is safe
+        * to completely nuke the DMA pool.  The resize operation will
+        * do the right thing and free everything.
+        */
+       if (!scsi_hosts)
+               scsi_resize_dma_pool();
+
+       if (pcount0 != next_scsi_host)
+               printk(KERN_INFO "scsi : %d host%s left.\n", next_scsi_host,
+                      (next_scsi_host == 1) ? "" : "s");
+
+#if defined(USE_STATIC_SCSI_MEMORY)
+       printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+              (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+              (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+              (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+       /*
+        * Remove it from the linked list and /proc if all
+        * hosts were successfully removed (i.e. present == 0)
+        */
+       if (!tpnt->present) {
+               Scsi_Host_Template **SHTp = &scsi_hosts;
+               Scsi_Host_Template *SHT;
+
+               while ((SHT = *SHTp) != NULL) {
+                       if (SHT == tpnt) {
+                               *SHTp = SHT->next;
+                               remove_proc_entry(tpnt->proc_name, proc_scsi);
+                               break;
+                       }
+                       SHTp = &SHT->next;
+               }
+       }
+       MOD_DEC_USE_COUNT;
+
+       unlock_kernel();
+       return 0;
+
+err_out:
+       unlock_kernel();
+       return -1;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt);
+
+/*
+ * This entry point should be called by a loadable module if it is trying
+ * to add a high-level scsi driver to the system.
+ */
+static int scsi_register_device_module(struct Scsi_Device_Template *tpnt)
+{
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *shpnt;
+       int out_of_space = 0;
+
+       if (tpnt->next)
+               return 1;
+
+       scsi_register_device(tpnt);
+       /*
+        * First scan the devices that we know about, and see whether the
+        * new driver notices any of them.
+        */
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       if (tpnt->detect)
+                               SDpnt->detected = (*tpnt->detect) (SDpnt);
+               }
+       }
+
+       /*
+        * If any of the devices would match this driver, then perform the
+        * init function.
+        */
+       if (tpnt->init && tpnt->dev_noticed) {
+               if ((*tpnt->init) ()) {
+                       for (shpnt = scsi_hostlist; shpnt;
+                            shpnt = shpnt->next) {
+                               for (SDpnt = shpnt->host_queue; SDpnt;
+                                    SDpnt = SDpnt->next) {
+                                       SDpnt->detected = 0;
+                               }
+                       }
+                       scsi_deregister_device(tpnt);
+                       return 1;
+               }
+       }
+
+       /*
+        * Now actually connect the devices to the new driver.
+        */
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       SDpnt->attached += SDpnt->detected;
+                       SDpnt->detected = 0;
+                       if (tpnt->attach)
+                               (*tpnt->attach) (SDpnt);
+                       /*
+                        * If this driver attached to the device and we don't
+                        * have any command blocks for this device, allocate some.
+                        */
+                       if (SDpnt->attached && SDpnt->has_cmdblocks == 0) {
+                               SDpnt->online = TRUE;
+                               scsi_build_commandblocks(SDpnt);
+                               if (0 == SDpnt->has_cmdblocks)
+                                       out_of_space = 1;
+                       }
+               }
+       }
+
+       /*
+        * This does any final handling that is required.
+        */
+       if (tpnt->finish && tpnt->nr_dev)
+               (*tpnt->finish) ();
+       if (!out_of_space)
+               scsi_resize_dma_pool();
+       MOD_INC_USE_COUNT;
+
+       if (out_of_space) {
+               scsi_unregister_device(tpnt);   /* easiest way to clean up?? */
+               return 1;
+       } else
+               return 0;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt)
+{
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *shpnt;
+
+       lock_kernel();
+       /*
+        * If we are busy, this is not going to fly.
+        */
+       if (GET_USE_COUNT(tpnt->module) != 0)
+               goto error_out;
+
+       /*
+        * Next, detach the devices from the driver.
+        */
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt;
+                    SDpnt = SDpnt->next) {
+                       if (tpnt->detach)
+                               (*tpnt->detach) (SDpnt);
+                       if (SDpnt->attached == 0) {
+                               SDpnt->online = FALSE;
+
+                               /*
+                                * Nobody is using this device any more.  Free all of the
+                                * command structures.
+                                */
+                               scsi_release_commandblocks(SDpnt);
+                       }
+               }
+       }
+       /*
+        * Extract the template from the linked list.
+        */
+       scsi_deregister_device(tpnt);
+
+       MOD_DEC_USE_COUNT;
+       unlock_kernel();
+       /*
+        * Final cleanup for the driver is done in the driver sources in the
+        * cleanup function.
+        */
+       return 0;
+error_out:
+       unlock_kernel();
+       return -1;
+}
+
+
+/* This function should be called by drivers which need to register
+ * with the mid-level scsi system. As of 2.4.0-test9pre3 this is our
+ * main device/hosts register function /mathiasen
+ */
+int scsi_register_module(int module_type, void *ptr)
+{
+       switch (module_type) {
+       case MODULE_SCSI_HA:
+               return scsi_register_host((Scsi_Host_Template *) ptr);
+
+               /* Load upper level device handler of some kind */
+       case MODULE_SCSI_DEV:
+#ifdef CONFIG_KMOD
+               if (scsi_hosts == NULL)
+                       request_module("scsi_hostadapter");
+#endif
+               return scsi_register_device_module((struct Scsi_Device_Template *) ptr);
+               /* The rest of these are not yet implemented */
+
+               /* Load constants.o */
+       case MODULE_SCSI_CONST:
+
+               /* Load specialized ioctl handler for some device.  Intended for
+                * cdroms that have non-SCSI2 audio command sets. */
+       case MODULE_SCSI_IOCTL:
+
+       default:
+               return 1;
+       }
+}
+
+/* Reverse the actions taken above
+ */
+int scsi_unregister_module(int module_type, void *ptr)
+{
+       int retval = 0;
+
+       switch (module_type) {
+       case MODULE_SCSI_HA:
+               retval = scsi_unregister_host((Scsi_Host_Template *) ptr);
+               break;
+       case MODULE_SCSI_DEV:
+               retval = scsi_unregister_device((struct Scsi_Device_Template *)ptr);
+               break;
+               /* The rest of these are not yet implemented. */
+       case MODULE_SCSI_CONST:
+       case MODULE_SCSI_IOCTL:
+               break;
+       default:;
+       }
+       return retval;
+}
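
As a usage illustration of the two entry points above, a minimal upper-level
driver module might register and unregister its template as in the hedged
sketch below. The template fields and the stub callbacks (my_detect, my_attach,
my_detach) are hypothetical; a real driver would also fill in tag, scsi_type,
the major number, and bump dev_noticed from its detect routine.

static int my_detect(Scsi_Device *SDpnt)
{
        /* Claim every device; a real driver would filter on SDpnt->type. */
        return 1;
}

static int my_attach(Scsi_Device *SDpnt)
{
        return 0;
}

static void my_detach(Scsi_Device *SDpnt)
{
}

static struct Scsi_Device_Template my_template = {
        name:   "mydriver",
        detect: my_detect,
        attach: my_attach,
        detach: my_detach,
};

static int __init my_init(void)
{
        /* Runs the detect/init/attach sequence shown above. */
        return scsi_register_module(MODULE_SCSI_DEV, &my_template);
}

static void __exit my_exit(void)
{
        scsi_unregister_module(MODULE_SCSI_DEV, &my_template);
}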
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Function:    scsi_dump_status
+ *
+ * Purpose:     Brain dump of scsi system, used for problem solving.
+ *
+ * Arguments:   level - used to indicate level of detail.
+ *
+ * Notes:       The level isn't used at all yet, but we need to find some way
+ *              of sensibly logging varying degrees of information.  A quick one-line
+ *              display of each command, plus its status, would be most useful.
+ *
+ *              This does depend upon CONFIG_SCSI_LOGGING - I do want some way of turning
+ *              it all off if the user wants a lean and mean kernel.  It would probably
+ *              also be useful to allow the user to specify one single host to be dumped.
+ *              A second argument to the function would be useful for that purpose.
+ *
+ *              FIXME - some formatting of the output into tables would be very handy.
+ */
+static void scsi_dump_status(int level)
+{
+#ifdef CONFIG_SCSI_LOGGING             /* { */
+       int i;
+       struct Scsi_Host *shpnt;
+       Scsi_Cmnd *SCpnt;
+       Scsi_Device *SDpnt;
+       printk(KERN_INFO "Dump of scsi host parameters:\n");
+       i = 0;
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               printk(KERN_INFO " %d %d %d : %d %d\n",
+                      shpnt->host_failed,
+                      shpnt->host_busy,
+                      atomic_read(&shpnt->host_active),
+                      shpnt->host_blocked,
+                      shpnt->host_self_blocked);
+       }
+
+       printk(KERN_INFO "\n\n");
+       printk(KERN_INFO "Dump of scsi command parameters:\n");
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               printk(KERN_INFO "h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result\n");
+               for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                       for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                               /*  (0) h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result %d %x      */
+                               printk(KERN_INFO "(%3d) %2d:%1d:%2d:%2d (%6s %4ld %4ld %4ld %4x %1d) (%1d %1d 0x%2x) (%4d %4d %4d) 0x%2.2x 0x%2.2x 0x%8.8x\n",
+                                      i++,
+
+                                      SCpnt->host->host_no,
+                                      SCpnt->channel,
+                                      SCpnt->target,
+                                      SCpnt->lun,
+
+                                      kdevname(SCpnt->request.rq_dev),
+                                      SCpnt->request.sector,
+                                      SCpnt->request.nr_sectors,
+                                      SCpnt->request.current_nr_sectors,
+                                      SCpnt->request.rq_status,
+                                      SCpnt->use_sg,
+
+                                      SCpnt->retries,
+                                      SCpnt->allowed,
+                                      SCpnt->flags,
+
+                                      SCpnt->timeout_per_command,
+                                      SCpnt->timeout,
+                                      SCpnt->internal_timeout,
+
+                                      SCpnt->cmnd[0],
+                                      SCpnt->sense_buffer[2],
+                                      SCpnt->result);
+                       }
+               }
+       }
+
+       for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+               for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                       /* Now dump the request lists for each block device */
+                       printk(KERN_INFO "Dump of pending block device requests\n");
+                       for (i = 0; i < MAX_BLKDEV; i++) {
+                               struct list_head * queue_head;
+
+                               queue_head = &blk_dev[i].request_queue.queue_head;
+                               if (!list_empty(queue_head)) {
+                                       struct request *req;
+                                       struct list_head * entry;
+
+                                       printk(KERN_INFO "%d: ", i);
+                                       entry = queue_head->next;
+                                       do {
+                                               req = blkdev_entry_to_request(entry);
+                                               printk("(%s %d %ld %ld %ld) ",
+                                                  kdevname(req->rq_dev),
+                                                      req->cmd,
+                                                      req->sector,
+                                                      req->nr_sectors,
+                                               req->current_nr_sectors);
+                                       } while ((entry = entry->next) != queue_head);
+                                       printk("\n");
+                               }
+                       }
+               }
+       }
+#endif /* CONFIG_SCSI_LOGGING */ /* } */
+}
+#endif                         /* CONFIG_PROC_FS */
+
+static int __init scsi_host_no_init (char *str)
+{
+    static int next_no = 0;
+    char *temp;
+
+    while (str) {
+       temp = str;
+       while (*temp && (*temp != ':') && (*temp != ','))
+           temp++;
+       if (!*temp)
+           temp = NULL;
+       else
+           *temp++ = 0;
+       scsi_host_no_insert(str, next_no);
+       str = temp;
+       next_no++;
+    }
+    return 1;
+}
+
+static char *scsihosts;
+
+MODULE_PARM(scsihosts, "s");
+MODULE_DESCRIPTION("SCSI core");
+MODULE_LICENSE("GPL");
+
+#ifndef MODULE
+int __init scsi_setup(char *str)
+{
+       scsihosts = str;
+       return 1;
+}
+
+__setup("scsihosts=", scsi_setup);
+#endif
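
To make the parser above concrete, here is an assumed walk-through (the host
names are illustrative): a colon- or comma-separated list reserves host
numbers in list order.

/*
 * Example: booting with "scsihosts=aha1542:ncr53c8xx" makes
 * scsi_host_no_init() perform, in order:
 *
 *      scsi_host_no_insert("aha1542", 0);
 *      scsi_host_no_insert("ncr53c8xx", 1);
 *
 * so those adapters are assigned host numbers 0 and 1 regardless of
 * detection order.
 */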
+
+static int __init init_scsi(void)
+{
+       struct proc_dir_entry *generic;
+
+       printk(KERN_INFO "SCSI subsystem driver " REVISION "\n");
+
+        if( scsi_init_minimal_dma_pool() != 0 )
+        {
+                return 1;
+        }
+
+       /*
+        * This makes /proc/scsi and /proc/scsi/scsi visible.
+        */
+#ifdef CONFIG_PROC_FS
+       proc_scsi = proc_mkdir("scsi", 0);
+       if (!proc_scsi) {
+               printk (KERN_ERR "cannot init /proc/scsi\n");
+               return -ENOMEM;
+       }
+       generic = create_proc_info_entry ("scsi/scsi", 0, 0, scsi_proc_info);
+       if (!generic) {
+               printk (KERN_ERR "cannot init /proc/scsi/scsi\n");
+               remove_proc_entry("scsi", 0);
+               return -ENOMEM;
+       }
+       generic->write_proc = proc_scsi_gen_write;
+#endif
+
+        scsi_devfs_handle = devfs_mk_dir (NULL, "scsi", NULL);
+        if (scsihosts)
+               printk(KERN_INFO "scsi: host order: %s\n", scsihosts);  
+       scsi_host_no_init (scsihosts);
+       /*
+        * This is where the processing takes place for most everything
+        * when commands are completed.
+        */
+       init_bh(SCSI_BH, scsi_bottom_half_handler);
+
+       return 0;
+}
+
+static void __exit exit_scsi(void)
+{
+       Scsi_Host_Name *shn, *shn2 = NULL;
+
+       remove_bh(SCSI_BH);
+
+        devfs_unregister (scsi_devfs_handle);
+        for (shn = scsi_host_no_list;shn;shn = shn->next) {
+               if (shn->name)
+                       kfree(shn->name);
+                if (shn2)
+                       kfree (shn2);
+                shn2 = shn;
+        }
+        if (shn2)
+               kfree (shn2);
+
+#ifdef CONFIG_PROC_FS
+       /* No, we're not here anymore. Don't show the /proc/scsi files. */
+       remove_proc_entry ("scsi/scsi", 0);
+       remove_proc_entry ("scsi", 0);
+#endif
+       
+       /*
+        * Free up the DMA pool.
+        */
+       scsi_resize_dma_pool();
+
+}
+
+module_init(init_scsi);
+module_exit(exit_scsi);
+
+/*
+ * Function:    scsi_get_host_dev()
+ *
+ * Purpose:     Create a Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments:   SHpnt   - Host that needs a Scsi_Device
+ *
+ * Lock status: None assumed.
+ *
+ * Returns:     The Scsi_Device or NULL
+ *
+ * Notes:
+ */
+Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt)
+{
+        Scsi_Device * SDpnt;
+
+        /*
+         * Attach a single Scsi_Device to the Scsi_Host - this should
+         * be made to look like a "pseudo-device" that points to the
+         * HA itself.  For the moment, we include it at the head of
+         * the host_queue itself - I don't think we want to show this
+         * to the HA in select_queue_depths(), as this would probably confuse
+         * matters.
+         * Note - this device is not accessible from any high-level
+         * drivers (including generics), which is probably not
+         * optimal.  We can add hooks later to attach 
+         */
+        SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+                                        GFP_ATOMIC);
+        if(SDpnt == NULL)
+               return NULL;
+               
+        memset(SDpnt, 0, sizeof(Scsi_Device));
+
+        SDpnt->host = SHpnt;
+        SDpnt->id = SHpnt->this_id;
+        SDpnt->type = -1;
+        SDpnt->queue_depth = 1;
+        
+       scsi_build_commandblocks(SDpnt);
+
+       scsi_initialize_queue(SDpnt, SHpnt);
+
+       SDpnt->online = TRUE;
+
+        /*
+         * Initialize the object that we will use to wait for command blocks.
+         */
+       init_waitqueue_head(&SDpnt->scpnt_wait);
+        return SDpnt;
+}
+
+/*
+ * Function:    scsi_free_host_dev()
+ *
+ * Purpose:     Free a Scsi_Device that points to the host adapter itself.
+ *
+ * Arguments:   SDpnt   - Pseudo-device previously obtained from
+ *                        scsi_get_host_dev()
+ *
+ * Lock status: None assumed.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:
+ */
+void scsi_free_host_dev(Scsi_Device * SDpnt)
+{
+        if( (unsigned char) SDpnt->id != (unsigned char) SDpnt->host->this_id )
+        {
+                panic("Attempt to delete wrong device\n");
+        }
+
+        blk_cleanup_queue(&SDpnt->request_queue);
+
+        /*
+         * We only have a single SCpnt attached to this device.  Free
+         * it now.
+         */
+       scsi_release_commandblocks(SDpnt);
+        kfree(SDpnt);
+}
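
A hedged sketch of how the two helpers above pair up; probe_adapter() and the
command issued in the middle are hypothetical, not mid-layer API.

static int probe_adapter(struct Scsi_Host *SHpnt)
{
        Scsi_Device *SDpnt = scsi_get_host_dev(SHpnt);

        if (SDpnt == NULL)
                return -ENOMEM;
        /* ... issue host-directed commands through SDpnt here ... */
        scsi_free_host_dev(SDpnt);
        return 0;
}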
+
+/*
+ * Function:   scsi_reset_provider_done_command
+ *
+ * Purpose:    Dummy done routine.
+ *
+ * Notes:      Some low level drivers will call scsi_done and end up here,
+ *             others won't bother.
+ *             We don't want the bogus command used for the bus/device
+ *             reset to find its way into the mid-layer so we intercept
+ *             it here.
+ */
+static void
+scsi_reset_provider_done_command(Scsi_Cmnd *SCpnt)
+{
+}
+
+/*
+ * Function:   scsi_reset_provider
+ *
+ * Purpose:    Send requested reset to a bus or device at any phase.
+ *
+ * Arguments:  device  - device to send reset to
+ *             flag - reset type (see scsi.h)
+ *
+ * Returns:    SUCCESS/FAILURE.
+ *
+ * Notes:      This is used by the SCSI Generic driver to provide
+ *             Bus/Device reset capability.
+ */
+int
+scsi_reset_provider(Scsi_Device *dev, int flag)
+{
+       Scsi_Cmnd SC, *SCpnt = &SC;
+       int rtn;
+
+       memset(&SCpnt->eh_timeout, 0, sizeof(SCpnt->eh_timeout));
+       SCpnt->host                     = dev->host;
+       SCpnt->device                   = dev;
+       SCpnt->target                   = dev->id;
+       SCpnt->lun                      = dev->lun;
+       SCpnt->channel                  = dev->channel;
+       SCpnt->request.rq_status        = RQ_SCSI_BUSY;
+       SCpnt->request.waiting          = NULL;
+       SCpnt->use_sg                   = 0;
+       SCpnt->old_use_sg               = 0;
+       SCpnt->old_cmd_len              = 0;
+       SCpnt->underflow                = 0;
+       SCpnt->transfersize             = 0;
+       SCpnt->resid                    = 0;
+       SCpnt->serial_number            = 0;
+       SCpnt->serial_number_at_timeout = 0;
+       SCpnt->host_scribble            = NULL;
+       SCpnt->next                     = NULL;
+       SCpnt->state                    = SCSI_STATE_INITIALIZING;
+       SCpnt->owner                    = SCSI_OWNER_MIDLEVEL;
+    
+       memset(&SCpnt->cmnd, '\0', sizeof(SCpnt->cmnd));
+    
+       SCpnt->scsi_done                = scsi_reset_provider_done_command;
+       SCpnt->done                     = NULL;
+       SCpnt->reset_chain              = NULL;
+        
+       SCpnt->buffer                   = NULL;
+       SCpnt->bufflen                  = 0;
+       SCpnt->request_buffer           = NULL;
+       SCpnt->request_bufflen          = 0;
+
+       SCpnt->internal_timeout         = NORMAL_TIMEOUT;
+       SCpnt->abort_reason             = DID_ABORT;
+
+       SCpnt->cmd_len                  = 0;
+
+       SCpnt->sc_data_direction        = SCSI_DATA_UNKNOWN;
+       SCpnt->sc_request               = NULL;
+       SCpnt->sc_magic                 = SCSI_CMND_MAGIC;
+
+       /*
+        * Sometimes the command can get back into the timer chain,
+        * so use the pid as an identifier.
+        */
+       SCpnt->pid                      = 0;
+
+       if (dev->host->hostt->use_new_eh_code) {
+               rtn = scsi_new_reset(SCpnt, flag);
+       } else {
+               unsigned long flags;
+
+               spin_lock_irqsave(&io_request_lock, flags);
+               rtn = scsi_old_reset(SCpnt, flag);
+               spin_unlock_irqrestore(&io_request_lock, flags);
+       }
+
+       scsi_delete_timer(SCpnt);
+       return rtn;
+}
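
As a usage sketch (the caller and the escalation order are illustrative, not
prescribed by the mid-layer), an sg-style character driver could request
resets through this provider like so:

static int try_reset(Scsi_Device *dev)
{
        /* Try the narrowest reset first, then widen the scope. */
        int rtn = scsi_reset_provider(dev, SCSI_TRY_RESET_DEVICE);

        if (rtn != SUCCESS)
                rtn = scsi_reset_provider(dev, SCSI_TRY_RESET_BUS);
        return rtn;
}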
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi.h b/xen-2.4.16/drivers/scsi/scsi.h
new file mode 100644 (file)
index 0000000..f8199d5
--- /dev/null
@@ -0,0 +1,881 @@
+/*
+ *  scsi.h Copyright (C) 1992 Drew Eckhardt 
+ *         Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ *  generic SCSI package header file by
+ *      Initial versions: Drew Eckhardt
+ *      Subsequent revisions: Eric Youngdale
+ *
+ *  <drew@colorado.edu>
+ *
+ *       Modified by Eric Youngdale eric@andante.org to
+ *       add scatter-gather, multiple outstanding request, and other
+ *       enhancements.
+ */
+
+#ifndef _SCSI_H
+#define _SCSI_H
+
+#include <xeno/config.h>       /* for CONFIG_SCSI_LOGGING */
+/*#include <xeno/devfs_fs_kernel.h>*/
+/*#include <xeno/proc_fs.h>*/
+
+/*
+ * Some of the public constants are being moved to this file.
+ * We include it here so that what came from where is transparent.
+ */
+#include <scsi/scsi.h>
+
+/*#include <xeno/random.h>*/
+
+#include <asm/hardirq.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+
+/*
+ * These are the values that the SCpnt->sc_data_direction and 
+ * SRpnt->sr_data_direction can take.  These need to be set by
+ * whoever creates the command.  The SCSI_DATA_UNKNOWN value is
+ * essentially the default.
+ * In the event that the command creator didn't bother to
+ * set a value, you will see SCSI_DATA_UNKNOWN.
+ */
+#define SCSI_DATA_UNKNOWN       0
+#define SCSI_DATA_WRITE         1
+#define SCSI_DATA_READ          2
+#define SCSI_DATA_NONE          3
+
+#ifdef CONFIG_PCI
+#include <xeno/pci.h>
+#if ((SCSI_DATA_UNKNOWN == PCI_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == PCI_DMA_TODEVICE) && (SCSI_DATA_READ == PCI_DMA_FROMDEVICE) && (SCSI_DATA_NONE == PCI_DMA_NONE))
+#define scsi_to_pci_dma_dir(scsi_dir)  ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_pci_dma_dir(unsigned char scsi_dir)
+{
+        if (scsi_dir == SCSI_DATA_UNKNOWN)
+                return PCI_DMA_BIDIRECTIONAL;
+        if (scsi_dir == SCSI_DATA_WRITE)
+                return PCI_DMA_TODEVICE;
+        if (scsi_dir == SCSI_DATA_READ)
+                return PCI_DMA_FROMDEVICE;
+        return PCI_DMA_NONE;
+}
+#endif
+#endif
+
+#if defined(CONFIG_SBUS) && !defined(CONFIG_SUN3) && !defined(CONFIG_SUN3X)
+#include <asm/sbus.h>
+#if ((SCSI_DATA_UNKNOWN == SBUS_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == SBUS_DMA_TODEVICE) && (SCSI_DATA_READ == SBUS_DMA_FROMDEVICE) && (SCSI_DATA_NONE == SBUS_DMA_NONE))
+#define scsi_to_sbus_dma_dir(scsi_dir) ((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_sbus_dma_dir(unsigned char scsi_dir)
+{
+        if (scsi_dir == SCSI_DATA_UNKNOWN)
+                return SBUS_DMA_BIDIRECTIONAL;
+        if (scsi_dir == SCSI_DATA_WRITE)
+                return SBUS_DMA_TODEVICE;
+        if (scsi_dir == SCSI_DATA_READ)
+                return SBUS_DMA_FROMDEVICE;
+        return SBUS_DMA_NONE;
+}
+#endif
+#endif
+
+/*
+ * Some defs, in case these are not defined elsewhere.
+ */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define MAX_SCSI_DEVICE_CODE 14
+extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+#ifdef DEBUG
+#define SCSI_TIMEOUT (5*HZ)
+#else
+#define SCSI_TIMEOUT (2*HZ)
+#endif
+
+/*
+ * Used for debugging the new queueing code.  We want to make sure
+ * that the lock state is consistent with design.  Only do this in
+ * the user space simulator.
+ */
+#define ASSERT_LOCK(_LOCK, _COUNT)
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USER_DEBUG)
+#undef ASSERT_LOCK
+#define ASSERT_LOCK(_LOCK,_COUNT)       \
+        { if( (_LOCK)->lock != _COUNT )   \
+                panic("Lock count inconsistent %s %d\n", __FILE__, __LINE__); \
+        }
+#endif
+
+/*
+ *  Use these to separate the status, msg, host and driver bytes
+ *
+ *  These are set by:
+ *
+ *      status byte = set from target device
+ *      msg_byte    = return status from host adapter itself.
+ *      host_byte   = set by low-level driver to indicate status.
+ *      driver_byte = set by mid-level.
+ */
+#define status_byte(result) (((result) >> 1) & 0x1f)
+#define msg_byte(result)    (((result) >> 8) & 0xff)
+#define host_byte(result)   (((result) >> 16) & 0xff)
+#define driver_byte(result) (((result) >> 24) & 0xff)
+#define suggestion(result)  (driver_byte(result) & SUGGEST_MASK)
+
+#define sense_class(sense)  (((sense) >> 4) & 0x7)
+#define sense_error(sense)  ((sense) & 0xf)
+#define sense_valid(sense)  ((sense) & 0x80)
+
+#define NEEDS_RETRY     0x2001
+#define SUCCESS         0x2002
+#define FAILED          0x2003
+#define QUEUED          0x2004
+#define SOFT_ERROR      0x2005
+#define ADD_TO_MLQUEUE  0x2006
+
+/*
+ * These are the values that scsi_cmd->state can take.
+ */
+#define SCSI_STATE_TIMEOUT         0x1000
+#define SCSI_STATE_FINISHED        0x1001
+#define SCSI_STATE_FAILED          0x1002
+#define SCSI_STATE_QUEUED          0x1003
+#define SCSI_STATE_UNUSED          0x1006
+#define SCSI_STATE_DISCONNECTING   0x1008
+#define SCSI_STATE_INITIALIZING    0x1009
+#define SCSI_STATE_BHQUEUE         0x100a
+#define SCSI_STATE_MLQUEUE         0x100b
+
+/*
+ * These are the values that the owner field can take.
+ * They are used as an indication of who the command belongs to.
+ */
+#define SCSI_OWNER_HIGHLEVEL      0x100
+#define SCSI_OWNER_MIDLEVEL       0x101
+#define SCSI_OWNER_LOWLEVEL       0x102
+#define SCSI_OWNER_ERROR_HANDLER  0x103
+#define SCSI_OWNER_BH_HANDLER     0x104
+#define SCSI_OWNER_NOBODY         0x105
+
+#define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]
+
+#define IDENTIFY_BASE       0x80
+#define IDENTIFY(can_disconnect, lun)   (IDENTIFY_BASE |\
+                    ((can_disconnect) ?  0x40 : 0) |\
+                    ((lun) & 0x07))
+
+
+/*
+ * This defines the scsi logging feature.  It is a means by which the
+ * user can select how much information they get about various goings on,
+ * and it can be really useful for fault tracing.  The logging word is divided
+ * into ten 3-bit fields, each of which describes a loglevel.  The division of things
+ * is somewhat arbitrary, and the division of the word could be changed if it
+ * were really needed for any reason.  The numbers below are the only place where these
+ * are specified.  For a first go-around, 3 bits is more than enough, since this
+ * gives 8 levels of logging (really 7, since 0 is always off).  Cutting to 2 bits
+ * might be wise at some point.
+ */
+
+#define SCSI_LOG_ERROR_SHIFT              0
+#define SCSI_LOG_TIMEOUT_SHIFT            3
+#define SCSI_LOG_SCAN_SHIFT               6
+#define SCSI_LOG_MLQUEUE_SHIFT            9
+#define SCSI_LOG_MLCOMPLETE_SHIFT         12
+#define SCSI_LOG_LLQUEUE_SHIFT            15
+#define SCSI_LOG_LLCOMPLETE_SHIFT         18
+#define SCSI_LOG_HLQUEUE_SHIFT            21
+#define SCSI_LOG_HLCOMPLETE_SHIFT         24
+#define SCSI_LOG_IOCTL_SHIFT              27
+
+#define SCSI_LOG_ERROR_BITS               3
+#define SCSI_LOG_TIMEOUT_BITS             3
+#define SCSI_LOG_SCAN_BITS                3
+#define SCSI_LOG_MLQUEUE_BITS             3
+#define SCSI_LOG_MLCOMPLETE_BITS          3
+#define SCSI_LOG_LLQUEUE_BITS             3
+#define SCSI_LOG_LLCOMPLETE_BITS          3
+#define SCSI_LOG_HLQUEUE_BITS             3
+#define SCSI_LOG_HLCOMPLETE_BITS          3
+#define SCSI_LOG_IOCTL_BITS               3
+
+#if CONFIG_SCSI_LOGGING
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD)     \
+{                                                       \
+        unsigned int mask;                              \
+                                                        \
+        mask = (1 << (BITS)) - 1;                       \
+        if( ((scsi_logging_level >> (SHIFT)) & mask) > (LEVEL) ) \
+        {                                               \
+                (CMD);                                  \
+        }                                              \
+}
+
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL)            \
+{                                                       \
+        unsigned int mask;                              \
+                                                        \
+        mask = ((1 << (BITS)) - 1) << SHIFT;            \
+        scsi_logging_level = ((scsi_logging_level & ~mask) \
+                              | ((LEVEL << SHIFT) & mask));     \
+}
+
+
+
+#else
+
+/*
+ * With no logging enabled, stub these out so they don't do anything.
+ */
+#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL)
+
+#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD)
+#endif
+
+/*
+ * These are the macros that are actually used throughout the code to
+ * log events.  If logging isn't enabled, they are no-ops and will be
+ * completely absent from the user's code.
+ *
+ * The 'set' versions of the macros are really intended to only be called
+ * from the /proc filesystem, and in production kernels this will be about
+ * all that is ever used.  It could be useful in a debugging environment to
+ * bump the logging level when certain strange events are detected, however.
+ */
+#define SCSI_LOG_ERROR_RECOVERY(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL,CMD);
+#define SCSI_LOG_TIMEOUT(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL,CMD);
+#define SCSI_LOG_SCAN_BUS(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLQUEUE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_MLCOMPLETE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLQUEUE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_LLCOMPLETE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLQUEUE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL,CMD);
+#define SCSI_LOG_HLCOMPLETE(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL,CMD);
+#define SCSI_LOG_IOCTL(LEVEL,CMD)  \
+        SCSI_CHECK_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL,CMD);
+
+
+#define SCSI_SET_ERROR_RECOVERY_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL);
+#define SCSI_SET_TIMEOUT_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL);
+#define SCSI_SET_SCAN_BUS_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL);
+#define SCSI_SET_MLQUEUE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL);
+#define SCSI_SET_MLCOMPLETE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_LLQUEUE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL);
+#define SCSI_SET_LLCOMPLETE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_HLQUEUE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL);
+#define SCSI_SET_HLCOMPLETE_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL);
+#define SCSI_SET_IOCTL_LOGGING(LEVEL)  \
+        SCSI_SET_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL);
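
A worked example of the 3-bit field layout described above, with assumed
values and assuming CONFIG_SCSI_LOGGING is enabled: store level 2 in the
MLQUEUE field, then emit a message gated on it.

static void logging_example(void)
{
        SCSI_SET_MLQUEUE_LOGGING(2);
        /* Bits 9..11 of scsi_logging_level now hold 2, so any
         * SCSI_LOG_MLQUEUE() with LEVEL < 2 fires: */
        SCSI_LOG_MLQUEUE(1, printk("mlqueue logging enabled\n"));
}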
+
+/*
+ *  the return of the status word will be in the following format :
+ *  The low byte is the status returned by the SCSI command, 
+ *  with vendor specific bits masked.
+ *  
+ *  The next byte is the message which followed the SCSI status.
+ *  This allows a stos to be used, since the Intel is a little
+ *  endian machine.
+ *  
+ *  The final byte is a host return code, which is one of the following.
+ *  
+ *  I.e., from lsb to msb:
+ *  status  msg  host-code
+ *  
+ *  Our errors returned by OUR driver, NOT SCSI message.  Or'd with
+ *  SCSI message passed back to driver <IF any>.
+ */
+
+
+#define DID_OK          0x00   /* NO error                                */
+#define DID_NO_CONNECT  0x01   /* Couldn't connect before timeout period  */
+#define DID_BUS_BUSY    0x02   /* BUS stayed busy through time out period */
+#define DID_TIME_OUT    0x03   /* TIMED OUT for other reason              */
+#define DID_BAD_TARGET  0x04   /* BAD target.                             */
+#define DID_ABORT       0x05   /* Told to abort for some other reason     */
+#define DID_PARITY      0x06   /* Parity error                            */
+#define DID_ERROR       0x07   /* Internal error                          */
+#define DID_RESET       0x08   /* Reset by somebody.                      */
+#define DID_BAD_INTR    0x09   /* Got an interrupt we weren't expecting.  */
+#define DID_PASSTHROUGH 0x0a   /* Force command past mid-layer            */
+#define DID_SOFT_ERROR  0x0b   /* The low level driver just wants a retry */
+#define DRIVER_OK       0x00   /* Driver status                           */
+
+/*
+ *  These indicate the error that occurred, and what is available.
+ */
+
+#define DRIVER_BUSY         0x01
+#define DRIVER_SOFT         0x02
+#define DRIVER_MEDIA        0x03
+#define DRIVER_ERROR        0x04
+
+#define DRIVER_INVALID      0x05
+#define DRIVER_TIMEOUT      0x06
+#define DRIVER_HARD         0x07
+#define DRIVER_SENSE       0x08
+
+#define SUGGEST_RETRY       0x10
+#define SUGGEST_ABORT       0x20
+#define SUGGEST_REMAP       0x30
+#define SUGGEST_DIE         0x40
+#define SUGGEST_SENSE       0x80
+#define SUGGEST_IS_OK       0xff
+
+#define DRIVER_MASK         0x0f
+#define SUGGEST_MASK        0xf0
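
Tying the byte-layout comment above to the accessor macros defined earlier,
here is a worked example with assumed values: a CHECK CONDITION (raw SCSI
status 0x02) that the mid-level flagged with DRIVER_SENSE.

static void result_example(void)
{
        int result = (DRIVER_SENSE << 24) | (DID_OK << 16) | 0x02;

        /* status_byte() drops the vendor bits: (0x02 >> 1) & 0x1f == 0x01,
         * host_byte() yields 0x00 (DID_OK), driver_byte() yields 0x08. */
        printk("status 0x%x, host 0x%x, driver 0x%x\n",
               status_byte(result), host_byte(result), driver_byte(result));
}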
+
+#define MAX_COMMAND_SIZE    16
+#define SCSI_SENSE_BUFFERSIZE   64
+
+/*
+ *  SCSI command sets
+ */
+
+#define SCSI_UNKNOWN    0
+#define SCSI_1          1
+#define SCSI_1_CCS      2
+#define SCSI_2          3
+#define SCSI_3          4
+
+/*
+ *  Every SCSI command starts with a one byte OP-code.
+ *  The next byte's high three bits are the LUN of the
+ *  device.  Any multi-byte quantities are stored high byte
+ *  first, and may have a 5 bit MSB in the same byte
+ *  as the LUN.
+ */
+
+/*
+ *  As the scsi do command functions are intelligent, and may need to
+ *  redo a command, we need to keep track of the last command
+ *  executed on each one.
+ */
+
+#define WAS_RESET       0x01
+#define WAS_TIMEDOUT    0x02
+#define WAS_SENSE       0x04
+#define IS_RESETTING    0x08
+#define IS_ABORTING     0x10
+#define ASKED_FOR_SENSE 0x20
+#define SYNC_RESET      0x40
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+#include <asm/pgtable.h>
+#define CONTIGUOUS_BUFFERS(X,Y) \
+       (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data))
+#else
+#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data)
+#endif
+
+
+/*
+ * This is the crap from the old error handling code.  We have it in a special
+ * place so that we can more easily delete it later on.
+ */
+#include "scsi_obsolete.h"
+
+/*
+ * Add some typedefs so that we can prototype a bunch of the functions.
+ */
+typedef struct scsi_device Scsi_Device;
+typedef struct scsi_cmnd Scsi_Cmnd;
+typedef struct scsi_request Scsi_Request;
+
+#define SCSI_CMND_MAGIC 0xE25C23A5
+#define SCSI_REQ_MAGIC  0x75F6D354
+
+/*
+ * Here is where we prototype most of the mid-layer.
+ */
+
+/*
+ *  Initializes all SCSI devices.  This scans all scsi busses.
+ */
+
+extern unsigned int scsi_logging_level;                /* What do we log? */
+extern unsigned int scsi_dma_free_sectors;     /* How much room do we have left */
+extern unsigned int scsi_need_isa_buffer;      /* True if some devices need indirection
+                                                  * buffers */
+extern volatile int in_scan_scsis;
+extern const unsigned char scsi_command_size[8];
+
+
+/*
+ * These are the error handling functions defined in scsi_error.c
+ */
+extern void scsi_times_out(Scsi_Cmnd * SCpnt);
+extern void scsi_add_timer(Scsi_Cmnd * SCset, int timeout,
+                          void (*complete) (Scsi_Cmnd *));
+extern int scsi_delete_timer(Scsi_Cmnd * SCset);
+extern void scsi_error_handler(void *host);
+extern int scsi_sense_valid(Scsi_Cmnd *);
+extern int scsi_decide_disposition(Scsi_Cmnd * SCpnt);
+extern int scsi_block_when_processing_errors(Scsi_Device *);
+extern void scsi_sleep(int);
+
+/*
+ * Prototypes for functions in scsicam.c
+ */
+extern int  scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+                    unsigned int *cyls, unsigned int *hds,
+                    unsigned int *secs);
+
+/*
+ * Prototypes for functions in scsi_dma.c
+ */
+void scsi_resize_dma_pool(void);
+int scsi_init_minimal_dma_pool(void);
+void *scsi_malloc(unsigned int);
+int scsi_free(void *, unsigned int);
+
+/*
+ * Prototypes for functions in scsi_merge.c
+ */
+extern void recount_segments(Scsi_Cmnd * SCpnt);
+extern void initialize_merge_fn(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi_queue.c
+ */
+extern int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason);
+
+/*
+ * Prototypes for functions in scsi_lib.c
+ */
+extern int scsi_maybe_unblock_host(Scsi_Device * SDpnt);
+extern Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate,
+                                  int sectors);
+extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *);
+extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt);
+extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int);
+extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+                              int block_sectors);
+extern void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt);
+extern void scsi_request_fn(request_queue_t * q);
+extern int scsi_starvation_completion(Scsi_Device * SDpnt);
+
+/*
+ * Prototypes for functions in scsi.c
+ */
+extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt);
+extern void scsi_bottom_half_handler(void);
+extern void scsi_release_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_build_commandblocks(Scsi_Device * SDpnt);
+extern void scsi_done(Scsi_Cmnd * SCpnt);
+extern void scsi_finish_command(Scsi_Cmnd *);
+extern int scsi_retry_command(Scsi_Cmnd *);
+extern Scsi_Cmnd *scsi_allocate_device(Scsi_Device *, int, int);
+extern void __scsi_release_command(Scsi_Cmnd *);
+extern void scsi_release_command(Scsi_Cmnd *);
+extern void scsi_do_cmd(Scsi_Cmnd *, const void *cmnd,
+                       void *buffer, unsigned bufflen,
+                       void (*done) (struct scsi_cmnd *),
+                       int timeout, int retries);
+extern int scsi_dev_init(void);
+
+/*
+ * Newer request-based interfaces.
+ */
+extern Scsi_Request *scsi_allocate_request(Scsi_Device *);
+extern void scsi_release_request(Scsi_Request *);
+extern void scsi_wait_req(Scsi_Request *, const void *cmnd,
+                         void *buffer, unsigned bufflen,
+                         int timeout, int retries);
+
+extern void scsi_do_req(Scsi_Request *, const void *cmnd,
+                       void *buffer, unsigned bufflen,
+                       void (*done) (struct scsi_cmnd *),
+                       int timeout, int retries);
+extern int scsi_insert_special_req(Scsi_Request * SRpnt, int);
+extern void scsi_init_cmd_from_req(Scsi_Cmnd *, Scsi_Request *);
+
+
+/*
+ * Prototypes for functions/data in hosts.c
+ */
+extern int max_scsi_hosts;
+
+/*
+ * Prototypes for functions in scsi_proc.c
+ */
+extern void proc_print_scsidevice(Scsi_Device *, char *, int *, int);
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Prototypes for functions in constants.c
+ */
+extern void print_command(unsigned char *);
+extern void print_sense(const char *, Scsi_Cmnd *);
+extern void print_req_sense(const char *, Scsi_Request *);
+extern void print_driverbyte(int scsiresult);
+extern void print_hostbyte(int scsiresult);
+extern void print_status (int status);
+
+/*
+ *  The scsi_device struct contains what we know about each given scsi
+ *  device.
+ *
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sdev_foo instead of foo.  This would
+ * make it so much easier to grep through sources and so forth.  I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to 
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+
+struct scsi_device {
+/* private: */
+       /*
+        * This information is private to the scsi mid-layer.  Wrapping it in a
+        * struct private is a way of marking it in a sort of C++ type of way.
+        */
+       struct scsi_device *next;       /* Used for linked list */
+       struct scsi_device *prev;       /* Used for linked list */
+       wait_queue_head_t   scpnt_wait; /* Used to wait if
+                                          device is busy */
+       struct Scsi_Host *host;
+       request_queue_t request_queue;
+        atomic_t                device_active; /* commands checked out for device */
+       volatile unsigned short device_busy;    /* commands actually active on low-level */
+       int (*scsi_init_io_fn) (Scsi_Cmnd *);   /* Used to initialize
+                                                  new request */
+       Scsi_Cmnd *device_queue;        /* queue of SCSI Command structures */
+
+/* public: */
+       unsigned int id, lun, channel;
+
+       unsigned int manufacturer;      /* Manufacturer of device, for using 
+                                        * vendor-specific cmd's */
+       unsigned sector_size;   /* size in bytes */
+
+       int attached;           /* # of high level drivers attached to this */
+       int detected;           /* Delta attached - don't use in drivers! */
+       int access_count;       /* Count of open channels/mounts */
+
+       void *hostdata;         /* available to low-level driver */
+       devfs_handle_t de;      /* directory for the device      */
+       char type;
+       char scsi_level;
+       char vendor[8], model[16], rev[4];
+       unsigned char current_tag;      /* current tag */
+       unsigned char sync_min_period;  /* Not less than this period */
+       unsigned char sync_max_offset;  /* Not greater than this offset */
+       unsigned char queue_depth;      /* How deep a queue to use */
+
+       unsigned online:1;
+       unsigned writeable:1;
+       unsigned removable:1;
+       unsigned random:1;
+       unsigned has_cmdblocks:1;
+       unsigned changed:1;     /* Data invalid due to media change */
+       unsigned busy:1;        /* Used to prevent races */
+       unsigned lockable:1;    /* Able to prevent media removal */
+       unsigned borken:1;      /* Tell the Seagate driver to be 
+                                * painfully slow on this device */
+       unsigned tagged_supported:1;    /* Supports SCSI-II tagged queuing */
+       unsigned tagged_queue:1;        /* SCSI-II tagged queuing enabled */
+       unsigned disconnect:1;  /* can disconnect */
+       unsigned soft_reset:1;  /* Uses soft reset option */
+       unsigned sync:1;        /* Negotiate for sync transfers */
+       unsigned wide:1;        /* Negotiate for WIDE transfers */
+       unsigned single_lun:1;  /* Indicates we should only allow I/O to
+                                * one of the luns for the device at a 
+                                * time. */
+       unsigned was_reset:1;   /* There was a bus reset on the bus for 
+                                * this device */
+       unsigned expecting_cc_ua:1;     /* Expecting a CHECK_CONDITION/UNIT_ATTN
+                                        * because we did a bus reset. */
+       unsigned device_blocked:1;      /* Device returned QUEUE_FULL. */
+       unsigned ten:1;         /* support ten byte read / write */
+       unsigned remap:1;       /* support remapping  */
+       unsigned starved:1;     /* unable to process commands because
+                                  host busy */
+
+       /* Flag to allow revalidate to succeed in sd_open */
+       int allow_revalidate;
+};
+
+
+/*
+ * The Scsi_Cmnd structure is used by scsi.c internally, and for communication
+ * with low level drivers that support multiple outstanding commands.
+ */
+typedef struct scsi_pointer {
+       char *ptr;              /* data pointer */
+       int this_residual;      /* left in this buffer */
+       struct scatterlist *buffer;     /* which buffer */
+       int buffers_residual;   /* how many buffers left */
+
+        dma_addr_t dma_handle;
+
+       volatile int Status;
+       volatile int Message;
+       volatile int have_data_in;
+       volatile int sent_command;
+       volatile int phase;
+} Scsi_Pointer;
+
+/*
+ * This is essentially a slimmed down version of Scsi_Cmnd.  The point of
+ * having this is that requests that are injected into the queue as result
+ * of things like ioctls and character devices shouldn't be using a
+ * Scsi_Cmnd until such a time that the command is actually at the head
+ * of the queue and being sent to the driver.
+ */
+struct scsi_request {
+       int     sr_magic;
+       int     sr_result;      /* Status code from lower level driver */
+       unsigned char sr_sense_buffer[SCSI_SENSE_BUFFERSIZE];           /* obtained by REQUEST SENSE
+                                                * when CHECK CONDITION is
+                                                * received on original command 
+                                                * (auto-sense) */
+
+       struct Scsi_Host *sr_host;
+       Scsi_Device *sr_device;
+       Scsi_Cmnd *sr_command;
+       struct request sr_request;      /* A copy of the command we are
+                                  working on */
+       unsigned sr_bufflen;    /* Size of data buffer */
+       void *sr_buffer;                /* Data buffer */
+       int sr_allowed;
+       unsigned char sr_data_direction;
+       unsigned char sr_cmd_len;
+       unsigned char sr_cmnd[MAX_COMMAND_SIZE];
+       void (*sr_done) (struct scsi_cmnd *);   /* Mid-level done function */
+       int sr_timeout_per_command;
+       unsigned short sr_use_sg;       /* Number of pieces of scatter-gather */
+       unsigned short sr_sglist_len;   /* size of malloc'd scatter-gather list */
+       unsigned sr_underflow;  /* Return error if less than
+                                  this amount is transferred */
+};
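
A hedged sketch of the request-based interface this structure serves (the
helper function, timeout and retry values are illustrative): issue a six-byte
TEST UNIT READY without consuming a Scsi_Cmnd until the request reaches the
head of the queue.

static int test_unit_ready(Scsi_Device *SDpnt)
{
        unsigned char cmd[6] = { 0, 0, 0, 0, 0, 0 };    /* TEST UNIT READY */
        Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
        int result;

        if (SRpnt == NULL)
                return -ENOMEM;
        SRpnt->sr_data_direction = SCSI_DATA_NONE;
        scsi_wait_req(SRpnt, cmd, NULL, 0, 30 * HZ, 3);
        /* sr_result holds the packed status word described earlier. */
        result = SRpnt->sr_result;
        scsi_release_request(SRpnt);
        return result;
}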
+
+/*
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sc_foo instead of foo.  This would
+ * make it so much easier to grep through sources and so forth.  I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to 
+ * go back and retrofit at least some of the elements here with with the prefix.
+ */
+struct scsi_cmnd {
+       int     sc_magic;
+/* private: */
+       /*
+        * This information is private to the scsi mid-layer.  Wrapping it in a
+        * struct private is a way of marking it in a sort of C++ type of way.
+        */
+       struct Scsi_Host *host;
+       unsigned short state;
+       unsigned short owner;
+       Scsi_Device *device;
+       Scsi_Request *sc_request;
+       struct scsi_cmnd *next;
+       struct scsi_cmnd *reset_chain;
+
+       int eh_state;           /* Used for state tracking in error handler */
+       void (*done) (struct scsi_cmnd *);      /* Mid-level done function */
+       /*
+          A SCSI Command is assigned a nonzero serial_number when internal_cmnd
+          passes it to the driver's queue command function.  The serial_number
+          is cleared when scsi_done is entered indicating that the command has
+          been completed.  If a timeout occurs, the serial number at the moment
+          of timeout is copied into serial_number_at_timeout.  By subsequently
+          comparing the serial_number and serial_number_at_timeout fields
+          during abort or reset processing, we can detect whether the command
+          has already completed.  This also detects cases where the command has
+          completed and the SCSI Command structure has already been reused
+          for another command, so that we can avoid incorrectly aborting or
+          resetting the new command.
+        */
+
+       unsigned long serial_number;
+       unsigned long serial_number_at_timeout;
+
+       int retries;
+       int allowed;
+       int timeout_per_command;
+       int timeout_total;
+       int timeout;
+
+       /*
+        * We handle the timeout differently if it happens when a reset, 
+        * abort, etc are in process. 
+        */
+       unsigned volatile char internal_timeout;
+       struct scsi_cmnd *bh_next;      /* To enumerate the commands waiting 
+                                          to be processed. */
+
+/* public: */
+
+       unsigned int target;
+       unsigned int lun;
+       unsigned int channel;
+       unsigned char cmd_len;
+       unsigned char old_cmd_len;
+       unsigned char sc_data_direction;
+       unsigned char sc_old_data_direction;
+
+       /* These elements define the operation we are about to perform */
+       unsigned char cmnd[MAX_COMMAND_SIZE];
+       unsigned request_bufflen;       /* Actual request size */
+
+       struct timer_list eh_timeout;   /* Used to time out the command. */
+       void *request_buffer;           /* Actual requested buffer */
+        void **bounce_buffers;         /* Array of bounce buffers when using scatter-gather */
+
+       /* These elements define the operation we ultimately want to perform */
+       unsigned char data_cmnd[MAX_COMMAND_SIZE];
+       unsigned short old_use_sg;      /* We save  use_sg here when requesting
+                                        * sense info */
+       unsigned short use_sg;  /* Number of pieces of scatter-gather */
+       unsigned short sglist_len;      /* size of malloc'd scatter-gather list */
+       unsigned short abort_reason;    /* If the mid-level code requests an
+                                        * abort, this is the reason. */
+       unsigned bufflen;       /* Size of data buffer */
+       void *buffer;           /* Data buffer */
+
+       unsigned underflow;     /* Return error if less than
+                                  this amount is transferred */
+       unsigned old_underflow; /* save underflow here when reusing the
+                                * command for error handling */
+
+       unsigned transfersize;  /* How much we are guaranteed to
+                                  transfer with each SCSI transfer
+                                  (ie, between disconnect /
+                                  reconnects).  Probably == sector
+                                  size */
+
+       int resid;              /* Number of bytes requested to be
+                                  transferred less actual number
+                                  transferred (0 if not supported) */
+
+       struct request request; /* A copy of the command we are
+                                  working on */
+
+       unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE];              /* obtained by REQUEST SENSE
+                                                * when CHECK CONDITION is
+                                                * received on original command 
+                                                * (auto-sense) */
+
+       unsigned flags;
+
+       /*
+        * Used to indicate that a command which has timed out also
+        * completed normally.  Typically the completion function will
+        * do nothing but set this flag in this instance because the
+        * timeout handler is already running.
+        */
+       unsigned done_late:1;
+
+       /* Low-level done function - can be used by low-level driver to point
+        *        to completion function.  Not used by mid/upper level code. */
+       void (*scsi_done) (struct scsi_cmnd *);
+
+       /*
+        * The following fields can be written to by the host specific code. 
+        * Everything else should be left alone. 
+        */
+
+       Scsi_Pointer SCp;       /* Scratchpad used by some host adapters */
+
+       unsigned char *host_scribble;   /* The host adapter is allowed to
+                                          * call scsi_malloc and get some memory
+                                          * and hang it here.     The host adapter
+                                          * is also expected to call scsi_free
+                                          * to release this memory.  (The memory
+                                          * obtained by scsi_malloc is guaranteed
+                                          * to be at an address < 16Mb). */
+
+       int result;             /* Status code from lower level driver */
+
+       unsigned char tag;      /* SCSI-II queued command tag */
+       unsigned long pid;      /* Process ID, starts at 0 */
+};
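
Restating the serial_number comment inside the structure above as code; this
is a hypothetical helper for illustration, not mid-layer API.

static int already_completed(Scsi_Cmnd *SCpnt)
{
        /* scsi_done() zeroes serial_number; a mismatch with the value
         * captured at timeout means the command finished (or the structure
         * was reused), so it must not be aborted or reset. */
        return SCpnt->serial_number == 0 ||
               SCpnt->serial_number != SCpnt->serial_number_at_timeout;
}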
+
+/*
+ *  Flag bit for the internal_timeout array
+ */
+#define NORMAL_TIMEOUT 0
+
+/*
+ * Definitions and prototypes used for scsi mid-level queue.
+ */
+#define SCSI_MLQUEUE_HOST_BUSY   0x1055
+#define SCSI_MLQUEUE_DEVICE_BUSY 0x1056
+
+#define SCSI_SLEEP(QUEUE, CONDITION) {             \
+    if (CONDITION) {                               \
+       DECLARE_WAITQUEUE(wait, current);           \
+       add_wait_queue(QUEUE, &wait);               \
+       for(;;) {                                   \
+       set_current_state(TASK_UNINTERRUPTIBLE);    \
+       if (CONDITION) {                            \
+            if (in_interrupt())                            \
+               panic("scsi: trying to call schedule() in interrupt" \
+                     ", file %s, line %d.\n", __FILE__, __LINE__);  \
+           schedule();                 \
+        }                              \
+       else                            \
+           break;                      \
+       }                               \
+       remove_wait_queue(QUEUE, &wait);\
+       current->state = TASK_RUNNING;  \
+    }; }
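
An assumed usage of SCSI_SLEEP (the surrounding function is hypothetical):
sleep on the device's wait queue until its busy count drains. Note the macro
panics if the condition would make it schedule in interrupt context.

static void wait_until_idle(Scsi_Device *SDpnt)
{
        SCSI_SLEEP(&SDpnt->scpnt_wait, SDpnt->device_busy != 0);
}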
+
+/*
+ * old style reset request from external source
+ * (private to sg.c and scsi_error.c, supplied by scsi_obsolete.c)
+ */
+#define SCSI_TRY_RESET_DEVICE  1
+#define SCSI_TRY_RESET_BUS     2
+#define SCSI_TRY_RESET_HOST    3
+
+extern int scsi_reset_provider(Scsi_Device *, int);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4 
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_dma.c b/xen-2.4.16/drivers/scsi/scsi_dma.c
new file mode 100644 (file)
index 0000000..5594828
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ *  scsi_dma.c Copyright (C) 2000 Eric Youngdale
+ *
+ *  mid-level SCSI DMA bounce buffer allocator
+ *
+ */
+
+#define __NO_VERSION__
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * PAGE_SIZE must be a multiple of the sector size (512).  True
+ * for all reasonably recent architectures (even the VAX...).
+ */
+#define SECTOR_SIZE            512
+#define SECTORS_PER_PAGE       (PAGE_SIZE/SECTOR_SIZE)
+
+#if SECTORS_PER_PAGE <= 8
+typedef unsigned char FreeSectorBitmap;
+#elif SECTORS_PER_PAGE <= 32
+typedef unsigned int FreeSectorBitmap;
+#else
+#error You lose: SECTORS_PER_PAGE > 32 is unsupported.
+#endif
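+
+/*
+ * One FreeSectorBitmap word tracks one page of the pool: bit j set
+ * means sector j of that page is allocated.  With 4K pages,
+ * SECTORS_PER_PAGE is 8 and an unsigned char suffices; with 16K
+ * pages it is 32 and an unsigned int is needed.
+ */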
+
+/*
+ * Used for access to internal allocator used for DMA safe buffers.
+ */
+static spinlock_t allocator_request_lock = SPIN_LOCK_UNLOCKED;
+
+static FreeSectorBitmap *dma_malloc_freelist = NULL;
+static int need_isa_bounce_buffers;
+static unsigned int dma_sectors = 0;
+unsigned int scsi_dma_free_sectors = 0;
+unsigned int scsi_need_isa_buffer = 0;
+static unsigned char **dma_malloc_pages = NULL;
+
+/*
+ * Function:    scsi_malloc
+ *
+ * Purpose:     Allocate memory from the DMA-safe pool.
+ *
+ * Arguments:   len       - amount of memory we need.
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Pointer to memory block.
+ *
+ * Notes:       Prior to the new queue code, this function was not SMP-safe.
+ *              This function can only allocate in units of sectors
+ *              (i.e. 512 bytes).
+ *
+ *              We cannot use the normal system allocator because we need
+ *              to be able to guarantee that we can process a complete disk
+ *              I/O request without touching the system allocator.  Think
+ *              about it - if the system were heavily swapping, and tried to
+ *              write out a block of memory to disk, and the SCSI code needed
+ *              to allocate more memory in order to be able to write the
+ *              data to disk, you would wedge the system.
+ */
+void *scsi_malloc(unsigned int len)
+{
+       unsigned int nbits, mask;
+       unsigned long flags;
+
+       int i, j;
+       if (len % SECTOR_SIZE != 0 || len > PAGE_SIZE)
+               return NULL;
+
+       nbits = len >> 9;
+       mask = (1 << nbits) - 1;
+
+       spin_lock_irqsave(&allocator_request_lock, flags);
+
+       for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+               for (j = 0; j <= SECTORS_PER_PAGE - nbits; j++) {
+                       if ((dma_malloc_freelist[i] & (mask << j)) == 0) {
+                               dma_malloc_freelist[i] |= (mask << j);
+                               scsi_dma_free_sectors -= nbits;
+#ifdef DEBUG
+                               SCSI_LOG_MLQUEUE(3, printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9)));
+                               printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9));
+#endif
+                               spin_unlock_irqrestore(&allocator_request_lock, flags);
+                               return (void *) ((unsigned long) dma_malloc_pages[i] + (j << 9));
+                       }
+               }
+       spin_unlock_irqrestore(&allocator_request_lock, flags);
+       return NULL;            /* Nope.  No more */
+}
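+
+/*
+ * Illustrative sketch (not part of the original source): a caller
+ * needing a 1KB DMA-safe scratch buffer pairs scsi_malloc with
+ * scsi_free, passing the same length (two 512-byte sectors) to both:
+ *
+ *     unsigned char *buf = scsi_malloc(1024);
+ *     if (buf) {
+ *             ... DMA into buf ...
+ *             scsi_free(buf, 1024);
+ *     }
+ */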
+
+/*
+ * Function:    scsi_free
+ *
+ * Purpose:     Free memory into the DMA-safe pool.
+ *
+ * Arguments:   ptr       - data block we are freeing.
+ *              len       - size of block we are freeing.
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       This function *must* only be used to free memory
+ *              allocated from scsi_malloc().
+ *
+ *              Prior to the new queue code, this function was not SMP-safe.
+ *              This function can only allocate in units of sectors
+ *              (i.e. 512 bytes).
+ */
+int scsi_free(void *obj, unsigned int len)
+{
+       unsigned int page, sector, nbits, mask;
+       unsigned long flags;
+
+#ifdef DEBUG
+       unsigned long ret = 0;
+
+#ifdef __mips__
+       __asm__ __volatile__("move\t%0,$31":"=r"(ret));
+#else
+       ret = (unsigned long) __builtin_return_address(0);
+#endif
+       printk("scsi_free %p %d\n", obj, len);
+       SCSI_LOG_MLQUEUE(3, printk("SFree: %p %d\n", obj, len));
+#endif
+
+       spin_lock_irqsave(&allocator_request_lock, flags);
+
+       for (page = 0; page < dma_sectors / SECTORS_PER_PAGE; page++) {
+               unsigned long page_addr = (unsigned long) dma_malloc_pages[page];
+               if ((unsigned long) obj >= page_addr &&
+                   (unsigned long) obj < page_addr + PAGE_SIZE) {
+                       sector = (((unsigned long) obj) - page_addr) >> 9;
+
+                       nbits = len >> 9;
+                       mask = (1 << nbits) - 1;
+
+                       if (sector + nbits > SECTORS_PER_PAGE)
+                               panic("scsi_free:Bad memory alignment");
+
+                       if ((dma_malloc_freelist[page] &
+                            (mask << sector)) != (mask << sector)) {
+#ifdef DEBUG
+                               printk("scsi_free(obj=%p, len=%d) called from %08lx\n",
+                                      obj, len, ret);
+#endif
+                               panic("scsi_free:Trying to free unused memory");
+                       }
+                       scsi_dma_free_sectors += nbits;
+                       dma_malloc_freelist[page] &= ~(mask << sector);
+                       spin_unlock_irqrestore(&allocator_request_lock, flags);
+                       return 0;
+               }
+       }
+       panic("scsi_free:Bad offset");
+}
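+
+/*
+ * Note the contract above: obj and len must exactly cover a range
+ * previously handed out by scsi_malloc().  Freeing an address outside
+ * the pool, or sectors that are not currently allocated, panics
+ * rather than silently corrupting the allocator.
+ */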
+
+
+/*
+ * Function:    scsi_resize_dma_pool
+ *
+ * Purpose:     Ensure that the DMA pool is sufficiently large to be
+ *              able to guarantee that we can always process I/O requests
+ *              without calling the system allocator.
+ *
+ * Arguments:   None.
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       Prior to the new queue code, this function was not SMP-safe.
+ *              Go through the device list and recompute the most appropriate
+ *              size for the dma pool.  Then grab more memory (as required).
+ */
+void scsi_resize_dma_pool(void)
+{
+       int i, k;
+       unsigned long size;
+       unsigned long flags;
+       struct Scsi_Host *shpnt;
+       struct Scsi_Host *host = NULL;
+       Scsi_Device *SDpnt;
+       FreeSectorBitmap *new_dma_malloc_freelist = NULL;
+       unsigned int new_dma_sectors = 0;
+       unsigned int new_need_isa_buffer = 0;
+       unsigned char **new_dma_malloc_pages = NULL;
+       int out_of_space = 0;
+
+       spin_lock_irqsave(&allocator_request_lock, flags);
+
+       if (!scsi_hostlist) {
+               /*
+                * Free up the DMA pool.
+                */
+               if (scsi_dma_free_sectors != dma_sectors)
+                       panic("SCSI DMA pool memory leak %d %d\n", scsi_dma_free_sectors, dma_sectors);
+
+               for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++)
+                       free_pages((unsigned long) dma_malloc_pages[i], 0);
+               if (dma_malloc_pages)
+                       kfree((char *) dma_malloc_pages);
+               dma_malloc_pages = NULL;
+               if (dma_malloc_freelist)
+                       kfree((char *) dma_malloc_freelist);
+               dma_malloc_freelist = NULL;
+               dma_sectors = 0;
+               scsi_dma_free_sectors = 0;
+               spin_unlock_irqrestore(&allocator_request_lock, flags);
+               return;
+       }
+       /* Next, check to see if we need to extend the DMA buffer pool */
+
+       new_dma_sectors = 2 * SECTORS_PER_PAGE;         /* Base value we use */
+
+       if (__pa(high_memory) - 1 > ISA_DMA_THRESHOLD)
+               need_isa_bounce_buffers = 1;
+       else
+               need_isa_bounce_buffers = 0;
+
+       if (scsi_devicelist)
+               for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next)
+                       new_dma_sectors += SECTORS_PER_PAGE;    /* Increment for each host */
+
+       for (host = scsi_hostlist; host; host = host->next) {
+               for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                       /*
+                        * sd and sr drivers allocate scatterlists.
+                        * the sr driver may additionally allocate, per command, one 2048-byte
+                        * or two 1024-byte buffers (2k sector size with a 1k filesystem).
+                        * sg driver allocates buffers < 4k.
+                        * st driver does not need buffers from the dma pool.
+                        * estimate 4k buffer/command for devices of unknown type (should panic).
+                        */
+                       if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM ||
+                           SDpnt->type == TYPE_DISK || SDpnt->type == TYPE_MOD) {
+                               int nents = host->sg_tablesize;
+#ifdef DMA_CHUNK_SIZE
+                               /* If the architecture does DMA sg merging, make sure
+                                  we count with at least 64 entries even for HBAs
+                                  which handle very few sg entries.  */
+                               if (nents < 64) nents = 64;
+#endif
+                               new_dma_sectors += ((nents *
+                               sizeof(struct scatterlist) + 511) >> 9) *
+                                SDpnt->queue_depth;
+                               if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM)
+                                       new_dma_sectors += (2048 >> 9) * SDpnt->queue_depth;
+                       } else if (SDpnt->type == TYPE_SCANNER ||
+                                  SDpnt->type == TYPE_PRINTER ||
+                                  SDpnt->type == TYPE_PROCESSOR ||
+                                  SDpnt->type == TYPE_COMM ||
+                                  SDpnt->type == TYPE_MEDIUM_CHANGER ||
+                                  SDpnt->type == TYPE_ENCLOSURE) {
+                               new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+                       } else {
+                               if (SDpnt->type != TYPE_TAPE) {
+                                       printk("resize_dma_pool: unknown device type %d\n", SDpnt->type);
+                                       new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth;
+                               }
+                       }
+
+                       if (host->unchecked_isa_dma &&
+                           need_isa_bounce_buffers &&
+                           SDpnt->type != TYPE_TAPE) {
+                               new_dma_sectors += (PAGE_SIZE >> 9) * host->sg_tablesize *
+                                   SDpnt->queue_depth;
+                               new_need_isa_buffer++;
+                       }
+               }
+       }
+
+#ifdef DEBUG_INIT
+       printk("resize_dma_pool: needed dma sectors = %d\n", new_dma_sectors);
+#endif
+
+       /* Round up to a multiple of 16 sectors; the 16-bit mask also
+        * caps the pool at 0xfff0 sectors, i.e. limits DMA memory to ~32MB: */
+       new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+
+       /*
+        * We never shrink the buffers - this leads to
+        * race conditions that I would rather not even think
+        * about right now.
+        */
+#if 0                          /* Why do this? No gain and risks out_of_space */
+       if (new_dma_sectors < dma_sectors)
+               new_dma_sectors = dma_sectors;
+#endif
+       if (new_dma_sectors <= dma_sectors) {
+               spin_unlock_irqrestore(&allocator_request_lock, flags);
+               return;         /* best to quit while we are in front */
+        }
+
+       for (k = 0; k < 20; ++k) {      /* just in case */
+               out_of_space = 0;
+               size = (new_dma_sectors / SECTORS_PER_PAGE) *
+                   sizeof(FreeSectorBitmap);
+               new_dma_malloc_freelist = (FreeSectorBitmap *)
+                   kmalloc(size, GFP_ATOMIC);
+               if (new_dma_malloc_freelist) {
+                        memset(new_dma_malloc_freelist, 0, size);
+                       size = (new_dma_sectors / SECTORS_PER_PAGE) *
+                           sizeof(*new_dma_malloc_pages);
+                       new_dma_malloc_pages = (unsigned char **)
+                           kmalloc(size, GFP_ATOMIC);
+                       if (!new_dma_malloc_pages) {
+                               size = (new_dma_sectors / SECTORS_PER_PAGE) *
+                                   sizeof(FreeSectorBitmap);
+                               kfree((char *) new_dma_malloc_freelist);
+                               out_of_space = 1;
+                       } else {
+                                memset(new_dma_malloc_pages, 0, size);
+                        }
+               } else
+                       out_of_space = 1;
+
+               if ((!out_of_space) && (new_dma_sectors > dma_sectors)) {
+                       for (i = dma_sectors / SECTORS_PER_PAGE;
+                          i < new_dma_sectors / SECTORS_PER_PAGE; i++) {
+                               new_dma_malloc_pages[i] = (unsigned char *)
+                                   __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+                               if (!new_dma_malloc_pages[i])
+                                       break;
+                       }
+                       if (i != new_dma_sectors / SECTORS_PER_PAGE) {  /* clean up */
+                               int pages_got = i;      /* pages actually allocated */
+
+                               out_of_space = 1;
+                               for (i = 0; i < pages_got; ++i)
+                                       free_pages((unsigned long) new_dma_malloc_pages[i], 0);
+                       }
+               }
+               if (out_of_space) {     /* try scaling down new_dma_sectors request */
+                       printk("scsi::resize_dma_pool: WARNING, dma_sectors=%u, "
+                              "wanted=%u, scaling\n", dma_sectors, new_dma_sectors);
+                       if (new_dma_sectors < (8 * SECTORS_PER_PAGE))
+                               break;  /* pretty well hopeless ... */
+                       new_dma_sectors = (new_dma_sectors * 3) / 4;
+                       new_dma_sectors = (new_dma_sectors + 15) & 0xfff0;
+                       if (new_dma_sectors <= dma_sectors)
+                               break;  /* stick with what we have got */
+               } else
+                       break;  /* found space ... */
+       }                       /* end of for loop */
+       if (out_of_space) {
+               spin_unlock_irqrestore(&allocator_request_lock, flags);
+               scsi_need_isa_buffer = new_need_isa_buffer;     /* some useful info */
+               printk("      WARNING, not enough memory, pool not expanded\n");
+               return;
+       }
+       /* When we modify the actual DMA list, we need to
+        * protect things
+        */
+       if (dma_malloc_freelist) {
+               size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+               memcpy(new_dma_malloc_freelist, dma_malloc_freelist, size);
+               kfree((char *) dma_malloc_freelist);
+       }
+       dma_malloc_freelist = new_dma_malloc_freelist;
+
+       if (dma_malloc_pages) {
+               size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+               memcpy(new_dma_malloc_pages, dma_malloc_pages, size);
+               kfree((char *) dma_malloc_pages);
+       }
+       scsi_dma_free_sectors += new_dma_sectors - dma_sectors;
+       dma_malloc_pages = new_dma_malloc_pages;
+       dma_sectors = new_dma_sectors;
+       scsi_need_isa_buffer = new_need_isa_buffer;
+
+       spin_unlock_irqrestore(&allocator_request_lock, flags);
+
+#ifdef DEBUG_INIT
+       printk("resize_dma_pool: dma free sectors   = %d\n", scsi_dma_free_sectors);
+       printk("resize_dma_pool: dma sectors        = %d\n", dma_sectors);
+       printk("resize_dma_pool: need isa buffers   = %d\n", scsi_need_isa_buffer);
+#endif
+}
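+
+/*
+ * Worked example (purely illustrative; real numbers depend on the
+ * architecture and HBA): with 4K pages (SECTORS_PER_PAGE = 8), one
+ * host with sg_tablesize = 16, and a single disk at queue_depth = 2,
+ * assuming a 16-byte struct scatterlist:
+ *
+ *     base                          2 * 8              = 16 sectors
+ *     per-host increment            16 + 8             = 24 sectors
+ *     scatterlists                  24 + ((16*16 + 511) >> 9) * 2
+ *                                                      = 26 sectors
+ *     rounded to a multiple of 16                      = 32 sectors (16KB)
+ */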
+
+/*
+ * Function:    scsi_init_minimal_dma_pool
+ *
+ * Purpose:     Allocate a minimal (1-page) DMA pool.
+ *
+ * Arguments:   None.
+ *
+ * Lock status: No locks assumed to be held.  This function is SMP-safe.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       
+ */
+int scsi_init_minimal_dma_pool(void)
+{
+       unsigned long size;
+       unsigned long flags;
+       int has_space = 0;
+
+       spin_lock_irqsave(&allocator_request_lock, flags);
+
+       dma_sectors = PAGE_SIZE / SECTOR_SIZE;
+       scsi_dma_free_sectors = dma_sectors;
+       /*
+        * Set up a minimal DMA buffer list - this will be used during scan_scsis
+        * in some cases.
+        */
+
+       /* One bit per sector to indicate free/busy */
+       size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap);
+       dma_malloc_freelist = (FreeSectorBitmap *)
+           kmalloc(size, GFP_ATOMIC);
+       if (dma_malloc_freelist) {
+                memset(dma_malloc_freelist, 0, size);
+               /* One pointer per page for the page list */
+               size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages);
+               dma_malloc_pages = (unsigned char **) kmalloc(size, GFP_ATOMIC);
+               if (dma_malloc_pages) {
+                       memset(dma_malloc_pages, 0, size);
+                       dma_malloc_pages[0] = (unsigned char *)
+                           __get_free_pages(GFP_ATOMIC | GFP_DMA, 0);
+                       if (dma_malloc_pages[0])
+                               has_space = 1;
+               }
+       }
+       if (!has_space) {
+               if (dma_malloc_freelist) {
+                       kfree((char *) dma_malloc_freelist);
+                       if (dma_malloc_pages)
+                               kfree((char *) dma_malloc_pages);
+               }
+               spin_unlock_irqrestore(&allocator_request_lock, flags);
+               printk("scsi::init_module: failed, out of memory\n");
+               return 1;
+       }
+
+       spin_unlock_irqrestore(&allocator_request_lock, flags);
+       return 0;
+}
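+
+/*
+ * With 4K pages the minimal pool above is a single page, i.e.
+ * PAGE_SIZE / SECTOR_SIZE = 8 free sectors - just enough DMA-safe
+ * memory for the buffers needed while scan_scsis probes the bus.
+ */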
diff --git a/xen-2.4.16/drivers/scsi/scsi_error.c b/xen-2.4.16/drivers/scsi/scsi_error.c
new file mode 100644 (file)
index 0000000..e4a69bc
--- /dev/null
@@ -0,0 +1,2027 @@
+/*
+ *  scsi_error.c Copyright (C) 1997 Eric Youngdale
+ *
+ *  SCSI error/timeout handling
+ *      Initial versions: Eric Youngdale.  Based upon conversations with
+ *                        Leonard Zubkoff and David Miller at Linux Expo, 
+ *                        ideas originating from all over the place.
+ *
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * We must always allow SHUTDOWN_SIGS.  Even if we are not a module,
+ * the host drivers that we are using may be loaded as modules, and
+ * when we unload these, we need to ensure that the error handler thread
+ * can be shut down.
+ *
+ * Note - when we unload a module, we send a SIGHUP.  We mustn't enable
+ * SIGTERM, as this is how init shuts things down when you go to
+ * single-user mode.  For that matter, init also sends SIGKILL, so we
+ * mustn't enable that one either; another option would be SIGPWR,
+ * I suppose.
+ */
+#define SHUTDOWN_SIGS  (sigmask(SIGHUP))
+
+#ifdef DEBUG
+#define SENSE_TIMEOUT SCSI_TIMEOUT
+#define ABORT_TIMEOUT SCSI_TIMEOUT
+#define RESET_TIMEOUT SCSI_TIMEOUT
+#else
+#define SENSE_TIMEOUT (10*HZ)
+#define RESET_TIMEOUT (2*HZ)
+#define ABORT_TIMEOUT (15*HZ)
+#endif
+
+#define STATIC
+
+/*
+ * These should *probably* be handled by the host itself.
+ * Since it is allowed to sleep, it probably should.
+ */
+#define BUS_RESET_SETTLE_TIME   (5*HZ)
+#define HOST_RESET_SETTLE_TIME  (10*HZ)
+
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";
+
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
+STATIC int scsi_request_sense(Scsi_Cmnd *);
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
+STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
+STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
+STATIC int scsi_try_host_reset(Scsi_Cmnd *);
+STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
+STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
+STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
+STATIC void scsi_restart_operations(struct Scsi_Host *);
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);
+
+
+/*
+ * Function:    scsi_add_timer()
+ *
+ * Purpose:     Start timeout timer for a single scsi command.
+ *
+ * Arguments:   SCset   - command that is about to start running.
+ *              timeout - amount of time to allow this command to run.
+ *              complete - timeout function to call if timer isn't
+ *                      canceled.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       This should be turned into an inline function.
+ *
+ * More Notes:  Each scsi command has its own timer, and as it is added to
+ *              the queue, we set up the timer.  When the command completes,
+ *              we cancel the timer.  Pretty simple, really, especially
+ *              compared to the old way of handling this.
+ */
+void scsi_add_timer(Scsi_Cmnd * SCset,
+                   int timeout,
+                   void (*complete) (Scsi_Cmnd *))
+{
+
+       /*
+        * If the clock was already running for this command, then
+        * first delete the timer.  The timer handling code gets rather
+        * confused if we don't do this.
+        */
+       if (SCset->eh_timeout.function != NULL) {
+               del_timer(&SCset->eh_timeout);
+       }
+       SCset->eh_timeout.data = (unsigned long) SCset;
+       SCset->eh_timeout.expires = jiffies + timeout;
+       SCset->eh_timeout.function = (void (*)(unsigned long)) complete;
+
+       SCset->done_late = 0;
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));
+
+       add_timer(&SCset->eh_timeout);
+
+}
+
+/*
+ * Function:    scsi_delete_timer()
+ *
+ * Purpose:     Delete/cancel timer for a given function.
+ *
+ * Arguments:   SCset   - command that we are canceling timer for.
+ *
+ * Returns:     1 if we were able to detach the timer.  0 if we
+ *              blew it, and the timer function has already started
+ *              to run.
+ *
+ * Notes:       This should be turned into an inline function.
+ */
+int scsi_delete_timer(Scsi_Cmnd * SCset)
+{
+       int rtn;
+
+       rtn = del_timer(&SCset->eh_timeout);
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));
+
+       SCset->eh_timeout.data = (unsigned long) NULL;
+       SCset->eh_timeout.function = NULL;
+
+       return rtn;
+}
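+
+/*
+ * Illustrative pairing (sketch, not part of the original source):
+ * the timer is armed when a command is dispatched and disarmed on
+ * normal completion; a 0 return from scsi_delete_timer() means the
+ * timeout handler already fired, so the completion is "late" (see
+ * done_late in scsi_eh_done() below):
+ *
+ *     scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out);
+ *     ...
+ *     if (!scsi_delete_timer(SCpnt))
+ *             SCpnt->done_late = 1;
+ */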
+
+/*
+ * Function:    scsi_times_out()
+ *
+ * Purpose:     Timeout function for normal scsi commands.
+ *
+ * Arguments:   SCpnt   - command that is timing out.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       We do not need to lock this.  There is the potential for
+ *              a race only in that the normal completion handling might
+ *              run, but if the normal completion function determines
+ *              that the timer has already fired, then it mustn't do
+ *              anything.
+ */
+void scsi_times_out(Scsi_Cmnd * SCpnt)
+{
+       /*
+        * Notify the low-level code that this operation failed and we are
+        * repossessing the command.
+        */
+#ifdef ERIC_neverdef
+       /*
+        * FIXME(eric)
+        * Allow the host adapter to push a queue ordering tag
+        * out to the bus to force the command in question to complete.
+        * If the host wants to do this, then we just restart the timer
+        * for the command.  Before we really do this, some real thought
+        * as to the optimum way to handle this should be done.  We *do*
+        * need to force ordering every so often to ensure that all requests
+        * do eventually complete, but I am not sure if this is the best way
+        * to actually go about it.
+        *
+        * Better yet, force a sync here, but don't block since we are in an
+        * interrupt.
+        */
+       if (SCpnt->host->hostt->eh_ordered_queue_tag) {
+               if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
+                       scsi_add_timer(SCpnt, SCpnt->internal_timeout,
+                                      scsi_times_out);
+                       return;
+               }
+       }
+       /*
+        * FIXME(eric) - add a second special interface to handle this
+        * case.  Ideally that interface can also be used to request
+        * a queue ordering tag.
+        */
+       if (SCpnt->host->can_queue) {
+               SCpnt->host->hostt->queuecommand(SCpnt, NULL);
+       }
+#endif
+
+       /* Set the serial_number_at_timeout to the current serial_number */
+       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+       SCpnt->eh_state = FAILED;
+       SCpnt->state = SCSI_STATE_TIMEOUT;
+       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+
+       SCpnt->host->in_recovery = 1;
+       SCpnt->host->host_failed++;
+
+       SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
+                                  atomic_read(&SCpnt->host->host_active),
+                                  SCpnt->host->host_busy,
+                                  SCpnt->host->host_failed));
+
+       /*
+        * If the host is having troubles, then look to see if this was the last
+        * command that might have failed.  If so, wake up the error handler.
+        */
+       if (SCpnt->host->eh_wait == NULL) {
+               panic("Error handler thread not present at %p %p %s %d", 
+                     SCpnt, SCpnt->host, __FILE__, __LINE__);
+       }
+       if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
+               up(SCpnt->host->eh_wait);
+       }
+}
+
+/*
+ * Function     scsi_block_when_processing_errors
+ *
+ * Purpose:     Prevent more commands from being queued while error recovery
+ *              is taking place.
+ *
+ * Arguments:   SDpnt - device on which we are performing recovery.
+ *
+ * Returns:     FALSE   The device was taken offline by error recovery.
+ *              TRUE    OK to proceed.
+ *
+ * Notes:       We block until the host is out of error recovery, and then
+ *              check to see whether the host or the device is offline.
+ */
+int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
+{
+
+       SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));
+
+       return SDpnt->online;
+}
+
+/*
+ * Function:    scsi_eh_times_out()
+ *
+ * Purpose:     Timeout function for error handling.
+ *
+ * Arguments:   SCpnt   - command that is timing out.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       During error handling, the kernel thread will be sleeping
+ *              waiting for some action to complete on the device.  Our only
+ *              job is to record that it timed out, and to wake up the
+ *              thread.
+ */
+STATIC
+void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
+{
+       SCpnt->eh_state = SCSI_STATE_TIMEOUT;
+       SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));
+
+       if (SCpnt->host->eh_action != NULL)
+               up(SCpnt->host->eh_action);
+       else
+               printk("Missing scsi error handler thread\n");
+}
+
+
+/*
+ * Function:    scsi_eh_done()
+ *
+ * Purpose:     Completion function for error handling.
+ *
+ * Arguments:   SCpnt   - command that is timing out.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       During error handling, the kernel thread will be sleeping
+ *              waiting for some action to complete on the device.  Our only
+ *              job is to record that the action completed, and to wake up the
+ *              thread.
+ */
+STATIC
+void scsi_eh_done(Scsi_Cmnd * SCpnt)
+{
+       int     rtn;
+
+       /*
+        * If the timeout handler is already running, then just set the
+        * flag which says we finished late, and return.  We have no
+        * way of stopping the timeout handler from running, so we must
+        * always defer to it.
+        */
+       rtn = del_timer(&SCpnt->eh_timeout);
+       if (!rtn) {
+               SCpnt->done_late = 1;
+               return;
+       }
+
+       SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+       SCpnt->eh_state = SUCCESS;
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
+                                         SCpnt->result));
+
+       if (SCpnt->host->eh_action != NULL)
+               up(SCpnt->host->eh_action);
+}
+
+/*
+ * Function:    scsi_eh_action_done()
+ *
+ * Purpose:     Completion function for error handling.
+ *
+ * Arguments:   SCpnt   - command that is timing out.
+ *              answer  - boolean that indicates whether operation succeeded.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       This callback is only used for abort and reset operations.
+ */
+STATIC
+void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
+{
+       SCpnt->request.rq_status = RQ_SCSI_DONE;
+
+       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+       SCpnt->eh_state = (answer ? SUCCESS : FAILED);
+
+       if (SCpnt->host->eh_action != NULL)
+               up(SCpnt->host->eh_action);
+}
+
+/*
+ * Function:  scsi_sense_valid()
+ *
+ * Purpose:     Determine whether a host has automatically obtained sense
+ *              information or not.  If we have it, then give a recommendation
+ *              as to what we should do next.
+ */
+int scsi_sense_valid(Scsi_Cmnd * SCpnt)
+{
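+       /*
+        * Byte 0 of the sense buffer holds the response code; 0x70
+        * (current errors) and 0x71 (deferred errors) mark the sense
+        * data as valid, hence the test of bits 4-6 against 7 below.
+        */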
+       if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
+               return FALSE;
+       }
+       return TRUE;
+}
+
+/*
+ * Function:  scsi_eh_retry_command()
+ *
+ * Purpose:     Retry the original command
+ *
+ * Returns:     SUCCESS - we were able to get the sense data.
+ *              FAILED  - we were not able to get the sense data.
+ * 
+ * Notes:       This function will *NOT* return until the command either
+ *              times out, or it completes.
+ */
+STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
+{
+       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+              sizeof(SCpnt->data_cmnd));
+       SCpnt->request_buffer = SCpnt->buffer;
+       SCpnt->request_bufflen = SCpnt->bufflen;
+       SCpnt->use_sg = SCpnt->old_use_sg;
+       SCpnt->cmd_len = SCpnt->old_cmd_len;
+       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+       SCpnt->underflow = SCpnt->old_underflow;
+
+       scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);
+
+       /*
+        * Hey, we are done.  Let's look to see what happened.
+        */
+       return SCpnt->eh_state;
+}
+
+/*
+ * Function:  scsi_request_sense()
+ *
+ * Purpose:     Request sense data from a particular target.
+ *
+ * Returns:     SUCCESS - we were able to get the sense data.
+ *              FAILED  - we were not able to get the sense data.
+ * 
+ * Notes:       Some hosts automatically obtain this information, others
+ *              require that we obtain it on our own.
+ *
+ *              This function will *NOT* return until the command either
+ *              times out, or it completes.
+ */
+STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
+{
+       static unsigned char generic_sense[6] =
+       {REQUEST_SENSE, 0, 0, 0, 255, 0};
+       unsigned char scsi_result0[256], *scsi_result = NULL;
+       int saved_result;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
+              sizeof(generic_sense));
+
+       if (SCpnt->device->scsi_level <= SCSI_2)
+               SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+       scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
+           ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);
+
+       if (scsi_result == NULL) {
+               printk("cannot allocate scsi_result in scsi_request_sense.\n");
+               return FAILED;
+       }
+       /*
+        * Zero the sense buffer.  Some host adapters automatically request
+        * sense on every command, so it is not a good idea for SCpnt->request_buffer
+        * and SCpnt->sense_buffer to point to the same address (DB).
+        * 0 is not a valid sense code. 
+        */
+       memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+       memset((void *) scsi_result, 0, 256);
+
+       saved_result = SCpnt->result;
+       SCpnt->request_buffer = scsi_result;
+       SCpnt->request_bufflen = 256;
+       SCpnt->use_sg = 0;
+       SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+       SCpnt->sc_data_direction = SCSI_DATA_READ;
+       SCpnt->underflow = 0;
+
+       scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+       /* Last chance to have valid sense data */
+       if (!scsi_sense_valid(SCpnt))
+               memcpy((void *) SCpnt->sense_buffer,
+                      SCpnt->request_buffer,
+                      sizeof(SCpnt->sense_buffer));
+
+       if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
+               kfree(scsi_result);
+
+       /*
+        * When we eventually call scsi_finish, we really wish to complete
+        * the original request, so let's restore the original data. (DB)
+        */
+       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+              sizeof(SCpnt->data_cmnd));
+       SCpnt->result = saved_result;
+       SCpnt->request_buffer = SCpnt->buffer;
+       SCpnt->request_bufflen = SCpnt->bufflen;
+       SCpnt->use_sg = SCpnt->old_use_sg;
+       SCpnt->cmd_len = SCpnt->old_cmd_len;
+       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+       SCpnt->underflow = SCpnt->old_underflow;
+
+       /*
+        * Hey, we are done.  Let's look to see what happened.
+        */
+       return SCpnt->eh_state;
+}
+
+/*
+ * Function:  scsi_test_unit_ready()
+ *
+ * Purpose:     Send a TEST UNIT READY command to see whether the device is responding.
+ *
+ */
+STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
+{
+       static unsigned char tur_command[6] =
+       {TEST_UNIT_READY, 0, 0, 0, 0, 0};
+
+       memcpy((void *) SCpnt->cmnd, (void *) tur_command,
+              sizeof(tur_command));
+
+       if (SCpnt->device->scsi_level <= SCSI_2)
+               SCpnt->cmnd[1] = SCpnt->lun << 5;
+
+       /*
+        * Zero the sense buffer.  The SCSI spec mandates that any
+        * untransferred sense data should be interpreted as being zero.
+        */
+       memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
+
+       SCpnt->request_buffer = NULL;
+       SCpnt->request_bufflen = 0;
+       SCpnt->use_sg = 0;
+       SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+       SCpnt->underflow = 0;
+       SCpnt->sc_data_direction = SCSI_DATA_NONE;
+
+       scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);
+
+       /*
+        * When we eventually call scsi_finish, we really wish to complete
+        * the original request, so let's restore the original data. (DB)
+        */
+       memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+              sizeof(SCpnt->data_cmnd));
+       SCpnt->request_buffer = SCpnt->buffer;
+       SCpnt->request_bufflen = SCpnt->bufflen;
+       SCpnt->use_sg = SCpnt->old_use_sg;
+       SCpnt->cmd_len = SCpnt->old_cmd_len;
+       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+       SCpnt->underflow = SCpnt->old_underflow;
+
+       /*
+        * Hey, we are done.  Let's look to see what happened.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3,
+               printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
+               SCpnt, SCpnt->eh_state));
+       return SCpnt->eh_state;
+}
+
+/*
+ * This would normally need to get the IO request lock,
+ * but as it doesn't actually touch anything that needs
+ * to be locked we can avoid the lock here.
+ */
+STATIC
+void scsi_sleep_done(struct semaphore *sem)
+{
+       if (sem != NULL) {
+               up(sem);
+       }
+}
+
+void scsi_sleep(int timeout)
+{
+       DECLARE_MUTEX_LOCKED(sem);
+       struct timer_list timer;
+
+       init_timer(&timer);
+       timer.data = (unsigned long) &sem;
+       timer.expires = jiffies + timeout;
+       timer.function = (void (*)(unsigned long)) scsi_sleep_done;
+
+       SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));
+
+       add_timer(&timer);
+
+       down(&sem);
+       del_timer(&timer);
+}
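+
+/*
+ * Illustrative usage: the reset paths below let the bus settle with
+ *
+ *     scsi_sleep(BUS_RESET_SETTLE_TIME);
+ *
+ * the caller sleeps (uninterruptibly) for the given number of clock
+ * ticks.
+ */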
+
+/*
+ * Function:  scsi_send_eh_cmnd
+ *
+ * Purpose:     Send a command out to a device as part of error recovery.
+ *
+ * Notes:       The initialization of the structures is quite a bit different
+ *              in this case, and furthermore, there is a different completion
+ *              handler.
+ */
+STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
+{
+       unsigned long flags;
+       struct Scsi_Host *host;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       host = SCpnt->host;
+
+      retry:
+       /*
+        * We will use a queued command if possible; otherwise we will emulate
+        * the queuing and the calling of the completion function ourselves.
+        */
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+       if (host->can_queue) {
+               DECLARE_MUTEX_LOCKED(sem);
+
+               SCpnt->eh_state = SCSI_STATE_QUEUED;
+
+               scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);
+
+               /*
+                * Set up the semaphore so we wait for the command to complete.
+                */
+               SCpnt->host->eh_action = &sem;
+               SCpnt->request.rq_status = RQ_SCSI_BUSY;
+
+               spin_lock_irqsave(&io_request_lock, flags);
+               host->hostt->queuecommand(SCpnt, scsi_eh_done);
+               spin_unlock_irqrestore(&io_request_lock, flags);
+
+               down(&sem);
+
+               SCpnt->host->eh_action = NULL;
+
+               /*
+                * See if the command timed out.  If so, tell the host to forget about it.
+                * In other words, we don't want a callback any more.
+                */
+               if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
+                        SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+                       /*
+                        * As far as the low level driver is
+                        * concerned, this command is still active, so
+                        * we must give the low level driver a chance
+                        * to abort it. (DB) 
+                        *
+                        * FIXME(eric) - we are not tracking whether we could
+                        * abort a timed out command or not.  Not sure how
+                        * we should treat them differently anyways.
+                        */
+                       spin_lock_irqsave(&io_request_lock, flags);
+                       if (SCpnt->host->hostt->eh_abort_handler)
+                               SCpnt->host->hostt->eh_abort_handler(SCpnt);
+                       spin_unlock_irqrestore(&io_request_lock, flags);
+                       
+                       SCpnt->request.rq_status = RQ_SCSI_DONE;
+                       SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
+                       
+                       SCpnt->eh_state = FAILED;
+               }
+               SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
+                                               SCpnt, SCpnt->eh_state));
+       } else {
+               int temp;
+
+               /*
+                * This code should never be used.  There is no timeout
+                * protection here: we would end up waiting inside the actual
+                * low-level driver, and we have no way to wake it up.
+                */
+               spin_lock_irqsave(&io_request_lock, flags);
+               temp = host->hostt->command(SCpnt);
+               spin_unlock_irqrestore(&io_request_lock, flags);
+
+               SCpnt->result = temp;
+               /* Fall through to code below to examine status. */
+               SCpnt->eh_state = SUCCESS;
+       }
+
+       /*
+        * Now examine the actual status codes to see whether the command actually
+        * did complete normally.
+        */
+       if (SCpnt->eh_state == SUCCESS) {
+               int ret = scsi_eh_completed_normally(SCpnt);
+               SCSI_LOG_ERROR_RECOVERY(3,
+                       printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
+               switch (ret) {
+               case SUCCESS:
+                       SCpnt->eh_state = SUCCESS;
+                       break;
+               case NEEDS_RETRY:
+                       goto retry;
+               case FAILED:
+               default:
+                       SCpnt->eh_state = FAILED;
+                       break;
+               }
+       } else {
+               SCpnt->eh_state = FAILED;
+       }
+}
+
+/*
+ * Function:  scsi_unit_is_ready()
+ *
+ * Purpose:     Called after TEST_UNIT_READY is run, to test to see if
+ *              the unit responded in a way that indicates it is ready.
+ */
+STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
+{
+       if (SCpnt->result) {
+               if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
+                    (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
+                   ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
+                       if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
+                           ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
+                           ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
+                               return 0;
+                       }
+               }
+       }
+       return 1;
+}
+
+/*
+ * Function:    scsi_eh_finish_command
+ *
+ * Purpose:     Handle a command that we are finished with WRT error handling.
+ *
+ * Arguments:   SClist - pointer to list into which we are putting completed commands.
+ *              SCpnt  - command that is completing
+ *
+ * Notes:       We don't want to use the normal command completion while we
+ *              are still handling errors - it may cause other commands to be queued,
+ *              and that would disturb what we are doing.  Thus we really want to keep
+ *              a list of pending commands for final completion, and once we
+ *              are ready to leave error handling we handle completion for real.
+ */
+STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
+{
+       SCpnt->state = SCSI_STATE_BHQUEUE;
+       SCpnt->bh_next = *SClist;
+       /*
+        * Set this back so that the upper level can correctly free up
+        * things.
+        */
+       SCpnt->use_sg = SCpnt->old_use_sg;
+       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+       SCpnt->underflow = SCpnt->old_underflow;
+       *SClist = SCpnt;
+}
+
+/*
+ * Function:  scsi_try_to_abort_command
+ *
+ * Purpose:     Ask host adapter to abort a running command.
+ *
+ * Returns:     FAILED          Operation failed or not supported.
+ *              SUCCESS         Succeeded.
+ *
+ * Notes:       This function will not return until the user's completion
+ *              function has been called.  There is no timeout on this
+ *              operation.  If the author of the low-level driver wishes
+ *              this operation to be timed, they can provide this facility
+ *              themselves.  Helper functions in scsi_error.c can be supplied
+ *              to make this easier to do.
+ *
+ * Notes:       It may be possible to combine this with all of the reset
+ *              handling to eliminate a lot of code duplication.  I don't
+ *              know what makes more sense at the moment - this is just a
+ *              prototype.
+ */
+STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
+{
+       int rtn;
+       unsigned long flags;
+
+       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
+
+       if (SCpnt->host->hostt->eh_abort_handler == NULL) {
+               return FAILED;
+       }
+       /* 
+        * scsi_done was called just after the command timed out and before
+        * we had a chance to process it. (DB)
+        */
+       if (SCpnt->serial_number == 0)
+               return SUCCESS;
+
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+       return rtn;
+}
+
+/*
+ * Function:  scsi_try_bus_device_reset
+ *
+ * Purpose:     Ask host adapter to perform a bus device reset for a given
+ *              device.
+ *
+ * Returns:     FAILED          Operation failed or not supported.
+ *              SUCCESS         Succeeded.
+ *
+ * Notes:       There is no timeout for this operation.  If this operation is
+ *              unreliable for a given host, then the host itself needs to put a
+ *              timer on it, and set the host back to a consistent state prior
+ *              to returning.
+ */
+STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
+{
+       unsigned long flags;
+       int rtn;
+
+       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
+
+       if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
+               return FAILED;
+       }
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       if (rtn == SUCCESS)
+               SCpnt->eh_state = SUCCESS;
+
+       return SCpnt->eh_state;
+}
+
+/*
+ * Function:  scsi_try_bus_reset
+ *
+ * Purpose:     Ask host adapter to perform a bus reset for a host.
+ *
+ * Returns:     FAILED          Operation failed or not supported.
+ *              SUCCESS         Succeeded.
+ *
+ * Notes:       
+ */
+STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
+{
+       unsigned long flags;
+       int rtn;
+
+       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+       if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
+               return FAILED;
+       }
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       if (rtn == SUCCESS)
+               SCpnt->eh_state = SUCCESS;
+
+       /*
+        * If we had a successful bus reset, mark the command blocks to expect
+        * a condition code of unit attention.
+        */
+       scsi_sleep(BUS_RESET_SETTLE_TIME);
+       if (SCpnt->eh_state == SUCCESS) {
+               Scsi_Device *SDloop;
+               for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+                       if (SCpnt->channel == SDloop->channel) {
+                               SDloop->was_reset = 1;
+                               SDloop->expecting_cc_ua = 1;
+                       }
+               }
+       }
+       return SCpnt->eh_state;
+}
+
+/*
+ * Function:  scsi_try_host_reset
+ *
+ * Purpose:     Ask host adapter to reset itself, and the bus.
+ *
+ * Returns:     FAILED          Operation failed or not supported.
+ *              SUCCESS         Succeeded.
+ *
+ * Notes:
+ */
+STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
+{
+       unsigned long flags;
+       int rtn;
+
+       SCpnt->eh_state = FAILED;       /* Until we come up with something better */
+       SCpnt->owner = SCSI_OWNER_LOWLEVEL;
+       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+       if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
+               return FAILED;
+       }
+       spin_lock_irqsave(&io_request_lock, flags);
+       rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       if (rtn == SUCCESS)
+               SCpnt->eh_state = SUCCESS;
+
+       /*
+        * If we had a successful host reset, mark the command blocks to expect
+        * a condition code of unit attention.
+        */
+       scsi_sleep(HOST_RESET_SETTLE_TIME);
+       if (SCpnt->eh_state == SUCCESS) {
+               Scsi_Device *SDloop;
+               for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
+                       SDloop->was_reset = 1;
+                       SDloop->expecting_cc_ua = 1;
+               }
+       }
+       return SCpnt->eh_state;
+}
+
+/*
+ * Function:  scsi_decide_disposition
+ *
+ * Purpose:     Examine a command block that has come back from the low-level
+ *              and figure out what to do next.
+ *
+ * Returns:     SUCCESS         - pass on to upper level.
+ *              FAILED          - pass on to error handler thread.
+ *              RETRY           - command should be retried.
+ *              SOFTERR         - command succeeded, but we need to log
+ *                                a soft error.
+ *
+ * Notes:       This is *ONLY* called when we are examining the status
+ *              after sending out the actual data command.  Any commands
+ *              that are queued for error recovery (i.e. TEST_UNIT_READY)
+ *              do *NOT* come through here.
+ *
+ *              NOTE - When this routine returns FAILED, it means the error
+ *              handler thread is woken.  In cases where the error code
+ *              indicates an error that doesn't require the error handler
+ *              thread (i.e. we don't need to abort/reset), then this function
+ *              should return SUCCESS.
+ */
+int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
+{
+       int rtn;
+
+       /*
+        * If the device is offline, then we clearly just pass the result back
+        * up to the top level.
+        */
+       if (SCpnt->device->online == FALSE) {
+               SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
+               return SUCCESS;
+       }
+       /*
+        * First check the host byte, to see if there is anything in there
+        * that would indicate what we need to do.
+        */
+
+       switch (host_byte(SCpnt->result)) {
+       case DID_PASSTHROUGH:
+               /*
+                * No matter what, pass this through to the upper layer.
+                * Nuke this special code so that it looks like we are saying
+                * DID_OK.
+                */
+               SCpnt->result &= 0xff00ffff;
+               return SUCCESS;
+       case DID_OK:
+               /*
+                * Looks good.  Drop through, and check the next byte.
+                */
+               break;
+       case DID_NO_CONNECT:
+       case DID_BAD_TARGET:
+       case DID_ABORT:
+               /*
+                * Note - this means that we just report the status back to the
+                * top level driver, not that we actually think that it indicates
+                * success.
+                */
+               return SUCCESS;
+               /*
+                * When the low level driver returns DID_SOFT_ERROR,
+                * it is responsible for keeping an internal retry counter 
+                * in order to avoid endless loops (DB)
+                *
+                * Actually this is a bug in this function here.  We should
+                * be mindful of the maximum number of retries specified
+                * and not get stuck in a loop.
+                */
+       case DID_SOFT_ERROR:
+               goto maybe_retry;
+
+       case DID_ERROR:
+               if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
+                   status_byte(SCpnt->result) == RESERVATION_CONFLICT)
+                       /*
+                        * execute reservation conflict processing code
+                        * lower down
+                        */
+                       break;
+               /* FALLTHROUGH */
+
+       case DID_BUS_BUSY:
+       case DID_PARITY:
+               goto maybe_retry;
+       case DID_TIME_OUT:
+               /*
+                * When we scan the bus, we get timeout messages for
+                * these commands if there is no device available.
+                * Other hosts report DID_NO_CONNECT for the same thing.
+                */
+               if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
+                    SCpnt->cmnd[0] == INQUIRY)) {
+                       return SUCCESS;
+               } else {
+                       return FAILED;
+               }
+       case DID_RESET:
+               /*
+                * In the normal case where we haven't initiated a reset, this is
+                * a failure.
+                */
+               if (SCpnt->flags & IS_RESETTING) {
+                       SCpnt->flags &= ~IS_RESETTING;
+                       goto maybe_retry;
+               }
+               return SUCCESS;
+       default:
+               return FAILED;
+       }
+
+       /*
+        * Next, check the message byte.
+        */
+       if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+               return FAILED;
+       }
+       /*
+        * Now, check the status byte to see if this indicates anything special.
+        */
+       switch (status_byte(SCpnt->result)) {
+       case QUEUE_FULL:
+               /*
+                * The case of trying to send too many commands to a tagged queueing
+                * device.
+                */
+               return ADD_TO_MLQUEUE;
+       case GOOD:
+       case COMMAND_TERMINATED:
+               return SUCCESS;
+       case CHECK_CONDITION:
+               rtn = scsi_check_sense(SCpnt);
+               if (rtn == NEEDS_RETRY) {
+                       goto maybe_retry;
+               }
+               return rtn;
+       case CONDITION_GOOD:
+       case INTERMEDIATE_GOOD:
+       case INTERMEDIATE_C_GOOD:
+               /*
+                * Who knows?  FIXME(eric)
+                */
+               return SUCCESS;
+       case BUSY:
+               goto maybe_retry;
+
+       case RESERVATION_CONFLICT:
+               printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", 
+                      SCpnt->host->host_no, SCpnt->channel,
+                      SCpnt->device->id, SCpnt->device->lun);
+               return SUCCESS; /* causes immediate I/O error */
+       default:
+               return FAILED;
+       }
+       return FAILED;
+
+      maybe_retry:
+
+       if ((++SCpnt->retries) < SCpnt->allowed) {
+               return NEEDS_RETRY;
+       } else {
+                /*
+                 * No more retries - report this one back to upper level.
+                 */
+               return SUCCESS;
+       }
+}
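+
+/*
+ * Example of a typical path through the function above: a command
+ * completing with DID_OK, message COMMAND_COMPLETE, and status
+ * CHECK_CONDITION reaches scsi_check_sense(); a sense key of
+ * ABORTED_COMMAND there yields NEEDS_RETRY, and maybe_retry re-issues
+ * the command until SCpnt->retries reaches SCpnt->allowed, after
+ * which the result is reported back to the upper level as-is.
+ */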
+
+/*
+ * Function:  scsi_eh_completed_normally
+ *
+ * Purpose:     Examine a command block that has come back from the low-level
+ *              and figure out what to do next.
+ *
+ * Returns:     SUCCESS         - pass on to upper level.
+ *              FAILED          - pass on to error handler thread.
+ *              NEEDS_RETRY     - command should be retried.
+ *
+ * Notes:       This is *ONLY* called when we are examining the status
+ *              of commands queued during error recovery.  The main
+ *              difference here is that we don't allow for the possibility
+ *              of retries here, and we are a lot more restrictive about what
+ *              we consider acceptable.
+ */
+STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
+{
+       /*
+        * First check the host byte, to see if there is anything in there
+        * that would indicate what we need to do.
+        */
+       if (host_byte(SCpnt->result) == DID_RESET) {
+               if (SCpnt->flags & IS_RESETTING) {
+                       /*
+                        * OK, this is normal.  We don't know whether in fact the
+                        * command in question really needs to be rerun or not - 
+                        * if this was the original data command then the answer is yes,
+                        * otherwise we just flag it as success.
+                        */
+                       SCpnt->flags &= ~IS_RESETTING;
+                       return NEEDS_RETRY;
+               }
+               /*
+                * Rats.  We are already in the error handler, so we now get to try
+                * and figure out what to do next.  If the sense is valid, we have
+                * a pretty good idea of what to do.  If not, we mark it as failed.
+                */
+               return scsi_check_sense(SCpnt);
+       }
+       if (host_byte(SCpnt->result) != DID_OK) {
+               return FAILED;
+       }
+       /*
+        * Next, check the message byte.
+        */
+       if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
+               return FAILED;
+       }
+       /*
+        * Now, check the status byte to see if this indicates anything special.
+        */
+       switch (status_byte(SCpnt->result)) {
+       case GOOD:
+       case COMMAND_TERMINATED:
+               return SUCCESS;
+       case CHECK_CONDITION:
+               return scsi_check_sense(SCpnt);
+       case CONDITION_GOOD:
+       case INTERMEDIATE_GOOD:
+       case INTERMEDIATE_C_GOOD:
+               /*
+                * Who knows?  FIXME(eric)
+                */
+               return SUCCESS;
+       case BUSY:
+       case QUEUE_FULL:
+       case RESERVATION_CONFLICT:
+       default:
+               return FAILED;
+       }
+       return FAILED;
+}
+
+/*
+ * Function:  scsi_check_sense
+ *
+ * Purpose:     Examine sense information - give suggestion as to what
+ *              we should do with it.
+ */
+STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
+{
+       if (!scsi_sense_valid(SCpnt)) {
+               return FAILED;
+       }
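+       /*
+        * Bits 5-7 of sense byte 2 are the FILEMARK, EOM and ILI flags;
+        * if any of them is set, hand the command back to the upper
+        * layers as-is.
+        */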
+       if (SCpnt->sense_buffer[2] & 0xe0)
+               return SUCCESS;
+
+       switch (SCpnt->sense_buffer[2] & 0xf) {
+       case NO_SENSE:
+               return SUCCESS;
+       case RECOVERED_ERROR:
+               return /* SOFT_ERROR */ SUCCESS;
+
+       case ABORTED_COMMAND:
+               return NEEDS_RETRY;
+       case NOT_READY:
+       case UNIT_ATTENTION:
+               /*
+                * If we are expecting a CC/UA because of a bus reset that we
+                * performed, treat this just as a retry.  Otherwise this is
+                * information that we should pass up to the upper-level driver
+                * so that we can deal with it there.
+                */
+               if (SCpnt->device->expecting_cc_ua) {
+                       SCpnt->device->expecting_cc_ua = 0;
+                       return NEEDS_RETRY;
+               }
+               /*
+                * If the device is in the process of becoming ready
+                * (ASC 0x04, ASCQ 0x01), we should retry.
+                */
+               if ((SCpnt->sense_buffer[12] == 0x04) &&
+                       (SCpnt->sense_buffer[13] == 0x01)) {
+                       return NEEDS_RETRY;
+               }
+               return SUCCESS;
+
+               /* these three are not supported */
+       case COPY_ABORTED:
+       case VOLUME_OVERFLOW:
+       case MISCOMPARE:
+               return SUCCESS;
+
+       case MEDIUM_ERROR:
+               return NEEDS_RETRY;
+
+       case ILLEGAL_REQUEST:
+       case BLANK_CHECK:
+       case DATA_PROTECT:
+       case HARDWARE_ERROR:
+       default:
+               return SUCCESS;
+       }
+}
+
+
+/*
+ * Function:  scsi_restart_operations
+ *
+ * Purpose:     Restart IO operations to the specified host.
+ *
+ * Arguments:   host  - host that we are restarting
+ *
+ * Lock status: Assumed that locks are not held upon entry.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       When we entered the error handler, we blocked all further
+ *              I/O to this device.  We need to 'reverse' this process.
+ */
+STATIC void scsi_restart_operations(struct Scsi_Host *host)
+{
+       Scsi_Device *SDpnt;
+       unsigned long flags;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       /*
+        * Next free up anything directly waiting upon the host.  This will be
+        * requests for character device operations, and also for ioctls to queued
+        * block devices.
+        */
+       SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));
+
+       wake_up(&host->host_wait);
+
+       /*
+        * Finally we need to re-initiate requests that may be pending.  We will
+        * have had everything blocked while error handling is taking place, and
+        * now that error recovery is done, we will need to ensure that these
+        * requests are started.
+        */
+       spin_lock_irqsave(&io_request_lock, flags);
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               request_queue_t *q;
+               if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
+                   || (host->host_blocked)
+                   || (host->host_self_blocked)
+                   || (SDpnt->device_blocked)) {
+                       break;
+               }
+               q = &SDpnt->request_queue;
+               q->request_fn(q);
+       }
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function:  scsi_unjam_host
+ *
+ * Purpose:     Attempt to fix a host which has a command that failed for
+ *              some reason.
+ *
+ * Arguments:   host    - host that needs unjamming.
+ * 
+ * Returns:     Nothing
+ *
+ * Notes:       When we come in here, we *know* that all commands on the
+ *              bus have either completed, failed or timed out.  We also
+ *              know that no further commands are being sent to the host,
+ *              so things are relatively quiet and we have freedom to
+ *              fiddle with things as we wish.
+ *
+ * Additional note:  This is only the *default* implementation.  It is possible
+ *              for individual drivers to supply their own version of this
+ *              function, and if the maintainer wishes to do this, it is
+ *              strongly suggested that this function be taken as a template
+ *              and modified.  This function was designed to correctly handle
+ *              problems for about 95% of the different cases out there, and
+ *              it should always provide at least a reasonable amount of error
+ *              recovery.
+ *
+ * Note3:       Any command marked 'FAILED' or 'TIMEOUT' must eventually
+ *              have scsi_finish_command() called for it.  We do all of
+ *              the retry stuff here, so when we restart the host after we
+ *              return it should have an empty queue.
+ */
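+/*
+ * The stages below escalate: request sense for failed commands, abort
+ * timed-out commands, bus device reset, bus reset, host reset, and
+ * finally take the offending devices offline if host->host_failed
+ * still has not reached zero.
+ */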
+STATIC int scsi_unjam_host(struct Scsi_Host *host)
+{
+       int devices_failed;
+       int numfailed;
+       int ourrtn;
+       int rtn = FALSE;
+       int result;
+       Scsi_Cmnd *SCloop;
+       Scsi_Cmnd *SCpnt;
+       Scsi_Device *SDpnt;
+       Scsi_Device *SDloop;
+       Scsi_Cmnd *SCdone;
+       int timed_out;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       SCdone = NULL;
+
+       /*
+        * First, protect against any sort of race condition.  If any of the outstanding
+        * commands are in states that indicate that we are not yet blocked (i.e. we are
+        * not in a quiet state) then we got woken up in error.  If we ever end up here,
+        * we need to re-examine some of the assumptions.
+        */
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                       if (SCpnt->state == SCSI_STATE_FAILED
+                           || SCpnt->state == SCSI_STATE_TIMEOUT
+                           || SCpnt->state == SCSI_STATE_INITIALIZING
+                           || SCpnt->state == SCSI_STATE_UNUSED) {
+                               continue;
+                       }
+                       /*
+                        * Rats.  Something is still floating around out there.  This could
+                        * be the result of the fact that the upper level drivers are still frobbing
+                        * commands that might have succeeded.  There are two outcomes.  One is that
+                        * the command block will eventually be freed, and the other one is that
+                        * the command will be queued and will be finished along the way.
+                        */
+                       SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));
+
+/*
+ *        panic("SCSI Error handler woken too early\n");
+ *
+ * This is no longer a problem, since now the code cares only about
+ * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
+ * Other states are useful only to release active commands when devices are
+ * set offline. If (host->host_active == host->host_busy) we can safely assume
+ * that there are no commands in a state other than TIMEOUT or FAILED. (DB)
+ *
+ * FIXME:
+ * It is not easy to correctly release commands according to their state
+ * when devices are set offline and the state is neither TIMEOUT nor FAILED.
+ * When a device is set offline, we can have some command with
+ * rq_status=RQ_SCSI_BUSY, owner=SCSI_OWNER_HIGHLEVEL,
+ * state=SCSI_STATE_INITIALIZING, and the driver module cannot be released.
+ * (DB, 17 May 1998)
+ */
+               }
+       }
+
+       /*
+        * Next, see if we need to request sense information.  if so,
+        * then get it now, so we have a better idea of what to do.
+        * FIXME(eric) this has the unfortunate side effect that if a host
+        * adapter does not automatically request sense information, that we end
+        * up shutting it down before we request it.  All hosts should be doing this
+        * anyways, so for now all I have to say is tough noogies if you end up in here.
+        * On second thought, this is probably a good idea.  We *really* want to give
+        * authors an incentive to automatically request this.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));
+
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                       if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
+                               continue;
+                       }
+                       SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
+                                                         SCpnt->target));
+                       rtn = scsi_request_sense(SCpnt);
+                       if (rtn != SUCCESS) {
+                               continue;
+                       }
+                       SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
+                                                 SCpnt, SCpnt->result));
+                       SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));
+
+                       result = scsi_decide_disposition(SCpnt);
+
+                       /*
+                        * If the result was normal, then just pass it along to the
+                        * upper level.
+                        */
+                       if (result == SUCCESS) {
+                               SCpnt->host->host_failed--;
+                               scsi_eh_finish_command(&SCdone, SCpnt);
+                       }
+                       if (result != NEEDS_RETRY) {
+                               continue;
+                       }
+                       /* 
+                        * We only come in here if we want to retry a
+                        * command.  The test to see whether the command
+                        * should be retried should be keeping track of the
+                        * number of tries, so we don't end up looping, of
+                        * course.  
+                        */
+                       SCpnt->state = NEEDS_RETRY;
+                       rtn = scsi_eh_retry_command(SCpnt);
+                       if (rtn != SUCCESS) {
+                               continue;
+                       }
+                       /*
+                        * We eventually hand this one back to the top level.
+                        */
+                       SCpnt->host->host_failed--;
+                       scsi_eh_finish_command(&SCdone, SCpnt);
+               }
+       }
+
+       /*
+        * Go through the list of commands and figure out where we stand and how bad things
+        * really are.
+        */
+       numfailed = 0;
+       timed_out = 0;
+       devices_failed = 0;
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               unsigned int device_error = 0;
+
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                       if (SCpnt->state == SCSI_STATE_FAILED) {
+                               SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
+                                                        SCpnt->target));
+                               numfailed++;
+                               device_error++;
+                       }
+                       if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+                               SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timed out\n",
+                                                        SCpnt->target));
+                               timed_out++;
+                               device_error++;
+                       }
+               }
+               if (device_error > 0) {
+                       devices_failed++;
+               }
+       }
+
+       SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
+                                 numfailed, timed_out, devices_failed));
+
+       if (host->host_failed == 0) {
+               ourrtn = TRUE;
+               goto leave;
+       }
+       /*
+        * Next, try and see whether or not it makes sense to try and abort
+        * the running command.  This only works out to be the case if we have
+        * one command that has timed out.  If the command simply failed, it
+        * makes no sense to try and abort the command, since as far as the
+        * host adapter is concerned, it isn't running.
+        */
+
+       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));
+
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+                       if (SCloop->state != SCSI_STATE_TIMEOUT) {
+                               continue;
+                       }
+                       rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
+                       if (rtn == SUCCESS) {
+                               rtn = scsi_test_unit_ready(SCloop);
+
+                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+                                       rtn = scsi_eh_retry_command(SCloop);
+
+                                       if (rtn == SUCCESS) {
+                                               SCloop->host->host_failed--;
+                                               scsi_eh_finish_command(&SCdone, SCloop);
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /*
+        * If we have corrected all of the problems, then we are done.
+        */
+       if (host->host_failed == 0) {
+               ourrtn = TRUE;
+               goto leave;
+       }
+       /*
+        * Either the abort wasn't appropriate, or it didn't succeed.
+        * Now try a bus device reset.  Still, look to see whether we have
+        * multiple devices that are jammed or not - if we have multiple devices,
+        * it makes no sense to try BUS_DEVICE_RESET - we really would need
+        * to try a BUS_RESET instead.
+        *
+        * Does this make sense - should we try BDR on each device individually?
+        * Yes, definitely.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));
+
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+                       if (SCloop->state == SCSI_STATE_FAILED
+                           || SCloop->state == SCSI_STATE_TIMEOUT) {
+                               break;
+                       }
+               }
+
+               if (SCloop == NULL) {
+                       continue;
+               }
+               /*
+                * OK, we have a device that is having problems.  Try and send
+                * a bus device reset to it.
+                *
+                * FIXME(eric) - make sure we handle the case where multiple
+                * commands to the same device have failed. They all must
+                * get properly restarted.
+                */
+               rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);
+
+               if (rtn == SUCCESS) {
+                       rtn = scsi_test_unit_ready(SCloop);
+
+                       if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+                               rtn = scsi_eh_retry_command(SCloop);
+
+                               if (rtn == SUCCESS) {
+                                       SCloop->host->host_failed--;
+                                       scsi_eh_finish_command(&SCdone, SCloop);
+                               }
+                       }
+               }
+       }
+
+       if (host->host_failed == 0) {
+               ourrtn = TRUE;
+               goto leave;
+       }
+       /*
+        * If we ended up here, we have serious problems.  The only thing left
+        * to try is a full bus reset.  If someone has grabbed the bus and isn't
+        * letting go, then perhaps this will help.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));
+
+       /* 
+        * We really want to loop over the various channels, and do this on
+        * a channel by channel basis.  We should also check to see if any
+        * of the failed commands are on soft_reset devices, and if so, skip
+        * the reset.  
+        */
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+             next_device:
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                       if (SCpnt->state != SCSI_STATE_FAILED
+                           && SCpnt->state != SCSI_STATE_TIMEOUT) {
+                               continue;
+                       }
+                       /*
+                        * We have a failed command.  Make sure there are no other failed
+                        * commands on the same channel that are timed out and implement a
+                        * soft reset.
+                        */
+                       for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+                               for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+                                       if (SCloop->channel != SCpnt->channel) {
+                                               continue;
+                                       }
+                                       if (SCloop->state != SCSI_STATE_FAILED
+                                           && SCloop->state != SCSI_STATE_TIMEOUT) {
+                                               continue;
+                                       }
+                                       if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
+                                               /* 
+                                                * If this device uses the soft reset option, and this
+                                                * is one of the devices acting up, then our only
+                                                * option is to wait a bit, since the command is
+                                                * supposedly still running.  
+                                                *
+                                                * FIXME(eric) - right now we will just end up falling
+                                                * through to the 'take device offline' case.
+                                                *
+                                                * FIXME(eric) - It is possible that the command completed
+                                                * *after* the error recovery procedure started, and if this
+                                                * is the case, we are worrying about nothing here.
+                                                */
+
+                                               scsi_sleep(1 * HZ);
+                                               goto next_device;
+                                       }
+                               }
+                       }
+
+                       /*
+                        * We now know that we are able to perform a reset for the
+                        * bus that SCpnt points to.  There are no soft-reset devices
+                        * with outstanding timed out commands.
+                        */
+                       rtn = scsi_try_bus_reset(SCpnt);
+                       if (rtn == SUCCESS) {
+                               for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+                                       for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+                                               if (SCloop->channel != SCpnt->channel) {
+                                                       continue;
+                                               }
+                                               if (SCloop->state != SCSI_STATE_FAILED
+                                                   && SCloop->state != SCSI_STATE_TIMEOUT) {
+                                                       continue;
+                                               }
+                                               rtn = scsi_test_unit_ready(SCloop);
+
+                                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+                                                       rtn = scsi_eh_retry_command(SCloop);
+
+                                                       if (rtn == SUCCESS) {
+                                                               SCpnt->host->host_failed--;
+                                                               scsi_eh_finish_command(&SCdone, SCloop);
+                                                       }
+                                               }
+                                               /*
+                                                * If the bus reset worked, but we are still unable to
+                                                * talk to the device, take it offline.
+                                                * FIXME(eric) - is this really the correct thing to do?
+                                                */
+                                               if (rtn != SUCCESS) {
+                                                       printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+
+                                                       SDloop->online = FALSE;
+                                                       SDloop->host->host_failed--;
+                                                       scsi_eh_finish_command(&SCdone, SCloop);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       if (host->host_failed == 0) {
+               ourrtn = TRUE;
+               goto leave;
+       }
+       /*
+        * If we ended up here, we have serious problems.  The only thing left
+        * to try is a full host reset - perhaps the firmware on the device
+        * crashed, or something like that.
+        *
+        * It is assumed that a successful host reset will cause *all* information
+        * about the command to be flushed from both the host adapter *and* the
+        * device.
+        *
+        * FIXME(eric) - it isn't clear that devices that implement the soft reset
+        * option can ever be cleared except via cycling the power.  The problem is
+        * that sending the host reset command will cause the host to forget
+        * about the pending command, but the device won't forget.  For now, we
+        * skip the host reset option if any of the failed devices are configured
+        * to use the soft reset option.
+        */
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+             next_device2:
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+                       if (SCpnt->state != SCSI_STATE_FAILED
+                           && SCpnt->state != SCSI_STATE_TIMEOUT) {
+                               continue;
+                       }
+                       if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
+                               /* 
+                                * If this device uses the soft reset option, and this
+                                * is one of the devices acting up, then our only
+                                * option is to wait a bit, since the command is
+                                * supposedly still running.  
+                                *
+                                * FIXME(eric) - right now we will just end up falling
+                                * through to the 'take device offline' case.
+                                */
+                               SCSI_LOG_ERROR_RECOVERY(3,
+                                                       printk("scsi_unjam_host: Unable to try hard host reset\n"));
+
+                               /*
+                                * Due to the spinlock, we will never get out of this
+                                * loop without a proper wait. (DB)
+                                */
+                               scsi_sleep(1 * HZ);
+
+                               goto next_device2;
+                       }
+                       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));
+
+                       /*
+                        * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
+                        */
+                       rtn = scsi_try_host_reset(SCpnt);
+                       if (rtn == SUCCESS) {
+                               /*
+                                * FIXME(eric) we assume that all commands are flushed from the
+                                * controller.  We should get a DID_RESET for all of the commands
+                                * that were pending.  We should ignore these so that we can
+                                * guarantee that we are in a consistent state.
+                                *
+                                * I believe this to be the case right now, but this needs to be
+                                * tested.
+                                */
+                               for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+                                       for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+                                               if (SCloop->state != SCSI_STATE_FAILED
+                                                   && SCloop->state != SCSI_STATE_TIMEOUT) {
+                                                       continue;
+                                               }
+                                               rtn = scsi_test_unit_ready(SCloop);
+
+                                               if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+                                                       rtn = scsi_eh_retry_command(SCloop);
+
+                                                       if (rtn == SUCCESS) {
+                                                               SCpnt->host->host_failed--;
+                                                               scsi_eh_finish_command(&SCdone, SCloop);
+                                                       }
+                                               }
+                                               if (rtn != SUCCESS) {
+                                                       printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+                                                       SDloop->online = FALSE;
+                                                       SDloop->host->host_failed--;
+                                                       scsi_eh_finish_command(&SCdone, SCloop);
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /*
+        * If we solved all of the problems, then let's rev up the engines again.
+        */
+       if (host->host_failed == 0) {
+               ourrtn = TRUE;
+               goto leave;
+       }
+       /*
+        * If the HOST RESET failed, then for now we assume that the entire host
+        * adapter is too hosed to be of any use.  For our purposes, however, it is
+        * easier to simply take the devices offline that correspond to commands
+        * that failed.
+        */
+       SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));
+
+       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+                       if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
+                               SDloop = SCloop->device;
+                               if (SDloop->online == TRUE) {
+                                       printk(KERN_INFO "scsi: device set offline - command error recovery failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+                                       SDloop->online = FALSE;
+                               }
+
+                               /*
+                                * This should pass the failure up to the top level driver, and
+                                * it will have to try and do something intelligent with it.
+                                */
+                               SCloop->host->host_failed--;
+
+                               if (SCloop->state == SCSI_STATE_TIMEOUT) {
+                                       SCloop->result |= (DRIVER_TIMEOUT << 24);
+                               }
+                               SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
+                                   SDloop->id, SCloop->result));
+
+                               scsi_eh_finish_command(&SCdone, SCloop);
+                       }
+               }
+       }
+
+       if (host->host_failed != 0) {
+               panic("scsi_unjam_host: Miscount of number of failed commands.\n");
+       }
+       SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));
+
+       ourrtn = FALSE;
+
+      leave:
+
+       /*
+        * We should have a list of commands that we 'finished' during the course of
+        * error recovery.  This should be the same as the list of commands that timed out
+        * or failed.  We are currently holding these things in a linked list - we didn't
+        * put them in the bottom half queue because we wanted to keep things quiet while
+        * we were working on recovery, and passing them up to the top level could easily
+        * cause the top level to try and queue something else again.
+        *
+        * Start by marking that the host is no longer in error recovery.
+        */
+       host->in_recovery = 0;
+
+       /*
+        * Take the list of commands, and stick them in the bottom half queue.
+        * The current implementation of scsi_done will do this for us - if need
+        * be we can create a special version of this function to do the
+        * same job for us.
+        */
+       for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
+               SCdone = SCpnt->bh_next;
+               SCpnt->bh_next = NULL;
+                /*
+                 * Oh, this is a vile hack.  scsi_done() expects a timer
+                 * to be running on the command.  If there isn't, it assumes
+                 * that the command has actually timed out, and a timer
+                 * handler is running.  That may well be how we got into
+                 * this fix, but right now things are stable.  We add
+                 * a timer back again so that we can report completion.
+                 * scsi_done() will immediately remove said timer from
+                 * the command, and then process it.
+                 */
+               scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
+               scsi_done(SCpnt);
+       }
+
+       return (ourrtn);
+}
+
+
+/*
+ * Function:  scsi_error_handler
+ *
+ * Purpose:     Handle errors/timeouts of scsi commands, try and clean up
+ *              and unjam the bus, and restart things.
+ *
+ * Arguments:   host    - host for which we are running.
+ *
+ * Returns:     Never returns.
+ *
+ * Notes:       This is always run in the context of a kernel thread.  The
+ *              idea is that we start this thing up when the kernel starts
+ *              up (one per host that we detect), and it immediately goes to
+ *              sleep and waits for some event (i.e. failure).  When this
+ *              takes place, we have the job of trying to unjam the bus
+ *              and restarting things.
+ *
+ */
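+/*
+ * A sketch of the startup handshake this function assumes (the creator,
+ * e.g. the host registration code, is expected to do roughly the
+ * following; names other than scsi_error_handler are illustrative):
+ *
+ *     DECLARE_MUTEX_LOCKED(sem);
+ *     host->eh_notify = &sem;
+ *     kernel_thread((int (*)(void *)) scsi_error_handler, (void *) host, 0);
+ *     down(&sem);      (blocks until the up(host->eh_notify) below)
+ *     host->eh_notify = NULL;
+ */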
+void scsi_error_handler(void *data)
+{
+       struct Scsi_Host *host = (struct Scsi_Host *) data;
+       int rtn;
+       DECLARE_MUTEX_LOCKED(sem);
+
+        /*
+         * We only listen to signals if the HA was loaded as a module.
+         * If the HA was compiled into the kernel, then we don't listen
+         * to any signals.
+         */
+       if (host->loaded_as_module) {
+               siginitsetinv(&current->blocked, SHUTDOWN_SIGS);
+       } else {
+               siginitsetinv(&current->blocked, 0);
+       }
+
+       lock_kernel();
+
+       /*
+        *    Flush resources
+        */
+
+       daemonize();
+       reparent_to_init();
+
+       /*
+        * Set the name of this process.
+        */
+
+       sprintf(current->comm, "scsi_eh_%d", host->host_no);
+
+       host->eh_wait = &sem;
+       host->ehandler = current;
+
+       unlock_kernel();
+
+       /*
+        * Wake up the thread that created us.
+        */
+       SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));
+
+       up(host->eh_notify);
+
+       while (1) {
+               /*
+                * If we get a signal, it means we are supposed to go
+                * away and die.  This typically happens if the user is
+                * trying to unload a module.
+                */
+               SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));
+
+               /*
+                * Note - we always use down_interruptible with the semaphore
+                * even if the module was loaded as part of the kernel.  The
+                * reason is that down() will cause this thread to be counted
+                * in the load average as a running process, and
+                * down_interruptible() doesn't.  Given that we need to allow this
+                * thread to die if the driver was loaded as a module, using
+                * semaphores isn't unreasonable.
+                */
+               down_interruptible(&sem);
+               if (host->loaded_as_module) {
+                       if (signal_pending(current))
+                               break;
+               }
+
+               SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
+
+               host->eh_active = 1;
+
+               /*
+                * We have a host that is failing for some reason.  Figure out
+                * what we need to do to get it up and online again (if we can).
+                * If we fail, we end up taking the thing offline.
+                */
+               if (host->hostt->eh_strategy_handler != NULL) {
+                       rtn = host->hostt->eh_strategy_handler(host);
+               } else {
+                       rtn = scsi_unjam_host(host);
+               }
+
+               host->eh_active = 0;
+
+               /*
+                * Note - if the above fails completely, the action is to take
+                * individual devices offline and flush the queue of any
+                * outstanding requests that may have been pending.  When we
+                * restart, we restart any I/O to any other devices on the bus
+                * which are still online.
+                */
+               scsi_restart_operations(host);
+
+       }
+
+       SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));
+
+       /*
+        * Make sure that nobody tries to wake us up again.
+        */
+       host->eh_wait = NULL;
+
+       /*
+        * Knock this down too.  From this point on, the host is flying
+        * without a pilot.  If this is because the module is being unloaded,
+        * that's fine.  If the user sent a signal to this thing, we are
+        * potentially in real danger.
+        */
+       host->in_recovery = 0;
+       host->eh_active = 0;
+       host->ehandler = NULL;
+
+       /*
+        * If anyone is waiting for us to exit (i.e. someone trying to unload
+        * a driver), then wake up that process to let them know we are on
+        * the way out the door.  This may be overkill - I *think* that we
+        * could probably just unload the driver and send the signal, and when
+        * the error handling thread wakes up that it would just exit without
+        * needing to touch any memory associated with the driver itself.
+        */
+       if (host->eh_notify != NULL)
+               up(host->eh_notify);
+}
+
+/*
+ * Function:   scsi_new_reset
+ *
+ * Purpose:    Send requested reset to a bus or device at any phase.
+ *
+ * Arguments:  SCpnt   - command ptr to send reset with (usually a dummy)
+ *             flag - reset type (see scsi.h)
+ *
+ * Returns:    SUCCESS/FAILURE.
+ *
+ * Notes:      This is used by the SCSI Generic driver to provide
+ *             Bus/Device reset capability.
+ */
+int
+scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
+{
+       int rtn;
+
+       switch(flag) {
+       case SCSI_TRY_RESET_DEVICE:
+               rtn = scsi_try_bus_device_reset(SCpnt, 0);
+               if (rtn == SUCCESS)
+                       break;
+               /* FALLTHROUGH */
+       case SCSI_TRY_RESET_BUS:
+               rtn = scsi_try_bus_reset(SCpnt);
+               if (rtn == SUCCESS)
+                       break;
+               /* FALLTHROUGH */
+       case SCSI_TRY_RESET_HOST:
+               rtn = scsi_try_host_reset(SCpnt);
+               break;
+       default:
+               rtn = FAILED;
+       }
+
+       return rtn;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_ioctl.c b/xen-2.4.16/drivers/scsi/scsi_ioctl.c
new file mode 100644 (file)
index 0000000..6f891de
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_ioctl.c
@@ -0,0 +1,538 @@
+/*
+ * Changes:
+ * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 08/23/2000
+ * - get rid of some verify_areas and use __copy*user and __get/put_user
+ *   for the ones that remain
+ */
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/page.h>
+
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <linux/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include <scsi/scsi_ioctl.h>
+
+#define NORMAL_RETRIES                 5
+#define IOCTL_NORMAL_TIMEOUT                   (10 * HZ)
+#define FORMAT_UNIT_TIMEOUT            (2 * 60 * 60 * HZ)
+#define START_STOP_TIMEOUT             (60 * HZ)
+#define MOVE_MEDIUM_TIMEOUT            (5 * 60 * HZ)
+#define READ_ELEMENT_STATUS_TIMEOUT    (5 * 60 * HZ)
+#define READ_DEFECT_DATA_TIMEOUT       (60 * HZ )  /* ZIP-250 on parallel port takes as long! */
+
+#define MAX_BUF PAGE_SIZE
+
+/*
+ * If we are told to probe a host, we will return 0 if the host is not
+ * present, 1 if the host is present, and will return an identifying
+ * string at *arg, if arg is non-null, filling to the length stored at
+ * (int *) arg.
+ */
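+/*
+ * For illustration, a hypothetical user-space caller would pass a buffer
+ * whose first int holds its size:
+ *
+ *     char buf[64];
+ *     *(unsigned int *) buf = sizeof(buf);
+ *     if (ioctl(fd, SCSI_IOCTL_PROBE_HOST, buf) > 0)
+ *             printf("host: %s\n", buf);
+ */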
+
+static int ioctl_probe(struct Scsi_Host *host, void *buffer)
+{
+       unsigned int len, slen;
+       const char *string;
+       int temp = host->hostt->present;
+
+       if (temp && buffer) {
+               if (get_user(len, (unsigned int *) buffer))
+                       return -EFAULT;
+
+               if (host->hostt->info)
+                       string = host->hostt->info(host);
+               else
+                       string = host->hostt->name;
+               if (string) {
+                       slen = strlen(string);
+                       if (len > slen)
+                               len = slen + 1;
+                       if (copy_to_user(buffer, string, len))
+                               return -EFAULT;
+               }
+       }
+       return temp;
+}
+
+/*
+ * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host.
+ * The IOCTL_NORMAL_TIMEOUT and NORMAL_RETRIES macros are used.
+ * 
+ * dev is the SCSI device struct ptr, *(int *) arg is the length of the
+ * input data, if any, not including the command string & counts, 
+ * *((int *)arg + 1) is the output buffer size in bytes.
+ * 
+ * *(char *) ((int *) arg)[2] the actual command byte.   
+ * 
+ * Note that if more than MAX_BUF bytes are requested to be transferred,
+ * the ioctl will fail with error EINVAL.  MAX_BUF can be increased in
+ * the future by increasing the size that scsi_malloc will accept.
+ * 
+ * This size *does not* include the initial lengths that were passed.
+ * 
+ * The SCSI command is read from the memory location immediately after the
+ * length words, and the input data is right after the command.  The SCSI
+ * routines know the command size based on the opcode decode.  
+ * 
+ * The output area is then filled in starting from the command byte. 
+ */
+
+static int ioctl_internal_command(Scsi_Device * dev, char *cmd,
+                                 int timeout, int retries)
+{
+       int result;
+       Scsi_Request *SRpnt;
+       Scsi_Device *SDpnt;
+
+
+       SCSI_LOG_IOCTL(1, printk("Trying ioctl with scsi command %d\n", cmd[0]));
+       if (NULL == (SRpnt = scsi_allocate_request(dev))) {
+               printk("SCSI internal ioctl failed, no memory\n");
+               return -ENOMEM;
+       }
+
+       SRpnt->sr_data_direction = SCSI_DATA_NONE;
+        scsi_wait_req(SRpnt, cmd, NULL, 0, timeout, retries);
+
+       SCSI_LOG_IOCTL(2, printk("Ioctl returned  0x%x\n", SRpnt->sr_result));
+
+       if (driver_byte(SRpnt->sr_result) != 0)
+               switch (SRpnt->sr_sense_buffer[2] & 0xf) {
+               case ILLEGAL_REQUEST:
+                       if (cmd[0] == ALLOW_MEDIUM_REMOVAL)
+                               dev->lockable = 0;
+                       else
+                               printk("SCSI device (ioctl) reports ILLEGAL REQUEST.\n");
+                       break;
+               case NOT_READY: /* This happens if there is no disc in drive */
+                       if (dev->removable && (cmd[0] != TEST_UNIT_READY)) {
+                               printk(KERN_INFO "Device not ready.  Make sure there is a disc in the drive.\n");
+                               break;
+                       }
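+                       /* fall through */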
+               case UNIT_ATTENTION:
+                       if (dev->removable) {
+                               dev->changed = 1;
+                               SRpnt->sr_result = 0;   /* This is no longer considered an error */
+                               /* gag this error, VFS will log it anyway /axboe */
+                               /* printk(KERN_INFO "Disc change detected.\n"); */
+                               break;
+                       }
+               default:        /* Fall through for non-removable media */
+                       printk("SCSI error: host %d id %d lun %d return code = %x\n",
+                              dev->host->host_no,
+                              dev->id,
+                              dev->lun,
+                              SRpnt->sr_result);
+                       printk("\tSense class %x, sense error %x, extended sense %x\n",
+                              sense_class(SRpnt->sr_sense_buffer[0]),
+                              sense_error(SRpnt->sr_sense_buffer[0]),
+                              SRpnt->sr_sense_buffer[2] & 0xf);
+
+               }
+
+       result = SRpnt->sr_result;
+
+       SCSI_LOG_IOCTL(2, printk("IOCTL Releasing command\n"));
+       SDpnt = SRpnt->sr_device;
+       scsi_release_request(SRpnt);
+       SRpnt = NULL;
+
+       return result;
+}
+
+/*
+ * This interface is deprecated - users should use the scsi generic (sg)
+ * interface instead, as this is a more flexible approach to performing
+ * generic SCSI commands on a device.
+ *
+ * The structure that we are passed should look like:
+ *
+ * struct sdata {
+ *  unsigned int inlen;      [i] Length of data to be written to device 
+ *  unsigned int outlen;     [i] Length of data to be read from device 
+ *  unsigned char cmd[x];    [i] SCSI command (6 <= x <= 12).
+ *                           [o] Data read from device starts here.
+ *                           [o] On error, sense buffer starts here.
+ *  unsigned char wdata[y];  [i] Data written to device starts here.
+ * };
+ * Notes:
+ *   -  The SCSI command length is determined by examining the 1st byte
+ *      of the given command. There is no way to override this.
+ *   -  Data transfers are limited to PAGE_SIZE (4K on i386, 8K on alpha).
+ *   -  The length (x + y) must be at least OMAX_SB_LEN bytes long to
+ *      accommodate the sense buffer when an error occurs.
+ *      The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
+ *      old code will not be surprised.
+ *   -  If a Unix error occurs (e.g. ENOMEM) then the user will receive
+ *      a negative return and the Unix error code in 'errno'. 
+ *      If the SCSI command succeeds then 0 is returned.
+ *      Positive numbers returned are the compacted SCSI error codes (4 
+ *      bytes in one int) where the lowest byte is the SCSI status.
+ *      See the drivers/scsi/scsi.h file for more information on this.
+ *
+ */
+#define OMAX_SB_LEN 16         /* Old sense buffer length */
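+/*
+ * For illustration, a hypothetical user-space TEST_UNIT_READY through
+ * this interface (no data in either direction; the 16-byte cmd area
+ * leaves room for the sense data copied back on error):
+ *
+ *     struct { unsigned int inlen, outlen; unsigned char cmd[16]; } s;
+ *     memset(&s, 0, sizeof(s));
+ *     s.cmd[0] = TEST_UNIT_READY;
+ *     if (ioctl(fd, SCSI_IOCTL_SEND_COMMAND, &s) < 0)
+ *             perror("SCSI_IOCTL_SEND_COMMAND");
+ */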
+
+int scsi_ioctl_send_command(Scsi_Device * dev, Scsi_Ioctl_Command * sic)
+{
+       char *buf;
+       unsigned char cmd[MAX_COMMAND_SIZE];
+       char *cmd_in;
+       Scsi_Request *SRpnt;
+       Scsi_Device *SDpnt;
+       unsigned char opcode;
+       unsigned int inlen, outlen, cmdlen;
+       unsigned int needed, buf_needed;
+       int timeout, retries, result;
+       int data_direction;
+
+       if (!sic)
+               return -EINVAL;
+       /*
+        * Verify that we can read at least this much.
+        */
+       if (verify_area(VERIFY_READ, sic, sizeof(Scsi_Ioctl_Command)))
+               return -EFAULT;
+
+       if(__get_user(inlen, &sic->inlen))
+               return -EFAULT;
+               
+       if(__get_user(outlen, &sic->outlen))
+               return -EFAULT;
+
+       /*
+        * We do not transfer more than MAX_BUF with this interface.
+        * If the user needs to transfer more data than this, they
+        * should use scsi_generics (sg) instead.
+        */
+       if (inlen > MAX_BUF)
+               return -EINVAL;
+       if (outlen > MAX_BUF)
+               return -EINVAL;
+
+       cmd_in = sic->data;
+       if(get_user(opcode, cmd_in))
+               return -EFAULT;
+
+       needed = buf_needed = (inlen > outlen ? inlen : outlen);
+       if (buf_needed) {
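+               /* scsi_malloc() deals in 512-byte units, so round up. */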
+               buf_needed = (buf_needed + 511) & ~511;
+               if (buf_needed > MAX_BUF)
+                       buf_needed = MAX_BUF;
+               buf = (char *) scsi_malloc(buf_needed);
+               if (!buf)
+                       return -ENOMEM;
+               memset(buf, 0, buf_needed);
+               if( inlen == 0 ) {
+                       data_direction = SCSI_DATA_READ;
+               } else if (outlen == 0 ) {
+                       data_direction = SCSI_DATA_WRITE;
+               } else {
+                       /*
+                        * Can this ever happen?
+                        */
+                       data_direction = SCSI_DATA_UNKNOWN;
+               }
+
+       } else {
+               buf = NULL;
+               data_direction = SCSI_DATA_NONE;
+       }
+
+       /*
+        * Obtain the command from the user's address space.
+        */
+       cmdlen = COMMAND_SIZE(opcode);
+       
+       result = -EFAULT;
+
+       if (verify_area(VERIFY_READ, cmd_in, cmdlen + inlen))
+               goto error;
+
+       if(__copy_from_user(cmd, cmd_in, cmdlen))
+               goto error;
+
+       /*
+        * Obtain the data to be sent to the device (if any).
+        */
+
+       if(copy_from_user(buf, cmd_in + cmdlen, inlen))
+               goto error;
+
+       /*
+        * Set the lun field to the correct value.
+        */
+       if (dev->scsi_level <= SCSI_2)
+               cmd[1] = (cmd[1] & 0x1f) | (dev->lun << 5);
+
+       switch (opcode) {
+       case FORMAT_UNIT:
+               timeout = FORMAT_UNIT_TIMEOUT;
+               retries = 1;
+               break;
+       case START_STOP:
+               timeout = START_STOP_TIMEOUT;
+               retries = NORMAL_RETRIES;
+               break;
+       case MOVE_MEDIUM:
+               timeout = MOVE_MEDIUM_TIMEOUT;
+               retries = NORMAL_RETRIES;
+               break;
+       case READ_ELEMENT_STATUS:
+               timeout = READ_ELEMENT_STATUS_TIMEOUT;
+               retries = NORMAL_RETRIES;
+               break;
+       case READ_DEFECT_DATA:
+               timeout = READ_DEFECT_DATA_TIMEOUT;
+               retries = 1;
+               break;
+       default:
+               timeout = IOCTL_NORMAL_TIMEOUT;
+               retries = NORMAL_RETRIES;
+               break;
+       }
+
+#ifndef DEBUG_NO_CMD
+
+
+       SRpnt = scsi_allocate_request(dev);
+       if (SRpnt == NULL) {
+               result = -EINTR;
+               goto error;
+       }
+
+       SRpnt->sr_data_direction = data_direction;
+        scsi_wait_req(SRpnt, cmd, buf, needed, timeout, retries);
+
+       /* 
+        * If there was an error condition, pass the info back to the user. 
+        */
+
+       result = SRpnt->sr_result;
+
+       if (SRpnt->sr_result) {
+               int sb_len = sizeof(SRpnt->sr_sense_buffer);
+
+               sb_len = (sb_len > OMAX_SB_LEN) ? OMAX_SB_LEN : sb_len;
+               if (copy_to_user(cmd_in, SRpnt->sr_sense_buffer, sb_len))
+                       result = -EFAULT;
+       } else {
+               if (copy_to_user(cmd_in, buf, outlen))
+                       result = -EFAULT;
+       }       
+
+       SDpnt = SRpnt->sr_device;
+       scsi_release_request(SRpnt);
+       SRpnt = NULL;
+
+error:
+       if (buf)
+               scsi_free(buf, buf_needed);
+
+
+       return result;
+#else
+       {
+               int i;
+               printk("scsi_ioctl : device %d.  command = ", dev->id);
+               for (i = 0; i < cmdlen; ++i)
+                       printk("%02x ", cmd[i]);
+               printk("\nbuffer =");
+               for (i = 0; i < 20; ++i)
+                       printk("%02x ", buf[i]);
+               printk("\n");
+               printk("inlen = %d, outlen = %d, cmdlen = %d\n",
+                      inlen, outlen, cmdlen);
+               printk("buffer = %p, cmd_in = %p\n", buf, cmd_in);
+       }
+       return 0;
+#endif
+}
+
+/*
+ * The scsi_ioctl_get_pci() function places into arg the value
+ * pci_dev::slot_name (8 characters) for the PCI device (if any).
+ * Returns: 0 on success
+ *          -ENXIO if there isn't a PCI device pointer
+ *                 (could be because the SCSI driver hasn't been
+ *                  updated yet, or because it isn't a SCSI
+ *                  device)
+ *          any copy_to_user() error on failure there
+ */
+static int
+scsi_ioctl_get_pci(Scsi_Device * dev, void *arg)
+{
+
+       if (!dev->host->pci_dev)
+               return -ENXIO;
+       if (copy_to_user(arg, dev->host->pci_dev->slot_name,
+                        sizeof(dev->host->pci_dev->slot_name)))
+               return -EFAULT;
+       return 0;
+}
+
+
+/*
+ * the scsi_ioctl() function differs from most ioctls in that it does
+ * not take a major/minor number as the dev field.  Rather, it takes
+ * a pointer to a scsi_devices[] element, a structure. 
+ */
+int scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+       char scsi_cmd[MAX_COMMAND_SIZE];
+       char cmd_byte1;
+
+       /* No idea how this happens.... */
+       if (!dev)
+               return -ENXIO;
+
+       /*
+        * If we are in the middle of error recovery, don't let anyone
+        * else try and use this device.  Also, if error recovery fails, it
+        * may try and take the device offline, in which case all further
+        * access to the device is prohibited.
+        */
+       if (!scsi_block_when_processing_errors(dev)) {
+               return -ENODEV;
+       }
+       cmd_byte1 = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0;
+
+       switch (cmd) {
+       case SCSI_IOCTL_GET_IDLUN:
+               if (verify_area(VERIFY_WRITE, arg, sizeof(Scsi_Idlun)))
+                       return -EFAULT;
+
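+               /*
+                * dev_id packs, from the low byte up: target id, lun,
+                * channel and host number, one byte each.
+                */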
+               __put_user((dev->id & 0xff)
+                        + ((dev->lun & 0xff) << 8)
+                        + ((dev->channel & 0xff) << 16)
+                        + ((dev->host->host_no & 0xff) << 24),
+                        &((Scsi_Idlun *) arg)->dev_id);
+               __put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id);
+               return 0;
+       case SCSI_IOCTL_GET_BUS_NUMBER:
+               return put_user(dev->host->host_no, (int *) arg);
+       case SCSI_IOCTL_TAGGED_ENABLE:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+               if (!dev->tagged_supported)
+                       return -EINVAL;
+               dev->tagged_queue = 1;
+               dev->current_tag = 1;
+               return 0;
+       case SCSI_IOCTL_TAGGED_DISABLE:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+               if (!dev->tagged_supported)
+                       return -EINVAL;
+               dev->tagged_queue = 0;
+               dev->current_tag = 0;
+               return 0;
+       case SCSI_IOCTL_PROBE_HOST:
+               return ioctl_probe(dev->host, arg);
+       case SCSI_IOCTL_SEND_COMMAND:
+               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
+                       return -EACCES;
+               return scsi_ioctl_send_command((Scsi_Device *) dev,
+                                            (Scsi_Ioctl_Command *) arg);
+       case SCSI_IOCTL_DOORLOCK:
+               if (!dev->removable || !dev->lockable)
+                       return 0;
+               scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+               scsi_cmd[1] = cmd_byte1;
+               scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+               scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
+               return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+                                  IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+       case SCSI_IOCTL_DOORUNLOCK:
+               if (!dev->removable || !dev->lockable)
+                       return 0;
+               scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+               scsi_cmd[1] = cmd_byte1;
+               scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+               scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
+               return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+                                  IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+       case SCSI_IOCTL_TEST_UNIT_READY:
+               scsi_cmd[0] = TEST_UNIT_READY;
+               scsi_cmd[1] = cmd_byte1;
+               scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+               scsi_cmd[4] = 0;
+               return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+                                  IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+       case SCSI_IOCTL_START_UNIT:
+               scsi_cmd[0] = START_STOP;
+               scsi_cmd[1] = cmd_byte1;
+               scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+               scsi_cmd[4] = 1;
+               return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+                                    START_STOP_TIMEOUT, NORMAL_RETRIES);
+       case SCSI_IOCTL_STOP_UNIT:
+               scsi_cmd[0] = START_STOP;
+               scsi_cmd[1] = cmd_byte1;
+               scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
+               scsi_cmd[4] = 0;
+               return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
+                                    START_STOP_TIMEOUT, NORMAL_RETRIES);
+        case SCSI_IOCTL_GET_PCI:
+                return scsi_ioctl_get_pci(dev, arg);
+       default:
+               if (dev->host->hostt->ioctl)
+                       return dev->host->hostt->ioctl(dev, cmd, arg);
+               return -EINVAL;
+       }
+       return -EINVAL;
+}
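+/*
+ * Illustrative sketch, not part of the original source: a caller that
+ * already holds a valid Scsi_Device pointer (sdev here is assumed) can
+ * poll a unit through this entry point; arg is unused for this command.
+ *
+ *     if (scsi_ioctl(sdev, SCSI_IOCTL_TEST_UNIT_READY, NULL) == 0)
+ *             printk("scsi: unit is ready\n");
+ */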
+
+/*
+ * Just like scsi_ioctl(), but callable from kernel space: the fs
+ * segment is temporarily widened so that scsi_ioctl()'s user-space
+ * copies also work on kernel addresses.
+ */
+
+int kernel_scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+       mm_segment_t oldfs;
+       int tmp;
+       oldfs = get_fs();
+       set_fs(get_ds());
+       tmp = scsi_ioctl(dev, cmd, arg);
+       set_fs(oldfs);
+       return tmp;
+}
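+/*
+ * Illustrative sketch, not part of the original source: because the fs
+ * segment is widened, arg may point into kernel memory here (sdev is
+ * an assumed Scsi_Device pointer).
+ *
+ *     int bus;
+ *     if (kernel_scsi_ioctl(sdev, SCSI_IOCTL_GET_BUS_NUMBER, &bus) == 0)
+ *             printk("scsi: device sits on host %d\n", bus);
+ */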
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_lib.c b/xen-2.4.16/drivers/scsi/scsi_lib.c
new file mode 100644 (file)
index 0000000..53c1092
--- /dev/null
@@ -0,0 +1,1197 @@
+/*
+ *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ *
+ *  SCSI queueing library.
+ *      Initial versions: Eric Youngdale (eric@andante.org).
+ *                        Based upon conversations with large numbers
+ *                        of people at Linux Expo.
+ */
+
+/*
+ * The fundamental purpose of this file is to contain a library of utility
+ * routines that can be used by low-level drivers.   Ultimately the idea
+ * is that there should be a sufficiently rich set of functions that it
+ * would be possible for a driver author to fashion a queueing function for
+ * a low-level driver if they wished.   Note however that this file also
+ * contains the "default" versions of these functions, as we don't want to
+ * go through and retrofit queueing functions into all 30-odd drivers.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function:   __scsi_insert_special()
+ *
+ * Purpose:    worker for scsi_insert_special_*()
+ *
+ * Arguments:  q - request queue where request should be inserted
+ *             rq - request to be inserted
+ *             data - private data
+ *             at_head - insert request at head or tail of queue
+ *
+ * Lock status:        Assumed that io_request_lock is not held upon entry.
+ *
+ * Returns:    Nothing
+ */
+static void __scsi_insert_special(request_queue_t *q, struct request *rq,
+                                 void *data, int at_head)
+{
+       unsigned long flags;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       rq->cmd = SPECIAL;
+       rq->special = data;
+       rq->q = NULL;
+       rq->nr_segments = 0;
+       rq->elevator_sequence = 0;
+
+       /*
+        * We have the option of inserting the head or the tail of the queue.
+        * Typically we use the tail for new ioctls and so forth.  We use the
+        * head of the queue for things like a QUEUE_FULL message from a
+        * device, or a host that is unable to accept a particular command.
+        */
+       spin_lock_irqsave(&io_request_lock, flags);
+
+       if (at_head)
+               list_add(&rq->queue, &q->queue_head);
+       else
+               list_add_tail(&rq->queue, &q->queue_head);
+
+       q->request_fn(q);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/*
+ * Function:    scsi_insert_special_cmd()
+ *
+ * Purpose:     Insert pre-formed command into request queue.
+ *
+ * Arguments:   SCpnt   - command that is ready to be queued.
+ *              at_head - boolean.  True if we should insert at head
+ *                        of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns:     0 (always).
+ *
+ * Notes:       This function is called from character device and from
+ *              ioctl types of functions where the caller knows exactly
+ *              what SCSI command needs to be issued.   The idea is that
+ *              we merely inject the command into the queue (at the head
+ *              for now), and then call the queue request function to actually
+ *              process it.
+ */
+int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head)
+{
+       request_queue_t *q = &SCpnt->device->request_queue;
+
+       __scsi_insert_special(q, &SCpnt->request, SCpnt, at_head);
+       return 0;
+}
+
+/*
+ * Function:    scsi_insert_special_req()
+ *
+ * Purpose:     Insert pre-formed request into request queue.
+ *
+ * Arguments:   SRpnt   - request that is ready to be queued.
+ *              at_head - boolean.  True if we should insert at head
+ *                        of queue, false if we should insert at tail.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns:     0 (always).
+ *
+ * Notes:       This function is called from character device and from
+ *              ioctl types of functions where the caller knows exactly
+ *              what SCSI command needs to be issued.   The idea is that
+ *              we merely inject the command into the queue (at the head
+ *              for now), and then call the queue request function to actually
+ *              process it.
+ */
+int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head)
+{
+       request_queue_t *q = &SRpnt->sr_device->request_queue;
+
+       __scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head);
+       return 0;
+}
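+/*
+ * Illustrative sketch, not part of the original source: the at_head
+ * flag selects between the two policies described above - tail for a
+ * freshly built ioctl-style request, head for work that must run
+ * before anything else on the queue.
+ *
+ *     scsi_insert_special_req(SRpnt, 0);      (new request: tail)
+ *     scsi_insert_special_cmd(SCpnt, 1);      (urgent retry: head)
+ */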
+
+/*
+ * Function:    scsi_init_cmd_errh()
+ *
+ * Purpose:     Initialize SCpnt fields related to error handling.
+ *
+ * Arguments:   SCpnt   - command that is ready to be queued.
+ *
+ * Returns:     1 (always).
+ *
+ * Notes:       This function has the job of initializing a number of
+ *              fields related to error handling.   Typically this will
+ *              be called once for each command, as required.
+ */
+int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt)
+{
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+       SCpnt->reset_chain = NULL;
+       SCpnt->serial_number = 0;
+       SCpnt->serial_number_at_timeout = 0;
+       SCpnt->flags = 0;
+       SCpnt->retries = 0;
+
+       SCpnt->abort_reason = 0;
+
+       memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer);
+
+       if (SCpnt->cmd_len == 0)
+               SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+
+       /*
+        * We need saved copies of a number of fields - this is because
+        * error handling may need to overwrite these with different values
+        * to run different commands, and once error handling is complete,
+        * we will need to restore these values prior to running the actual
+        * command.
+        */
+       SCpnt->old_use_sg = SCpnt->use_sg;
+       SCpnt->old_cmd_len = SCpnt->cmd_len;
+       SCpnt->sc_old_data_direction = SCpnt->sc_data_direction;
+       SCpnt->old_underflow = SCpnt->underflow;
+       memcpy((void *) SCpnt->data_cmnd,
+              (const void *) SCpnt->cmnd, sizeof(SCpnt->cmnd));
+       SCpnt->buffer = SCpnt->request_buffer;
+       SCpnt->bufflen = SCpnt->request_bufflen;
+
+       SCpnt->reset_chain = NULL;
+
+       SCpnt->internal_timeout = NORMAL_TIMEOUT;
+       SCpnt->abort_reason = 0;
+
+       return 1;
+}
+
+/*
+ * Function:    scsi_queue_next_request()
+ *
+ * Purpose:     Handle post-processing of completed commands.
+ *
+ * Arguments:   q       - queue whose request function should be run.
+ *              SCpnt   - command that may need to be requeued.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       After command completion, there may be blocks left
+ *              over which weren't finished by the previous command
+ *              this can be for a number of reasons - the main one is
+ *              that a medium error occurred, and the sectors after
+ *              the bad block need to be re-read.
+ *
+ *              If SCpnt is NULL, it means that the previous command
+ *              was completely finished, and we should simply start
+ *              a new command, if possible.
+ *
+ *             This is where a lot of special case code has begun to
+ *             accumulate.  It doesn't really affect readability or
+ *             anything, but it might be considered architecturally
+ *             inelegant.  If more of these special cases start to
+ *             accumulate, I am thinking along the lines of implementing
+ *             an atexit() like technology that gets run when commands
+ *             complete.  I am not convinced that it is worth the
+ *             added overhead, however.  Right now as things stand,
+ *             there are simple conditional checks, and most hosts
+ *             would skip past.
+ *
+ *             Another possible solution would be to tailor different
+ *             handler functions, sort of like what we did in scsi_merge.c.
+ *             This is probably a better solution, but the number of different
+ *             permutations grows as 2**N, and if too many more special cases
+ *             get added, we start to get screwed.
+ */
+void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt)
+{
+       int all_clear;
+       unsigned long flags;
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *SHpnt;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       if (SCpnt != NULL) {
+
+               /*
+                * For some reason, we are not done with this request.
+                * This happens for I/O errors in the middle of the request,
+                * in which case we need to request the blocks that come after
+                * the bad sector.
+                */
+               SCpnt->request.special = (void *) SCpnt;
+               list_add(&SCpnt->request.queue, &q->queue_head);
+       }
+
+       /*
+        * Just hit the requeue function for the queue.
+        */
+       q->request_fn(q);
+
+       SDpnt = (Scsi_Device *) q->queuedata;
+       SHpnt = SDpnt->host;
+
+       /*
+        * If this is a single-lun device, and we are currently finished
+        * with this device, then see if we need to get another device
+        * started.  FIXME(eric) - if this function gets too cluttered
+        * with special case code, then spin off separate versions and
+        * use function pointers to pick the right one.
+        */
+       if (SDpnt->single_lun
+           && list_empty(&q->queue_head)
+           && SDpnt->device_busy == 0) {
+               request_queue_t *q;
+
+               for (SDpnt = SHpnt->host_queue;
+                    SDpnt;
+                    SDpnt = SDpnt->next) {
+                       if (((SHpnt->can_queue > 0)
+                            && (SHpnt->host_busy >= SHpnt->can_queue))
+                           || (SHpnt->host_blocked)
+                           || (SHpnt->host_self_blocked)
+                           || (SDpnt->device_blocked)) {
+                               break;
+                       }
+                       q = &SDpnt->request_queue;
+                       q->request_fn(q);
+               }
+       }
+
+       /*
+        * Now see whether there are other devices on the bus which
+        * might be starved.  If so, hit the request function.  If we
+        * don't find any, then it is safe to reset the flag.  If we
+        * find a starved device, it isn't safe to reset the
+        * flag as the queue function releases the lock and thus some
+        * other device might have become starved along the way.
+        */
+       all_clear = 1;
+       if (SHpnt->some_device_starved) {
+               for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                       request_queue_t *q;
+                       if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
+                           || (SHpnt->host_blocked) 
+                           || (SHpnt->host_self_blocked)) {
+                               break;
+                       }
+                       if (SDpnt->device_blocked || !SDpnt->starved) {
+                               continue;
+                       }
+                       q = &SDpnt->request_queue;
+                       q->request_fn(q);
+                       all_clear = 0;
+               }
+               if (SDpnt == NULL && all_clear) {
+                       SHpnt->some_device_starved = 0;
+               }
+       }
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function:    __scsi_end_request()
+ *
+ * Purpose:     Post-processing of completed commands called from interrupt
+ *              handler or a bottom-half handler.
+ *
+ * Arguments:   SCpnt    - command that is complete.
+ *              uptodate - 1 if I/O indicates success, 0 for I/O error.
+ *              sectors  - number of sectors we want to mark.
+ *             requeue  - indicates whether we should requeue leftovers.
+ *             frequeue - indicates that if we release the command block
+ *                        that the queue request function should be called.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns:     SCpnt if blocks remain unfinished, NULL once the request
+ *              is complete.
+ *
+ * Notes:       This is called for block device requests in order to
+ *              mark some number of sectors as complete.
+ * 
+ *             We are guaranteeing that the request queue will be goosed
+ *             at some point during this call.
+ */
+static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt, 
+                                    int uptodate, 
+                                    int sectors,
+                                    int requeue,
+                                    int frequeue)
+{
+       struct request *req;
+       struct buffer_head *bh;
+        Scsi_Device * SDpnt;
+       int nsect;
+
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       req = &SCpnt->request;
+       req->errors = 0;
+       if (!uptodate) {
+               printk(" I/O error: dev %s, sector %lu\n",
+                      kdevname(req->rq_dev), req->sector);
+       }
+       do {
+               if ((bh = req->bh) != NULL) {
+                       nsect = bh->b_size >> 9;
+                       blk_finished_io(nsect);
+                       req->bh = bh->b_reqnext;
+                       bh->b_reqnext = NULL;
+                       sectors -= nsect;
+                       bh->b_end_io(bh, uptodate);
+                       if ((bh = req->bh) != NULL) {
+                               req->hard_sector += nsect;
+                               req->hard_nr_sectors -= nsect;
+                               req->sector += nsect;
+                               req->nr_sectors -= nsect;
+
+                               req->current_nr_sectors = bh->b_size >> 9;
+                               if (req->nr_sectors < req->current_nr_sectors) {
+                                       req->nr_sectors = req->current_nr_sectors;
+                                       printk("scsi_end_request: buffer-list destroyed\n");
+                               }
+                       }
+               }
+       } while (sectors && bh);
+
+       /*
+        * If there are blocks left over at the end, set up the command
+        * to queue the remainder of them.
+        */
+       if (req->bh) {
+                request_queue_t *q;
+
+               if( !requeue )
+               {
+                       return SCpnt;
+               }
+
+                q = &SCpnt->device->request_queue;
+
+               req->buffer = bh->b_data;
+               /*
+                * Bleah.  Leftovers again.  Stick the leftovers in
+                * the front of the queue, and goose the queue again.
+                */
+               scsi_queue_next_request(q, SCpnt);
+               return SCpnt;
+       }
+       /*
+        * This request is done.  If there is someone blocked waiting for this
+        * request, wake them up.  Typically used to wake up processes trying
+        * to swap a page into memory.
+        */
+       if (req->waiting != NULL) {
+               complete(req->waiting);
+       }
+       req_finished_io(req);
+       add_blkdev_randomness(MAJOR(req->rq_dev));
+
+        SDpnt = SCpnt->device;
+
+       /*
+        * This will goose the queue request function at the end, so we don't
+        * need to worry about launching another command.
+        */
+       __scsi_release_command(SCpnt);
+
+       if( frequeue ) {
+               request_queue_t *q;
+
+               q = &SDpnt->request_queue;
+               scsi_queue_next_request(q, NULL);                
+       }
+       return NULL;
+}
+
+/*
+ * Function:    scsi_end_request()
+ *
+ * Purpose:     Post-processing of completed commands called from interrupt
+ *              handler or a bottom-half handler.
+ *
+ * Arguments:   SCpnt    - command that is complete.
+ *              uptodate - 1 if I/O indicates success, 0 for I/O error.
+ *              sectors  - number of sectors we want to mark.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns:     SCpnt if blocks remain unfinished, NULL once the request
+ *              is complete.
+ *
+ * Notes:       This is called for block device requests in order to
+ *              mark some number of sectors as complete.
+ * 
+ *             We are guaranteeing that the request queue will be goosed
+ *             at some point during this call.
+ */
+Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate, int sectors)
+{
+       return __scsi_end_request(SCpnt, uptodate, sectors, 1, 1);
+}
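+/*
+ * Illustrative sketch, not part of the original source: for a
+ * 16-sector request of which only the first 8 sectors succeeded,
+ *
+ *     SCpnt = scsi_end_request(SCpnt, 1, 8);
+ *
+ * completes 8 sectors, requeues the leftover blocks, and returns the
+ * command block; once everything is finished it returns NULL instead.
+ */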
+
+/*
+ * Function:    scsi_release_buffers()
+ *
+ * Purpose:     Completion processing for block device I/O requests.
+ *
+ * Arguments:   SCpnt   - command that we are bailing.
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       In the event that an upper level driver rejects a
+ *             command, we must release resources allocated during
+ *             the __init_io() function.  Primarily this would involve
+ *             the scatter-gather table, and potentially any bounce
+ *             buffers.
+ */
+static void scsi_release_buffers(Scsi_Cmnd * SCpnt)
+{
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       /*
+        * Free up any indirection buffers we allocated for DMA purposes. 
+        */
+       if (SCpnt->use_sg) {
+               struct scatterlist *sgpnt;
+               void **bbpnt;
+               int i;
+
+               sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+               bbpnt = SCpnt->bounce_buffers;
+
+               if (bbpnt) {
+                       for (i = 0; i < SCpnt->use_sg; i++) {
+                               if (bbpnt[i])
+                                       scsi_free(sgpnt[i].address, sgpnt[i].length);
+                       }
+               }
+               scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+       } else {
+               if (SCpnt->request_buffer != SCpnt->request.buffer) {
+                       scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen);
+               }
+       }
+
+       /*
+        * Zero these out.  They now point to freed memory, and it is
+        * dangerous to hang onto the pointers.
+        */
+       SCpnt->buffer  = NULL;
+       SCpnt->bufflen = 0;
+       SCpnt->request_buffer = NULL;
+       SCpnt->request_bufflen = 0;
+}
+
+/*
+ * Function:    scsi_io_completion()
+ *
+ * Purpose:     Completion processing for block device I/O requests.
+ *
+ * Arguments:   SCpnt         - command that is finished.
+ *              good_sectors  - number of sectors that completed successfully.
+ *              block_sectors - number of sectors to fail on a medium error
+ *                              (the span of the bad block).
+ *
+ * Lock status: Assumed that no lock is held upon entry.
+ *
+ * Returns:     Nothing
+ *
+ * Notes:       This function is matched in terms of capabilities to
+ *              the function that created the scatter-gather list.
+ *              In other words, if there are no bounce buffers
+ *              (the normal case for most drivers), we don't need
+ *              the logic to deal with cleaning up afterwards.
+ */
+void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
+                       int block_sectors)
+{
+       int result = SCpnt->result;
+       int this_count = SCpnt->bufflen >> 9;
+       request_queue_t *q = &SCpnt->device->request_queue;
+
+       /*
+        * We must do one of several things here:
+        *
+        *      Call scsi_end_request.  This will finish off the specified
+        *      number of sectors.  If we are done, the command block will
+        *      be released, and the queue function will be goosed.  If we
+        *      are not done, then scsi_end_request will directly goose
+        *      the queue.
+        *
+        *      We can just use scsi_queue_next_request() here.  This
+        *      would be used if we just wanted to retry, for example.
+        *
+        */
+       ASSERT_LOCK(&io_request_lock, 0);
+
+       /*
+        * Free up any indirection buffers we allocated for DMA purposes. 
+        * For the case of a READ, we need to copy the data out of the
+        * bounce buffer and into the real buffer.
+        */
+       if (SCpnt->use_sg) {
+               struct scatterlist *sgpnt;
+               void **bbpnt;
+               int i;
+
+               sgpnt = (struct scatterlist *) SCpnt->buffer;
+               bbpnt = SCpnt->bounce_buffers;
+
+               if (bbpnt) {
+                       for (i = 0; i < SCpnt->use_sg; i++) {
+                               if (bbpnt[i]) {
+                                       if (SCpnt->request.cmd == READ) {
+                                               memcpy(bbpnt[i],
+                                                      sgpnt[i].address,
+                                                      sgpnt[i].length);
+                                       }
+                                       scsi_free(sgpnt[i].address, sgpnt[i].length);
+                               }
+                       }
+               }
+               scsi_free(SCpnt->buffer, SCpnt->sglist_len);
+       } else {
+               if (SCpnt->buffer != SCpnt->request.buffer) {
+                       if (SCpnt->request.cmd == READ) {
+                               memcpy(SCpnt->request.buffer, SCpnt->buffer,
+                                      SCpnt->bufflen);
+                       }
+                       scsi_free(SCpnt->buffer, SCpnt->bufflen);
+               }
+       }
+
+       /*
+        * Zero these out.  They now point to freed memory, and it is
+        * dangerous to hang onto the pointers.
+        */
+       SCpnt->buffer  = NULL;
+       SCpnt->bufflen = 0;
+       SCpnt->request_buffer = NULL;
+       SCpnt->request_bufflen = 0;
+
+       /*
+        * Next deal with any sectors which we were able to correctly
+        * handle.
+        */
+       if (good_sectors > 0) {
+               SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n",
+                                             SCpnt->request.nr_sectors,
+                                             good_sectors));
+               SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg));
+
+               SCpnt->request.errors = 0;
+               /*
+                * If multiple sectors are requested in one buffer, then
+                * they will have been finished off by the first command.
+                * If not, then we have a multi-buffer command.
+                *
+                * If block_sectors != 0, it means we had a medium error
+                * of some sort, and that we want to mark some number of
+                * sectors as not uptodate.  Thus we want to inhibit
+                * requeueing right here - we will requeue down below
+                * when we handle the bad sectors.
+                */
+               SCpnt = __scsi_end_request(SCpnt, 
+                                          1, 
+                                          good_sectors,
+                                          result == 0,
+                                          1);
+
+               /*
+                * If the command completed without error, then either finish off the
+                * rest of the command, or start a new one.
+                */
+               if (result == 0 || SCpnt == NULL ) {
+                       return;
+               }
+       }
+       /*
+        * Now, if we were good little boys and girls, Santa left us a request
+        * sense buffer.  We can extract information from this, so we
+        * can choose a block to remap, etc.
+        */
+       if (driver_byte(result) != 0) {
+               if (suggestion(result) == SUGGEST_REMAP) {
+#ifdef REMAP
+                       /*
+                        * Not yet implemented.  A read will fail after being remapped,
+                        * a write will call the strategy routine again.
+                        */
+                       if (SCpnt->device->remap) {
+                               result = 0;
+                       }
+#endif
+               }
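+               /*
+                * Byte 0 of the sense buffer holds the response code;
+                * masking off the top (valid) bit leaves 0x70, i.e.
+                * fixed-format, current-error sense data.
+                */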
+               if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) {
+                       /*
+                        * If the device is in the process of becoming ready,
+                        * retry.
+                        */
+                       if (SCpnt->sense_buffer[12] == 0x04 &&
+                           SCpnt->sense_buffer[13] == 0x01) {
+                               scsi_queue_next_request(q, SCpnt);
+                               return;
+                       }
+                       if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) {
+                               if (SCpnt->device->removable) {
+                                       /* detected disc change.  set a bit 
+                                        * and quietly refuse further access.
+                                        */
+                                       SCpnt->device->changed = 1;
+                                       SCpnt = scsi_end_request(SCpnt, 0, this_count);
+                                       return;
+                               } else {
+                                       /*
+                                       * Must have been a power glitch, or a
+                                       * bus reset.  Could not have been a
+                                       * media change, so we just retry the
+                                       * request and see what happens.  
+                                       */
+                                       scsi_queue_next_request(q, SCpnt);
+                                       return;
+                               }
+                       }
+               }
+               /* If we had an ILLEGAL REQUEST returned, then we may have
+                * performed an unsupported command.  The most likely cause is
+                * a ten byte read on a device that only supports six byte reads.
+                * Also, on a system where READ CAPACITY failed, we may have read
+                * past the end of the disk.
+                */
+
+               switch (SCpnt->sense_buffer[2]) {
+               case ILLEGAL_REQUEST:
+                       if (SCpnt->device->ten) {
+                               SCpnt->device->ten = 0;
+                               /*
+                                * This will cause a retry with a 6-byte
+                                * command.
+                                */
+                               scsi_queue_next_request(q, SCpnt);
+                               result = 0;
+                       } else {
+                               SCpnt = scsi_end_request(SCpnt, 0, this_count);
+                               return;
+                       }
+                       break;
+               case NOT_READY:
+                       printk(KERN_INFO "Device %s not ready.\n",
+                              kdevname(SCpnt->request.rq_dev));
+                       SCpnt = scsi_end_request(SCpnt, 0, this_count);
+                       return;
+               case MEDIUM_ERROR:
+               case VOLUME_OVERFLOW:
+                       printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
+                              SCpnt->host->host_no, (int) SCpnt->channel,
+                              (int) SCpnt->target, (int) SCpnt->lun);
+                       print_command(SCpnt->cmnd);
+                       print_sense("sd", SCpnt);
+                       SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
+                       return;
+               default:
+                       break;
+               }
+       }                       /* driver byte != 0 */
+       if (host_byte(result) == DID_RESET) {
+               /*
+                * Third party bus reset or reset for error
+                * recovery reasons.  Just retry the request
+                * and see what happens.  
+                */
+               scsi_queue_next_request(q, SCpnt);
+               return;
+       }
+       if (result) {
+               struct Scsi_Device_Template *STpnt;
+
+               STpnt = scsi_get_request_dev(&SCpnt->request);
+               printk("SCSI %s error : host %d channel %d id %d lun %d return code = %x\n",
+                      (STpnt ? STpnt->name : "device"),
+                      SCpnt->device->host->host_no,
+                      SCpnt->device->channel,
+                      SCpnt->device->id,
+                      SCpnt->device->lun, result);
+
+               if (driver_byte(result) & DRIVER_SENSE)
+                       print_sense("sd", SCpnt);
+               /*
+                * Mark a single buffer as not uptodate.  Queue the remainder.
+                * We sometimes get this cruft in the event that a medium error
+                * isn't properly reported.
+                */
+               SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors);
+               return;
+       }
+}
+
+/*
+ * Function:    scsi_get_request_dev()
+ *
+ * Purpose:     Find the upper-level driver that is responsible for this
+ *              request
+ *
+ * Arguments:   request   - I/O request we are preparing to queue.
+ *
+ * Lock status: No locks assumed to be held, but as it happens the
+ *              io_request_lock is held when this is called.
+ *
+ * Returns:     Matching Scsi_Device_Template, or NULL if none is found.
+ *
+ * Notes:       The requests in the request queue may have originated
+ *              from any block device driver.  We need to find out which
+ *              one so that we can later form the appropriate command.
+ */
+struct Scsi_Device_Template *scsi_get_request_dev(struct request *req)
+{
+       struct Scsi_Device_Template *spnt;
+       kdev_t dev = req->rq_dev;
+       int major = MAJOR(dev);
+
+       ASSERT_LOCK(&io_request_lock, 1);
+
+       for (spnt = scsi_devicelist; spnt; spnt = spnt->next) {
+               /*
+                * Search for a block device driver that supports this
+                * major.
+                */
+               if (spnt->blk && spnt->major == major) {
+                       return spnt;
+               }
+               /*
+               /*
+                * I am still not entirely satisfied with this solution,
+                * but it is good enough for now.  Disks have a number of
+                * major numbers associated with them: the primary
+                * major (8), which we test above, and a secondary range of
+                * 7 consecutive major numbers.   If this ever
+                * becomes insufficient, then we could add another function
+                * to the structure, and generalize this completely.
+                */
+               if( spnt->min_major != 0 
+                   && spnt->max_major != 0
+                   && major >= spnt->min_major
+                   && major <= spnt->max_major )
+               {
+                       return spnt;
+               }
+       }
+       return NULL;
+}
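+/*
+ * Illustrative note, with values assumed from the 2.4 tree: the sd
+ * driver owns primary major 8 plus the consecutive range 65-71, so a
+ * disk request with major 65 is matched by the min_major/max_major
+ * test rather than by the primary-major test.
+ */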
+
+/*
+ * Function:    scsi_request_fn()
+ *
+ * Purpose:     Generic version of request function for SCSI hosts.
+ *
+ * Arguments:   q       - Pointer to actual queue.
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: IO request lock assumed to be held when called.
+ *
+ * Notes:       The theory is that this function is something which individual
+ *              drivers could also supply if they wished to.   The problem
+ *              is that we have 30 some odd low-level drivers in the kernel
+ *              tree already, and it would be most difficult to retrofit
+ *              this crap into all of them.   Thus this function has the job
+ *              of acting as a generic queue manager for all of those existing
+ *              drivers.
+ */
+void scsi_request_fn(request_queue_t * q)
+{
+       struct request *req;
+       Scsi_Cmnd *SCpnt;
+       Scsi_Request *SRpnt;
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *SHpnt;
+       struct Scsi_Device_Template *STpnt;
+
+       ASSERT_LOCK(&io_request_lock, 1);
+
+       SDpnt = (Scsi_Device *) q->queuedata;
+       if (!SDpnt) {
+               panic("Missing device");
+       }
+       SHpnt = SDpnt->host;
+
+       /*
+        * To start with, we keep looping until the queue is empty, or until
+        * the host is no longer able to accept any more requests.
+        */
+       while (1) {
+               /*
+                * Check this again - each time we loop through we will have
+                * released the lock and grabbed it again, so each time
+                * we need to check to see if the queue is plugged or not.
+                */
+               if (SHpnt->in_recovery || q->plugged)
+                       return;
+
+               /*
+                * If the device cannot accept another request, then quit.
+                */
+               if (SDpnt->device_blocked) {
+                       break;
+               }
+               if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
+                   || (SHpnt->host_blocked) 
+                   || (SHpnt->host_self_blocked)) {
+                       /*
+                        * If we are unable to process any commands at all for
+                        * this device, then we consider it to be starved.
+                        * What this means is that there are no outstanding
+                        * commands for this device and hence we need a
+                        * little help getting it started again
+                        * once the host isn't quite so busy.
+                        */
+                       if (SDpnt->device_busy == 0) {
+                               SDpnt->starved = 1;
+                               SHpnt->some_device_starved = 1;
+                       }
+                       break;
+               } else {
+                       SDpnt->starved = 0;
+               }
+
+               /*
+                * FIXME(eric)
+                * I am not sure where the best place to do this is.  We need
+                * to hook in a place where we are likely to come if in user
+                * space.   Technically the error handling thread should be
+                * doing this crap, but the error handler isn't used by
+                * most hosts.
+                */
+               if (SDpnt->was_reset) {
+                       /*
+                        * We need to relock the door, but we might
+                        * be in an interrupt handler.  Only do this
+                        * from user space, since we do not want to
+                        * sleep from an interrupt.
+                        *
+                        * FIXME(eric) - have the error handler thread do
+                        * this work.
+                        */
+                       SDpnt->was_reset = 0;
+                       if (SDpnt->removable && !in_interrupt()) {
+                               spin_unlock_irq(&io_request_lock);
+                               scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0);
+                               spin_lock_irq(&io_request_lock);
+                               continue;
+                       }
+               }
+
+               /*
+                * If we couldn't find a request that could be queued, then we
+                * can also quit.
+                */
+               if (list_empty(&q->queue_head))
+                       break;
+
+               /*
+                * Loop through all of the requests in this queue, and find
+                * one that is queueable.
+                */
+               req = blkdev_entry_next_request(&q->queue_head);
+
+               /*
+                * Find the actual device driver associated with this command.
+                * The SPECIAL requests are things like character device or
+                * ioctls, which did not originate from ll_rw_blk.  Note that
+                * the special field is also used to indicate the SCpnt for
+                * the remainder of a partially fulfilled request that can 
+                * come up when there is a medium error.  We have to treat
+                * these two cases differently.  We differentiate by looking
+                * at request.cmd, as this tells us the real story.
+                */
+               if (req->cmd == SPECIAL) {
+                       STpnt = NULL;
+                       SCpnt = (Scsi_Cmnd *) req->special;
+                       SRpnt = (Scsi_Request *) req->special;
+
+                       if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) {
+                               SCpnt = scsi_allocate_device(SRpnt->sr_device, 
+                                                            FALSE, FALSE);
+                               if( !SCpnt ) {
+                                       break;
+                               }
+                               scsi_init_cmd_from_req(SCpnt, SRpnt);
+                       }
+
+               } else {
+                       SRpnt = NULL;
+                       STpnt = scsi_get_request_dev(req);
+                       if (!STpnt) {
+                               panic("Unable to find device associated with request");
+                       }
+                       /*
+                        * Now try and find a command block that we can use.
+                        */
+                       if( req->special != NULL ) {
+                               SCpnt = (Scsi_Cmnd *) req->special;
+                               /*
+                                * We need to recount the number of
+                                * scatter-gather segments here - the
+                                * normal case code assumes this to be
+                                * correct, as it would be a performance
+                                * loss to always recount.  Handling
+                                * errors is always unusual, of course.
+                                */
+                               recount_segments(SCpnt);
+                       } else {
+                               SCpnt = scsi_allocate_device(SDpnt, FALSE, FALSE);
+                       }
+                       /*
+                        * If so, we are ready to do something.  Bump the count
+                        * while the queue is locked and then break out of the
+                        * loop. Otherwise loop around and try another request.
+                        */
+                       if (!SCpnt) {
+                               break;
+                       }
+               }
+
+               /*
+                * Now bump the usage count for both the host and the
+                * device.
+                */
+               SHpnt->host_busy++;
+               SDpnt->device_busy++;
+
+               /*
+                * Finally, before we release the lock, we copy the
+                * request to the command block, and remove the
+                * request from the request list.   Note that we always
+                * operate on the queue head - there is absolutely no
+                * reason to search the list, because all of the commands
+                * in this queue are for the same device.
+                */
+               blkdev_dequeue_request(req);
+
+               if (req != &SCpnt->request && req != &SRpnt->sr_request ) {
+                       memcpy(&SCpnt->request, req, sizeof(struct request));
+
+                       /*
+                        * We have copied the data out of the request block -
+                        * it is now in a field in SCpnt.  Release the request
+                        * block.
+                        */
+                       blkdev_release_request(req);
+               }
+               /*
+                * Now it is finally safe to release the lock.  We are
+                * not going to noodle the request list until this
+                * request has been queued and we loop back to queue
+                * another.  
+                */
+               req = NULL;
+               spin_unlock_irq(&io_request_lock);
+
+               if (SCpnt->request.cmd != SPECIAL) {
+                       /*
+                        * This will do a couple of things:
+                        *  1) Fill in the actual SCSI command.
+                        *  2) Fill in any other upper-level specific fields
+                        * (timeout).
+                        *
+                        * If this returns 0, it means that the request failed
+                        * (reading past end of disk, reading offline device,
+                        * etc).   This won't actually talk to the device, but
+                        * some kinds of consistency checking may cause the     
+                        * request to be rejected immediately.
+                        */
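+                       /*
+                        * Note: req was cleared above, and STpnt is
+                        * non-NULL whenever the request did not come in
+                        * as a SPECIAL command, so this branch should
+                        * never be taken.
+                        */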
+                       if (STpnt == NULL) {
+                               STpnt = scsi_get_request_dev(req);
+                       }
+                       /* 
+                        * This sets up the scatter-gather table (allocating if
+                        * required).  Hosts that need bounce buffers will also
+                        * get those allocated here.  
+                        */
+                       if (!SDpnt->scsi_init_io_fn(SCpnt)) {
+                               SCpnt = __scsi_end_request(SCpnt, 0, 
+                                                          SCpnt->request.nr_sectors, 0, 0);
+                               if( SCpnt != NULL )
+                               {
+                                       panic("Should not have leftover blocks\n");
+                               }
+                               spin_lock_irq(&io_request_lock);
+                               SHpnt->host_busy--;
+                               SDpnt->device_busy--;
+                               continue;
+                       }
+                       /*
+                        * Initialize the actual SCSI command for this request.
+                        */
+                       if (!STpnt->init_command(SCpnt)) {
+                               scsi_release_buffers(SCpnt);
+                               SCpnt = __scsi_end_request(SCpnt, 0, 
+                                                          SCpnt->request.nr_sectors, 0, 0);
+                               if( SCpnt != NULL )
+                               {
+                                       panic("Should not have leftover blocks\n");
+                               }
+                               spin_lock_irq(&io_request_lock);
+                               SHpnt->host_busy--;
+                               SDpnt->device_busy--;
+                               continue;
+                       }
+               }
+               /*
+                * Finally, initialize any error handling parameters, and set up
+                * the timers for timeouts.
+                */
+               scsi_init_cmd_errh(SCpnt);
+
+               /*
+                * Dispatch the command to the low-level driver.
+                */
+               scsi_dispatch_cmd(SCpnt);
+
+               /*
+                * Now we need to grab the lock again.  We are about to mess
+                * with the request queue and try to find another command.
+                */
+               spin_lock_irq(&io_request_lock);
+       }
+}
+
+/*
+ * Function:    scsi_block_requests()
+ *
+ * Purpose:     Utility function used by low-level drivers to prevent further
+ *             commands from being queued to the device.
+ *
+ * Arguments:   SHpnt       - Host in question
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes:       There is no timer nor any other means by which the requests
+ *             get unblocked other than the low-level driver calling
+ *             scsi_unblock_requests().
+ */
+void scsi_block_requests(struct Scsi_Host * SHpnt)
+{
+       SHpnt->host_self_blocked = TRUE;
+}
+
+/*
+ * Function:    scsi_unblock_requests()
+ *
+ * Purpose:     Utility function used by low-level drivers to allow further
+ *             commands to be queued to the device.
+ *
+ * Arguments:   SHpnt       - Host in question
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes:       There is no timer nor any other means by which the requests
+ *             get unblocked other than the low-level driver calling
+ *             scsi_unblock_requests().
+ *
+ *             This is done as an API function so that changes to the
+ *             internals of the scsi mid-layer won't require wholesale
+ *             changes to drivers that use this feature.
+ */
+void scsi_unblock_requests(struct Scsi_Host * SHpnt)
+{
+       Scsi_Device *SDloop;
+
+       SHpnt->host_self_blocked = FALSE;
+       /* Now that we are unblocked, try to start the queues. */
+       for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next)
+               scsi_queue_next_request(&SDloop->request_queue, NULL);
+}
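+/*
+ * Illustrative sketch, not part of the original source: the intended
+ * pairing in a low-level driver, with reconfigure_adapter() standing
+ * in for whatever driver-private work must run with the queue quiet.
+ *
+ *     scsi_block_requests(shost);
+ *     reconfigure_adapter(shost);
+ *     scsi_unblock_requests(shost);
+ */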
+
+/*
+ * Function:    scsi_report_bus_reset()
+ *
+ * Purpose:     Utility function used by low-level drivers to report that
+ *             they have observed a bus reset on the bus being handled.
+ *
+ * Arguments:   SHpnt       - Host in question
+ *             channel     - channel on which reset was observed.
+ *
+ * Returns:     Nothing
+ *
+ * Lock status: No locks are assumed held.
+ *
+ * Notes:       This only needs to be called if the reset is one which
+ *             originates from an unknown location.  Resets originated
+ *             by the mid-level itself don't require this call, but it
+ *             does no harm.
+ *
+ *             The main purpose of this is to make sure that a CHECK_CONDITION
+ *             is properly treated.
+ */
+void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel)
+{
+       Scsi_Device *SDloop;
+       for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next) {
+               if (channel == SDloop->channel) {
+                       SDloop->was_reset = 1;
+                       SDloop->expecting_cc_ua = 1;
+               }
+       }
+}
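+/*
+ * Illustrative sketch, not part of the original source: a low-level
+ * driver that observes a third-party reset on channel ch reports it
+ * from its interrupt handler,
+ *
+ *     scsi_report_bus_reset(shost, ch);
+ *
+ * so that the subsequent CHECK CONDITION / UNIT ATTENTION from devices
+ * on that channel is treated as expected.
+ */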
+
+/*
+ * FIXME(eric) - these are empty stubs for the moment.  I need to re-implement
+ * host blocking from scratch. The theory is that hosts that wish to block
+ * will register/deregister using these functions instead of the old way
+ * of setting the wish_block flag.
+ *
+ * The details of the implementation remain to be settled, however the
+ * stubs are here now so that the actual drivers will properly compile.
+ */
+void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
+
+void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
diff --git a/xen-2.4.16/drivers/scsi/scsi_merge.c b/xen-2.4.16/drivers/scsi/scsi_merge.c
new file mode 100644 (file)
index 0000000..985e74a
--- /dev/null
@@ -0,0 +1,1181 @@
+/*
+ *  scsi_merge.c Copyright (C) 1999 Eric Youngdale
+ *
+ *  SCSI queueing library.
+ *      Initial versions: Eric Youngdale (eric@andante.org).
+ *                        Based upon conversations with large numbers
+ *                        of people at Linux Expo.
+ *     Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com).
+ */
+
+/*
+ * This file contains queue management functions that are used by SCSI.
+ * They serve several purposes.   First, we need to ensure
+ * that commands do not grow so large that they cannot be handled all at
+ * once by a host adapter.   The various flavors of merge functions included
+ * here serve this purpose.
+ *
+ * Note that it would be quite trivial to allow the low-level driver the
+ * flexibility to define its own queue handling functions.  For the time
+ * being, the hooks are not present.   Right now we are just using the
+ * data in the host template as an indicator of how we should be handling
+ * queues, and we select routines that are optimized for that purpose.
+ *
+ * Some hosts do not impose any restrictions on the size of a request.
+ * In such cases none of the merge functions in this file are called,
+ * and we allow ll_rw_blk to merge requests in the default manner.
+ * This isn't guaranteed to be optimal, but it should be pretty darned
+ * good.   If someone comes up with ideas of better ways of managing queues
+ * to improve on the default behavior, then certainly fit it into this
+ * scheme in whatever manner makes the most sense.   Please note that
+ * since each device has its own queue, we have considerable flexibility
+ * in queue management.
+ */
+
+#define __NO_VERSION__
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+/*
+ * This means that bounce buffers cannot be allocated in chunks > PAGE_SIZE.
+ * Ultimately we should get away from using a dedicated DMA bounce buffer
+ * pool, and we should try to use kmalloc() instead.  If we can
+ * eliminate this pool, then this restriction would no longer be needed.
+ */
+#define DMA_SEGMENT_SIZE_LIMITED
+
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+/*
+ * Enable a bunch of additional consistency checking.   Turn this off
+ * if you are benchmarking.
+ */
+static int dump_stats(struct request *req,
+                     int use_clustering,
+                     int dma_host,
+                     int segments)
+{
+       struct buffer_head *bh;
+
+       /*
+        * Dump the information that we have.  We know we have an
+        * inconsistency.
+        */
+       printk("nr_segments is %x\n", req->nr_segments);
+       printk("counted segments is %x\n", segments);
+       printk("Flags %d %d\n", use_clustering, dma_host);
+       for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) 
+       {
+               printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
+                      bh,
+                      bh->b_size >> 9,
+                      virt_to_phys(bh->b_data - 1));
+       }
+       panic("Ththththaats all folks.  Too dangerous to continue.\n");
+}
+
+
+/*
+ * Simple sanity check that we will use for the first go around
+ * in order to ensure that we are doing the counting correctly.
+ * This can be removed for optimization.
+ */
+#define SANITY_CHECK(req, _CLUSTER, _DMA)                              \
+    if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) )      \
+    {                                                                  \
+       printk("Incorrect segment count at 0x%p", current_text_addr()); \
+       dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \
+    }
+#else
+#define SANITY_CHECK(req, _CLUSTER, _DMA)
+#endif
+
+static void dma_exhausted(Scsi_Cmnd * SCpnt, int i)
+{
+       int jj;
+       struct scatterlist *sgpnt;
+       void **bbpnt;
+       int consumed = 0;
+
+       sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+       bbpnt = SCpnt->bounce_buffers;
+
+       /*
+        * Now print out a bunch of stats.  First, start with the request
+        * size.
+        */
+       printk("dma_free_sectors:%d\n", scsi_dma_free_sectors);
+       printk("use_sg:%d\ti:%d\n", SCpnt->use_sg, i);
+       printk("request_bufflen:%d\n", SCpnt->request_bufflen);
+       /*
+        * Now dump the scatter-gather table, up to the point of failure.
+        */
+       for(jj=0; jj < SCpnt->use_sg; jj++)
+       {
+               printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n",
+                      jj,
+                      sgpnt[jj].length,
+                      sgpnt[jj].address,
+                      (bbpnt ? bbpnt[jj] : NULL));
+               if (bbpnt && bbpnt[jj])
+                       consumed += sgpnt[jj].length;
+       }
+       printk("Total %d sectors consumed\n", consumed);
+       panic("DMA pool exhausted");
+}
+
+#define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering)
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function:    __count_segments()
+ *
+ * Purpose:     Count the number of scatter-gather segments needed for
+ *              a request.
+ *
+ * Arguments:   req     - request whose segments we are counting.
+ *              use_clustering - 1 if this host wishes to use clustering
+ *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ *                      expose all of the address lines, so that DMA cannot
+ *                      be done from an arbitrary address).
+ *             remainder - used to track the residual size of the last
+ *                     segment.  Comes in handy when we want to limit the 
+ *                     size of bounce buffer segments to PAGE_SIZE.
+ *
+ * Returns:     Count of the number of SG segments for the request.
+ *
+ * Lock status: 
+ *
+ * Notes:       This is used for diagnostic purposes, and whenever the
+ *              segment count must be recomputed from scratch (see
+ *              recount_segments() and __init_io()).
+ */
+__inline static int __count_segments(struct request *req,
+                                    int use_clustering,
+                                    int dma_host,
+                                    int * remainder)
+{
+       int ret = 1;
+       int reqsize = 0;
+       struct buffer_head *bh;
+       struct buffer_head *bhnext;
+
+       if( remainder != NULL ) {
+               reqsize = *remainder;
+       }
+
+       /*
+        * Add in the size increment for the first buffer.
+        */
+       bh = req->bh;
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+       if( reqsize + bh->b_size > PAGE_SIZE ) {
+               ret++;
+               reqsize = bh->b_size;
+       } else {
+               reqsize += bh->b_size;
+       }
+#else
+       reqsize += bh->b_size;
+#endif
+
+       for (bh = req->bh, bhnext = bh->b_reqnext; 
+            bhnext != NULL; 
+            bh = bhnext, bhnext = bh->b_reqnext) {
+               if (use_clustering) {
+                       /* 
+                        * See if we can do this without creating another
+                        * scatter-gather segment.  In the event that this is a
+                        * DMA capable host, make sure that a segment doesn't span
+                        * the DMA threshold boundary.  
+                        */
+                       if (dma_host &&
+                           virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) {
+                               ret++;
+                               reqsize = bhnext->b_size;
+                       } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) {
+                               /*
+                                * This one is OK.  Let it go.
+                                */ 
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+                               /* Note scsi_malloc is only able to hand out
+                                * chunks of memory in sizes of PAGE_SIZE or
+                                * less.  Thus we need to keep track of
+                                * the size of the piece that we have
+                                * seen so far, and if we have hit
+                                * the limit of PAGE_SIZE, then we are
+                                * kind of screwed and we need to start
+                                * another segment.
+                                */
+                               if( dma_host
+                                   && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD
+                                   && reqsize + bhnext->b_size > PAGE_SIZE )
+                               {
+                                       ret++;
+                                       reqsize = bhnext->b_size;
+                                       continue;
+                               }
+#endif
+                               reqsize += bhnext->b_size;
+                               continue;
+                       }
+                       ret++;
+                       reqsize = bhnext->b_size;
+               } else {
+                       ret++;
+                       reqsize = bhnext->b_size;
+               }
+       }
+       if( remainder != NULL ) {
+               *remainder = reqsize;
+       }
+       return ret;
+}
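+
+/*
+ * A worked example (hypothetical addresses; clustering on, no ISA DMA
+ * restrictions): three 1K buffers with b_data at 0x1000, 0x1400 and
+ * 0x2000.  The first pair is contiguous (0x1000 + 0x400 == 0x1400) and
+ * clusters into one segment; the third buffer does not follow on
+ * (0x1400 + 0x400 != 0x2000), so __count_segments() returns 2.
+ */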
+
+/*
+ * Function:    recount_segments()
+ *
+ * Purpose:     Recount the number of scatter-gather segments for this request.
+ *
+ * Arguments:   req     - request that needs recounting.
+ *
+ * Returns:     Count of the number of SG segments for the request.
+ *
+ * Lock status: Irrelevant.
+ *
+ * Notes:      This is only used when we have partially completed requests
+ *             and the bit that is leftover is of an indeterminate size.
+ *             This can come up if you get a MEDIUM_ERROR, for example,
+ *             as we will have "completed" all of the sectors up to and
+ *             including the bad sector, and the leftover bit is what
+ *             we have to do now.  This tends to be a rare occurrence, so
+ *             we aren't busting our butts to instantiate separate versions
+ *             of this function for the 4 different flag values.  We
+ *             probably should, however.
+ */
+void
+recount_segments(Scsi_Cmnd * SCpnt)
+{
+       struct request *req;
+       struct Scsi_Host *SHpnt;
+       Scsi_Device * SDpnt;
+
+       req   = &SCpnt->request;
+       SHpnt = SCpnt->host;
+       SDpnt = SCpnt->device;
+
+       req->nr_segments = __count_segments(req, 
+                                           CLUSTERABLE_DEVICE(SHpnt, SDpnt),
+                                           SHpnt->unchecked_isa_dma, NULL);
+}
+
+#define MERGEABLE_BUFFERS(X,Y) \
+(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \
+  (DMA_CHUNK_SIZE - 1)) == 0)
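+
+/*
+ * MERGEABLE_BUFFERS() only matters when DMA_CHUNK_SIZE is defined,
+ * i.e. on platforms where pci_map_sg() can fuse adjacent scatterlist
+ * entries.  It tests that X ends, and Y begins, on a DMA_CHUNK_SIZE
+ * boundary.  For example, with a (hypothetical) DMA_CHUNK_SIZE of
+ * 0x1000, a buffer ending at 0x5000 paired with one starting at 0x7000
+ * gives ((0x5000 | 0x7000) & 0xfff) == 0, so the mapping hardware can
+ * make the two chunks appear contiguous in bus space.
+ */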
+
+#ifdef DMA_CHUNK_SIZE
+static inline int scsi_new_mergeable(request_queue_t * q,
+                                    struct request * req,
+                                    struct Scsi_Host *SHpnt,
+                                    int max_segments)
+{
+       /*
+        * pci_map_sg will be able to merge these two
+        * into a single hardware sg entry; check that
+        * we'll have enough memory for the sg list.
+        * scsi.c allocates for this purpose
+        * min(64,sg_tablesize) entries.
+        */
+       if (req->nr_segments >= max_segments ||
+           req->nr_segments >= SHpnt->sg_tablesize)
+               return 0;
+       req->nr_segments++;
+       return 1;
+}
+
+static inline int scsi_new_segment(request_queue_t * q,
+                                  struct request * req,
+                                  struct Scsi_Host *SHpnt,
+                                  int max_segments)
+{
+       /*
+        * pci_map_sg won't be able to map these two
+        * into a single hardware sg entry, so we have to
+        * check if things fit into sg_tablesize.
+        */
+       if (req->nr_hw_segments >= SHpnt->sg_tablesize ||
+            req->nr_segments >= SHpnt->sg_tablesize)
+               return 0;
+       req->nr_hw_segments++;
+       req->nr_segments++;
+       return 1;
+}
+#else
+static inline int scsi_new_segment(request_queue_t * q,
+                                  struct request * req,
+                                  struct Scsi_Host *SHpnt,
+                                  int max_segments)
+{
+       if (req->nr_segments < SHpnt->sg_tablesize &&
+           req->nr_segments < max_segments) {
+               /*
+                * This will form the start of a new segment.  Bump the 
+                * counter.
+                */
+               req->nr_segments++;
+               return 1;
+       } else {
+               return 0;
+       }
+}
+#endif
+
+/*
+ * Function:    __scsi_back_merge_fn() / __scsi_front_merge_fn()
+ *
+ * Purpose:     Prototype for queue merge function.
+ *
+ * Arguments:   q       - Queue for which we are merging request.
+ *              req     - request into which we wish to merge.
+ *              bh      - Block which we may wish to merge into request
+ *              use_clustering - 1 if this host wishes to use clustering
+ *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ *                      expose all of the address lines, so that DMA cannot
+ *                      be done from an arbitrary address).
+ *
+ * Returns:     1 if it is OK to merge the block into the request.  0
+ *              if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes:       Some drivers have limited scatter-gather table sizes, and
+ *              thus they cannot queue an infinitely large command.  This
+ *              function is called from ll_rw_blk before it attempts to merge
+ *              a new block into a request to make sure that the request will
+ *              not become too large.
+ *
+ *              This function is not designed to be directly called.  Instead
+ *              it should be referenced from other functions where the
+ *              use_clustering and dma_host parameters should be integer
+ *              constants.  The compiler should thus be able to properly
+ *              optimize the code, eliminating stuff that is irrelevant.
+ *              It is more maintainable to do this way with a single function
+ *              than to have 4 separate functions all doing roughly the
+ *              same thing.
+ */
+__inline static int __scsi_back_merge_fn(request_queue_t * q,
+                                        struct request *req,
+                                        struct buffer_head *bh,
+                                        int max_segments,
+                                        int use_clustering,
+                                        int dma_host)
+{
+       unsigned int count;
+       unsigned int segment_size = 0;
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *SHpnt;
+
+       SDpnt = (Scsi_Device *) q->queuedata;
+       SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+       if (max_segments > 64)
+               max_segments = 64;
+#endif
+
+       if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+               return 0;
+
+       if (use_clustering) {
+               /* 
+                * See if we can do this without creating another
+                * scatter-gather segment.  In the event that this is a
+                * DMA capable host, make sure that a segment doesn't span
+                * the DMA threshold boundary.  
+                */
+               if (dma_host &&
+                   virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+                       goto new_end_segment;
+               }
+               if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+                       if( dma_host
+                           && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+                               segment_size = 0;
+                               count = __count_segments(req, use_clustering, dma_host, &segment_size);
+                               if( segment_size + bh->b_size > PAGE_SIZE ) {
+                                       goto new_end_segment;
+                               }
+                       }
+#endif
+                       /*
+                        * This one is OK.  Let it go.
+                        */
+                       return 1;
+               }
+       }
+ new_end_segment:
+#ifdef DMA_CHUNK_SIZE
+       if (MERGEABLE_BUFFERS(req->bhtail, bh))
+               return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+       return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+__inline static int __scsi_front_merge_fn(request_queue_t * q,
+                                         struct request *req,
+                                         struct buffer_head *bh,
+                                         int max_segments,
+                                         int use_clustering,
+                                         int dma_host)
+{
+       unsigned int count;
+       unsigned int segment_size = 0;
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *SHpnt;
+
+       SDpnt = (Scsi_Device *) q->queuedata;
+       SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+       if (max_segments > 64)
+               max_segments = 64;
+#endif
+
+       if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors)
+               return 0;
+
+       if (use_clustering) {
+               /* 
+                * See if we can do this without creating another
+                * scatter-gather segment.  In the event that this is a
+                * DMA capable host, make sure that a segment doesn't span
+                * the DMA threshold boundary. 
+                */
+               if (dma_host &&
+                   virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) {
+                       goto new_start_segment;
+               }
+               if (CONTIGUOUS_BUFFERS(bh, req->bh)) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+                       if( dma_host
+                           && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) {
+                               segment_size = bh->b_size;
+                               count = __count_segments(req, use_clustering, dma_host, &segment_size);
+                               if( count != req->nr_segments ) {
+                                       goto new_start_segment;
+                               }
+                       }
+#endif
+                       /*
+                        * This one is OK.  Let it go.
+                        */
+                       return 1;
+               }
+       }
+ new_start_segment:
+#ifdef DMA_CHUNK_SIZE
+       if (MERGEABLE_BUFFERS(bh, req->bh))
+               return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+       return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+/*
+ * Function:    scsi_back_merge_fn_*() / scsi_front_merge_fn_*()
+ *
+ * Purpose:     Queue merge functions, specialized per host type.
+ *
+ * Arguments:   q       - Queue for which we are merging request.
+ *              req     - request into which we wish to merge.
+ *              bh      - Block which we may wish to merge into request
+ *
+ * Returns:     1 if it is OK to merge the block into the request.  0
+ *              if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes:       Optimized for different cases depending upon whether
+ *              ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA)               \
+static int _FUNCTION(request_queue_t * q,                              \
+                    struct request * req,                              \
+                    struct buffer_head * bh,                           \
+                    int max_segments)                                  \
+{                                                                      \
+    int ret;                                                           \
+    SANITY_CHECK(req, _CLUSTER, _DMA);                                 \
+    ret =  __scsi_ ## _BACK_FRONT ## _merge_fn(q,                      \
+                                              req,                     \
+                                              bh,                      \
+                                              max_segments,            \
+                                              _CLUSTER,                \
+                                              _DMA);                   \
+    return ret;                                                                \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEFCT(scsi_back_merge_fn_, back, 0, 0)
+MERGEFCT(scsi_back_merge_fn_c, back, 1, 0)
+MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1)
+
+MERGEFCT(scsi_front_merge_fn_, front, 0, 0)
+MERGEFCT(scsi_front_merge_fn_c, front, 1, 0)
+MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1)
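+
+/*
+ * For instance, MERGEFCT(scsi_back_merge_fn_c, back, 1, 0) above
+ * expands to (roughly):
+ *
+ *     static int scsi_back_merge_fn_c(request_queue_t *q,
+ *                                     struct request *req,
+ *                                     struct buffer_head *bh,
+ *                                     int max_segments)
+ *     {
+ *             return __scsi_back_merge_fn(q, req, bh, max_segments, 1, 0);
+ *     }
+ *
+ * With the flags as compile-time constants, the inlined body loses its
+ * dead dma_host branches entirely.
+ */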
+
+/*
+ * Function:    __scsi_merge_requests_fn()
+ *
+ * Purpose:     Prototype for queue merge function.
+ *
+ * Arguments:   q       - Queue for which we are merging request.
+ *              req     - request into which we wish to merge.
+ *              next    - 2nd request that we might want to combine with req
+ *              use_clustering - 1 if this host wishes to use clustering
+ *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ *                      expose all of the address lines, so that DMA cannot
+ *                      be done from an arbitrary address).
+ *
+ * Returns:     1 if it is OK to merge the two requests.  0
+ *              if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes:       Some drivers have limited scatter-gather table sizes, and
+ *              thus they cannot queue an infinitely large command.  This
+ *              function is called from ll_rw_blk before it attempts to merge
+ *              two requests, to make sure that the combined request will
+ *              not become too large.
+ *
+ *              This function is not designed to be directly called.  Instead
+ *              it should be referenced from other functions where the
+ *              use_clustering and dma_host parameters should be integer
+ *              constants.  The compiler should thus be able to properly
+ *              optimize the code, eliminating stuff that is irrelevant.
+ *              It is more maintainable to do this way with a single function
+ *              than to have 4 separate functions all doing roughly the
+ *              same thing.
+ */
+__inline static int __scsi_merge_requests_fn(request_queue_t * q,
+                                            struct request *req,
+                                            struct request *next,
+                                            int max_segments,
+                                            int use_clustering,
+                                            int dma_host)
+{
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *SHpnt;
+
+       /*
+        * First check whether either of the requests is a re-queued
+        * request.  Can't merge them if they are.
+        */
+       if (req->special || next->special)
+               return 0;
+
+       SDpnt = (Scsi_Device *) q->queuedata;
+       SHpnt = SDpnt->host;
+
+#ifdef DMA_CHUNK_SIZE
+       if (max_segments > 64)
+               max_segments = 64;
+
+       /* If it would not fit into the prepared memory space for the sg
+        * chain, then don't allow the merge.
+        */
+       if (req->nr_segments + next->nr_segments - 1 > max_segments ||
+           req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+               return 0;
+       }
+       if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) {
+               return 0;
+       }
+#else
+       /*
+        * If the two requests together are too large (even assuming that
+        * we can merge the boundary segments into one), then don't
+        * allow the merge.
+        */
+       if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) {
+               return 0;
+       }
+#endif
+
+       if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors)
+               return 0;
+
+       /*
+        * The main question is whether the two segments at the boundaries
+        * would be considered one or two.
+        */
+       if (use_clustering) {
+               /* 
+                * See if we can do this without creating another
+                * scatter-gather segment.  In the event that this is a
+                * DMA capable host, make sure that a segment doesn't span
+                * the DMA threshold boundary.  
+                */
+               if (dma_host &&
+                   virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) {
+                       goto dont_combine;
+               }
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+               /*
+                * We currently can only allocate scatter-gather bounce
+                * buffers in chunks of PAGE_SIZE or less.
+                */
+               if (dma_host
+                   && CONTIGUOUS_BUFFERS(req->bhtail, next->bh)
+                   && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD )
+               {
+                       int segment_size = 0;
+                       int count = 0;
+
+                       count = __count_segments(req, use_clustering, dma_host, &segment_size);
+                       count += __count_segments(next, use_clustering, dma_host, &segment_size);
+                       if( count != req->nr_segments + next->nr_segments ) {
+                               goto dont_combine;
+                       }
+               }
+#endif
+               if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) {
+                       /*
+                        * This one is OK.  Let it go.
+                        */
+                       req->nr_segments += next->nr_segments - 1;
+#ifdef DMA_CHUNK_SIZE
+                       req->nr_hw_segments += next->nr_hw_segments - 1;
+#endif
+                       return 1;
+               }
+       }
+      dont_combine:
+#ifdef DMA_CHUNK_SIZE
+       if (req->nr_segments + next->nr_segments > max_segments ||
+           req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+               return 0;
+       }
+       /* If dynamic DMA mapping can merge the last segment in req with
+        * the first segment in next, then the check for hw segments was
+        * done above already, so we can always merge.
+        */
+       if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) {
+               req->nr_hw_segments += next->nr_hw_segments - 1;
+       } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) {
+               return 0;
+       } else {
+               req->nr_hw_segments += next->nr_hw_segments;
+       }
+       req->nr_segments += next->nr_segments;
+       return 1;
+#else
+       /*
+        * We know that the two segments at the boundary cannot be combined.
+        * Make sure we can fit a request that is the sum of the two.
+        * A slightly stricter test than we had above.
+        */
+       if (req->nr_segments + next->nr_segments > max_segments ||
+           req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) {
+               return 0;
+       } else {
+               /*
+                * This will form the start of a new segment.  Bump the 
+                * counter.
+                */
+               req->nr_segments += next->nr_segments;
+               return 1;
+       }
+#endif
+}
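+
+/*
+ * Note the "- 1" in the checks above: it assumes the best case, in
+ * which the tail segment of req and the head segment of next cluster
+ * into one.  A 3-segment request merged with a 2-segment request then
+ * needs 4 scatter-gather entries rather than 5.
+ */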
+
+/*
+ * Function:    scsi_merge_requests_fn_()
+ *
+ * Purpose:     queue merge function.
+ *
+ * Arguments:   q       - Queue for which we are merging request.
+ *              req     - request into which we wish to merge.
+ *              next    - second request that we might want to combine with req
+ *
+ * Returns:     1 if it is OK to merge the two requests.  0
+ *              if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes:       Optimized for different cases depending upon whether
+ *              ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA)         \
+static int _FUNCTION(request_queue_t * q,              \
+                    struct request * req,              \
+                    struct request * next,             \
+                    int max_segments)                  \
+{                                                      \
+    int ret;                                           \
+    SANITY_CHECK(req, _CLUSTER, _DMA);                 \
+    ret =  __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \
+    return ret;                                                \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEREQFCT(scsi_merge_requests_fn_, 0, 0)
+MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0)
+MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1)
+/*
+ * Function:    __init_io()
+ *
+ * Purpose:     Prototype for io initialize function.
+ *
+ * Arguments:   SCpnt   - Command descriptor we wish to initialize
+ *              sg_count_valid  - 1 if the sg count in the req is valid.
+ *              use_clustering - 1 if this host wishes to use clustering
+ *              dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ *                      expose all of the address lines, so that DMA cannot
+ *                      be done from an arbitrary address).
+ *
+ * Returns:     1 on success.
+ *
+ * Lock status: 
+ *
+ * Notes:       Only the SCpnt argument should be a non-constant variable.
+ *              This function is designed in such a way that it will be
+ *              invoked from a series of small stubs, each of which would
+ *              be optimized for specific circumstances.
+ *
+ *              The advantage of this is that hosts that don't do DMA
+ *              get versions of the function that essentially don't have
+ *              any of the DMA code.  Same goes for clustering - in the
+ *              case of hosts with no need for clustering, there is no point
+ *              in a whole bunch of overhead.
+ *
+ *              Finally, in the event that a host has set sg_tablesize to
+ *              SG_ALL, implying that there is no limit to the length of a
+ *              scatter-gather list, the sg count in the request won't be
+ *              valid (mainly because we don't need the queue management
+ *              functions which keep the tally up to date).
+ */
+__inline static int __init_io(Scsi_Cmnd * SCpnt,
+                             int sg_count_valid,
+                             int use_clustering,
+                             int dma_host)
+{
+       struct buffer_head * bh;
+       struct buffer_head * bhprev;
+       char               * buff;
+       int                  count;
+       int                  i;
+       struct request     * req;
+       int                  sectors;
+       struct scatterlist * sgpnt;
+       int                  this_count;
+       void               ** bbpnt;
+
+       /*
+        * FIXME(eric) - don't inline this - it doesn't depend on the
+        * integer flags.   Come to think of it, I don't think this is even
+        * needed any more.  Need to play with it and see if we hit the
+        * panic.  If not, then don't bother.
+        */
+       if (!SCpnt->request.bh) {
+               /* 
+                * Case of page request (i.e. raw device), or unlinked buffer 
+                * Typically used for swapping, but this isn't how we do
+                * swapping any more.
+                */
+               panic("I believe this is dead code.  If we hit this, I was wrong");
+#if 0
+               SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9;
+               SCpnt->request_buffer = SCpnt->request.buffer;
+               SCpnt->use_sg = 0;
+               /*
+                * FIXME(eric) - need to handle DMA here.
+                */
+#endif
+               return 1;
+       }
+       req = &SCpnt->request;
+       /*
+        * First we need to know how many scatter gather segments are needed.
+        */
+       if (!sg_count_valid) {
+               count = __count_segments(req, use_clustering, dma_host, NULL);
+       } else {
+               count = req->nr_segments;
+       }
+
+       /*
+        * If the dma pool is nearly empty, then queue a minimal request
+        * with a single segment.  Typically this will satisfy a single
+        * buffer.
+        */
+       if (dma_host && scsi_dma_free_sectors <= 10) {
+               this_count = SCpnt->request.current_nr_sectors;
+               goto single_segment;
+       }
+       /*
+        * Don't bother with scatter-gather if there is only one segment.
+        */
+       if (count == 1) {
+               this_count = SCpnt->request.nr_sectors;
+               goto single_segment;
+       }
+       SCpnt->use_sg = count;
+
+       /* 
+        * Allocate the actual scatter-gather table itself.
+        */
+       SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist));
+
+       /* If we could potentially require ISA bounce buffers, allocate
+        * space for this array here.
+        */
+       if (dma_host)
+               SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *));
+
+       /* scsi_malloc can only allocate in multiples of 512 bytes, so
+        * round it up.
+        */
+       SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511;
+
+       sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len);
+
+       /*
+        * Now fill the scatter-gather table.
+        */
+       if (!sgpnt) {
+               /*
+                * If we cannot allocate the scatter-gather table, then
+                * simply write the first buffer all by itself.
+                */
+               printk("Warning - running *really* short on DMA buffers\n");
+               this_count = SCpnt->request.current_nr_sectors;
+               goto single_segment;
+       }
+       /* 
+        * Next, walk the list, and fill in the addresses and sizes of
+        * each segment.
+        */
+       memset(sgpnt, 0, SCpnt->sglist_len);
+       SCpnt->request_buffer = (char *) sgpnt;
+       SCpnt->request_bufflen = 0;
+       bhprev = NULL;
+
+       if (dma_host)
+               bbpnt = (void **) ((char *)sgpnt +
+                        (SCpnt->use_sg * sizeof(struct scatterlist)));
+       else
+               bbpnt = NULL;
+
+       SCpnt->bounce_buffers = bbpnt;
+
+       for (count = 0, bh = SCpnt->request.bh;
+            bh; bh = bh->b_reqnext) {
+               if (use_clustering && bhprev != NULL) {
+                       if (dma_host &&
+                           virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) {
+                               /* Nothing - fall through */
+                       } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) {
+                               /*
+                                * This one is OK.  Let it go.  Note that we
+                                * do not have the ability to allocate
+                                * bounce buffer segments > PAGE_SIZE, so
+                                * for now we limit the thing.
+                                */
+                               if( dma_host ) {
+#ifdef DMA_SEGMENT_SIZE_LIMITED
+                                       if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD
+                                           || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) {
+                                               sgpnt[count - 1].length += bh->b_size;
+                                               bhprev = bh;
+                                               continue;
+                                       }
+#else
+                                       sgpnt[count - 1].length += bh->b_size;
+                                       bhprev = bh;
+                                       continue;
+#endif
+                               } else {
+                                       sgpnt[count - 1].length += bh->b_size;
+                                       SCpnt->request_bufflen += bh->b_size;
+                                       bhprev = bh;
+                                       continue;
+                               }
+                       }
+               }
+               count++;
+               sgpnt[count - 1].address = bh->b_data;
+               sgpnt[count - 1].page = NULL;
+               sgpnt[count - 1].length += bh->b_size;
+               if (!dma_host) {
+                       SCpnt->request_bufflen += bh->b_size;
+               }
+               bhprev = bh;
+       }
+
+       /*
+        * Verify that the count is correct.
+        */
+       if (count != SCpnt->use_sg) {
+               printk("Incorrect number of segments after building list\n");
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+               dump_stats(req, use_clustering, dma_host, count);
+#endif
+       }
+       if (!dma_host) {
+               return 1;
+       }
+       /*
+        * Now allocate bounce buffers, if needed.
+        */
+       SCpnt->request_bufflen = 0;
+       for (i = 0; i < count; i++) {
+               sectors = (sgpnt[i].length >> 9);
+               SCpnt->request_bufflen += sgpnt[i].length;
+               if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 >
+                   ISA_DMA_THRESHOLD) {
+                       if( scsi_dma_free_sectors - sectors <= 10  ) {
+                               /*
+                                * If this would nearly drain the DMA
+                                * pool empty, then let's stop here.
+                                * Don't make this request any larger.
+                                * This is kind of a safety valve that
+                                * we use - we could get screwed later
+                                * on if we run out completely.  
+                                */
+                               SCpnt->request_bufflen -= sgpnt[i].length;
+                               SCpnt->use_sg = i;
+                               if (i == 0) {
+                                       goto big_trouble;
+                               }
+                               break;
+                       }
+
+                       bbpnt[i] = sgpnt[i].address;
+                       sgpnt[i].address =
+                           (char *) scsi_malloc(sgpnt[i].length);
+                       /*
+                        * If we cannot allocate memory for this DMA bounce
+                        * buffer, then queue just what we have done so far.
+                        */
+                       if (sgpnt[i].address == NULL) {
+                               printk("Warning - running low on DMA memory\n");
+                               SCpnt->request_bufflen -= sgpnt[i].length;
+                               SCpnt->use_sg = i;
+                               if (i == 0) {
+                                       goto big_trouble;
+                               }
+                               break;
+                       }
+                       if (SCpnt->request.cmd == WRITE) {
+                               memcpy(sgpnt[i].address, bbpnt[i],
+                                      sgpnt[i].length);
+                       }
+               }
+       }
+       return 1;
+
+      big_trouble:
+       /*
+        * We come here in the event that we get one humongous
+        * request, where we need a bounce buffer, and the buffer is
+        * more than we can allocate in a single call to
+        * scsi_malloc().  In addition, we only come here when it is
+        * the 0th element of the scatter-gather table that gets us
+        * into this trouble.  As a fallback, we revert to
+        * non-scatter-gather, and ask for a single segment.  We make
+        * a half-hearted attempt to pick a reasonably large request
+        * size mainly so that we don't thrash the thing with
+        * iddy-biddy requests.
+        */
+
+       /*
+        * The original number of sectors in the 0th element of the
+        * scatter-gather table.  
+        */
+       sectors = sgpnt[0].length >> 9;
+
+       /* 
+        * Free up the original scatter-gather table.  Note that since
+        * it was the 0th element that got us here, we don't have to
+        * go in and free up memory from the other slots.  
+        */
+       SCpnt->request_bufflen = 0;
+       SCpnt->use_sg = 0;
+       scsi_free(SCpnt->request_buffer, SCpnt->sglist_len);
+
+       /*
+        * Make an attempt to pick up as much as we reasonably can.
+        * Just keep adding sectors until the pool starts running kind of
+        * low.  The limit of 30 is somewhat arbitrary - the point is that
+        * it would kind of suck if we dropped down and limited ourselves to
+        * single-block requests if we had hundreds of free sectors.
+        */
+       if( scsi_dma_free_sectors > 30 ) {
+               for (this_count = 0, bh = SCpnt->request.bh;
+                    bh; bh = bh->b_reqnext) {
+                       if( scsi_dma_free_sectors - this_count < 30 
+                           || this_count == sectors )
+                       {
+                               break;
+                       }
+                       this_count += bh->b_size >> 9;
+               }
+
+       } else {
+               /*
+                * Yow!   Take the absolute minimum here.
+                */
+               this_count = SCpnt->request.current_nr_sectors;
+       }
+
+       /*
+        * Now drop through into the single-segment case.
+        */
+       
+      single_segment:
+       /*
+        * Come here if for any reason we choose to do this as a single
+        * segment.  Possibly the entire request, or possibly a small
+        * chunk of the entire request.
+        */
+       bh = SCpnt->request.bh;
+       buff = SCpnt->request.buffer;
+
+       if (dma_host) {
+               /*
+                * Allocate a DMA bounce buffer.  If the allocation fails, fall
+                * back and allocate a really small one - enough to satisfy
+                * the first buffer.
+                */
+               if (virt_to_phys(SCpnt->request.bh->b_data)
+                   + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) {
+                       buff = (char *) scsi_malloc(this_count << 9);
+                       if (!buff) {
+                               printk("Warning - running low on DMA memory\n");
+                               this_count = SCpnt->request.current_nr_sectors;
+                               buff = (char *) scsi_malloc(this_count << 9);
+                               if (!buff) {
+                                       dma_exhausted(SCpnt, 0);
+                               }
+                       }
+                       if (SCpnt->request.cmd == WRITE)
+                               memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9);
+               }
+       }
+       SCpnt->request_bufflen = this_count << 9;
+       SCpnt->request_buffer = buff;
+       SCpnt->use_sg = 0;
+       return 1;
+}
+
+#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA)      \
+static int _FUNCTION(Scsi_Cmnd * SCpnt)                        \
+{                                                      \
+    return __init_io(SCpnt, _VALID, _CLUSTER, _DMA);   \
+}
+
+/*
+ * ll_rw_blk.c now keeps track of the number of segments in
+ * a request.  Thus we don't have to do it any more here.
+ * We always force "_VALID" to 1.  Eventually clean this up
+ * and get rid of the extra argument.
+ */
+INITIO(scsi_init_io_v, 1, 0, 0)
+INITIO(scsi_init_io_vd, 1, 0, 1)
+INITIO(scsi_init_io_vc, 1, 1, 0)
+INITIO(scsi_init_io_vdc, 1, 1, 1)
+
+/*
+ * Function:    initialize_merge_fn()
+ *
+ * Purpose:     Initialize merge function for a host
+ *
+ * Arguments:   SDpnt   - Device descriptor.
+ *
+ * Returns:     Nothing.
+ *
+ * Lock status: 
+ *
+ * Notes:
+ */
+void initialize_merge_fn(Scsi_Device * SDpnt)
+{
+       request_queue_t *q;
+       struct Scsi_Host *SHpnt;
+       SHpnt = SDpnt->host;
+
+       q = &SDpnt->request_queue;
+
+       /*
+        * If the host has already selected a merge manager, then don't
+        * pick a new one.
+        */
+#if 0
+       if (q->back_merge_fn && q->front_merge_fn)
+               return;
+#endif
+       /*
+        * If this host has an unlimited tablesize, then don't bother with a
+        * merge manager.  The whole point of the operation is to make sure
+        * that requests don't grow too large, and this host isn't picky.
+        *
+        * Note that ll_rw_blk.c is effectively maintaining a segment
+        * count which is only valid if clustering is used, and it obviously
+        * doesn't handle the DMA case.   In the end, it
+        * is simply easier to do it ourselves with our own functions
+        * rather than rely upon the default behavior of ll_rw_blk.
+        */
+       if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+               q->back_merge_fn = scsi_back_merge_fn_;
+               q->front_merge_fn = scsi_front_merge_fn_;
+               q->merge_requests_fn = scsi_merge_requests_fn_;
+               SDpnt->scsi_init_io_fn = scsi_init_io_v;
+       } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+               q->back_merge_fn = scsi_back_merge_fn_;
+               q->front_merge_fn = scsi_front_merge_fn_;
+               q->merge_requests_fn = scsi_merge_requests_fn_;
+               SDpnt->scsi_init_io_fn = scsi_init_io_vd;
+       } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) {
+               q->back_merge_fn = scsi_back_merge_fn_c;
+               q->front_merge_fn = scsi_front_merge_fn_c;
+               q->merge_requests_fn = scsi_merge_requests_fn_c;
+               SDpnt->scsi_init_io_fn = scsi_init_io_vc;
+       } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) {
+               q->back_merge_fn = scsi_back_merge_fn_dc;
+               q->front_merge_fn = scsi_front_merge_fn_dc;
+               q->merge_requests_fn = scsi_merge_requests_fn_dc;
+               SDpnt->scsi_init_io_fn = scsi_init_io_vdc;
+       }
+}
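+
+/*
+ * The four-way dispatch above amounts to a small table, keyed on
+ * whether the host clusters and whether it has ISA DMA restrictions:
+ *
+ *     clustering  unchecked_isa_dma   merge fns   init_io fn
+ *         no             no           *_fn_       scsi_init_io_v
+ *         no             yes          *_fn_       scsi_init_io_vd
+ *         yes            no           *_fn_c      scsi_init_io_vc
+ *         yes            yes          *_fn_dc     scsi_init_io_vdc
+ *
+ * The two non-clustering rows share the plain merge functions because
+ * dma_host only matters once clustering tries to fuse adjacent buffers.
+ */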
diff --git a/xen-2.4.16/drivers/scsi/scsi_module.c b/xen-2.4.16/drivers/scsi/scsi_module.c
new file mode 100644 (file)
index 0000000..c14a8ee
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ *  scsi_module.c Copyright (1994, 1995) Eric Youngdale.
+ *
+ * Support for loading low-level scsi drivers using the linux kernel loadable
+ * module interface.
+ *
+ * To use, the host adapter should first define and initialize the variable
+ * driver_template (datatype Scsi_Host_Template), and then include this file.
+ * This should also be wrapped in an #ifdef MODULE / #endif pair.
+ *
+ * The low-level driver must also define a release function which will
+ * free any irq assignments, release any dma channels, release any I/O
+ * address space that might be reserved, and otherwise clean up after itself.
+ * The idea is that the same driver should be able to be reloaded without
+ * any difficulty.  This makes debugging new drivers easier, as you should
+ * be able to load the driver, test it, unload, modify and reload.
+ *
+ * One *very* important caveat.  If the driver may need to do DMA on the
+ * ISA bus, you must have unchecked_isa_dma set in the device template,
+ * even if this might be changed during the detect routine.  This is
+ * because the shpnt structure will be allocated in a special way so that
+ * it will be below the appropriate DMA limit - thus if your driver uses
+ * the hostdata field of shpnt, and the board must be able to access this
+ * via DMA, the shpnt structure must be in a DMA accessible region of
+ * memory.  This comment would be relevant for something like the buslogic
+ * driver where there are many boards, only some of which do DMA onto the
+ * ISA bus.  There is no convenient way of specifying whether the host
+ * needs to be in an ISA DMA accessible region of memory when you call
+ * scsi_register.
+ */
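+
+/*
+ * A minimal sketch of the intended usage, for a hypothetical driver
+ * "exdrv" (the template initializer here is illustrative only):
+ *
+ *     #ifdef MODULE
+ *     static Scsi_Host_Template driver_template = EXDRV_TEMPLATE;
+ *     #include "scsi_module.c"
+ *     #endif
+ */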
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+static int __init init_this_scsi_driver(void)
+{
+       driver_template.module = THIS_MODULE;
+       scsi_register_module(MODULE_SCSI_HA, &driver_template);
+       if (driver_template.present)
+               return 0;
+
+       scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+       return -ENODEV;
+}
+
+static void __exit exit_this_scsi_driver(void)
+{
+       scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+}
+
+module_init(init_this_scsi_driver);
+module_exit(exit_this_scsi_driver);
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_obsolete.c b/xen-2.4.16/drivers/scsi/scsi_obsolete.c
new file mode 100644 (file)
index 0000000..488bb26
--- /dev/null
@@ -0,0 +1,1162 @@
+/*
+ *  scsi_obsolete.c Copyright (C) 1992 Drew Eckhardt
+ *         Copyright (C) 1993, 1994, 1995 Eric Youngdale
+ *
+ *  generic mid-level SCSI driver
+ *      Initial versions: Drew Eckhardt
+ *      Subsequent revisions: Eric Youngdale
+ *
+ *  <drew@colorado.edu>
+ *
+ *  Bug correction thanks go to :
+ *      Rik Faith <faith@cs.unc.edu>
+ *      Tommy Thorn <tthorn>
+ *      Thomas Wuensche <tw@fgb1.fgb.mw.tu-muenchen.de>
+ *
+ *  Modified by Eric Youngdale eric@andante.org to
+ *  add scatter-gather, multiple outstanding request, and other
+ *  enhancements.
+ *
+ *  Native multichannel, wide scsi, /proc/scsi and hot plugging
+ *  support added by Michael Neuffer <mike@i-connect.net>
+ *
+ *  Major improvements to the timeout, abort, and reset processing,
+ *  as well as performance modifications for large queue depths by
+ *  Leonard N. Zubkoff <lnz@dandelion.com>
+ *
+ *  Improved compatibility with 2.0 behaviour by Manfred Spraul
+ *  <masp0008@stud.uni-sb.de>
+ */
+
+/*
+ *#########################################################################
+ *#########################################################################
+ *#########################################################################
+ *#########################################################################
+ *              NOTE - NOTE - NOTE - NOTE - NOTE - NOTE - NOTE
+ *
+ *#########################################################################
+ *#########################################################################
+ *#########################################################################
+ *#########################################################################
+ *
+ * This file contains the 'old' scsi error handling.  It is only present
+ * while the new error handling code is being debugged, and while the low
+ * level drivers are being converted to use the new code.  Once the last
+ * driver uses the new code this *ENTIRE* file will be nuked.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#undef USE_STATIC_SCSI_MEMORY
+
+/*
+   static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_obsolete.c,v 1.1 1997/05/18 23:27:21 eric Exp $";
+ */
+
+
+#define INTERNAL_ERROR (panic ("Internal error in file %s, line %d.\n", __FILE__, __LINE__))
+
+
+static int scsi_abort(Scsi_Cmnd *, int code);
+static int scsi_reset(Scsi_Cmnd *, unsigned int);
+
+extern void scsi_old_done(Scsi_Cmnd * SCpnt);
+int update_timeout(Scsi_Cmnd *, int);
+extern void scsi_old_times_out(Scsi_Cmnd * SCpnt);
+
+extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt);
+
+#define SCSI_BLOCK(HOST) (HOST->can_queue && HOST->host_busy >= HOST->can_queue)
+
+static unsigned char generic_sense[6] =
+{REQUEST_SENSE, 0, 0, 0, 255, 0};
+
+/*
+ *  This is the number of clock ticks we should wait before we time out
+ *  and abort the command.  This applies where the scsi.c module itself
+ *  generates the command, not where it originates from a higher level,
+ *  in which case the timeout is specified there.
+ *
+ *  ABORT_TIMEOUT and RESET_TIMEOUT are the timeouts for RESET and ABORT
+ *  respectively.
+ */
+
+#ifdef DEBUG_TIMEOUT
+static void scsi_dump_status(void);
+#endif
+
+
+#ifdef DEBUG
+#define SCSI_TIMEOUT (5*HZ)
+#else
+#define SCSI_TIMEOUT (2*HZ)
+#endif
+
+#ifdef DEBUG
+#define SENSE_TIMEOUT SCSI_TIMEOUT
+#define ABORT_TIMEOUT SCSI_TIMEOUT
+#define RESET_TIMEOUT SCSI_TIMEOUT
+#else
+#define SENSE_TIMEOUT (5*HZ/10)
+#define RESET_TIMEOUT (5*HZ/10)
+#define ABORT_TIMEOUT (5*HZ/10)
+#endif
+
+
+/* Do not call reset on error if we just did a reset within 15 sec. */
+#define MIN_RESET_PERIOD (15*HZ)
+
+
+
+/*
+ *  Flag bits for the internal_timeout array
+ */
+#define IN_ABORT  1
+#define IN_RESET  2
+#define IN_RESET2 4
+#define IN_RESET3 8
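+
+/*
+ * These bits record how far the timeout escalation has progressed, so
+ * that successive timeouts on the same command walk the ladder in
+ * scsi_old_times_out(): abort, then an asynchronous reset, then a
+ * suggested bus reset (IN_RESET2), then a suggested host reset
+ * (IN_RESET3), and finally giving up.
+ */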
+
+/*
+ * This is our timeout function, called when the timer expires for a
+ * given host adapter.  It will attempt to abort the currently executing
+ * command and, failing that, escalate through progressively harder resets.
+ */
+
+void scsi_old_times_out(Scsi_Cmnd * SCpnt)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+
+       /* Set the serial_number_at_timeout to the current serial_number */
+       SCpnt->serial_number_at_timeout = SCpnt->serial_number;
+
+       switch (SCpnt->internal_timeout & (IN_ABORT | IN_RESET | IN_RESET2 | IN_RESET3)) {
+       case NORMAL_TIMEOUT:
+               {
+#ifdef DEBUG_TIMEOUT
+                       scsi_dump_status();
+#endif
+               }
+
+               if (!scsi_abort(SCpnt, DID_TIME_OUT))
+                       break;
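+               /* fall through */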
+       case IN_ABORT:
+               printk("SCSI host %d abort (pid %ld) timed out - resetting\n",
+                      SCpnt->host->host_no, SCpnt->pid);
+               if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS))
+                       break;
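+               /* fall through */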
+       case IN_RESET:
+       case (IN_ABORT | IN_RESET):
+               /* This might be controversial, but if there is a bus hang,
+                * you might conceivably want the machine up and running
+                * esp if you have an ide disk.
+                */
+               printk("SCSI host %d channel %d reset (pid %ld) timed out - "
+                      "trying harder\n",
+                      SCpnt->host->host_no, SCpnt->channel, SCpnt->pid);
+               SCpnt->internal_timeout &= ~IN_RESET;
+               SCpnt->internal_timeout |= IN_RESET2;
+               scsi_reset(SCpnt,
+                SCSI_RESET_ASYNCHRONOUS | SCSI_RESET_SUGGEST_BUS_RESET);
+               break;
+       case IN_RESET2:
+       case (IN_ABORT | IN_RESET2):
+               /* Obviously the bus reset didn't work.
+                * Let's try even harder and call for an HBA reset.
+                * Maybe the HBA itself crashed and this will shake it loose.
+                */
+               printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n",
+                      SCpnt->host->host_no, SCpnt->pid);
+               SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2);
+               SCpnt->internal_timeout |= IN_RESET3;
+               scsi_reset(SCpnt,
+               SCSI_RESET_ASYNCHRONOUS | SCSI_RESET_SUGGEST_HOST_RESET);
+               break;
+
+       default:
+               printk("SCSI host %d reset (pid %ld) timed out again -\n",
+                      SCpnt->host->host_no, SCpnt->pid);
+               printk("probably an unrecoverable SCSI bus or device hang.\n");
+               break;
+
+       }
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+}
+
+/*
+ *  From what I can find in scsi_obsolete.c, this function is only called
+ *  by scsi_old_done and scsi_reset.  Both of these functions run with the
+ *  io_request_lock already held, so we need do nothing here about grabbing
+ *  any locks.
+ */
+static void scsi_request_sense(Scsi_Cmnd * SCpnt)
+{
+       SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE;
+       update_timeout(SCpnt, SENSE_TIMEOUT);
+
+
+       memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
+              sizeof(generic_sense));
+       memset((void *) SCpnt->sense_buffer, 0,
+              sizeof(SCpnt->sense_buffer));
+
+       if (SCpnt->device->scsi_level <= SCSI_2)
+               SCpnt->cmnd[1] = SCpnt->lun << 5;
+       SCpnt->cmnd[4] = sizeof(SCpnt->sense_buffer);
+
+       SCpnt->request_buffer = &SCpnt->sense_buffer;
+       SCpnt->request_bufflen = sizeof(SCpnt->sense_buffer);
+       SCpnt->use_sg = 0;
+       SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
+       SCpnt->result = 0;
+       SCpnt->sc_data_direction = SCSI_DATA_READ;
+
+        /*
+         * Ugly, ugly.  The newer interfaces all assume that the lock
+         * isn't held.  Mustn't disappoint, or we deadlock the system.
+         */
+        spin_unlock_irq(&io_request_lock);
+       scsi_dispatch_cmd(SCpnt);
+        spin_lock_irq(&io_request_lock);
+}
+
+
+
+
+static int check_sense(Scsi_Cmnd * SCpnt)
+{
+       /* If there is no sense information, request it.  If we have already
+        * requested it, there is no point in asking again - the firmware must
+        * be confused.
+        */
+       if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
+               if (!(SCpnt->flags & ASKED_FOR_SENSE))
+                       return SUGGEST_SENSE;
+               else
+                       return SUGGEST_RETRY;
+       }
+       SCpnt->flags &= ~ASKED_FOR_SENSE;
+
+#ifdef DEBUG_INIT
+       printk("scsi%d, channel%d : ", SCpnt->host->host_no, SCpnt->channel);
+       print_sense("", SCpnt);
+       printk("\n");
+#endif
+       if (SCpnt->sense_buffer[2] & 0xe0)
+               return SUGGEST_ABORT;
+
+       switch (SCpnt->sense_buffer[2] & 0xf) {
+       case NO_SENSE:
+               return 0;
+       case RECOVERED_ERROR:
+               return SUGGEST_IS_OK;
+
+       case ABORTED_COMMAND:
+               return SUGGEST_RETRY;
+       case NOT_READY:
+       case UNIT_ATTENTION:
+               /*
+                * If we are expecting a CC/UA because of a bus reset that we
+                * performed, treat this just as a retry.  Otherwise this is
+                * information that we should pass up to the upper-level driver
+                * so that we can deal with it there.
+                */
+               if (SCpnt->device->expecting_cc_ua) {
+                       SCpnt->device->expecting_cc_ua = 0;
+                       return SUGGEST_RETRY;
+               }
+               return SUGGEST_ABORT;
+
+               /* these three are not supported */
+       case COPY_ABORTED:
+       case VOLUME_OVERFLOW:
+       case MISCOMPARE:
+
+       case MEDIUM_ERROR:
+               return SUGGEST_REMAP;
+       case BLANK_CHECK:
+       case DATA_PROTECT:
+       case HARDWARE_ERROR:
+       case ILLEGAL_REQUEST:
+       default:
+               return SUGGEST_ABORT;
+       }
+}
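+
+/*
+ * For reference, check_sense() decodes a fixed-format sense buffer:
+ * the 0x7x response code lives in byte 0 and the sense key in the low
+ * nibble of byte 2.  As a worked example, a buffer beginning
+ * 0x70 0x00 0x06 ... carries sense key 0x6 (UNIT_ATTENTION); unless we
+ * were expecting a CC/UA from our own bus reset, that yields
+ * SUGGEST_ABORT.
+ */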
+
+/* This function is the mid-level interrupt routine, which decides how
+ *  to handle error conditions.  Each invocation of this function must
+ *  do one and *only* one of the following:
+ *
+ *  (1) Call SCpnt->done.  This is done for fatal errors and
+ *      normal completion, and indicates that the handling for this
+ *      request is complete.
+ *  (2) Call scsi_dispatch_cmd to requeue the command.  This will result
+ *      in scsi_old_done being called again when the retry is complete.
+ *  (3) Call scsi_request_sense.  This asks the host adapter/drive for
+ *      more information about the error condition.  When the information
+ *      is available, scsi_old_done will be called again.
+ *  (4) Call scsi_reset().  This is sort of a last resort, and the idea is
+ *      that this may kick things loose and get the drive working again.
+ *      scsi_reset() automatically calls scsi_request_sense, and thus
+ *      scsi_old_done will be called again once the reset is complete.
+ *
+ *      If none of the above actions is taken, the drive in question
+ *      will hang.  If more than one of the above actions is taken by
+ *      scsi_old_done, then unpredictable behavior will result.
+ */
+void scsi_old_done(Scsi_Cmnd * SCpnt)
+{
+       int status = 0;
+       int exit = 0;
+       int checked;
+       int oldto;
+       struct Scsi_Host *host = SCpnt->host;
+        Scsi_Device * device = SCpnt->device;
+       int result = SCpnt->result;
+       SCpnt->serial_number = 0;
+       SCpnt->serial_number_at_timeout = 0;
+       oldto = update_timeout(SCpnt, 0);
+
+#ifdef DEBUG_TIMEOUT
+       if (result)
+               printk("Non-zero result in scsi_done %x %d:%d\n",
+                      result, SCpnt->target, SCpnt->lun);
+#endif
+
+       /* If we requested an abort (and we got it), then fix up the
+        *  return status to say why.
+        */
+       if (host_byte(result) == DID_ABORT && SCpnt->abort_reason)
+               SCpnt->result = result = (result & 0xff00ffff) |
+                   (SCpnt->abort_reason << 16);
+
+
+#define CMD_FINISHED 0
+#define MAYREDO  1
+#define REDO     3
+#define PENDING  4
+
+#ifdef DEBUG
+       printk("In scsi_done(host = %d, result = %06x)\n", host->host_no, result);
+#endif
+
+       if (SCpnt->flags & SYNC_RESET) {
+               /*
+                * The behaviour of scsi_reset(SYNC) was changed in 2.1.?.
+                * The SCSI mid-layer now does a REDO after every sync reset,
+                * so the driver must not do that itself any more.  In order
+                * to prevent old drivers from crashing, all scsi_done()
+                * calls during sync resets are ignored.
+                */
+               printk("scsi%d: device driver called scsi_done() "
+                      "for a synchronous reset.\n", SCpnt->host->host_no);
+               return;
+       }
+       if (SCpnt->flags & WAS_SENSE) {
+               SCpnt->use_sg = SCpnt->old_use_sg;
+               SCpnt->cmd_len = SCpnt->old_cmd_len;
+               SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+               SCpnt->underflow = SCpnt->old_underflow;
+       }
+       switch (host_byte(result)) {
+       case DID_OK:
+               if (status_byte(result) && (SCpnt->flags & WAS_SENSE)) {
+                       /* Failed to obtain sense information */
+                       SCpnt->flags &= ~WAS_SENSE;
+#if 0                          /* This cannot possibly be correct. */
+                       SCpnt->internal_timeout &= ~SENSE_TIMEOUT;
+#endif
+
+                       if (!(SCpnt->flags & WAS_RESET)) {
+                               printk("scsi%d : channel %d target %d lun %d request sense"
+                                      " failed, performing reset.\n",
+                                      SCpnt->host->host_no, SCpnt->channel, SCpnt->target,
+                                      SCpnt->lun);
+                               scsi_reset(SCpnt, SCSI_RESET_SYNCHRONOUS);
+                               status = REDO;
+                               break;
+                       } else {
+                               exit = (DRIVER_HARD | SUGGEST_ABORT);
+                               status = CMD_FINISHED;
+                       }
+               } else
+                       switch (msg_byte(result)) {
+                       case COMMAND_COMPLETE:
+                               switch (status_byte(result)) {
+                               case GOOD:
+                                       if (SCpnt->flags & WAS_SENSE) {
+#ifdef DEBUG
+                                               printk("In scsi_done, GOOD status, COMMAND COMPLETE, "
+                                                      "parsing sense information.\n");
+#endif
+                                               SCpnt->flags &= ~WAS_SENSE;
+#if 0                          /* This cannot possibly be correct. */
+                                               SCpnt->internal_timeout &= ~SENSE_TIMEOUT;
+#endif
+
+                                               switch (checked = check_sense(SCpnt)) {
+                                               case SUGGEST_SENSE:
+                                               case 0:
+#ifdef DEBUG
+                                                       printk("NO SENSE.  status = REDO\n");
+#endif
+                                                       update_timeout(SCpnt, oldto);
+                                                       status = REDO;
+                                                       break;
+                                               case SUGGEST_IS_OK:
+                                                       break;
+                                               case SUGGEST_REMAP:
+#ifdef DEBUG
+                                                       printk("SENSE SUGGEST REMAP - status = CMD_FINISHED\n");
+#endif
+                                                       status = CMD_FINISHED;
+                                                       exit = DRIVER_SENSE | SUGGEST_ABORT;
+                                                       break;
+                                               case SUGGEST_RETRY:
+#ifdef DEBUG
+                                                       printk("SENSE SUGGEST RETRY - status = MAYREDO\n");
+#endif
+                                                       status = MAYREDO;
+                                                       exit = DRIVER_SENSE | SUGGEST_RETRY;
+                                                       break;
+                                               case SUGGEST_ABORT:
+#ifdef DEBUG
+                                                       printk("SENSE SUGGEST ABORT - status = CMD_FINISHED");
+#endif
+                                                       status = CMD_FINISHED;
+                                                       exit = DRIVER_SENSE | SUGGEST_ABORT;
+                                                       break;
+                                               default:
+                                                       printk("Internal error %s %d \n", __FILE__,
+                                                              __LINE__);
+                                               }
+                                       }
+                                       /* end WAS_SENSE */
+                                       else {
+#ifdef DEBUG
+                                               printk("COMMAND COMPLETE message returned, "
+                                                      "status = CMD_FINISHED. \n");
+#endif
+                                               exit = DRIVER_OK;
+                                               status = CMD_FINISHED;
+                                       }
+                                       break;
+
+                               case CHECK_CONDITION:
+                               case COMMAND_TERMINATED:
+                                       switch (check_sense(SCpnt)) {
+                                       case 0:
+                                               update_timeout(SCpnt, oldto);
+                                               status = REDO;
+                                               break;
+                                       case SUGGEST_REMAP:
+                                               status = CMD_FINISHED;
+                                               exit = DRIVER_SENSE | SUGGEST_ABORT;
+                                               break;
+                                       case SUGGEST_RETRY:
+                                               status = MAYREDO;
+                                               exit = DRIVER_SENSE | SUGGEST_RETRY;
+                                               break;
+                                       case SUGGEST_ABORT:
+                                               status = CMD_FINISHED;
+                                               exit = DRIVER_SENSE | SUGGEST_ABORT;
+                                               break;
+                                       case SUGGEST_SENSE:
+                                               scsi_request_sense(SCpnt);
+                                               status = PENDING;
+                                               break;
+                                       }
+                                       break;
+
+                               case CONDITION_GOOD:
+                               case INTERMEDIATE_GOOD:
+                               case INTERMEDIATE_C_GOOD:
+                                       break;
+
+                               case BUSY:
+                               case QUEUE_FULL:
+                                       update_timeout(SCpnt, oldto);
+                                       status = REDO;
+                                       break;
+
+                               case RESERVATION_CONFLICT:
+                                       /*
+                                        * Most HAs will return an error for
+                                        * this, so usually reservation
+                                        * conflicts will be processed under
+                                        * the DID_ERROR code.
+                                        */
+                                       printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", 
+                                              SCpnt->host->host_no, SCpnt->channel,
+                                              SCpnt->device->id, SCpnt->device->lun);
+                                       status = CMD_FINISHED; /* returns I/O error */
+                                       break;
+                                        
+                               default:
+                                       printk("Internal error %s %d \n"
+                                              "status byte = %d \n", __FILE__,
+                                              __LINE__, status_byte(result));
+
+                               }
+                               break;
+                       default:
+                               panic("scsi: unsupported message byte %d received\n",
+                                     msg_byte(result));
+                       }
+               break;
+       case DID_TIME_OUT:
+#ifdef DEBUG
+               printk("Host returned DID_TIME_OUT - ");
+#endif
+
+               if (SCpnt->flags & WAS_TIMEDOUT) {
+#ifdef DEBUG
+                       printk("Aborting\n");
+#endif
+                       /*
+                          Allow TEST_UNIT_READY and INQUIRY commands to time out
+                          early without causing resets.  All other commands should
+                          be retried.
+                        */
+                       if (SCpnt->cmnd[0] != TEST_UNIT_READY &&
+                           SCpnt->cmnd[0] != INQUIRY)
+                               status = MAYREDO;
+                       exit = (DRIVER_TIMEOUT | SUGGEST_ABORT);
+               } else {
+#ifdef DEBUG
+                       printk("Retrying.\n");
+#endif
+                       SCpnt->flags |= WAS_TIMEDOUT;
+                       SCpnt->internal_timeout &= ~IN_ABORT;
+                       status = REDO;
+               }
+               break;
+       case DID_BUS_BUSY:
+       case DID_PARITY:
+               status = REDO;
+               break;
+       case DID_NO_CONNECT:
+#ifdef DEBUG
+               printk("Couldn't connect.\n");
+#endif
+               exit = (DRIVER_HARD | SUGGEST_ABORT);
+               break;
+       case DID_ERROR:
+               if (msg_byte(result) == COMMAND_COMPLETE &&
+                   status_byte(result) == RESERVATION_CONFLICT) {
+                       printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", 
+                              SCpnt->host->host_no, SCpnt->channel,
+                              SCpnt->device->id, SCpnt->device->lun);
+                       status = CMD_FINISHED; /* returns I/O error */
+                       break;
+               }
+               status = MAYREDO;
+               exit = (DRIVER_HARD | SUGGEST_ABORT);
+               break;
+       case DID_BAD_TARGET:
+       case DID_ABORT:
+               exit = (DRIVER_INVALID | SUGGEST_ABORT);
+               break;
+       case DID_RESET:
+               if (SCpnt->flags & IS_RESETTING) {
+                       SCpnt->flags &= ~IS_RESETTING;
+                       status = REDO;
+                       break;
+               }
+               if (msg_byte(result) == GOOD &&
+                   status_byte(result) == CHECK_CONDITION) {
+                       switch (check_sense(SCpnt)) {
+                       case 0:
+                               update_timeout(SCpnt, oldto);
+                               status = REDO;
+                               break;
+                       case SUGGEST_REMAP:
+                       case SUGGEST_RETRY:
+                               status = MAYREDO;
+                               exit = DRIVER_SENSE | SUGGEST_RETRY;
+                               break;
+                       case SUGGEST_ABORT:
+                               status = CMD_FINISHED;
+                               exit = DRIVER_SENSE | SUGGEST_ABORT;
+                               break;
+                       case SUGGEST_SENSE:
+                               scsi_request_sense(SCpnt);
+                               status = PENDING;
+                               break;
+                       }
+               } else {
+                       status = REDO;
+                       exit = SUGGEST_RETRY;
+               }
+               break;
+       default:
+               exit = (DRIVER_ERROR | SUGGEST_DIE);
+       }
+
+       switch (status) {
+       case CMD_FINISHED:
+       case PENDING:
+               break;
+       case MAYREDO:
+#ifdef DEBUG
+               printk("In MAYREDO, allowing %d retries, have %d\n",
+                      SCpnt->allowed, SCpnt->retries);
+#endif
+               if ((++SCpnt->retries) < SCpnt->allowed) {
+                       if ((SCpnt->retries >= (SCpnt->allowed >> 1))
+                           && !(SCpnt->host->resetting && time_before(jiffies, SCpnt->host->last_reset + MIN_RESET_PERIOD))
+                           && !(SCpnt->flags & WAS_RESET)) {
+                               printk("scsi%d channel %d : resetting for second half of retries.\n",
+                                  SCpnt->host->host_no, SCpnt->channel);
+                               scsi_reset(SCpnt, SCSI_RESET_SYNCHRONOUS);
+                               /* fall through to REDO */
+                       }
+               } else {
+                       status = CMD_FINISHED;
+                       break;
+               }
+               /* fall through to REDO */
+
+       case REDO:
+
+               if (SCpnt->flags & WAS_SENSE)
+                       scsi_request_sense(SCpnt);
+               else {
+                       memcpy((void *) SCpnt->cmnd,
+                              (void *) SCpnt->data_cmnd,
+                              sizeof(SCpnt->data_cmnd));
+                       memset((void *) SCpnt->sense_buffer, 0,
+                              sizeof(SCpnt->sense_buffer));
+                       SCpnt->request_buffer = SCpnt->buffer;
+                       SCpnt->request_bufflen = SCpnt->bufflen;
+                       SCpnt->use_sg = SCpnt->old_use_sg;
+                       SCpnt->cmd_len = SCpnt->old_cmd_len;
+                       SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+                       SCpnt->underflow = SCpnt->old_underflow;
+                       SCpnt->result = 0;
+                        /*
+                         * Ugly, ugly.  The newer interfaces all
+                         * assume that the lock isn't held.  Mustn't
+                         * disappoint, or we deadlock the system.  
+                         */
+                        spin_unlock_irq(&io_request_lock);
+                       scsi_dispatch_cmd(SCpnt);
+                        spin_lock_irq(&io_request_lock);
+               }
+               break;
+       default:
+               INTERNAL_ERROR;
+       }
+
+       if (status == CMD_FINISHED) {
+               Scsi_Request *SRpnt;
+#ifdef DEBUG
+               printk("Calling done function - at address %p\n", SCpnt->done);
+#endif
+               host->host_busy--;      /* Indicate that we are free */
+                device->device_busy--; /* Decrement device usage counter. */
+
+               SCpnt->result = result | ((exit & 0xff) << 24);
+               SCpnt->use_sg = SCpnt->old_use_sg;
+               SCpnt->cmd_len = SCpnt->old_cmd_len;
+               SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+               SCpnt->underflow = SCpnt->old_underflow;
+                /*
+                 * The upper layers assume the lock isn't held.  We mustn't
+                 * disappoint them.  When the new error handling code is in
+                 * use, the upper code is run from a bottom half handler, so
+                 * it isn't an issue.
+                 */
+                spin_unlock_irq(&io_request_lock);
+               SRpnt = SCpnt->sc_request;
+               if( SRpnt != NULL ) {
+                       SRpnt->sr_result = SRpnt->sr_command->result;
+                       if( SRpnt->sr_result != 0 ) {
+                               memcpy(SRpnt->sr_sense_buffer,
+                                      SRpnt->sr_command->sense_buffer,
+                                      sizeof(SRpnt->sr_sense_buffer));
+                       }
+               }
+
+               SCpnt->done(SCpnt);
+                spin_lock_irq(&io_request_lock);
+       }
+#undef CMD_FINISHED
+#undef REDO
+#undef MAYREDO
+#undef PENDING
+}
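+
+/*
+ * For reference, added as annotation: the 32-bit result word that
+ * scsi_old_done() decodes packs four fields, from least to most
+ * significant byte: the SCSI status returned by the target, then the
+ * message, host and driver bytes.  This is why the exit code above is
+ * folded in as ((exit & 0xff) << 24), setting the driver byte, and the
+ * abort reason as (SCpnt->abort_reason << 16), replacing the host byte.
+ */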
+
+/*
+ * The scsi_abort function interfaces with the abort() function of the host
+ * we are aborting, and causes the current command to not complete.  The
+ * caller should deal with any error messages or status returned on the
+ * next call.
+ *
+ * This will not be called reentrantly for a given host.
+ */
+
+/*
+ * Since we're nice guys and specified that abort() and reset() can be
+ * non-reentrant, the internal_timeout flags are used to enforce this.
+ */
+
+
+static int scsi_abort(Scsi_Cmnd * SCpnt, int why)
+{
+       int oldto;
+       struct Scsi_Host *host = SCpnt->host;
+
+       while (1) {
+
+               /*
+                * Protect against races here.  If the command is done, or we are
+                * on a different command forget it.
+                */
+               if (SCpnt->serial_number != SCpnt->serial_number_at_timeout) {
+                       return 0;
+               }
+               if (SCpnt->internal_timeout & IN_ABORT) {
+                       spin_unlock_irq(&io_request_lock);
+                       while (SCpnt->internal_timeout & IN_ABORT)
+                               barrier();
+                       spin_lock_irq(&io_request_lock);
+               } else {
+                       SCpnt->internal_timeout |= IN_ABORT;
+                       oldto = update_timeout(SCpnt, ABORT_TIMEOUT);
+
+                       if ((SCpnt->flags & IS_RESETTING) && SCpnt->device->soft_reset) {
+                               /* OK, this command must have died when we did the
+                                *  reset.  The device itself must have lied.
+                                */
+                               printk("Stale command on %d %d:%d appears to have died when"
+                                      " the bus was reset\n",
+                                      SCpnt->channel, SCpnt->target, SCpnt->lun);
+                       }
+                       if (!host->host_busy) {
+                               SCpnt->internal_timeout &= ~IN_ABORT;
+                               update_timeout(SCpnt, oldto);
+                               return 0;
+                       }
+                       printk("scsi : aborting command due to timeout : pid %lu, scsi%d,"
+                              " channel %d, id %d, lun %d ",
+                              SCpnt->pid, SCpnt->host->host_no, (int) SCpnt->channel,
+                              (int) SCpnt->target, (int) SCpnt->lun);
+                       print_command(SCpnt->cmnd);
+                       if (SCpnt->serial_number != SCpnt->serial_number_at_timeout)
+                               return 0;
+                       SCpnt->abort_reason = why;
+                       switch (host->hostt->abort(SCpnt)) {
+                               /* We do not know how to abort.  Try waiting another
+                                * time increment and see if this helps.  Set the
+                                * WAS_TIMEDOUT flag so we do not try this twice.
+                                */
+                       case SCSI_ABORT_BUSY:   /* Tough call - returning 1 from
+                                                * this is too severe
+                                                */
+                       case SCSI_ABORT_SNOOZE:
+                               if (why == DID_TIME_OUT) {
+                                       SCpnt->internal_timeout &= ~IN_ABORT;
+                                       if (SCpnt->flags & WAS_TIMEDOUT) {
+                                               return 1;       /* Indicate we cannot handle this.
+                                                                * We drop down into the reset handler
+                                                                * and try again
+                                                                */
+                                       } else {
+                                               SCpnt->flags |= WAS_TIMEDOUT;
+                                               oldto = SCpnt->timeout_per_command;
+                                               update_timeout(SCpnt, oldto);
+                                       }
+                               }
+                               return 0;
+                       case SCSI_ABORT_PENDING:
+                               if (why != DID_TIME_OUT) {
+                                       update_timeout(SCpnt, oldto);
+                               }
+                               return 0;
+                       case SCSI_ABORT_SUCCESS:
+                               /* We should have already aborted this one.  No
+                                * need to adjust timeout
+                                */
+                               SCpnt->internal_timeout &= ~IN_ABORT;
+                               return 0;
+                       case SCSI_ABORT_NOT_RUNNING:
+                               SCpnt->internal_timeout &= ~IN_ABORT;
+                               update_timeout(SCpnt, 0);
+                               return 0;
+                       case SCSI_ABORT_ERROR:
+                       default:
+                               SCpnt->internal_timeout &= ~IN_ABORT;
+                               return 1;
+                       }
+               }
+       }
+}
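+
+/*
+ * Annotation, not part of the original source: scsi_abort() returns 0
+ * when the abort succeeded, is pending, or is worth waiting out, and 1
+ * when the caller should give up on aborting and escalate to the reset
+ * handler instead.
+ */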
+
+
+/* Mark a single SCSI Device as having been reset. */
+
+static inline void scsi_mark_device_reset(Scsi_Device * Device)
+{
+       Device->was_reset = 1;
+       Device->expecting_cc_ua = 1;
+}
+
+
+/* Mark all SCSI Devices on a specific Host as having been reset. */
+
+void scsi_mark_host_reset(struct Scsi_Host *Host)
+{
+       Scsi_Cmnd *SCpnt;
+       Scsi_Device *SDpnt;
+
+       for (SDpnt = Host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)
+                       scsi_mark_device_reset(SCpnt->device);
+       }
+}
+
+
+/* Mark all SCSI Devices on a specific Host Bus as having been reset. */
+
+static void scsi_mark_bus_reset(struct Scsi_Host *Host, int channel)
+{
+       Scsi_Cmnd *SCpnt;
+       Scsi_Device *SDpnt;
+
+       for (SDpnt = Host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+               for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next)
+                       if (SCpnt->channel == channel)
+                               scsi_mark_device_reset(SCpnt->device);
+       }
+}
+
+
+static int scsi_reset(Scsi_Cmnd * SCpnt, unsigned int reset_flags)
+{
+       int temp;
+       Scsi_Cmnd *SCpnt1;
+       Scsi_Device *SDpnt;
+       struct Scsi_Host *host = SCpnt->host;
+
+       printk("SCSI bus is being reset for host %d channel %d.\n",
+              host->host_no, SCpnt->channel);
+
+#if 0
+       /*
+        * First of all, we need to make a recommendation to the low-level
+        * driver as to whether a BUS_DEVICE_RESET should be performed,
+        * or whether we should do a full BUS_RESET.  There is no simple
+        * algorithm here - we basically use a series of heuristics
+        * to determine what we should do.
+        */
+       SCpnt->host->suggest_bus_reset = FALSE;
+
+       /*
+        * First see if all of the active devices on the bus have
+        * been jammed up so that we are attempting resets.  If so,
+        * then suggest a bus reset.  Forcing a bus reset could
+        * result in some race conditions, but no more than
+        * you would usually get with timeouts.  We will cross
+        * that bridge when we come to it.
+        *
+        * This is actually a pretty bad idea, since a sequence of
+        * commands will often timeout together and this will cause a
+        * Bus Device Reset followed immediately by a SCSI Bus Reset.
+        * If all of the active devices really are jammed up, the
+        * Bus Device Reset will quickly timeout and scsi_times_out
+        * will follow up with a SCSI Bus Reset anyway.
+        */
+       SCpnt1 = host->host_queue;
+       while (SCpnt1) {
+               if (SCpnt1->request.rq_status != RQ_INACTIVE
+                   && (SCpnt1->flags & (WAS_RESET | IS_RESETTING)) == 0)
+                       break;
+               SCpnt1 = SCpnt1->next;
+       }
+       if (SCpnt1 == NULL) {
+               reset_flags |= SCSI_RESET_SUGGEST_BUS_RESET;
+       }
+       /*
+        * If the code that called us is suggesting a hard reset, then
+        * definitely request it.  This usually occurs because a
+        * BUS_DEVICE_RESET times out.
+        *
+        * Passing reset_flags along takes care of this automatically.
+        */
+       if (reset_flags & SCSI_RESET_SUGGEST_BUS_RESET) {
+               SCpnt->host->suggest_bus_reset = TRUE;
+       }
+#endif
+
+       while (1) {
+
+               /*
+                * Protect against races here.  If the command is done, or we are
+                * on a different command forget it.
+                */
+               if (reset_flags & SCSI_RESET_ASYNCHRONOUS)
+                       if (SCpnt->serial_number != SCpnt->serial_number_at_timeout) {
+                               return 0;
+                       }
+               if (SCpnt->internal_timeout & IN_RESET) {
+                       spin_unlock_irq(&io_request_lock);
+                       while (SCpnt->internal_timeout & IN_RESET)
+                               barrier();
+                       spin_lock_irq(&io_request_lock);
+               } else {
+                       SCpnt->internal_timeout |= IN_RESET;
+                       update_timeout(SCpnt, RESET_TIMEOUT);
+
+                       if (reset_flags & SCSI_RESET_SYNCHRONOUS)
+                               SCpnt->flags |= SYNC_RESET;
+                       if (host->host_busy) {
+                               for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                                       SCpnt1 = SDpnt->device_queue;
+                                       while (SCpnt1) {
+                                               if (SCpnt1->request.rq_status != RQ_INACTIVE) {
+#if 0
+                                                       if (!(SCpnt1->flags & IS_RESETTING) &&
+                                                           !(SCpnt1->internal_timeout & IN_ABORT))
+                                                               scsi_abort(SCpnt1, DID_RESET);
+#endif
+                                                       SCpnt1->flags |= (WAS_RESET | IS_RESETTING);
+                                               }
+                                               SCpnt1 = SCpnt1->next;
+                                       }
+                               }
+
+                               host->last_reset = jiffies;
+                               host->resetting = 1;
+                               /*
+                                * I suppose that the host reset callback will not play
+                                * with the resetting field. We have just set the resetting
+                                * flag here. -arca
+                                */
+                               temp = host->hostt->reset(SCpnt, reset_flags);
+                               /*
+                                  This test allows the driver to introduce an additional bus
+                                  settle time delay by setting last_reset up to 20 seconds in
+                                  the future.  In the normal case where the driver does not
+                                  modify last_reset, it must be assumed that the actual bus
+                                  reset occurred immediately prior to the return to this code,
+                                  and so last_reset must be updated to the current time, so
+                                  that the delay in internal_cmnd will guarantee at least a
+                                  MIN_RESET_DELAY bus settle time.
+                                */
+                               if (host->last_reset - jiffies > 20UL * HZ)
+                                       host->last_reset = jiffies;
+                       } else {
+                               host->host_busy++;
+                               host->last_reset = jiffies;
+                               host->resetting = 1;
+                               SCpnt->flags |= (WAS_RESET | IS_RESETTING);
+                               /*
+                                * I suppose that the host reset callback will not play
+                                * with the resetting field. We have just set the resetting
+                                * flag here. -arca
+                                */
+                               temp = host->hostt->reset(SCpnt, reset_flags);
+                               if (time_before(host->last_reset, jiffies) ||
+                                   (time_after(host->last_reset, jiffies + 20 * HZ)))
+                                       host->last_reset = jiffies;
+                               host->host_busy--;
+                       }
+                       if (reset_flags & SCSI_RESET_SYNCHRONOUS)
+                               SCpnt->flags &= ~SYNC_RESET;
+
+#ifdef DEBUG
+                       printk("scsi reset function returned %d\n", temp);
+#endif
+
+                       /*
+                        * Now figure out what we need to do, based upon
+                        * what the low level driver said that it did.
+                        * If the result is SCSI_RESET_SUCCESS, SCSI_RESET_PENDING,
+                        * or SCSI_RESET_WAKEUP, then the low level driver did a
+                        * bus device reset or bus reset, so we should go through
+                        * and mark one or all of the devices on that bus
+                        * as having been reset.
+                        */
+                       switch (temp & SCSI_RESET_ACTION) {
+                       case SCSI_RESET_SUCCESS:
+                               if (temp & SCSI_RESET_HOST_RESET)
+                                       scsi_mark_host_reset(host);
+                               else if (temp & SCSI_RESET_BUS_RESET)
+                                       scsi_mark_bus_reset(host, SCpnt->channel);
+                               else
+                                       scsi_mark_device_reset(SCpnt->device);
+                               SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3);
+                               return 0;
+                       case SCSI_RESET_PENDING:
+                               if (temp & SCSI_RESET_HOST_RESET)
+                                       scsi_mark_host_reset(host);
+                               else if (temp & SCSI_RESET_BUS_RESET)
+                                       scsi_mark_bus_reset(host, SCpnt->channel);
+                               else
+                                       scsi_mark_device_reset(SCpnt->device);
+                               /* fall through */
+                       case SCSI_RESET_NOT_RUNNING:
+                               return 0;
+                       case SCSI_RESET_PUNT:
+                               SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3);
+                               scsi_request_sense(SCpnt);
+                               return 0;
+                       case SCSI_RESET_WAKEUP:
+                               if (temp & SCSI_RESET_HOST_RESET)
+                                       scsi_mark_host_reset(host);
+                               else if (temp & SCSI_RESET_BUS_RESET)
+                                       scsi_mark_bus_reset(host, SCpnt->channel);
+                               else
+                                       scsi_mark_device_reset(SCpnt->device);
+                               SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3);
+                               scsi_request_sense(SCpnt);
+                               /*
+                                * If a bus reset was performed, we need to wake
+                                * up each and every command that was active on
+                                * that bus; if it was an HBA reset, all active
+                                * commands on all channels.
+                                */
+                               if (temp & SCSI_RESET_HOST_RESET) {
+                                       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                                               SCpnt1 = SDpnt->device_queue;
+                                               while (SCpnt1) {
+                                                       if (SCpnt1->request.rq_status != RQ_INACTIVE
+                                                           && SCpnt1 != SCpnt)
+                                                               scsi_request_sense(SCpnt1);
+                                                       SCpnt1 = SCpnt1->next;
+                                               }
+                                       }
+                               } else if (temp & SCSI_RESET_BUS_RESET) {
+                                       for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+                                               SCpnt1 = SDpnt->device_queue;
+                                               while (SCpnt1) {
+                                                       if (SCpnt1->request.rq_status != RQ_INACTIVE
+                                                       && SCpnt1 != SCpnt
+                                                           && SCpnt1->channel == SCpnt->channel)
+                                                               scsi_request_sense(SCpnt1);
+                                                       SCpnt1 = SCpnt1->next;
+                                               }
+                                       }
+                               }
+                               return 0;
+                       case SCSI_RESET_SNOOZE:
+                               /* In this case, we set the timeout field to 0
+                                * so that this command does not time out any more,
+                                * and we return 1 so that we get a message on the
+                                * screen.
+                                */
+                               SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3);
+                               update_timeout(SCpnt, 0);
+                               /* If you snooze, you lose... */
+                       case SCSI_RESET_ERROR:
+                       default:
+                               return 1;
+                       }
+
+                       return temp;
+               }
+       }
+}
+
+/*
+ * The strategy is to cause the timer code to call scsi_old_times_out()
+ * when the soonest timeout is pending.
+ * update_timeout() returns the time that was still left on the
+ * command's timer, deletes the timer when the new timeout is zero,
+ * and otherwise re-registers it with the new value.
+ */
+
+int update_timeout(Scsi_Cmnd * SCset, int timeout)
+{
+       int rtn;
+
+       /*
+        * We are using the new error handling code to actually register/deregister
+        * timers for timeout.
+        */
+
+       if (!timer_pending(&SCset->eh_timeout)) {
+               rtn = 0;
+       } else {
+               rtn = SCset->eh_timeout.expires - jiffies;
+       }
+
+       if (timeout == 0) {
+               scsi_delete_timer(SCset);
+       } else {
+               scsi_add_timer(SCset, timeout, scsi_old_times_out);
+       }
+
+       return rtn;
+}
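+
+/*
+ * A usage sketch, added as annotation, showing the save/restore pattern
+ * used throughout this file: cancel the timer while remembering how
+ * much time remained, do the work, then re-arm with the old value.
+ *
+ *     int oldto = update_timeout(SCpnt, 0);   // delete timer, keep remainder
+ *     ... handle or requeue the command ...
+ *     update_timeout(SCpnt, oldto);           // re-register the old timeout
+ */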
+
+
+/*
+ * This function exports SCSI Bus, Device or Host reset capability
+ * and is for use with the SCSI generic driver.
+ */
+int
+scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag)
+{
+       unsigned int old_flags = SCSI_RESET_SYNCHRONOUS;
+
+       switch(flag) {
+       case SCSI_TRY_RESET_DEVICE:
+               /* no suggestion flags to add, device reset is default */
+               break;
+       case SCSI_TRY_RESET_BUS:
+               old_flags |= SCSI_RESET_SUGGEST_BUS_RESET;
+               break;
+       case SCSI_TRY_RESET_HOST:
+               old_flags |= SCSI_RESET_SUGGEST_HOST_RESET;
+               break;
+       default:
+               return FAILED;
+       }
+
+       if (scsi_reset(SCpnt, old_flags))
+               return FAILED;
+       return SUCCESS;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_obsolete.h b/xen-2.4.16/drivers/scsi/scsi_obsolete.h
new file mode 100644 (file)
index 0000000..abeacb9
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ *  scsi_obsolete.h Copyright (C) 1997 Eric Youngdale
+ *
+ */
+
+#ifndef _SCSI_OBSOLETE_H
+#define _SCSI_OBSOLETE_H
+
+/*
+ * These are the return codes for the abort and reset functions.  The mid-level
+ * code uses these to decide what to do next.  Each of the low level abort
+ * and reset functions must correctly indicate what it has done.
+ * The descriptions are written from the point of view of the mid-level code,
+ * so that the return code is telling the mid-level drivers exactly what
+ * the low level driver has already done, and what remains to be done.
+ */
+
+/* We did not do anything.  
+ * Wait some more for this command to complete, and if this does not work, 
+ * try something more serious. */
+#define SCSI_ABORT_SNOOZE 0
+
+/* This means that we were able to abort the command.  We have already
+ * called the mid-level done function, and do not expect an interrupt that 
+ * will lead to another call to the mid-level done function for this command */
+#define SCSI_ABORT_SUCCESS 1
+
+/* We called for an abort of this command, and we should get an interrupt 
+ * when this succeeds.  Thus we should not restore the timer for this
+ * command in the mid-level abort function. */
+#define SCSI_ABORT_PENDING 2
+
+/* Unable to abort - command is currently on the bus.  Grin and bear it. */
+#define SCSI_ABORT_BUSY 3
+
+/* The command is not active in the low level code. Command probably
+ * finished. */
+#define SCSI_ABORT_NOT_RUNNING 4
+
+/* Something went wrong.  The low level driver will indicate the correct
+ * error condition when it calls scsi_done, so the mid-level abort function
+ * can simply wait until this comes through */
+#define SCSI_ABORT_ERROR 5
+
+/* We do not know how to reset the bus, or we do not want to.  Bummer.
+ * Anyway, just wait a little more for the command in question, and hope that
+ * it eventually finishes.  If it never finishes, the SCSI device could
+ * hang, so use this with caution. */
+#define SCSI_RESET_SNOOZE 0
+
+/* We do not know how to reset the bus, or we do not want to.  Bummer.
+ * We have given up on this ever completing.  The mid-level code will
+ * request sense information to decide how to proceed from here. */
+#define SCSI_RESET_PUNT 1
+
+/* This means that we were able to reset the bus.  We have restarted all of
+ * the commands that should be restarted, and we should be able to continue
+ * on normally from here.  We do not expect any interrupts that will return
+ * DID_RESET to any of the other commands in the host_queue, and the mid-level
+ * code does not need to do anything special to keep the commands alive. 
+ * If a hard reset was performed then all outstanding commands on the
+ * bus have been restarted. */
+#define SCSI_RESET_SUCCESS 2
+
+/* We called for a reset of this bus, and we should get an interrupt 
+ * when this succeeds.  Each command should get its own status
+ * passed up to scsi_done, but this has not happened yet. 
+ * If a hard reset was performed, then we expect an interrupt
+ * for *each* of the outstanding commands that will have the
+ * effect of restarting the commands.
+ */
+#define SCSI_RESET_PENDING 3
+
+/* We did a reset, but do not expect an interrupt to signal DID_RESET.
+ * This tells the upper level code to request the sense info, and this
+ * should keep the command alive. */
+#define SCSI_RESET_WAKEUP 4
+
+/* The command is not active in the low level code. Command probably
+   finished. */
+#define SCSI_RESET_NOT_RUNNING 5
+
+/* Something went wrong, and we do not know how to fix it. */
+#define SCSI_RESET_ERROR 6
+
+#define SCSI_RESET_SYNCHRONOUS         0x01
+#define SCSI_RESET_ASYNCHRONOUS                0x02
+#define SCSI_RESET_SUGGEST_BUS_RESET   0x04
+#define SCSI_RESET_SUGGEST_HOST_RESET  0x08
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a hard reset.
+ */
+#define SCSI_RESET_BUS_RESET 0x100
+/*
+ * This is a bitmask that is ored with one of the above codes.
+ * It tells the mid-level code that we did a host adapter reset.
+ */
+#define SCSI_RESET_HOST_RESET 0x200
+/*
+ * Used to mask off bits and to obtain the basic action that was
+ * performed.  
+ */
+#define SCSI_RESET_ACTION   0xff
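+
+/*
+ * Illustrative decode, added as annotation: a driver that reset the
+ * whole bus and restarted the affected commands would return
+ * SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET.  The mid-level code then
+ * separates the two halves, exactly as scsi_reset() does:
+ *
+ *     switch (temp & SCSI_RESET_ACTION) {       // basic action, low byte
+ *     case SCSI_RESET_SUCCESS:
+ *             if (temp & SCSI_RESET_BUS_RESET)  // hard-reset modifier bit
+ *                     ... mark every device on the bus as reset ...
+ *     }
+ */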
+
+#endif                         /* SCSI_OBSOLETE_H */
diff --git a/xen-2.4.16/drivers/scsi/scsi_proc.c b/xen-2.4.16/drivers/scsi/scsi_proc.c
new file mode 100644 (file)
index 0000000..01d6679
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * linux/drivers/scsi/scsi_proc.c
+ *
+ * The functions in this file provide an interface between
+ * the PROC file system and the SCSI device drivers.  It is
+ * mainly used for debugging, for statistics and to pass
+ * information directly to the low-level driver.
+ *
+ * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de 
+ * Version: 0.99.8   last change: 95/09/13
+ * 
+ * generic command parser provided by: 
+ * Andreas Heilwagen <crashcar@informatik.uni-koblenz.de>
+ *
+ * generic_proc_info() support of xxxx_info() by:
+ * Michael A. Griffith <grif@acm.org>
+ */
+
+#include <linux/config.h>      /* for CONFIG_PROC_FS */
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+
+#ifndef TRUE
+#define TRUE  1
+#define FALSE 0
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+/* generic_proc_info
+ * Used if the driver currently has no own support for /proc/scsi
+ */
+int generic_proc_info(char *buffer, char **start, off_t offset, int length, 
+                     const char *(*info) (struct Scsi_Host *),
+                     struct Scsi_Host *sh)
+{
+       int len, pos, begin;
+
+       begin = 0;
+       if (info && sh) {
+               pos = len = sprintf(buffer, "%s\n", info(sh));
+       } else {
+               pos = len = sprintf(buffer,
+                       "The driver does not yet support the proc-fs\n");
+       }
+       if (pos < offset) {
+               len = 0;
+               begin = pos;
+       }
+       *start = buffer + (offset - begin);     /* Start of wanted data */
+       len -= (offset - begin);
+       if (len > length)
+               len = length;
+
+       return (len);
+}
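+
+/*
+ * Annotation, not part of the original source: this follows the old
+ * procfs single-buffer read protocol.  The routine formats its whole
+ * output, then sets *start to the first byte the caller asked for
+ * (buffer + offset - begin) and returns how many bytes from *start are
+ * valid, clipped to the requested length; the caller repeats the read
+ * with a growing offset until fewer than length bytes come back.
+ */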
+
+/* dispatch_scsi_info is the central dispatcher 
+ * It is the interface between the proc-fs and the SCSI subsystem code
+ */
+static int proc_scsi_read(char *buffer, char **start, off_t offset,
+       int length, int *eof, void *data)
+{
+       struct Scsi_Host *hpnt = data;
+       int n;
+
+       if (hpnt->hostt->proc_info == NULL)
+               n = generic_proc_info(buffer, start, offset, length,
+                                     hpnt->hostt->info, hpnt);
+       else
+               n = (hpnt->hostt->proc_info(buffer, start, offset,
+                                          length, hpnt->host_no, 0));
+       *eof = (n<length);
+       return n;
+}
+
+#define PROC_BLOCK_SIZE (3*1024)     /* 3K of the 4K page; the rest is
+                                      * slack for output routines that
+                                      * overrun
+                                      */
+
+static int proc_scsi_write(struct file * file, const char * buf,
+                           unsigned long count, void *data)
+{
+       struct Scsi_Host *hpnt = data;
+       ssize_t ret = 0;
+       char * page;
+       char *start;
+    
+       if (hpnt->hostt->proc_info == NULL)
+               return -ENOSYS;
+
+       if (count > PROC_BLOCK_SIZE)
+               return -EOVERFLOW;
+
+       if (!(page = (char *) __get_free_page(GFP_KERNEL)))
+               return -ENOMEM;
+       if(copy_from_user(page, buf, count))
+       {
+               free_page((ulong) page);
+               return -EFAULT;
+       }
+
+       ret = hpnt->hostt->proc_info(page, &start, 0, count,
+                                    hpnt->host_no, 1);
+
+       free_page((ulong) page);
+       return(ret);
+}
+
+void build_proc_dir_entries(Scsi_Host_Template * tpnt)
+{
+       struct Scsi_Host *hpnt;
+       char name[10];  /* see scsi_unregister_host() */
+
+       tpnt->proc_dir = proc_mkdir(tpnt->proc_name, proc_scsi);
+        if (!tpnt->proc_dir) {
+                printk(KERN_ERR "Unable to proc_mkdir in scsi.c/build_proc_dir_entries\n");
+                return;
+        }
+       tpnt->proc_dir->owner = tpnt->module;
+
+       hpnt = scsi_hostlist;
+       while (hpnt) {
+               if (tpnt == hpnt->hostt) {
+                       struct proc_dir_entry *p;
+                       sprintf(name,"%d",hpnt->host_no);
+                       p = create_proc_read_entry(name,
+                                       S_IFREG | S_IRUGO | S_IWUSR,
+                                       tpnt->proc_dir,
+                                       proc_scsi_read,
+                                       (void *)hpnt);
+                       if (!p)
+                               panic("Not enough memory to register SCSI HBA in /proc/scsi !\n");
+                       p->write_proc=proc_scsi_write;
+                       p->owner = tpnt->module;
+               }
+               hpnt = hpnt->next;
+       }
+}
+
+/*
+ *      parseHandle *parseInit(char *buf, char *cmdList, int cmdNum);
+ *              takes a pointer to a null-terminated data buffer and a
+ *              list of commands with blanks as delimiters in between.
+ *              The commands have to be alphanumerically sorted and
+ *              cmdNum has to contain the number of commands.
+ *              On success, a pointer to a handle structure is
+ *              returned, NULL on failure.
+ *
+ *      int parseOpt(parseHandle *handle, char **param);
+ *              processes the next parameter.  On success, the
+ *              index of the appropriate command in the cmdList
+ *              is returned, starting with zero.
+ *              param points to the null-terminated parameter string.
+ *              On failure, -1 is returned.
+ *
+ *      The data buffer buf may only contain pairs of commands and
+ *      options, separated by blanks:
+ *              <Command> <Parameter> [<Command> <Parameter>]*
+ *
+ *      (A usage sketch follows parseOpt() below.)
+ */
+
+typedef struct {
+       char *buf,              /* command buffer  */
+       *cmdList,               /* command list    */
+       *bufPos,                /* actual position */
+       **cmdPos,               /* cmdList index   */
+        cmdNum;                /* cmd number      */
+} parseHandle;
+
+inline int parseFree(parseHandle * handle)
+{                              /* free memory     */
+       kfree(handle->cmdPos);
+       kfree(handle);
+
+       return -1;
+}
+
+parseHandle *parseInit(char *buf, char *cmdList, int cmdNum)
+{
+       char *ptr;              /* temp pointer    */
+       parseHandle *handle;    /* new handle      */
+
+       if (!buf || !cmdList)   /* bad input ?     */
+               return NULL;
+       handle = (parseHandle *) kmalloc(sizeof(parseHandle), GFP_KERNEL);
+       if (!handle)
+               return NULL;    /* out of memory   */
+       handle->cmdPos = (char **) kmalloc(sizeof(int) * cmdNum, GFP_KERNEL);
+       if (!handle->cmdPos) {
+               kfree(handle);
+               return NULL;    /* out of memory   */
+       }
+       handle->buf = handle->bufPos = buf;     /* init handle     */
+       handle->cmdList = cmdList;
+       handle->cmdNum = cmdNum;
+
+       handle->cmdPos[cmdNum = 0] = cmdList;
+       for (ptr = cmdList; *ptr; ptr++) {      /* scan command string */
+               if (*ptr == ' ') {      /* and insert zeroes   */
+                       *ptr++ = 0;
+                       handle->cmdPos[++cmdNum] = ptr++;
+               }
+       }
+       return handle;
+}
+
+int parseOpt(parseHandle * handle, char **param)
+{
+       int cmdIndex = 0, cmdLen = 0;
+       char *startPos;
+
+       if (!handle)            /* invalid handle  */
+               return (parseFree(handle));
+       /* skip spaces     */
+       for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+       if (!*(handle->bufPos))
+               return (parseFree(handle));     /* end of data     */
+
+       startPos = handle->bufPos;      /* store cmd start */
+       for (; handle->cmdPos[cmdIndex][cmdLen] && *(handle->bufPos); handle->bufPos++) {       /* no string end?  */
+               for (;;) {
+                       if (*(handle->bufPos) == handle->cmdPos[cmdIndex][cmdLen])
+                               break;  /* char matches ?  */
+                       else if (memcmp(startPos, (char *) (handle->cmdPos[++cmdIndex]), cmdLen))
+                               return (parseFree(handle));     /* unknown command */
+
+                       if (cmdIndex >= handle->cmdNum)
+                               return (parseFree(handle));     /* unknown command */
+               }
+
+               cmdLen++;       /* next char       */
+       }
+
+       /* Get param. First skip all blanks, then insert zero after param  */
+
+       for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++);
+       *param = handle->bufPos;
+
+       for (; *(handle->bufPos) && *(handle->bufPos) != ' '; handle->bufPos++);
+       *(handle->bufPos++) = 0;
+
+       return (cmdIndex);
+}
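+
+/*
+ * Usage sketch, added as annotation ("page" stands for the caller's
+ * zero-terminated command buffer): parsing a proc write such as
+ * "debug 1 verbose 0" against an alphabetically sorted, blank-delimited
+ * command list:
+ *
+ *     char cmds[] = "debug verbose";
+ *     char *param;
+ *     int idx;
+ *     parseHandle *h = parseInit(page, cmds, 2);
+ *
+ *     while (h && (idx = parseOpt(h, &param)) >= 0)
+ *             printk("cmd %d, param \"%s\"\n", idx, param);
+ *
+ * parseInit() splits the command list in place; parseOpt() returns the
+ * command index, points param at the zero-terminated parameter, and
+ * frees the handle itself when the buffer is exhausted.
+ */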
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+
+       int x, y = *size;
+       extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+       y = sprintf(buffer + len,
+            "Host: scsi%d Channel: %02d Id: %02d Lun: %02d\n  Vendor: ",
+                   scd->host->host_no, scd->channel, scd->id, scd->lun);
+       for (x = 0; x < 8; x++) {
+               if (scd->vendor[x] >= 0x20)
+                       y += sprintf(buffer + len + y, "%c", scd->vendor[x]);
+               else
+                       y += sprintf(buffer + len + y, " ");
+       }
+       y += sprintf(buffer + len + y, " Model: ");
+       for (x = 0; x < 16; x++) {
+               if (scd->model[x] >= 0x20)
+                       y += sprintf(buffer + len + y, "%c", scd->model[x]);
+               else
+                       y += sprintf(buffer + len + y, " ");
+       }
+       y += sprintf(buffer + len + y, " Rev: ");
+       for (x = 0; x < 4; x++) {
+               if (scd->rev[x] >= 0x20)
+                       y += sprintf(buffer + len + y, "%c", scd->rev[x]);
+               else
+                       y += sprintf(buffer + len + y, " ");
+       }
+       y += sprintf(buffer + len + y, "\n");
+
+       y += sprintf(buffer + len + y, "  Type:   %s ",
+                    scd->type < MAX_SCSI_DEVICE_CODE ?
+              scsi_device_types[(int) scd->type] : "Unknown          ");
+       y += sprintf(buffer + len + y, "               ANSI"
+                    " SCSI revision: %02x", (scd->scsi_level - 1) ? scd->scsi_level - 1 : 1);
+       if (scd->scsi_level == 2)
+               y += sprintf(buffer + len + y, " CCS\n");
+       else
+               y += sprintf(buffer + len + y, "\n");
+
+       *size = y;
+       return;
+}
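+
+/*
+ * Sample of the output this produces, with illustrative vendor/model
+ * values:
+ *
+ *     Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ *       Vendor: SEAGATE  Model: ST39102LW        Rev: 0004
+ *       Type:   Direct-Access                    ANSI SCSI revision: 02
+ */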
+
+#else                          /* if !CONFIG_PROC_FS */
+
+void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len)
+{
+}
+
+#endif                         /* CONFIG_PROC_FS */
+
+/*
+ * Overrides for Emacs so that we get a uniform tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_queue.c b/xen-2.4.16/drivers/scsi/scsi_queue.c
new file mode 100644 (file)
index 0000000..c41f1ce
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ *  scsi_queue.c Copyright (C) 1997 Eric Youngdale
+ *
+ *  generic mid-level SCSI queueing.
+ *
+ *  The point of this is that we need to track when hosts are unable to
+ *  accept a command because they are busy.  In addition, we track devices
+ *  that cannot accept a command because of a QUEUE_FULL condition.  In both
+ *  of these cases, we enter the command in the queue.  At some later point,
+ *  we attempt to remove commands from the queue and retry them.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+/*
+ * TODO:
+ *      1) Prevent multiple traversals of list to look for commands to
+ *         queue.
+ *      2) Protect against multiple insertions of list at the same time.
+ * DONE:
+ *      1) Set state of scsi command to a new state value for ml queue.
+ *      2) Insert into queue when host rejects command.
+ *      3) Make sure status code is properly passed from low-level queue func
+ *         so that internal_cmnd properly returns the right value.
+ *      4) Insert into queue when QUEUE_FULL.
+ *      5) Cull queue in bottom half handler.
+ *      6) Check usage count prior to queue insertion.  Requeue if usage
+ *         count is 0.
+ *      7) Don't send down any more commands if the host/device is busy.
+ */
+
+static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_queue.c,v 1.1 1997/10/21 11:16:38 eric Exp $";
+
+
+/*
+ * Function:    scsi_mlqueue_insert()
+ *
+ * Purpose:     Insert a command in the midlevel queue.
+ *
+ * Arguments:   cmd    - command that we are adding to queue.
+ *              reason - why we are inserting command to queue.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns:     Nothing.
+ *
+ * Notes:       We do this for one of two cases.  Either the host is busy
+ *              and it cannot accept any more commands for the time being,
+ *              or the device returned QUEUE_FULL and can accept no more
+ *              commands.
+ * Notes:       This could be called either from an interrupt context or a
+ *              normal process context.
+ */
+int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason)
+{
+       struct Scsi_Host *host;
+       unsigned long flags;
+
+       SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd));
+
+       /*
+        * We are inserting the command into the ml queue.  First, we
+        * cancel the timer, so it doesn't time out.
+        */
+       scsi_delete_timer(cmd);
+
+       host = cmd->host;
+
+       /*
+        * Next, set the appropriate busy bit for the device/host.
+        */
+       if (reason == SCSI_MLQUEUE_HOST_BUSY) {
+               /*
+                * Protect against race conditions.  If the host isn't busy,
+                * assume that something actually completed, and that we should
+                * be able to queue a command now.  Note that there is an implicit
+                * assumption that every host can always queue at least one command.
+                * If a host is inactive and cannot queue any commands, I don't see
+                * how things could possibly work anyway.
+                */
+               if (host->host_busy == 0) {
+                       if (scsi_retry_command(cmd) == 0) {
+                               return 0;
+                       }
+               }
+               host->host_blocked = TRUE;
+       } else {
+               /*
+                * Protect against race conditions.  If the device isn't busy,
+                * assume that something actually completed, and that we should
+                * be able to queue a command now.  Note that there is an implicit
+                * assumption that every device can always queue at least one
+                * command.  If a device is inactive and cannot queue any commands,
+                * I don't see how things could possibly work anyway.
+                */
+               if (cmd->device->device_busy == 0) {
+                       if (scsi_retry_command(cmd) == 0) {
+                               return 0;
+                       }
+               }
+               cmd->device->device_blocked = TRUE;
+       }
+
+       /*
+        * Register the fact that we own the thing for now.
+        */
+       cmd->state = SCSI_STATE_MLQUEUE;
+       cmd->owner = SCSI_OWNER_MIDLEVEL;
+       cmd->bh_next = NULL;
+
+       /*
+        * Decrement the counters, since these commands are no longer
+        * active on the host/device.
+        */
+       spin_lock_irqsave(&io_request_lock, flags);
+       cmd->host->host_busy--;
+       cmd->device->device_busy--;
+       spin_unlock_irqrestore(&io_request_lock, flags);
+
+       /*
+        * Insert this command at the head of the queue for its device.
+        * It will go before all other commands that are already in the queue.
+        */
+       scsi_insert_special_cmd(cmd, 1);
+       return 0;
+}
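
The insert path above reduces to one decision: if the busy count has already
fallen to zero, something must have completed, so retry immediately; otherwise
mark the host (or device) blocked, drop its busy count, and park the command.
A stand-alone C model of that decision; the types and helpers here are
invented stand-ins, not the kernel's Scsi_Cmnd/Scsi_Host:

    /* User-space model of scsi_mlqueue_insert()'s host-busy branch. */
    #include <stdio.h>

    struct model_host { int host_busy; int host_blocked; };
    struct model_cmd  { struct model_host *host; const char *state; };

    static int retry_now(struct model_cmd *cmd)
    {
            cmd->state = "retried immediately";
            return 0;               /* 0 = retry accepted, like scsi_retry_command() */
    }

    static int mlqueue_insert(struct model_cmd *cmd)
    {
            /* Nothing in flight means something already completed: retry now. */
            if (cmd->host->host_busy == 0 && retry_now(cmd) == 0)
                    return 0;
            cmd->host->host_blocked = 1;    /* stop sending new commands */
            cmd->host->host_busy--;         /* no longer active on the host */
            cmd->state = "parked on mlqueue";
            return 0;
    }

    int main(void)
    {
            struct model_host h = { 2, 0 };         /* two commands in flight */
            struct model_cmd  c = { &h, "new" };

            mlqueue_insert(&c);
            printf("%s, host_busy=%d blocked=%d\n",
                   c.state, h.host_busy, h.host_blocked);
            return 0;
    }
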
diff --git a/xen-2.4.16/drivers/scsi/scsi_scan.c b/xen-2.4.16/drivers/scsi/scsi_scan.c
new file mode 100644 (file)
index 0000000..f6dbf40
--- /dev/null
@@ -0,0 +1,894 @@
+/*
+ *  scsi_scan.c Copyright (C) 2000 Eric Youngdale
+ *
+ *  Bus scan logic.
+ *
+ *  This used to live in scsi.c, but that file was just a laundry basket
+ *  full of misc stuff.  This got separated out in order to make things
+ *  clearer.
+ */
+
+#define __NO_VERSION__
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/blk.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/* 
+ * Flags for irregular SCSI devices that need special treatment 
+ */
+#define BLIST_NOLUN            0x001   /* Don't scan for LUNs */
+#define BLIST_FORCELUN         0x002   /* Known to have LUNs, force scanning */
+#define BLIST_BORKEN           0x004   /* Flag for broken handshaking */
+#define BLIST_KEY              0x008   /* Needs to be unlocked by special command */
+#define BLIST_SINGLELUN        0x010   /* LUNs should not be used in parallel */
+#define BLIST_NOTQ             0x020   /* Buggy Tagged Command Queuing */
+#define BLIST_SPARSELUN        0x040   /* Non consecutive LUN numbering */
+#define BLIST_MAX5LUN          0x080   /* Avoid LUNs >= 5 */
+#define BLIST_ISDISK           0x100   /* Treat as (removable) disk */
+#define BLIST_ISROM            0x200   /* Treat as (removable) CD-ROM */
+#define BLIST_LARGELUN         0x400   /* LUNs larger than 7 despite reporting as SCSI 2 */
+
+static void print_inquiry(unsigned char *data);
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+               unsigned int lun, int lun0_scsi_level, 
+               unsigned int *max_scsi_dev, unsigned int *sparse_lun, 
+               Scsi_Device ** SDpnt, struct Scsi_Host *shpnt, 
+               char *scsi_result);
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+                               struct Scsi_Host *shpnt);
+
+struct dev_info {
+       const char *vendor;
+       const char *model;
+       const char *revision;   /* Latest revision known to be bad.  Not used yet */
+       unsigned flags;
+};
+
+/*
+ * This is what was previously known as the blacklist.  The concept
+ * has been expanded so that we can specify other types of things we
+ * need to be aware of.
+ */
+static struct dev_info device_list[] =
+{
+/* The following devices are known not to tolerate a lun != 0 scan for
+ * one reason or another.  Some will respond to all luns, others will
+ * lock up.
+ */
+       {"Aashima", "IMAGERY 2400SP", "1.03", BLIST_NOLUN},     /* Locks up if polled for lun != 0 */
+       {"CHINON", "CD-ROM CDS-431", "H42", BLIST_NOLUN},       /* Locks up if polled for lun != 0 */
+       {"CHINON", "CD-ROM CDS-535", "Q14", BLIST_NOLUN},       /* Locks up if polled for lun != 0 */
+       {"DENON", "DRD-25X", "V", BLIST_NOLUN},                 /* Locks up if probed for lun != 0 */
+       {"HITACHI", "DK312C", "CM81", BLIST_NOLUN},             /* Responds to all lun - dtg */
+       {"HITACHI", "DK314C", "CR21", BLIST_NOLUN},             /* responds to all lun */
+       {"IMS", "CDD521/10", "2.06", BLIST_NOLUN},              /* Locks-up when LUN>0 polled. */
+       {"MAXTOR", "XT-3280", "PR02", BLIST_NOLUN},             /* Locks-up when LUN>0 polled. */
+       {"MAXTOR", "XT-4380S", "B3C", BLIST_NOLUN},             /* Locks-up when LUN>0 polled. */
+       {"MAXTOR", "MXT-1240S", "I1.2", BLIST_NOLUN},           /* Locks up when LUN>0 polled */
+       {"MAXTOR", "XT-4170S", "B5A", BLIST_NOLUN},             /* Locks-up sometimes when LUN>0 polled. */
+       {"MAXTOR", "XT-8760S", "B7B", BLIST_NOLUN},             /* guess what? */
+       {"MEDIAVIS", "RENO CD-ROMX2A", "2.03", BLIST_NOLUN},    /*Responds to all lun */
+       {"NEC", "CD-ROM DRIVE:841", "1.0", BLIST_NOLUN},        /* Locks-up when LUN>0 polled. */
+       {"PHILIPS", "PCA80SC", "V4-2", BLIST_NOLUN},            /* Responds to all lun */
+       {"RODIME", "RO3000S", "2.33", BLIST_NOLUN},             /* Locks up if polled for lun != 0 */
+       {"SANYO", "CRD-250S", "1.20", BLIST_NOLUN},             /* causes failed REQUEST SENSE on lun 1
+                                                                * for aha152x controller, which causes
+                                                                * SCSI code to reset bus.*/
+       {"SEAGATE", "ST157N", "\004|j", BLIST_NOLUN},           /* causes failed REQUEST SENSE on lun 1
+                                                                * for aha152x controller, which causes
+                                                                * SCSI code to reset bus.*/
+       {"SEAGATE", "ST296", "921", BLIST_NOLUN},               /* Responds to all lun */
+       {"SEAGATE", "ST1581", "6538", BLIST_NOLUN},             /* Responds to all lun */
+       {"SONY", "CD-ROM CDU-541", "4.3d", BLIST_NOLUN},        
+       {"SONY", "CD-ROM CDU-55S", "1.0i", BLIST_NOLUN},
+       {"SONY", "CD-ROM CDU-561", "1.7x", BLIST_NOLUN},
+       {"SONY", "CD-ROM CDU-8012", "*", BLIST_NOLUN},
+       {"TANDBERG", "TDC 3600", "U07", BLIST_NOLUN},           /* Locks up if polled for lun != 0 */
+       {"TEAC", "CD-R55S", "1.0H", BLIST_NOLUN},               /* Locks up if polled for lun != 0 */
+       {"TEAC", "CD-ROM", "1.06", BLIST_NOLUN},                /* causes failed REQUEST SENSE on lun 1
+                                                                * for seagate controller, which causes
+                                                                * SCSI code to reset bus.*/
+       {"TEAC", "MT-2ST/45S2-27", "RV M", BLIST_NOLUN},        /* Responds to all lun */
+       {"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN},               /* causes failed REQUEST SENSE on lun 1
+                                                                * for seagate controller, which causes
+                                                                * SCSI code to reset bus.*/
+       {"QUANTUM", "LPS525S", "3110", BLIST_NOLUN},            /* Locks sometimes if polled for lun != 0 */
+       {"QUANTUM", "PD1225S", "3110", BLIST_NOLUN},            /* Locks sometimes if polled for lun != 0 */
+       {"QUANTUM", "FIREBALL ST4.3S", "0F0C", BLIST_NOLUN},    /* Locks up when polled for lun != 0 */
+       {"MEDIAVIS", "CDR-H93MV", "1.31", BLIST_NOLUN},         /* Locks up if polled for lun != 0 */
+       {"SANKYO", "CP525", "6.64", BLIST_NOLUN},               /* causes failed REQ SENSE, extra reset */
+       {"HP", "C1750A", "3226", BLIST_NOLUN},                  /* scanjet iic */
+       {"HP", "C1790A", "", BLIST_NOLUN},                      /* scanjet iip */
+       {"HP", "C2500A", "", BLIST_NOLUN},                      /* scanjet iicx */
+       {"HP", "A6188A", "*", BLIST_SPARSELUN},                 /* HP Va7100 Array */
+       {"HP", "A6189A", "*", BLIST_SPARSELUN},                 /* HP Va7400 Array */
+       {"HP", "A6189B", "*", BLIST_SPARSELUN},                 /* HP Va7410 Array */
+       {"HP", "OPEN-", "*", BLIST_SPARSELUN},                  /* HP XP Arrays */
+       {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN},              /* Locks up if polled for lun != 0 */
+       {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN},              /* Locks up if polled for lun != 0  
+                                                                * extra reset */
+       {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN},             /* Locks up if polled for lun != 0 */
+       {"YAMAHA", "CRW6416S", "1.0c", BLIST_NOLUN},            /* Locks up if polled for lun != 0 */
+       {"MITSUMI", "CD-R CR-2201CS", "6119", BLIST_NOLUN},     /* Locks up if polled for lun != 0 */
+       {"RELISYS", "Scorpio", "*", BLIST_NOLUN},               /* responds to all LUN */
+       {"RELISYS", "VM3530+", "*", BLIST_NOLUN},               /* responds to all LUN */
+       {"ACROSS", "", "*", BLIST_NOLUN},                       /* responds to all LUN */
+       {"MICROTEK", "ScanMaker II", "5.61", BLIST_NOLUN},      /* responds to all LUN */
+
+/*
+ * Other types of devices that have special flags.
+ */
+       {"SONY", "CD-ROM CDU-8001", "*", BLIST_BORKEN},
+       {"TEXEL", "CD-ROM", "1.06", BLIST_BORKEN},
+       {"IOMEGA", "Io20S         *F", "*", BLIST_KEY},
+       {"INSITE", "Floptical   F*8I", "*", BLIST_KEY},
+       {"INSITE", "I325VM", "*", BLIST_KEY},
+       {"LASOUND","CDX7405","3.10", BLIST_MAX5LUN | BLIST_SINGLELUN},
+       {"MICROP", "4110", "*", BLIST_NOTQ},                    /* Buggy Tagged Queuing */
+       {"NRC", "MBR-7", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"NRC", "MBR-7.4", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"REGAL", "CDC-4X", "*", BLIST_MAX5LUN | BLIST_SINGLELUN},
+       {"NAKAMICH", "MJ-4.8S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"NAKAMICH", "MJ-5.16S", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"PIONEER", "CD-ROM DRM-600", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"PIONEER", "CD-ROM DRM-602X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"PIONEER", "CD-ROM DRM-604X", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"EMULEX", "MD21/S2     ESDI", "*", BLIST_SINGLELUN},
+       {"CANON", "IPUBJD", "*", BLIST_SPARSELUN},
+       {"nCipher", "Fastness Crypto", "*", BLIST_FORCELUN},
+       {"DEC","HSG80","*", BLIST_FORCELUN},
+       {"COMPAQ","LOGICAL VOLUME","*", BLIST_FORCELUN},
+       {"COMPAQ","CR3500","*", BLIST_FORCELUN},
+       {"NEC", "PD-1 ODX654P", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"MATSHITA", "PD-1", "*", BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN},
+       {"TOSHIBA","CDROM","*", BLIST_ISROM},
+       {"TOSHIBA","CD-ROM","*", BLIST_ISROM},
+       {"MegaRAID", "LD", "*", BLIST_FORCELUN},
+       {"DGC",  "RAID",      "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (tgt @ LUN 0)
+       {"DGC",  "DISK",      "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (no tgt @ LUN 0) 
+       {"DELL", "PV660F",   "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+       {"DELL", "PV660F   PSEUDO",   "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+       {"DELL", "PSEUDO DEVICE .",   "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+       {"DELL", "PV530F",    "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F
+       {"EMC", "SYMMETRIX", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
+       {"HP", "A6189A", "*", BLIST_SPARSELUN |  BLIST_LARGELUN}, // HP VA7400, by Alar Aun
+       {"CMD", "CRA-7280", "*", BLIST_SPARSELUN | BLIST_LARGELUN},   // CMD RAID Controller
+       {"CNSI", "G7324", "*", BLIST_SPARSELUN | BLIST_LARGELUN},     // Chaparral G7324 RAID
+       {"CNSi", "G8324", "*", BLIST_SPARSELUN},     // Chaparral G8324 RAID
+       {"Zzyzx", "RocketStor 500S", "*", BLIST_SPARSELUN},
+       {"Zzyzx", "RocketStor 2000", "*", BLIST_SPARSELUN},
+       {"SONY", "TSL",       "*", BLIST_FORCELUN},  // DDS3 & DDS4 autoloaders
+       {"DELL", "PERCRAID", "*", BLIST_FORCELUN},
+       {"HP", "NetRAID-4M", "*", BLIST_FORCELUN},
+       {"ADAPTEC", "AACRAID", "*", BLIST_FORCELUN},
+       {"ADAPTEC", "Adaptec 5400S", "*", BLIST_FORCELUN},
+       {"COMPAQ", "MSA1000", "*", BLIST_FORCELUN},
+       {"HP", "C1557A", "*", BLIST_FORCELUN},
+       {"IBM", "AuSaV1S2", "*", BLIST_FORCELUN},
+       {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+       {"DDN", "SAN DataDirector", "*", BLIST_SPARSELUN},
+       {"HITACHI", "DF400", "*", BLIST_SPARSELUN},
+       {"HITACHI", "DF500", "*", BLIST_SPARSELUN},
+       {"HITACHI", "DF600", "*", BLIST_SPARSELUN},
+
+       /*
+        * Must be at end of list...
+        */
+       {NULL, NULL, NULL}
+};
+
+#define MAX_SCSI_LUNS 0xFFFFFFFF
+
+#ifdef CONFIG_SCSI_MULTI_LUN
+static unsigned int max_scsi_luns = MAX_SCSI_LUNS;
+#else
+static unsigned int max_scsi_luns = 1;
+#endif
+
+#ifdef MODULE
+
+MODULE_PARM(max_scsi_luns, "i");
+MODULE_PARM_DESC(max_scsi_luns, "last scsi LUN (should be between 1 and 2^32-1)");
+
+#else
+
+static int __init scsi_luns_setup(char *str)
+{
+       unsigned int tmp;
+
+       if (get_option(&str, &tmp) == 1) {
+               max_scsi_luns = tmp;
+               return 1;
+       } else {
+               printk("scsi_luns_setup : usage max_scsi_luns=n "
+                      "(n should be between 1 and 2^32-1)\n");
+               return 0;
+       }
+}
+
+__setup("max_scsi_luns=", scsi_luns_setup);
+
+#endif
+
+static void print_inquiry(unsigned char *data)
+{
+       int i;
+
+       printk("  Vendor: ");
+       for (i = 8; i < 16; i++) {
+               if (data[i] >= 0x20 && i < data[4] + 5)
+                       printk("%c", data[i]);
+               else
+                       printk(" ");
+       }
+
+       printk("  Model: ");
+       for (i = 16; i < 32; i++) {
+               if (data[i] >= 0x20 && i < data[4] + 5)
+                       printk("%c", data[i]);
+               else
+                       printk(" ");
+       }
+
+       printk("  Rev: ");
+       for (i = 32; i < 36; i++) {
+               if (data[i] >= 0x20 && i < data[4] + 5)
+                       printk("%c", data[i]);
+               else
+                       printk(" ");
+       }
+
+       printk("\n");
+
+       i = data[0] & 0x1f;
+
+       printk("  Type:   %s ",
+              i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : "Unknown          ");
+       printk("                 ANSI SCSI revision: %02x", data[2] & 0x07);
+       if ((data[2] & 0x07) == 1 && (data[3] & 0x0f) == 1)
+               printk(" CCS\n");
+       else
+               printk("\n");
+}
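
print_inquiry() walks the fixed layout of a standard INQUIRY response: the
peripheral type in the low five bits of byte 0, the ANSI version in byte 2,
the additional length in byte 4, and the vendor, model and revision strings
in bytes 8-15, 16-31 and 32-35.  A stand-alone sketch decoding a hand-built
response; every sample byte below is invented for illustration:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char data[36];

            memset(data, ' ', sizeof(data));    /* INQUIRY strings are blank padded */
            data[0] = 0x00;         /* peripheral type 0 = direct access (disk) */
            data[2] = 0x02;         /* ANSI SCSI-2 */
            data[3] = 0x02;         /* response data format 2 */
            data[4] = 31;           /* additional length: bytes 5..35 follow */
            memcpy(data + 8,  "SEAGATE", 7);    /* vendor,   bytes  8..15 */
            memcpy(data + 16, "ST296",   5);    /* model,    bytes 16..31 */
            memcpy(data + 32, "921",     3);    /* revision, bytes 32..35 */

            printf("type=%d ansi=%d vendor=%.8s model=%.16s rev=%.4s\n",
                   data[0] & 0x1f, data[2] & 0x07,
                   (char *) data + 8, (char *) data + 16, (char *) data + 32);
            return 0;
    }
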
+
+static int get_device_flags(unsigned char *response_data)
+{
+       int i = 0;
+       unsigned char *pnt;
+       for (i = 0; 1; i++) {
+               if (device_list[i].vendor == NULL)
+                       return 0;
+               pnt = &response_data[8];
+               while (*pnt && *pnt == ' ')
+                       pnt++;
+               if (memcmp(device_list[i].vendor, pnt,
+                          strlen(device_list[i].vendor)))
+                       continue;
+               pnt = &response_data[16];
+               while (*pnt && *pnt == ' ')
+                       pnt++;
+               if (memcmp(device_list[i].model, pnt,
+                          strlen(device_list[i].model)))
+                       continue;
+               return device_list[i].flags;
+       }
+       return 0;
+}
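
get_device_flags() skips leading blanks in the response and then compares only
strlen(entry) bytes, so each table row is a prefix match: the "SEAGATE"/"ST296"
row would also catch a hypothetical "ST296N".  A simplified stand-alone
version; the struct and function names are invented, only the matching logic
mirrors the code above:

    #include <stdio.h>
    #include <string.h>

    struct dev_info { const char *vendor, *model; unsigned flags; };

    static unsigned lookup_flags(const struct dev_info *tbl,
                                 const unsigned char *resp)
    {
            const unsigned char *v = resp + 8, *m = resp + 16;

            while (*v == ' ') v++;          /* fields are blank padded */
            while (*m == ' ') m++;
            for (; tbl->vendor; tbl++)
                    if (!memcmp(tbl->vendor, v, strlen(tbl->vendor)) &&
                        !memcmp(tbl->model,  m, strlen(tbl->model)))
                            return tbl->flags;
            return 0;
    }

    int main(void)
    {
            static const struct dev_info tbl[] = {
                    { "SEAGATE", "ST296", 0x001 },  /* BLIST_NOLUN */
                    { NULL, NULL, 0 }
            };
            unsigned char resp[36];

            memset(resp, ' ', sizeof(resp));
            memcpy(resp + 8,  "SEAGATE", 7);
            memcpy(resp + 16, "ST296N",  6);    /* prefix still matches */
            printf("flags=0x%x\n", lookup_flags(tbl, resp));
            return 0;
    }
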
+
+/*
+ *  Detecting SCSI devices :
+ *  We scan the buses of all present host adapters, from ID 0 to ID (max_id).
+ *  We use the INQUIRY command to determine the device type, then hand the
+ *  ID / LUN address of each sequential-access device to the tape driver and
+ *  each random-access device to the disk driver.
+ */
+void scan_scsis(struct Scsi_Host *shpnt,
+                      uint hardcoded,
+                      uint hchannel,
+                      uint hid,
+                      uint hlun)
+{
+       uint channel;
+       unsigned int dev;
+       unsigned int lun;
+       unsigned int max_dev_lun;
+       unsigned char *scsi_result;
+       unsigned char scsi_result0[256];
+       Scsi_Device *SDpnt;
+       Scsi_Device *SDtail;
+       unsigned int sparse_lun;
+       int lun0_sl;
+
+       scsi_result = NULL;
+
+       SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device),
+                                       GFP_ATOMIC);
+       if (SDpnt) {
+               memset(SDpnt, 0, sizeof(Scsi_Device));
+               /*
+                * Register the queue for the device.  All I/O requests will
+                * come in through here.  We also need to register a pointer to
+                * ourselves, since the queue handler won't know what device
+                * the queue actually represents.   We could look it up, but it
+                * is pointless work.
+                */
+               scsi_initialize_queue(SDpnt, shpnt);
+               SDpnt->request_queue.queuedata = (void *) SDpnt;
+               /* Make sure we have something that is valid for DMA purposes */
+               scsi_result = ((!shpnt->unchecked_isa_dma)
+                              ? &scsi_result0[0] : kmalloc(512, GFP_DMA));
+       }
+
+       if (scsi_result == NULL) {
+               printk("Unable to obtain scsi_result buffer\n");
+               goto leave;
+       }
+       /*
+        * We must chain ourselves into the host_queue, so commands can time out
+        */
+       SDpnt->queue_depth = 1;
+       SDpnt->host = shpnt;
+       SDpnt->online = TRUE;
+
+       initialize_merge_fn(SDpnt);
+
+        /*
+         * Initialize the object that we will use to wait for command blocks.
+         */
+       init_waitqueue_head(&SDpnt->scpnt_wait);
+
+       /*
+        * Next, hook the device to the host in question.
+        */
+       SDpnt->prev = NULL;
+       SDpnt->next = NULL;
+       if (shpnt->host_queue != NULL) {
+               SDtail = shpnt->host_queue;
+               while (SDtail->next != NULL)
+                       SDtail = SDtail->next;
+
+               SDtail->next = SDpnt;
+               SDpnt->prev = SDtail;
+       } else {
+               shpnt->host_queue = SDpnt;
+       }
+
+       /*
+        * We need to increment the counter for this one device so we can track
+        * when things are quiet.
+        */
+       if (hardcoded == 1) {
+               Scsi_Device *oldSDpnt = SDpnt;
+               struct Scsi_Device_Template *sdtpnt;
+               channel = hchannel;
+               if (channel > shpnt->max_channel)
+                       goto leave;
+               dev = hid;
+               if (dev >= shpnt->max_id)
+                       goto leave;
+               lun = hlun;
+               if (lun >= shpnt->max_lun)
+                       goto leave;
+               if ((0 == lun) || (lun > 7))
+                       lun0_sl = SCSI_3; /* actually don't care for 0 == lun */
+               else
+                       lun0_sl = find_lun0_scsi_level(channel, dev, shpnt);
+               scan_scsis_single(channel, dev, lun, lun0_sl, &max_dev_lun, 
+                                 &sparse_lun, &SDpnt, shpnt, scsi_result);
+               if (SDpnt != oldSDpnt) {
+
+                       /* it could happen the blockdevice hasn't yet been inited */
+                       /* queue_depth() moved from scsi_proc_info() so that
+                          it is called before scsi_build_commandblocks() */
+                       if (shpnt->select_queue_depths != NULL)
+                               (shpnt->select_queue_depths)(shpnt,
+                                                            shpnt->host_queue);
+
+                       for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next)
+                               if (sdtpnt->init && sdtpnt->dev_noticed)
+                                       (*sdtpnt->init) ();
+
+                       for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+                               if (sdtpnt->attach) {
+                                       (*sdtpnt->attach) (oldSDpnt);
+                                       if (oldSDpnt->attached) {
+                                               scsi_build_commandblocks(oldSDpnt);
+                                               if (0 == oldSDpnt->has_cmdblocks) {
+                                                       printk("scan_scsis: DANGER, no command blocks\n");
+                                                       /* What to do now ?? */
+                                               }
+                                       }
+                               }
+                       }
+                       scsi_resize_dma_pool();
+
+                       for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) {
+                               if (sdtpnt->finish && sdtpnt->nr_dev) {
+                                       (*sdtpnt->finish) ();
+                               }
+                       }
+               }
+       } else {
+               /* Scan targets in order: PC ordering is 0->n, IBM/spec ordering is n->0 */
+               int order_dev;
+
+               for (channel = 0; channel <= shpnt->max_channel; channel++) {
+                       for (dev = 0; dev < shpnt->max_id; ++dev) {
+                               if (shpnt->reverse_ordering)
+                                       /* Shift to scanning 15,14,13,... or 7,6,5,... */
+                                       order_dev = shpnt->max_id - dev - 1;
+                               else
+                                       order_dev = dev;
+
+                               if (shpnt->this_id != order_dev) {
+
+                                       /*
+                                        * We need the for loop so that continue, etc., work correctly. We put this in
+                                        * a variable so that we can override it during the scan if we
+                                        * detect a device *KNOWN* to have multiple logical units.
+                                        */
+                                       max_dev_lun = (max_scsi_luns < shpnt->max_lun ?
+                                        max_scsi_luns : shpnt->max_lun);
+                                       sparse_lun = 0;
+                                       for (lun = 0, lun0_sl = SCSI_2; lun < max_dev_lun; ++lun) {
+                                               /* don't probe further for luns > 7 for targets <= SCSI_2 */
+                                               if ((lun0_sl < SCSI_3) && (lun > 7))
+                                                       break;
+
+                                               if (!scan_scsis_single(channel, order_dev, lun, lun0_sl,
+                                                                      &max_dev_lun, &sparse_lun, &SDpnt, shpnt,
+                                                                      scsi_result)
+                                                   && !sparse_lun)
+                                                       break;  /* break means don't probe further for luns!=0 */
+                                               if (SDpnt && (0 == lun)) {
+                                                       int bflags = get_device_flags (scsi_result);
+                                                       if (bflags & BLIST_LARGELUN)
+                                                               lun0_sl = SCSI_3; /* treat as SCSI 3 */
+                                                       else
+                                                               lun0_sl = SDpnt->scsi_level;
+                                               }
+                                       }       /* for lun ends */
+                               }       /* if this_id != id ends */
+                       }       /* for dev ends */
+               }               /* for channel ends */
+       }                       /* if/else hardcoded */
+
+      leave:
+
+       {                       /* Unchain SDpnt from host_queue */
+               Scsi_Device *prev, *next;
+               Scsi_Device *dqptr;
+
+               for (dqptr = shpnt->host_queue; dqptr != SDpnt; dqptr = dqptr->next)
+                       continue;
+               if (dqptr) {
+                       prev = dqptr->prev;
+                       next = dqptr->next;
+                       if (prev)
+                               prev->next = next;
+                       else
+                               shpnt->host_queue = next;
+                       if (next)
+                               next->prev = prev;
+               }
+       }
+
+       /* Last device block does not exist.  Free memory. */
+       if (SDpnt != NULL) {
+               blk_cleanup_queue(&SDpnt->request_queue);
+               kfree((char *) SDpnt);
+       }
+
+       /* If we allocated a buffer so we could do DMA, free it now */
+       if (scsi_result != &scsi_result0[0] && scsi_result != NULL) {
+               kfree(scsi_result);
+       }
+
+       {
+               Scsi_Device *sdev;
+               Scsi_Cmnd *scmd;
+
+               SCSI_LOG_SCAN_BUS(4, printk("Host status for host %p:\n", shpnt));
+               for (sdev = shpnt->host_queue; sdev; sdev = sdev->next) {
+                       SCSI_LOG_SCAN_BUS(4, printk("Device %d %p: ", sdev->id, sdev));
+                       for (scmd = sdev->device_queue; scmd; scmd = scmd->next) {
+                               SCSI_LOG_SCAN_BUS(4, printk("%p ", scmd));
+                       }
+                       SCSI_LOG_SCAN_BUS(4, printk("\n"));
+               }
+       }
+}
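
Stripped of the device bookkeeping and the lun0 SCSI-level tracking, the scan
order is: channels, then target IDs (optionally reversed), then LUNs, with the
LUN loop cut short as soon as a probe reports nothing further.  A toy sketch
of just that iteration; the constants and the probe() stub are invented:

    #include <stdio.h>

    #define MAX_CHANNEL 0
    #define MAX_ID      8           /* number of target IDs on the bus */
    #define MAX_LUN     8
    #define THIS_ID     7           /* the adapter's own ID is skipped */

    static int probe(int ch, int id, int lun)
    {
            return lun == 0;        /* toy: every target answers only at LUN 0 */
    }

    int main(void)
    {
            int reverse = 0, ch, dev, id, lun;

            for (ch = 0; ch <= MAX_CHANNEL; ch++)
                    for (dev = 0; dev < MAX_ID; dev++) {
                            id = reverse ? MAX_ID - dev - 1 : dev;
                            if (id == THIS_ID)
                                    continue;
                            for (lun = 0; lun < MAX_LUN; lun++) {
                                    printf("probe %d:%d:%d\n", ch, id, lun);
                                    if (!probe(ch, id, lun))
                                            break;  /* stop probing this target */
                            }
                    }
            return 0;
    }
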
+
+/*
+ * The worker for scan_scsis.
+ * Returning 0 means don't probe further for lun != 0; 1 means OK, go on.
+ * Global variables used: scsi_devices (linked list)
+ */
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+               unsigned int lun, int lun0_scsi_level,
+               unsigned int *max_dev_lun, unsigned int *sparse_lun, 
+               Scsi_Device ** SDpnt2, struct Scsi_Host *shpnt, 
+               char *scsi_result)
+{
+       char devname[64];
+       unsigned char scsi_cmd[MAX_COMMAND_SIZE];
+       struct Scsi_Device_Template *sdtpnt;
+       Scsi_Device *SDtail, *SDpnt = *SDpnt2;
+       Scsi_Request * SRpnt;
+       int bflags, type = -1;
+       extern devfs_handle_t scsi_devfs_handle;
+       int scsi_level;
+
+       SDpnt->host = shpnt;
+       SDpnt->id = dev;
+       SDpnt->lun = lun;
+       SDpnt->channel = channel;
+       SDpnt->online = TRUE;
+
+       scsi_build_commandblocks(SDpnt);
+       /* Some low level driver could use device->type (DB) */
+       SDpnt->type = -1;
+
+       /*
+        * Assume that the device will have handshaking problems, and then fix
+        * this field later if it turns out it doesn't
+        */
+       SDpnt->borken = 1;
+       SDpnt->was_reset = 0;
+       SDpnt->expecting_cc_ua = 0;
+       SDpnt->starved = 0;
+
+       if (NULL == (SRpnt = scsi_allocate_request(SDpnt))) {
+               printk("scan_scsis_single: no memory\n");
+               return 0;
+       }
+
+       /*
+        * We used to do a TEST_UNIT_READY before the INQUIRY but that was 
+        * not really necessary.  Spec recommends using INQUIRY to scan for
+        * devices (and TEST_UNIT_READY to poll for media change). - Paul G.
+        */
+
+       SCSI_LOG_SCAN_BUS(3, printk("scsi: performing INQUIRY\n"));
+       /*
+        * Build an INQUIRY command block.
+        */
+       scsi_cmd[0] = INQUIRY;
+       if ((lun > 0) && (lun0_scsi_level <= SCSI_2))
+               scsi_cmd[1] = (lun << 5) & 0xe0;
+       else    
+               scsi_cmd[1] = 0;        /* SCSI_3 and higher, don't touch */
+       scsi_cmd[2] = 0;
+       scsi_cmd[3] = 0;
+       scsi_cmd[4] = 255;
+       scsi_cmd[5] = 0;
+       SRpnt->sr_cmd_len = 0;
+       SRpnt->sr_data_direction = SCSI_DATA_READ;
+
+       scsi_wait_req (SRpnt, (void *) scsi_cmd,
+                 (void *) scsi_result,
+                 256, SCSI_TIMEOUT+4*HZ, 3);
+
+       SCSI_LOG_SCAN_BUS(3, printk("scsi: INQUIRY %s with code 0x%x\n",
+               SRpnt->sr_result ? "failed" : "successful", SRpnt->sr_result));
+
+       /*
+        * Now that we don't do TEST_UNIT_READY anymore, we must be prepared
+        * for media change conditions here, so cannot require zero result.
+        */
+       if (SRpnt->sr_result) {
+               if ((driver_byte(SRpnt->sr_result) & DRIVER_SENSE) != 0 &&
+                   (SRpnt->sr_sense_buffer[2] & 0xf) == UNIT_ATTENTION &&
+                   SRpnt->sr_sense_buffer[12] == 0x28 &&
+                   SRpnt->sr_sense_buffer[13] == 0) {
+                       /* not-ready to ready transition - good */
+               } else {
+                       /* assume no peripheral if any other sort of error */
+                       scsi_release_request(SRpnt);
+                       return 0;
+               }
+       }
+
+       /*
+        * Check for SPARSELUN before checking the peripheral qualifier,
+        * so sparse lun devices are completely scanned.
+        */
+
+       /*
+        * Get any flags for this device.  
+        */
+       bflags = get_device_flags (scsi_result);
+
+       if (bflags & BLIST_SPARSELUN) {
+         *sparse_lun = 1;
+       }
+       /*
+        * Check the peripheral qualifier field - this tells us whether LUNs
+        * are supported here or not.
+        */
+       if ((scsi_result[0] >> 5) == 3) {
+               scsi_release_request(SRpnt);
+               return 0;       /* assume no peripheral if any sort of error */
+       }
+       /*
+        * The Toshiba ROM was "gender-changed" here as an inline hack.
+        * This is now much more generic.  This is a mess: what we really
+        * want is to leave scsi_result alone and just change the SDpnt
+        * structure, and SDpnt is what we want print_inquiry to print.
+        * -- REW
+        */
+       if (bflags & BLIST_ISDISK) {
+               scsi_result[0] = TYPE_DISK;                                                
+               scsi_result[1] |= 0x80;     /* removable */
+       }
+
+       if (bflags & BLIST_ISROM) {
+               scsi_result[0] = TYPE_ROM;
+               scsi_result[1] |= 0x80;     /* removable */
+       }
+    
+       memcpy(SDpnt->vendor, scsi_result + 8, 8);
+       memcpy(SDpnt->model, scsi_result + 16, 16);
+       memcpy(SDpnt->rev, scsi_result + 32, 4);
+
+       SDpnt->removable = (0x80 & scsi_result[1]) >> 7;
+       /* Use the peripheral qualifier field to determine online/offline */
+       if (((scsi_result[0] >> 5) & 7) == 1)
+               SDpnt->online = FALSE;
+       else
+               SDpnt->online = TRUE;
+       SDpnt->lockable = SDpnt->removable;
+       SDpnt->changed = 0;
+       SDpnt->access_count = 0;
+       SDpnt->busy = 0;
+       SDpnt->has_cmdblocks = 0;
+       /*
+        * Currently, all sequential-access devices are assumed to be tapes and
+        * all random-access devices disks, with ROM / WORM devices treated as
+        * read-only.
+        */
+       switch (type = (scsi_result[0] & 0x1f)) {
+       case TYPE_TAPE:
+       case TYPE_DISK:
+       case TYPE_PRINTER:
+       case TYPE_MOD:
+       case TYPE_PROCESSOR:
+       case TYPE_SCANNER:
+       case TYPE_MEDIUM_CHANGER:
+       case TYPE_ENCLOSURE:
+       case TYPE_COMM:
+               SDpnt->writeable = 1;
+               break;
+       case TYPE_WORM:
+       case TYPE_ROM:
+               SDpnt->writeable = 0;
+               break;
+       default:
+               printk("scsi: unknown type %d\n", type);
+       }
+
+       SDpnt->device_blocked = FALSE;
+       SDpnt->device_busy = 0;
+       SDpnt->single_lun = 0;
+       SDpnt->soft_reset =
+           (scsi_result[7] & 1) && ((scsi_result[3] & 7) == 2);
+       SDpnt->random = (type == TYPE_TAPE) ? 0 : 1;
+       SDpnt->type = (type & 0x1f);
+
+       print_inquiry(scsi_result);
+
+        sprintf (devname, "host%d/bus%d/target%d/lun%d",
+                 SDpnt->host->host_no, SDpnt->channel, SDpnt->id, SDpnt->lun);
+        if (SDpnt->de) printk ("DEBUG: dir: \"%s\" already exists\n", devname);
+        else SDpnt->de = devfs_mk_dir (scsi_devfs_handle, devname, NULL);
+
+       for (sdtpnt = scsi_devicelist; sdtpnt;
+            sdtpnt = sdtpnt->next)
+               if (sdtpnt->detect)
+                       SDpnt->attached +=
+                           (*sdtpnt->detect) (SDpnt);
+
+       SDpnt->scsi_level = scsi_result[2] & 0x07;
+       if (SDpnt->scsi_level >= 2 ||
+           (SDpnt->scsi_level == 1 &&
+            (scsi_result[3] & 0x0f) == 1))
+               SDpnt->scsi_level++;
+       scsi_level = SDpnt->scsi_level;
+
+       /*
+        * Accommodate drivers that want to sleep when they should be in a polling
+        * loop.
+        */
+       SDpnt->disconnect = 0;
+
+
+       /*
+        * Set the tagged_queue flag for SCSI-II devices that purport to support
+        * tagged queuing in the INQUIRY data.
+        */
+       SDpnt->tagged_queue = 0;
+       if ((SDpnt->scsi_level >= SCSI_2) &&
+           (scsi_result[7] & 2) &&
+           !(bflags & BLIST_NOTQ)) {
+               SDpnt->tagged_supported = 1;
+               SDpnt->current_tag = 0;
+       }
+       /*
+        * Some revisions of the Texel CD ROM drives have handshaking problems when
+        * used with the Seagate controllers.  Before we know what type of device
+        * we're talking to, we assume it's borken and then change it here if it
+        * turns out that it isn't a TEXEL drive.
+        */
+       if ((bflags & BLIST_BORKEN) == 0)
+               SDpnt->borken = 0;
+
+       /*
+        * If we want to only allow I/O to one of the luns attached to this device
+        * at a time, then we set this flag.
+        */
+       if (bflags & BLIST_SINGLELUN)
+               SDpnt->single_lun = 1;
+
+       /*
+        * These devices need a special "key" command to unlock them before we can use them
+        */
+       if ((bflags & BLIST_KEY) != 0) {
+               printk("Unlocked floptical drive.\n");
+               SDpnt->lockable = 0;
+               scsi_cmd[0] = MODE_SENSE;
+               if (shpnt->max_lun <= 8)
+                       scsi_cmd[1] = (lun << 5) & 0xe0;
+               else
+                       scsi_cmd[1] = 0;        /* any other idea? */
+               scsi_cmd[2] = 0x2e;
+               scsi_cmd[3] = 0;
+               scsi_cmd[4] = 0x2a;
+               scsi_cmd[5] = 0;
+               SRpnt->sr_cmd_len = 0;
+               SRpnt->sr_data_direction = SCSI_DATA_READ;
+               scsi_wait_req (SRpnt, (void *) scsi_cmd,
+                       (void *) scsi_result, 0x2a,
+                       SCSI_TIMEOUT, 3);
+       }
+
+       scsi_release_request(SRpnt);
+       SRpnt = NULL;
+
+       scsi_release_commandblocks(SDpnt);
+
+       /*
+        * This device was already hooked up to the host in question,
+        * so at this point we just let go of it and it should be fine.  We do need to
+        * allocate a new one and attach it to the host so that we can further scan the bus.
+        */
+       SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), GFP_ATOMIC);
+       if (!SDpnt) {
+               printk("scsi: scan_scsis_single: Cannot malloc\n");
+               return 0;
+       }
+        memset(SDpnt, 0, sizeof(Scsi_Device));
+
+       *SDpnt2 = SDpnt;
+       SDpnt->queue_depth = 1;
+       SDpnt->host = shpnt;
+       SDpnt->online = TRUE;
+       SDpnt->scsi_level = scsi_level;
+
+       /*
+        * Register the queue for the device.  All I/O requests will come
+        * in through here.  We also need to register a pointer to
+        * ourselves, since the queue handler won't know what device
+        * the queue actually represents.   We could look it up, but it
+        * is pointless work.
+        */
+       scsi_initialize_queue(SDpnt, shpnt);
+       SDpnt->host = shpnt;
+       initialize_merge_fn(SDpnt);
+
+       /*
+        * Mark this device as online, or otherwise we won't be able to do much with it.
+        */
+       SDpnt->online = TRUE;
+
+        /*
+         * Initialize the object that we will use to wait for command blocks.
+         */
+       init_waitqueue_head(&SDpnt->scpnt_wait);
+
+       /*
+        * Since we just found one device, there had damn well better be one in the list
+        * already.
+        */
+       if (shpnt->host_queue == NULL)
+               panic("scan_scsis_single: Host queue == NULL\n");
+
+       SDtail = shpnt->host_queue;
+       while (SDtail->next) {
+               SDtail = SDtail->next;
+       }
+
+       /* Add this device to the linked list at the end */
+       SDtail->next = SDpnt;
+       SDpnt->prev = SDtail;
+       SDpnt->next = NULL;
+
+       /*
+        * Some scsi devices cannot be polled for lun != 0 due to firmware bugs
+        */
+       if (bflags & BLIST_NOLUN)
+               return 0;       /* break; */
+
+       /*
+        * If this device is known to support sparse multiple units, override the
+        * other settings, and scan all of them.
+        */
+       if (bflags & BLIST_SPARSELUN) {
+               *max_dev_lun = shpnt->max_lun;
+               *sparse_lun = 1;
+               return 1;
+       }
+       /*
+        * If this device is known to support multiple units, override the other
+        * settings, and scan all of them.
+        */
+       if (bflags & BLIST_FORCELUN) {
+               /* 
+                * Scanning MAX_SCSI_LUNS units would be a bad idea.
+                * Any better idea?
+                * I think we need REPORT LUNS in future to avoid scanning
+                * of unused LUNs. But, that is another item.
+                */
+               if (*max_dev_lun < shpnt->max_lun)
+                       *max_dev_lun = shpnt->max_lun;
+               else if ((max_scsi_luns >> 1) >= *max_dev_lun)
+                       *max_dev_lun += shpnt->max_lun;
+               else
+                       *max_dev_lun = max_scsi_luns;
+               return 1;
+       }
+       /*
+        * REGAL CDC-4X: avoid hang after LUN 4
+        */
+       if (bflags & BLIST_MAX5LUN) {
+               *max_dev_lun = 5;
+               return 1;
+       }
+
+       /*
+        * We assume the device can't handle lun != 0 if:
+        *  - it reports scsi-0 (ANSI SCSI Revision 0), as old drives like the
+        *    MAXTOR XT-3280 do, or
+        *  - it reports scsi-1 (ANSI SCSI Revision 1) and Response Data Format 0.
+        */
+       if (((scsi_result[2] & 0x07) == 0)
+           ||
+           ((scsi_result[2] & 0x07) == 1 &&
+            (scsi_result[3] & 0x0f) == 0))
+               return 0;
+       return 1;
+}
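
Two details of the probe above deserve a worked example: for pre-SCSI-3
targets the LUN is folded into bits 5-7 of CDB byte 1, and the ANSI version
code from the response is bumped by one so it lines up with the kernel's
SCSI_1_CCS/SCSI_2/SCSI_3 constants (assumed here to have their historical
values 2, 3 and 4):

    #include <stdio.h>

    #define SCSI_2 3                /* assumed value from the 2.4 scsi.h */

    int main(void)
    {
            unsigned char cdb[6] = { 0x12, 0, 0, 0, 255, 0 };   /* INQUIRY */
            unsigned char inq2 = 0x02, inq3 = 0x02;     /* sample response bytes */
            int lun = 2, lun0_scsi_level = SCSI_2, scsi_level;

            if (lun > 0 && lun0_scsi_level <= SCSI_2)
                    cdb[1] = (lun << 5) & 0xe0;         /* -> 0x40 for LUN 2 */
            printf("CDB[1] = 0x%02x\n", cdb[1]);

            scsi_level = inq2 & 0x07;                   /* ANSI code 2 = SCSI-2 */
            if (scsi_level >= 2 || (scsi_level == 1 && (inq3 & 0x0f) == 1))
                    scsi_level++;                       /* 3 == SCSI_2 on the kernel scale */
            printf("scsi_level = %d\n", scsi_level);
            return 0;
    }
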
+
+/*
+ * The worker for scan_scsis.
+ * Returns the scsi_level of lun0 on this host, channel and dev (if already
+ * known), otherwise returns SCSI_2.
+ */
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+                               struct Scsi_Host *shpnt)
+{
+       int res = SCSI_2;
+       Scsi_Device *SDpnt;
+
+       for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next)
+       {
+               if ((0 == SDpnt->lun) && (dev == SDpnt->id) &&
+                   (channel == SDpnt->channel))
+                       return (int)SDpnt->scsi_level;
+       }
+       /* haven't found lun0; should send an INQUIRY, but take the easy route */
+       return res;
+}
diff --git a/xen-2.4.16/drivers/scsi/scsi_syms.c b/xen-2.4.16/drivers/scsi/scsi_syms.c
new file mode 100644 (file)
index 0000000..8a25a45
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * We should not even be trying to compile this if we are not doing
+ * a module.
+ */
+#define __NO_VERSION__
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/blk.h>
+#include <linux/fs.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include <scsi/scsi_ioctl.h>
+#include "hosts.h"
+#include "constants.h"
+
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+/*
+ * This source file contains the symbol table used by scsi loadable
+ * modules.
+ */
+EXPORT_SYMBOL(scsi_register_module);
+EXPORT_SYMBOL(scsi_unregister_module);
+EXPORT_SYMBOL(scsi_free);
+EXPORT_SYMBOL(scsi_malloc);
+EXPORT_SYMBOL(scsi_register);
+EXPORT_SYMBOL(scsi_unregister);
+EXPORT_SYMBOL(scsicam_bios_param);
+EXPORT_SYMBOL(scsi_partsize);
+EXPORT_SYMBOL(scsi_allocate_device);
+EXPORT_SYMBOL(scsi_do_cmd);
+EXPORT_SYMBOL(scsi_command_size);
+EXPORT_SYMBOL(scsi_ioctl);
+EXPORT_SYMBOL(print_command);
+EXPORT_SYMBOL(print_sense);
+EXPORT_SYMBOL(print_req_sense);
+EXPORT_SYMBOL(print_msg);
+EXPORT_SYMBOL(print_status);
+EXPORT_SYMBOL(scsi_dma_free_sectors);
+EXPORT_SYMBOL(kernel_scsi_ioctl);
+EXPORT_SYMBOL(scsi_need_isa_buffer);
+EXPORT_SYMBOL(scsi_release_command);
+EXPORT_SYMBOL(print_Scsi_Cmnd);
+EXPORT_SYMBOL(scsi_block_when_processing_errors);
+EXPORT_SYMBOL(scsi_mark_host_reset);
+EXPORT_SYMBOL(scsi_ioctl_send_command);
+#if defined(CONFIG_SCSI_LOGGING)       /* { */
+EXPORT_SYMBOL(scsi_logging_level);
+#endif
+
+EXPORT_SYMBOL(scsi_allocate_request);
+EXPORT_SYMBOL(scsi_release_request);
+EXPORT_SYMBOL(scsi_wait_req);
+EXPORT_SYMBOL(scsi_do_req);
+
+EXPORT_SYMBOL(scsi_report_bus_reset);
+EXPORT_SYMBOL(scsi_block_requests);
+EXPORT_SYMBOL(scsi_unblock_requests);
+
+EXPORT_SYMBOL(scsi_get_host_dev);
+EXPORT_SYMBOL(scsi_free_host_dev);
+
+EXPORT_SYMBOL(scsi_sleep);
+
+EXPORT_SYMBOL(proc_print_scsidevice);
+EXPORT_SYMBOL(proc_scsi);
+
+EXPORT_SYMBOL(scsi_io_completion);
+EXPORT_SYMBOL(scsi_end_request);
+
+EXPORT_SYMBOL(scsi_register_blocked_host);
+EXPORT_SYMBOL(scsi_deregister_blocked_host);
+
+/*
+ * This symbol is for the highlevel drivers (e.g. sg) only.
+ */
+EXPORT_SYMBOL(scsi_reset_provider);
+
+/*
+ * These are here only while I debug the rest of the scsi stuff.
+ */
+EXPORT_SYMBOL(scsi_hostlist);
+EXPORT_SYMBOL(scsi_hosts);
+EXPORT_SYMBOL(scsi_devicelist);
+EXPORT_SYMBOL(scsi_device_types);
+
+/*
+ * Externalize timers so that HBAs can safely start/restart commands.
+ */
+extern void scsi_add_timer(Scsi_Cmnd *, int, void ((*) (Scsi_Cmnd *)));
+extern int scsi_delete_timer(Scsi_Cmnd *);
+EXPORT_SYMBOL(scsi_add_timer);
+EXPORT_SYMBOL(scsi_delete_timer);
diff --git a/xen-2.4.16/drivers/scsi/scsicam.c b/xen-2.4.16/drivers/scsi/scsicam.c
new file mode 100644 (file)
index 0000000..77c7846
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * scsicam.c - SCSI CAM support functions, use for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ *      Visionary Computing 
+ *      (Unix and Linux consulting and custom programming)
+ *      drew@Colorado.EDU
+ *      +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/kernel.h>
+#include <linux/blk.h>
+#include <asm/unaligned.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsicam.h>
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+                  unsigned int *secs);
+
+
+/*
+ * Function : int scsicam_bios_param (Disk *disk, int dev, int *ip)
+ *
+ * Purpose : to determine the BIOS mapping used for a drive in a 
+ *      SCSI-CAM system, storing the results in ip as required
+ *      by the HDIO_GETGEO ioctl().
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsicam_bios_param(Disk * disk,    /* SCSI disk */
+                      kdev_t dev,      /* Device major, minor */
+                 int *ip /* Heads, sectors, cylinders in that order */ )
+{
+       struct buffer_head *bh;
+       int ret_code;
+       int size = disk->capacity;
+       unsigned long temp_cyl;
+
+       if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev))))
+               return -1;
+
+       /* try to infer mapping from partition table */
+       ret_code = scsi_partsize(bh, (unsigned long) size, (unsigned int *) ip + 2,
+                      (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+       brelse(bh);
+
+       if (ret_code == -1) {
+               /* pick some standard mapping with at most 1024 cylinders,
+                  and at most 62 sectors per track - this works up to
+                  7905 MB */
+               ret_code = setsize((unsigned long) size, (unsigned int *) ip + 2,
+                      (unsigned int *) ip + 0, (unsigned int *) ip + 1);
+       }
+       /* if something went wrong, then apparently we have to return
+          a geometry with more than 1024 cylinders */
+       if (ret_code || ip[0] > 255 || ip[1] > 63) {
+               ip[0] = 64;
+               ip[1] = 32;
+               temp_cyl = size / (ip[0] * ip[1]);
+               if (temp_cyl > 65534) {
+                       ip[0] = 255;
+                       ip[1] = 63;
+               }
+               ip[2] = size / (ip[0] * ip[1]);
+       }
+       return 0;
+}
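
When scsi_partsize() fails and the standard mapping would overflow, the
fallback above settles on 64 heads x 32 sectors, switching to 255 x 63 once
that would exceed 65534 cylinders.  A stand-alone rerun of that arithmetic;
both sector counts are invented samples:

    #include <stdio.h>

    static void fallback_geometry(unsigned long size)
    {
            unsigned long heads = 64, sectors = 32;

            if (size / (heads * sectors) > 65534) {     /* too many cylinders */
                    heads = 255;
                    sectors = 63;
            }
            printf("%lu sectors -> C/H/S = %lu/%lu/%lu\n",
                   size, size / (heads * sectors), heads, sectors);
    }

    int main(void)
    {
            fallback_geometry(4194304UL);       /* ~2 GiB: stays at 64 x 32 */
            fallback_geometry(156301488UL);     /* ~80 GB: promoted to 255 x 63 */
            return 0;
    }
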
+
+/*
+ * Function : static int scsi_partsize(struct buffer_head *bh, unsigned long 
+ *     capacity,unsigned int *cyls, unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine the BIOS mapping used to create the partition
+ *      table, storing the results in *cyls, *hds, and *secs 
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ */
+
+int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+              unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+{
+       struct partition *p, *largest = NULL;
+       int i, largest_cyl;
+       int cyl, ext_cyl, end_head, end_cyl, end_sector;
+       unsigned int logical_end, physical_end, ext_physical_end;
+
+
+       if (*(unsigned short *) (bh->b_data + 510) == 0xAA55) {
+               for (largest_cyl = -1, p = (struct partition *)
+                    (0x1BE + bh->b_data), i = 0; i < 4; ++i, ++p) {
+                       if (!p->sys_ind)
+                               continue;
+#ifdef DEBUG
+                       printk("scsicam_bios_param : partition %d has system \n",
+                              i);
+#endif
+                       cyl = p->cyl + ((p->sector & 0xc0) << 2);
+                       if (cyl > largest_cyl) {
+                               largest_cyl = cyl;
+                               largest = p;
+                       }
+               }
+       }
+       if (largest) {
+               end_cyl = largest->end_cyl + ((largest->end_sector & 0xc0) << 2);
+               end_head = largest->end_head;
+               end_sector = largest->end_sector & 0x3f;
+
+               if (end_head + 1 == 0 || end_sector == 0)
+                       return -1;
+
+#ifdef DEBUG
+               printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n",
+                      end_head, end_cyl, end_sector);
+#endif
+
+               physical_end = end_cyl * (end_head + 1) * end_sector +
+                   end_head * end_sector + end_sector;
+
+               /* This is the actual _sector_ number at the end */
+               logical_end = get_unaligned(&largest->start_sect)
+                   + get_unaligned(&largest->nr_sects);
+
+               /* This is for >1023 cylinders */
+               ext_cyl = (logical_end - (end_head * end_sector + end_sector))
+                   / (end_head + 1) / end_sector;
+               ext_physical_end = ext_cyl * (end_head + 1) * end_sector +
+                   end_head * end_sector + end_sector;
+
+#ifdef DEBUG
+               printk("scsicam_bios_param : logical_end=%d physical_end=%d ext_physical_end=%d ext_cyl=%d\n"
+                 ,logical_end, physical_end, ext_physical_end, ext_cyl);
+#endif
+
+               if ((logical_end == physical_end) ||
+                 (end_cyl == 1023 && ext_physical_end == logical_end)) {
+                       *secs = end_sector;
+                       *hds = end_head + 1;
+                       *cyls = capacity / ((end_head + 1) * end_sector);
+                       return 0;
+               }
+#ifdef DEBUG
+               printk("scsicam_bios_param : logical (%u) != physical (%u)\n",
+                      logical_end, physical_end);
+#endif
+       }
+       return -1;
+}
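
The heart of scsi_partsize() is a consistency check: if the largest
partition's logical end (start_sect + nr_sects) equals the physical end
computed from its ending C/H/S, that C/H/S mapping must be the geometry the
partitioning tool used.  A worked example with invented numbers that land
exactly on a cylinder boundary:

    #include <stdio.h>

    int main(void)
    {
            unsigned end_cyl = 523, end_head = 63, end_sector = 32;
            unsigned long logical_end = 1073152;        /* start_sect + nr_sects */
            unsigned long physical_end =
                    (unsigned long) end_cyl * (end_head + 1) * end_sector +
                    end_head * end_sector + end_sector;

            if (logical_end == physical_end)            /* 1073152 == 1073152 */
                    printf("geometry: %lu cylinders, %u heads, %u sectors/track\n",
                           logical_end / ((end_head + 1) * end_sector),
                           end_head + 1, end_sector);
            return 0;
    }
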
+
+/*
+ * Function : static int setsize(unsigned long capacity,unsigned int *cyls,
+ *      unsigned int *hds, unsigned int *secs);
+ *
+ * Purpose : to determine a near-optimal int 0x13 mapping for a
+ *      SCSI disk in terms of lost space of size capacity, storing
+ *      the results in *cyls, *hds, and *secs.
+ *
+ * Returns : -1 on failure, 0 on success.
+ *
+ * Extracted from
+ *
+ * WORKING                                                    X3T9.2
+ * DRAFT                                                        792D
+ *
+ *
+ *                                                        Revision 6
+ *                                                         10-MAR-94
+ * Information technology -
+ * SCSI-2 Common access method
+ * transport and SCSI interface module
+ * 
+ * ANNEX A :
+ *
+ * setsize() converts a read capacity value to int 13h
+ * head-cylinder-sector requirements. It minimizes the value for
+ * number of heads and maximizes the number of cylinders. This
+ * will support rather large disks before the number of heads
+ * will not fit in 4 bits (or 6 bits). This algorithm also
+ * minimizes the number of sectors that will be unused at the end
+ * of the disk while allowing for very large disks to be
+ * accommodated. This algorithm does not use physical geometry. 
+ */
+
+static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds,
+                  unsigned int *secs)
+{
+       unsigned int rv = 0;
+       unsigned long heads, sectors, cylinders, temp;
+
+       cylinders = 1024L;      /* Set number of cylinders to max */
+       sectors = 62L;          /* Maximize sectors per track */
+
+       temp = cylinders * sectors;     /* Compute divisor for heads */
+       heads = capacity / temp;        /* Compute value for number of heads */
+       if (capacity % temp) {  /* If no remainder, done! */
+               heads++;        /* Else, increment number of heads */
+               temp = cylinders * heads;       /* Compute divisor for sectors */
+               sectors = capacity / temp;      /* Compute value for sectors per
+                                                  track */
+               if (capacity % temp) {  /* If no remainder, done! */
+                       sectors++;      /* Else, increment number of sectors */
+                       temp = heads * sectors;         /* Compute divisor for cylinders */
+                       cylinders = capacity / temp;    /* Compute number of cylinders */
+               }
+       }
+       if (cylinders == 0)
+               rv = (unsigned) -1;     /* Give error if 0 cylinders */
+
+       *cyls = (unsigned int) cylinders;       /* Stuff return values */
+       *secs = (unsigned int) sectors;
+       *hds = (unsigned int) heads;
+       return (rv);
+}
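
The annex algorithm is easiest to follow on a concrete capacity.  A
stand-alone rerun for an arbitrary 1,000,000-sector disk, which settles on
1008 cylinders, 16 heads and 62 sectors per track:

    #include <stdio.h>

    int main(void)
    {
            unsigned long capacity = 1000000UL;
            unsigned long cylinders = 1024, sectors = 62, heads, temp;

            temp = cylinders * sectors;     /* 63488 */
            heads = capacity / temp;        /* 15, with a remainder ... */
            if (capacity % temp) {
                    heads++;                /* ... so 16 heads */
                    temp = cylinders * heads;
                    sectors = capacity / temp;      /* 61, with a remainder ... */
                    if (capacity % temp) {
                            sectors++;              /* ... so 62 sectors */
                            temp = heads * sectors;
                            cylinders = capacity / temp;    /* 1008 */
                    }
            }
            printf("C/H/S = %lu/%lu/%lu\n", cylinders, heads, sectors);
            return 0;
    }
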
diff --git a/xen-2.4.16/drivers/scsi/sd.c b/xen-2.4.16/drivers/scsi/sd.c
new file mode 100644 (file)
index 0000000..1e0749b
--- /dev/null
@@ -0,0 +1,1462 @@
+/*
+ *      sd.c Copyright (C) 1992 Drew Eckhardt
+ *           Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale
+ *
+ *      Linux scsi disk driver
+ *              Initial versions: Drew Eckhardt
+ *              Subsequent revisions: Eric Youngdale
+ *
+ *      <drew@colorado.edu>
+ *
+ *       Modified by Eric Youngdale ericy@andante.org to
+ *       add scatter-gather, multiple outstanding request, and other
+ *       enhancements.
+ *
+ *       Modified by Eric Youngdale eric@andante.org to support loadable
+ *       low-level scsi drivers.
+ *
+ *       Modified by Jirka Hanika geo@ff.cuni.cz to support more
+ *       scsi disks using eight major numbers.
+ *
+ *       Modified by Richard Gooch rgooch@atnf.csiro.au to support devfs.
+ *     
+ *      Modified by Torben Mathiasen tmm@image.dk
+ *       Resource allocation fixes in sd_init and cleanups.
+ *     
+ *      Modified by Alex Davis <letmein@erols.com>
+ *       Fix problem where partition info not being read in sd_open.
+ *     
+ *      Modified by Alex Davis <letmein@erols.com>
+ *       Fix problem where removable media could be ejected after sd_open.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/hdreg.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/smp.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#define MAJOR_NR SCSI_DISK0_MAJOR
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include "scsi.h"
+#include "hosts.h"
+#include "sd.h"
+#include <scsi/scsi_ioctl.h>
+#include "constants.h"
+#include <scsi/scsicam.h>      /* must follow "hosts.h" */
+
+#include <linux/genhd.h>
+
+/*
+ *  static const char RCSid[] = "$Header:";
+ */
+
+/* system major --> sd_gendisks index */
+#define SD_MAJOR_IDX(i)                (MAJOR(i) & SD_MAJOR_MASK)
+/* sd_gendisks index --> system major */
+#define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i))
+
+#define SD_PARTITION(dev)      ((SD_MAJOR_IDX(dev) << 8) | (MINOR(dev) & 255))
+
+#define SCSI_DISKS_PER_MAJOR   16
+#define SD_MAJOR_NUMBER(i)     SD_MAJOR((i) >> 8)
+#define SD_MINOR_NUMBER(i)     ((i) & 255)
+#define MKDEV_SD_PARTITION(i)  MKDEV(SD_MAJOR_NUMBER(i), (i) & 255)
+#define MKDEV_SD(index)                MKDEV_SD_PARTITION((index) << 4)
+#define N_USED_SCSI_DISKS  (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1)
+#define N_USED_SD_MAJORS   (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR)
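+/* i.e. N_USED_SD_MAJORS == ceil(dev_max / SCSI_DISKS_PER_MAJOR) */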
+
+#define MAX_RETRIES 5
+
+/*
+ *  Timeouts, in jiffies (seconds * HZ), for disks and magneto-opticals (which are slower).
+ */
+
+#define SD_TIMEOUT (30 * HZ)
+#define SD_MOD_TIMEOUT (75 * HZ)
+
+static Scsi_Disk *rscsi_disks;
+static struct gendisk *sd_gendisks;
+static int *sd_sizes;
+static int *sd_blocksizes;
+static int *sd_hardsizes;      /* Hardware sector size */
+static int *sd_max_sectors;
+
+static int check_scsidisk_media_change(kdev_t);
+static int fop_revalidate_scsidisk(kdev_t);
+
+static int sd_init_onedisk(int);
+
+
+static int sd_init(void);
+static void sd_finish(void);
+static int sd_attach(Scsi_Device *);
+static int sd_detect(Scsi_Device *);
+static void sd_detach(Scsi_Device *);
+static int sd_init_command(Scsi_Cmnd *);
+
+static struct Scsi_Device_Template sd_template = {
+       name:"disk",
+       tag:"sd",
+       scsi_type:TYPE_DISK,
+       major:SCSI_DISK0_MAJOR,
+        /*
+         * Secondary range of majors that this driver handles.
+         */
+       min_major:SCSI_DISK1_MAJOR,
+       max_major:SCSI_DISK7_MAJOR,
+       blk:1,
+       detect:sd_detect,
+       init:sd_init,
+       finish:sd_finish,
+       attach:sd_attach,
+       detach:sd_detach,
+       init_command:sd_init_command,
+};
+
+
+static void rw_intr(Scsi_Cmnd * SCpnt);
+
+#if defined(CONFIG_PPC)
+/*
+ * Moved from arch/ppc/pmac_setup.c.  This is where it really belongs.
+ */
+kdev_t __init
+sd_find_target(void *host, int tgt)
+{
+    Scsi_Disk *dp;
+    int i;
+    for (dp = rscsi_disks, i = 0; i < sd_template.dev_max; ++i, ++dp)
+        if (dp->device != NULL && dp->device->host == host
+            && dp->device->id == tgt)
+            return MKDEV_SD(i);
+    return 0;
+}
+#endif
+
+static int sd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
+{
+       kdev_t dev = inode->i_rdev;
+       struct Scsi_Host * host;
+       Scsi_Device * SDev;
+       int diskinfo[4];
+    
+       SDev = rscsi_disks[DEVICE_NR(dev)].device;
+       if (!SDev)
+               return -ENODEV;
+
+       /*
+        * If we are in the middle of error recovery, don't let anyone
+        * else try and use this device.  Also, if error recovery fails, it
+        * may try and take the device offline, in which case all further
+        * access to the device is prohibited.
+        */
+
+       if( !scsi_block_when_processing_errors(SDev) )
+       {
+               return -ENODEV;
+       }
+
+       switch (cmd) 
+       {
+               case HDIO_GETGEO:   /* Return BIOS disk parameters */
+               {
+                       struct hd_geometry *loc = (struct hd_geometry *) arg;
+                       if(!loc)
+                               return -EINVAL;
+
+                       host = rscsi_disks[DEVICE_NR(dev)].device->host;
+       
+                       /* default to most commonly used values */
+       
+                       diskinfo[0] = 0x40;
+                       diskinfo[1] = 0x20;
+                       diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
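+                       /* 0x40 heads * 0x20 sectors = 2048 (1 << 11) sectors
+                          per cylinder, hence capacity >> 11 cylinders */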
+       
+                       /* override with calculated, extended default, or driver values */
+       
+                       if(host->hostt->bios_param != NULL)
+                               host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)],
+                                           dev,
+                                           &diskinfo[0]);
+                       else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+                                       dev, &diskinfo[0]);
+
+                       if (put_user(diskinfo[0], &loc->heads) ||
+                               put_user(diskinfo[1], &loc->sectors) ||
+                               put_user(diskinfo[2], &loc->cylinders) ||
+                               put_user(sd_gendisks[SD_MAJOR_IDX(
+                                   inode->i_rdev)].part[MINOR(
+                                   inode->i_rdev)].start_sect, &loc->start))
+                               return -EFAULT;
+                       return 0;
+               }
+               case HDIO_GETGEO_BIG:
+               {
+                       struct hd_big_geometry *loc = (struct hd_big_geometry *) arg;
+
+                       if(!loc)
+                               return -EINVAL;
+
+                       host = rscsi_disks[DEVICE_NR(dev)].device->host;
+
+                       /* default to most commonly used values */
+
+                       diskinfo[0] = 0x40;
+                       diskinfo[1] = 0x20;
+                       diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11;
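+                       /* same 64-head, 32-sector default as HDIO_GETGEO above */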
+
+                       /* override with calculated, extended default, or driver values */
+
+                       if(host->hostt->bios_param != NULL)
+                               host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)],
+                                           dev,
+                                           &diskinfo[0]);
+                       else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
+                                       dev, &diskinfo[0]);
+
+                       if (put_user(diskinfo[0], &loc->heads) ||
+                               put_user(diskinfo[1], &loc->sectors) ||
+                               put_user(diskinfo[2], (unsigned int *) &loc->cylinders) ||
+                               put_user(sd_gendisks[SD_MAJOR_IDX(
+                                   inode->i_rdev)].part[MINOR(
+                                   inode->i_rdev)].start_sect, &loc->start))
+                               return -EFAULT;
+                       return 0;
+               }
+               case BLKGETSIZE:
+               case BLKGETSIZE64:
+               case BLKROSET:
+               case BLKROGET:
+               case BLKRASET:
+               case BLKRAGET:
+               case BLKFLSBUF:
+               case BLKSSZGET:
+               case BLKPG:
+               case BLKELVGET:
+               case BLKELVSET:
+               case BLKBSZGET:
+               case BLKBSZSET:
+                       return blk_ioctl(inode->i_rdev, cmd, arg);
+
+               case BLKRRPART: /* Re-read partition tables */
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       return revalidate_scsidisk(dev, 1);
+
+               default:
+                       return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg);
+       }
+}
+
+static void sd_devname(unsigned int disknum, char *buffer)
+{
+       if (disknum < 26)
+               sprintf(buffer, "sd%c", 'a' + disknum);
+       else {
+               unsigned int min1;
+               unsigned int min2;
+               /*
+                * For larger numbers of disks, we need to go to a new
+                * naming scheme.
+                */
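+               /* e.g. disknum 26 -> "sdaa", 27 -> "sdab", ..., 701 -> "sdzz" */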
+               min1 = disknum / 26;
+               min2 = disknum % 26;
+               sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2);
+       }
+}
+
+static request_queue_t *sd_find_queue(kdev_t dev)
+{
+       Scsi_Disk *dpnt;
+       int target;
+       target = DEVICE_NR(dev);
+
+       dpnt = &rscsi_disks[target];
+       if (!dpnt->device)
+               return NULL;    /* No such device */
+       return &dpnt->device->request_queue;
+}
+
+static int sd_init_command(Scsi_Cmnd * SCpnt)
+{
+       int dev, block, this_count;
+       struct hd_struct *ppnt;
+       Scsi_Disk *dpnt;
+#if CONFIG_SCSI_LOGGING
+       char nbuff[6];
+#endif
+
+       ppnt = &sd_gendisks[SD_MAJOR_IDX(SCpnt->request.rq_dev)].part[MINOR(SCpnt->request.rq_dev)];
+       dev = DEVICE_NR(SCpnt->request.rq_dev);
+
+       block = SCpnt->request.sector;
+       this_count = SCpnt->request_bufflen >> 9;
+
+       SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = 0x%x, block = %d\n",
+           SCpnt->request.rq_dev, block));
+
+       dpnt = &rscsi_disks[dev];
+       if (dev >= sd_template.dev_max ||
+           !dpnt->device ||
+           !dpnt->device->online ||
+           block + SCpnt->request.nr_sectors > ppnt->nr_sects) {
+               SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", SCpnt->request.nr_sectors));
+               SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
+               return 0;
+       }
+       block += ppnt->start_sect;
+       if (dpnt->device->changed) {
+               /*
+                * quietly refuse to do anything to a changed disc until the changed
+                * bit has been reset
+                */
+               /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */
+               return 0;
+       }
+       SCSI_LOG_HLQUEUE(2, sd_devname(dev, nbuff));
+       SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n",
+                                  nbuff, dev, block));
+
+       /*
+        * If we have a 1K hardware sectorsize, prevent access to single
+        * 512 byte sectors.  In theory we could handle this - in fact
+        * the scsi cdrom driver must be able to handle this because
+        * we typically use 1K blocksizes, and cdroms typically have
+        * 2K hardware sectorsizes.  Of course, things are simpler
+        * with the cdrom, since it is read-only.  For performance
+        * reasons, the filesystems should be able to handle this
+        * and not force the scsi disk driver to use bounce buffers
+        * for this.
+        */
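+       /* block and this_count arrive in 512-byte units; convert them to
+        * device-sector units for the larger hardware sector sizes below. */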
+       if (dpnt->device->sector_size == 1024) {
+               if ((block & 1) || (SCpnt->request.nr_sectors & 1)) {
+                       printk("sd.c: Bad block number requested\n");
+                       return 0;
+               } else {
+                       block = block >> 1;
+                       this_count = this_count >> 1;
+               }
+       }
+       if (dpnt->device->sector_size == 2048) {
+               if ((block & 3) || (SCpnt->request.nr_sectors & 3)) {
+                       printk("sd.c: Bad block number requested\n");
+                       return 0;
+               } else {
+                       block = block >> 2;
+                       this_count = this_count >> 2;
+               }
+       }
+       if (dpnt->device->sector_size == 4096) {
+               if ((block & 7) || (SCpnt->request.nr_sectors & 7)) {
+                       printk("sd.c: Bad block number requested\n");
+                       return 0;
+               } else {
+                       block = block >> 3;
+                       this_count = this_count >> 3;
+               }
+       }
+       switch (SCpnt->request.cmd) {
+       case WRITE:
+               if (!dpnt->device->writeable) {
+                       return 0;
+               }
+               SCpnt->cmnd[0] = WRITE_6;
+               SCpnt->sc_data_direction = SCSI_DATA_WRITE;
+               break;
+       case READ:
+               SCpnt->cmnd[0] = READ_6;
+               SCpnt->sc_data_direction = SCSI_DATA_READ;
+               break;
+       default:
+               panic("Unknown sd command %d\n", SCpnt->request.cmd);
+       }
+
+       SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n",
+                                  nbuff,
+                  (SCpnt->request.cmd == WRITE) ? "writing" : "reading",
+                                this_count, SCpnt->request.nr_sectors));
+
+       SCpnt->cmnd[1] = (SCpnt->device->scsi_level <= SCSI_2) ?
+                        ((SCpnt->lun << 5) & 0xe0) : 0;
+
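+       /*
+        * 6-byte READ/WRITE CDBs carry only a 21-bit block address and an
+        * 8-bit transfer length, so use the 10-byte forms for large blocks
+        * or counts, or when the device is known to accept them; adding
+        * (READ_10 - READ_6) converts either opcode to its 10-byte form.
+        */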
+       if (((this_count > 0xff) || (block > 0x1fffff)) || SCpnt->device->ten) {
+               if (this_count > 0xffff)
+                       this_count = 0xffff;
+
+               SCpnt->cmnd[0] += READ_10 - READ_6;
+               SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
+               SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
+               SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
+               SCpnt->cmnd[5] = (unsigned char) block & 0xff;
+               SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0;
+               SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
+               SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
+       } else {
+               if (this_count > 0xff)
+                       this_count = 0xff;
+
+               SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
+               SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
+               SCpnt->cmnd[3] = (unsigned char) block & 0xff;
+               SCpnt->cmnd[4] = (unsigned char) this_count;
+               SCpnt->cmnd[5] = 0;
+       }
+
+       /*
+        * We shouldn't disconnect in the middle of a sector, so with a dumb
+        * host adapter, it's safe to assume that we can at least transfer
+        * this many bytes between each connect / disconnect.
+        */
+       SCpnt->transfersize = dpnt->device->sector_size;
+       SCpnt->underflow = this_count << 9;
+
+       SCpnt->allowed = MAX_RETRIES;
+       SCpnt->timeout_per_command = (SCpnt->device->type == TYPE_DISK ?
+                                     SD_TIMEOUT : SD_MOD_TIMEOUT);
+
+       /*
+        * This is the completion routine we use.  This is matched in terms
+        * of capability to this function.
+        */
+       SCpnt->done = rw_intr;
+
+       /*
+        * This indicates that the command is ready from our end to be
+        * queued.
+        */
+       return 1;
+}
+
+static int sd_open(struct inode *inode, struct file *filp)
+{
+       int target, retval = -ENXIO;
+       Scsi_Device * SDev;
+       target = DEVICE_NR(inode->i_rdev);
+
+       SCSI_LOG_HLQUEUE(1, printk("target=%d, max=%d\n", target, sd_template.dev_max));
+
+       if (target >= sd_template.dev_max || !rscsi_disks[target].device)
+               return -ENXIO;  /* No such device */
+
+       /*
+        * If the device is in error recovery, wait until it is done.
+        * If the device is offline, then disallow any access to it.
+        */
+       if (!scsi_block_when_processing_errors(rscsi_disks[target].device)) {
+               return -ENXIO;
+       }
+       /*
+        * Make sure that only one process can do a check_disk_change at one time.
+        * This is also used to lock out further access when the partition table
+        * is being re-read.
+        */
+
+       while (rscsi_disks[target].device->busy) {
+               barrier();
+               cpu_relax();
+       }
+       /*
+        * The following code can sleep.
+        * Module unloading must be prevented
+        */
+       SDev = rscsi_disks[target].device;
+       if (SDev->host->hostt->module)
+               __MOD_INC_USE_COUNT(SDev->host->hostt->module);
+       if (sd_template.module)
+               __MOD_INC_USE_COUNT(sd_template.module);
+       SDev->access_count++;
+
+       if (rscsi_disks[target].device->removable) {
+               SDev->allow_revalidate = 1;
+               check_disk_change(inode->i_rdev);
+               SDev->allow_revalidate = 0;
+
+               /*
+                * If the drive is empty, just let the open fail.
+                */
+               if ((!rscsi_disks[target].ready) && !(filp->f_flags & O_NDELAY)) {
+                       retval = -ENOMEDIUM;
+                       goto error_out;
+               }
+
+               /*
+                * Similarly, if the device has the write protect tab set,
+                * have the open fail if the user expects to be able to write
+                * to the thing.
+                */
+               if ((rscsi_disks[target].write_prot) && (filp->f_mode & 2)) {
+                       retval = -EROFS;
+                       goto error_out;
+               }
+       }
+       /*
+        * It is possible that the disk changing stuff resulted in the device
+        * being taken offline.  If this is the case, report this to the user,
+        * and don't pretend that the open actually succeeded.
+        */
+       if (!SDev->online) {
+               goto error_out;
+       }
+       /*
+        * See if we are requesting a non-existent partition.  Do this
+        * after checking for disk change.
+        */
+       if (sd_sizes[SD_PARTITION(inode->i_rdev)] == 0) {
+               goto error_out;
+       }
+
+       if (SDev->removable)
+               if (SDev->access_count==1)
+                       if (scsi_block_when_processing_errors(SDev))
+                               scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL);
+
+       return 0;
+
+error_out:
+       SDev->access_count--;
+       if (SDev->host->hostt->module)
+               __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+       if (sd_template.module)
+               __MOD_DEC_USE_COUNT(sd_template.module);
+       return retval;  
+}
+
+static int sd_release(struct inode *inode, struct file *file)
+{
+       int target;
+       Scsi_Device * SDev;
+
+       target = DEVICE_NR(inode->i_rdev);
+       SDev = rscsi_disks[target].device;
+       if (!SDev)
+               return -ENODEV;
+
+       SDev->access_count--;
+
+       if (SDev->removable) {
+               if (!SDev->access_count)
+                       if (scsi_block_when_processing_errors(SDev))
+                               scsi_ioctl(SDev, SCSI_IOCTL_DOORUNLOCK, NULL);
+       }
+       if (SDev->host->hostt->module)
+               __MOD_DEC_USE_COUNT(SDev->host->hostt->module);
+       if (sd_template.module)
+               __MOD_DEC_USE_COUNT(sd_template.module);
+       return 0;
+}
+
+static struct block_device_operations sd_fops =
+{
+       owner:                  THIS_MODULE,
+       open:                   sd_open,
+       release:                sd_release,
+       ioctl:                  sd_ioctl,
+       check_media_change:     check_scsidisk_media_change,
+       revalidate:             fop_revalidate_scsidisk
+};
+
+/*
+ *    If we need more than one SCSI disk major (i.e. more than
+ *      16 SCSI disks), we'll have to kmalloc() more gendisks later.
+ */
+
+static struct gendisk sd_gendisk =
+{
+       major:          SCSI_DISK0_MAJOR,
+       major_name:     "sd",
+       minor_shift:    4,
+       max_p:          1 << 4,
+       fops:           &sd_fops,
+};
+
+#define SD_GENDISK(i)    sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR]
+
+/*
+ * rw_intr is the interrupt routine for the device driver.
+ * It will be notified on the end of a SCSI read / write, and
+ * will take one of several actions based on success or failure.
+ */
+
+static void rw_intr(Scsi_Cmnd * SCpnt)
+{
+       int result = SCpnt->result;
+#if CONFIG_SCSI_LOGGING
+       char nbuff[6];
+#endif
+       int this_count = SCpnt->bufflen >> 9;
+       int good_sectors = (result == 0 ? this_count : 0);
+       int block_sectors = 1;
+       long error_sector;
+
+       SCSI_LOG_HLCOMPLETE(1, sd_devname(DEVICE_NR(SCpnt->request.rq_dev), nbuff));
+
+       SCSI_LOG_HLCOMPLETE(1, printk("%s : rw_intr(%d, %x [%x %x])\n", nbuff,
+                                     SCpnt->host->host_no,
+                                     result,
+                                     SCpnt->sense_buffer[0],
+                                     SCpnt->sense_buffer[2]));
+
+       /*
+        * Handle MEDIUM ERRORs that indicate partial success.  Since this is a
+        * relatively rare error condition, no care is taken to avoid
+        * unnecessary additional work such as memcpy's that could be avoided.
+        */
+
+       if (driver_byte(result) != 0 &&         /* An error occurred */
+           SCpnt->sense_buffer[0] == 0xF0) {   /* Sense data is valid */
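+               /* 0xF0 == sense-data-valid bit (0x80) | current-error
+                * response code (0x70) */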
+               switch (SCpnt->sense_buffer[2]) {
+               case MEDIUM_ERROR:
+                       error_sector = (SCpnt->sense_buffer[3] << 24) |
+                       (SCpnt->sense_buffer[4] << 16) |
+                       (SCpnt->sense_buffer[5] << 8) |
+                       SCpnt->sense_buffer[6];
+                       if (SCpnt->request.bh != NULL)
+                               block_sectors = SCpnt->request.bh->b_size >> 9;
+                       switch (SCpnt->device->sector_size) {
+                       case 1024:
+                               error_sector <<= 1;
+                               if (block_sectors < 2)
+                                       block_sectors = 2;
+                               break;
+                       case 2048:
+                               error_sector <<= 2;
+                               if (block_sectors < 4)
+                                       block_sectors = 4;
+                               break;
+                       case 4096:
+                               error_sector <<= 3;
+                               if (block_sectors < 8)
+                                       block_sectors = 8;
+                               break;
+                       case 256:
+                               error_sector >>= 1;
+                               break;
+                       default:
+                               break;
+                       }
+                       error_sector -= sd_gendisks[SD_MAJOR_IDX(
+                               SCpnt->request.rq_dev)].part[MINOR(
+                               SCpnt->request.rq_dev)].start_sect;
+                       error_sector &= ~(block_sectors - 1);
+                       good_sectors = error_sector - SCpnt->request.sector;
+                       if (good_sectors < 0 || good_sectors >= this_count)
+                               good_sectors = 0;
+                       break;
+
+               case RECOVERED_ERROR:
+                       /*
+                        * An error occurred, but it recovered.  Inform the
+                        * user, but make sure that it's not treated as a
+                        * hard error.
+                        */
+                       print_sense("sd", SCpnt);
+                       result = 0;
+                       SCpnt->sense_buffer[0] = 0x0;
+                       good_sectors = this_count;
+                       break;
+
+               case ILLEGAL_REQUEST:
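+                       /* The target rejected a 10-byte CDB; fall back to
+                        * 6-byte commands from now on. */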
+                       if (SCpnt->device->ten == 1) {
+                               if (SCpnt->cmnd[0] == READ_10 ||
+                                   SCpnt->cmnd[0] == WRITE_10)
+                                       SCpnt->device->ten = 0;
+                       }
+                       break;
+
+               default:
+                       break;
+               }
+       }
+       /*
+        * This calls the generic completion function, now that we know
+        * how many actual sectors finished, and how many sectors we need
+        * to say have failed.
+        */
+       scsi_io_completion(SCpnt, good_sectors, block_sectors);
+}
+/*
+ * Note: the requeue_sd_request() handler this comment once described no
+ * longer exists; block device requests are translated into SCSI commands
+ * in sd_init_command() above.
+ */
+
+
+static int check_scsidisk_media_change(kdev_t full_dev)
+{
+       int retval;
+       int target;
+       int flag = 0;
+       Scsi_Device * SDev;
+
+       target = DEVICE_NR(full_dev);
+       SDev = rscsi_disks[target].device;
+
+       if (target >= sd_template.dev_max || !SDev) {
+               printk("SCSI disk request error: invalid device.\n");
+               return 0;
+       }
+       if (!SDev->removable)
+               return 0;
+
+       /*
+        * If the device is offline, don't send any commands - just pretend as
+        * if the command failed.  If the device ever comes back online, we
+        * can deal with it then.  It is only because of unrecoverable errors
+        * that we would ever take a device offline in the first place.
+        */
+       if (SDev->online == FALSE) {
+               rscsi_disks[target].ready = 0;
+               SDev->changed = 1;
+               return 1;       /* This will force a flush, if called from
+                                * check_disk_change */
+       }
+
+       /* Using Start/Stop enables differentiation between drive with
+        * no cartridge loaded - NOT READY, drive with changed cartridge -
+        * UNIT ATTENTION, or with same cartridge - GOOD STATUS.
+        * This also handles drives that auto spin down, e.g. the Iomega Jaz 1GB,
+        * as this will spin up the drive.
+        */
+       retval = -ENODEV;
+       if (scsi_block_when_processing_errors(SDev))
+               retval = scsi_ioctl(SDev, SCSI_IOCTL_START_UNIT, NULL);
+
+       if (retval) {           /* Unable to test, unit probably not ready.
+                                * This usually means there is no disc in the
+                                * drive.  Mark as changed, and we will figure
+                                * it out later once the drive is available
+                                * again.  */
+
+               rscsi_disks[target].ready = 0;
+               SDev->changed = 1;
+               return 1;       /* This will force a flush, if called from
+                                * check_disk_change */
+       }
+       /*
+        * For removable scsi disks (FLOPTICAL) we have to recognise the
+        * presence of a disk in the drive.  This is kept in the Scsi_Disk
+        * struct and tested at open!  Daniel Roche ( dan@lectra.fr )
+        */
+
+       rscsi_disks[target].ready = 1;  /* FLOPTICAL */
+
+       retval = SDev->changed;
+       if (!flag)
+               SDev->changed = 0;
+       return retval;
+}
+
+static int sd_init_onedisk(int i)
+{
+       unsigned char cmd[10];
+       char nbuff[6];
+       unsigned char *buffer;
+       unsigned long spintime_value = 0;
+       int the_result, retries, spintime;
+       int sector_size;
+       Scsi_Request *SRpnt;
+
+       /*
+        * Get the name of the disk, in case we need to log it somewhere.
+        */
+       sd_devname(i, nbuff);
+
+       /*
+        * If the device is offline, don't try and read capacity or any
+        * of the other niceties.
+        */
+       if (rscsi_disks[i].device->online == FALSE)
+               return i;
+
+       /*
+        * We need to retry the READ_CAPACITY because a UNIT_ATTENTION is
+        * considered a fatal error, and many devices report such an error
+        * just after a scsi bus reset.
+        */
+
+       SRpnt = scsi_allocate_request(rscsi_disks[i].device);
+       if (!SRpnt) {
+               printk(KERN_WARNING "(sd_init_onedisk:) Request allocation failure.\n");
+               return i;
+       }
+
+       buffer = (unsigned char *) scsi_malloc(512);
+       if (!buffer) {
+               printk(KERN_WARNING "(sd_init_onedisk:) Memory allocation failure.\n");
+               scsi_release_request(SRpnt);
+               return i;
+       }
+
+       spintime = 0;
+
+       /* Spin up drives, as required.  This must be done both at boot
+        * time and on module load. */
+       do {
+               retries = 0;
+
+               while (retries < 3) {
+                       cmd[0] = TEST_UNIT_READY;
+                       cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+                                ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+                       memset((void *) &cmd[2], 0, 8);
+                       SRpnt->sr_cmd_len = 0;
+                       SRpnt->sr_sense_buffer[0] = 0;
+                       SRpnt->sr_sense_buffer[2] = 0;
+                       SRpnt->sr_data_direction = SCSI_DATA_NONE;
+
+                       scsi_wait_req (SRpnt, (void *) cmd, (void *) buffer,
+                               0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+
+                       the_result = SRpnt->sr_result;
+                       retries++;
+                       if (the_result == 0
+                           || SRpnt->sr_sense_buffer[2] != UNIT_ATTENTION)
+                               break;
+               }
+
+               /*
+                * If the drive has indicated to us that it doesn't have
+                * any media in it, don't bother with any of the rest of
+                * this crap.
+                */
+               if( the_result != 0
+                   && ((driver_byte(the_result) & DRIVER_SENSE) != 0)
+                   && SRpnt->sr_sense_buffer[2] == UNIT_ATTENTION
+                   && SRpnt->sr_sense_buffer[12] == 0x3A ) {
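+                       /* ASC 0x3A: MEDIUM NOT PRESENT.  Fake a 1GB-sized
+                        * medium and mark the device changed/not ready. */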
+                       rscsi_disks[i].capacity = 0x1fffff;
+                       sector_size = 512;
+                       rscsi_disks[i].device->changed = 1;
+                       rscsi_disks[i].ready = 0;
+                       break;
+               }
+
+               /* Look for non-removable devices that return NOT_READY.
+                * Issue command to spin up drive for these cases. */
+               if (the_result && !rscsi_disks[i].device->removable &&
+                   SRpnt->sr_sense_buffer[2] == NOT_READY) {
+                       unsigned long time1;
+                       if (!spintime) {
+                               printk("%s: Spinning up disk...", nbuff);
+                               cmd[0] = START_STOP;
+                               cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+                                        ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+                               cmd[1] |= 1;    /* Return immediately */
+                               memset((void *) &cmd[2], 0, 8);
+                               cmd[4] = 1;     /* Start spin cycle */
+                               SRpnt->sr_cmd_len = 0;
+                               SRpnt->sr_sense_buffer[0] = 0;
+                               SRpnt->sr_sense_buffer[2] = 0;
+
+                               SRpnt->sr_data_direction = SCSI_DATA_READ;
+                               scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+                                           0/*512*/, SD_TIMEOUT, MAX_RETRIES);
+                               spintime_value = jiffies;
+                       }
+                       spintime = 1;
+                       time1 = HZ;
+                       /* Wait 1 second for next try */
+                       do {
+                               current->state = TASK_UNINTERRUPTIBLE;
+                               time1 = schedule_timeout(time1);
+                       } while(time1);
+                       printk(".");
+               }
+       } while (the_result && spintime &&
+                time_after(spintime_value + 100 * HZ, jiffies));
+       if (spintime) {
+               if (the_result)
+                       printk("not responding...\n");
+               else
+                       printk("ready\n");
+       }
+       retries = 3;
+       do {
+               cmd[0] = READ_CAPACITY;
+               cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+                        ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+               memset((void *) &cmd[2], 0, 8);
+               memset((void *) buffer, 0, 8);
+               SRpnt->sr_cmd_len = 0;
+               SRpnt->sr_sense_buffer[0] = 0;
+               SRpnt->sr_sense_buffer[2] = 0;
+
+               SRpnt->sr_data_direction = SCSI_DATA_READ;
+               scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+                           8, SD_TIMEOUT, MAX_RETRIES);
+
+               the_result = SRpnt->sr_result;
+               retries--;
+
+       } while (the_result && retries);
+
+       /*
+        * The SCSI standard says:
+        * "READ CAPACITY is necessary for self configuring software"
+        *  While not mandatory, support of READ CAPACITY is strongly
+        *  encouraged.
+        *  We used to die if we couldn't successfully do a READ CAPACITY.
+        *  But, now we go on about our way.  The side effects of this are
+        *
+        *  1. We can't know block size with certainty. I have said
+        *     "512 bytes is it" as this is most common.
+        *
+        *  2. Recovery from when someone attempts to read past the
+        *     end of the raw device will be slower.
+        */
+
+       if (the_result) {
+               printk("%s : READ CAPACITY failed.\n"
+                      "%s : status = %x, message = %02x, host = %d, driver = %02x \n",
+                      nbuff, nbuff,
+                      status_byte(the_result),
+                      msg_byte(the_result),
+                      host_byte(the_result),
+                      driver_byte(the_result)
+                   );
+               if (driver_byte(the_result) & DRIVER_SENSE)
+                       print_req_sense("sd", SRpnt);
+               else
+                       printk("%s : sense not available.\n", nbuff);
+
+               printk("%s : block size assumed to be 512 bytes, disk size 1GB.\n",
+                      nbuff);
+               rscsi_disks[i].capacity = 0x1fffff;
+               sector_size = 512;
+
+               /* Set dirty bit for removable devices if not ready -
+                * sometimes drives will not report this properly. */
+               if (rscsi_disks[i].device->removable &&
+                   SRpnt->sr_sense_buffer[2] == NOT_READY)
+                       rscsi_disks[i].device->changed = 1;
+
+       } else {
+               /*
+                * FLOPTICAL, if read_capa is ok, drive is assumed to be ready
+                */
+               rscsi_disks[i].ready = 1;
+
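+               /* READ CAPACITY returns the address of the last block and
+                * the block length, both big-endian, so the block count is
+                * last LBA + 1. */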
+               rscsi_disks[i].capacity = 1 + ((buffer[0] << 24) |
+                                              (buffer[1] << 16) |
+                                              (buffer[2] << 8) |
+                                              buffer[3]);
+
+               sector_size = (buffer[4] << 24) |
+                   (buffer[5] << 16) | (buffer[6] << 8) | buffer[7];
+
+               if (sector_size == 0) {
+                       sector_size = 512;
+                       printk("%s : sector size 0 reported, assuming 512.\n",
+                              nbuff);
+               }
+               if (sector_size != 512 &&
+                   sector_size != 1024 &&
+                   sector_size != 2048 &&
+                   sector_size != 4096 &&
+                   sector_size != 256) {
+                       printk("%s : unsupported sector size %d.\n",
+                              nbuff, sector_size);
+                       /*
+                        * The user might want to re-format the drive with
+                        * a supported sectorsize.  Once this happens, it
+                        * would be relatively trivial to set the thing up.
+                        * For this reason, we leave the thing in the table.
+                        */
+                       rscsi_disks[i].capacity = 0;
+               }
+               if (sector_size > 1024) {
+                       int m;
+
+                       /*
+                        * We must fix the sd_blocksizes and sd_hardsizes
+                        * to allow us to read the partition tables.
+                        * The disk reading code does not allow for reading
+                        * of partial sectors.
+                        */
+                       for (m = i << 4; m < ((i + 1) << 4); m++) {
+                               sd_blocksizes[m] = sector_size;
+                       }
+               }
+               {
+                       /*
+                        * The msdos fs needs to know the hardware sector size
+                        * So I have created this table. See ll_rw_blk.c
+                        * Jacques Gelinas (Jacques@solucorp.qc.ca)
+                        */
+                       int m;
+                       int hard_sector = sector_size;
+                       int sz = rscsi_disks[i].capacity * (hard_sector/256);
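+                       /* sz is the size in 256-byte units; megabytes here
+                        * means 10^6 bytes, and (sz/2 - sz/1250 + 974)/1950
+                        * is sz / 3906.25 (== sz * 256 / 10^6), rounded. */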
+
+                       /* There are 16 minors allocated for each major device */
+                       for (m = i << 4; m < ((i + 1) << 4); m++) {
+                               sd_hardsizes[m] = hard_sector;
+                       }
+
+                       printk("SCSI device %s: "
+                              "%d %d-byte hdwr sectors (%d MB)\n",
+                              nbuff, rscsi_disks[i].capacity,
+                              hard_sector, (sz/2 - sz/1250 + 974)/1950);
+               }
+
+               /* Rescale capacity to 512-byte units */
+               if (sector_size == 4096)
+                       rscsi_disks[i].capacity <<= 3;
+               if (sector_size == 2048)
+                       rscsi_disks[i].capacity <<= 2;
+               if (sector_size == 1024)
+                       rscsi_disks[i].capacity <<= 1;
+               if (sector_size == 256)
+                       rscsi_disks[i].capacity >>= 1;
+       }
+
+
+       /*
+        * Unless otherwise specified, this is not write protected.
+        */
+       rscsi_disks[i].write_prot = 0;
+       if (rscsi_disks[i].device->removable && rscsi_disks[i].ready) {
+               /* FLOPTICAL */
+
+               /*
+                * For removable scsi disk ( FLOPTICAL ) we have to recognise
+                * the Write Protect Flag. This flag is kept in the Scsi_Disk
+                * struct and tested at open !
+                * Daniel Roche ( dan@lectra.fr )
+                *
+                * Changed to get all pages (0x3f) rather than page 1 to
+                * get around devices which do not have a page 1.  Since
+                * we're only interested in the header anyway, this should
+                * be fine.
+                *   -- Matthew Dharm (mdharm-scsi@one-eyed-alien.net)
+                */
+
+               memset((void *) &cmd[0], 0, 8);
+               cmd[0] = MODE_SENSE;
+               cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ?
+                        ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0;
+               cmd[2] = 0x3f;  /* Get all pages */
+               cmd[4] = 255;   /* Ask for 255 bytes, even though we only want the first 8 */
+               SRpnt->sr_cmd_len = 0;
+               SRpnt->sr_sense_buffer[0] = 0;
+               SRpnt->sr_sense_buffer[2] = 0;
+
+               /* same request pattern as READ CAPACITY above */
+               SRpnt->sr_data_direction = SCSI_DATA_READ;
+               scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
+                           512, SD_TIMEOUT, MAX_RETRIES);
+
+               the_result = SRpnt->sr_result;
+
+               if (the_result) {
+                       printk("%s: test WP failed, assume Write Enabled\n", nbuff);
+               } else {
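+                       /* buffer[2] is the mode parameter header's
+                        * device-specific byte; bit 7 is the WP flag */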
+                       rscsi_disks[i].write_prot = ((buffer[2] & 0x80) != 0);
+                       printk("%s: Write Protect is %s\n", nbuff,
+                              rscsi_disks[i].write_prot ? "on" : "off");
+               }
+
+       }                       /* check for write protect */
+       SRpnt->sr_device->ten = 1;
+       SRpnt->sr_device->remap = 1;
+       SRpnt->sr_device->sector_size = sector_size;
+       /* Wake up a process waiting for device */
+       scsi_release_request(SRpnt);
+       SRpnt = NULL;
+
+       scsi_free(buffer, 512);
+       return i;
+}
+
+/*
+ * The sd_init() function allocates the per-disk tables and registers the
+ * SCSI disk majors; each disk's capacity probe and partition scan happen
+ * later, from sd_finish().
+ */
+
+static int sd_registered;
+
+static int sd_init(void)
+{
+       int i;
+
+       if (sd_template.dev_noticed == 0)
+               return 0;
+
+       if (!rscsi_disks)
+               sd_template.dev_max = sd_template.dev_noticed + SD_EXTRA_DEVS;
+
+       if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR)
+               sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR;
+
+       if (!sd_registered) {
+               for (i = 0; i < N_USED_SD_MAJORS; i++) {
+                       if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) {
+                               printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i));
+                               sd_template.dev_noticed = 0;
+                               return 1;
+                       }
+               }
+               sd_registered++;
+       }
+       /* We do not support attaching loadable devices yet. */
+       if (rscsi_disks)
+               return 0;
+
+       rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC);
+       if (!rscsi_disks)
+               goto cleanup_devfs;
+       memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk));
+
+       /* for every (necessary) major: */
+       sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+       if (!sd_sizes)
+               goto cleanup_disks;
+       memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int));
+
+       sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+       if (!sd_blocksizes)
+               goto cleanup_sizes;
+       
+       sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+       if (!sd_hardsizes)
+               goto cleanup_blocksizes;
+
+       sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC);
+       if (!sd_max_sectors)
+               goto cleanup_max_sectors;
+
+       for (i = 0; i < sd_template.dev_max << 4; i++) {
+               sd_blocksizes[i] = 1024;
+               sd_hardsizes[i] = 512;
+               /*
+                * Allow lowlevel device drivers to generate 512k large scsi
+                * commands if they know what they're doing and they ask for it
+                * explicitly via the SHpnt->max_sectors API.
+                */
+               sd_max_sectors[i] = MAX_SEGMENTS*8;
+       }
+
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4);
+               hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4);
+               max_sectors[SD_MAJOR(i)] = sd_max_sectors + i * (SCSI_DISKS_PER_MAJOR << 4);
+       }
+
+       sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC);
+       if (!sd_gendisks)
+               goto cleanup_sd_gendisks;
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               sd_gendisks[i] = sd_gendisk;    /* memcpy */
+               sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr,
+                                                 GFP_ATOMIC);
+               if (!sd_gendisks[i].de_arr)
+                       goto cleanup_gendisks_de_arr;
+                memset (sd_gendisks[i].de_arr, 0,
+                        SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr);
+               sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags,
+                                                GFP_ATOMIC);
+               if (!sd_gendisks[i].flags)
+                       goto cleanup_gendisks_flags;
+                memset (sd_gendisks[i].flags, 0,
+                        SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags);
+               sd_gendisks[i].major = SD_MAJOR(i);
+               sd_gendisks[i].major_name = "sd";
+               sd_gendisks[i].minor_shift = 4;
+               sd_gendisks[i].max_p = 1 << 4;
+               sd_gendisks[i].part = kmalloc((SCSI_DISKS_PER_MAJOR << 4) * sizeof(struct hd_struct),
+                                               GFP_ATOMIC);
+               if (!sd_gendisks[i].part)
+                       goto cleanup_gendisks_part;
+               memset(sd_gendisks[i].part, 0, (SCSI_DISKS_PER_MAJOR << 4) * sizeof(struct hd_struct));
+               sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4);
+               sd_gendisks[i].nr_real = 0;
+               sd_gendisks[i].real_devices =
+                   (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR);
+       }
+
+       return 0;
+
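+/*
+ * Error unwind: each cleanup_* label frees everything successfully
+ * allocated before the allocation that failed, which is why the labels
+ * appear to be "off by one" from their names.
+ */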
+cleanup_gendisks_part:
+       kfree(sd_gendisks[i].flags);
+cleanup_gendisks_flags:
+       kfree(sd_gendisks[i].de_arr);
+cleanup_gendisks_de_arr:
+       while (--i >= 0 ) {
+               kfree(sd_gendisks[i].de_arr);
+               kfree(sd_gendisks[i].flags);
+               kfree(sd_gendisks[i].part);
+       }
+       kfree(sd_gendisks);
+       sd_gendisks = NULL;
+cleanup_sd_gendisks:
+       kfree(sd_max_sectors);
+cleanup_max_sectors:
+       kfree(sd_hardsizes);
+cleanup_blocksizes:
+       kfree(sd_blocksizes);
+cleanup_sizes:
+       kfree(sd_sizes);
+cleanup_disks:
+       kfree(rscsi_disks);
+       rscsi_disks = NULL;
+cleanup_devfs:
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+       }
+       sd_registered--;
+       sd_template.dev_noticed = 0;
+       return 1;
+}
+
+
+static void sd_finish(void)
+{
+       int i;
+
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               blk_dev[SD_MAJOR(i)].queue = sd_find_queue;
+               add_gendisk(&sd_gendisks[i]);
+       }
+
+       for (i = 0; i < sd_template.dev_max; ++i)
+               if (!rscsi_disks[i].capacity && rscsi_disks[i].device) {
+                       sd_init_onedisk(i);
+                       if (!rscsi_disks[i].has_part_table) {
+                               sd_sizes[i << 4] = rscsi_disks[i].capacity;
+                               register_disk(&SD_GENDISK(i), MKDEV_SD(i),
+                                               1<<4, &sd_fops,
+                                               rscsi_disks[i].capacity);
+                               rscsi_disks[i].has_part_table = 1;
+                       }
+               }
+       /* If our host adapter is capable of scatter-gather, then we increase
+        * the read-ahead to 60 blocks (120 sectors).  If not, we use
+        * a two block (4 sector) read ahead. We can only respect this with the
+        * granularity of every 16 disks (one device major).
+        */
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               read_ahead[SD_MAJOR(i)] =
+                   (rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device
+                    && rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device->host->sg_tablesize)
+                   ? 120       /* 120 sector read-ahead */
+                   : 4;        /* 4 sector read-ahead */
+       }
+
+       return;
+}
+
+static int sd_detect(Scsi_Device * SDp)
+{
+       if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+               return 0;
+       sd_template.dev_noticed++;
+       return 1;
+}
+
+static int sd_attach(Scsi_Device * SDp)
+{
+        unsigned int devnum;
+       Scsi_Disk *dpnt;
+       int i;
+       char nbuff[6];
+
+       if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD)
+               return 0;
+
+       if (sd_template.nr_dev >= sd_template.dev_max || rscsi_disks == NULL) {
+               SDp->attached--;
+               return 1;
+       }
+       for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+               if (!dpnt->device)
+                       break;
+
+       if (i >= sd_template.dev_max) {
+               printk(KERN_WARNING "scsi_devices corrupt (sd),"
+                   " nr_dev %d dev_max %d\n",
+                   sd_template.nr_dev, sd_template.dev_max);
+               SDp->attached--;
+               return 1;
+       }
+
+       rscsi_disks[i].device = SDp;
+       rscsi_disks[i].has_part_table = 0;
+       sd_template.nr_dev++;
+       SD_GENDISK(i).nr_real++;
+        devnum = i % SCSI_DISKS_PER_MAJOR;
+        SD_GENDISK(i).de_arr[devnum] = SDp->de;
+        if (SDp->removable)
+               SD_GENDISK(i).flags[devnum] |= GENHD_FL_REMOVABLE;
+       sd_devname(i, nbuff);
+       printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n",
+              SDp->removable ? "removable " : "",
+              nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun);
+       return 0;
+}
+
+#define DEVICE_BUSY rscsi_disks[target].device->busy
+#define ALLOW_REVALIDATE rscsi_disks[target].device->allow_revalidate
+#define USAGE rscsi_disks[target].device->access_count
+#define CAPACITY rscsi_disks[target].capacity
+#define MAYBE_REINIT  sd_init_onedisk(target)
+
+/* This routine is called to flush all partitions and partition tables
+ * for a changed scsi disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0.  If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int revalidate_scsidisk(kdev_t dev, int maxusage)
+{
+       struct gendisk *sdgd;
+       int target;
+       int max_p;
+       int start;
+       int i;
+
+       target = DEVICE_NR(dev);
+
+       if (DEVICE_BUSY || (ALLOW_REVALIDATE == 0 && USAGE > maxusage)) {
+               printk("Device busy for revalidation (usage=%d)\n", USAGE);
+               return -EBUSY;
+       }
+       DEVICE_BUSY = 1;
+
+       sdgd = &SD_GENDISK(target);
+       max_p = sd_gendisk.max_p;
+       start = target << sd_gendisk.minor_shift;
+
+       for (i = max_p - 1; i >= 0; i--) {
+               int index = start + i;
+               invalidate_device(MKDEV_SD_PARTITION(index), 1);
+               sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+               sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+               /*
+                * Reset the blocksize for everything so that we can read
+                * the partition table.  Technically we will determine the
+                * correct block size when we revalidate, but we do this just
+                * to make sure that everything remains consistent.
+                */
+               if (rscsi_disks[target].device->sector_size == 2048)
+                       sd_blocksizes[index] = 2048;
+               else
+                       sd_blocksizes[index] = 1024;
+       }
+
+#ifdef MAYBE_REINIT
+       MAYBE_REINIT;
+#endif
+
+       grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR,
+                       1<<4, CAPACITY);
+
+       DEVICE_BUSY = 0;
+       return 0;
+}
+
+static int fop_revalidate_scsidisk(kdev_t dev)
+{
+       return revalidate_scsidisk(dev, 0);
+}
+
+static void sd_detach(Scsi_Device * SDp)
+{
+       Scsi_Disk *dpnt;
+       struct gendisk *sdgd;
+       int i, j;
+       int max_p;
+       int start;
+
+       if (rscsi_disks == NULL)
+               return;
+
+       for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++)
+               if (dpnt->device == SDp) {
+
+                       /* If we are disconnecting a disk driver, sync and invalidate
+                        * everything */
+                       sdgd = &SD_GENDISK(i);
+                       max_p = sd_gendisk.max_p;
+                       start = i << sd_gendisk.minor_shift;
+
+                       for (j = max_p - 1; j >= 0; j--) {
+                               int index = start + j;
+                               invalidate_device(MKDEV_SD_PARTITION(index), 1);
+                               sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0;
+                               sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0;
+                               sd_sizes[index] = 0;
+                       }
+                        devfs_register_partitions (sdgd,
+                                                   SD_MINOR_NUMBER (start), 1);
+                       /* unregister_disk() */
+                       dpnt->has_part_table = 0;
+                       dpnt->device = NULL;
+                       dpnt->capacity = 0;
+                       SDp->attached--;
+                       sd_template.dev_noticed--;
+                       sd_template.nr_dev--;
+                       SD_GENDISK(i).nr_real--;
+                       return;
+               }
+       return;
+}
+
+static int __init init_sd(void)
+{
+       sd_template.module = THIS_MODULE;
+       return scsi_register_module(MODULE_SCSI_DEV, &sd_template);
+}
+
+static void __exit exit_sd(void)
+{
+       int i;
+
+       scsi_unregister_module(MODULE_SCSI_DEV, &sd_template);
+
+       for (i = 0; i < N_USED_SD_MAJORS; i++)
+               devfs_unregister_blkdev(SD_MAJOR(i), "sd");
+
+       sd_registered--;
+       if (rscsi_disks != NULL) {
+               kfree(rscsi_disks);
+               kfree(sd_sizes);
+               kfree(sd_blocksizes);
+               kfree(sd_hardsizes);
+               for (i = 0; i < N_USED_SD_MAJORS; i++) {
+#if 0 /* XXX aren't we forgetting to deallocate something? */
+                       kfree(sd_gendisks[i].de_arr);
+                       kfree(sd_gendisks[i].flags);
+#endif
+                       kfree(sd_gendisks[i].part);
+               }
+       }
+       for (i = 0; i < N_USED_SD_MAJORS; i++) {
+               del_gendisk(&sd_gendisks[i]);
+               blk_size[SD_MAJOR(i)] = NULL;   /* XXX blksize_size actually? */
+               hardsect_size[SD_MAJOR(i)] = NULL;
+               read_ahead[SD_MAJOR(i)] = 0;
+       }
+       sd_template.dev_max = 0;
+       if (sd_gendisks != NULL)    /* kfree tests for 0, but leave explicit */
+               kfree(sd_gendisks);
+}
+
+module_init(init_sd);
+module_exit(exit_sd);
+MODULE_LICENSE("GPL");
diff --git a/xen-2.4.16/drivers/scsi/sd.h b/xen-2.4.16/drivers/scsi/sd.h
new file mode 100644 (file)
index 0000000..8e29445
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ *    sd.h Copyright (C) 1992 Drew Eckhardt 
+ *      SCSI disk driver header file by
+ *              Drew Eckhardt 
+ *
+ *      <drew@colorado.edu>
+ *
+ *       Modified by Eric Youngdale eric@andante.org to
+ *       add scatter-gather, multiple outstanding requests, and other
+ *       enhancements.
+ */
+#ifndef _SD_H
+#define _SD_H
+/*
+   $Header: /usr/src/linux/kernel/blk_drv/scsi/RCS/sd.h,v 1.1 1992/07/24 06:27:38 root Exp root $
+ */
+
+#ifndef _SCSI_H
+#include "scsi.h"
+#endif
+
+#ifndef _GENDISK_H
+#include <xeno/genhd.h>
+#endif
+
+typedef struct scsi_disk {
+       unsigned capacity;      /* size in blocks */
+       Scsi_Device *device;
+       unsigned char ready;    /* flag ready for FLOPTICAL */
+       unsigned char write_prot;       /* flag write_protect for removable dev */
+       unsigned char sector_bit_size;  /* sector_size == 1 << sector_bit_size */
+       unsigned char sector_bit_shift; /* log2 of sectors per FS block */
+       unsigned has_part_table:1;      /* has partition table */
+} Scsi_Disk;
+
+extern int revalidate_scsidisk(kdev_t dev, int maxusage);
+
+/*
+ * Used by pmac to find the device associated with a target.
+ */
+extern kdev_t sd_find_target(void *host, int tgt);
+
+#define N_SD_MAJORS    8
+
+#define SD_MAJOR_MASK  (N_SD_MAJORS - 1)
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h
new file mode 100644 (file)
index 0000000..86dd0fb
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <asm/system.h>
+#include <asm/ptrace.h>
+#include <asm/apicdef.h>
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+       *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
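+/*
+ * xchg implies a locked bus cycle on x86, so this write is strictly
+ * ordered with respect to surrounding memory accesses.
+ */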
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+       xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+       return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+       do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+#define FORCE_READ_AROUND_WRITE 0
+#define apic_read_around(x)
+#define apic_write_around(x,y) apic_write((x),(y))
+
+static inline void ack_APIC_irq(void)
+{
+       /*
+        * ack_APIC_irq() actually gets compiled as a single instruction:
+        * - a single rmw on Pentium/82489DX
+        * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+        * ... yummie.
+        */
+
+       /* Docs say use 0 for future compatibility */
+       apic_write_around(APIC_EOI, 0);
+}
+
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_APIC_clocks (void);
+extern int APIC_init_uniprocessor (void);
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+
+#endif /* __ASM_APIC_H */
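
As a hedged usage sketch (kernel context assumed; the handler name is hypothetical), a local-APIC-driven interrupt routine signals completion through the accessors declared above:

    #include <asm/apic.h>

    /* Illustrative only: acknowledge a local APIC interrupt.  On P6+
     * parts ack_APIC_irq() compiles to a single write of 0 to the EOI
     * register, as the comment in the header notes. */
    static void my_local_tick(struct pt_regs *regs)
    {
        /* ... service the interrupt ... */
        ack_APIC_irq();
    }
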
diff --git a/xen-2.4.16/include/asm-i386/apicdef.h b/xen-2.4.16/include/asm-i386/apicdef.h
new file mode 100644
index 0000000..f855a7d
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/apicdef.h
@@ -0,0 +1,363 @@
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define                APIC_DEFAULT_PHYS_BASE  0xfee00000
+#define                APIC_ID         0x20
+#define                        APIC_ID_MASK            (0x0F<<24)
+#define                        GET_APIC_ID(x)          (((x)>>24)&0x0F)
+#define                APIC_LVR        0x30
+#define                        APIC_LVR_MASK           0xFF00FF
+#define                        GET_APIC_VERSION(x)     ((x)&0xFF)
+#define                        GET_APIC_MAXLVT(x)      (((x)>>16)&0xFF)
+#define                        APIC_INTEGRATED(x)      ((x)&0xF0)
+#define                APIC_TASKPRI    0x80
+#define                        APIC_TPRI_MASK          0xFF
+#define                APIC_ARBPRI     0x90
+#define                        APIC_ARBPRI_MASK        0xFF
+#define                APIC_PROCPRI    0xA0
+#define                APIC_EOI        0xB0
+#define                        APIC_EIO_ACK            0x0             /* Write this to the EOI register */
+#define                APIC_RRR        0xC0
+#define                APIC_LDR        0xD0
+#define                        APIC_LDR_MASK           (0xFF<<24)
+#define                        GET_APIC_LOGICAL_ID(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_LOGICAL_ID(x)  (((x)<<24))
+#define                        APIC_ALL_CPUS           0xFF
+#define                APIC_DFR        0xE0
+#define                APIC_SPIV       0xF0
+#define                        APIC_SPIV_FOCUS_DISABLED        (1<<9)
+#define                        APIC_SPIV_APIC_ENABLED          (1<<8)
+#define                APIC_ISR        0x100
+#define                APIC_TMR        0x180
+#define        APIC_IRR        0x200
+#define        APIC_ESR        0x280
+#define                        APIC_ESR_SEND_CS        0x00001
+#define                        APIC_ESR_RECV_CS        0x00002
+#define                        APIC_ESR_SEND_ACC       0x00004
+#define                        APIC_ESR_RECV_ACC       0x00008
+#define                        APIC_ESR_SENDILL        0x00020
+#define                        APIC_ESR_RECVILL        0x00040
+#define                        APIC_ESR_ILLREGA        0x00080
+#define                APIC_ICR        0x300
+#define                        APIC_DEST_SELF          0x40000
+#define                        APIC_DEST_ALLINC        0x80000
+#define                        APIC_DEST_ALLBUT        0xC0000
+#define                        APIC_ICR_RR_MASK        0x30000
+#define                        APIC_ICR_RR_INVALID     0x00000
+#define                        APIC_ICR_RR_INPROG      0x10000
+#define                        APIC_ICR_RR_VALID       0x20000
+#define                        APIC_INT_LEVELTRIG      0x08000
+#define                        APIC_INT_ASSERT         0x04000
+#define                        APIC_ICR_BUSY           0x01000
+#define                        APIC_DEST_LOGICAL       0x00800
+#define                        APIC_DM_FIXED           0x00000
+#define                        APIC_DM_LOWEST          0x00100
+#define                        APIC_DM_SMI             0x00200
+#define                        APIC_DM_REMRD           0x00300
+#define                        APIC_DM_NMI             0x00400
+#define                        APIC_DM_INIT            0x00500
+#define                        APIC_DM_STARTUP         0x00600
+#define                        APIC_DM_EXTINT          0x00700
+#define                        APIC_VECTOR_MASK        0x000FF
+#define                APIC_ICR2       0x310
+#define                        GET_APIC_DEST_FIELD(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_DEST_FIELD(x)  ((x)<<24)
+#define                APIC_LVTT       0x320
+#define                APIC_LVTPC      0x340
+#define                APIC_LVT0       0x350
+#define                        APIC_LVT_TIMER_BASE_MASK        (0x3<<18)
+#define                        GET_APIC_TIMER_BASE(x)          (((x)>>18)&0x3)
+#define                        SET_APIC_TIMER_BASE(x)          (((x)<<18))
+#define                        APIC_TIMER_BASE_CLKIN           0x0
+#define                        APIC_TIMER_BASE_TMBASE          0x1
+#define                        APIC_TIMER_BASE_DIV             0x2
+#define                        APIC_LVT_TIMER_PERIODIC         (1<<17)
+#define                        APIC_LVT_MASKED                 (1<<16)
+#define                        APIC_LVT_LEVEL_TRIGGER          (1<<15)
+#define                        APIC_LVT_REMOTE_IRR             (1<<14)
+#define                        APIC_INPUT_POLARITY             (1<<13)
+#define                        APIC_SEND_PENDING               (1<<12)
+#define                        GET_APIC_DELIVERY_MODE(x)       (((x)>>8)&0x7)
+#define                        SET_APIC_DELIVERY_MODE(x,y)     (((x)&~0x700)|((y)<<8))
+#define                                APIC_MODE_FIXED         0x0
+#define                                APIC_MODE_NMI           0x4
+#define                                APIC_MODE_EXINT         0x7
+#define        APIC_LVT1       0x360
+#define                APIC_LVTERR     0x370
+#define                APIC_TMICT      0x380
+#define                APIC_TMCCT      0x390
+#define                APIC_TDCR       0x3E0
+#define                        APIC_TDR_DIV_TMBASE     (1<<2)
+#define                        APIC_TDR_DIV_1          0xB
+#define                        APIC_TDR_DIV_2          0x0
+#define                        APIC_TDR_DIV_4          0x1
+#define                        APIC_TDR_DIV_8          0x2
+#define                        APIC_TDR_DIV_16         0x3
+#define                        APIC_TDR_DIV_32         0x8
+#define                        APIC_TDR_DIV_64         0x9
+#define                        APIC_TDR_DIV_128        0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#define MAX_IO_APICS 8
+
+/*
+ * the local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * reason we cannot use it right now is the P5 APIC: it has an erratum
+ * whereby it accepts only 32-bit reads and writes, not 8-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/        struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/        struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/        struct { /* APIC ID Register */
+               u32   __reserved_1      : 24,
+                       phys_apic_id    :  4,
+                       __reserved_2    :  4;
+               u32 __reserved[3];
+       } id;
+
+/*030*/        const
+       struct { /* APIC Version Register */
+               u32   version           :  8,
+                       __reserved_1    :  8,
+                       max_lvt         :  8,
+                       __reserved_2    :  8;
+               u32 __reserved[3];
+       } version;
+
+/*040*/        struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/        struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/        struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/        struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/        struct { /* Task Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } tpr;
+
+/*090*/        const
+       struct { /* Arbitration Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } apr;
+
+/*0A0*/        const
+       struct { /* Processor Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } ppr;
+
+/*0B0*/        struct { /* End Of Interrupt Register */
+               u32   eoi;
+               u32 __reserved[3];
+       } eoi;
+
+/*0C0*/        struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/        struct { /* Logical Destination Register */
+               u32   __reserved_1      : 24,
+                       logical_dest    :  8;
+               u32 __reserved_2[3];
+       } ldr;
+
+/*0E0*/        struct { /* Destination Format Register */
+               u32   __reserved_1      : 28,
+                       model           :  4;
+               u32 __reserved_2[3];
+       } dfr;
+
+/*0F0*/        struct { /* Spurious Interrupt Vector Register */
+               u32     spurious_vector :  8,
+                       apic_enabled    :  1,
+                       focus_cpu       :  1,
+                       __reserved_2    : 22;
+               u32 __reserved_3[3];
+       } svr;
+
+/*100*/        struct { /* In Service Register */
+/*170*/                u32 bitfield;
+               u32 __reserved[3];
+       } isr [8];
+
+/*180*/        struct { /* Trigger Mode Register */
+/*1F0*/                u32 bitfield;
+               u32 __reserved[3];
+       } tmr [8];
+
+/*200*/        struct { /* Interrupt Request Register */
+/*270*/                u32 bitfield;
+               u32 __reserved[3];
+       } irr [8];
+
+/*280*/        union { /* Error Status Register */
+               struct {
+                       u32   send_cs_error                     :  1,
+                               receive_cs_error                :  1,
+                               send_accept_error               :  1,
+                               receive_accept_error            :  1,
+                               __reserved_1                    :  1,
+                               send_illegal_vector             :  1,
+                               receive_illegal_vector          :  1,
+                               illegal_register_address        :  1,
+                               __reserved_2                    : 24;
+                       u32 __reserved_3[3];
+               } error_bits;
+               struct {
+                       u32 errors;
+                       u32 __reserved_3[3];
+               } all_errors;
+       } esr;
+
+/*290*/        struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/        struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/        struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/        struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/        struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/        struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/        struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/        struct { /* Interrupt Command Register 1 */
+               u32   vector                    :  8,
+                       delivery_mode           :  3,
+                       destination_mode        :  1,
+                       delivery_status         :  1,
+                       __reserved_1            :  1,
+                       level                   :  1,
+                       trigger                 :  1,
+                       __reserved_2            :  2,
+                       shorthand               :  2,
+                       __reserved_3            :  12;
+               u32 __reserved_4[3];
+       } icr1;
+
+/*310*/        struct { /* Interrupt Command Register 2 */
+               union {
+                       u32   __reserved_1      : 24,
+                               phys_dest       :  4,
+                               __reserved_2    :  4;
+                       u32   __reserved_3      : 24,
+                               logical_dest    :  8;
+               } dest;
+               u32 __reserved_4[3];
+       } icr2;
+
+/*320*/        struct { /* LVT - Timer */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       timer_mode      :  1,
+                       __reserved_3    : 14;
+               u32 __reserved_4[3];
+       } lvt_timer;
+
+/*330*/        struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/        struct { /* LVT - Performance Counter */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_pc;
+
+/*350*/        struct { /* LVT - LINT0 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint0;
+
+/*360*/        struct { /* LVT - LINT1 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint1;
+
+/*370*/        struct { /* LVT - Error */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_error;
+
+/*380*/        struct { /* Timer Initial Count Register */
+               u32   initial_count;
+               u32 __reserved_2[3];
+       } timer_icr;
+
+/*390*/        const
+       struct { /* Timer Current Count Register */
+               u32   curr_count;
+               u32 __reserved_2[3];
+       } timer_ccr;
+
+/*3A0*/        struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/        struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/        struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/        struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/        struct { /* Timer Divide Configuration Register */
+               u32   divisor           :  4,
+                       __reserved_1    : 28;
+               u32 __reserved_2[3];
+       } timer_dcr;
+
+/*3F0*/        struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#endif
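
A minimal sketch of how the register offsets and field macros above combine with apic_read() (kernel context assumed; the function name is hypothetical):

    #include <asm/apic.h>      /* apic_read(); pulls in apicdef.h */

    /* Read this CPU's physical APIC ID: fetch the ID register at
     * offset 0x20 and extract bits 24-27 with GET_APIC_ID(). */
    static int my_apic_id(void)
    {
        return GET_APIC_ID(apic_read(APIC_ID));
    }
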
diff --git a/xen-2.4.16/include/asm-i386/atomic.h b/xen-2.4.16/include/asm-i386/atomic.h
new file mode 100644
index 0000000..70a1212
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/atomic.h
@@ -0,0 +1,204 @@
+#ifndef __ARCH_I386_ATOMIC__
+#define __ARCH_I386_ATOMIC__
+
+#include <xeno/config.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_read(v)         ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ * 
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_set(v,i)                (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically adds @i to @v.  Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "addl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "subl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "subl %2,%0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "incl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_dec(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "decl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "decl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test 
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "incl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ * 
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "addl %2,%0; sets %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK "andl %0,%1" \
+: : "r" (~(mask)),"m" (*addr) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK "orl %0,%1" \
+: : "r" (mask),"m" (*addr) : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()    barrier()
+#define smp_mb__after_atomic_dec()     barrier()
+#define smp_mb__before_atomic_inc()    barrier()
+#define smp_mb__after_atomic_inc()     barrier()
+
+#endif
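
A minimal reference-counting sketch using the operations above (illustrative; the object and function names are hypothetical):

    #include <asm/atomic.h>

    static atomic_t my_refcnt = ATOMIC_INIT(1);

    static void my_get(void)
    {
        atomic_inc(&my_refcnt);
    }

    static void my_put(void)
    {
        /* atomic_dec_and_test() returns true for exactly one caller,
         * so the object is released once even under SMP races. */
        if (atomic_dec_and_test(&my_refcnt))
            /* last reference gone: free the object here */ ;
    }
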
diff --git a/xen-2.4.16/include/asm-i386/bitops.h b/xen-2.4.16/include/asm-i386/bitops.h
new file mode 100644
index 0000000..73bcd8e
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/bitops.h
@@ -0,0 +1,368 @@
+#ifndef _I386_BITOPS_H
+#define _I386_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile void * addr)
+{
+       __asm__(
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+#define smp_mb__before_clear_bit()     barrier()
+#define smp_mb__after_clear_bit()      barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to toggle
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__(
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to toggle
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr));
+       return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr));
+       return oldbit;
+}
+
+/* WARNING: non-atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+       return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit)
+               :"m" (ADDR),"Ir" (nr));
+       return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+       int d0, d1, d2;
+       int res;
+
+       if (!size)
+               return 0;
+       /* This looks at memory. Mark it volatile to tell gcc not to move it around */
+       __asm__ __volatile__(
+               "movl $-1,%%eax\n\t"
+               "xorl %%edx,%%edx\n\t"
+               "repe; scasl\n\t"
+               "je 1f\n\t"
+               "xorl -4(%%edi),%%eax\n\t"
+               "subl $4,%%edi\n\t"
+               "bsfl %%eax,%%edx\n"
+               "1:\tsubl %%ebx,%%edi\n\t"
+               "shll $3,%%edi\n\t"
+               "addl %%edi,%%edx"
+               :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+       return res;
+}
+
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+       unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+       int set = 0, bit = offset & 31, res;
+       
+       if (bit) {
+               /*
+                * Look for a zero bit in the first 32-bit word
+                */
+               __asm__("bsfl %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movl $32, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (~(*p >> bit)));
+               if (set < (32 - bit))
+                       return set + offset;
+               set = 32 - bit;
+               p++;
+       }
+       /*
+        * No zero yet, search the remaining full words for a zero
+        */
+       res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+       return (offset + set + res);
+}
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+       __asm__("bsfl %1,%0"
+               :"=r" (word)
+               :"r" (~word));
+       return word;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+       int r;
+
+       __asm__("bsfl %1,%0\n\t"
+               "jnz 1f\n\t"
+               "movl $-1,%0\n"
+               "1:" : "=r" (r) : "rm" (x));    /* "rm", not "g": bsfl cannot take an immediate */
+       return r+1;
+}
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#define ext2_set_bit                 __test_and_set_bit
+#define ext2_clear_bit               __test_and_clear_bit
+#define ext2_test_bit                test_bit
+#define ext2_find_first_zero_bit     find_first_zero_bit
+#define ext2_find_next_zero_bit      find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem.  */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* _I386_BITOPS_H */
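
A hedged sketch of a lock-free slot allocator built on these primitives (illustrative names; kernel context assumed):

    #include <asm/bitops.h>

    static unsigned long my_slot_map[2];   /* 64 slots, initially free */

    static int my_alloc_slot(void)
    {
        int slot;
        do {
            slot = find_first_zero_bit(my_slot_map, 64);
            if (slot >= 64)
                return -1;                  /* map full */
            /* test_and_set_bit() is atomic: if another CPU claimed the
             * slot first, the old value is 1 and we retry the search. */
        } while (test_and_set_bit(slot, my_slot_map));
        return slot;
    }
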
diff --git a/xen-2.4.16/include/asm-i386/byteorder.h b/xen-2.4.16/include/asm-i386/byteorder.h
new file mode 100644
index 0000000..bbfb629
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/byteorder.h
@@ -0,0 +1,47 @@
+#ifndef _I386_BYTEORDER_H
+#define _I386_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+/* For avoiding bswap on i386 */
+#ifdef __KERNEL__
+#include <linux/config.h>
+#endif
+
+static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
+{
+#ifdef CONFIG_X86_BSWAP
+       __asm__("bswap %0" : "=r" (x) : "0" (x));
+#else
+       __asm__("xchgb %b0,%h0\n\t"     /* swap lower bytes     */
+               "rorl $16,%0\n\t"       /* swap words           */
+               "xchgb %b0,%h0"         /* swap higher bytes    */
+               :"=q" (x)
+               : "0" (x));
+#endif
+       return x;
+}
+
+static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
+{
+       __asm__("xchgb %b0,%h0"         /* swap bytes           */
+               : "=q" (x)
+               :  "0" (x));
+       return x;
+}
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab16(x) ___arch__swab16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#  define __BYTEORDER_HAS_U64__
+#  define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _I386_BYTEORDER_H */
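
The bswap/xchg+ror sequences above compute the usual 32-bit byte reversal. A portable standalone equivalent, for illustration only:

    #include <stdio.h>

    static unsigned int my_swab32(unsigned int x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00u)
             | ((x << 8) & 0x00ff0000u) | (x << 24);
    }

    int main(void)
    {
        /* prints "12345678 -> 78563412" */
        printf("%08x -> %08x\n", 0x12345678u, my_swab32(0x12345678u));
        return 0;
    }
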
diff --git a/xen-2.4.16/include/asm-i386/cache.h b/xen-2.4.16/include/asm-i386/cache.h
new file mode 100644
index 0000000..502c8ba
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/cache.h
@@ -0,0 +1,13 @@
+/*
+ * include/asm-i386/cache.h
+ */
+#ifndef __ARCH_I386_CACHE_H
+#define __ARCH_I386_CACHE_H
+
+#include <xeno/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/cpufeature.h b/xen-2.4.16/include/asm-i386/cpufeature.h
new file mode 100644
index 0000000..598edbd
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/cpufeature.h
@@ -0,0 +1,73 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, (CAP))
+
+#define NCAPINTS       4       /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU                (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME                (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE         (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE        (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC                (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR                (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE                (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8                (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC       (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP                (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR       (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE                (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA                (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV       (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT                (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36      (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN         (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH     (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES       (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI       (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX                (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR       (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+                                         /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM                (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2       (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP  (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_ACC                (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64       (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL    (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT     (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM         (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT   (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW      (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY   (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN    (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI       (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX      (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR    (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR  (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR        (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/* 
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
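
Each constant above packs a word index and a bit index as word*32+bit, addressing the x86_capability[NCAPINTS] array. A standalone illustration of the decomposition (feature values copied from the header):

    #include <stdio.h>

    #define X86_FEATURE_PAE (0*32 + 6)
    #define X86_FEATURE_LM  (1*32 + 29)

    int main(void)
    {
        unsigned int caps[4] = { 0 };
        /* setting and testing a feature is a divide/modulo by 32 */
        caps[X86_FEATURE_LM / 32] |= 1u << (X86_FEATURE_LM % 32);
        printf("LM: word %d, bit %d, set=%d\n",
               X86_FEATURE_LM / 32, X86_FEATURE_LM % 32,
               !!(caps[X86_FEATURE_LM / 32] & (1u << (X86_FEATURE_LM % 32))));
        return 0;
    }
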
diff --git a/xen-2.4.16/include/asm-i386/current.h b/xen-2.4.16/include/asm-i386/current.h
new file mode 100644
index 0000000..bc1496a
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/current.h
@@ -0,0 +1,15 @@
+#ifndef _I386_CURRENT_H
+#define _I386_CURRENT_H
+
+struct task_struct;
+
+static inline struct task_struct * get_current(void)
+{
+       struct task_struct *current;
+       __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
+       return current;
+}
+#define current get_current()
+
+#endif /* !(_I386_CURRENT_H) */
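
get_current() relies on each task's kernel stack being 8 KB and 8 KB-aligned, with the task_struct at its base, so masking any in-stack address with ~8191 recovers the task pointer. A standalone illustration of the mask arithmetic (the addresses are made up):

    #include <stdio.h>

    int main(void)
    {
        unsigned long task = 0xc2346000ul;      /* hypothetical stack base */
        unsigned long esp  = task + 0x1a34;     /* some address on that stack */
        /* same computation as the "andl %esp,%0" above */
        printf("esp %#lx -> task %#lx\n", esp, esp & ~8191ul);
        return 0;
    }
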
diff --git a/xen-2.4.16/include/asm-i386/debugreg.h b/xen-2.4.16/include/asm-i386/debugreg.h
new file mode 100644
index 0000000..f0b2b06
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/debugreg.h
@@ -0,0 +1,64 @@
+#ifndef _I386_DEBUGREG_H
+#define _I386_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+   debug registers.  Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0        /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3         /* u_debugreg[DR_LASTADDR]  */
+
+#define DR_STATUS 6           /* u_debugreg[DR_STATUS]     */
+#define DR_CONTROL 7          /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register.  We can use this to determine
+   which debugging register was responsible for the trap.  The other bits
+   are either reserved or not of interest to us. */
+
+#define DR_TRAP0       (0x1)           /* db0 */
+#define DR_TRAP1       (0x2)           /* db1 */
+#define DR_TRAP2       (0x4)           /* db2 */
+#define DR_TRAP3       (0x8)           /* db3 */
+
+#define DR_STEP                (0x4000)        /* single-step */
+#define DR_SWITCH      (0x8000)        /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+   The top two bytes of the control register consist of 4 fields of 4
+   bits - each field corresponds to one of the four debug registers,
+   and indicates what types of access we trap on, and how large the data
+   field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4   /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+
+/* The low byte of the control register determines which registers are
+   enabled.  There are 4 fields of two bits.  One bit is "local", meaning
+   that the processor will reset the bit after a task switch, and the other
+   is "global", meaning that we have to reset the bit explicitly.  With Linux,
+   you can use either one, since we explicitly zero the register when we enter
+   kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+   We can slow the instruction pipeline for instructions coming via the
+   gdt or the ldt if we want to.  I am not sure why this is an advantage */
+
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
+
+#endif
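
A standalone sketch showing how the field constants above compose a DR7 value that arms debug register 0 as a local, 4-byte write watchpoint (constants copied from the header; no hardware is touched):

    #include <stdio.h>

    #define DR_CONTROL_SHIFT      16
    #define DR_CONTROL_SIZE        4
    #define DR_RW_WRITE          0x1
    #define DR_LEN_4             0xC
    #define DR_ENABLE_SIZE         2
    #define DR_LOCAL_ENABLE_SHIFT  0

    int main(void)
    {
        int reg = 0;
        unsigned long dr7 = 0;
        /* type/length field for register "reg" in the top half ... */
        dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
                   << (DR_CONTROL_SHIFT + reg * DR_CONTROL_SIZE);
        /* ... plus its local-enable bit in the low byte */
        dr7 |= 1ul << (DR_LOCAL_ENABLE_SHIFT + reg * DR_ENABLE_SIZE);
        printf("dr7 = %#lx\n", dr7);    /* 0xd0001 */
        return 0;
    }
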
diff --git a/xen-2.4.16/include/asm-i386/delay.h b/xen-2.4.16/include/asm-i386/delay.h
new file mode 100644
index 0000000..9e0adb4
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/delay.h
@@ -0,0 +1,14 @@
+#ifndef _I386_DELAY_H
+#define _I386_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/i386/lib/delay.c
+ */
+
+extern unsigned long ticks_per_usec; 
+extern void __udelay(unsigned long usecs);
+#define udelay(n) __udelay(n)
+
+#endif /* defined(_I386_DELAY_H) */
diff --git a/xen-2.4.16/include/asm-i386/desc.h b/xen-2.4.16/include/asm-i386/desc.h
new file mode 100644
index 0000000..3855ebc
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/desc.h
@@ -0,0 +1,36 @@
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#define __FIRST_TSS_ENTRY 12
+#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1)
+
+#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
+#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+
+#ifndef __ASSEMBLY__
+struct desc_struct {
+       unsigned long a,b;
+};
+
+extern struct desc_struct gdt_table[];
+extern struct desc_struct *idt, *gdt;
+
+struct Xgt_desc_struct {
+       unsigned short size;
+       unsigned long address __attribute__((packed));
+};
+
+#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2))
+#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2))
+
+#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3))
+
+#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" (__LDT(n)<<3))
+
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/elf.h b/xen-2.4.16/include/asm-i386/elf.h
new file mode 100644
index 0000000..ded2285
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/elf.h
@@ -0,0 +1,233 @@
+/*
+ *  GRUB  --  GRand Unified Bootloader
+ *  Copyright (C) 1996  Erich Boleyn  <erich@uruk.org>
+ *  Copyright (C) 2001  Free Software Foundation, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* 32-bit data types */
+
+typedef unsigned long Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long Elf32_Off;
+typedef signed long Elf32_Sword;
+typedef unsigned long Elf32_Word;
+/* "unsigned char" already exists */
+
+/* ELF header */
+typedef struct
+{
+  
+#define EI_NIDENT 16
+  
+  /* first four characters are defined below */
+#define EI_MAG0                0
+#define ELFMAG0                0x7f
+#define EI_MAG1                1
+#define ELFMAG1                'E'
+#define EI_MAG2                2
+#define ELFMAG2                'L'
+#define EI_MAG3                3
+#define ELFMAG3                'F'
+  
+#define EI_CLASS       4       /* data sizes */
+#define ELFCLASS32     1       /* i386 -- up to 32-bit data sizes present */
+  
+#define EI_DATA                5       /* data type and ordering */
+#define ELFDATA2LSB    1       /* i386 -- LSB 2's complement */
+  
+#define EI_VERSION     6       /* version number.  "e_version" must be the same */
+#define EV_CURRENT      1      /* current version number */
+  
+#define EI_PAD         7       /* from here in is just padding */
+  
+#define EI_BRAND       8       /* start of OS branding (this
+                                  obviously violates the ELF
+                                  standard) */
+  
+  unsigned char e_ident[EI_NIDENT];    /* basic identification block */
+  
+#define ET_EXEC                2       /* we only care about executable types */
+  Elf32_Half e_type;           /* file types */
+  
+#define EM_386         3       /* i386 -- obviously use this one */
+  Elf32_Half e_machine;        /* machine types */
+  Elf32_Word e_version;        /* use same as "EI_VERSION" above */
+  Elf32_Addr e_entry;          /* entry point of the program */
+  Elf32_Off e_phoff;           /* program header table file offset */
+  Elf32_Off e_shoff;           /* section header table file offset */
+  Elf32_Word e_flags;          /* flags */
+  Elf32_Half e_ehsize;         /* elf header size in bytes */
+  Elf32_Half e_phentsize;      /* program header entry size */
+  Elf32_Half e_phnum;          /* number of entries in program header */
+  Elf32_Half e_shentsize;      /* section header entry size */
+  Elf32_Half e_shnum;          /* number of entries in section header */
+  
+#define SHN_UNDEF       0
+#define SHN_LORESERVE   0xff00
+#define SHN_LOPROC      0xff00
+#define SHN_HIPROC      0xff1f
+#define SHN_ABS         0xfff1
+#define SHN_COMMON      0xfff2
+#define SHN_HIRESERVE   0xffff
+  Elf32_Half e_shstrndx;       /* section header string table index */
+}
+Elf32_Ehdr;
+
+
+#define BOOTABLE_I386_ELF(h) \
+ ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \
+  & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \
+  & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \
+  & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \
+  & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT))
+
+/* section table - ? */
+typedef struct
+{
+  Elf32_Word   sh_name;                /* Section name (string tbl index) */
+  Elf32_Word   sh_type;                /* Section type */
+  Elf32_Word   sh_flags;               /* Section flags */
+  Elf32_Addr   sh_addr;                /* Section virtual addr at execution */
+  Elf32_Off    sh_offset;              /* Section file offset */
+  Elf32_Word   sh_size;                /* Section size in bytes */
+  Elf32_Word   sh_link;                /* Link to another section */
+  Elf32_Word   sh_info;                /* Additional section information */
+  Elf32_Word   sh_addralign;           /* Section alignment */
+  Elf32_Word   sh_entsize;             /* Entry size if section holds table */
+}
+Elf32_Shdr;
+
+/* symbol table - page 4-25, figure 4-15 */
+typedef struct
+{
+  Elf32_Word st_name;
+  Elf32_Addr st_value;
+  Elf32_Word st_size;
+  unsigned char st_info;
+  unsigned char st_other;
+  Elf32_Half st_shndx;
+}
+Elf32_Sym;
+
+/* symbol type and binding attributes - page 4-26 */
+
+#define ELF32_ST_BIND(i)    ((i) >> 4)
+#define ELF32_ST_TYPE(i)    ((i) & 0xf)
+#define ELF32_ST_INFO(b,t)  (((b)<<4)+((t)&0xf))
+
+/* symbol binding - page 4-26, figure 4-16 */
+
+#define STB_LOCAL    0
+#define STB_GLOBAL   1
+#define STB_WEAK     2
+#define STB_LOPROC  13
+#define STB_HIPROC  15
+
+/* symbol types - page 4-28, figure 4-17 */
+
+#define STT_NOTYPE   0
+#define STT_OBJECT   1
+#define STT_FUNC     2
+#define STT_SECTION  3
+#define STT_FILE     4
+#define STT_LOPROC  13
+#define STT_HIPROC  15
+
+
+/* Macros to split/combine relocation type and symbol page 4-32 */
+
+#define ELF32_R_SYM(__i)       ((__i)>>8)
+#define ELF32_R_TYPE(__i)      ((unsigned char) (__i))
+#define ELF32_R_INFO(__s, __t) (((__s)<<8) + (unsigned char) (__t))
+
+
+/* program header - page 5-2, figure 5-1 */
+
+typedef struct
+{
+  Elf32_Word p_type;
+  Elf32_Off p_offset;
+  Elf32_Addr p_vaddr;
+  Elf32_Addr p_paddr;
+  Elf32_Word p_filesz;
+  Elf32_Word p_memsz;
+  Elf32_Word p_flags;
+  Elf32_Word p_align;
+}
+Elf32_Phdr;
+
+/* segment types - page 5-3, figure 5-2 */
+
+#define PT_NULL                0
+#define PT_LOAD                1
+#define PT_DYNAMIC     2
+#define PT_INTERP      3
+#define PT_NOTE                4
+#define PT_SHLIB       5
+#define PT_PHDR                6
+
+#define PT_LOPROC      0x70000000
+#define PT_HIPROC      0x7fffffff
+
+/* segment permissions - page 5-6 */
+
+#define PF_X           0x1
+#define PF_W           0x2
+#define PF_R           0x4
+#define PF_MASKPROC    0xf0000000
+
+
+/* dynamic structure - page 5-15, figure 5-9 */
+
+typedef struct
+{
+  Elf32_Sword d_tag;
+  union
+  {
+    Elf32_Word d_val;
+    Elf32_Addr d_ptr;
+  }
+  d_un;
+}
+Elf32_Dyn;
+
+/* Dynamic array tags - page 5-16, figure 5-10.  */
+
+#define DT_NULL                0
+#define DT_NEEDED      1
+#define DT_PLTRELSZ    2
+#define DT_PLTGOT      3
+#define DT_HASH                4
+#define DT_STRTAB      5
+#define DT_SYMTAB      6
+#define DT_RELA                7
+#define DT_RELASZ      8
+#define DT_RELAENT      9
+#define DT_STRSZ       10
+#define DT_SYMENT      11
+#define DT_INIT                12
+#define DT_FINI                13
+#define DT_SONAME      14
+#define DT_RPATH       15
+#define DT_SYMBOLIC    16
+#define DT_REL         17
+#define DT_RELSZ       18
+#define DT_RELENT      19
+#define DT_PLTREL      20
+#define DT_DEBUG       21
+#define DT_TEXTREL     22
+#define DT_JMPREL      23
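
A hedged sketch of how a loader walks these structures, in the spirit of the GRUB code this header comes from (the image pointer and function name are hypothetical; segment copying is only indicated):

    /* Assumes the Elf32_* types and PT_LOAD from the header above. */
    static void my_list_load_segments(const char *image)
    {
        const Elf32_Ehdr *eh = (const Elf32_Ehdr *)image;
        const Elf32_Phdr *ph = (const Elf32_Phdr *)(image + eh->e_phoff);
        int i;

        for (i = 0; i < eh->e_phnum; i++)
            if (ph[i].p_type == PT_LOAD)
                /* copy p_filesz bytes from image + p_offset to p_paddr,
                 * then zero the remaining p_memsz - p_filesz bytes */ ;
    }
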
diff --git a/xen-2.4.16/include/asm-i386/fixmap.h b/xen-2.4.16/include/asm-i386/fixmap.h
new file mode 100644
index 0000000..b0f455a
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/fixmap.h
@@ -0,0 +1,107 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <xeno/config.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special  addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * on UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_X86_LOCAL_APIC
+       FIX_APIC_BASE,  /* local (CPU) APIC -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_HIGHMEM
+       FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+       __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+                          l1_pgentry_t entry);
+
+#define set_fixmap(idx, phys) \
+               __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR))
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+               __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE))
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP    (0xffffe000UL)
+#define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+       /*
+        * this branch gets completely eliminated after inlining,
+        * except when someone tries to use fixaddr indices in an
+        * illegal way (such as mixing up address types or using
+        * out-of-range indices).
+        *
+        * If it doesn't get removed, the linker will complain
+        * loudly with a reasonably clear error message..
+        */
+       if (idx >= __end_of_fixed_addresses)
+               __this_fixmap_does_not_exist();
+
+        return __fix_to_virt(idx);
+}
+
+#endif
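
Each fixmap index maps one 4 KB page handed out downward from FIXADDR_TOP, which is all __fix_to_virt() computes. A standalone illustration of the address arithmetic (constants copied from the header):

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define FIXADDR_TOP 0xffffe000UL

    int main(void)
    {
        unsigned int idx;
        /* idx 0 -> 0xffffe000, idx 1 -> 0xffffd000, ... */
        for (idx = 0; idx < 3; idx++)
            printf("fixmap %u -> %#lx\n", idx,
                   FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT));
        return 0;
    }
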
diff --git a/xen-2.4.16/include/asm-i386/hardirq.h b/xen-2.4.16/include/asm-i386/hardirq.h
new file mode 100644
index 0000000..bad529b
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/hardirq.h
@@ -0,0 +1,88 @@
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <xeno/config.h>
+#include <xeno/irq.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+       unsigned int __softirq_pending;
+       unsigned int __local_irq_count;
+       unsigned int __local_bh_count;
+       unsigned int __syscall_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <xeno/irq_cpustat.h>  /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() ({ int __cpu = smp_processor_id(); \
+       (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
+
+#define in_irq() (local_irq_count(smp_processor_id()) != 0)
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu)   (local_irq_count(cpu) == 0)
+#define hardirq_endlock(cpu)   do { } while (0)
+
+#define irq_enter(cpu, irq)    (local_irq_count(cpu)++)
+#define irq_exit(cpu, irq)     (local_irq_count(cpu)--)
+
+#define synchronize_irq()      barrier()
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+       int i;
+
+       for (i = 0; i < smp_num_cpus; i++)
+               if (local_irq_count(i))
+                       return 1;
+       return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+       /* if we didn't own the irq lock, just ignore it. */
+       if (global_irq_holder == (unsigned char) cpu) {
+               global_irq_holder = 0xff;
+               clear_bit(0,&global_irq_lock);
+       }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+       ++local_irq_count(cpu);
+
+       while (test_bit(0,&global_irq_lock)) {
+               cpu_relax();
+       }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+       --local_irq_count(cpu);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+       return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu)   do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/xen-2.4.16/include/asm-i386/hdreg.h b/xen-2.4.16/include/asm-i386/hdreg.h
new file mode 100644 (file)
index 0000000..1ad5c07
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ *  linux/include/asm-i386/hdreg.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+#ifndef __ASMi386_HDREG_H
+#define __ASMi386_HDREG_H
+
+typedef unsigned short ide_ioreg_t;
+
+#endif /* __ASMi386_HDREG_H */
diff --git a/xen-2.4.16/include/asm-i386/i387.h b/xen-2.4.16/include/asm-i386/i387.h
new file mode 100644 (file)
index 0000000..7ec679d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <xeno/sched.h>
+#include <asm/processor.h>
+
+extern void init_fpu(void);
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+#define unlazy_fpu( tsk ) do { \
+       if ( tsk->flags & PF_USEDFPU ) \
+               save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+       if ( tsk->flags & PF_USEDFPU ) { \
+               asm volatile("fwait"); \
+               tsk->flags &= ~PF_USEDFPU; \
+               stts(); \
+       } \
+} while (0)
+
+#define load_mxcsr( val ) do { \
+        unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+        asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
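+
+/*
+ * Illustrative sketch (editorial, not part of the original import): on a
+ * context switch away from task 'prev', the lazy-FPU convention above is
+ * typically applied as
+ *
+ *     unlazy_fpu(prev);       save state only if prev really used the FPU
+ *
+ * while clear_fpu(prev) instead discards the state and re-enables the
+ * device-not-available trap via stts().
+ */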
+
+#endif /* __ASM_I386_I387_H */
diff --git a/xen-2.4.16/include/asm-i386/ide.h b/xen-2.4.16/include/asm-i386/ide.h
new file mode 100644 (file)
index 0000000..6642abf
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ *  linux/include/asm-i386/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the i386 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifndef MAX_HWIFS
+# ifdef CONFIG_BLK_DEV_IDEPCI
+#define MAX_HWIFS      10
+# else
+#define MAX_HWIFS      6
+# endif
+#endif
+
+#define ide__sti()     __sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+       switch (base) {
+               case 0x1f0: return 14;
+               case 0x170: return 15;
+               case 0x1e8: return 11;
+               case 0x168: return 10;
+               case 0x1e0: return 8;
+               case 0x160: return 12;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+       switch (index) {
+               case 0: return 0x1f0;
+               case 1: return 0x170;
+               case 2: return 0x1e8;
+               case 3: return 0x168;
+               case 4: return 0x1e0;
+               case 5: return 0x160;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
+{
+       ide_ioreg_t reg = data_port;
+       int i;
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       if (ctrl_port) {
+               hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       } else {
+               hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
+       }
+       if (irq != NULL)
+               *irq = 0;
+       hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+static __inline__ void ide_init_default_hwifs(void)
+{
+#ifndef CONFIG_BLK_DEV_IDEPCI
+       hw_regs_t hw;
+       int index;
+
+       for(index = 0; index < MAX_HWIFS; index++) {
+               ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
+               hw.irq = ide_default_irq(ide_default_io_base(index));
+               ide_register_hw(&hw, NULL);
+       }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned head           : 4;    /* always zeros here */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit7           : 1;    /* always 1 */
+       } b;
+} select_t;
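+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * decoding a raw drive-select byte, assuming 'raw' was read from the
+ * device:
+ *
+ *     select_t sel;
+ *     sel.all = raw;
+ *     unit = sel.b.unit;      0 = master, 1 = slave
+ *     lba  = sel.b.lba;       1 = LBA addressing, 0 = CHS
+ */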
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned bit0           : 1;
+               unsigned nIEN           : 1;    /* device INTRQ to host */
+               unsigned SRST           : 1;    /* host soft reset bit */
+               unsigned bit3           : 1;    /* ATA-2 thingy */
+               unsigned reserved456    : 3;
+               unsigned HOB            : 1;    /* 48-bit address ordering */
+       } b;
+} control_t;
+
+#define ide_request_irq(irq,hand,flg,dev,id)   request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id)               free_irq((irq), (dev_id))
+#define ide_check_region(from,extent)          check_region((from), (extent))
+#define ide_request_region(from,extent,name)   request_region((from), (extent), (name))
+#define ide_release_region(from,extent)                release_region((from), (extent))
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+#define ide_ack_intr(hwif)             (1)
+#define ide_fix_driveid(id)            do {} while (0)
+#define ide_release_lock(lock)         do {} while (0)
+#define ide_get_lock(lock, hdlr, data) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
diff --git a/xen-2.4.16/include/asm-i386/io.h b/xen-2.4.16/include/asm-i386/io.h
new file mode 100644 (file)
index 0000000..9b54ae2
--- /dev/null
@@ -0,0 +1,253 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <xeno/config.h>
+#include <asm/page.h>
+
+#define IO_SPACE_LIMIT 0xffff
+
+/*#include <linux/vmalloc.h>*/
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#if CONFIG_DEBUG_IOVIRT
+  extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+  extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
+  #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+  #define __io_virt(x) ((void *)(x))
+//#define __io_phys(x) __pa(x)
+#endif
+
+/*
+ * Change virtual addresses to physical addresses and vice versa.
+ * These are pretty trivial.
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+       return __pa(address);
+}
+
+static inline void * phys_to_virt(unsigned long address)
+{
+       return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page)     ((page - frame_table) << PAGE_SHIFT)
+
+extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+static inline void * ioremap (unsigned long offset, unsigned long size)
+{
+       return __ioremap(offset, size, 0);
+}
+
+/*
+ * This one maps high-address device memory and turns off caching for that
+ * area. It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ */
+static inline void * ioremap_nocache (unsigned long offset, unsigned long size)
+{
+        return __ioremap(offset, size, _PAGE_PCD);
+}
+
+extern void iounmap(void *addr);
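+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * mapping a device register window (hypothetical DEV_PHYS/DEV_LEN) and
+ * accessing it with the readX/writeX accessors defined below:
+ *
+ *     void *regs = ioremap_nocache(DEV_PHYS, DEV_LEN);
+ *     unsigned int id = readl(regs);
+ *     writel(1, regs + 4);
+ *     iounmap(regs);
+ */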
+
+/*
+ * IO bus memory addresses are also 1:1 with the physical address
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c)       memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c)   memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c)     memcpy(__io_virt(a),(b),(c))
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite close):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c)           memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c)       memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c)         memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require memory-mapped IO specific functions.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d)           eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d)       eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+static inline int check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
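+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * probing the ISA BIOS area for a table signature, e.g. the "$PIR"
+ * PCI IRQ routing table somewhere in physical 0xf0000..0xfffff:
+ *
+ *     static const unsigned char sig[4] = { '$', 'P', 'I', 'R' };
+ *     if (isa_check_signature(0xf0000, sig, 4))
+ *             ...  found it, go parse the table
+ */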
+
+static inline int isa_check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (isa_readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
+
+/*
+ *     Cache management
+ *
+ *     This is needed for two cases:
+ *     1. Out of order aware processors
+ *     2. Accidentally out of order processors (PPro errata #51)
+ */
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+       __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size)            flush_write_buffers()
+#define dma_cache_wback(_start,_size)          flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size)      flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size)            do { } while (0)
+#define dma_cache_wback(_start,_size)          do { } while (0)
+#define dma_cache_wback_inv(_start,_size)      do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} 
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } 
+
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
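+
+/*
+ * The expansions above generate the usual port accessors: inb/inw/inl,
+ * outb/outw/outl, their _p ("pause") variants, and the string forms
+ * insb/insw/insl and outsb/outsw/outsl.  Illustrative sketch (editorial,
+ * not part of the original import):
+ *
+ *     outb(0x0a, 0x3f2);              write one byte to a port
+ *     status = inb_p(0x64);           byte read with a recovery delay
+ *     insw(0x1f0, buf, 256);          read 256 words from a data port
+ */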
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/io_apic.h b/xen-2.4.16/include/asm-i386/io_apic.h
new file mode 100644 (file)
index 0000000..a03be4d
--- /dev/null
@@ -0,0 +1,148 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+               ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+       __u32   __reserved_2    : 24,
+               ID              :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+       __u32   version         :  8,
+               __reserved_2    :  7,
+               PRQ             :  1,
+               entries         :  8,
+               __reserved_1    :  8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+       __u32   __reserved_2    : 24,
+               arbitration     :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+       dest_Fixed = 0,
+       dest_LowestPrio = 1,
+       dest_SMI = 2,
+       dest__reserved_1 = 3,
+       dest_NMI = 4,
+       dest_INIT = 5,
+       dest__reserved_2 = 6,
+       dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+       __u32   vector          :  8,
+               delivery_mode   :  3,   /* 000: FIXED
+                                        * 001: lowest prio
+                                        * 111: ExtINT
+                                        */
+               dest_mode       :  1,   /* 0: physical, 1: logical */
+               delivery_status :  1,
+               polarity        :  1,
+               irr             :  1,
+               trigger         :  1,   /* 0: edge, 1: level */
+               mask            :  1,   /* 0: enabled, 1: disabled */
+               __reserved_2    : 15;
+
+       union {         struct { __u32
+                                       __reserved_1    : 24,
+                                       physical_dest   :  4,
+                                       __reserved_2    :  4;
+                       } physical;
+
+                       struct { __u32
+                                       __reserved_1    : 24,
+                                       logical_dest    :  8;
+                       } logical;
+       } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       *IO_APIC_BASE(apic) = reg;
+       return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       *IO_APIC_BASE(apic) = reg;
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+       (void) *(IO_APIC_BASE(apic)+4);
+}
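+
+/*
+ * Illustrative sketch (editorial, not part of the original import): a
+ * read-modify-write of I/O redirection entry 'pin' (the 64-bit entries
+ * start at register 0x10, two 32-bit registers each), masking the pin:
+ *
+ *     struct IO_APIC_route_entry entry;
+ *     *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *     entry.mask = 1;
+ *     io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ */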
+
+/* 1 if "noapic" boot option passed */
+//extern int skip_ioapic_setup;
+#define skip_ioapic_setup 0
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#else  /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/ioctl.h b/xen-2.4.16/include/asm-i386/ioctl.h
new file mode 100644 (file)
index 0000000..c75f20a
--- /dev/null
@@ -0,0 +1,75 @@
+/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $
+ *
+ * linux/ioctl.h for Linux by H.H. Bergman.
+ */
+
+#ifndef _ASMI386_IOCTL_H
+#define _ASMI386_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB - 1!
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms.  The i386 ioctl numbering scheme doesn't really enforce
+ * a type field.  De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here.  Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS    8
+#define _IOC_TYPEBITS  8
+#define _IOC_SIZEBITS  14
+#define _IOC_DIRBITS   2
+
+#define _IOC_NRMASK    ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK  ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK  ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK   ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT   0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT  (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE      0U
+#define _IOC_WRITE     1U
+#define _IOC_READ      2U
+
+#define _IOC(dir,type,nr,size) \
+       (((dir)  << _IOC_DIRSHIFT) | \
+        ((type) << _IOC_TYPESHIFT) | \
+        ((nr)   << _IOC_NRSHIFT) | \
+        ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr)           _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)     _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)     _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size)    _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr)           (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)          (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)            (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)          (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN         (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT                (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT      ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK   (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT  (_IOC_SIZESHIFT)
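+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * defining and decoding a hypothetical ioctl that reads a
+ * 'struct foo_info' out of a driver:
+ *
+ *     #define FOO_GET_INFO _IOR('f', 1, struct foo_info)
+ *
+ *     _IOC_DIR(FOO_GET_INFO)  == _IOC_READ
+ *     _IOC_TYPE(FOO_GET_INFO) == 'f'
+ *     _IOC_NR(FOO_GET_INFO)   == 1
+ *     _IOC_SIZE(FOO_GET_INFO) == sizeof(struct foo_info)
+ */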
+
+#endif /* _ASMI386_IOCTL_H */
diff --git a/xen-2.4.16/include/asm-i386/irq.h b/xen-2.4.16/include/asm-i386/irq.h
new file mode 100644 (file)
index 0000000..918f828
--- /dev/null
@@ -0,0 +1,203 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+
+#define SA_INTERRUPT    0x20000000
+#define SA_SHIRQ        0x04000000
+
+#define TIMER_IRQ 0
+/* 256 vectors in the architecture, minus the 32 reserved for processor exceptions. */
+#define NR_IRQS 224
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR  0x20
+
+#define SYSCALL_VECTOR         0x80   /* application -> OS     */
+#define KDBENTER_VECTOR                0x81   /* anyone      -> KDB    */
+#define HYPERVISOR_CALL_VECTOR 0x82   /* OS          -> monitor*/
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ *  some of the following vectors are 'rare', they are merged
+ *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ *  TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR   0xff
+#define ERROR_APIC_VECTOR      0xfe
+#define INVALIDATE_TLB_VECTOR  0xfd
+#define EVENT_CHECK_VECTOR     0xfc
+#define CALL_FUNCTION_VECTOR   0xfb
+#define KDB_VECTOR             0xfa
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR     0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR    0x31
+#define FIRST_SYSTEM_VECTOR    0xef
+
+extern int irq_vector[NR_IRQS];
+#define IO_APIC_VECTOR(irq)    irq_vector[irq]
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#define SAVE_ALL \
+       "cld\n\t" \
+       "pushl %es\n\t" \
+       "pushl %ds\n\t" \
+       "pushl %eax\n\t" \
+       "pushl %ebp\n\t" \
+       "pushl %edi\n\t" \
+       "pushl %esi\n\t" \
+       "pushl %edx\n\t" \
+       "pushl %ecx\n\t" \
+       "pushl %ebx\n\t" \
+       "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \
+       "movl %edx,%ds\n\t" \
+       "movl %edx,%es\n\t"
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+#define GET_CURRENT \
+       "movl %esp, %ebx\n\t" \
+       "andl $-8192, %ebx\n\t"
+
+/*
+ *     SMP has a few special interrupts for IPI messages
+ */
+
+       /* there is a second layer of macro just to get the symbolic
+          name for the vector evaluated. This change is for RTLinux */
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v)\
+asmlinkage void x(void); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+       "pushl $"#v"-256\n\t" \
+       SAVE_ALL \
+       SYMBOL_NAME_STR(call_##x)":\n\t" \
+       "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+       "jmp ret_from_intr\n");
+
+#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v)
+#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \
+asmlinkage void x(struct pt_regs * regs); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+       "pushl $"#v"-256\n\t" \
+       SAVE_ALL \
+       "movl %esp,%eax\n\t" \
+       "pushl %eax\n\t" \
+       SYMBOL_NAME_STR(call_##x)":\n\t" \
+       "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
+       "addl $4,%esp\n\t" \
+       "jmp ret_from_intr\n");
+
+#define BUILD_COMMON_IRQ() \
+asmlinkage void call_do_IRQ(void); \
+__asm__( \
+       "\n" __ALIGN_STR"\n" \
+       "common_interrupt:\n\t" \
+       SAVE_ALL \
+       SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \
+       "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
+       "jmp ret_from_intr\n");
+
+/*
+ * Subtle. orig_eax is used by the signal code to distinguish between
+ * system calls and interrupted 'random user-space'. Thus we have
+ * to put a negative value into orig_eax here. (the problem is that
+ * both system calls and IRQs want to have small integer numbers in
+ * orig_eax, and the syscall code has won the optimization conflict ;)
+ *
+ * Subtle as a pig's ear.  VY
+ */
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+       "pushl $"#nr"-256\n\t" \
+       "jmp common_interrupt");
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+#include <xeno/irq.h>
+
+#ifdef CONFIG_SMP /* more of this file should probably be ifdef'ed SMP */
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+       if (IO_APIC_IRQ(i))
+               send_IPI_self(IO_APIC_VECTOR(i));
+}
+#else
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
diff --git a/xen-2.4.16/include/asm-i386/mc146818rtc.h b/xen-2.4.16/include/asm-i386/mc146818rtc.h
new file mode 100644 (file)
index 0000000..03a4efa
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+#include <xeno/spinlock.h>
+
+extern spinlock_t rtc_lock;             /* serialize CMOS RAM access */
+
+/**********************************************************************
+ * register summary
+ **********************************************************************/
+#define RTC_SECONDS             0
+#define RTC_SECONDS_ALARM       1
+#define RTC_MINUTES             2
+#define RTC_MINUTES_ALARM       3
+#define RTC_HOURS               4
+#define RTC_HOURS_ALARM         5
+/* RTC_*_alarm is always true if 2 MSBs are set */
+# define RTC_ALARM_DONT_CARE    0xC0
+
+#define RTC_DAY_OF_WEEK         6
+#define RTC_DAY_OF_MONTH        7
+#define RTC_MONTH               8
+#define RTC_YEAR                9
+
+/* control registers - Moto names
+ */
+#define RTC_REG_A               10
+#define RTC_REG_B               11
+#define RTC_REG_C               12
+#define RTC_REG_D               13
+
+/**********************************************************************
+ * register details
+ **********************************************************************/
+#define RTC_FREQ_SELECT RTC_REG_A
+
+/* update-in-progress - set to "1" 244 microsecs before the RTC goes off the bus,
+ * reset after the update (may take 1.984ms @ 32768Hz RefClock) is complete,
+ * giving a max high interval of 2.228 ms.
+ */
+# define RTC_UIP                0x80
+# define RTC_DIV_CTL            0x70
+   /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */
+#  define RTC_REF_CLCK_4MHZ     0x00
+#  define RTC_REF_CLCK_1MHZ     0x10
+#  define RTC_REF_CLCK_32KHZ    0x20
+   /* 2 values for divider stage reset, others for "testing purposes only" */
+#  define RTC_DIV_RESET1        0x60
+#  define RTC_DIV_RESET2        0x70
+  /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */
+# define RTC_RATE_SELECT        0x0F
+
+/**********************************************************************/
+#define RTC_CONTROL     RTC_REG_B
+# define RTC_SET 0x80           /* disable updates for clock setting */
+# define RTC_PIE 0x40           /* periodic interrupt enable */
+# define RTC_AIE 0x20           /* alarm interrupt enable */
+# define RTC_UIE 0x10           /* update-finished interrupt enable */
+# define RTC_SQWE 0x08          /* enable square-wave output */
+# define RTC_DM_BINARY 0x04     /* all time/date values are BCD if clear */
+# define RTC_24H 0x02           /* 24 hour mode - else hours bit 7 means pm */
+# define RTC_DST_EN 0x01        /* auto switch DST - works for USA only */
+
+/**********************************************************************/
+#define RTC_INTR_FLAGS  RTC_REG_C
+/* caution - cleared by read */
+# define RTC_IRQF 0x80          /* any of the following 3 is active */
+# define RTC_PF 0x40
+# define RTC_AF 0x20
+# define RTC_UF 0x10
+
+/**********************************************************************/
+#define RTC_VALID       RTC_REG_D
+# define RTC_VRT 0x80           /* valid RAM and time */
+/**********************************************************************/
+
+/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) 
+ * determines if the following two #defines are needed
+ */
+#ifndef BCD_TO_BIN
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#endif
+
+#ifndef BIN_TO_BCD
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+#endif
+
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)    (0x70 + (x))
+#define RTC_ALWAYS_BCD 1       /* RTC operates in binary mode */
+#endif
+
+/*
+ * The machines supported so far all access the RTC index register via
+ * an ISA port access, but the way to access the data register differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
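+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * reading the seconds register, honouring the binary/BCD mode bit and
+ * serializing CMOS access on rtc_lock:
+ *
+ *     spin_lock_irqsave(&rtc_lock, flags);
+ *     sec = CMOS_READ(RTC_SECONDS);
+ *     if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY))
+ *             BCD_TO_BIN(sec);
+ *     spin_unlock_irqrestore(&rtc_lock, flags);
+ */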
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
diff --git a/xen-2.4.16/include/asm-i386/mpspec.h b/xen-2.4.16/include/asm-i386/mpspec.h
new file mode 100644 (file)
index 0000000..2598ea0
--- /dev/null
@@ -0,0 +1,211 @@
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is. 
+ */
+#define SMP_MAGIC_IDENT        (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/* Maximum of 16 APICs with the current APIC ID architecture. */
+#define MAX_APICS 16
+
+#define MAX_MPC_ENTRY 1024
+
+struct intel_mp_floating
+{
+       char mpf_signature[4];          /* "_MP_"                       */
+       unsigned long mpf_physptr;      /* Configuration table address  */
+       unsigned char mpf_length;       /* Our length (paragraphs)      */
+       unsigned char mpf_specification;/* Specification version        */
+       unsigned char mpf_checksum;     /* Checksum (makes sum 0)       */
+       unsigned char mpf_feature1;     /* Standard or configuration ?  */
+       unsigned char mpf_feature2;     /* Bit7 set for IMCR|PIC        */
+       unsigned char mpf_feature3;     /* Unused (0)                   */
+       unsigned char mpf_feature4;     /* Unused (0)                   */
+       unsigned char mpf_feature5;     /* Unused (0)                   */
+};
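+
+/*
+ * Illustrative sketch (editorial, not part of the original import): the
+ * floating pointer is located by scanning 16-byte-aligned candidates in
+ * the EBDA, the top of base memory and the BIOS ROM for the "_MP_"
+ * signature plus a zero byte-sum (mpf_checksum_ok is a hypothetical
+ * helper):
+ *
+ *     struct intel_mp_floating *mpf = candidate;
+ *     if (*(unsigned int *)mpf->mpf_signature == SMP_MAGIC_IDENT &&
+ *         mpf_checksum_ok(mpf))
+ *             smp_found_config = 1;
+ */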
+
+struct mp_config_table
+{
+       char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+       unsigned short mpc_length;      /* Size of table */
+       char  mpc_spec;                 /* 0x01 */
+       char  mpc_checksum;
+       char  mpc_oem[8];
+       char  mpc_productid[12];
+       unsigned long mpc_oemptr;       /* 0 if not present */
+       unsigned short mpc_oemsize;     /* 0 if not present */
+       unsigned short mpc_oemcount;
+       unsigned long mpc_lapic;        /* APIC address */
+       unsigned long reserved;
+};
+
+/* Followed by entries */
+
+#define        MP_PROCESSOR    0
+#define        MP_BUS          1
+#define        MP_IOAPIC       2
+#define        MP_INTSRC       3
+#define        MP_LINTSRC      4
+#define        MP_TRANSLATION  192  /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;       /* Local APIC number */
+       unsigned char mpc_apicver;      /* Its versions */
+       unsigned char mpc_cpuflag;
+#define CPU_ENABLED            1       /* Processor is available */
+#define CPU_BOOTPROCESSOR      2       /* Processor is the BP */
+       unsigned long mpc_cpufeature;           
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK        0xF00
+       unsigned long mpc_featureflag;  /* CPUID feature value */
+       unsigned long mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+       unsigned char mpc_type;
+       unsigned char mpc_busid;
+       unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA   "EISA"
+#define BUSTYPE_ISA    "ISA"
+#define BUSTYPE_INTERN "INTERN"        /* Internal BUS */
+#define BUSTYPE_MCA    "MCA"
+#define BUSTYPE_VL     "VL"            /* Local bus */
+#define BUSTYPE_PCI    "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS   "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI    "MBI"
+#define BUSTYPE_MBII   "MBII"
+#define BUSTYPE_MPI    "MPI"
+#define BUSTYPE_MPSA   "MPSA"
+#define BUSTYPE_NUBUS  "NUBUS"
+#define BUSTYPE_TC     "TC"
+#define BUSTYPE_VME    "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;
+       unsigned char mpc_apicver;
+       unsigned char mpc_flags;
+#define MPC_APIC_USABLE                0x01
+       unsigned long mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbus;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_dstapic;
+       unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+       mp_INT = 0,
+       mp_NMI = 1,
+       mp_SMI = 2,
+       mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT      0
+#define MP_IRQDIR_HIGH         1
+#define MP_IRQDIR_LOW          3
+
+
+struct mpc_config_lintsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbusid;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_destapic;     
+#define MP_APIC_ALL    0xFF
+       unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+       char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+       unsigned short oem_length;      /* Size of table */
+       char  oem_rev;                  /* 0x01 */
+       char  oem_checksum;
+       char  mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+        unsigned char mpc_type;
+        unsigned char trans_len;
+        unsigned char trans_type;
+        unsigned char trans_quad;
+        unsigned char trans_global;
+        unsigned char trans_local;
+        unsigned short trans_reserved;
+};
+
+/*
+ *     Default configurations
+ *
+ *     1       2 CPU ISA 82489DX
+ *     2       2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ *     3       2 CPU EISA 82489DX
+ *     4       2 CPU MCA 82489DX
+ *     5       2 CPU ISA+PCI
+ *     6       2 CPU EISA+PCI
+ *     7       2 CPU MCA+PCI
+ */
+
+#define MAX_IRQ_SOURCES 256
+
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+       MP_BUS_ISA = 1,
+       MP_BUS_EISA,
+       MP_BUS_PCI,
+       MP_BUS_MCA
+};
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern unsigned long phys_cpu_present_map;
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+extern int nr_ioapics;
+extern int apic_version [MAX_APICS];
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+extern int pic_mode;
+extern int using_apic_timer;
+
+#endif
+
diff --git a/xen-2.4.16/include/asm-i386/msr.h b/xen-2.4.16/include/asm-i386/msr.h
new file mode 100644 (file)
index 0000000..11bcb7f
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ */
+
+#define rdmsr(msr,val1,val2) \
+     __asm__ __volatile__("rdmsr" \
+                         : "=a" (val1), "=d" (val2) \
+                         : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+     __asm__ __volatile__("wrmsr" \
+                         : /* no outputs */ \
+                         : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+     __asm__ __volatile__("rdpmc" \
+                         : "=a" (low), "=d" (high) \
+                         : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR            0
+#define MSR_IA32_P5_MC_TYPE            1
+#define MSR_IA32_PLATFORM_ID           0x17
+#define MSR_IA32_EBL_CR_POWERON                0x2a
+
+#define MSR_IA32_APICBASE              0x1b
+#define MSR_IA32_APICBASE_BSP          (1<<8)
+#define MSR_IA32_APICBASE_ENABLE       (1<<11)
+#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE           0x79
+#define MSR_IA32_UCODE_REV             0x8b
+
+#define MSR_IA32_PERFCTR0              0xc1
+#define MSR_IA32_PERFCTR1              0xc2
+
+#define MSR_IA32_BBL_CR_CTL            0x119
+
+#define MSR_IA32_MCG_CAP               0x179
+#define MSR_IA32_MCG_STATUS            0x17a
+#define MSR_IA32_MCG_CTL               0x17b
+
+#define MSR_IA32_EVNTSEL0              0x186
+#define MSR_IA32_EVNTSEL1              0x187
+
+#define MSR_IA32_DEBUGCTLMSR           0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP      0x1db
+#define MSR_IA32_LASTBRANCHTOIP                0x1dc
+#define MSR_IA32_LASTINTFROMIP         0x1dd
+#define MSR_IA32_LASTINTTOIP           0x1de
+
+#define MSR_IA32_MC0_CTL               0x400
+#define MSR_IA32_MC0_STATUS            0x401
+#define MSR_IA32_MC0_ADDR              0x402
+#define MSR_IA32_MC0_MISC              0x403
+
+/* AMD Defined MSRs */
+#define MSR_K6_EFER                    0xC0000080
+#define MSR_K6_STAR                    0xC0000081
+#define MSR_K6_WHCR                    0xC0000082
+#define MSR_K6_UWCCR                   0xC0000085
+#define MSR_K6_PSOR                    0xC0000087
+#define MSR_K6_PFIR                    0xC0000088
+
+#define MSR_K7_EVNTSEL0                        0xC0010000
+#define MSR_K7_PERFCTR0                        0xC0010004
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1                   0x107
+#define MSR_IDT_FCR2                   0x108
+#define MSR_IDT_FCR3                   0x109
+#define MSR_IDT_FCR4                   0x10a
+
+#define MSR_IDT_MCR0                   0x110
+#define MSR_IDT_MCR1                   0x111
+#define MSR_IDT_MCR2                   0x112
+#define MSR_IDT_MCR3                   0x113
+#define MSR_IDT_MCR4                   0x114
+#define MSR_IDT_MCR5                   0x115
+#define MSR_IDT_MCR6                   0x116
+#define MSR_IDT_MCR7                   0x117
+#define MSR_IDT_MCR_CTRL               0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR                    0x1107
+
+#endif /* __ASM_MSR_H */
diff --git a/xen-2.4.16/include/asm-i386/page.h b/xen-2.4.16/include/asm-i386/page.h
new file mode 100644 (file)
index 0000000..ac1304c
--- /dev/null
@@ -0,0 +1,169 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+extern void do_BUG(const char *file, int line);
+#define BUG() do {                                     \
+       do_BUG(__FILE__, __LINE__);                     \
+       __asm__ __volatile__("ud2");                    \
+} while (0)
+#else
+#include <xeno/lib.h>
+#define BUG() (panic("BUG at %s:%d\n", __FILE__, __LINE__))
+#endif
+#endif /* __ASSEMBLY__ */
+
+
+#define L1_PAGETABLE_SHIFT       12
+#define L2_PAGETABLE_SHIFT       22
+
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+
+#define PAGE_SHIFT               L1_PAGETABLE_SHIFT
+#define PAGE_SIZE               (1UL << PAGE_SHIFT)
+#define PAGE_MASK               (~(PAGE_SIZE-1))
+
+#define clear_page(_p)           memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f)         memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+
+#define alloc_l1_pagetable()  ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable()  ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+
+#define pagetable_ptr(_x)  ((l2_pagetable_t)((_x).pt_lo))
+#define pagetable_type(_x) (((_x).pt_lo) & ~PAGE_MASK)
+#define mk_pagetable(_x)   ( (pagetable_t) { (_x) } )
+#define pagetable_none(_x) ((_x).pt_lo == 0)
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x)  ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x)  ( (l2_pgentry_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT) 
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x)     \
+  ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+  ((_a) >> L2_PAGETABLE_SHIFT)
+
+/* Hypervisor table entries use zero to signify 'empty'. */
+#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x))
+#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x))
+
+#define __PAGE_OFFSET          (0xE0000000)
+#define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
+#define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define page_address(_p)        (__va(((_p) - frame_table) << PAGE_SHIFT))
+#define virt_to_page(kaddr)    (frame_table + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page)       ((page - frame_table) < max_mapnr)
+
+/* High table entries are reserved by the hypervisor. */
+#define DOMAIN_ENTRIES_PER_L2_PAGETABLE            \
+  (PAGE_OFFSET >> L2_PAGETABLE_SHIFT)
+#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
+  (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE)
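+
+/*
+ * Illustrative sketch (editorial, not part of the original import):
+ * looking up the frame backing virtual address 'va' in a level-2 table
+ * 'l2' with the accessors above (no 4MB-page handling, and assuming
+ * both levels are present):
+ *
+ *     l2_pgentry_t l2e = l2[l2_table_offset(va)];
+ *     l1_pgentry_t *l1 = l2_pgentry_to_l1(l2e);
+ *     l1_pgentry_t l1e = l1[l1_table_offset(va)];
+ *     if (!l1_pgentry_empty(l1e))
+ *             pfn = l1_pgentry_to_pagenr(l1e);
+ */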
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+
+extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE];
+extern void paging_init(void);
+
+#define __flush_tlb()                                                  \
+       do {                                                            \
+               unsigned int tmpreg;                                    \
+                                                                       \
+               __asm__ __volatile__(                                   \
+                       "movl %%cr3, %0;  # flush TLB \n"               \
+                       "movl %0, %%cr3;              \n"               \
+                       : "=r" (tmpreg)                                 \
+                       :: "memory");                                   \
+       } while (0)
+
+/* Flush global pages as well. */
+#define __flush_tlb_all()                                              \
+       do {                                                            \
+               unsigned int tmpreg;                                    \
+                                                                       \
+               __asm__ __volatile__(                                   \
+                       "movl %1, %%cr4;  # turn off PGE     \n"        \
+                       "movl %%cr3, %0;  # flush TLB        \n"        \
+                       "movl %0, %%cr3;                     \n"        \
+                       "movl %2, %%cr4;  # turn PGE back on \n"        \
+                       : "=&r" (tmpreg)                                \
+                       : "r" (mmu_cr4_features & ~X86_CR4_PGE),        \
+                         "r" (mmu_cr4_features)                        \
+                       : "memory");                                    \
+       } while (0)
+
+#define __flush_tlb_one(addr) \
+__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+
+#endif /* !__ASSEMBLY__ */
+
+
+#define _PAGE_PRESENT  0x001
+#define _PAGE_RW       0x002
+#define _PAGE_USER     0x004
+#define _PAGE_PWT      0x008
+#define _PAGE_PCD      0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY    0x040
+#define _PAGE_PAT       0x080
+#define _PAGE_PSE      0x080
+#define _PAGE_GLOBAL   0x100
+
+#define __PAGE_HYPERVISOR \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_NOCACHE \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_HYPERVISOR_RO \
+       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL)
+
+#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR)
+#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO)
+#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE)
+
+#ifndef __ASSEMBLY__
+static __inline__ int get_order(unsigned long size)
+{
+    int order;
+    
+    size = (size-1) >> (PAGE_SHIFT-1);
+    order = -1;
+    do {
+        size >>= 1;
+        order++;
+    } while (size);
+    return order;
+}
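+
+/*
+ * Illustrative values (editorial, not part of the original import):
+ * get_order() returns the smallest n with (PAGE_SIZE << n) >= size,
+ * so with 4kB pages:
+ *
+ *     get_order(1)     == 0
+ *     get_order(4096)  == 0
+ *     get_order(4097)  == 1
+ *     get_order(8192)  == 1
+ *     get_order(16384) == 2
+ */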
+#endif
+
+#endif /* _I386_PAGE_H */
diff --git a/xen-2.4.16/include/asm-i386/pci.h b/xen-2.4.16/include/asm-i386/pci.h
new file mode 100644 (file)
index 0000000..9ab9c28
--- /dev/null
@@ -0,0 +1,268 @@
+#ifndef __i386_PCI_H
+#define __i386_PCI_H
+
+#include <linux/config.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()    0
+#endif
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO         0x1000
+#define PCIBIOS_MIN_MEM                (pci_mem_start)
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+//#include <linux/string.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS    (1)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
+ * is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+                                 dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what as passed into pci_alloc_consistent,
+ * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+                               void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+                                       size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+       return virt_to_bus(ptr);
+}
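+
+/*
+ * Illustrative sketch (editorial, not part of the original import): a
+ * driver maps a buffer for a device-bound transfer and unmaps it when
+ * the hardware is done (PCI_DMA_TODEVICE comes from the generic PCI
+ * layer):
+ *
+ *     dma_addr_t h = pci_map_single(dev, buf, len, PCI_DMA_TODEVICE);
+ *     ... program the device with 'h' and start the transfer ...
+ *     pci_unmap_single(dev, h, len, PCI_DMA_TODEVICE);
+ */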
+
+/* Unmap a single streaming mode DMA translation.  The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+                                   size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical
+ * to pci_map_single, but takes a struct page instead of a virtual address
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
+                                     unsigned long offset, size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+
+       return (page - frame_table) * PAGE_SIZE + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+                                 size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scatter-gather version of the
+ * above pci_map_single interface.  Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                            int nents, int direction)
+{
+       int i;
+
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /*
+        * temporary 2.4 hack
+        */
+       for (i = 0; i < nents; i++ ) {
+               if (sg[i].address && sg[i].page)
+                       BUG();
+               else if (!sg[i].address && !sg[i].page)
+                       BUG();
+               if (sg[i].address)
+                       sg[i].dma_address = virt_to_bus(sg[i].address);
+               else
+                       sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset;
+       }
+       flush_write_buffers();
+       return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                               int nents, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to tear down the PCI dma
+ * mapping, you must call this function before doing so.  At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+                                      dma_addr_t dma_handle,
+                                      size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+                                  struct scatterlist *sg,
+                                  int nelems, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly.  For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+        /*
+         * we fall back to GFP_DMA when the mask isn't all 1s,
+         * so we can't guarantee allocations that must be
+         * within a tighter range than GFP_DMA..
+         */
+        if (mask < 0x00ffffff)
+                return 0;
+
+       return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)   (1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction)
+{
+       return ((dma64_addr_t) page_to_bus(page) +
+               (dma64_addr_t) offset);
+}
+
+static __inline__ struct pfn_info *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       unsigned long poff = (dma_addr >> PAGE_SHIFT);
+
+       return frame_table + poff;
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+       flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)     ((sg)->dma_address)
+#define sg_dma_len(sg)         ((sg)->length)
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+       return 0;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __i386_PCI_H */
diff --git a/xen-2.4.16/include/asm-i386/pgalloc.h b/xen-2.4.16/include/asm-i386/pgalloc.h
new file mode 100644 (file)
index 0000000..fcba5e1
--- /dev/null
@@ -0,0 +1,117 @@
+#ifndef _I386_PGALLOC_H
+#define _I386_PGALLOC_H
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+
+/*
+ * Allocate and free page tables.
+ */
+
+
+#define pte_free(pte)          pte_free_fast(pte)
+#define pgd_alloc(mm)          get_pgd_fast()
+#define pgd_free(pgd)          free_pgd_fast(pgd)
+
+/*
+ * allocating and freeing a pmd is trivial: the 1-entry pmd is
+ * inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the pmds as part of the pgd.)
+ */
+
+#define pmd_alloc_one_fast(mm, addr)   ({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr)                ({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x)               do { } while (0)
+#define pmd_free_fast(x)               do { } while (0)
+#define pmd_free(x)                    do { } while (0)
+#define pgd_populate(mm, pmd, pte)     BUG()
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but the i386 has somewhat limited tlb flushing capabilities,
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+#if 0
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+       unsigned long addr)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb_one(addr);
+}
+#endif
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+       unsigned long start, unsigned long end)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+#else
+
+#include <xeno/smp.h>
+
+#define local_flush_tlb() \
+       __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/
+
+#define flush_tlb()    flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(mm);
+}
+
+#define TLBSTATE_OK    1
+#define TLBSTATE_LAZY  2
+
+struct tlb_state
+{
+       struct mm_struct *active_mm;
+       int state;
+};
+extern struct tlb_state cpu_tlbstate[NR_CPUS];
+
+
+#endif
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       /* i386 does not keep any page table caches in TLB */
+}
+
+#endif /* _I386_PGALLOC_H */
diff --git a/xen-2.4.16/include/asm-i386/processor.h b/xen-2.4.16/include/asm-i386/processor.h
new file mode 100644 (file)
index 0000000..2e98c53
--- /dev/null
@@ -0,0 +1,480 @@
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/cpufeature.h>
+#include <xeno/config.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+    __u8       x86;            /* CPU family */
+    __u8       x86_vendor;     /* CPU vendor */
+    __u8       x86_model;
+    __u8       x86_mask;
+    int        cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
+    __u32      x86_capability[NCAPINTS];
+    char    x86_vendor_id[16];
+    unsigned long *pgd_quick;
+    unsigned long *pmd_quick;
+    unsigned long *pte_quick;
+    unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge    (test_bit(X86_FEATURE_PGE,  boot_cpu_data.x86_capability))
+#define cpu_has_pse    (test_bit(X86_FEATURE_PSE,  boot_cpu_data.x86_capability))
+#define cpu_has_pae    (test_bit(X86_FEATURE_PAE,  boot_cpu_data.x86_capability))
+#define cpu_has_tsc    (test_bit(X86_FEATURE_TSC,  boot_cpu_data.x86_capability))
+#define cpu_has_de     (test_bit(X86_FEATURE_DE,   boot_cpu_data.x86_capability))
+#define cpu_has_vme    (test_bit(X86_FEATURE_VME,  boot_cpu_data.x86_capability))
+#define cpu_has_fxsr   (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm    (test_bit(X86_FEATURE_XMM,  boot_cpu_data.x86_capability))
+#define cpu_has_fpu    (test_bit(X86_FEATURE_FPU,  boot_cpu_data.x86_capability))
+#define cpu_has_apic   (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF  0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF  0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF  0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF  0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF  0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL        0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT  0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF  0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM  0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC  0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID  0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+    __asm__("cpuid"
+            : "=a" (*eax),
+            "=b" (*ebx),
+            "=c" (*ecx),
+            "=d" (*edx)
+            : "0" (op));
+}
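+/*
+ * Illustrative sketch: CPUID leaf 0 returns the vendor string in
+ * EBX/EDX/ECX, so it can be collected like this (eax and buf are
+ * assumed locals); buf ends up as e.g. "GenuineIntel".
+ *
+ *     int eax;
+ *     char buf[13];
+ *     cpuid(0, &eax, (int *)&buf[0], (int *)&buf[8], (int *)&buf[4]);
+ *     buf[12] = '\0';
+ */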
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+    unsigned int eax;
+
+    __asm__("cpuid"
+            : "=a" (eax)
+            : "0" (op)
+            : "bx", "cx", "dx");
+    return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+    unsigned int eax, ebx;
+
+    __asm__("cpuid"
+            : "=a" (eax), "=b" (ebx)
+            : "0" (op)
+            : "cx", "dx" );
+    return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+    unsigned int eax, ecx;
+
+    __asm__("cpuid"
+            : "=a" (eax), "=c" (ecx)
+            : "0" (op)
+            : "bx", "dx" );
+    return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+    unsigned int eax, edx;
+
+    __asm__("cpuid"
+            : "=a" (eax), "=d" (edx)
+            : "0" (op)
+            : "bx", "cx");
+    return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME            0x0001  /* enable vm86 extensions */
+#define X86_CR4_PVI            0x0002  /* virtual interrupts flag enable */
+#define X86_CR4_TSD            0x0004  /* disable time stamp at ipl 3 */
+#define X86_CR4_DE             0x0008  /* enable debugging extensions */
+#define X86_CR4_PSE            0x0010  /* enable page size extensions */
+#define X86_CR4_PAE            0x0020  /* enable physical address extensions */
+#define X86_CR4_MCE            0x0040  /* Machine check enable */
+#define X86_CR4_PGE            0x0080  /* enable global pages */
+#define X86_CR4_PCE            0x0100  /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR         0x0200  /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT     0x0400  /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (i.e.
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPUs that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+    mmu_cr4_features |= mask;
+    __asm__("movl %%cr4,%%eax\n\t"
+            "orl %0,%%eax\n\t"
+            "movl %%eax,%%cr4\n"
+            : : "irg" (mask)
+            :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+    mmu_cr4_features &= ~mask;
+    __asm__("movl %%cr4,%%eax\n\t"
+            "andl %0,%%eax\n\t"
+            "movl %%eax,%%cr4\n"
+            : : "irg" (~mask)
+            :"ax");
+}
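+/*
+ * Illustrative sketch: enabling global pages on a capable CPU.
+ * Because set_in_cr4() also records the bit in mmu_cr4_features,
+ * secondary CPUs booted later pick up the same setting.
+ *
+ *     if (cpu_has_pge)
+ *             set_in_cr4(X86_CR4_PGE);
+ */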
+
+/*
+ *      Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ *      Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+       outb((reg), 0x22); \
+       outb((data), 0x23); \
+} while (0)
+
+#define EISA_bus (0)
+#define MCA_bus  (0)
+
+/* From the system description table in the BIOS.  Mostly for MCA use,
+   but others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE      (PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE     (TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+    long       cwd;
+    long       swd;
+    long       twd;
+    long       fip;
+    long       fcs;
+    long       foo;
+    long       fos;
+    long       st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+    long       status;         /* software status information */
+};
+
+struct i387_fxsave_struct {
+    unsigned short     cwd;
+    unsigned short     swd;
+    unsigned short     twd;
+    unsigned short     fop;
+    long       fip;
+    long       fcs;
+    long       foo;
+    long       fos;
+    long       mxcsr;
+    long       reserved;
+    long       st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+    long       xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+    long       padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+    long       cwd;
+    long       swd;
+    long       twd;
+    long       fip;
+    long       fcs;
+    long       foo;
+    long       fos;
+    long       st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+    unsigned char      ftop, changed, lookahead, no_update, rm, alimit;
+    struct info        *info;
+    unsigned long      entry_eip;
+};
+
+union i387_union {
+    struct i387_fsave_struct   fsave;
+    struct i387_fxsave_struct  fxsave;
+    struct i387_soft_struct soft;
+};
+
+typedef struct {
+    unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+    unsigned short     back_link,__blh;
+    unsigned long      esp0;
+    unsigned short     ss0,__ss0h;
+    unsigned long      esp1;
+    unsigned short     ss1,__ss1h;
+    unsigned long      esp2;
+    unsigned short     ss2,__ss2h;
+    unsigned long      __cr3;
+    unsigned long      eip;
+    unsigned long      eflags;
+    unsigned long      eax,ecx,edx,ebx;
+    unsigned long      esp;
+    unsigned long      ebp;
+    unsigned long      esi;
+    unsigned long      edi;
+    unsigned short     es, __esh;
+    unsigned short     cs, __csh;
+    unsigned short     ss, __ssh;
+    unsigned short     ds, __dsh;
+    unsigned short     fs, __fsh;
+    unsigned short     gs, __gsh;
+    unsigned short     ldt, __ldth;
+    unsigned short     trace, bitmap;
+    unsigned long      io_bitmap[IO_BITMAP_SIZE+1];
+    /*
+     * pads the TSS to be cacheline-aligned (size is 0x100)
+     */
+    unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+    unsigned long      esp0; /* top of the stack */
+    unsigned long      eip;  /* in kernel space, saved on task switch */
+    unsigned long      esp;  /* "" */
+    unsigned long      fs;   /* "" (NB. DS/ES constant in mon, so no save) */
+    unsigned long      gs;   /* "" ("") */
+    unsigned long esp1, ss1;
+/* Hardware debugging registers */
+    unsigned long      debugreg[8];  /* %%db0-7 debug registers */
+/* fault info */
+    unsigned long      cr2, trap_no, error_code;
+/* floating point info */
+    union i387_union   i387;
+/* Trap info. */
+    trap_info_t         traps[256];
+};
+
+#define INIT_THREAD  {                                         \
+       sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */   \
+       0, 0, 0, 0, 0, 0,                                       \
+       { [0 ... 7] = 0 },      /* debugging registers */       \
+       0, 0, 0,                                                \
+       { { 0, }, },            /* 387 state */                 \
+       { {0} }                 /* io permissions */            \
+}
+
+#define INIT_TSS  {                                            \
+       0,0, /* back_link, __blh */                             \
+       sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */   \
+       __HYPERVISOR_DS, 0, /* ss0 */                           \
+       0,0,0,0,0,0, /* stack1, stack2 */                       \
+       0, /* cr3 */                                            \
+       0,0, /* eip,eflags */                                   \
+       0,0,0,0, /* eax,ecx,edx,ebx */                          \
+       0,0,0,0, /* esp,ebp,esi,edi */                          \
+       0,0,0,0,0,0, /* es,cs,ss */                             \
+       0,0,0,0,0,0, /* ds,fs,gs */                             \
+       __LDT(0),0, /* ldt */                                   \
+       0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */        \
+       {~0, } /* ioperm */                                     \
+}
+
+#define start_thread(regs, new_eip, new_esp) do {              \
+       __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));       \
+       set_fs(USER_DS);                                        \
+       regs->xds = __USER_DS;                                  \
+       regs->xes = __USER_DS;                                  \
+       regs->xss = __USER_DS;                                  \
+       regs->xcs = __USER_CS;                                  \
+       regs->eip = new_eip;                                    \
+       regs->esp = new_esp;                                    \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+    return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk)  (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk)  (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+
+#define idle0_task     (idle0_task_union.task)
+#define idle0_stack    (idle0_task_union.stack)
+
+struct microcode {
+    unsigned int hdrver;
+    unsigned int rev;
+    unsigned int date;
+    unsigned int sig;
+    unsigned int cksum;
+    unsigned int ldrver;
+    unsigned int pf;
+    unsigned int reserved[5];
+    unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE      _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+    __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax()    rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#ifdef         CONFIG_MPENTIUMIII
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+    __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif defined(CONFIG_X86_USE_3DNOW)
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+    __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+    __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x)  prefetchw(x)
+
+#endif
+
+#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/xen-2.4.16/include/asm-i386/ptrace.h b/xen-2.4.16/include/asm-i386/ptrace.h
new file mode 100644 (file)
index 0000000..509001c
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef _I386_PTRACE_H
+#define _I386_PTRACE_H
+
+#define EBX 0
+#define ECX 1
+#define EDX 2
+#define ESI 3
+#define EDI 4
+#define EBP 5
+#define EAX 6
+#define DS 7
+#define ES 8
+#define FS 9
+#define GS 10
+#define ORIG_EAX 11
+#define EIP 12
+#define CS  13
+#define EFL 14
+#define UESP 15
+#define SS   16
+#define FRAME_SIZE 17
+
+/* this struct defines the way the registers are stored on the 
+   stack during a system call. */
+
+struct pt_regs {
+       long ebx;
+       long ecx;
+       long edx;
+       long esi;
+       long edi;
+       long ebp;
+       long eax;
+       int  xds;
+       int  xes;
+       long orig_eax;
+       long eip;
+       int  xcs;
+       long eflags;
+       long esp;
+       int  xss;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+#define PTRACE_GETFPXREGS         18
+#define PTRACE_SETFPXREGS         19
+
+#define PTRACE_SETOPTIONS         21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD     0x00000001
+
+enum EFLAGS {
+        EF_CF   = 0x00000001,
+        EF_PF   = 0x00000004,
+        EF_AF   = 0x00000010,
+        EF_ZF   = 0x00000040,
+        EF_SF   = 0x00000080,
+        EF_TF   = 0x00000100,
+        EF_IE   = 0x00000200,
+        EF_DF   = 0x00000400,
+        EF_OF   = 0x00000800,
+        EF_IOPL = 0x00003000,
+        EF_IOPL_RING0 = 0x00000000,
+        EF_IOPL_RING1 = 0x00001000,
+        EF_IOPL_RING2 = 0x00002000,
+        EF_NT   = 0x00004000,   /* nested task */
+        EF_RF   = 0x00010000,   /* resume */
+        EF_VM   = 0x00020000,   /* virtual mode */
+        EF_AC   = 0x00040000,   /* alignment */
+        EF_VIF  = 0x00080000,   /* virtual interrupt */
+        EF_VIP  = 0x00100000,   /* virtual interrupt pending */
+        EF_ID   = 0x00200000,   /* id */
+};
+
+#ifdef __KERNEL__
+#define user_mode(regs) ((3 & (regs)->xcs))
+#define instruction_pointer(regs) ((regs)->eip)
+extern void show_regs(struct pt_regs *);
+#endif
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/rwlock.h b/xen-2.4.16/include/asm-i386/rwlock.h
new file mode 100644 (file)
index 0000000..9475419
--- /dev/null
@@ -0,0 +1,83 @@
+/* include/asm-i386/rwlock.h
+ *
+ *     Helpers used by both rw spinlocks and rw semaphores.
+ *
+ *     Based in part on code from semaphore.h and
+ *     spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ *     Copyright 1999 Red Hat, Inc.
+ *
+ *     Written by Benjamin LaHaise.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_I386_RWLOCK_H
+#define _ASM_I386_RWLOCK_H
+
+#define RW_LOCK_BIAS            0x01000000
+#define RW_LOCK_BIAS_STR       "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper)   \
+       asm volatile(LOCK "subl $1,(%0)\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper)   \
+       asm volatile(LOCK "subl $1,%0\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushl %%eax\n\t" \
+                    "leal %0,%%eax\n\t" \
+                    "call " helper "\n\t" \
+                    "popl %%eax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper)  do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_read_lock_const(rw, helper); \
+                                               else \
+                                                       __build_read_lock_ptr(rw, helper); \
+                                       } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushl %%eax\n\t" \
+                    "leal %0,%%eax\n\t" \
+                    "call " helper "\n\t" \
+                    "popl %%eax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_write_lock_const(rw, helper); \
+                                               else \
+                                                       __build_write_lock_ptr(rw, helper); \
+                                       } while (0)
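+/*
+ * Worked example of the bias scheme: an unlocked word holds
+ * RW_LOCK_BIAS (0x01000000).  Each reader subtracts 1, so the count
+ * stays positive while only readers are present; a reader that drives
+ * it negative (a writer holds the bias) diverts into the .text.lock
+ * helper.  A writer subtracts the whole bias and succeeds only if the
+ * result is exactly zero, i.e. the lock was completely free.
+ */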
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/scatterlist.h b/xen-2.4.16/include/asm-i386/scatterlist.h
new file mode 100644 (file)
index 0000000..9d85841
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+struct scatterlist {
+    char *  address;    /* Location data is to be transferred to, NULL for
+                        * highmem page */
+    struct pfn_info * page; /* Location for highmem page, if any */
+    unsigned int offset;/* for highmem, page offset */
+
+    dma_addr_t dma_address;
+    unsigned int length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/xen-2.4.16/include/asm-i386/smp.h b/xen-2.4.16/include/asm-i386/smp.h
new file mode 100644 (file)
index 0000000..560f5ea
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#ifndef __ASSEMBLY__
+#include <xeno/config.h>
+#include <asm/ptrace.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+#endif
+
+#ifdef CONFIG_SMP
+#define TARGET_CPUS cpu_online_map
+#define INT_DELIVERY_MODE 1     /* logical delivery broadcast to all procs */
+#else
+#define INT_DELIVERY_MODE 1     /* logical delivery */
+#define TARGET_CPUS 0x01
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_invalidate_rcv(void);          /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+       return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+       return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id);                /* Store per-CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() (current->processor)
+
+static __inline int hard_smp_processor_id(void)
+{
+       /* we don't want to mark this access volatile - bad code generation */
+       return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+
+static __inline int logical_smp_processor_id(void)
+{
+       /* we don't want to mark this access volatile - bad code generation */
+       return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
+#endif
diff --git a/xen-2.4.16/include/asm-i386/smpboot.h b/xen-2.4.16/include/asm-i386/smpboot.h
new file mode 100644 (file)
index 0000000..67bbedb
--- /dev/null
@@ -0,0 +1,28 @@
+#ifndef __ASM_SMPBOOT_H
+#define __ASM_SMPBOOT_H
+
+#define TRAMPOLINE_LOW phys_to_virt(0x467)
+#define TRAMPOLINE_HIGH phys_to_virt(0x469)
+
+#define boot_cpu_apicid boot_cpu_physical_apicid
+
+/* How to map from the cpu_present_map. */
+#define cpu_present_to_apicid(apicid) (apicid)
+
+/*
+ * Mappings between logical cpu number and logical / physical apicid
+ * The first four macros are trivial, but they keep the abstraction consistent.
+ */
+extern volatile int logical_apicid_2_cpu[];
+extern volatile int cpu_2_logical_apicid[];
+extern volatile int physical_apicid_2_cpu[];
+extern volatile int cpu_2_physical_apicid[];
+
+#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
+#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
+#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/softirq.h b/xen-2.4.16/include/asm-i386/softirq.h
new file mode 100644 (file)
index 0000000..2542244
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+
+#define __cpu_bh_enable(cpu) \
+               do { barrier(); local_bh_count(cpu)--; } while (0)
+#define cpu_bh_disable(cpu) \
+               do { local_bh_count(cpu)++; barrier(); } while (0)
+
+#define local_bh_disable()     cpu_bh_disable(smp_processor_id())
+#define __local_bh_enable()    __cpu_bh_enable(smp_processor_id())
+
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+
+/*
+ * NOTE: this assembly code assumes:
+ *
+ *    (char *)&local_bh_count - 8 == (char *)&softirq_pending
+ *
+ * If you change the offsets in irq_stat then you have to
+ * update this code as well.
+ */
+#define local_bh_enable()                                              \
+do {                                                                   \
+       unsigned int *ptr = &local_bh_count(smp_processor_id());        \
+                                                                       \
+       barrier();                                                      \
+       if (!--*ptr)                                                    \
+               __asm__ __volatile__ (                                  \
+                       "cmpl $0, -8(%0);"                              \
+                       "jnz 2f;"                                       \
+                       "1:;"                                           \
+                                                                       \
+                       ".section .text.lock,\"ax\";"                   \
+                       "2: pushl %%eax; pushl %%ecx; pushl %%edx;"     \
+                       "call %c1;"                                     \
+                       "popl %%edx; popl %%ecx; popl %%eax;"           \
+                       "jmp 1b;"                                       \
+                       ".previous;"                                    \
+                                                                       \
+               : /* no output */                                       \
+               : "r" (ptr), "i" (do_softirq)                           \
+               /* no registers clobbered */ );                         \
+} while (0)
+
+#endif /* __ASM_SOFTIRQ_H */
diff --git a/xen-2.4.16/include/asm-i386/spinlock.h b/xen-2.4.16/include/asm-i386/spinlock.h
new file mode 100644 (file)
index 0000000..59dc7b2
--- /dev/null
@@ -0,0 +1,206 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <xeno/config.h>
+#include <xeno/lib.h>
+
+#if 0
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "js 2f\n" \
+       ".section .text.lock,\"ax\"\n" \
+       "2:\t" \
+       "cmpb $0,%0\n\t" \
+       "rep;nop\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       ".previous"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+       "movb $1,%0" \
+               :"=m" (lock->lock) : : "memory"
+
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       if (lock->magic != SPINLOCK_MAGIC)
+               BUG();
+       if (!spin_is_locked(lock))
+               BUG();
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#else
+
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->lock) \
+               :"0" (oldval) : "memory"
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+#if SPINLOCK_DEBUG
+       if (lock->magic != SPINLOCK_MAGIC)
+               BUG();
+       if (!spin_is_locked(lock))
+               BUG();
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#endif
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"
+               :"=q" (oldval), "=m" (lock->lock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       __label__ here;
+here:
+       if (lock->magic != SPINLOCK_MAGIC) {
+               printk("eip: %p\n", &&here);
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->lock) : : "memory");
+}
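+/*
+ * Illustrative sketch: a lock-protected counter.  my_lock and
+ * my_count are assumed definitions for the example.
+ *
+ *     static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
+ *
+ *     spin_lock(&my_lock);
+ *     my_count++;
+ *     spin_unlock(&my_lock);
+ *
+ *     if (spin_trylock(&my_lock)) {
+ *             ... got it without spinning ...
+ *             spin_unlock(&my_lock);
+ *     }
+ */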
+
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define read_unlock(rw)                asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define write_unlock(rw)       asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
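+/*
+ * Illustrative sketch: typical reader/writer pairing.  Many readers
+ * may hold the lock at once; a writer excludes everyone.  my_rwlock
+ * and the data it guards are assumed.
+ *
+ *     static rwlock_t my_rwlock = RW_LOCK_UNLOCKED;
+ *
+ *     read_lock(&my_rwlock);
+ *     ... inspect shared data ...
+ *     read_unlock(&my_rwlock);
+ *
+ *     write_lock(&my_rwlock);
+ *     ... modify shared data ...
+ *     write_unlock(&my_rwlock);
+ */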
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/xen-2.4.16/include/asm-i386/system.h b/xen-2.4.16/include/asm-i386/system.h
new file mode 100644 (file)
index 0000000..18d2901
--- /dev/null
@@ -0,0 +1,354 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <xeno/config.h>
+#include <asm/bitops.h>
+
+struct task_struct;    /* one of the stranger aspects of C forward declarations.. */
+extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
+
+#define prepare_to_switch()    do { } while(0)
+#define switch_to(prev,next) do {                                      \
+       asm volatile("pushl %%ebp\n\t"                                  \
+                     "pushl %%ebx\n\t"                                  \
+                     "pushl %%esi\n\t"                                  \
+                     "pushl %%edi\n\t"                                  \
+                    "movl %%esp,%0\n\t"        /* save ESP */          \
+                     "cli\n\t"                                          \
+                    "movl %2,%%esp\n\t"        /* restore ESP */       \
+                     "movl %6,%%cr3\n\t"        /* restore pagetables */\
+                     "sti\n\t"                                          \
+                    "movl $1f,%1\n\t"          /* save EIP */          \
+                    "pushl %3\n\t"             /* restore EIP */       \
+                    "jmp __switch_to\n"                                \
+                    "1:\t"                                             \
+                    "popl %%edi\n\t"                                   \
+                    "popl %%esi\n\t"                                   \
+                    "popl %%ebx\n\t"                                   \
+                    "popl %%ebp\n\t"                                   \
+                    :"=m" (prev->thread.esp),"=m" (prev->thread.eip)   \
+                    :"m" (next->thread.esp),"m" (next->thread.eip),    \
+                     "a" (prev), "d" (next),                           \
+                      "c" (__pa(pagetable_ptr(next->mm.pagetable)))     \
+                     :"memory");                                        \
+} while (0)
+
+#define _set_base(addr,base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+       "rorl $16,%%edx\n\t" \
+       "movb %%dl,%2\n\t" \
+       "movb %%dh,%3" \
+       :"=&d" (__pr) \
+       :"m" (*((addr)+2)), \
+        "m" (*((addr)+4)), \
+        "m" (*((addr)+7)), \
+         "0" (base) \
+        ); } while(0)
+
+#define _set_limit(addr,limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+       "rorl $16,%%edx\n\t" \
+       "movb %2,%%dh\n\t" \
+       "andb $0xf0,%%dh\n\t" \
+       "orb %%dh,%%dl\n\t" \
+       "movb %%dl,%2" \
+       :"=&d" (__lr) \
+       :"m" (*(addr)), \
+        "m" (*((addr)+6)), \
+        "0" (limit) \
+        ); } while(0)
+
+#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
+#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
+
+static inline unsigned long _get_base(char * addr)
+{
+       unsigned long __base;
+       __asm__("movb %3,%%dh\n\t"
+               "movb %2,%%dl\n\t"
+               "shll $16,%%edx\n\t"
+               "movw %1,%%dx"
+               :"=&d" (__base)
+               :"m" (*((addr)+2)),
+                "m" (*((addr)+4)),
+                "m" (*((addr)+7)));
+       return __base;
+}
+
+#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value)                 \
+       asm volatile("\n"                       \
+               "1:\t"                          \
+               "movl %0,%%" #seg "\n"          \
+               "2:\n"                          \
+               ".section .fixup,\"ax\"\n"      \
+               "3:\t"                          \
+               "pushl $0\n\t"                  \
+               "popl %%" #seg "\n\t"           \
+               "jmp 2b\n"                      \
+               ".previous\n"                   \
+               ".section __ex_table,\"a\"\n\t" \
+               ".align 4\n\t"                  \
+               ".long 1b,3b\n"                 \
+               ".previous"                     \
+               : :"m" (*(unsigned int *)&(value)))
+
+/*
+ * Clear and set 'TS' bit respectively
+ */
+#define clts() __asm__ __volatile__ ("clts")
+#define read_cr0() ({ \
+       unsigned int __dummy; \
+       __asm__( \
+               "movl %%cr0,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr0(x) \
+       __asm__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+       unsigned int __dummy; \
+       __asm__( \
+               "movl %%cr4,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr4(x) \
+       __asm__("movl %0,%%cr4": :"r" (x));
+#define stts() write_cr0(8 | read_cr0())
+
+#define wbinvd() \
+       __asm__ __volatile__ ("wbinvd": : :"memory");
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+       unsigned long __limit;
+       __asm__("lsll %1,%0"
+               :"=r" (__limit):"r" (segment));
+       return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * The semantics of CMPXCHG8B are a bit strange, which is why
+ * there is a loop and the loading of %%eax and %%edx has to
+ * be inside it. This inlines well in most cases; the cached
+ * cost is around ~38 cycles. (In the future we might want
+ * to do a SIMD/3DNow!/MMX/FPU 64-bit store here, but that
+ * might have an implicit FPU-save as a cost, so it's not
+ * clear which path to go.)
+ */
+static inline void __set_64bit (unsigned long long * ptr,
+               unsigned int low, unsigned int high)
+{
+       __asm__ __volatile__ (
+               "\n1:\t"
+               "movl (%0), %%eax\n\t"
+               "movl 4(%0), %%edx\n\t"
+               "cmpxchg8b (%0)\n\t"
+               "jnz 1b"
+               : /* no outputs */
+               :       "D"(ptr),
+                       "b"(low),
+                       "c"(high)
+               :       "ax","dx","memory");
+}
+
+static inline void __set_64bit_constant (unsigned long long *ptr,
+                                                unsigned long long value)
+{
+       __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
+}
+#define ll_low(x)      *(((unsigned int*)&(x))+0)
+#define ll_high(x)     *(((unsigned int*)&(x))+1)
+
+static inline void __set_64bit_var (unsigned long long *ptr,
+                        unsigned long long value)
+{
+       __set_64bit(ptr,ll_low(value), ll_high(value));
+}
+
+#define set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit_constant(ptr, value) : \
+ __set_64bit_var(ptr, value) )
+
+#define _set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+ __set_64bit(ptr, ll_low(value), ll_high(value)) )
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has a side effect, so the volatile attribute is necessary;
+ *       without it the primitive would be invalid, as *ptr is an output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+       switch (size) {
+               case 1:
+                       __asm__ __volatile__("xchgb %b0,%1"
+                               :"=q" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 2:
+                       __asm__ __volatile__("xchgw %w0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 4:
+                       __asm__ __volatile__("xchgl %0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+       }
+       return x;
+}
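+/*
+ * Illustrative sketch: xchg() as an atomic test-and-set on a word,
+ * the classic use behind tas() above.  flag is an assumed shared
+ * word; the loop spins until it observes the old value 0.
+ *
+ *     while (xchg(&flag, 1) != 0)
+ *             cpu_relax();
+ *     ... critical section ...
+ *     flag = 0;
+ */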
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#ifdef CONFIG_X86_CMPXCHG
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       unsigned long prev;
+       switch (size) {
+       case 1:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 4:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
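+
+/*
+ * Illustrative sketch: a lock-free increment built on cmpxchg().
+ * val is an assumed shared counter; the loop retries whenever another
+ * CPU updated the word between the read and the compare-and-exchange.
+ *
+ *     unsigned long old;
+ *     do {
+ *             old = val;
+ *     } while (cmpxchg(&val, old, old + 1) != old);
+ */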
+    
+#else
+/* Compiling for a 386 proper. Is it worth implementing via cli/sti?  */
+#endif
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPUs follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPUs to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non-Intel clones support out-of-order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb()  mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb()  __asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb()       mb()
+#define smp_rmb()      rmb()
+#define smp_wmb()      wmb()
+#else
+#define smp_mb()       barrier()
+#define smp_rmb()      barrier()
+#define smp_wmb()      barrier()
+#endif
+
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
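+
+/*
+ * Illustrative sketch: publishing data to another CPU.  wmb() orders
+ * the payload store before the flag store; the consumer pairs it with
+ * rmb().  data and ready are assumed shared variables.
+ *
+ *     producer:
+ *             data = val;
+ *             wmb();
+ *             ready = 1;
+ *
+ *     consumer:
+ *             while (!ready)
+ *                     cpu_relax();
+ *             rmb();
+ *             use(data);
+ */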
+
+/* interrupt control.. */
+#define __save_flags(x)                __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define __restore_flags(x)     __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+#define __cli()                __asm__ __volatile__("cli": : :"memory")
+#define __sti()                        __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define safe_halt()            __asm__ __volatile__("sti; hlt": : :"memory")
+
+/* For spinlocks etc */
+#define local_irq_save(x)      __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore(x)   __restore_flags(x)
+#define local_irq_disable()    __cli()
+#define local_irq_enable()     __sti()
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#define BROKEN_ACPI_Sx         0x0001
+#define BROKEN_INIT_AFTER_S1   0x0002
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/types.h b/xen-2.4.16/include/asm-i386/types.h
new file mode 100644 (file)
index 0000000..2bd0f25
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef _I386_TYPES_H
+#define _I386_TYPES_H
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#include <xeno/config.h>
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+#define BITS_PER_LONG 32
+
+/* DMA addresses come in generic and 64-bit flavours.  */
+
+#ifdef CONFIG_HIGHMEM
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+typedef u64 dma64_addr_t;
+
+#endif
diff --git a/xen-2.4.16/include/asm-i386/uaccess.h b/xen-2.4.16/include/asm-i386/uaccess.h
new file mode 100644 (file)
index 0000000..5b4ba6e
--- /dev/null
@@ -0,0 +1,596 @@
+#ifndef __i386_UACCESS_H
+#define __i386_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+
+#define KERNEL_DS      MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS                MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()       (KERNEL_DS)
+#define get_fs()       (current->addr_limit)
+#define set_fs(x)      (current->addr_limit = (x))
+
+#define segment_eq(a,b)        ((a).seg == (b).seg)
+
+extern int __verify_write(const void *, unsigned long);
+
+#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
+
+/*
+ * Uhhuh, this needs 33-bit arithmetic. We have a carry..
+ */
+#define __range_ok(addr,size) ({ \
+       unsigned long flag,sum; \
+       asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
+               :"=&r" (flag), "=r" (sum) \
+               :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
+       flag; })
+
+#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+       return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
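+
+/*
+ * Worked example of the 33-bit trick above: for addr = 0xfffff000 and
+ * size = 0x2000 the addl wraps to 0x00001000 and sets the carry; the
+ * first sbbl turns that carry into flag = -1, so access_ok() fails
+ * even though addr + size looks like a small, valid address.
+ */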
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+       unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern unsigned long search_exception_table(unsigned long);
+
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+       __asm__ __volatile__("call __get_user_" #size \
+               :"=a" (ret),"=d" (x) \
+               :"0" (ptr))
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr)                                                        \
+({     int __ret_gu,__val_gu;                                          \
+       switch(sizeof (*(ptr))) {                                       \
+       case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;          \
+       case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;          \
+       case 4:  __get_user_x(4,__ret_gu,__val_gu,ptr); break;          \
+       default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; /* no such   \
+               symbol exists: unsupported sizes fail at link time */       \
+       }                                                               \
+       (x) = (__typeof__(*(ptr)))__val_gu;                             \
+       __ret_gu;                                                       \
+})
+
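Typical use, as a hedged sketch (the function is hypothetical; get_user evaluates to 0 on success and -EFAULT on a faulting user pointer):

    static long example_read_flag(int *uptr)
    {
        int val;
        if (get_user(val, uptr))     /* checks the address, then fetches */
            return -EFAULT;
        return val;
    }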
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define put_user(x,ptr)                                                        \
+  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+  __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size)                 \
+({                                                     \
+       long __pu_err;                                  \
+       __put_user_size((x),(ptr),(size),__pu_err);     \
+       __pu_err;                                       \
+})
+
+
+#define __put_user_check(x,ptr,size)                   \
+({                                                     \
+       long __pu_err = -EFAULT;                                        \
+       __typeof__(*(ptr)) *__pu_addr = (ptr);          \
+       if (access_ok(VERIFY_WRITE,__pu_addr,size))     \
+               __put_user_size((x),__pu_addr,(size),__pu_err); \
+       __pu_err;                                       \
+})                                                     
+
+#define __put_user_u64(x, addr, err)                           \
+       __asm__ __volatile__(                                   \
+               "1:     movl %%eax,0(%2)\n"                     \
+               "2:     movl %%edx,4(%2)\n"                     \
+               "3:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "4:     movl %3,%0\n"                           \
+               "       jmp 3b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,4b\n"                          \
+               "       .long 2b,4b\n"                          \
+               ".previous"                                     \
+               : "=r"(err)                                     \
+               : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+
+#define __put_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break;     \
+         case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break;     \
+         case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break;      \
+         case 8: __put_user_u64(x,ptr,retval); break;                  \
+         default: __put_user_bad();                                    \
+       }                                                               \
+} while (0)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %"rtype"1,%2\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     movl %3,%0\n"                           \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err)                                     \
+               : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size)                         \
+({                                                             \
+       long __gu_err, __gu_val;                                \
+       __get_user_size(__gu_val,(ptr),(size),__gu_err);        \
+       (x) = (__typeof__(*(ptr)))__gu_val;                     \
+       __gu_err;                                               \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break;     \
+         case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break;     \
+         case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break;      \
+         default: (x) = __get_user_bad();                              \
+       }                                                               \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %2,%"rtype"1\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     movl %3,%0\n"                           \
+               "       xor"itype" %"rtype"1,%"rtype"1\n"       \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err), ltype (x)                          \
+               : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy.  */
+#define __copy_user(to,from,size)                                      \
+do {                                                                   \
+       int __d0, __d1;                                                 \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movl %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
+
+#define __copy_user_zeroing(to,from,size)                              \
+do {                                                                   \
+       int __d0, __d1;                                                 \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movl %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "4:     pushl %0\n"                                     \
+               "       pushl %%eax\n"                                  \
+               "       xorl %%eax,%%eax\n"                             \
+               "       rep; stosb\n"                                   \
+               "       popl %%eax\n"                                   \
+               "       popl %0\n"                                      \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,4b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
+
+/* We let the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user_zeroing(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user(to,from,n);
+       return n;
+}
+
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size)                   \
+do {                                                           \
+       int __d0, __d1;                                         \
+       switch (size & 3) {                                     \
+       default:                                                \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "2:     shl $2,%0\n"                    \
+                       "       jmp 1b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,2b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 1:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsb\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     shl $2,%0\n"                    \
+                       "4:     incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 2:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     shl $2,%0\n"                    \
+                       "4:     addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 3:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:     movsb\n"                        \
+                       "3:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "4:     shl $2,%0\n"                    \
+                       "5:     addl $2,%0\n"                   \
+                       "6:     incl %0\n"                      \
+                       "       jmp 3b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,4b\n"                  \
+                       "       .long 1b,5b\n"                  \
+                       "       .long 2b,6b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       }                                                       \
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size)           \
+do {                                                           \
+       int __d0, __d1;                                         \
+       switch (size & 3) {                                     \
+       default:                                                \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "2:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       jmp 1b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,2b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 1:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsb\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       "4:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 2:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosw\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "4:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosw\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 3:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:     movsb\n"                        \
+                       "3:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "4:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosw\n"                        \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       addl $3,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "5:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosw\n"                        \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       addl $3,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "6:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       incl %0\n"                      \
+                       "       jmp 3b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,4b\n"                  \
+                       "       .long 1b,5b\n"                  \
+                       "       .long 2b,6b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       }                                                       \
+} while (0)
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __constant_copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               __constant_copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __constant_copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __constant_copy_user_zeroing(to,from,n);
+       return n;
+}
+
+#define copy_to_user(to,from,n)                                \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user((to),(from),(n)) :     \
+        __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user((to),(from),(n)) :   \
+        __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user_nocheck((to),(from),(n)) :     \
+        __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n)                    \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user_nocheck((to),(from),(n)) :   \
+        __generic_copy_from_user_nocheck((to),(from),(n)))
+
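The checked/unchecked split is used like this hedged sketch (hypothetical helper; note the copy routines return the number of bytes left uncopied, not an errno):

    static int fetch_from_user(void *kbuf, const void *ubuf, unsigned long len)
    {
        if (!access_ok(VERIFY_READ, ubuf, len))  /* verify once up front */
            return -EFAULT;
        if (__copy_from_user(kbuf, ubuf, len))   /* cheap unchecked copy */
            return -EFAULT;
        return 0;
    }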
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __i386_UACCESS_H */
diff --git a/xen-2.4.16/include/asm-i386/unaligned.h b/xen-2.4.16/include/asm-i386/unaligned.h
new file mode 100644 (file)
index 0000000..7acd795
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef __I386_UNALIGNED_H
+#define __I386_UNALIGNED_H
+
+/*
+ * The i386 can do unaligned accesses itself. 
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
+
+#endif
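Since get_unaligned()/put_unaligned() compile to plain dereferences on i386, a helper such as this hedged, hypothetical sketch costs nothing here yet stays portable to architectures with strict alignment:

    static unsigned short peek_u16(const void *p)
    {
        return get_unaligned((const unsigned short *)p);
    }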
diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h
new file mode 100644 (file)
index 0000000..55f7a33
--- /dev/null
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * block.h
+ *
+ * Block IO communication rings.
+ *
+ * These are the ring data structures for buffering messages between 
+ * the hypervisor and guest OSes.
+ *
+ * For now we'll start with our own rings for the block IO code instead
+ * of using the network rings.  Hopefully, this will give us additional
+ * flexibility in the future should we choose to move away from a 
+ * ring producer-consumer communication model.
+ */
+
+#ifndef __BLOCK_H__
+#define __BLOCK_H__
+
+typedef struct blk_tx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} blk_tx_entry_t;
+
+typedef struct blk_rx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} blk_rx_entry_t;
+
+typedef struct blk_ring_st {
+       blk_tx_entry_t  *tx_ring;
+       unsigned int    tx_prod, tx_cons, tx_event;
+       unsigned int    tx_ring_size;
+
+       blk_rx_entry_t  *rx_ring;
+       unsigned int    rx_prod, rx_cons, rx_event;
+       unsigned int    rx_ring_size;
+} blk_ring_t;
+
+int blk_create_ring(int domain, unsigned long ptr);
+
+#endif
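A producer-side sketch of the tx ring, under stated assumptions (indices wrap modulo tx_ring_size and tx_prod is published last; neither policy is spelled out in this header):

    static void blk_tx_enqueue(blk_ring_t *ring, unsigned long addr,
                               unsigned long size)
    {
        unsigned int idx = ring->tx_prod % ring->tx_ring_size;
        ring->tx_ring[idx].addr = addr;   /* virtual address of the buffer */
        ring->tx_ring[idx].size = size;   /* length in bytes */
        ring->tx_prod++;                  /* make the entry visible */
    }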
diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h
new file mode 100644 (file)
index 0000000..5f57125
--- /dev/null
@@ -0,0 +1,272 @@
+/******************************************************************************
+ * hypervisor-if.h
+ * 
+ * Interface to Xeno hypervisor.
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <hypervisor-ifs/block.h>
+
+#ifndef __HYPERVISOR_IF_H__
+#define __HYPERVISOR_IF_H__
+
+typedef struct trap_info_st
+{
+    unsigned char  vector;  /* exception/interrupt vector */
+    unsigned char  dpl;     /* privilege level            */
+    unsigned short cs;      /* code selector              */
+    unsigned long  address; /* code address               */
+} trap_info_t;
+
+
+typedef struct
+{
+#define PGREQ_ADD_BASEPTR    0
+#define PGREQ_REMOVE_BASEPTR 1
+    unsigned long ptr, val; /* *ptr = val */
+} page_update_request_t;
+
+
+/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */
+
+#define __HYPERVISOR_set_trap_table  0
+#define __HYPERVISOR_pt_update       1
+#define __HYPERVISOR_console_write   2
+#define __HYPERVISOR_set_pagetable   3
+#define __HYPERVISOR_set_guest_stack 4
+#define __HYPERVISOR_net_update      5
+#define __HYPERVISOR_fpu_taskswitch  6
+#define __HYPERVISOR_yield           7
+#define __HYPERVISOR_exit            8
+#define __HYPERVISOR_dom0_op         9
+#define __HYPERVISOR_network_op     10
+
+#define TRAP_INSTR "int $0x82"
+
+
+static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
+        "b" (table) );
+
+    return ret;
+}
+
+
+static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_pt_update), 
+        "b" (req), "c" (count) );
+
+    return ret;
+}
+
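A hedged usage sketch: queueing a single update (the helper is hypothetical, and whether ptr is a physical or virtual address is not specified at this point):

    static int update_one_entry(unsigned long ptr, unsigned long val)
    {
        page_update_request_t req;
        req.ptr = ptr;    /* "*ptr = val", per the struct comment above */
        req.val = val;
        return HYPERVISOR_pt_update(&req, 1);
    }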
+
+static inline int HYPERVISOR_console_write(const char *str, int count)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_console_write), 
+        "b" (str), "c" (count) );
+
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_pagetable(unsigned long ptr)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_pagetable),
+        "b" (ptr) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_guest_stack(
+    unsigned long ss, unsigned long esp)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_guest_stack),
+        "b" (ss), "c" (esp) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_net_update(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_net_update) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_fpu_taskswitch(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_yield(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_yield) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_exit(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_exit) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_dom0_op(void *dom0_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
+        "b" (dom0_op) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_network_op(void *network_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_network_op),
+        "b" (network_op) );
+
+    return ret;
+}
+
+/* Events that a guest OS may receive from the hypervisor. */
+#define EVENT_NET_TX  0x01 /* packets for transmission. */
+#define EVENT_NET_RX  0x02 /* empty buffers for receive. */
+#define EVENT_TIMER   0x04 /* a timeout has been updated. */
+#define EVENT_DIE     0x08 /* OS is about to be killed. Clean up please! */
+#define EVENT_BLK_TX  0x10 /* packets for transmission. */
+#define EVENT_BLK_RX  0x20 /* empty buffers for receive. */
+
+/* Bit offsets, as opposed to the above masks. */
+#define _EVENT_NET_TX 0
+#define _EVENT_NET_RX 1
+#define _EVENT_TIMER  2
+#define _EVENT_DIE    3
+#define _EVENT_BLK_TX 4
+#define _EVENT_BLK_RX 5
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct shared_info_st {
+
+    /* Bitmask of outstanding event notifications hypervisor -> guest OS. */
+    unsigned long events;
+    /*
+     * Hypervisor will only signal event delivery via the "callback
+     * exception" when this value is non-zero. Hypervisor clears this when
+     * notifying the guest OS -- this prevents unbounded reentrancy and
+     * stack overflow (in this way it acts as an interrupt-enable flag).
+     */
+    unsigned long events_enable;
+
+    /*
+     * Address for callbacks hypervisor -> guest OS.
+     * Stack frame looks like that of an interrupt.
+     * Code segment is the default flat selector.
+     * This handler will only be called when events_enable is non-zero.
+     */
+    unsigned long event_address;
+
+    /*
+     * Hypervisor uses this callback when it takes a fault on behalf of
+     * an application. This can happen when returning from interrupts for
+     * example: various faults can occur when reloading the segment
+     * registers, and executing 'iret'.
+     * This callback is provided with an extended stack frame, augmented
+     * with saved values for segment registers %ds and %es:
+     *  %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+     * Code segment is the default flat selector.
+     * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!!
+     */
+    unsigned long failsafe_address;
+
+    /*
+     * CPU ticks since start of day.
+     * `wall_time' counts CPU ticks in real time.
+     * `domain_time' counts CPU ticks during which this domain has run.
+     */
+    unsigned long ticks_per_ms; /* CPU ticks per millisecond */
+    /*
+     * Current wall_time can be found by rdtsc. Only possible use of
+     * variable below is that it provides a timestamp for last update
+     * of domain_time.
+     */
+    unsigned long long wall_time;
+    unsigned long long domain_time;
+
+    /*
+     * Timeouts for points at which guest OS would like a callback.
+     * This will probably be backed up by a timer heap in the guest OS.
+     * In Linux we use timeouts to update 'jiffies'.
+     */
+    unsigned long long wall_timeout;
+    unsigned long long domain_timeout;
+
+    /*
+     * Real-Time Clock. This shows time, in seconds, since 1.1.1980.
+     * The timestamp shows the CPU 'wall time' when RTC was last read.
+     * Thus it allows a mapping between 'real time' and 'wall time'.
+     */
+    unsigned long      rtc_time;
+    unsigned long long rtc_timestamp;
+
+} shared_info_t;
+
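A guest's event dispatcher might look like this hedged sketch (the handlers are placeholders, and real code would clear 'events' atomically):

    static void dispatch_events(shared_info_t *si)
    {
        unsigned long pending = si->events;
        si->events = 0;               /* ack; should be an atomic exchange */
        if (pending & EVENT_NET_TX) { /* reclaim transmitted packets */ }
        if (pending & EVENT_NET_RX) { /* process received packets    */ }
        if (pending & EVENT_TIMER)  { /* service the updated timeout */ }
        si->events_enable = 1;        /* re-arm callbacks, as described
                                         for events_enable above */
    }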
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct start_info_st {
+    unsigned long nr_pages;       /* total pages allocated to this domain */
+    shared_info_t *shared_info;   /* start address of shared info struct */
+    unsigned long  pt_base;       /* address of page directory */
+    unsigned long phys_base;
+    unsigned long mod_start;      /* start address of pre-loaded module */
+    unsigned long mod_len;        /* size (bytes) of pre-loaded module */
+    net_ring_t *net_rings;
+    int num_net_rings;
+    blk_ring_t *blk_ring;         /* block io communication rings */
+    unsigned char cmd_line[1];    /* variable-length */
+    unsigned long frame_table;    /* mapping of the frame_table for dom0 */
+} start_info_t;
+
+/* For use in guest OSes. */
+extern shared_info_t *HYPERVISOR_shared_info;
+
+#endif /* __HYPERVISOR_IF_H__ */
diff --git a/xen-2.4.16/include/hypervisor-ifs/network.h b/xen-2.4.16/include/hypervisor-ifs/network.h
new file mode 100644 (file)
index 0000000..30a5bf3
--- /dev/null
@@ -0,0 +1,108 @@
+/******************************************************************************
+ * network.h
+ *
+ * Ring data structures for buffering messages between the hypervisor and
+ * guest OSes.  As it stands this is only used for network buffer exchange.
+ *
+ * This file also contains structures and interfaces for the per-domain
+ * routing/filtering tables in the hypervisor.
+ *
+ */
+
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/types.h>
+
+typedef struct tx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} tx_entry_t;
+
+typedef struct rx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} rx_entry_t;
+
+#define TX_RING_SIZE 1024
+#define RX_RING_SIZE 1024
+typedef struct net_ring_st {
+    /*
+     * Guest OS places packets into ring at tx_prod.
+     * Hypervisor removes at tx_cons.
+     * Ring is empty when tx_prod == tx_cons.
+     * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
+     * Hypervisor may be prodded whenever tx_prod is updated, but this is
+     * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+     */
+    tx_entry_t *tx_ring;
+    unsigned int tx_prod, tx_cons, tx_event;
+    unsigned int tx_ring_size;
+    /*
+     * Guest OS places empty buffers into ring at rx_prod.
+     * Hypervisor fills buffers at rx_cons.
+     * Ring is empty when rx_prod == rx_cons.
+     * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
+     * Hypervisor may be prodded whenever rx_prod is updated, but this is
+     * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+     */
+    rx_entry_t *rx_ring;
+    unsigned int rx_prod, rx_cons, rx_event;
+    unsigned int rx_ring_size;
+} net_ring_t;
+
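Following the comment in the struct, a transmit-side producer only needs to prod the hypervisor when the transmitter had stalled; a hedged sketch (assumes hypervisor-if.h is included for HYPERVISOR_net_update, and modulo index wrapping):

    static void net_tx_enqueue(net_ring_t *ring, unsigned long addr,
                               unsigned long size)
    {
        unsigned int old_prod = ring->tx_prod;
        unsigned int idx = old_prod % ring->tx_ring_size;
        ring->tx_ring[idx].addr = addr;
        ring->tx_ring[idx].size = size;
        ring->tx_prod = old_prod + 1;
        if (ring->tx_cons == old_prod)  /* transmitter had stalled, */
            HYPERVISOR_net_update();    /* so prod the hypervisor   */
    }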
+/* This is here for consideration:  Having a global lookup for vifs
+ * may make the guest /proc stuff more straightforward, and could
+ * be used in the routing code.  I don't know if it warrants the 
+ * overhead yet.
+ */
+
+/* net_vif_t sys_vif_list[MAX_SYSTEM_VIFS]; */
+
+/* Specify base of per-domain array. Get returned free slot in the array. */
+net_ring_t *create_net_vif(int domain);
+
+/* Packet routing/filtering code follows:
+ */
+
+#define NETWORK_ACTION_DROP 0
+#define NETWORK_ACTION_PASS 1
+
+typedef struct net_rule_st 
+{
+    u32  src_addr;
+    u32  dst_addr;
+    u16  src_port;
+    u16  dst_port;
+    u32  src_addr_mask;
+    u32  dst_addr_mask;
+    u16  src_port_mask;
+    u16  dst_port_mask;
+
+    int  src_interface;
+    int  dst_interface;
+    int  action;
+} net_rule_t;
+
+/* Network trap operations and associated structure. 
+ * This presently just handles rule insertion and deletion, but will
+ * eventually have code to add and remove interfaces.
+ */
+
+#define NETWORK_OP_ADDRULE      0
+#define NETWORK_OP_DELETERULE   1
+
+typedef struct network_op_st 
+{
+    unsigned long cmd;
+    union
+    {
+        net_rule_t net_rule;
+    } u;
+} network_op_t;
+    
+/* Drop a new rule down to the network tables. */
+int add_net_rule(net_rule_t *rule);
+
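A hedged sketch of building one rule (it assumes a zero mask acts as a wildcard and a full mask requires an exact match, which this header implies but does not state):

    static int drop_dst_port(unsigned short port)
    {
        net_rule_t rule = { 0 };       /* zeroed masks: wildcard fields */
        rule.dst_port      = port;
        rule.dst_port_mask = 0xffff;   /* require an exact port match */
        rule.action        = NETWORK_ACTION_DROP;
        return add_net_rule(&rule);
    }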
+#endif
diff --git a/xen-2.4.16/include/scsi/scsi.h b/xen-2.4.16/include/scsi/scsi.h
new file mode 100644 (file)
index 0000000..ffcb419
--- /dev/null
@@ -0,0 +1,237 @@
+#ifndef _LINUX_SCSI_H
+#define _LINUX_SCSI_H
+
+/*
+ * This header file contains public constants and structures used by
+ * the scsi code for linux.
+ */
+
+/*
+    $Header: /usr/src/linux/include/linux/RCS/scsi.h,v 1.3 1993/09/24 12:20:33 drew Exp $
+
+    For documentation on the OPCODES, MESSAGES, and SENSE values,
+    please consult the SCSI standard.
+
+*/
+
+/*
+ *      SCSI opcodes
+ */
+
+#define TEST_UNIT_READY       0x00
+#define REZERO_UNIT           0x01
+#define REQUEST_SENSE         0x03
+#define FORMAT_UNIT           0x04
+#define READ_BLOCK_LIMITS     0x05
+#define REASSIGN_BLOCKS       0x07
+#define READ_6                0x08
+#define WRITE_6               0x0a
+#define SEEK_6                0x0b
+#define READ_REVERSE          0x0f
+#define WRITE_FILEMARKS       0x10
+#define SPACE                 0x11
+#define INQUIRY               0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT           0x15
+#define RESERVE               0x16
+#define RELEASE               0x17
+#define COPY                  0x18
+#define ERASE                 0x19
+#define MODE_SENSE            0x1a
+#define START_STOP            0x1b
+#define RECEIVE_DIAGNOSTIC    0x1c
+#define SEND_DIAGNOSTIC       0x1d
+#define ALLOW_MEDIUM_REMOVAL  0x1e
+
+#define SET_WINDOW            0x24
+#define READ_CAPACITY         0x25
+#define READ_10               0x28
+#define WRITE_10              0x2a
+#define SEEK_10               0x2b
+#define WRITE_VERIFY          0x2e
+#define VERIFY                0x2f
+#define SEARCH_HIGH           0x30
+#define SEARCH_EQUAL          0x31
+#define SEARCH_LOW            0x32
+#define SET_LIMITS            0x33
+#define PRE_FETCH             0x34
+#define READ_POSITION         0x34
+#define SYNCHRONIZE_CACHE     0x35
+#define LOCK_UNLOCK_CACHE     0x36
+#define READ_DEFECT_DATA      0x37
+#define MEDIUM_SCAN           0x38
+#define COMPARE               0x39
+#define COPY_VERIFY           0x3a
+#define WRITE_BUFFER          0x3b
+#define READ_BUFFER           0x3c
+#define UPDATE_BLOCK          0x3d
+#define READ_LONG             0x3e
+#define WRITE_LONG            0x3f
+#define CHANGE_DEFINITION     0x40
+#define WRITE_SAME            0x41
+#define READ_TOC              0x43
+#define LOG_SELECT            0x4c
+#define LOG_SENSE             0x4d
+#define MODE_SELECT_10        0x55
+#define RESERVE_10            0x56
+#define RELEASE_10            0x57
+#define MODE_SENSE_10         0x5a
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define MOVE_MEDIUM           0xa5
+#define READ_12               0xa8
+#define WRITE_12              0xaa
+#define WRITE_VERIFY_12       0xae
+#define SEARCH_HIGH_12        0xb0
+#define SEARCH_EQUAL_12       0xb1
+#define SEARCH_LOW_12         0xb2
+#define READ_ELEMENT_STATUS   0xb8
+#define SEND_VOLUME_TAG       0xb6
+#define WRITE_LONG_2          0xea
+
+/*
+ *  Status codes
+ */
+
+#define GOOD                 0x00
+#define CHECK_CONDITION      0x01
+#define CONDITION_GOOD       0x02
+#define BUSY                 0x04
+#define INTERMEDIATE_GOOD    0x08
+#define INTERMEDIATE_C_GOOD  0x0a
+#define RESERVATION_CONFLICT 0x0c
+#define COMMAND_TERMINATED   0x11
+#define QUEUE_FULL           0x14
+
+#define STATUS_MASK          0x3e
+
+/*
+ *  SENSE KEYS
+ */
+
+#define NO_SENSE            0x00
+#define RECOVERED_ERROR     0x01
+#define NOT_READY           0x02
+#define MEDIUM_ERROR        0x03
+#define HARDWARE_ERROR      0x04
+#define ILLEGAL_REQUEST     0x05
+#define UNIT_ATTENTION      0x06
+#define DATA_PROTECT        0x07
+#define BLANK_CHECK         0x08
+#define COPY_ABORTED        0x0a
+#define ABORTED_COMMAND     0x0b
+#define VOLUME_OVERFLOW     0x0d
+#define MISCOMPARE          0x0e
+
+
+/*
+ *  DEVICE TYPES
+ */
+
+#define TYPE_DISK           0x00
+#define TYPE_TAPE           0x01
+#define TYPE_PRINTER        0x02
+#define TYPE_PROCESSOR      0x03    /* HP scanners use this */
+#define TYPE_WORM           0x04    /* Treated as ROM by our system */
+#define TYPE_ROM            0x05
+#define TYPE_SCANNER        0x06
+#define TYPE_MOD            0x07    /* Magneto-optical disk - 
+                                    * - treated as TYPE_DISK */
+#define TYPE_MEDIUM_CHANGER 0x08
+#define TYPE_COMM           0x09    /* Communications device */
+#define TYPE_ENCLOSURE      0x0d    /* Enclosure Services Device */
+#define TYPE_NO_LUN         0x7f
+
+/*
+ * standard mode-select header prepended to all mode-select commands
+ *
+ * moved here from cdrom.h -- kraxel
+ */
+
+struct ccs_modesel_head
+{
+    u_char  _r1;    /* reserved */
+    u_char  medium; /* device-specific medium type */
+    u_char  _r2;    /* reserved */
+    u_char  block_desc_length; /* block descriptor length */
+    u_char  density; /* device-specific density code */
+    u_char  number_blocks_hi; /* number of blocks in this block desc */
+    u_char  number_blocks_med;
+    u_char  number_blocks_lo;
+    u_char  _r3;
+    u_char  block_length_hi; /* block length for blocks in this desc */
+    u_char  block_length_med;
+    u_char  block_length_lo;
+};
+
+/*
+ *  MESSAGE CODES
+ */
+
+#define COMMAND_COMPLETE    0x00
+#define EXTENDED_MESSAGE    0x01
+#define     EXTENDED_MODIFY_DATA_POINTER    0x00
+#define     EXTENDED_SDTR                   0x01
+#define     EXTENDED_EXTENDED_IDENTIFY      0x02    /* SCSI-I only */
+#define     EXTENDED_WDTR                   0x03
+#define SAVE_POINTERS       0x02
+#define RESTORE_POINTERS    0x03
+#define DISCONNECT          0x04
+#define INITIATOR_ERROR     0x05
+#define ABORT               0x06
+#define MESSAGE_REJECT      0x07
+#define NOP                 0x08
+#define MSG_PARITY_ERROR    0x09
+#define LINKED_CMD_COMPLETE 0x0a
+#define LINKED_FLG_CMD_COMPLETE 0x0b
+#define BUS_DEVICE_RESET    0x0c
+
+#define INITIATE_RECOVERY   0x0f            /* SCSI-II only */
+#define RELEASE_RECOVERY    0x10            /* SCSI-II only */
+
+#define SIMPLE_QUEUE_TAG    0x20
+#define HEAD_OF_QUEUE_TAG   0x21
+#define ORDERED_QUEUE_TAG   0x22
+
+/*
+ * Here are some scsi specific ioctl commands which are sometimes useful.
+ */
+/* These are a few other constants  only used by scsi  devices */
+/* Note that include/linux/cdrom.h also defines IOCTL 0x5300 - 0x5395 */
+
+#define SCSI_IOCTL_GET_IDLUN 0x5382    /* conflicts with CDROMAUDIOBUFSIZ */
+
+/* Used to turn on and off tagged queuing for scsi devices */
+
+#define SCSI_IOCTL_TAGGED_ENABLE 0x5383
+#define SCSI_IOCTL_TAGGED_DISABLE 0x5384
+
+/* Used to obtain the host number of a device. */
+#define SCSI_IOCTL_PROBE_HOST 0x5385
+
+/* Used to get the bus number for a device */
+#define SCSI_IOCTL_GET_BUS_NUMBER 0x5386
+
+/* Used to get the PCI location of a device */
+#define SCSI_IOCTL_GET_PCI 0x5387
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4 
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
+
+#endif
diff --git a/xen-2.4.16/include/scsi/scsi_ioctl.h b/xen-2.4.16/include/scsi/scsi_ioctl.h
new file mode 100644 (file)
index 0000000..937cadf
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef _SCSI_IOCTL_H
+#define _SCSI_IOCTL_H 
+
+#define SCSI_IOCTL_SEND_COMMAND 1
+#define SCSI_IOCTL_TEST_UNIT_READY 2
+#define SCSI_IOCTL_BENCHMARK_COMMAND 3
+#define SCSI_IOCTL_SYNC 4                      /* Request synchronous parameters */
+#define SCSI_IOCTL_START_UNIT 5
+#define SCSI_IOCTL_STOP_UNIT 6
+/* The door lock/unlock constants are compatible with Sun constants for
+   the cdrom */
+#define SCSI_IOCTL_DOORLOCK 0x5380             /* lock the eject mechanism */
+#define SCSI_IOCTL_DOORUNLOCK 0x5381           /* unlock the mechanism   */
+
+#define        SCSI_REMOVAL_PREVENT    1
+#define        SCSI_REMOVAL_ALLOW      0
+
+#ifdef __KERNEL__
+
+/*
+ * Structures used for scsi_ioctl et al.
+ */
+
+typedef struct scsi_ioctl_command {
+       unsigned int inlen;
+       unsigned int outlen;
+       unsigned char data[0];
+} Scsi_Ioctl_Command;
+
+typedef struct scsi_idlun {
+       __u32 dev_id;
+       __u32 host_unique_id;
+} Scsi_Idlun;
+
+/* Fibre Channel WWN, port_id struct */
+typedef struct scsi_fctargaddress
+{
+       __u32 host_port_id;
+       unsigned char host_wwn[8]; // include NULL term.
+} Scsi_FCTargAddress;
+
+extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int scsi_ioctl_send_command(Scsi_Device *dev,
+                                  Scsi_Ioctl_Command *arg);
+
+#endif
+
+#endif
+
+
diff --git a/xen-2.4.16/include/scsi/scsicam.h b/xen-2.4.16/include/scsi/scsicam.h
new file mode 100644 (file)
index 0000000..13e9378
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * scsicam.h - SCSI CAM support functions, use for HDIO_GETGEO, etc.
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ *      Visionary Computing 
+ *      (Unix and Linux consulting and custom programming)
+ *      drew@Colorado.EDU
+ *     +1 (303) 786-7975
+ *
+ * For more information, please consult the SCSI-CAM draft.
+ */
+
+#ifndef SCSICAM_H
+#define SCSICAM_H
+#include <xeno/kdev_t.h>
+extern int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip);
+extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity,
+           unsigned int  *cyls, unsigned int *hds, unsigned int *secs);
+#endif /* def SCSICAM_H */
diff --git a/xen-2.4.16/include/scsi/sg.h b/xen-2.4.16/include/scsi/sg.h
new file mode 100644 (file)
index 0000000..ccb47c8
--- /dev/null
@@ -0,0 +1,330 @@
+#ifndef _SCSI_GENERIC_H
+#define _SCSI_GENERIC_H
+
+/*
+   History:
+    Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user
+     process control of SCSI devices.
+    Development Sponsored by Killy Corp. NY NY
+Original driver (sg.h):
+*       Copyright (C) 1992 Lawrence Foard
+Version 2 and 3 extensions to driver:
+*       Copyright (C) 1998 - 2002 Douglas Gilbert
+
+    Version: 3.1.23 (20020318)
+    This version is for 2.4 series kernels.
+
+    Changes since 3.1.22 (20011208)
+       - change EACCES to EPERM when O_RDONLY is insufficient
+       - suppress newlines in host string ( /proc/scsi/sg/host_strs output)
+       - fix xfer direction, old interface, short reply_len [Travers Carter]
+    Changes since 3.1.21 (20011029)
+       - add support for SG_FLAG_MMAP_IO [permit mmap() on sg devices]
+       - update documentation pointers in this header
+       - put KERNEL_VERSION macros around code that breaks early 2.4 series
+       - fix use count for multiple queued requests on closed fd
+       - switch back to alloc_kiovec()
+    Changes since 3.1.20 (20010814)
+       - use alloc_kiovec_sz() to speed dio [set num_buffer_heads==0]
+       - changes to cope with larger scatter gather element sizes
+       - clean up some printk()s
+       - add MODULE_LICENSE("GPL") [in a 3.1.20 subversion]
+       - fix race around generic_unplug_device() [in a 3.1.20 subversion]
+    Changes since 3.1.19 (20010623)
+       - add SG_GET_ACCESS_COUNT ioctl 
+       - make open() increment and close() decrement access_count
+       - only register first 256 devices, reject subsequent devices
+    Changes since 3.1.18 (20010505)
+       - fix bug that caused long wait when large buffer requested
+       - fix leak in error case of sg_new_read() [report: Eric Barton]
+       - add 'online' column to /proc/scsi/sg/devices
+    Changes since 3.1.17 (20000921)
+       - add CAP_SYS_RAWIO capability for sensitive stuff
+       - compile in dio stuff, procfs 'allow_dio' defaulted off (0)
+       - make premature close and detach more robust
+       - lun masked into commands <= SCSI_2
+       - poll() and async notification now yield POLL_HUP on detach
+       - various 3rd party tweaks tracking lk 2.4 internal changes
+
+Map of SG versions to the Linux kernels in which they appear:
+       ----------        ----------------------------------
+       original          all kernels < 2.2.6
+       2.1.40            2.2.20
+       3.0.x             optional version 3 sg driver for 2.2 series
+       3.1.17++          2.4.0++
+
+Major new features in SG 3.x driver (cf SG 2.x drivers)
+       - SG_IO ioctl() combines the function of write() and read()
+       - new interface (sg_io_hdr_t) but still supports old interface
+       - scatter/gather in user space, direct IO, and mmap supported
+
+ The normal action of this driver is to use the adapter (HBA) driver to DMA
+ data into kernel buffers and then use the CPU to copy the data into the 
+ user space (vice versa for writes). That is called "indirect" IO due to 
+ the double handling of data. There are two methods offered to remove the
+ redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and 
+ 2) using the mmap() system call to map the reserve buffer (this driver has 
+ one reserve buffer per fd) into the user space. Both have their advantages.
+ In terms of absolute speed mmap() is faster. If speed is not a concern, 
+ indirect IO should be fine. Read the documentation for more information.
+
+ ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be
+         needed. That pseudo file's content is defaulted to 0. **
+ Historical note: this SCSI pass-through driver has been known as "sg" for 
+ a decade. In broader kernel discussions "sg" is used to refer to scatter
+ gather techniques. The context should clarify which "sg" is referred to.
+
+ Documentation
+ =============
+ A web site for the SG device driver can be found at:
+       http://www.torque.net/sg  [alternatively check the MAINTAINERS file]
+ The documentation for the sg version 3 driver can be found at:
+       http://www.torque.net/sg/p/sg_v3_ho.html
+ This is a rendering from DocBook source [change the extension to "sgml"
+ or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon).
+
+ The older, version 2 documents discuss the original sg interface in detail:
+       http://www.torque.net/sg/p/scsi-generic.txt
+       http://www.torque.net/sg/p/scsi-generic_long.txt
+ A version of this document (potentially out of date) may also be found in
+ the kernel source tree, probably at:
+        /usr/src/linux/Documentation/scsi-generic.txt .
+
+ Utility and test programs are available at the sg web site. They are 
+ bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the
+ lk 2.4 series).
+
+ There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at:
+       http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO
+*/
+
+
+/* New interface introduced in the 3.x SG drivers follows */
+
+typedef struct sg_iovec /* same structure as used by readv() Linux system */
+{                       /* call. It defines one scatter-gather element. */
+    void * iov_base;            /* Starting address  */
+    size_t iov_len;             /* Length in bytes  */
+} sg_iovec_t;
+
+
+typedef struct sg_io_hdr
+{
+    int interface_id;           /* [i] 'S' for SCSI generic (required) */
+    int dxfer_direction;        /* [i] data transfer direction  */
+    unsigned char cmd_len;      /* [i] SCSI command length ( <= 16 bytes) */
+    unsigned char mx_sb_len;    /* [i] max length to write to sbp */
+    unsigned short iovec_count; /* [i] 0 implies no scatter gather */
+    unsigned int dxfer_len;     /* [i] byte count of data transfer */
+    void * dxferp;              /* [i], [*io] points to data transfer memory
+                                             or scatter gather list */
+    unsigned char * cmdp;       /* [i], [*i] points to command to perform */
+    unsigned char * sbp;        /* [i], [*o] points to sense_buffer memory */
+    unsigned int timeout;       /* [i] MAX_UINT->no timeout (unit: millisec) */
+    unsigned int flags;         /* [i] 0 -> default, see SG_FLAG... */
+    int pack_id;                /* [i->o] unused internally (normally) */
+    void * usr_ptr;             /* [i->o] unused internally */
+    unsigned char status;       /* [o] scsi status */
+    unsigned char masked_status;/* [o] shifted, masked scsi status */
+    unsigned char msg_status;   /* [o] messaging level data (optional) */
+    unsigned char sb_len_wr;    /* [o] byte count actually written to sbp */
+    unsigned short host_status; /* [o] errors from host adapter */
+    unsigned short driver_status;/* [o] errors from software driver */
+    int resid;                  /* [o] dxfer_len - actual_transferred */
+    unsigned int duration;      /* [o] time taken by cmd (unit: millisec) */
+    unsigned int info;          /* [o] auxiliary information */
+} sg_io_hdr_t;  /* 64 bytes long (on i386) */
+
+/* Use negative values to flag difference from original sg_header structure */
+#define SG_DXFER_NONE (-1)      /* e.g. a SCSI Test Unit Ready command */
+#define SG_DXFER_TO_DEV (-2)    /* e.g. a SCSI WRITE command */
+#define SG_DXFER_FROM_DEV (-3)  /* e.g. a SCSI READ command */
+#define SG_DXFER_TO_FROM_DEV (-4) /* treated like SG_DXFER_FROM_DEV with the
+                                  additional property that during indirect
+                                  IO the user buffer is copied into the
+                                  kernel buffers before the transfer */
+#define SG_DXFER_UNKNOWN (-5)   /* Unknown data direction */
+
+/* following flag values can be "or"-ed together */
+#define SG_FLAG_DIRECT_IO 1     /* default is indirect IO */
+#define SG_FLAG_LUN_INHIBIT 2   /* default is overwrite lun in SCSI */
+                               /* command block (when <= SCSI_2) */
+#define SG_FLAG_MMAP_IO 4       /* request memory mapped IO */
+#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
+                               /* user space (debug indirect IO) */
+
+/* following 'info' values are "or"-ed together */
+#define SG_INFO_OK_MASK 0x1
+#define SG_INFO_OK 0x0          /* no sense, host nor driver "noise" */
+#define SG_INFO_CHECK 0x1       /* something abnormal happened */
+
+#define SG_INFO_DIRECT_IO_MASK 0x6
+#define SG_INFO_INDIRECT_IO 0x0 /* data xfer via kernel buffers (or no xfer) */
+#define SG_INFO_DIRECT_IO 0x2   /* direct IO requested and performed */
+#define SG_INFO_MIXED_IO 0x4    /* part direct, part indirect IO */
+
+
+typedef struct sg_scsi_id { /* used by SG_GET_SCSI_ID ioctl() */
+    int host_no;        /* as in "scsi<n>" where 'n' is one of 0, 1, 2 etc */
+    int channel;
+    int scsi_id;        /* scsi id of target device */
+    int lun;
+    int scsi_type;      /* TYPE_... defined in scsi/scsi.h */
+    short h_cmd_per_lun;/* host (adapter) maximum commands per lun */
+    short d_queue_depth;/* device (or adapter) maximum queue length */
+    int unused[2];      /* probably find a good use, set 0 for now */
+} sg_scsi_id_t; /* 32 bytes long on i386 */
+
+typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
+    char req_state;     /* 0 -> not used, 1 -> written, 2 -> ready to read */
+    char orphan;        /* 0 -> normal request, 1 -> from interrupted SG_IO */
+    char sg_io_owned;   /* 0 -> complete with read(), 1 -> owned by SG_IO */
+    char problem;       /* 0 -> no problem detected, 1 -> error to report */
+    int pack_id;        /* pack_id associated with request */
+    void * usr_ptr;     /* user provided pointer (in new interface) */
+    unsigned int duration; /* millisecs elapsed since written (req_state==1)
+                             or request duration (req_state==2) */
+    int unused;
+} sg_req_info_t; /* 20 bytes long on i386 */
+
+
+/* IOCTLs: Those ioctls that are relevant to the SG 3.x drivers follow.
+ [Those that only apply to the SG 2.x drivers are at the end of the file.]
+ (_GET_s yield result via 'int *' 3rd argument unless otherwise indicated) */
+
+#define SG_EMULATED_HOST 0x2203 /* true for emulated host adapter (ATAPI) */
+
+/* Used to configure SCSI command transformation layer for ATAPI devices */
+/* Only supported by the ide-scsi driver */
+#define SG_SET_TRANSFORM 0x2204 /* N.B. 3rd arg is not pointer but value: */
+                     /* 3rd arg = 0 to disable transform, 1 to enable it */
+#define SG_GET_TRANSFORM 0x2205
+
+#define SG_SET_RESERVED_SIZE 0x2275  /* request a new reserved buffer size */
+#define SG_GET_RESERVED_SIZE 0x2272  /* actual size of reserved buffer */
+
+/* The following ioctl has a 'sg_scsi_id_t *' object as its 3rd argument. */
+#define SG_GET_SCSI_ID 0x2276   /* Yields fd's bus, chan, dev, lun + type */
+/* SCSI id information can also be obtained from SCSI_IOCTL_GET_IDLUN */
+
+/* Override host setting and always DMA using low memory ( <16MB on i386) */
+#define SG_SET_FORCE_LOW_DMA 0x2279  /* 0-> use adapter setting, 1-> force */
+#define SG_GET_LOW_DMA 0x227a   /* 0-> use all ram for dma; 1-> low dma ram */
+
+/* When SG_SET_FORCE_PACK_ID set to 1, pack_id is input to read() which
+   tries to fetch a packet with a matching pack_id, waits, or returns EAGAIN.
+   If pack_id is -1 then read oldest waiting. When ...FORCE_PACK_ID set to 0
+   then pack_id ignored by read() and oldest readable fetched. */
+#define SG_SET_FORCE_PACK_ID 0x227b
+#define SG_GET_PACK_ID 0x227c /* Yields oldest readable pack_id (or -1) */
+
+#define SG_GET_NUM_WAITING 0x227d /* Number of commands awaiting read() */
+
+/* Yields max scatter gather tablesize allowed by current host adapter */
+#define SG_GET_SG_TABLESIZE 0x227F  /* 0 implies can't do scatter gather */
+
+#define SG_GET_VERSION_NUM 0x2282 /* Example: version 2.1.34 yields 20134 */
+
+/* Returns -EBUSY if occupied. 3rd argument pointer to int (see next) */
+#define SG_SCSI_RESET 0x2284
+/* Associated values that can be given to SG_SCSI_RESET follow */
+#define                SG_SCSI_RESET_NOTHING   0
+#define                SG_SCSI_RESET_DEVICE    1
+#define                SG_SCSI_RESET_BUS       2
+#define                SG_SCSI_RESET_HOST      3
+
+/* synchronous SCSI command ioctl, (only in version 3 interface) */
+#define SG_IO 0x2285   /* similar effect as write() followed by read() */
+
+#define SG_GET_REQUEST_TABLE 0x2286   /* yields table of active requests */
+
+/* How to treat EINTR during SG_IO ioctl(), only in SG 3.x series */
+#define SG_SET_KEEP_ORPHAN 0x2287 /* 1 -> hold for read(), 0 -> drop (def) */
+#define SG_GET_KEEP_ORPHAN 0x2288
+
+/* yields scsi midlevel's access_count for this SCSI device */
+#define SG_GET_ACCESS_COUNT 0x2289  
+
+
+#define SG_SCATTER_SZ (8 * 4096)  /* PAGE_SIZE not available to user */
+/* Largest size (in bytes) a single scatter-gather list element can have.
+   The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on
+   i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported
+   by adapter then this value is the largest data block that can be
+   read/written by a single scsi command. The user can find the value of
+   PAGE_SIZE by calling getpagesize() defined in unistd.h . */
+
+#define SG_DEFAULT_RETRIES 1
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEF_FORCE_LOW_DMA 0  /* was 1 -> memory below 16MB on i386 */
+#define SG_DEF_FORCE_PACK_ID 0
+#define SG_DEF_KEEP_ORPHAN 0
+#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */
+
+/* maximum outstanding requests, write() yields EDOM if exceeded */
+#define SG_MAX_QUEUE 16
+
+#define SG_BIG_BUFF SG_DEF_RESERVED_SIZE    /* for backward compatibility */
+
+/* Alternate style type names, "..._t" variants preferred */
+typedef struct sg_io_hdr Sg_io_hdr;
+typedef struct sg_iovec Sg_iovec;
+typedef struct sg_scsi_id Sg_scsi_id;
+typedef struct sg_req_info Sg_req_info;
+
+
+/* vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */
+/*   The older SG interface based on the 'sg_header' structure follows.   */
+/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+#define SG_MAX_SENSE 16   /* this only applies to the sg_header interface */
+
+struct sg_header
+{
+    int pack_len;    /* [o] reply_len (ie useless), ignored as input */
+    int reply_len;   /* [i] max length of expected reply (inc. sg_header) */
+    int pack_id;     /* [io] id number of packet (use ints >= 0) */
+    int result;      /* [o] 0==ok, else (+ve) Unix errno (best ignored) */
+    unsigned int twelve_byte:1;
+       /* [i] Force 12 byte command length for group 6 & 7 commands  */
+    unsigned int target_status:5;   /* [o] scsi status from target */
+    unsigned int host_status:8;     /* [o] host status (see "DID" codes) */
+    unsigned int driver_status:8;   /* [o] driver status+suggestion */
+    unsigned int other_flags:10;    /* unused */
+    unsigned char sense_buffer[SG_MAX_SENSE]; /* [o] Output in 3 cases:
+          when target_status is CHECK_CONDITION or
+          when target_status is COMMAND_TERMINATED or
+          when (driver_status & DRIVER_SENSE) is true. */
+};      /* This structure is 36 bytes long on i386 */
+
+
+/* IOCTLs: The following are not required (or ignored) when the sg_io_hdr_t
+          interface is used. They are kept for backward compatibility with
+          the original and version 2 drivers. */
+
+#define SG_SET_TIMEOUT 0x2201  /* unit: jiffies (10ms on i386) */
+#define SG_GET_TIMEOUT 0x2202  /* yield timeout as _return_ value */
+
+/* Get/set command queuing state per fd (default is SG_DEF_COMMAND_Q).
+   Each time a sg_io_hdr_t object is seen on this file descriptor, this
+   command queuing flag is set on (overriding the previous setting). */
+#define SG_GET_COMMAND_Q 0x2270   /* Yields 0 (queuing off) or 1 (on) */
+#define SG_SET_COMMAND_Q 0x2271   /* Change queuing state with 0 or 1 */
+
+/* Turn on/off error sense trace (1 and 0 respectively, default is off).
+   Try using: "# cat /proc/scsi/sg/debug" instead in the v3 driver */
+#define SG_SET_DEBUG 0x227e    /* 0 -> turn off debug */
+
+#define SG_NEXT_CMD_LEN 0x2283  /* override SCSI command length with given
+                  number on the next write() on this file descriptor */
+
+
+/* Defaults, commented if they differ from original sg driver */
+#define SG_DEFAULT_TIMEOUT (60*HZ) /* HZ == 'jiffies in 1 second' */
+#define SG_DEF_COMMAND_Q 0     /* command queuing is always on when
+                                 the new interface is used */
+#define SG_DEF_UNDERRUN_FLAG 0
+
+#endif
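
For illustration, a minimal user-space sketch of the v3 interface declared
above: open a device node (assumed here to be /dev/sg0), fill in a
sg_io_hdr_t for a SCSI INQUIRY, and submit it synchronously with SG_IO.
The command bytes, buffer sizes and device name are illustrative
assumptions, not mandated by this header.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <scsi/sg.h>                  /* the interface declared above */

    int main(void)
    {
        unsigned char inq_cmd[6] = { 0x12, 0, 0, 0, 96, 0 }; /* INQUIRY */
        unsigned char inq_buf[96], sense[32];
        sg_io_hdr_t io;
        int fd = open("/dev/sg0", O_RDONLY);   /* assumed device node */

        if (fd < 0) { perror("open"); return 1; }
        memset(&io, 0, sizeof(io));
        io.interface_id    = 'S';              /* required by the v3 API */
        io.cmd_len         = sizeof(inq_cmd);
        io.cmdp            = inq_cmd;
        io.dxfer_direction = SG_DXFER_FROM_DEV;
        io.dxfer_len       = sizeof(inq_buf);
        io.dxferp          = inq_buf;
        io.mx_sb_len       = sizeof(sense);
        io.sbp             = sense;
        io.timeout         = 5000;             /* milliseconds */

        if (ioctl(fd, SG_IO, &io) < 0) { perror("SG_IO"); return 1; }
        if ((io.info & SG_INFO_OK_MASK) != SG_INFO_OK)
            fprintf(stderr, "problem: scsi status 0x%x\n", io.status);
        else
            printf("vendor: %.8s\n", (char *)inq_buf + 8);
        close(fd);
        return 0;
    }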
diff --git a/xen-2.4.16/include/xeno/blk.h b/xen-2.4.16/include/xeno/blk.h
new file mode 100644 (file)
index 0000000..bc3f554
--- /dev/null
@@ -0,0 +1,409 @@
+#ifndef _BLK_H
+#define _BLK_H
+
+#include <xeno/blkdev.h>
+/*#include <xeno/locks.h>*/
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+
+/*
+ * Spinlock for protecting the request queue which
+ * is mucked around with in interrupts on potentially
+ * multiple CPU's..
+ */
+extern spinlock_t io_request_lock;
+
+/*
+ * Initialization functions.
+ */
+extern int isp16_init(void);
+extern int cdu31a_init(void);
+extern int acsi_init(void);
+extern int mcd_init(void);
+extern int mcdx_init(void);
+extern int sbpcd_init(void);
+extern int aztcd_init(void);
+extern int sony535_init(void);
+extern int gscd_init(void);
+extern int cm206_init(void);
+extern int optcd_init(void);
+extern int sjcd_init(void);
+extern int cdi_init(void);
+extern int hd_init(void);
+extern int ide_init(void);
+extern int xd_init(void);
+extern int mfm_init(void);
+extern int loop_init(void);
+extern int md_init(void);
+extern int ap_init(void);
+extern int ddv_init(void);
+extern int z2_init(void);
+extern int swim3_init(void);
+extern int swimiop_init(void);
+extern int amiga_floppy_init(void);
+extern int atari_floppy_init(void);
+extern int ez_init(void);
+extern int bpcd_init(void);
+extern int ps2esdi_init(void);
+extern int jsfd_init(void);
+extern int viodasd_init(void);
+extern int viocd_init(void);
+
+#if defined(CONFIG_ARCH_S390)
+extern int dasd_init(void);
+extern int xpram_init(void);
+extern int tapeblock_init(void);
+#endif /* CONFIG_ARCH_S390 */
+
+extern void set_device_ro(kdev_t dev,int flag);
+#if 0
+void add_blkdev_randomness(int major);
+#else
+#define add_blkdev_randomness(_major) ((void)0)
+#endif
+
+extern int floppy_init(void);
+extern int rd_doload;          /* 1 = load ramdisk, 0 = don't load */
+extern int rd_prompt;          /* 1 = prompt for ramdisk, 0 = don't prompt */
+extern int rd_image_start;     /* starting block # of image */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
+
+extern unsigned long initrd_start,initrd_end;
+extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
+void initrd_init(void);
+
+#endif
+
+                
+/*
+ * end_request() and friends. Must be called with the request queue spinlock
+ * acquired. All functions called within end_request() _must_be_ atomic.
+ *
+ * Several drivers define their own end_request and call
+ * end_that_request_first() and end_that_request_last()
+ * for parts of the original function. This prevents
+ * code duplication in drivers.
+ */
+
+static inline void blkdev_dequeue_request(struct request * req)
+{
+       list_del(&req->queue);
+}
+
+int end_that_request_first(struct request *req, int uptodate, char *name);
+void end_that_request_last(struct request *req);
+
+#if defined(MAJOR_NR) || defined(IDE_DRIVER)
+
+#undef DEVICE_ON
+#undef DEVICE_OFF
+
+/*
+ * Add entries as needed.
+ */
+
+#ifdef IDE_DRIVER
+
+#define DEVICE_NR(device)      (MINOR(device) >> PARTN_BITS)
+#define DEVICE_NAME "ide"
+
+#elif (MAJOR_NR == RAMDISK_MAJOR)
+
+/* ram disk */
+#define DEVICE_NAME "ramdisk"
+#define DEVICE_NR(device) (MINOR(device))
+#define DEVICE_NO_RANDOM
+
+#elif (MAJOR_NR == Z2RAM_MAJOR)
+
+/* Zorro II Ram */
+#define DEVICE_NAME "Z2RAM"
+#define DEVICE_REQUEST do_z2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == FLOPPY_MAJOR)
+
+static void floppy_off(unsigned int nr);
+
+#define DEVICE_NAME "floppy"
+#define DEVICE_INTR do_floppy
+#define DEVICE_REQUEST do_fd_request
+#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 ))
+#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device))
+
+#elif (MAJOR_NR == HD_MAJOR)
+
+/* Hard disk:  timeout is 6 seconds. */
+#define DEVICE_NAME "hard disk"
+#define DEVICE_INTR do_hd
+#define TIMEOUT_VALUE (6*HZ)
+#define DEVICE_REQUEST do_hd_request
+#define DEVICE_NR(device) (MINOR(device)>>6)
+
+#elif (SCSI_DISK_MAJOR(MAJOR_NR))
+
+#define DEVICE_NAME "scsidisk"
+#define TIMEOUT_VALUE (2*HZ)
+#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4))
+
+/* Kludge to use the same number for both char and block major numbers */
+#elif  (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER)
+
+#define DEVICE_NAME "Multiple devices driver"
+#define DEVICE_REQUEST do_md_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SCSI_TAPE_MAJOR)
+
+#define DEVICE_NAME "scsitape"
+#define DEVICE_INTR do_st  
+#define DEVICE_NR(device) (MINOR(device) & 0x7f)
+
+#elif (MAJOR_NR == OSST_MAJOR)
+
+#define DEVICE_NAME "onstream" 
+#define DEVICE_INTR do_osst
+#define DEVICE_NR(device) (MINOR(device) & 0x7f) 
+#define DEVICE_ON(device) 
+#define DEVICE_OFF(device) 
+
+#elif (MAJOR_NR == SCSI_CDROM_MAJOR)
+
+#define DEVICE_NAME "CD-ROM"
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == XT_DISK_MAJOR)
+
+#define DEVICE_NAME "xt disk"
+#define DEVICE_REQUEST do_xd_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == PS2ESDI_MAJOR)
+
+#define DEVICE_NAME "PS/2 ESDI"
+#define DEVICE_REQUEST do_ps2esdi_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == CDU31A_CDROM_MAJOR)
+
+#define DEVICE_NAME "CDU31A"
+#define DEVICE_REQUEST do_cdu31a_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE))
+
+#define DEVICE_NAME "ACSI"
+#define DEVICE_INTR do_acsi
+#define DEVICE_REQUEST do_acsi_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcd */
+#define DEVICE_REQUEST do_mcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR)
+
+#define DEVICE_NAME "Mitsumi CD-ROM"
+/* #define DEVICE_INTR do_mcdx */
+#define DEVICE_REQUEST do_mcdx_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #1"
+#define DEVICE_REQUEST do_sbpcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #2"
+#define DEVICE_REQUEST do_sbpcd2_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #3"
+#define DEVICE_REQUEST do_sbpcd3_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR)
+
+#define DEVICE_NAME "Matsushita CD-ROM controller #4"
+#define DEVICE_REQUEST do_sbpcd4_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == AZTECH_CDROM_MAJOR)
+
+#define DEVICE_NAME "Aztech CD-ROM"
+#define DEVICE_REQUEST do_aztcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CDU535_CDROM_MAJOR)
+
+#define DEVICE_NAME "SONY-CDU535"
+#define DEVICE_INTR do_cdu535
+#define DEVICE_REQUEST do_cdu535_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR)
+
+#define DEVICE_NAME "Goldstar R420"
+#define DEVICE_REQUEST do_gscd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == CM206_CDROM_MAJOR)
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206"
+#define DEVICE_REQUEST do_cm206_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == OPTICS_CDROM_MAJOR)
+
+#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM"
+#define DEVICE_REQUEST do_optcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == SANYO_CDROM_MAJOR)
+
+#define DEVICE_NAME "Sanyo H94A CD-ROM"
+#define DEVICE_REQUEST do_sjcd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == APBLOCK_MAJOR)
+
+#define DEVICE_NAME "apblock"
+#define DEVICE_REQUEST ap_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DDV_MAJOR)
+
+#define DEVICE_NAME "ddv"
+#define DEVICE_REQUEST ddv_request
+#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS)
+
+#elif (MAJOR_NR == MFM_ACORN_MAJOR)
+
+#define DEVICE_NAME "mfm disk"
+#define DEVICE_INTR do_mfm
+#define DEVICE_REQUEST do_mfm_request
+#define DEVICE_NR(device) (MINOR(device) >> 6)
+
+#elif (MAJOR_NR == NBD_MAJOR)
+
+#define DEVICE_NAME "nbd"
+#define DEVICE_REQUEST do_nbd_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == MDISK_MAJOR)
+
+#define DEVICE_NAME "mdisk"
+#define DEVICE_REQUEST mdisk_request
+#define DEVICE_NR(device) (MINOR(device))
+
+#elif (MAJOR_NR == DASD_MAJOR)
+
+#define DEVICE_NAME "dasd"
+#define DEVICE_REQUEST do_dasd_request
+#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS)
+
+#elif (MAJOR_NR == I2O_MAJOR)
+
+#define DEVICE_NAME "I2O block"
+#define DEVICE_REQUEST i2ob_request
+#define DEVICE_NR(device) (MINOR(device)>>4)
+
+#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR)
+
+#define DEVICE_NAME "ida"
+#define TIMEOUT_VALUE (25*HZ)
+#define DEVICE_REQUEST do_ida_request
+#define DEVICE_NR(device) (MINOR(device) >> 4)
+
+#endif /* MAJOR_NR == whatever */
+
+/* provide DEVICE_xxx defaults, if not explicitly defined
+ * above in the MAJOR_NR==xxx if-elif tree */
+#ifndef DEVICE_ON
+#define DEVICE_ON(device) do {} while (0)
+#endif
+#ifndef DEVICE_OFF
+#define DEVICE_OFF(device) do {} while (0)
+#endif
+
+#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
+#if !defined(IDE_DRIVER)
+
+#ifndef CURRENT
+#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+#ifndef QUEUE_EMPTY
+#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head)
+#endif
+
+#ifndef DEVICE_NAME
+#define DEVICE_NAME "unknown"
+#endif
+
+#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev)
+
+#ifdef DEVICE_INTR
+static void (*DEVICE_INTR)(void) = NULL;
+#endif
+
+#define SET_INTR(x) (DEVICE_INTR = (x))
+
+#ifdef DEVICE_REQUEST
+static void (DEVICE_REQUEST)(request_queue_t *);
+#endif 
+  
+#ifdef DEVICE_INTR
+#define CLEAR_INTR SET_INTR(NULL)
+#else
+#define CLEAR_INTR
+#endif
+
+#define INIT_REQUEST \
+       if (QUEUE_EMPTY) {\
+               CLEAR_INTR; \
+               return; \
+       } \
+       if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \
+               panic(DEVICE_NAME ": request list destroyed"); \
+       if (CURRENT->bh) { \
+               if (!buffer_locked(CURRENT->bh)) \
+                       panic(DEVICE_NAME ": block not locked"); \
+       }
+
+#endif /* !defined(IDE_DRIVER) */
+
+
+#ifndef LOCAL_END_REQUEST      /* If we have our own end_request, we do not want to include this mess */
+
+#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR)
+
+static inline void end_request(int uptodate) {
+       struct request *req = CURRENT;
+
+       if (end_that_request_first(req, uptodate, DEVICE_NAME))
+               return;
+
+#ifndef DEVICE_NO_RANDOM
+       add_blkdev_randomness(MAJOR(req->rq_dev));
+#endif
+       DEVICE_OFF(req->rq_dev);
+       blkdev_dequeue_request(req);
+       end_that_request_last(req);
+}
+
+#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */
+#endif /* LOCAL_END_REQUEST */
+
+#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */
+#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */
+
+#endif /* _BLK_H */
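
As a usage note, a classic 2.4-style driver consumes these macros by
defining its major before the include; blk.h then supplies DEVICE_NAME,
DEVICE_REQUEST, CURRENT and friends from the #elif tree above. A sketch
using the XT disk entries purely as an example (the transfer logic is a
placeholder):

    #define MAJOR_NR XT_DISK_MAJOR        /* picks the "xt disk" entries */
    #include <xeno/blk.h>                 /* defines DEVICE_REQUEST as
                                             do_xd_request */

    static void do_xd_request(request_queue_t *q)
    {
        for (;;) {
            INIT_REQUEST;                 /* returns when the queue is empty */
            /* ... transfer CURRENT->current_nr_sectors sectors starting
                   at CURRENT->sector to/from CURRENT->buffer ... */
            end_request(1);               /* 1 -> success: dequeue and
                                             complete the request */
        }
    }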
diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h
new file mode 100644 (file)
index 0000000..2701eb8
--- /dev/null
@@ -0,0 +1,375 @@
+#ifndef _LINUX_BLKDEV_H
+#define _LINUX_BLKDEV_H
+
+#include <xeno/lib.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <xeno/list.h>
+#include <xeno/kdev_t.h>
+
+/* Some defines from fs.h that may actually be useful to the blkdev layer. */
+#define READ 0
+#define WRITE 1
+#define READA 2
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+extern int unregister_blkdev(unsigned int, const char *);
+extern int invalidate_device(kdev_t, int);
+extern int check_disk_change(kdev_t);
+struct block_device;
+extern void invalidate_bdev(struct block_device *, int);
+
+/*
+ * Metainformation regarding block devices is kept in inode and file
+ * structures. We don't actually want those so we define just as much 
+ * as we need right here.
+ */
+struct file {
+};
+struct inode {
+    kdev_t i_rdev; /* for _open and _release, specifies the blkdev */
+    struct block_device *i_bdev;
+};
+
+struct block_device_operations {
+        int (*open) (struct inode *, struct file *);
+        int (*release) (struct inode *, struct file *);
+        int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
+        int (*check_media_change) (kdev_t);
+        int (*revalidate) (kdev_t);
+};
+
+
+/*** BUFFER_HEAD stuff: maybe this will die, or live on in reduced form */
+enum bh_state_bits {
+        BH_Uptodate,    /* 1 if the buffer contains valid data */
+        BH_Dirty,       /* 1 if the buffer is dirty */
+        BH_Lock,        /* 1 if the buffer is locked */
+        BH_Req,         /* 0 if the buffer has been invalidated */
+        BH_Mapped,      /* 1 if the buffer has a disk mapping */
+        BH_New,         /* 1 if the buffer is new and not yet written out */
+        BH_Async,       /* 1 if the buffer is under end_buffer_io_async I/O */
+        BH_Wait_IO,     /* 1 if we should write out this buffer */
+        BH_Launder,     /* 1 if we can throttle on this buffer */
+        BH_JBD,         /* 1 if it has an attached journal_head */
+
+        BH_PrivateStart,/* not a state bit, but the first bit available
+                         * for private allocation by other entities
+                         */
+};
+struct buffer_head {
+        struct buffer_head *b_next;     /* Hash queue list */
+        unsigned long b_blocknr;        /* block number */
+        unsigned short b_size;          /* block size */
+        unsigned short b_list;          /* List that this buffer appears */
+        kdev_t b_dev;                   /* device (B_FREE = free) */
+
+        atomic_t b_count;               /* users using this block */
+        kdev_t b_rdev;                  /* Real device */
+        unsigned long b_state;          /* buffer state bitmap (see above) */
+        unsigned long b_flushtime;      /* Time when (dirty) buffer should be written */
+
+        struct buffer_head *b_next_free;/* lru/free list linkage */
+        struct buffer_head *b_prev_free;/* doubly linked list of buffers */
+        struct buffer_head *b_this_page;/* circular list of buffers in one page */
+        struct buffer_head *b_reqnext;  /* request queue */
+
+        struct buffer_head **b_pprev;   /* doubly linked list of hash-queue */
+        char * b_data;                  /* pointer to data block */
+        struct pfn_info *b_page;            /* the page this bh is mapped to */
+        void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+        void *b_private;                /* reserved for b_end_io */
+
+        unsigned long b_rsector;        /* Real buffer location on disk */
+
+        struct inode *       b_inode;
+        struct list_head     b_inode_buffers;   /* doubly linked list of inode dirty buffers */
+};
+
+typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
+void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
+
+#define __buffer_state(bh, state)       (((bh)->b_state & (1UL << BH_##state)) != 0)
+
+#define buffer_uptodate(bh)     __buffer_state(bh,Uptodate)
+#define buffer_dirty(bh)        __buffer_state(bh,Dirty)
+#define buffer_locked(bh)       __buffer_state(bh,Lock)
+#define buffer_req(bh)          __buffer_state(bh,Req)
+#define buffer_mapped(bh)       __buffer_state(bh,Mapped)
+#define buffer_new(bh)          __buffer_state(bh,New)
+#define buffer_async(bh)        __buffer_state(bh,Async)
+#define buffer_launder(bh)      __buffer_state(bh,Launder)
+
+#define bh_offset(bh)           ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+
+extern void set_bh_page(struct buffer_head *bh, struct pfn_info *page, unsigned long offset);
+
+#define touch_buffer(bh)        mark_page_accessed(bh->b_page)
+
+#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state)
+
+static inline void __mark_buffer_clean(struct buffer_head *bh)
+{
+    panic("__mark_buffer_clean");
+}
+
+static inline void mark_buffer_clean(struct buffer_head * bh)
+{
+        if (atomic_set_buffer_clean(bh))
+                __mark_buffer_clean(bh);
+}
+
+static inline void buffer_IO_error(struct buffer_head * bh)
+{
+        mark_buffer_clean(bh);
+        /*
+         * b_end_io has to clear the BH_Uptodate bitflag in the error case!
+         */
+        bh->b_end_io(bh, 0);
+}
+
+/**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
+
+#include <xeno/major.h>
+#include <xeno/sched.h>
+#include <xeno/genhd.h>
+#include <xeno/tqueue.h>
+#include <xeno/list.h>
+
+struct request_queue;
+typedef struct request_queue request_queue_t;
+struct elevator_s;
+typedef struct elevator_s elevator_t;
+
+/*
+ * Ok, this is an expanded form so that we can use the same
+ * request for paging requests.
+ */
+struct request {
+       struct list_head queue;
+       int elevator_sequence;
+
+       volatile int rq_status; /* should split this into a few status bits */
+#define RQ_INACTIVE            (-1)
+#define RQ_ACTIVE              1
+#define RQ_SCSI_BUSY           0xffff
+#define RQ_SCSI_DONE           0xfffe
+#define RQ_SCSI_DISCONNECTING  0xffe0
+
+       kdev_t rq_dev;
+       int cmd;                /* READ or WRITE */
+       int errors;
+       unsigned long start_time;
+       unsigned long sector;
+       unsigned long nr_sectors;
+       unsigned long hard_sector, hard_nr_sectors;
+       unsigned int nr_segments;
+       unsigned int nr_hw_segments;
+       unsigned long current_nr_sectors;
+       void * special;
+       char * buffer;
+       struct completion * waiting;
+       struct buffer_head * bh;
+       struct buffer_head * bhtail;
+       request_queue_t *q;
+};
+
+#include <xeno/elevator.h>
+
+typedef int (merge_request_fn) (request_queue_t *q, 
+                               struct request  *req,
+                               struct buffer_head *bh,
+                               int);
+typedef int (merge_requests_fn) (request_queue_t *q, 
+                                struct request  *req,
+                                struct request  *req2,
+                                int);
+typedef void (request_fn_proc) (request_queue_t *q);
+typedef request_queue_t * (queue_proc) (kdev_t dev);
+typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh);
+typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
+typedef void (unplug_device_fn) (void *q);
+
+/*
+ * Default nr free requests per queue, ll_rw_blk will scale it down
+ * according to available RAM at init time
+ */
+#define QUEUE_NR_REQUESTS      8192
+
+struct request_list {
+       unsigned int count;
+       struct list_head free;
+};
+
+struct request_queue
+{
+       /*
+        * the queue request freelist, one for reads and one for writes
+        */
+       struct request_list     rq[2];
+
+       /*
+        * The total number of requests on each queue
+        */
+       int nr_requests;
+
+       /*
+        * Batching threshold for sleep/wakeup decisions
+        */
+       int batch_requests;
+
+       /*
+        * Together with queue_head for cacheline sharing
+        */
+       struct list_head        queue_head;
+       elevator_t              elevator;
+
+       request_fn_proc         * request_fn;
+       merge_request_fn        * back_merge_fn;
+       merge_request_fn        * front_merge_fn;
+       merge_requests_fn       * merge_requests_fn;
+       make_request_fn         * make_request_fn;
+       plug_device_fn          * plug_device_fn;
+       /*
+        * The queue owner gets to use this for whatever they like.
+        * ll_rw_blk doesn't touch it.
+        */
+       void                    * queuedata;
+
+       /*
+        * This is used to remove the plug when tq_disk runs.
+        */
+       struct tq_struct        plug_tq;
+
+       /*
+        * Boolean that indicates whether this queue is plugged or not.
+        */
+       char                    plugged;
+
+       /*
+        * Boolean that indicates whether current_request is active or
+        * not.
+        */
+       char                    head_active;
+
+       /*
+        * Is meant to protect the queue in the future instead of
+        * io_request_lock
+        */
+       spinlock_t              queue_lock;
+
+#if 0
+       /*
+        * Tasks wait here for free read and write requests
+        */
+       wait_queue_head_t       wait_for_requests[2];
+#endif
+};
+
+struct blk_dev_struct {
+       /*
+        * queue_proc has to be atomic
+        */
+       request_queue_t         request_queue;
+       queue_proc              *queue;
+       void                    *data;
+};
+
+struct sec_size {
+       unsigned block_size;
+       unsigned block_size_bits;
+};
+
+/*
+ * Used to indicate the default queue for drivers that don't bother
+ * to implement multiple queues.  We have this access macro here
+ * so as to eliminate the need for each and every block device
+ * driver to know about the internal structure of blk_dev[].
+ */
+#define BLK_DEFAULT_QUEUE(_MAJOR)  &blk_dev[_MAJOR].request_queue
+
+extern struct sec_size * blk_sec[MAX_BLKDEV];
+extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
+extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size);
+extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
+extern void generic_make_request(int rw, struct buffer_head * bh);
+extern inline request_queue_t *blk_get_queue(kdev_t dev);
+extern void blkdev_release_request(struct request *);
+
+/*
+ * Access functions for manipulating queue properties
+ */
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern void blk_init_queue(request_queue_t *, request_fn_proc *);
+extern void blk_cleanup_queue(request_queue_t *);
+extern void blk_queue_headactive(request_queue_t *, int);
+extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
+extern void generic_unplug_device(void *);
+
+extern int * blk_size[MAX_BLKDEV];
+
+extern int * blksize_size[MAX_BLKDEV];
+
+extern int * hardsect_size[MAX_BLKDEV];
+
+/*extern int * max_readahead[MAX_BLKDEV];*/
+
+extern int * max_sectors[MAX_BLKDEV];
+
+extern int * max_segments[MAX_BLKDEV];
+
+#define MAX_SEGMENTS 128
+#define MAX_SECTORS 255
+
+#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
+
+#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue)
+#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next)
+#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev)
+#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next)
+#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev)
+
+extern void drive_stat_acct (kdev_t dev, int rw,
+                                       unsigned long nr_sectors, int new_io);
+
+static inline int get_hardsect_size(kdev_t dev)
+{
+       int retval = 512;
+       int major = MAJOR(dev);
+
+       if (hardsect_size[major]) {
+               int minor = MINOR(dev);
+               if (hardsect_size[major][minor])
+                       retval = hardsect_size[major][minor];
+       }
+       return retval;
+}
+
+#define blk_finished_io(nsects)        do { } while (0)
+#define blk_started_io(nsects) do { } while (0)
+
+static inline unsigned int blksize_bits(unsigned int size)
+{
+       unsigned int bits = 8;
+       do {
+               bits++;
+               size >>= 1;
+       } while (size > 256);
+       return bits;
+}
+
+static inline unsigned int block_size(kdev_t dev)
+{
+       int retval = BLOCK_SIZE;
+       int major = MAJOR(dev);
+
+       if (blksize_size[major]) {
+               int minor = MINOR(dev);
+               if (blksize_size[major][minor])
+                       retval = blksize_size[major][minor];
+       }
+       return retval;
+}
+
+#endif
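
A short sketch of how a driver is expected to wire itself into this layer
at init time; the major number, request function and per-minor table are
hypothetical:

    #define XYZ_MAJOR 240                     /* hypothetical major */
    static int xyz_blksizes[16];              /* per-minor block sizes */
    static void do_xyz_request(request_queue_t *q);  /* driver-supplied */

    int xyz_init(void)
    {
        request_queue_t *q = BLK_DEFAULT_QUEUE(XYZ_MAJOR);

        blk_init_queue(q, do_xyz_request);    /* attach the request fn */
        blk_queue_headactive(q, 0);           /* driver dequeues itself */
        blksize_size[XYZ_MAJOR] = xyz_blksizes;  /* consulted by block_size() */
        return 0;
    }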
diff --git a/xen-2.4.16/include/xeno/blkpg.h b/xen-2.4.16/include/xeno/blkpg.h
new file mode 100644 (file)
index 0000000..f4240ab
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _LINUX_BLKPG_H
+#define _LINUX_BLKPG_H
+
+/*
+ * Partition table and disk geometry handling
+ *
+ * A single ioctl with lots of subfunctions:
+ *
+ * Device number stuff:
+ *    get_whole_disk()         (given the device number of a partition,
+ *                               find the device number of the encompassing disk)
+ *    get_all_partitions()     (given the device number of a disk, return the
+ *                              device numbers of all its known partitions)
+ *
+ * Partition stuff:
+ *    add_partition()
+ *    delete_partition()
+ *    test_partition_in_use()  (also for test_disk_in_use)
+ *
+ * Geometry stuff:
+ *    get_geometry()
+ *    set_geometry()
+ *    get_bios_drivedata()
+ *
+ * For today, only the partition stuff - aeb, 990515
+ */
+#include <xeno/ioctl.h>
+
+#define BLKPG      _IO(0x12,105)
+
+/* The argument structure */
+struct blkpg_ioctl_arg {
+        int op;
+        int flags;
+        int datalen;
+        void *data;
+};
+
+/* The subfunctions (for the op field) */
+#define BLKPG_ADD_PARTITION    1
+#define BLKPG_DEL_PARTITION    2
+
+/* Sizes of name fields. Unused at present. */
+#define BLKPG_DEVNAMELTH       64
+#define BLKPG_VOLNAMELTH       64
+
+/* The data structure for ADD_PARTITION and DEL_PARTITION */
+struct blkpg_partition {
+       long long start;                /* starting offset in bytes */
+       long long length;               /* length in bytes */
+       int pno;                        /* partition number */
+       char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2,
+                                          to be used in kernel messages */
+       char volname[BLKPG_VOLNAMELTH]; /* volume label */
+};
+
+#ifdef __KERNEL__
+
+extern char * partition_name(kdev_t dev);
+extern int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_BLKPG_H */
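
For illustration, a user-space sketch of the ADD_PARTITION subfunction;
the device node, offsets and partition number are assumptions:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/blkpg.h>      /* user-space spelling of this header */

    int main(void)
    {
        struct blkpg_partition part = { 0 };
        struct blkpg_ioctl_arg arg = { 0 };
        int fd = open("/dev/sda", O_RDWR);    /* assumed whole-disk node */

        if (fd < 0) { perror("open"); return 1; }
        part.start  = 1024LL * 1024;          /* byte offset, not sectors */
        part.length = 64LL * 1024 * 1024;     /* length in bytes */
        part.pno    = 5;                      /* partition number to add */
        arg.op      = BLKPG_ADD_PARTITION;
        arg.datalen = sizeof(part);
        arg.data    = &part;

        if (ioctl(fd, BLKPG, &arg) < 0) perror("BLKPG");
        return 0;
    }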
diff --git a/xen-2.4.16/include/xeno/block.h b/xen-2.4.16/include/xeno/block.h
new file mode 100644 (file)
index 0000000..bb80d09
--- /dev/null
@@ -0,0 +1,11 @@
+/* block.h
+ * 
+ * this is the hypervisor end of the block io code. 
+ */
+
+#include <hypervisor-ifs/block.h>
+
+/* vif prototypes */
+blk_ring_t *create_block_ring(int domain);
+void destroy_block_ring(struct task_struct *p);
+
diff --git a/xen-2.4.16/include/xeno/bootmem.h b/xen-2.4.16/include/xeno/bootmem.h
new file mode 100644 (file)
index 0000000..5e07d1c
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
+ */
+#ifndef _LINUX_BOOTMEM_H
+#define _LINUX_BOOTMEM_H
+
+//#include <asm/pgtable.h>
+
+/*
+ *  simple boot-time physical memory area allocator.
+ */
+
+extern unsigned long max_low_pfn, max_page;
+extern unsigned long min_low_pfn;
+
+#if 0
+
+/*
+ * node_bootmem_map is a map pointer - the bits represent all physical 
+ * memory pages (including holes) on the node.
+ */
+typedef struct bootmem_data {
+       unsigned long node_boot_start;
+       unsigned long node_low_pfn;
+       void *node_bootmem_map;
+       unsigned long last_offset;
+       unsigned long last_pos;
+} bootmem_data_t;
+
+extern unsigned long __init bootmem_bootmap_pages (unsigned long);
+extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
+extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void __init free_bootmem (unsigned long addr, unsigned long size);
+extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+#define alloc_bootmem(x) \
+       __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_low(x) \
+       __alloc_bootmem((x), SMP_CACHE_BYTES, 0)
+#define alloc_bootmem_pages(x) \
+       __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_low_pages(x) \
+       __alloc_bootmem((x), PAGE_SIZE, 0)
+extern unsigned long __init free_all_bootmem (void);
+
+extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
+extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
+extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
+extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
+#define alloc_bootmem_node(pgdat, x) \
+       __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_pages_node(pgdat, x) \
+       __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_low_pages_node(pgdat, x) \
+       __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
+
+#else
+
+extern void __init init_bootmem (unsigned long addr, unsigned long memend);
+extern void * __init alloc_bootmem_low_pages(unsigned long size);
+
+#endif
+
+#endif /* _LINUX_BOOTMEM_H */
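
A sketch of the reduced interface kept here, with hypothetical names for
the caller and the allocation target:

    /* boot-time setup: hand memory to the allocator, then carve out a
       page-aligned, low-memory region before the real allocators exist */
    void __init start_of_day(unsigned long mem_start, unsigned long mem_end)
    {
        void *frame_table;                    /* hypothetical consumer */

        init_bootmem(mem_start, mem_end);
        frame_table = alloc_bootmem_low_pages(4 * PAGE_SIZE);
    }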
diff --git a/xen-2.4.16/include/xeno/brlock.h b/xen-2.4.16/include/xeno/brlock.h
new file mode 100644 (file)
index 0000000..208c457
--- /dev/null
@@ -0,0 +1,220 @@
+#ifndef __LINUX_BRLOCK_H
+#define __LINUX_BRLOCK_H
+
+/*
+ * 'Big Reader' read-write spinlocks.
+ *
+ * super-fast read/write locks, with write-side penalty. The point
+ * is to have a per-CPU read/write lock. Readers lock their CPU-local
+ * readlock, writers must lock all locks to get write access. These
+ * CPU-read-write locks are semantically identical to normal rwlocks.
+ * Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes)
+ *
+ * The most important feature is that these spinlocks do not cause
+ * cacheline ping-pong in the 'most readonly data' case.
+ *
+ * Copyright 2000, Ingo Molnar <mingo@redhat.com>
+ *
+ * Registry idea and naming [ crucial! :-) ] by:
+ *
+ *                 David S. Miller <davem@redhat.com>
+ *
+ * David has an implementation that doesn't use atomic operations in
+ * the read branch via memory ordering tricks - I guess we need to
+ * split this up into a per-arch thing? The atomicity issue is a
+ * secondary item in profiles, at least on x86 platforms.
+ *
+ * The atomic op version overhead is indeed a big deal on
+ * load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and
+ * compare-and-swap cpus (Sparc64).  So we control which
+ * implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM
+ */
+
+/* Register bigreader lock indices here. */
+enum brlock_indices {
+       BR_GLOBALIRQ_LOCK,
+       BR_NETPROTO_LOCK,
+
+       __BR_END
+};
+
+#include <linux/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+
+#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__)
+#define __BRLOCK_USE_ATOMICS
+#else
+#undef __BRLOCK_USE_ATOMICS
+#endif
+
+#ifdef __BRLOCK_USE_ATOMICS
+typedef rwlock_t       brlock_read_lock_t;
+#else
+typedef unsigned int   brlock_read_lock_t;
+#endif
+
+/*
+ * align last allocated index to the next cacheline:
+ */
+#define __BR_IDX_MAX \
+       (((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t))
+
+extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX];
+
+#ifndef __BRLOCK_USE_ATOMICS
+struct br_wrlock {
+       spinlock_t lock;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct br_wrlock __br_write_locks[__BR_IDX_MAX];
+#endif
+
+extern void __br_lock_usage_bug (void);
+
+#ifdef __BRLOCK_USE_ATOMICS
+
+static inline void br_read_lock (enum brlock_indices idx)
+{
+       /*
+        * This causes a link-time bug message if an
+        * invalid index is used:
+        */
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+
+       read_lock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+
+       read_unlock(&__brlock_array[smp_processor_id()][idx]);
+}
+
+#else /* ! __BRLOCK_USE_ATOMICS */
+static inline void br_read_lock (enum brlock_indices idx)
+{
+       unsigned int *ctr;
+       spinlock_t *lock;
+
+       /*
+        * This causes a link-time bug message if an
+        * invalid index is used:
+        */
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+
+       ctr = &__brlock_array[smp_processor_id()][idx];
+       lock = &__br_write_locks[idx].lock;
+again:
+       (*ctr)++;
+       mb();
+       if (spin_is_locked(lock)) {
+               (*ctr)--;
+               wmb(); /*
+                       * The release of the ctr must become visible
+                       * to the other cpus eventually thus wmb(),
+                       * we don't care if spin_is_locked is reordered
+                       * before the releasing of the ctr.
+                       * However IMHO this wmb() is superfluous even in theory.
+                       * It would not be superfluous only if on the
+                       * other CPUs doing a ldl_l instead of an ldl
+                       * would make a difference and I don't think this is
+                       * the case.
+                       * I'd like to clarify this issue further
+                       * but for now this is a slow path so adding the
+                       * wmb() will keep us on the safe side.
+                       */
+               while (spin_is_locked(lock))
+                       barrier();
+               goto again;
+       }
+}
+
+static inline void br_read_unlock (enum brlock_indices idx)
+{
+       unsigned int *ctr;
+
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+
+       ctr = &__brlock_array[smp_processor_id()][idx];
+
+       wmb();
+       (*ctr)--;
+}
+#endif /* __BRLOCK_USE_ATOMICS */
+
+/* write path not inlined - it's rare and larger */
+
+extern void FASTCALL(__br_write_lock (enum brlock_indices idx));
+extern void FASTCALL(__br_write_unlock (enum brlock_indices idx));
+
+static inline void br_write_lock (enum brlock_indices idx)
+{
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+       __br_write_lock(idx);
+}
+
+static inline void br_write_unlock (enum brlock_indices idx)
+{
+       if (idx >= __BR_END)
+               __br_lock_usage_bug();
+       __br_write_unlock(idx);
+}
+
+#else
+# define br_read_lock(idx)     ((void)(idx))
+# define br_read_unlock(idx)   ((void)(idx))
+# define br_write_lock(idx)    ((void)(idx))
+# define br_write_unlock(idx)  ((void)(idx))
+#endif
+
+/*
+ * Now enumerate all of the possible sw/hw IRQ protected
+ * versions of the interfaces.
+ */
+#define br_read_lock_irqsave(idx, flags) \
+       do { local_irq_save(flags); br_read_lock(idx); } while (0)
+
+#define br_read_lock_irq(idx) \
+       do { local_irq_disable(); br_read_lock(idx); } while (0)
+
+#define br_read_lock_bh(idx) \
+       do { local_bh_disable(); br_read_lock(idx); } while (0)
+
+#define br_write_lock_irqsave(idx, flags) \
+       do { local_irq_save(flags); br_write_lock(idx); } while (0)
+
+#define br_write_lock_irq(idx) \
+       do { local_irq_disable(); br_write_lock(idx); } while (0)
+
+#define br_write_lock_bh(idx) \
+       do { local_bh_disable(); br_write_lock(idx); } while (0)
+
+#define br_read_unlock_irqrestore(idx, flags) \
+       do { br_read_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_read_unlock_irq(idx) \
+       do { br_read_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_read_unlock_bh(idx) \
+       do { br_read_unlock(idx); local_bh_enable(); } while (0)
+
+#define br_write_unlock_irqrestore(idx, flags) \
+       do { br_write_unlock(idx); local_irq_restore(flags); } while (0)
+
+#define br_write_unlock_irq(idx) \
+       do { br_write_unlock(idx); local_irq_enable(); } while (0)
+
+#define br_write_unlock_bh(idx) \
+       do { br_write_unlock(idx); local_bh_enable(); } while (0)
+
+#endif /* __LINUX_BRLOCK_H */
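
A sketch of the intended pattern, using the BR_NETPROTO_LOCK index
registered above (the table being protected is hypothetical):

    void proto_lookup(void)
    {
        br_read_lock(BR_NETPROTO_LOCK);       /* cheap: CPU-local lock */
        /* ... walk the mostly-read protocol table ... */
        br_read_unlock(BR_NETPROTO_LOCK);
    }

    void proto_register(void)
    {
        br_write_lock_bh(BR_NETPROTO_LOCK);   /* slow: takes every CPU's
                                                 lock, paying the
                                                 write-side penalty */
        /* ... modify the protocol table ... */
        br_write_unlock_bh(BR_NETPROTO_LOCK);
    }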
diff --git a/xen-2.4.16/include/xeno/byteorder/big_endian.h b/xen-2.4.16/include/xeno/byteorder/big_endian.h
new file mode 100644 (file)
index 0000000..b84efd7
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H
+#define _LINUX_BYTEORDER_BIG_ENDIAN_H
+
+#ifndef __BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#endif
+#ifndef __BIG_ENDIAN_BITFIELD
+#define __BIG_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ((__u32)(x))
+#define __constant_ntohl(x) ((__u32)(x))
+#define __constant_htons(x) ((__u16)(x))
+#define __constant_ntohs(x) ((__u16)(x))
+#define __constant_cpu_to_le64(x) ___constant_swab64((x))
+#define __constant_le64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_le32(x) ___constant_swab32((x))
+#define __constant_le32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_le16(x) ___constant_swab16((x))
+#define __constant_le16_to_cpu(x) ___constant_swab16((x))
+#define __constant_cpu_to_be64(x) ((__u64)(x))
+#define __constant_be64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_be32(x) ((__u32)(x))
+#define __constant_be32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_be16(x) ((__u16)(x))
+#define __constant_be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64(x) __swab64((x))
+#define __le64_to_cpu(x) __swab64((x))
+#define __cpu_to_le32(x) __swab32((x))
+#define __le32_to_cpu(x) __swab32((x))
+#define __cpu_to_le16(x) __swab16((x))
+#define __le16_to_cpu(x) __swab16((x))
+#define __cpu_to_be64(x) ((__u64)(x))
+#define __be64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_be32(x) ((__u32)(x))
+#define __be32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_be16(x) ((__u16)(x))
+#define __be16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_le64p(x) __swab64p((x))
+#define __le64_to_cpup(x) __swab64p((x))
+#define __cpu_to_le32p(x) __swab32p((x))
+#define __le32_to_cpup(x) __swab32p((x))
+#define __cpu_to_le16p(x) __swab16p((x))
+#define __le16_to_cpup(x) __swab16p((x))
+#define __cpu_to_be64p(x) (*(__u64*)(x))
+#define __be64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_be32p(x) (*(__u32*)(x))
+#define __be32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_be16p(x) (*(__u16*)(x))
+#define __be16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_le64s(x) __swab64s((x))
+#define __le64_to_cpus(x) __swab64s((x))
+#define __cpu_to_le32s(x) __swab32s((x))
+#define __le32_to_cpus(x) __swab32s((x))
+#define __cpu_to_le16s(x) __swab16s((x))
+#define __le16_to_cpus(x) __swab16s((x))
+#define __cpu_to_be64s(x) do {} while (0)
+#define __be64_to_cpus(x) do {} while (0)
+#define __cpu_to_be32s(x) do {} while (0)
+#define __be32_to_cpus(x) do {} while (0)
+#define __cpu_to_be16s(x) do {} while (0)
+#define __be16_to_cpus(x) do {} while (0)
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */
diff --git a/xen-2.4.16/include/xeno/byteorder/generic.h b/xen-2.4.16/include/xeno/byteorder/generic.h
new file mode 100644 (file)
index 0000000..d3d63a5
--- /dev/null
@@ -0,0 +1,180 @@
+#ifndef _LINUX_BYTEORDER_GENERIC_H
+#define _LINUX_BYTEORDER_GENERIC_H
+
+/*
+ * linux/byteorder_generic.h
+ * Generic Byte-reordering support
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19970707
+ *    gathered all the good ideas from all asm-foo/byteorder.h into one file,
+ *    cleaned them up.
+ *    I hope it is compliant with non-GCC compilers.
+ *    I decided to put __BYTEORDER_HAS_U64__ in byteorder.h,
+ *    because I wasn't sure it would be ok to put it in types.h
+ *    Upgraded it to 2.1.43
+ * Francois-Rene Rideau <fare@tunes.org> 19971012
+ *    Upgraded it to 2.1.57
+ *    to please Linus T., replaced huge #ifdef's between little/big endian
+ *    by nestedly #include'd files.
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ *    Made it to 2.1.71; now a facelift:
+ *    Put files under include/linux/byteorder/
+ *    Split swab from generic support.
+ *
+ * TODO:
+ *   = Regular kernel maintainers could also replace all these manual
+ *    byteswap macros that remain, disseminated among drivers,
+ *    after some grep or the sources...
+ *   = Linus might want to rename all these macros and files to fit his taste,
+ *    to fit his personal naming scheme.
+ *   = it seems that a few drivers would also appreciate
+ *    nybble swapping support...
+ *   = every architecture could add their byteswap macro in asm/byteorder.h
+ *    see how some architectures already do (i386, alpha, ppc, etc)
+ *   = cpu_to_beXX and beXX_to_cpu might some day need to be well
+ *    distinguished throughout the kernel. This is not the case currently,
+ *    since little endian, big endian, and pdp endian machines needn't it.
+ *    since little endian, big endian, and pdp endian machines don't need it.
+ *    architectures (and F21 Linux addict around?).
+ */
+
+/*
+ * The following macros are to be defined by <asm/byteorder.h>:
+ *
+ * Conversion of long and short int between network and host format
+ *     ntohl(__u32 x)
+ *     ntohs(__u16 x)
+ *     htonl(__u32 x)
+ *     htons(__u16 x)
+ * It seems that some programs (which? where? or perhaps a standard? POSIX?)
+ * might like the above to be functions, not macros (why?).
+ * If that's true, then detect them, and take measures.
+ * Anyway, the measure is: define only ___ntohl as a macro instead,
+ * and in a separate file, have
+ * unsigned long inline ntohl(x){return ___ntohl(x);}
+ *
+ * The same for constant arguments
+ *     __constant_ntohl(__u32 x)
+ *     __constant_ntohs(__u16 x)
+ *     __constant_htonl(__u32 x)
+ *     __constant_htons(__u16 x)
+ *
+ * Conversion of XX-bit integers (16- 32- or 64-)
+ * between native CPU format and little/big endian format
+ * 64-bit stuff only defined for proper architectures
+ *     cpu_to_[bl]eXX(__uXX x)
+ *     [bl]eXX_to_cpu(__uXX x)
+ *
+ * The same, but takes a pointer to the value to convert
+ *     cpu_to_[bl]eXXp(__uXX x)
+ *     [bl]eXX_to_cpup(__uXX x)
+ *
+ * The same, but change in situ
+ *     cpu_to_[bl]eXXs(__uXX x)
+ *     [bl]eXX_to_cpus(__uXX x)
+ *
+ * See asm-foo/byteorder.h for examples of how to provide
+ * architecture-optimized versions
+ *
+ */
+
+
+#if defined(__KERNEL__)
+/*
+ * inside the kernel, we can use nicknames;
+ * outside of it, we must avoid POSIX namespace pollution...
+ */
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+#endif
+
+
+/*
+ * Handle ntohl and suches. These have various compatibility
+ * issues - like we want to give the prototype even though we
+ * also have a macro for them in case some strange program
+ * wants to take the address of the thing or something..
+ *
+ * Note that these used to return a "long" in libc5, even though
+ * long is often 64-bit these days.. Thus the casts.
+ *
+ * They have to be macros in order to do the constant folding
+ * correctly - if the argument is passed into an inline function
+ * it is no longer constant according to gcc.
+ */
+
+#undef ntohl
+#undef ntohs
+#undef htonl
+#undef htons
+
+/*
+ * Do the prototypes. Somebody might want to take the
+ * address or some such sick thing..
+ */
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+extern __u32                   ntohl(__u32);
+extern __u32                   htonl(__u32);
+#else
+extern unsigned long int       ntohl(unsigned long int);
+extern unsigned long int       htonl(unsigned long int);
+#endif
+extern unsigned short int      ntohs(unsigned short int);
+extern unsigned short int      htons(unsigned short int);
+
+
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+
+#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#else
+#define htonl(x) ((unsigned long)___htonl(x))
+#define ntohl(x) ((unsigned long)___ntohl(x))
+#endif
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+
+#endif /* OPTIMIZE */
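+
+/*
+ * Example usage (a sketch; the port and address values are arbitrary):
+ * on-the-wire protocol fields are big-endian, so they are filled in
+ * with hton* and read back with ntoh*.
+ *
+ *     __u16 port  = htons(8080);          0x1f90 becomes 0x901f on LE CPUs
+ *     __u32 addr  = htonl(0x7f000001);    127.0.0.1 in network order
+ *     __u16 hport = ntohs(port);          back to 8080
+ *
+ * With gcc and optimization enabled, constant arguments like these fold
+ * at compile time; otherwise the calls go through the inline swab helpers.
+ */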
+
+
+#endif /* _LINUX_BYTEORDER_GENERIC_H */
diff --git a/xen-2.4.16/include/xeno/byteorder/little_endian.h b/xen-2.4.16/include/xeno/byteorder/little_endian.h
new file mode 100644 (file)
index 0000000..1431663
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+#define _LINUX_BYTEORDER_LITTLE_ENDIAN_H
+
+#ifndef __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN 1234
+#endif
+#ifndef __LITTLE_ENDIAN_BITFIELD
+#define __LITTLE_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+
+#define __constant_htonl(x) ___constant_swab32((x))
+#define __constant_ntohl(x) ___constant_swab32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) ((__u64)(x))
+#define __constant_le64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_le32(x) ((__u32)(x))
+#define __constant_le32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) ((__u64)(x))
+#define __le64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_le32(x) ((__u32)(x))
+#define __le32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) __swab64((x))
+#define __be64_to_cpu(x) __swab64((x))
+#define __cpu_to_be32(x) __swab32((x))
+#define __be32_to_cpu(x) __swab32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) (*(__u64*)(x))
+#define __le64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_le32p(x) (*(__u32*)(x))
+#define __le32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) __swab64p((x))
+#define __be64_to_cpup(x) __swab64p((x))
+#define __cpu_to_be32p(x) __swab32p((x))
+#define __be32_to_cpup(x) __swab32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) do {} while (0)
+#define __le64_to_cpus(x) do {} while (0)
+#define __cpu_to_le32s(x) do {} while (0)
+#define __le32_to_cpus(x) do {} while (0)
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) __swab64s((x))
+#define __be64_to_cpus(x) __swab64s((x))
+#define __cpu_to_be32s(x) __swab32s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
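+
+/*
+ * Example (a sketch): on a little-endian CPU the le forms above are
+ * identity casts and the be forms byteswap, so with x = 0x11223344:
+ *
+ *     __cpu_to_le32(x) == 0x11223344      (no-op)
+ *     __cpu_to_be32(x) == 0x44332211      (__swab32)
+ *     __cpu_to_le32s(&x);                 (leaves x untouched)
+ */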
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */
diff --git a/xen-2.4.16/include/xeno/byteorder/pdp_endian.h b/xen-2.4.16/include/xeno/byteorder/pdp_endian.h
new file mode 100644 (file)
index 0000000..618631c
--- /dev/null
@@ -0,0 +1,88 @@
+#ifndef _LINUX_BYTEORDER_PDP_ENDIAN_H
+#define _LINUX_BYTEORDER_PDP_ENDIAN_H
+
+/*
+ * Could have been named NUXI-endian, but we use the same name as in glibc.
+ * Hopefully only the PDP and its evolutions (old VAXen in compatibility mode)
+ * will ever use this braindead byte order.
+ * This file *should* work, but has not been tested.
+ *
+ * little-endian is 1234; big-endian is 4321; nuxi/pdp-endian is 3412
+ *
+ * I thought VAXen were NUXI-endian, but was told they were correct-endian
+ * (little-endian), though there did exist NUXI-endian machines
+ * (the DEC PDP-11 and old VAXen in compatibility mode).
+ * That makes this file somewhat redundant, but it stands as a proof of concept.
+ *
+ * But what does a __u64 look like: is it 34127856 or 78563412 ???
+ * I don't dare imagine! Hence, no 64-bit byteorder support yet.
+ * Hopefully, 64-bit pdp-endian support will never be required.
+ *
+ */
+
+#ifndef __PDP_ENDIAN
+#define __PDP_ENDIAN 3412
+#endif
+#ifndef __PDP_ENDIAN_BITFIELD
+#define __PDP_ENDIAN_BITFIELD
+#endif
+
+#include <linux/byteorder/swab.h>
+#include <linux/byteorder/swabb.h>
+
+#define __constant_htonl(x) ___constant_swahb32((x))
+#define __constant_ntohl(x) ___constant_swahb32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) I DON'T KNOW
+#define __constant_le64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_le32(x) ___constant_swahw32((x))
+#define __constant_le32_to_cpu(x) ___constant_swahw32((x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) I DON'T KNOW
+#define __constant_be64_to_cpu(x) I DON'T KNOW
+#define __constant_cpu_to_be32(x) ___constant_swahb32((x))
+#define __constant_be32_to_cpu(x) ___constant_swahb32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) I DON'T KNOW
+#define __le64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_le32(x) ___swahw32((x))
+#define __le32_to_cpu(x) ___swahw32((x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) I DON'T KNOW
+#define __be64_to_cpu(x) I DON'T KNOW
+#define __cpu_to_be32(x) __swahb32((x))
+#define __be32_to_cpu(x) __swahb32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) I DON'T KNOW
+#define __le64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_le32p(x) ___swahw32p((x))
+#define __le32_to_cpup(x) ___swahw32p((x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) I DON'T KNOW
+#define __be64_to_cpup(x) I DON'T KNOW
+#define __cpu_to_be32p(x) __swahb32p((x))
+#define __be32_to_cpup(x) __swahb32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) I DON'T KNOW
+#define __le64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_le32s(x) ___swahw32s((x))
+#define __le32_to_cpus(x) ___swahw32s((x))
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) I DON'T KNOW
+#define __be64_to_cpus(x) I DON'T KNOW
+#define __cpu_to_be32s(x) __swahb32s((x))
+#define __be32_to_cpus(x) __swahb32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#include <linux/byteorder/generic.h>
+
+#endif /* _LINUX_BYTEORDER_PDP_ENDIAN_H */
diff --git a/xen-2.4.16/include/xeno/byteorder/swab.h b/xen-2.4.16/include/xeno/byteorder/swab.h
new file mode 100644 (file)
index 0000000..814b451
--- /dev/null
@@ -0,0 +1,190 @@
+#ifndef _LINUX_BYTEORDER_SWAB_H
+#define _LINUX_BYTEORDER_SWAB_H
+
+/*
+ * linux/byteorder/swab.h
+ * Byte-swapping, independently from CPU endianness
+ *     swabXX[ps]?(foo)
+ *
+ * Francois-Rene Rideau <fare@tunes.org> 19971205
+ *    separated swab functions from cpu_to_XX,
+ *    to clean up support for bizarre-endian architectures.
+ *
+ * See asm-i386/byteorder.h and the like for examples of how to provide
+ * architecture-dependent optimized versions
+ *
+ */
+
+/* Casts are necessary for constants, because we never know for sure
+ * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
+ */
+#define ___swab16(x) \
+({ \
+       __u16 __x = (x); \
+       ((__u16)( \
+               (((__u16)(__x) & (__u16)0x00ffU) << 8) | \
+               (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
+})
+
+#define ___swab32(x) \
+({ \
+       __u32 __x = (x); \
+       ((__u32)( \
+               (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
+               (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) | \
+               (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) | \
+               (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
+})
+
+#define ___swab64(x) \
+({ \
+       __u64 __x = (x); \
+       ((__u64)( \
+               (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
+               (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
+               (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+               (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) <<  8) | \
+               (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >>  8) | \
+               (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+               (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+               (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
+})
+
+#define ___constant_swab16(x) \
+       ((__u16)( \
+               (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+               (((__u16)(x) & (__u16)0xff00U) >> 8) ))
+#define ___constant_swab32(x) \
+       ((__u32)( \
+               (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+               (((__u32)(x) & (__u32)0x0000ff00UL) <<  8) | \
+               (((__u32)(x) & (__u32)0x00ff0000UL) >>  8) | \
+               (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
+#define ___constant_swab64(x) \
+       ((__u64)( \
+               (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) <<  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >>  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+               (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) ))
+
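+/*
+ * Worked example: ___swab32(0x12345678).
+ *
+ *     (0x12345678 & 0x000000ff) << 24  =  0x78000000
+ *     (0x12345678 & 0x0000ff00) <<  8  =  0x00560000
+ *     (0x12345678 & 0x00ff0000) >>  8  =  0x00003400
+ *     (0x12345678 & 0xff000000) >> 24  =  0x00000012
+ *
+ * ORed together: 0x78563412, i.e. the four bytes in reverse order.
+ */
+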
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swab16
+#  define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+#endif
+#ifndef __arch__swab32
+#  define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+#endif
+#ifndef __arch__swab64
+#  define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+#endif
+
+#ifndef __arch__swab16p
+#  define __arch__swab16p(x) __arch__swab16(*(x))
+#endif
+#ifndef __arch__swab32p
+#  define __arch__swab32p(x) __arch__swab32(*(x))
+#endif
+#ifndef __arch__swab64p
+#  define __arch__swab64p(x) __arch__swab64(*(x))
+#endif
+
+#ifndef __arch__swab16s
+#  define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+#endif
+#ifndef __arch__swab32s
+#  define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+#endif
+#ifndef __arch__swab64s
+#  define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+#  define __swab16(x) \
+(__builtin_constant_p((__u16)(x)) ? \
+ ___swab16((x)) : \
+ __fswab16((x)))
+#  define __swab32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swab32((x)) : \
+ __fswab32((x)))
+#  define __swab64(x) \
+(__builtin_constant_p((__u64)(x)) ? \
+ ___swab64((x)) : \
+ __fswab64((x)))
+#else
+#  define __swab16(x) __fswab16(x)
+#  define __swab32(x) __fswab32(x)
+#  define __swab64(x) __fswab64(x)
+#endif /* OPTIMIZE */
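+
+/*
+ * Example of the dispatch above (a sketch):
+ *
+ *     __u32 a = __swab32(0x11223344);     constant: folds to 0x44332211
+ *     __u32 b = __swab32(a);              variable: calls __fswab32(a),
+ *                                         which uses __arch__swab32
+ */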
+
+
+static __inline__ __const__ __u16 __fswab16(__u16 x)
+{
+       return __arch__swab16(x);
+}
+static __inline__ __u16 __swab16p(__u16 *x)
+{
+       return __arch__swab16p(x);
+}
+static __inline__ void __swab16s(__u16 *addr)
+{
+       __arch__swab16s(addr);
+}
+
+static __inline__ __const__ __u32 __fswab32(__u32 x)
+{
+       return __arch__swab32(x);
+}
+static __inline__ __u32 __swab32p(__u32 *x)
+{
+       return __arch__swab32p(x);
+}
+static __inline__ void __swab32s(__u32 *addr)
+{
+       __arch__swab32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+static __inline__ __const__ __u64 __fswab64(__u64 x)
+{
+#  ifdef __SWAB_64_THRU_32__
+       __u32 h = x >> 32;
+        __u32 l = x & ((1ULL<<32)-1);
+        return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h)));
+#  else
+       return __arch__swab64(x);
+#  endif
+}
+static __inline__ __u64 __swab64p(__u64 *x)
+{
+       return __arch__swab64p(x);
+}
+static __inline__ void __swab64s(__u64 *addr)
+{
+       __arch__swab64s(addr);
+}
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swab16 __swab16
+#define swab32 __swab32
+#define swab64 __swab64
+#define swab16p __swab16p
+#define swab32p __swab32p
+#define swab64p __swab64p
+#define swab16s __swab16s
+#define swab32s __swab32s
+#define swab64s __swab64s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWAB_H */
diff --git a/xen-2.4.16/include/xeno/byteorder/swabb.h b/xen-2.4.16/include/xeno/byteorder/swabb.h
new file mode 100644 (file)
index 0000000..d28d9a8
--- /dev/null
@@ -0,0 +1,137 @@
+#ifndef _LINUX_BYTEORDER_SWABB_H
+#define _LINUX_BYTEORDER_SWABB_H
+
+/*
+ * linux/byteorder/swabb.h
+ * SWAp Bytes Bizarrely
+ *     swaHHXX[ps]?(foo)
+ *
+ * Support for obNUXIous pdp-endian and other bizarre architectures.
+ * Will Linux ever run on such ancient beasts? If not, this file
+ * will be but a programming pearl. Still, it's a reminder that we
+ * shouldn't be making too many assumptions when trying to be portable.
+ *
+ */
+
+/*
+ * Meaning of the names I chose (vaxlinux people feel free to correct them):
+ * swahw32     swap 16-bit half-words in a 32-bit word
+ * swahb32     swap 8-bit halves of each 16-bit half-word in a 32-bit word
+ *
+ * No 64-bit support yet. I don't know NUXI conventions for long longs.
+ * I guarantee it will be a mess when it's there, though :->
+ * It will be even worse if there are conflicting 64-bit conventions.
+ * Hopefully, no one ever used 64-bit objects on NUXI machines.
+ *
+ */
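+
+/*
+ * Worked example, with x = 0x11223344:
+ *
+ *     ___swahw32(x) == 0x33441122     (16-bit halves exchanged)
+ *     ___swahb32(x) == 0x22114433     (bytes swapped within each half)
+ *
+ * Composing the two gives 0x44332211, i.e. a full 32-bit byteswap.
+ */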
+
+#define ___swahw32(x) \
+({ \
+       __u32 __x = (x); \
+       ((__u32)( \
+               (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \
+               (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \
+})
+#define ___swahb32(x) \
+({ \
+       __u32 __x = (x); \
+       ((__u32)( \
+               (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \
+               (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \
+})
+
+#define ___constant_swahw32(x) \
+       ((__u32)( \
+               (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \
+               (((__u32)(x) & (__u32)0xffff0000UL) >> 16) ))
+#define ___constant_swahb32(x) \
+       ((__u32)( \
+               (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \
+               (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ))
+
+/*
+ * provide defaults when no architecture-specific optimization is detected
+ */
+#ifndef __arch__swahw32
+#  define __arch__swahw32(x) ___swahw32(x)
+#endif
+#ifndef __arch__swahb32
+#  define __arch__swahb32(x) ___swahb32(x)
+#endif
+
+#ifndef __arch__swahw32p
+#  define __arch__swahw32p(x) __swahw32(*(x))
+#endif
+#ifndef __arch__swahb32p
+#  define __arch__swahb32p(x) __swahb32(*(x))
+#endif
+
+#ifndef __arch__swahw32s
+#  define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0)
+#endif
+#ifndef __arch__swahb32s
+#  define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0)
+#endif
+
+
+/*
+ * Allow constant folding
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+#  define __swahw32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahw32((x)) : \
+ __fswahw32((x)))
+#  define __swahb32(x) \
+(__builtin_constant_p((__u32)(x)) ? \
+ ___swahb32((x)) : \
+ __fswahb32((x)))
+#else
+#  define __swahw32(x) __fswahw32(x)
+#  define __swahb32(x) __fswahb32(x)
+#endif /* OPTIMIZE */
+
+
+static __inline__ __const__ __u32 __fswahw32(__u32 x)
+{
+       return __arch__swahw32(x);
+}
+static __inline__ __u32 __swahw32p(__u32 *x)
+{
+       return __arch__swahw32p(x);
+}
+static __inline__ void __swahw32s(__u32 *addr)
+{
+       __arch__swahw32s(addr);
+}
+
+
+static __inline__ __const__ __u32 __fswahb32(__u32 x)
+{
+       return __arch__swahb32(x);
+}
+static __inline__ __u32 __swahb32p(__u32 *x)
+{
+       return __arch__swahb32p(x);
+}
+static __inline__ void __swahb32s(__u32 *addr)
+{
+       __arch__swahb32s(addr);
+}
+
+#ifdef __BYTEORDER_HAS_U64__
+/*
+ * Not supported yet
+ */
+#endif /* __BYTEORDER_HAS_U64__ */
+
+#if defined(__KERNEL__)
+#define swahw32 __swahw32
+#define swahb32 __swahb32
+#define swahw32p __swahw32p
+#define swahb32p __swahb32p
+#define swahw32s __swahw32s
+#define swahb32s __swahb32s
+#endif
+
+#endif /* _LINUX_BYTEORDER_SWABB_H */
diff --git a/xen-2.4.16/include/xeno/cache.h b/xen-2.4.16/include/xeno/cache.h
new file mode 100644 (file)
index 0000000..73a3be7
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef __LINUX_CACHE_H
+#define __LINUX_CACHE_H
+
+#include <xeno/config.h>
+#include <asm/cache.h>
+
+#ifndef L1_CACHE_ALIGN
+#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
+#endif
+
+#ifndef SMP_CACHE_BYTES
+#define SMP_CACHE_BYTES L1_CACHE_BYTES
+#endif
+
+#ifndef ____cacheline_aligned
+#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#endif
+
+#ifndef ____cacheline_aligned_in_smp
+#ifdef CONFIG_SMP
+#define ____cacheline_aligned_in_smp ____cacheline_aligned
+#else
+#define ____cacheline_aligned_in_smp
+#endif /* CONFIG_SMP */
+#endif
+
+#ifndef __cacheline_aligned
+#ifdef MODULE
+#define __cacheline_aligned ____cacheline_aligned
+#else
+#define __cacheline_aligned                                    \
+  __attribute__((__aligned__(SMP_CACHE_BYTES),                 \
+                __section__(".data.cacheline_aligned")))
+#endif
+#endif /* __cacheline_aligned */
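+
+/*
+ * Example usage (a sketch; the two locks are hypothetical): placing
+ * independently-written hot objects on separate cache lines avoids
+ * false sharing between CPUs.
+ *
+ *     static spinlock_t tx_lock ____cacheline_aligned;
+ *     static spinlock_t rx_lock ____cacheline_aligned;
+ */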
+
+#endif /* __LINUX_CACHE_H */
diff --git a/xen-2.4.16/include/xeno/config.h b/xen-2.4.16/include/xeno/config.h
new file mode 100644 (file)
index 0000000..d48fadb
--- /dev/null
@@ -0,0 +1,105 @@
+/******************************************************************************
+ * config.h
+ * 
+ * A Linux-style configuration list.
+ */
+
+#ifndef __XENO_CONFIG_H__
+#define __XENO_CONFIG_H__
+
+#define CONFIG_SMP 1
+#define CONFIG_X86_LOCAL_APIC 1
+#define CONFIG_X86_IO_APIC 1
+#define CONFIG_X86_L1_CACHE_SHIFT 5
+
+#define CONFIG_PCI 1
+#define CONFIG_PCI_BIOS 1
+#define CONFIG_PCI_DIRECT 1
+
+#define CONFIG_IDE 1
+#define CONFIG_BLK_DEV_IDE 1
+#define CONFIG_BLK_DEV_IDEDMA 1
+#define CONFIG_BLK_DEV_IDEPCI 1
+#define CONFIG_IDEDISK_MULTI_MODE 1
+#define CONFIG_IDEDISK_STROKE 1
+#define CONFIG_IDEPCI_SHARE_IRQ 1
+#define CONFIG_BLK_DEV_IDEDMA_PCI 1
+#define CONFIG_IDEDMA_PCI_AUTO 1
+#define CONFIG_IDEDMA_AUTO 1
+#define CONFIG_BLK_DEV_IDE_MODES 1
+
+#define CONFIG_SCSI 1
+#define CONFIG_BLK_DEV_SD 1
+#define CONFIG_SD_EXTRA_DEVS 40
+#define CONFIG_SCSI_MULTI_LUN 1
+
+#define HZ 100
+
+/* Just to keep compiler happy. */
+#define SMP_CACHE_BYTES 64
+#define NR_CPUS 16
+#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define ____cacheline_aligned __cacheline_aligned
+
+/* 0-16MB is fixed monitor space for now. */
+#define MAX_MONITOR_ADDRESS ( 16*1024*1024)
+#define MAX_DMA_ADDRESS     ( 16*1024*1024)
+#define MAX_USABLE_ADDRESS  ((0xfc000000-__PAGE_OFFSET) & ~((1<<22)-1))
+                             /*^^^^^^^^^*/
+                             /*arbitrary*/
+
+/* Linkage for x86 */
+#define FASTCALL(x)     x __attribute__((regparm(3)))
+#define asmlinkage        __attribute__((regparm(0)))
+#define __ALIGN .align 16,0x90
+#define __ALIGN_STR ".align 16,0x90"
+#define SYMBOL_NAME_STR(X) #X
+#define SYMBOL_NAME(X) X
+#define SYMBOL_NAME_LABEL(X) X##:
+#ifdef __ASSEMBLY__
+#define ALIGN __ALIGN
+#define ALIGN_STR __ALIGN_STR
+#define ENTRY(name) \
+  .globl SYMBOL_NAME(name); \
+  ALIGN; \
+  SYMBOL_NAME_LABEL(name)
+#endif
+
+/* syslog levels ==> nothing! */
+#define KERN_NOTICE
+#define KERN_WARNING
+#define KERN_DEBUG
+#define KERN_INFO
+#define KERN_ERR
+#define KERN_CRIT
+#define KERN_EMERG
+#define KERN_ALERT
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define __HYPERVISOR_CS 0x30
+#define __HYPERVISOR_DS 0x38
+#define __GUEST_CS      0x11
+#define __GUEST_DS      0x19
+
+#define NR_syscalls 255
+
+#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f))
+#define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x))))
+
+#define likely(_x) (_x)
+#define unlikely(_x) (_x)
+
+#define dev_probe_lock() ((void)0)
+#define dev_probe_unlock() ((void)0)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define capable(_c) 0
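+
+/*
+ * Example (a sketch; struct frob is hypothetical):
+ *
+ *     struct frob { int id; char name[8]; };
+ *     static struct frob table[4];
+ *
+ *     offsetof(struct frob, name) == 4    (on i386)
+ *     ARRAY_SIZE(table)           == 4
+ *     struct_cpy(&table[0], &table[1]);   (memcpy of sizeof(struct frob))
+ */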
+
+#ifndef __ASSEMBLY__
+extern unsigned long opt_ipbase, opt_nfsserv, opt_gateway, opt_netmask;
+extern unsigned char opt_nfsroot[];
+#endif
+
+#endif /* __XENO_CONFIG_H__ */
diff --git a/xen-2.4.16/include/xeno/ctype.h b/xen-2.4.16/include/xeno/ctype.h
new file mode 100644 (file)
index 0000000..afa3639
--- /dev/null
@@ -0,0 +1,54 @@
+#ifndef _LINUX_CTYPE_H
+#define _LINUX_CTYPE_H
+
+/*
+ * NOTE! This ctype does not handle EOF like the standard C
+ * library is required to.
+ */
+
+#define _U     0x01    /* upper */
+#define _L     0x02    /* lower */
+#define _D     0x04    /* digit */
+#define _C     0x08    /* cntrl */
+#define _P     0x10    /* punct */
+#define _S     0x20    /* white space (space/lf/tab) */
+#define _X     0x40    /* hex digit */
+#define _SP    0x80    /* hard space (0x20) */
+
+extern unsigned char _ctype[];
+
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+
+#define isalnum(c)     ((__ismask(c)&(_U|_L|_D)) != 0)
+#define isalpha(c)     ((__ismask(c)&(_U|_L)) != 0)
+#define iscntrl(c)     ((__ismask(c)&(_C)) != 0)
+#define isdigit(c)     ((__ismask(c)&(_D)) != 0)
+#define isgraph(c)     ((__ismask(c)&(_P|_U|_L|_D)) != 0)
+#define islower(c)     ((__ismask(c)&(_L)) != 0)
+#define isprint(c)     ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
+#define ispunct(c)     ((__ismask(c)&(_P)) != 0)
+#define isspace(c)     ((__ismask(c)&(_S)) != 0)
+#define isupper(c)     ((__ismask(c)&(_U)) != 0)
+#define isxdigit(c)    ((__ismask(c)&(_D|_X)) != 0)
+
+#define isascii(c) (((unsigned char)(c))<=0x7f)
+#define toascii(c) (((unsigned char)(c))&0x7f)
+
+static inline unsigned char __tolower(unsigned char c)
+{
+       if (isupper(c))
+               c -= 'A'-'a';
+       return c;
+}
+
+static inline unsigned char __toupper(unsigned char c)
+{
+       if (islower(c))
+               c -= 'a'-'A';
+       return c;
+}
+
+#define tolower(c) __tolower(c)
+#define toupper(c) __toupper(c)
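+
+/*
+ * Example (a sketch): each classification is one table lookup plus a
+ * mask test, e.g. converting a hex digit to its value:
+ *
+ *     if (isxdigit(c))
+ *         v = isdigit(c) ? c - '0' : __tolower(c) - 'a' + 10;
+ */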
+
+#endif
diff --git a/xen-2.4.16/include/xeno/delay.h b/xen-2.4.16/include/xeno/delay.h
new file mode 100644 (file)
index 0000000..9d70ef0
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef _LINUX_DELAY_H
+#define _LINUX_DELAY_H
+
+/* Copyright (C) 1993 Linus Torvalds */
+
+#include <asm/delay.h>
+#define mdelay(n) (\
+       {unsigned long msec=(n); while (msec--) udelay(1000);})
+
+#endif /* defined(_LINUX_DELAY_H) */
diff --git a/xen-2.4.16/include/xeno/dom0_ops.h b/xen-2.4.16/include/xeno/dom0_ops.h
new file mode 100644 (file)
index 0000000..651a2b7
--- /dev/null
@@ -0,0 +1,37 @@
+/******************************************************************************
+ * dom0_ops.h
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#ifndef __DOM0_OPS_H__
+#define __DOM0_OPS_H__
+
+#define DOM0_NEWDOMAIN   0
+#define DOM0_KILLDOMAIN  1
+
+typedef struct dom0_newdomain_st
+{
+    unsigned int memory_kb;
+    unsigned int num_vifs;
+} dom0_newdomain_t;
+
+typedef struct dom0_killdomain_st
+{
+    unsigned int domain;
+} dom0_killdomain_t;
+
+typedef struct dom0_op_st
+{
+    unsigned long cmd;
+    union
+    {
+        dom0_newdomain_t newdomain;
+        dom0_killdomain_t killdomain;
+    }
+    u;
+} dom0_op_t;
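+
+/*
+ * Example (a sketch; the values are arbitrary): a request to create a
+ * new 16MB domain with one virtual network interface would be built as:
+ *
+ *     dom0_op_t op;
+ *     op.cmd                   = DOM0_NEWDOMAIN;
+ *     op.u.newdomain.memory_kb = 16 * 1024;
+ *     op.u.newdomain.num_vifs  = 1;
+ */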
+
+#endif
diff --git a/xen-2.4.16/include/xeno/elevator.h b/xen-2.4.16/include/xeno/elevator.h
new file mode 100644 (file)
index 0000000..1a8bb5c
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef _LINUX_ELEVATOR_H
+#define _LINUX_ELEVATOR_H
+
+typedef void (elevator_fn) (struct request *, elevator_t *,
+                           struct list_head *,
+                           struct list_head *, int);
+
+typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *,
+                                struct buffer_head *, int, int);
+
+typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
+
+typedef void (elevator_merge_req_fn) (struct request *, struct request *);
+
+struct elevator_s
+{
+       int read_latency;
+       int write_latency;
+
+       elevator_merge_fn *elevator_merge_fn;
+       elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
+       elevator_merge_req_fn *elevator_merge_req_fn;
+
+       unsigned int queue_ID;
+};
+
+int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_noop_merge_req(struct request *, struct request *);
+
+int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int);
+void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
+void elevator_linus_merge_req(struct request *, struct request *);
+
+typedef struct blkelv_ioctl_arg_s {
+       int queue_ID;
+       int read_latency;
+       int write_latency;
+       int max_bomb_segments;
+} blkelv_ioctl_arg_t;
+
+#define BLKELVGET   _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))
+#define BLKELVSET   _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))
+
+extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *);
+extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *);
+
+extern void elevator_init(elevator_t *, elevator_t);
+
+/*
+ * Return values from elevator merger
+ */
+#define ELEVATOR_NO_MERGE      0
+#define ELEVATOR_FRONT_MERGE   1
+#define ELEVATOR_BACK_MERGE    2
+
+/*
+ * This is used in the elevator algorithm.  We don't prioritise reads
+ * over writes any more --- although reads are more time-critical than
+ * writes, by treating them equally we increase filesystem throughput.
+ * This turns out to give better overall performance.  -- sct
+ */
+#define IN_ORDER(s1,s2)                                \
+       ((((s1)->rq_dev == (s2)->rq_dev &&      \
+          (s1)->sector < (s2)->sector)) ||     \
+        (s1)->rq_dev < (s2)->rq_dev)
+
+#define BHRQ_IN_ORDER(bh, rq)                  \
+       ((((bh)->b_rdev == (rq)->rq_dev &&      \
+          (bh)->b_rsector < (rq)->sector)) ||  \
+        (bh)->b_rdev < (rq)->rq_dev)
+
+static inline int elevator_request_latency(elevator_t * elevator, int rw)
+{
+       int latency;
+
+       latency = elevator->read_latency;
+       if (rw != READ)
+               latency = elevator->write_latency;
+
+       return latency;
+}
+
+#define ELEVATOR_NOOP                                                  \
+((elevator_t) {                                                                \
+       0,                              /* read_latency */              \
+       0,                              /* write_latency */             \
+                                                                       \
+       elevator_noop_merge,            /* elevator_merge_fn */         \
+       elevator_noop_merge_cleanup,    /* elevator_merge_cleanup_fn */ \
+       elevator_noop_merge_req,        /* elevator_merge_req_fn */     \
+       })
+
+#define ELEVATOR_LINUS                                                 \
+((elevator_t) {                                                                \
+       8192,                           /* read_latency */              \
+       16384,                          /* write_latency */             \
+                                                                       \
+       elevator_linus_merge,           /* elevator_merge_fn */         \
+       elevator_linus_merge_cleanup,   /* elevator_merge_cleanup_fn */ \
+       elevator_linus_merge_req,       /* elevator_merge_req_fn */     \
+       })
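+
+/*
+ * Example (a sketch; "q" is an assumed request_queue_t with an embedded
+ * elevator): a queue is bound to one of the elevator types above at
+ * initialisation time:
+ *
+ *     elevator_init(&q->elevator, ELEVATOR_LINUS);
+ */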
+
+#endif
diff --git a/xen-2.4.16/include/xeno/errno.h b/xen-2.4.16/include/xeno/errno.h
new file mode 100644 (file)
index 0000000..7cf599f
--- /dev/null
@@ -0,0 +1,132 @@
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define        EPERM            1      /* Operation not permitted */
+#define        ENOENT           2      /* No such file or directory */
+#define        ESRCH            3      /* No such process */
+#define        EINTR            4      /* Interrupted system call */
+#define        EIO              5      /* I/O error */
+#define        ENXIO            6      /* No such device or address */
+#define        E2BIG            7      /* Arg list too long */
+#define        ENOEXEC          8      /* Exec format error */
+#define        EBADF            9      /* Bad file number */
+#define        ECHILD          10      /* No child processes */
+#define        EAGAIN          11      /* Try again */
+#define        ENOMEM          12      /* Out of memory */
+#define        EACCES          13      /* Permission denied */
+#define        EFAULT          14      /* Bad address */
+#define        ENOTBLK         15      /* Block device required */
+#define        EBUSY           16      /* Device or resource busy */
+#define        EEXIST          17      /* File exists */
+#define        EXDEV           18      /* Cross-device link */
+#define        ENODEV          19      /* No such device */
+#define        ENOTDIR         20      /* Not a directory */
+#define        EISDIR          21      /* Is a directory */
+#define        EINVAL          22      /* Invalid argument */
+#define        ENFILE          23      /* File table overflow */
+#define        EMFILE          24      /* Too many open files */
+#define        ENOTTY          25      /* Not a typewriter */
+#define        ETXTBSY         26      /* Text file busy */
+#define        EFBIG           27      /* File too large */
+#define        ENOSPC          28      /* No space left on device */
+#define        ESPIPE          29      /* Illegal seek */
+#define        EROFS           30      /* Read-only file system */
+#define        EMLINK          31      /* Too many links */
+#define        EPIPE           32      /* Broken pipe */
+#define        EDOM            33      /* Math argument out of domain of func */
+#define        ERANGE          34      /* Math result not representable */
+#define        EDEADLK         35      /* Resource deadlock would occur */
+#define        ENAMETOOLONG    36      /* File name too long */
+#define        ENOLCK          37      /* No record locks available */
+#define        ENOSYS          38      /* Function not implemented */
+#define        ENOTEMPTY       39      /* Directory not empty */
+#define        ELOOP           40      /* Too many symbolic links encountered */
+#define        EWOULDBLOCK     EAGAIN  /* Operation would block */
+#define        ENOMSG          42      /* No message of desired type */
+#define        EIDRM           43      /* Identifier removed */
+#define        ECHRNG          44      /* Channel number out of range */
+#define        EL2NSYNC        45      /* Level 2 not synchronized */
+#define        EL3HLT          46      /* Level 3 halted */
+#define        EL3RST          47      /* Level 3 reset */
+#define        ELNRNG          48      /* Link number out of range */
+#define        EUNATCH         49      /* Protocol driver not attached */
+#define        ENOCSI          50      /* No CSI structure available */
+#define        EL2HLT          51      /* Level 2 halted */
+#define        EBADE           52      /* Invalid exchange */
+#define        EBADR           53      /* Invalid request descriptor */
+#define        EXFULL          54      /* Exchange full */
+#define        ENOANO          55      /* No anode */
+#define        EBADRQC         56      /* Invalid request code */
+#define        EBADSLT         57      /* Invalid slot */
+
+#define        EDEADLOCK       EDEADLK
+
+#define        EBFONT          59      /* Bad font file format */
+#define        ENOSTR          60      /* Device not a stream */
+#define        ENODATA         61      /* No data available */
+#define        ETIME           62      /* Timer expired */
+#define        ENOSR           63      /* Out of streams resources */
+#define        ENONET          64      /* Machine is not on the network */
+#define        ENOPKG          65      /* Package not installed */
+#define        EREMOTE         66      /* Object is remote */
+#define        ENOLINK         67      /* Link has been severed */
+#define        EADV            68      /* Advertise error */
+#define        ESRMNT          69      /* Srmount error */
+#define        ECOMM           70      /* Communication error on send */
+#define        EPROTO          71      /* Protocol error */
+#define        EMULTIHOP       72      /* Multihop attempted */
+#define        EDOTDOT         73      /* RFS specific error */
+#define        EBADMSG         74      /* Not a data message */
+#define        EOVERFLOW       75      /* Value too large for defined data type */
+#define        ENOTUNIQ        76      /* Name not unique on network */
+#define        EBADFD          77      /* File descriptor in bad state */
+#define        EREMCHG         78      /* Remote address changed */
+#define        ELIBACC         79      /* Can not access a needed shared library */
+#define        ELIBBAD         80      /* Accessing a corrupted shared library */
+#define        ELIBSCN         81      /* .lib section in a.out corrupted */
+#define        ELIBMAX         82      /* Attempting to link in too many shared libraries */
+#define        ELIBEXEC        83      /* Cannot exec a shared library directly */
+#define        EILSEQ          84      /* Illegal byte sequence */
+#define        ERESTART        85      /* Interrupted system call should be restarted */
+#define        ESTRPIPE        86      /* Streams pipe error */
+#define        EUSERS          87      /* Too many users */
+#define        ENOTSOCK        88      /* Socket operation on non-socket */
+#define        EDESTADDRREQ    89      /* Destination address required */
+#define        EMSGSIZE        90      /* Message too long */
+#define        EPROTOTYPE      91      /* Protocol wrong type for socket */
+#define        ENOPROTOOPT     92      /* Protocol not available */
+#define        EPROTONOSUPPORT 93      /* Protocol not supported */
+#define        ESOCKTNOSUPPORT 94      /* Socket type not supported */
+#define        EOPNOTSUPP      95      /* Operation not supported on transport endpoint */
+#define        EPFNOSUPPORT    96      /* Protocol family not supported */
+#define        EAFNOSUPPORT    97      /* Address family not supported by protocol */
+#define        EADDRINUSE      98      /* Address already in use */
+#define        EADDRNOTAVAIL   99      /* Cannot assign requested address */
+#define        ENETDOWN        100     /* Network is down */
+#define        ENETUNREACH     101     /* Network is unreachable */
+#define        ENETRESET       102     /* Network dropped connection because of reset */
+#define        ECONNABORTED    103     /* Software caused connection abort */
+#define        ECONNRESET      104     /* Connection reset by peer */
+#define        ENOBUFS         105     /* No buffer space available */
+#define        EISCONN         106     /* Transport endpoint is already connected */
+#define        ENOTCONN        107     /* Transport endpoint is not connected */
+#define        ESHUTDOWN       108     /* Cannot send after transport endpoint shutdown */
+#define        ETOOMANYREFS    109     /* Too many references: cannot splice */
+#define        ETIMEDOUT       110     /* Connection timed out */
+#define        ECONNREFUSED    111     /* Connection refused */
+#define        EHOSTDOWN       112     /* Host is down */
+#define        EHOSTUNREACH    113     /* No route to host */
+#define        EALREADY        114     /* Operation already in progress */
+#define        EINPROGRESS     115     /* Operation now in progress */
+#define        ESTALE          116     /* Stale NFS file handle */
+#define        EUCLEAN         117     /* Structure needs cleaning */
+#define        ENOTNAM         118     /* Not a XENIX named type file */
+#define        ENAVAIL         119     /* No XENIX semaphores available */
+#define        EISNAM          120     /* Is a named type file */
+#define        EREMOTEIO       121     /* Remote I/O error */
+#define        EDQUOT          122     /* Quota exceeded */
+
+#define        ENOMEDIUM       123     /* No medium found */
+#define        EMEDIUMTYPE     124     /* Wrong medium type */
+
+#endif
diff --git a/xen-2.4.16/include/xeno/etherdevice.h b/xen-2.4.16/include/xeno/etherdevice.h
new file mode 100644 (file)
index 0000000..bac9b4d
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  NET  is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions for the Ethernet handlers.
+ *
+ * Version:    @(#)eth.h       1.0.4   05/13/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ *             Relocated to include/linux where it belongs by Alan Cox 
+ *                                                     <gw4pts@gw4pts.ampr.org>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ *     WARNING: This move may well be temporary. This file will get merged with others RSN.
+ *
+ */
+#ifndef _LINUX_ETHERDEVICE_H
+#define _LINUX_ETHERDEVICE_H
+
+#include <linux/if_ether.h>
+
+#ifdef __KERNEL__
+extern int             eth_header(struct sk_buff *skb, struct net_device *dev,
+                                  unsigned short type, void *daddr,
+                                  void *saddr, unsigned len);
+extern int             eth_rebuild_header(struct sk_buff *skb);
+extern unsigned short  eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+extern void            eth_header_cache_update(struct hh_cache *hh, struct net_device *dev,
+                                               unsigned char * haddr);
+extern int             eth_header_cache(struct neighbour *neigh,
+                                        struct hh_cache *hh);
+extern int             eth_header_parse(struct sk_buff *skb,
+                                        unsigned char *haddr);
+extern struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv);
+extern struct net_device *alloc_etherdev(int sizeof_priv);
+
+static inline void eth_copy_and_sum (struct sk_buff *dest, unsigned char *src, int len, int base)
+{
+       memcpy (dest->data, src, len);
+}
+
+/**
+ * is_valid_ether_addr - Determine if the given Ethernet address is valid
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
+ * a multicast address, and is not FF:FF:FF:FF:FF:FF.  The multicast
+ * and FF:FF:... tests are combined into the single test "!(addr[0]&1)".
+ *
+ * Return true if the address is valid.
+ */
+static inline int is_valid_ether_addr( u8 *addr )
+{
+       const char zaddr[6] = {0,};
+
+       return !(addr[0]&1) && memcmp( addr, zaddr, 6);
+}
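+
+/*
+ * Example (a sketch; the MAC value is arbitrary): drivers typically
+ * validate the address read from EEPROM before using it:
+ *
+ *     u8 mac[6] = { 0x00, 0xaa, 0x00, 0x12, 0x34, 0x56 };
+ *     if (!is_valid_ether_addr(mac))
+ *         return -EINVAL;
+ */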
+
+#endif
+
+#endif /* _LINUX_ETHERDEVICE_H */
diff --git a/xen-2.4.16/include/xeno/ethtool.h b/xen-2.4.16/include/xeno/ethtool.h
new file mode 100644 (file)
index 0000000..9b65fe1
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * ethtool.h: Defines for Linux ethtool.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright 2001 Jeff Garzik <jgarzik@mandrakesoft.com>
+ * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
+ */
+
+#ifndef _LINUX_ETHTOOL_H
+#define _LINUX_ETHTOOL_H
+
+
+/* This should work for both 32 and 64 bit userland. */
+struct ethtool_cmd {
+       u32     cmd;
+       u32     supported;      /* Features this interface supports */
+       u32     advertising;    /* Features this interface advertises */
+       u16     speed;          /* The forced speed, 10Mb, 100Mb, gigabit */
+       u8      duplex;         /* Duplex, half or full */
+       u8      port;           /* Which connector port */
+       u8      phy_address;
+       u8      transceiver;    /* Which transceiver to use */
+       u8      autoneg;        /* Enable or disable autonegotiation */
+       u32     maxtxpkt;       /* Tx pkts before generating tx int */
+       u32     maxrxpkt;       /* Rx pkts before generating rx int */
+       u32     reserved[4];
+};
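+
+/*
+ * Example (a sketch): a driver answering an ETHTOOL_GSET request fills
+ * this in from its current link state, e.g. autonegotiated 100Mb/s full
+ * duplex over twisted pair:
+ *
+ *     struct ethtool_cmd ecmd;
+ *     memset(&ecmd, 0, sizeof(ecmd));
+ *     ecmd.cmd     = ETHTOOL_GSET;
+ *     ecmd.speed   = SPEED_100;
+ *     ecmd.duplex  = DUPLEX_FULL;
+ *     ecmd.port    = PORT_TP;
+ *     ecmd.autoneg = AUTONEG_ENABLE;
+ */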
+
+#define ETHTOOL_BUSINFO_LEN    32
+/* these strings are set to whatever the driver author decides... */
+struct ethtool_drvinfo {
+       u32     cmd;
+       char    driver[32];     /* driver short name, "tulip", "eepro100" */
+       char    version[32];    /* driver version string */
+       char    fw_version[32]; /* firmware version string, if applicable */
+       char    bus_info[ETHTOOL_BUSINFO_LEN];  /* Bus info for this IF. */
+                               /* For PCI devices, use pci_dev->slot_name. */
+       char    reserved1[32];
+       char    reserved2[24];
+       u32     eedump_len;     /* Size of data from ETHTOOL_GEEPROM (bytes) */
+       u32     regdump_len;    /* Size of data from ETHTOOL_GREGS (bytes) */
+};
+
+#define SOPASS_MAX     6
+/* wake-on-lan settings */
+struct ethtool_wolinfo {
+       u32     cmd;
+       u32     supported;
+       u32     wolopts;
+       u8      sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+};
+
+/* for passing single values */
+struct ethtool_value {
+       u32     cmd;
+       u32     data;
+};
+
+/* for passing big chunks of data */
+struct ethtool_regs {
+       u32     cmd;
+       u32     version; /* driver-specific, indicates different chips/revs */
+       u32     len; /* bytes */
+       u8      data[0];
+};
+
+/* for passing EEPROM chunks */
+struct ethtool_eeprom {
+       u32     cmd;
+       u32     magic;
+       u32     offset; /* in bytes */
+       u32     len; /* in bytes */
+       u8      data[0];
+};
+/* CMDs currently supported */
+#define ETHTOOL_GSET           0x00000001 /* Get settings. */
+#define ETHTOOL_SSET           0x00000002 /* Set settings, privileged. */
+#define ETHTOOL_GDRVINFO       0x00000003 /* Get driver info. */
+#define ETHTOOL_GREGS          0x00000004 /* Get NIC registers, privileged. */
+#define ETHTOOL_GWOL           0x00000005 /* Get wake-on-lan options. */
+#define ETHTOOL_SWOL           0x00000006 /* Set wake-on-lan options, priv. */
+#define ETHTOOL_GMSGLVL                0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL                0x00000008 /* Set driver msg level, priv. */
+#define ETHTOOL_NWAY_RST       0x00000009 /* Restart autonegotiation, priv. */
+#define ETHTOOL_GLINK          0x0000000a /* Get link status */
+#define ETHTOOL_GEEPROM                0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM                0x0000000c /* Set EEPROM data */
+
+/* compatibility with older code */
+#define SPARC_ETH_GSET         ETHTOOL_GSET
+#define SPARC_ETH_SSET         ETHTOOL_SSET
+
+/* Indicates what features are supported by the interface. */
+#define SUPPORTED_10baseT_Half         (1 << 0)
+#define SUPPORTED_10baseT_Full         (1 << 1)
+#define SUPPORTED_100baseT_Half                (1 << 2)
+#define SUPPORTED_100baseT_Full                (1 << 3)
+#define SUPPORTED_1000baseT_Half       (1 << 4)
+#define SUPPORTED_1000baseT_Full       (1 << 5)
+#define SUPPORTED_Autoneg              (1 << 6)
+#define SUPPORTED_TP                   (1 << 7)
+#define SUPPORTED_AUI                  (1 << 8)
+#define SUPPORTED_MII                  (1 << 9)
+#define SUPPORTED_FIBRE                        (1 << 10)
+#define SUPPORTED_BNC                  (1 << 11)
+
+/* Indicates what features are advertised by the interface. */
+#define ADVERTISED_10baseT_Half                (1 << 0)
+#define ADVERTISED_10baseT_Full                (1 << 1)
+#define ADVERTISED_100baseT_Half       (1 << 2)
+#define ADVERTISED_100baseT_Full       (1 << 3)
+#define ADVERTISED_1000baseT_Half      (1 << 4)
+#define ADVERTISED_1000baseT_Full      (1 << 5)
+#define ADVERTISED_Autoneg             (1 << 6)
+#define ADVERTISED_TP                  (1 << 7)
+#define ADVERTISED_AUI                 (1 << 8)
+#define ADVERTISED_MII                 (1 << 9)
+#define ADVERTISED_FIBRE               (1 << 10)
+#define ADVERTISED_BNC                 (1 << 11)
+
+/* The following are all involved in forcing a particular link
+ * mode for the device when setting things.  When getting the
+ * device's settings, these indicate the current mode and whether
+ * it was forced into this mode or autonegotiated.
+ */
+
+/* The forced speed, 10Mb, 100Mb, gigabit. */
+#define SPEED_10               10
+#define SPEED_100              100
+#define SPEED_1000             1000
+
+/* Duplex, half or full. */
+#define DUPLEX_HALF            0x00
+#define DUPLEX_FULL            0x01
+
+/* Which connector port. */
+#define PORT_TP                        0x00
+#define PORT_AUI               0x01
+#define PORT_MII               0x02
+#define PORT_FIBRE             0x03
+#define PORT_BNC               0x04
+
+/* Which transceiver to use. */
+#define XCVR_INTERNAL          0x00
+#define XCVR_EXTERNAL          0x01
+#define XCVR_DUMMY1            0x02
+#define XCVR_DUMMY2            0x03
+#define XCVR_DUMMY3            0x04
+
+/* Enable or disable autonegotiation.  If this is set to enable,
+ * the forced link modes above are completely ignored.
+ */
+#define AUTONEG_DISABLE                0x00
+#define AUTONEG_ENABLE         0x01
+
+/* Wake-On-Lan options. */
+#define WAKE_PHY               (1 << 0)
+#define WAKE_UCAST             (1 << 1)
+#define WAKE_MCAST             (1 << 2)
+#define WAKE_BCAST             (1 << 3)
+#define WAKE_ARP               (1 << 4)
+#define WAKE_MAGIC             (1 << 5)
+#define WAKE_MAGICSECURE       (1 << 6) /* only meaningful if WAKE_MAGIC */
+
+#endif /* _LINUX_ETHTOOL_H */
diff --git a/xen-2.4.16/include/xeno/event.h b/xen-2.4.16/include/xeno/event.h
new file mode 100644 (file)
index 0000000..fdb9fed
--- /dev/null
@@ -0,0 +1,101 @@
+/******************************************************************************
+ * event.h
+ * 
+ * A nice interface for passing asynchronous events to guest OSes.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <xeno/config.h>
+#include <xeno/sched.h>
+#include <asm/bitops.h>
+
+#ifdef CONFIG_SMP
+
+/*
+ * mark_guest_event:
+ *  @p:        Domain to which event should be passed
+ *  @event:    Event number
+ *  RETURNS:   "Bitmask" of CPU on which process is currently running
+ * 
+ * The idea is that the caller may loop over task_list, looking for domains
+ * to pass events to (using this function). The caller accumulates the
+ * bits returned by this function (ORing them together) and then calls
+ * guest_event_notify().
+ * 
+ * Guest_events are per-domain events passed directly to the guest OS
+ * in ring 1. 
+ */
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+    set_bit(event, &p->shared_info->events);
+
+    /*
+     * No need for the runqueue_lock! The check below does not race
+     * with the setting of has_cpu, because that is set with runqueue_lock
+     * held. The lock must be released before hypervisor exit (and so
+     * a write barrier is executed). And, just before hypervisor exit,
+     * outstanding events are checked, so the bit is certainly set early enough.
+     */
+    smp_mb();
+    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+    reschedule(p);
+    return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* As above, but hyp_events are handled within the hypervisor. */
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+    set_bit(event, &p->hyp_events);
+    smp_mb();
+    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+    reschedule(p);
+    return p->has_cpu ? (1 << p->processor) : 0;
+}
+
+/* Notify the given set of CPUs that guest events may be outstanding. */
+static inline void guest_event_notify(unsigned long cpu_mask)
+{
+    cpu_mask &= ~(1 << smp_processor_id());
+    if ( cpu_mask != 0 ) smp_send_event_check_mask(cpu_mask);
+}
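+
+/*
+ * Example of the intended pattern (a sketch; the list walk and event
+ * number are illustrative):
+ *
+ *     unsigned long cpu_mask = 0;
+ *
+ *     for each domain p on the task list:
+ *         cpu_mask |= mark_guest_event(p, event);
+ *
+ *     guest_event_notify(cpu_mask);
+ */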
+
+#else
+
+static inline unsigned long mark_guest_event(struct task_struct *p, int event)
+{
+    set_bit(event, &p->shared_info->events);
+    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+    reschedule(p);
+    return 0;
+}
+
+static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
+{
+    set_bit(event, &p->hyp_events);
+    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+    reschedule(p);
+    return 0;
+}
+
+#define guest_event_notify(_mask) ((void)0)
+
+#endif
+
+/* Notify hypervisor events in the same way as for guest OS events. */
+#define hyp_event_notify(_mask) guest_event_notify(_mask)
+
+/* Clear a guest-OS event from a per-domain mask. */
+static inline void clear_guest_event(struct task_struct *p, int event)
+{
+    clear_bit(event, &p->shared_info->events);
+}
+
+/* Clear a hypervisor event from a per-domain mask. */
+static inline void clear_hyp_event(struct task_struct *p, int event)
+{
+    clear_bit(event, &p->hyp_events);
+}
+
+/* Called on return from (architecture-dependent) entry.S. */
+void do_hyp_events(void);
diff --git a/xen-2.4.16/include/xeno/genhd.h b/xen-2.4.16/include/xeno/genhd.h
new file mode 100644 (file)
index 0000000..58a1734
--- /dev/null
@@ -0,0 +1,313 @@
+#ifndef _LINUX_GENHD_H
+#define _LINUX_GENHD_H
+
+/*
+ *     genhd.h Copyright (C) 1992 Drew Eckhardt
+ *     Generic hard disk header file by  
+ *             Drew Eckhardt
+ *
+ *             <drew@colorado.edu>
+ */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/major.h>
+
+enum {
+/* These three have identical behaviour; use the second one if DOS fdisk gets
+   confused about extended/logical partitions starting past cylinder 1023. */
+       DOS_EXTENDED_PARTITION = 5,
+       LINUX_EXTENDED_PARTITION = 0x85,
+       WIN98_EXTENDED_PARTITION = 0x0f,
+
+       LINUX_SWAP_PARTITION = 0x82,
+       LINUX_RAID_PARTITION = 0xfd,    /* autodetect RAID partition */
+
+       SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION,
+
+       DM6_PARTITION = 0x54,   /* has DDO: use xlated geom & offset */
+       EZD_PARTITION = 0x55,   /* EZ-DRIVE */
+       DM6_AUX1PARTITION = 0x51,       /* no DDO:  use xlated geom */
+       DM6_AUX3PARTITION = 0x53,       /* no DDO:  use xlated geom */
+
+       FREEBSD_PARTITION = 0xa5,    /* FreeBSD Partition ID */
+       OPENBSD_PARTITION = 0xa6,    /* OpenBSD Partition ID */
+       NETBSD_PARTITION = 0xa9,   /* NetBSD Partition ID */
+       BSDI_PARTITION = 0xb7,    /* BSDI Partition ID */
+/* Ours is not to wonder why.. */
+       BSD_PARTITION = FREEBSD_PARTITION,
+       MINIX_PARTITION = 0x81,  /* Minix Partition ID */
+       PLAN9_PARTITION = 0x39,  /* Plan 9 Partition ID */
+       UNIXWARE_PARTITION = 0x63,              /* Partition ID, same as */
+                                               /* GNU_HURD and SCO Unix */
+};
+
+struct partition {
+       unsigned char boot_ind;         /* 0x80 - active */
+       unsigned char head;             /* starting head */
+       unsigned char sector;           /* starting sector */
+       unsigned char cyl;              /* starting cylinder */
+       unsigned char sys_ind;          /* What partition type */
+       unsigned char end_head;         /* end head */
+       unsigned char end_sector;       /* end sector */
+       unsigned char end_cyl;          /* end cylinder */
+       unsigned int start_sect;        /* starting sector counting from 0 */
+       unsigned int nr_sects;          /* nr of sectors in partition */
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+/*#  include <linux/devfs_fs_kernel.h>*/
+
+struct hd_struct {
+       unsigned long start_sect;
+       unsigned long nr_sects;
+    /*devfs_handle_t de;*/              /* primary (master) devfs entry  */
+       int number;                     /* stupid old code wastes space  */
+
+       /* Performance stats: */
+       unsigned int ios_in_flight;
+       unsigned int io_ticks;
+       unsigned int last_idle_time;
+       unsigned int last_queue_change;
+       unsigned int aveq;
+       
+       unsigned int rd_ios;
+       unsigned int rd_merges;
+       unsigned int rd_ticks;
+       unsigned int rd_sectors;
+       unsigned int wr_ios;
+       unsigned int wr_merges;
+       unsigned int wr_ticks;
+       unsigned int wr_sectors;        
+};
+
+#define GENHD_FL_REMOVABLE  1
+
+struct gendisk {
+       int major;                      /* major number of driver */
+       const char *major_name;         /* name of major driver */
+       int minor_shift;                /* number of times minor is shifted to
+                                          get real minor */
+       int max_p;                      /* maximum partitions per device */
+
+       struct hd_struct *part;         /* [indexed by minor] */
+       int *sizes;                     /* [idem], device size in blocks */
+       int nr_real;                    /* number of real devices */
+
+       void *real_devices;             /* internal use */
+       struct gendisk *next;
+       struct block_device_operations *fops;
+
+    /*devfs_handle_t *de_arr;*/         /* one per physical disc */
+       char *flags;                    /* one per physical disc */
+};
+
+/* drivers/block/genhd.c */
+extern struct gendisk *gendisk_head;
+
+extern void add_gendisk(struct gendisk *gp);
+extern void del_gendisk(struct gendisk *gp);
+extern struct gendisk *get_gendisk(kdev_t dev);
+extern int walk_gendisk(int (*walk)(struct gendisk *, void *), void *);
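+
+/*
+ * Example (a sketch; the callback is illustrative): walk_gendisk()
+ * invokes the callback once per registered disk, stopping if it
+ * returns non-zero, so counting real devices looks like:
+ *
+ *     static int count_disks(struct gendisk *gp, void *data)
+ *     {
+ *         *(int *)data += gp->nr_real;
+ *         return 0;
+ *     }
+ *
+ *     int n = 0;
+ *     walk_gendisk(count_disks, &n);
+ */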
+
+#endif  /*  __KERNEL__  */
+
+#ifdef CONFIG_SOLARIS_X86_PARTITION
+
+#define SOLARIS_X86_NUMSLICE   8
+#define SOLARIS_X86_VTOC_SANE  (0x600DDEEEUL)
+
+struct solaris_x86_slice {
+       ushort  s_tag;                  /* ID tag of partition */
+       ushort  s_flag;                 /* permission flags */
+       unsigned int s_start;           /* start sector no of partition */
+       unsigned int s_size;            /* # of blocks in partition */
+};
+
+struct solaris_x86_vtoc {
+       unsigned int v_bootinfo[3];     /* info needed by mboot (unsupported) */
+       unsigned int v_sanity;          /* to verify vtoc sanity */
+       unsigned int v_version;         /* layout version */
+       char    v_volume[8];            /* volume name */
+       ushort  v_sectorsz;             /* sector size in bytes */
+       ushort  v_nparts;               /* number of partitions */
+       unsigned int v_reserved[10];    /* free space */
+       struct solaris_x86_slice
+               v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */
+       unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */
+       char    v_asciilabel[128];      /* for compatibility */
+};
+
+#endif /* CONFIG_SOLARIS_X86_PARTITION */
+
+#ifdef CONFIG_BSD_DISKLABEL
+/*
+ * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il>
+ * updated by Marc Espie <Marc.Espie@openbsd.org>
+ */
+
+/* check against BSD src/sys/sys/disklabel.h for consistency */
+
+#define BSD_DISKMAGIC  (0x82564557UL)  /* The disk magic number */
+#define BSD_MAXPARTITIONS      8
+#define OPENBSD_MAXPARTITIONS  16
+#define BSD_FS_UNUSED          0       /* disklabel unused partition entry ID */
+struct bsd_disklabel {
+       __u32   d_magic;                /* the magic number */
+       __s16   d_type;                 /* drive type */
+       __s16   d_subtype;              /* controller/d_type specific */
+       char    d_typename[16];         /* type name, e.g. "eagle" */
+       char    d_packname[16];                 /* pack identifier */ 
+       __u32   d_secsize;              /* # of bytes per sector */
+       __u32   d_nsectors;             /* # of data sectors per track */
+       __u32   d_ntracks;              /* # of tracks per cylinder */
+       __u32   d_ncylinders;           /* # of data cylinders per unit */
+       __u32   d_secpercyl;            /* # of data sectors per cylinder */
+       __u32   d_secperunit;           /* # of data sectors per unit */
+       __u16   d_sparespertrack;       /* # of spare sectors per track */
+       __u16   d_sparespercyl;         /* # of spare sectors per cylinder */
+       __u32   d_acylinders;           /* # of alt. cylinders per unit */
+       __u16   d_rpm;                  /* rotational speed */
+       __u16   d_interleave;           /* hardware sector interleave */
+       __u16   d_trackskew;            /* sector 0 skew, per track */
+       __u16   d_cylskew;              /* sector 0 skew, per cylinder */
+       __u32   d_headswitch;           /* head switch time, usec */
+       __u32   d_trkseek;              /* track-to-track seek, usec */
+       __u32   d_flags;                /* generic flags */
+#define NDDATA 5
+       __u32   d_drivedata[NDDATA];    /* drive-type specific information */
+#define NSPARE 5
+       __u32   d_spare[NSPARE];        /* reserved for future use */
+       __u32   d_magic2;               /* the magic number (again) */
+       __u16   d_checksum;             /* xor of data incl. partitions */
+
+                       /* filesystem and partition information: */
+       __u16   d_npartitions;          /* number of partitions in following */
+       __u32   d_bbsize;               /* size of boot area at sn0, bytes */
+       __u32   d_sbsize;               /* max size of fs superblock, bytes */
+       struct  bsd_partition {         /* the partition table */
+               __u32   p_size;         /* number of sectors in partition */
+               __u32   p_offset;       /* starting sector */
+               __u32   p_fsize;        /* filesystem basic fragment size */
+               __u8    p_fstype;       /* filesystem type, see below */
+               __u8    p_frag;         /* filesystem fragments per block */
+               __u16   p_cpg;          /* filesystem cylinders per group */
+       } d_partitions[BSD_MAXPARTITIONS];      /* actually may be more */
+};
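+
+/*
+ * A typical consumer validates both magics before trusting anything else
+ * in the label, e.g. (sketch):
+ *
+ *     if (l->d_magic != BSD_DISKMAGIC || l->d_magic2 != BSD_DISKMAGIC)
+ *             return 0;
+ */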
+
+#endif /* CONFIG_BSD_DISKLABEL */
+
+#ifdef CONFIG_UNIXWARE_DISKLABEL
+/*
+ * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
+ * and Krzysztof G. Baranowski <kgb@knm.org.pl>
+ */
+
+#define UNIXWARE_DISKMAGIC     (0xCA5E600DUL)  /* The disk magic number */
+#define UNIXWARE_DISKMAGIC2    (0x600DDEEEUL)  /* The slice table magic nr */
+#define UNIXWARE_NUMSLICE      16
+#define UNIXWARE_FS_UNUSED     0               /* Unused slice entry ID */
+
+struct unixware_slice {
+       __u16   s_label;        /* label */
+       __u16   s_flags;        /* permission flags */
+       __u32   start_sect;     /* starting sector */
+       __u32   nr_sects;       /* number of sectors in slice */
+};
+
+struct unixware_disklabel {
+       __u32   d_type;                 /* drive type */
+       __u32   d_magic;                /* the magic number */
+       __u32   d_version;              /* version number */
+       char    d_serial[12];           /* serial number of the device */
+       __u32   d_ncylinders;           /* # of data cylinders per device */
+       __u32   d_ntracks;              /* # of tracks per cylinder */
+       __u32   d_nsectors;             /* # of data sectors per track */
+       __u32   d_secsize;              /* # of bytes per sector */
+       __u32   d_part_start;           /* # of first sector of this partition */
+       __u32   d_unknown1[12];         /* ? */
+       __u32   d_alt_tbl;              /* byte offset of alternate table */
+       __u32   d_alt_len;              /* byte length of alternate table */
+       __u32   d_phys_cyl;             /* # of physical cylinders per device */
+       __u32   d_phys_trk;             /* # of physical tracks per cylinder */
+       __u32   d_phys_sec;             /* # of physical sectors per track */
+       __u32   d_phys_bytes;           /* # of physical bytes per sector */
+       __u32   d_unknown2;             /* ? */
+       __u32   d_unknown3;             /* ? */
+       __u32   d_pad[8];               /* pad */
+
+       struct unixware_vtoc {
+               __u32   v_magic;                /* the magic number */
+               __u32   v_version;              /* version number */
+               char    v_name[8];              /* volume name */
+               __u16   v_nslices;              /* # of slices */
+               __u16   v_unknown1;             /* ? */
+               __u32   v_reserved[10];         /* reserved */
+               struct unixware_slice
+                       v_slice[UNIXWARE_NUMSLICE];     /* slice headers */
+       } vtoc;
+
+};  /* total size: 408 bytes */
+
+#endif /* CONFIG_UNIXWARE_DISKLABEL */
+
+#ifdef CONFIG_MINIX_SUBPARTITION
+#   define MINIX_NR_SUBPARTITIONS  4
+#endif /* CONFIG_MINIX_SUBPARTITION */
+
+#ifdef __KERNEL__
+
+char *disk_name (struct gendisk *hd, int minor, char *buf);
+
+/*
+ * disk_round_stats is used to round off the IO statistics for a disk
+ * for a complete clock tick.
+ */
+void disk_round_stats(struct hd_struct *hd);
+
+/* 
+ * Account for the completion of an IO request (used by drivers which 
+ * bypass the normal end_request processing) 
+ */
+struct request;
+void req_finished_io(struct request *);
+
+#ifdef DEVFS_MUST_DIE
+extern void devfs_register_partitions (struct gendisk *dev, int minor,
+                                      int unregister);
+#endif
+
+/*
+ * FIXME: this should use genhd->minor_shift, but that is slow to look up.
+ */
+static inline unsigned int disk_index (kdev_t dev)
+{
+       int major = MAJOR(dev);
+       int minor = MINOR(dev);
+       unsigned int index;
+
+       switch (major) {
+               case DAC960_MAJOR+0:
+                       index = (minor & 0x00f8) >> 3;
+                       break;
+               case SCSI_DISK0_MAJOR:
+                       index = (minor & 0x00f0) >> 4;
+                       break;
+               case IDE0_MAJOR:        /* same as HD_MAJOR */
+               case XT_DISK_MAJOR:
+                       index = (minor & 0x0040) >> 6;
+                       break;
+               case IDE1_MAJOR:
+                       index = ((minor & 0x0040) >> 6) + 2;
+                       break;
+               default:
+                       return 0;
+       }
+       return index;
+}
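+
+/*
+ * The masks and shifts above encode how many minors each unit owns:
+ * 8 for DAC960 (3 partition bits), 16 for SCSI disks, 64 for IDE.
+ * E.g. /dev/sdb1 is SCSI_DISK0_MAJOR minor 17, so disk_index()
+ * yields (17 & 0xf0) >> 4 == 1.
+ */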
+
+#endif
+
+#endif
diff --git a/xen-2.4.16/include/xeno/hdreg.h b/xen-2.4.16/include/xeno/hdreg.h
new file mode 100644 (file)
index 0000000..703b750
--- /dev/null
@@ -0,0 +1,662 @@
+#ifndef _LINUX_HDREG_H
+#define _LINUX_HDREG_H
+
+/*
+ * This file contains some defines for the AT-hd-controller.
+ * Various sources.  
+ */
+
+#define HD_IRQ 14                      /* the standard disk interrupt */
+
+/* ide.c has its own port definitions in "ide.h" */
+
+/* Hd controller regs. Ref: IBM AT Bios-listing */
+#define HD_DATA                0x1f0           /* _CTL when writing */
+#define HD_ERROR       0x1f1           /* see err-bits */
+#define HD_NSECTOR     0x1f2           /* nr of sectors to read/write */
+#define HD_SECTOR      0x1f3           /* starting sector */
+#define HD_LCYL                0x1f4           /* starting cylinder */
+#define HD_HCYL                0x1f5           /* high byte of starting cyl */
+#define HD_CURRENT     0x1f6           /* 101dhhhh , d=drive, hhhh=head */
+#define HD_STATUS      0x1f7           /* see status-bits */
+#define HD_FEATURE     HD_ERROR        /* same io address, read=error, write=feature */
+#define HD_PRECOMP     HD_FEATURE      /* obsolete use of this port - predates IDE */
+#define HD_COMMAND     HD_STATUS       /* same io address, read=status, write=cmd */
+
+#define HD_CMD         0x3f6           /* used for resets */
+#define HD_ALTSTATUS   0x3f6           /* same as HD_STATUS but doesn't clear irq */
+
+/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */
+
+/* Bits of HD_STATUS */
+#define ERR_STAT               0x01
+#define INDEX_STAT             0x02
+#define ECC_STAT               0x04    /* Corrected error */
+#define DRQ_STAT               0x08
+#define SEEK_STAT              0x10
+#define WRERR_STAT             0x20
+#define READY_STAT             0x40
+#define BUSY_STAT              0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR               0x01    /* Bad address mark */
+#define TRK0_ERR               0x02    /* couldn't find track 0 */
+#define ABRT_ERR               0x04    /* Command aborted */
+#define MCR_ERR                        0x08    /* media change request */
+#define ID_ERR                 0x10    /* ID field not found */
+#define MC_ERR                 0x20    /* media changed */
+#define ECC_ERR                        0x40    /* Uncorrectable ECC error */
+#define BBD_ERR                        0x80    /* pre-EIDE meaning:  block marked bad */
+#define ICRC_ERR               0x80    /* new meaning:  CRC error during transfer */
+
+/*
+ * Command Header sizes for IOCTL commands
+ *     HDIO_DRIVE_CMD, HDIO_DRIVE_TASK, and HDIO_DRIVE_TASKFILE
+ */
+
+#if 0
+#include <asm/hdreg.h>
+typedef ide_ioreg_t task_ioreg_t;
+#else
+typedef unsigned char task_ioreg_t;
+#endif
+
+#define HDIO_DRIVE_CMD_HDR_SIZE                4*sizeof(task_ioreg_t)
+#define HDIO_DRIVE_TASK_HDR_SIZE       8*sizeof(task_ioreg_t)
+#define HDIO_DRIVE_HOB_HDR_SIZE                8*sizeof(task_ioreg_t)
+
+#define IDE_DRIVE_TASK_INVALID         -1
+#define IDE_DRIVE_TASK_NO_DATA         0
+#define IDE_DRIVE_TASK_SET_XFER                1
+
+#define IDE_DRIVE_TASK_IN              2
+
+#define IDE_DRIVE_TASK_OUT             3
+#define IDE_DRIVE_TASK_RAW_WRITE       4
+
+struct hd_drive_cmd_hdr {
+       task_ioreg_t command;
+       task_ioreg_t sector_number;
+       task_ioreg_t feature;
+       task_ioreg_t sector_count;
+};
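+
+/*
+ * User-space sketch (illustrative): hdparm-style callers issue
+ * HDIO_DRIVE_CMD with a buffer that starts with this 4-byte header and
+ * leaves room for any returned data, e.g. identifying a drive:
+ *
+ *     unsigned char args[4 + 512] = { WIN_IDENTIFY, 0, 0, 1 };
+ *     ioctl(fd, HDIO_DRIVE_CMD, args);
+ *
+ * On success the 512-byte IDENTIFY sector follows the header in args[].
+ */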
+
+typedef struct hd_drive_task_hdr {
+       task_ioreg_t data;
+       task_ioreg_t feature;
+       task_ioreg_t sector_count;
+       task_ioreg_t sector_number;
+       task_ioreg_t low_cylinder;
+       task_ioreg_t high_cylinder;
+       task_ioreg_t device_head;
+       task_ioreg_t command;
+} task_struct_t;
+
+typedef struct hd_drive_hob_hdr {
+       task_ioreg_t data;
+       task_ioreg_t feature;
+       task_ioreg_t sector_count;
+       task_ioreg_t sector_number;
+       task_ioreg_t low_cylinder;
+       task_ioreg_t high_cylinder;
+       task_ioreg_t device_head;
+       task_ioreg_t control;
+} hob_struct_t;
+
+typedef union ide_reg_valid_s {
+       unsigned all                            : 16;
+       struct {
+               unsigned data                   : 1;
+               unsigned error_feature          : 1;
+               unsigned sector                 : 1;
+               unsigned nsector                : 1;
+               unsigned lcyl                   : 1;
+               unsigned hcyl                   : 1;
+               unsigned select                 : 1;
+               unsigned status_command         : 1;
+
+               unsigned data_hob               : 1;
+               unsigned error_feature_hob      : 1;
+               unsigned sector_hob             : 1;
+               unsigned nsector_hob            : 1;
+               unsigned lcyl_hob               : 1;
+               unsigned hcyl_hob               : 1;
+               unsigned select_hob             : 1;
+               unsigned control_hob            : 1;
+       } b;
+} ide_reg_valid_t;
+
+/*
+ * Define standard taskfile in/out register
+ */
+#define IDE_TASKFILE_STD_OUT_FLAGS     0xFE
+#define IDE_TASKFILE_STD_IN_FLAGS      0xFE
+#define IDE_HOB_STD_OUT_FLAGS          0xC0
+#define IDE_HOB_STD_IN_FLAGS           0xC0
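+
+/*
+ * Read against ide_reg_valid_t above: 0xFE sets bits 1-7, i.e. every
+ * base taskfile register except the data port; 0xC0, applied to the
+ * HOB half of the union, covers only select_hob and control_hob.
+ */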
+
+typedef struct ide_task_request_s {
+       task_ioreg_t    io_ports[8];
+       task_ioreg_t    hob_ports[8];
+       ide_reg_valid_t out_flags;
+       ide_reg_valid_t in_flags;
+       int             data_phase;
+       int             req_cmd;
+       unsigned long   out_size;
+       unsigned long   in_size;
+} ide_task_request_t;
+
+typedef struct ide_ioctl_request_s {
+       ide_task_request_t      *task_request;
+       unsigned char           *out_buffer;
+       unsigned char           *in_buffer;
+} ide_ioctl_request_t;
+
+#define TASKFILE_INVALID               0x7fff
+#define TASKFILE_48                    0x8000
+
+#define TASKFILE_NO_DATA               0x0000
+
+#define TASKFILE_IN                    0x0001
+#define TASKFILE_MULTI_IN              0x0002
+
+#define TASKFILE_OUT                   0x0004
+#define TASKFILE_MULTI_OUT             0x0008
+#define TASKFILE_IN_OUT                        0x0010
+
+#define TASKFILE_IN_DMA                        0x0020
+#define TASKFILE_OUT_DMA               0x0040
+#define TASKFILE_IN_DMAQ               0x0080
+#define TASKFILE_OUT_DMAQ              0x0100
+
+#define TASKFILE_P_IN                  0x0200
+#define TASKFILE_P_OUT                 0x0400
+#define TASKFILE_P_IN_DMA              0x0800
+#define TASKFILE_P_OUT_DMA             0x1000
+#define TASKFILE_P_IN_DMAQ             0x2000
+#define TASKFILE_P_OUT_DMAQ            0x4000
+
+/* ATA/ATAPI Commands pre T13 Spec */
+#define WIN_NOP                                0x00
+#define CFA_REQ_EXT_ERROR_CODE         0x03 /* CFA Request Extended Error Code */
+#define WIN_SRST                       0x08 /* ATAPI soft reset command */
+#define WIN_DEVICE_RESET               0x08
+#define WIN_RESTORE                    0x10
+#define WIN_READ                       0x20 /* 28-Bit */
+#define WIN_READ_EXT                   0x24 /* 48-Bit */
+#define WIN_READDMA_EXT                        0x25 /* 48-Bit */
+#define WIN_READDMA_QUEUED_EXT         0x26 /* 48-Bit */
+#define WIN_READ_NATIVE_MAX_EXT                0x27 /* 48-Bit */
+#define WIN_MULTREAD_EXT               0x29 /* 48-Bit */
+#define WIN_WRITE                      0x30 /* 28-Bit */
+#define WIN_WRITE_EXT                  0x34 /* 48-Bit */
+#define WIN_WRITEDMA_EXT               0x35 /* 48-Bit */
+#define WIN_WRITEDMA_QUEUED_EXT                0x36 /* 48-Bit */
+#define WIN_SET_MAX_EXT                        0x37 /* 48-Bit */
+#define CFA_WRITE_SECT_WO_ERASE                0x38 /* CFA Write Sectors without erase */
+#define WIN_MULTWRITE_EXT              0x39 /* 48-Bit */
+#define WIN_WRITE_VERIFY               0x3C /* 28-Bit */
+#define WIN_VERIFY                     0x40 /* 28-Bit - Read Verify Sectors */
+#define WIN_VERIFY_EXT                 0x42 /* 48-Bit */
+#define WIN_FORMAT                     0x50
+#define WIN_INIT                       0x60
+#define WIN_SEEK                       0x70
+#define CFA_TRANSLATE_SECTOR           0x87 /* CFA Translate Sector */
+#define WIN_DIAGNOSE                   0x90
+#define WIN_SPECIFY                    0x91 /* set drive geometry translation */
+#define WIN_DOWNLOAD_MICROCODE         0x92
+#define WIN_STANDBYNOW2                        0x94
+#define WIN_SETIDLE2                   0x97
+#define WIN_CHECKPOWERMODE2            0x98
+#define WIN_SLEEPNOW2                  0x99
+#define WIN_PACKETCMD                  0xA0 /* Send a packet command. */
+#define WIN_PIDENTIFY                  0xA1 /* identify ATAPI device   */
+#define WIN_QUEUED_SERVICE             0xA2
+#define WIN_SMART                      0xB0 /* self-monitoring and reporting */
+#define CFA_ERASE_SECTORS              0xC0
+#define WIN_MULTREAD                   0xC4 /* read sectors using multiple mode*/
+#define WIN_MULTWRITE                  0xC5 /* write sectors using multiple mode */
+#define WIN_SETMULT                    0xC6 /* enable/disable multiple mode */
+#define WIN_READDMA_QUEUED             0xC7 /* read sectors using Queued DMA transfers */
+#define WIN_READDMA                    0xC8 /* read sectors using DMA transfers */
+#define WIN_WRITEDMA                   0xCA /* write sectors using DMA transfers */
+#define WIN_WRITEDMA_QUEUED            0xCC /* write sectors using Queued DMA transfers */
+#define CFA_WRITE_MULTI_WO_ERASE       0xCD /* CFA Write multiple without erase */
+#define WIN_GETMEDIASTATUS             0xDA
+#define WIN_DOORLOCK                   0xDE /* lock door on removable drives */
+#define WIN_DOORUNLOCK                 0xDF /* unlock door on removable drives */
+#define WIN_STANDBYNOW1                        0xE0
+#define WIN_IDLEIMMEDIATE              0xE1 /* force drive to become "ready" */
+#define WIN_STANDBY                    0xE2 /* Set device in Standby Mode */
+#define WIN_SETIDLE1                   0xE3
+#define WIN_READ_BUFFER                        0xE4 /* force read only 1 sector */
+#define WIN_CHECKPOWERMODE1            0xE5
+#define WIN_SLEEPNOW1                  0xE6
+#define WIN_FLUSH_CACHE                        0xE7
+#define WIN_WRITE_BUFFER               0xE8 /* force write only 1 sector */
+#define WIN_FLUSH_CACHE_EXT            0xEA /* 48-Bit */
+#define WIN_IDENTIFY                   0xEC /* ask drive to identify itself    */
+#define WIN_MEDIAEJECT                 0xED
+#define WIN_IDENTIFY_DMA               0xEE /* same as WIN_IDENTIFY, but DMA */
+#define WIN_SETFEATURES                        0xEF /* set special drive features */
+#define EXABYTE_ENABLE_NEST            0xF0
+#define WIN_SECURITY_SET_PASS          0xF1
+#define WIN_SECURITY_UNLOCK            0xF2
+#define WIN_SECURITY_ERASE_PREPARE     0xF3
+#define WIN_SECURITY_ERASE_UNIT                0xF4
+#define WIN_SECURITY_FREEZE_LOCK       0xF5
+#define WIN_SECURITY_DISABLE           0xF6
+#define WIN_READ_NATIVE_MAX            0xF8 /* return the native maximum address */
+#define WIN_SET_MAX                    0xF9
+#define DISABLE_SEAGATE                        0xFB
+
+/* WIN_SMART sub-commands */
+
+#define SMART_READ_VALUES              0xD0
+#define SMART_READ_THRESHOLDS          0xD1
+#define SMART_AUTOSAVE                 0xD2
+#define SMART_SAVE                     0xD3
+#define SMART_IMMEDIATE_OFFLINE                0xD4
+#define SMART_READ_LOG_SECTOR          0xD5
+#define SMART_WRITE_LOG_SECTOR         0xD6
+#define SMART_WRITE_THRESHOLDS         0xD7
+#define SMART_ENABLE                   0xD8
+#define SMART_DISABLE                  0xD9
+#define SMART_STATUS                   0xDA
+#define SMART_AUTO_OFFLINE             0xDB
+
+/* Fixed "password" values loaded into taskfile registers 4 (LCYL) and 5
+ * (HCYL) when executing SMART commands */
+
+#define SMART_LCYL_PASS                        0x4F
+#define SMART_HCYL_PASS                        0xC2
+
+/* WIN_SETFEATURES sub-commands */
+
+#define SETFEATURES_EN_WCACHE  0x02    /* Enable write cache */
+#define SETFEATURES_XFER       0x03    /* Set transfer mode */
+#      define XFER_UDMA_7      0x47    /* 0100|0111 */
+#      define XFER_UDMA_6      0x46    /* 0100|0110 */
+#      define XFER_UDMA_5      0x45    /* 0100|0101 */
+#      define XFER_UDMA_4      0x44    /* 0100|0100 */
+#      define XFER_UDMA_3      0x43    /* 0100|0011 */
+#      define XFER_UDMA_2      0x42    /* 0100|0010 */
+#      define XFER_UDMA_1      0x41    /* 0100|0001 */
+#      define XFER_UDMA_0      0x40    /* 0100|0000 */
+#      define XFER_MW_DMA_2    0x22    /* 0010|0010 */
+#      define XFER_MW_DMA_1    0x21    /* 0010|0001 */
+#      define XFER_MW_DMA_0    0x20    /* 0010|0000 */
+#      define XFER_SW_DMA_2    0x12    /* 0001|0010 */
+#      define XFER_SW_DMA_1    0x11    /* 0001|0001 */
+#      define XFER_SW_DMA_0    0x10    /* 0001|0000 */
+#      define XFER_PIO_4       0x0C    /* 0000|1100 */
+#      define XFER_PIO_3       0x0B    /* 0000|1011 */
+#      define XFER_PIO_2       0x0A    /* 0000|1010 */
+#      define XFER_PIO_1       0x09    /* 0000|1001 */
+#      define XFER_PIO_0       0x08    /* 0000|1000 */
+#      define XFER_PIO_SLOW    0x00    /* 0000|0000 */
+#define SETFEATURES_DIS_DEFECT 0x04    /* Disable Defect Management */
+#define SETFEATURES_EN_APM     0x05    /* Enable advanced power management */
+#define SETFEATURES_DIS_MSN    0x31    /* Disable Media Status Notification */
+#define SETFEATURES_EN_AAM     0x42    /* Enable Automatic Acoustic Management */
+#define SETFEATURES_DIS_RLA    0x55    /* Disable read look-ahead feature */
+#define SETFEATURES_EN_RI      0x5D    /* Enable release interrupt */
+#define SETFEATURES_EN_SI      0x5E    /* Enable SERVICE interrupt */
+#define SETFEATURES_DIS_RPOD   0x66    /* Disable reverting to power on defaults */
+#define SETFEATURES_DIS_WCACHE 0x82    /* Disable write cache */
+#define SETFEATURES_EN_DEFECT  0x84    /* Enable Defect Management */
+#define SETFEATURES_DIS_APM    0x85    /* Disable advanced power management */
+#define SETFEATURES_EN_MSN     0x95    /* Enable Media Status Notification */
+#define SETFEATURES_EN_RLA     0xAA    /* Enable read look-ahead feature */
+#define SETFEATURES_PREFETCH   0xAB    /* Sets drive prefetch value */
+#define SETFEATURES_DIS_AAM    0xC2    /* Disable Automatic Acoustic Management */
+#define SETFEATURES_EN_RPOD    0xCC    /* Enable reverting to power on defaults */
+#define SETFEATURES_DIS_RI     0xDD    /* Disable release interrupt */
+#define SETFEATURES_DIS_SI     0xDE    /* Disable SERVICE interrupt */
+
+/* WIN_SECURITY sub-commands */
+
+#define SECURITY_SET_PASSWORD          0xBA
+#define SECURITY_UNLOCK                        0xBB
+#define SECURITY_ERASE_PREPARE         0xBC
+#define SECURITY_ERASE_UNIT            0xBD
+#define SECURITY_FREEZE_LOCK           0xBE
+#define SECURITY_DISABLE_PASSWORD      0xBF
+
+struct hd_geometry {
+      unsigned char heads;
+      unsigned char sectors;
+      unsigned short cylinders;
+      unsigned long start;
+};
+
+/* BIG GEOMETRY */
+struct hd_big_geometry {
+       unsigned char heads;
+       unsigned char sectors;
+       unsigned int cylinders;
+       unsigned long start;
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x030n/0x031n */
+#define HDIO_GETGEO            0x0301  /* get device geometry */
+#define HDIO_GET_UNMASKINTR    0x0302  /* get current unmask setting */
+#define HDIO_GET_MULTCOUNT     0x0304  /* get current IDE blockmode setting */
+#define HDIO_GET_QDMA          0x0305  /* get use-qdma flag */
+#define HDIO_OBSOLETE_IDENTITY 0x0307  /* OBSOLETE, DO NOT USE: returns 142 bytes */
+#define HDIO_GET_KEEPSETTINGS  0x0308  /* get keep-settings-on-reset flag */
+#define HDIO_GET_32BIT         0x0309  /* get current io_32bit setting */
+#define HDIO_GET_NOWERR                0x030a  /* get ignore-write-error flag */
+#define HDIO_GET_DMA           0x030b  /* get use-dma flag */
+#define HDIO_GET_NICE          0x030c  /* get nice flags */
+#define HDIO_GET_IDENTITY      0x030d  /* get IDE identification info */
+#define HDIO_GET_WCACHE                0x030e  /* get write cache mode on|off */
+#define HDIO_GET_ACOUSTIC      0x030f  /* get acoustic value */
+#define HDIO_GET_ADDRESS       0x0310  /* */
+
+#define HDIO_GET_BUSSTATE      0x031a  /* get the bus state of the hwif */
+#define HDIO_TRISTATE_HWIF     0x031b  /* execute a channel tristate */
+#define HDIO_DRIVE_RESET       0x031c  /* execute a device reset */
+#define HDIO_DRIVE_TASKFILE    0x031d  /* execute raw taskfile */
+#define HDIO_DRIVE_TASK                0x031e  /* execute task and special drive command */
+#define HDIO_DRIVE_CMD         0x031f  /* execute a special drive command */
+
+#define HDIO_DRIVE_CMD_AEB     HDIO_DRIVE_TASK
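+
+/*
+ * Minimal user-space sketch for the commonest of these (assumes an open
+ * file descriptor on the block device):
+ *
+ *     struct hd_geometry g;
+ *     if (ioctl(fd, HDIO_GETGEO, &g) == 0)
+ *             printf("%u/%u/%u, start %lu\n",
+ *                    g.cylinders, g.heads, g.sectors, g.start);
+ */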
+
+/* hd/ide ctl's that pass (arg) non-ptr values are numbered 0x032n/0x033n */
+#define HDIO_SET_MULTCOUNT     0x0321  /* change IDE blockmode */
+#define HDIO_SET_UNMASKINTR    0x0322  /* permit other irqs during I/O */
+#define HDIO_SET_KEEPSETTINGS  0x0323  /* keep ioctl settings on reset */
+#define HDIO_SET_32BIT         0x0324  /* change io_32bit flags */
+#define HDIO_SET_NOWERR                0x0325  /* change ignore-write-error flag */
+#define HDIO_SET_DMA           0x0326  /* change use-dma flag */
+#define HDIO_SET_PIO_MODE      0x0327  /* reconfig interface to new speed */
+#define HDIO_SCAN_HWIF         0x0328  /* register and (re)scan interface */
+#define HDIO_SET_NICE          0x0329  /* set nice flags */
+#define HDIO_UNREGISTER_HWIF   0x032a  /* unregister interface */
+#define HDIO_SET_WCACHE                0x032b  /* change write cache enable-disable */
+#define HDIO_SET_ACOUSTIC      0x032c  /* change acoustic behavior */
+#define HDIO_SET_BUSSTATE      0x032d  /* set the bus state of the hwif */
+#define HDIO_SET_QDMA          0x032e  /* change use-qdma flag */
+#define HDIO_SET_ADDRESS       0x032f  /* change lba addressing modes */
+
+/* bus states */
+enum {
+       BUSSTATE_OFF = 0,
+       BUSSTATE_ON,
+       BUSSTATE_TRISTATE
+};
+
+/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x033n/0x033n */
+#define HDIO_GETGEO_BIG                0x0330  /* */
+#define HDIO_GETGEO_BIG_RAW    0x0331  /* */
+
+#define __NEW_HD_DRIVE_ID
+/* structure returned by HDIO_GET_IDENTITY,
+ * as per ANSI NCITS ATA6 rev.1b spec
+ */
+struct hd_driveid {
+       unsigned short  config;         /* lots of obsolete bit flags */
+       unsigned short  cyls;           /* Obsolete, "physical" cyls */
+       unsigned short  reserved2;      /* reserved (word 2) */
+       unsigned short  heads;          /* Obsolete, "physical" heads */
+       unsigned short  track_bytes;    /* unformatted bytes per track */
+       unsigned short  sector_bytes;   /* unformatted bytes per sector */
+       unsigned short  sectors;        /* Obsolete, "physical" sectors per track */
+       unsigned short  vendor0;        /* vendor unique */
+       unsigned short  vendor1;        /* vendor unique */
+       unsigned short  vendor2;        /* Retired vendor unique */
+       unsigned char   serial_no[20];  /* 0 = not_specified */
+       unsigned short  buf_type;       /* Retired */
+       unsigned short  buf_size;       /* Retired, 512 byte increments
+                                        * 0 = not_specified
+                                        */
+       unsigned short  ecc_bytes;      /* for r/w long cmds; 0 = not_specified */
+       unsigned char   fw_rev[8];      /* 0 = not_specified */
+       unsigned char   model[40];      /* 0 = not_specified */
+       unsigned char   max_multsect;   /* 0=not_implemented */
+       unsigned char   vendor3;        /* vendor unique */
+       unsigned short  dword_io;       /* 0=not_implemented; 1=implemented */
+       unsigned char   vendor4;        /* vendor unique */
+       unsigned char   capability;     /* (upper byte of word 49)
+                                        *  3:  IORDYsup
+                                        *  2:  IORDYsw
+                                        *  1:  LBA
+                                        *  0:  DMA
+                                        */
+       unsigned short  reserved50;     /* reserved (word 50) */
+       unsigned char   vendor5;        /* Obsolete, vendor unique */
+       unsigned char   tPIO;           /* Obsolete, 0=slow, 1=medium, 2=fast */
+       unsigned char   vendor6;        /* Obsolete, vendor unique */
+       unsigned char   tDMA;           /* Obsolete, 0=slow, 1=medium, 2=fast */
+       unsigned short  field_valid;    /* (word 53)
+                                        *  2:  ultra_ok        word  88
+                                        *  1:  eide_ok         words 64-70
+                                        *  0:  cur_ok          words 54-58
+                                        */
+       unsigned short  cur_cyls;       /* Obsolete, logical cylinders */
+       unsigned short  cur_heads;      /* Obsolete, l heads */
+       unsigned short  cur_sectors;    /* Obsolete, l sectors per track */
+       unsigned short  cur_capacity0;  /* Obsolete, l total sectors on drive */
+       unsigned short  cur_capacity1;  /* Obsolete, (2 words, misaligned int)     */
+       unsigned char   multsect;       /* current multiple sector count */
+       unsigned char   multsect_valid; /* when (bit0==1) multsect is ok */
+       unsigned int    lba_capacity;   /* Obsolete, total number of sectors */
+       unsigned short  dma_1word;      /* Obsolete, single-word dma info */
+       unsigned short  dma_mword;      /* multiple-word dma info */
+       unsigned short  eide_pio_modes; /* bits 0:mode3 1:mode4 */
+       unsigned short  eide_dma_min;   /* min mword dma cycle time (ns) */
+       unsigned short  eide_dma_time;  /* recommended mword dma cycle time (ns) */
+       unsigned short  eide_pio;       /* min cycle time (ns), no IORDY  */
+       unsigned short  eide_pio_iordy; /* min cycle time (ns), with IORDY */
+       unsigned short  words69_70[2];  /* reserved words 69-70
+                                        * future command overlap and queuing
+                                        */
+       /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */
+       unsigned short  words71_74[4];  /* reserved words 71-74
+                                        * for IDENTIFY PACKET DEVICE command
+                                        */
+       unsigned short  queue_depth;    /* (word 75)
+                                        * 15:5 reserved
+                                        *  4:0 Maximum queue depth -1
+                                        */
+       unsigned short  words76_79[4];  /* reserved words 76-79 */
+       unsigned short  major_rev_num;  /* (word 80) */
+       unsigned short  minor_rev_num;  /* (word 81) */
+       unsigned short  command_set_1;  /* (word 82) supported
+                                        * 15:  Obsolete
+                                        * 14:  NOP command
+                                        * 13:  READ_BUFFER
+                                        * 12:  WRITE_BUFFER
+                                        * 11:  Obsolete
+                                        * 10:  Host Protected Area
+                                        *  9:  DEVICE Reset
+                                        *  8:  SERVICE Interrupt
+                                        *  7:  Release Interrupt
+                                        *  6:  look-ahead
+                                        *  5:  write cache
+                                        *  4:  PACKET Command
+                                        *  3:  Power Management Feature Set
+                                        *  2:  Removable Feature Set
+                                        *  1:  Security Feature Set
+                                        *  0:  SMART Feature Set
+                                        */
+       unsigned short  command_set_2;  /* (word 83)
+                                        * 15:  Shall be ZERO
+                                        * 14:  Shall be ONE
+                                        * 13:  FLUSH CACHE EXT
+                                        * 12:  FLUSH CACHE
+                                        * 11:  Device Configuration Overlay
+                                        * 10:  48-bit Address Feature Set
+                                        *  9:  Automatic Acoustic Management
+                                        *  8:  SET MAX security
+                                        *  7:  reserved 1407DT PARTIES
+                                        *  6:  SetF sub-command Power-Up
+                                        *  5:  Power-Up in Standby Feature Set
+                                        *  4:  Removable Media Notification
+                                        *  3:  APM Feature Set
+                                        *  2:  CFA Feature Set
+                                        *  1:  READ/WRITE DMA QUEUED
+                                        *  0:  Download MicroCode
+                                        */
+       unsigned short  cfsse;          /* (word 84)
+                                        * cmd set-feature supported extensions
+                                        * 15:  Shall be ZERO
+                                        * 14:  Shall be ONE
+                                        * 13:3 reserved
+                                        *  2:  Media Serial Number Valid
+                                        *  1:  SMART self-test supported
+                                        *  0:  SMART error logging
+                                        */
+       unsigned short  cfs_enable_1;   /* (word 85)
+                                        * command set-feature enabled
+                                        * 15:  Obsolete
+                                        * 14:  NOP command
+                                        * 13:  READ_BUFFER
+                                        * 12:  WRITE_BUFFER
+                                        * 11:  Obsolete
+                                        * 10:  Host Protected Area
+                                        *  9:  DEVICE Reset
+                                        *  8:  SERVICE Interrupt
+                                        *  7:  Release Interrupt
+                                        *  6:  look-ahead
+                                        *  5:  write cache
+                                        *  4:  PACKET Command
+                                        *  3:  Power Management Feature Set
+                                        *  2:  Removable Feature Set
+                                        *  1:  Security Feature Set
+                                        *  0:  SMART Feature Set
+                                        */
+       unsigned short  cfs_enable_2;   /* (word 86)
+                                        * command set-feature enabled
+                                        * 15:  Shall be ZERO
+                                        * 14:  Shall be ONE
+                                        * 13:  FLUSH CACHE EXT
+                                        * 12:  FLUSH CACHE
+                                        * 11:  Device Configuration Overlay
+                                        * 10:  48-bit Address Feature Set
+                                        *  9:  Automatic Acoustic Management
+                                        *  8:  SET MAX security
+                                        *  7:  reserved 1407DT PARTIES
+                                        *  6:  SetF sub-command Power-Up
+                                        *  5:  Power-Up in Standby Feature Set
+                                        *  4:  Removable Media Notification
+                                        *  3:  APM Feature Set
+                                        *  2:  CFA Feature Set
+                                        *  1:  READ/WRITE DMA QUEUED
+                                        *  0:  Download MicroCode
+                                        */
+       unsigned short  csf_default;    /* (word 87)
+                                        * command set-feature default
+                                        * 15:  Shall be ZERO
+                                        * 14:  Shall be ONE
+                                        * 13:3 reserved
+                                        *  2:  Media Serial Number Valid
+                                        *  1:  SMART self-test supported
+                                        *  0:  SMART error logging
+                                        */
+       unsigned short  dma_ultra;      /* (word 88) */
+       unsigned short  word89;         /* reserved (word 89) */
+       unsigned short  word90;         /* reserved (word 90) */
+       unsigned short  CurAPMvalues;   /* current APM values */
+       unsigned short  word92;         /* reserved (word 92) */
+       unsigned short  hw_config;      /* hardware config (word 93)
+                                        * 15:
+                                        * 14:
+                                        * 13:
+                                        * 12:
+                                        * 11:
+                                        * 10:
+                                        *  9:
+                                        *  8:
+                                        *  7:
+                                        *  6:
+                                        *  5:
+                                        *  4:
+                                        *  3:
+                                        *  2:
+                                        *  1:
+                                        *  0:
+                                        */
+       unsigned short  acoustic;       /* (word 94)
+                                        * 15:8 Vendor's recommended value
+                                        *  7:0 current value
+                                        */
+       unsigned short  words95_99[5];  /* reserved words 95-99 */
+#if 0
+       unsigned short  words100_103[4];/* reserved words 100-103 */
+#else
+       unsigned long long lba_capacity_2;/* 48-bit total number of sectors */
+#endif
+       unsigned short  words104_125[22];/* reserved words 104-125 */
+       unsigned short  last_lun;       /* (word 126) */
+       unsigned short  word127;        /* (word 127) Feature Set
+                                        * Removable Media Notification
+                                        * 15:2 reserved
+                                        *  1:0 00 = not supported
+                                        *      01 = supported
+                                        *      10 = reserved
+                                        *      11 = reserved
+                                        */
+       unsigned short  dlf;            /* (word 128)
+                                        * device lock function
+                                        * 15:9 reserved
+                                        *  8   security level 1:max 0:high
+                                        *  7:6 reserved
+                                        *  5   enhanced erase
+                                        *  4   expire
+                                        *  3   frozen
+                                        *  2   locked
+                                        *  1   en/disabled
+                                        *  0   capability
+                                        */
+       unsigned short  csfo;           /*  (word 129)
+                                        * current set features options
+                                        * 15:4 reserved
+                                        *  3:  auto reassign
+                                        *  2:  reverting
+                                        *  1:  read-look-ahead
+                                        *  0:  write cache
+                                        */
+       unsigned short  words130_155[26];/* reserved vendor words 130-155 */
+       unsigned short  word156;        /* reserved vendor word 156 */
+       unsigned short  words157_159[3];/* reserved vendor words 157-159 */
+       unsigned short  cfa_power;      /* (word 160) CFA Power Mode
+                                        * 15 word 160 supported
+                                        * 14 reserved
+                                        * 13
+                                        * 12
+                                        * 11:0
+                                        */
+       unsigned short  words161_175[14];/* Reserved for CFA */
+       unsigned short  words176_205[31];/* Current Media Serial Number */
+       unsigned short  words206_254[48];/* reserved words 206-254 */
+       unsigned short  integrity_word; /* (word 255)
+                                        * 15:8 Checksum
+                                        *  7:0 Signature
+                                        */
+};
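+
+/*
+ * The struct mirrors the 256 16-bit words returned by WIN_IDENTIFY.
+ * Sketch of testing the LBA capability bit (bit 1 of word 49) after
+ * HDIO_GET_IDENTITY:
+ *
+ *     struct hd_driveid id;
+ *     if (ioctl(fd, HDIO_GET_IDENTITY, &id) == 0 && (id.capability & 2))
+ *             ... the drive supports LBA addressing ...
+ */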
+
+/*
+ * IDE "nice" flags. These are used on a per drive basis to determine
+ * when to be nice and give more bandwidth to the other devices which
+ * share the same IDE bus.
+ */
+#define IDE_NICE_DSC_OVERLAP   (0)     /* per the DSC overlap protocol */
+#define IDE_NICE_ATAPI_OVERLAP (1)     /* not supported yet */
+#define IDE_NICE_0             (2)     /* when sure that it won't affect us */
+#define IDE_NICE_1             (3)     /* when probably won't affect us much */
+#define IDE_NICE_2             (4)     /* when we know it's on our expense */
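+
+/*
+ * These are bit numbers in the word passed to HDIO_SET_NICE, e.g.
+ * (sketch):  ioctl(fd, HDIO_SET_NICE, 1 << IDE_NICE_1);
+ */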
+
+#ifdef __KERNEL__
+/*
+ * These routines are used for kernel command line parameters from main.c:
+ */
+#include <linux/config.h>
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+int ide_register(int io_port, int ctl_port, int irq);
+void ide_unregister(unsigned int);
+#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */
+
+#endif  /* __KERNEL__ */
+
+#endif /* _LINUX_HDREG_H */
diff --git a/xen-2.4.16/include/xeno/hdsmart.h b/xen-2.4.16/include/xeno/hdsmart.h
new file mode 100644 (file)
index 0000000..7974a47
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * linux/include/linux/hdsmart.h
+ *
+ * Copyright (C) 1999-2000     Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2000          Andre Hedrick <andre@linux-ide.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_HDSMART_H
+#define _LINUX_HDSMART_H
+
+#define OFFLINE_FULL_SCAN              0
+#define SHORT_SELF_TEST                        1
+#define EXTEND_SELF_TEST               2
+#define SHORT_CAPTIVE_SELF_TEST                129
+#define EXTEND_CAPTIVE_SELF_TEST       130
+
+/* smart_attribute is the vendor-specific attribute structure defined in the SFF-8035 spec */
+typedef struct ata_smart_attribute_s {
+       unsigned char                   id;
+       unsigned short                  status_flag;
+       unsigned char                   normalized;
+       unsigned char                   worse_normal;
+       unsigned char                   raw[6];
+       unsigned char                   reserv;
+} __attribute__ ((packed)) ata_smart_attribute_t;
+
+/* smart_values is the format of the Read Drive Attribute command */
+typedef struct ata_smart_values_s {
+       unsigned short                  revnumber;
+       ata_smart_attribute_t           vendor_attributes [30];
+       unsigned char                   offline_data_collection_status;
+       unsigned char                   self_test_exec_status;
+       unsigned short                  total_time_to_complete_off_line;
+       unsigned char                   vendor_specific_366;
+       unsigned char                   offline_data_collection_capability;
+       unsigned short                  smart_capability;
+       unsigned char                   errorlog_capability;
+       unsigned char                   vendor_specific_371;
+       unsigned char                   short_test_completion_time;
+       unsigned char                   extend_test_completion_time;
+       unsigned char                   reserved_374_385 [12];
+       unsigned char                   vendor_specific_386_509 [125];
+       unsigned char                   chksum;
+} __attribute__ ((packed)) ata_smart_values_t;
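+
+/*
+ * The numeric suffixes in the field names are byte offsets: the packed
+ * layout spans exactly 512 bytes (2 + 30*12 + ...), one sector as
+ * returned by SMART_READ_VALUES, with chksum at byte 511.
+ */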
+
+/* Smart Threshold data structures */
+/* Vendor attribute of SMART Threshold */
+typedef struct ata_smart_threshold_entry_s {
+       unsigned char                   id;
+       unsigned char                   normalized_threshold;
+       unsigned char                   reserved[10];
+} __attribute__ ((packed)) ata_smart_threshold_entry_t;
+
+/* Format of the Read SMART Thresholds command */
+typedef struct ata_smart_thresholds_s {
+       unsigned short                  revnumber;
+       ata_smart_threshold_entry_t     thres_entries[30];
+       unsigned char                   reserved[149];
+       unsigned char                   chksum;
+} __attribute__ ((packed)) ata_smart_thresholds_t;
+
+typedef struct ata_smart_errorlog_command_struct_s {
+       unsigned char                   devicecontrolreg;
+       unsigned char                   featuresreg;
+       unsigned char                   sector_count;
+       unsigned char                   sector_number;
+       unsigned char                   cylinder_low;
+       unsigned char                   cylinder_high;
+       unsigned char                   drive_head;
+       unsigned char                   commandreg;
+       unsigned int                    timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_command_struct_t;
+
+typedef struct ata_smart_errorlog_error_struct_s {
+       unsigned char                   error_condition;
+       unsigned char                   extended_error[14];
+       unsigned char                   state;
+       unsigned short                  timestamp;
+} __attribute__ ((packed)) ata_smart_errorlog_error_struct_t;
+
+typedef struct ata_smart_errorlog_struct_s {
+       ata_smart_errorlog_command_struct_t     commands[6];
+       ata_smart_errorlog_error_struct_t       error_struct;
+} __attribute__ ((packed)) ata_smart_errorlog_struct_t;
+
+typedef struct ata_smart_errorlog_s {
+       unsigned char                   revnumber;
+       unsigned char                   error_log_pointer;
+       ata_smart_errorlog_struct_t     errorlog_struct[5];
+       unsigned short                  ata_error_count;
+       unsigned short                  non_fatal_count;
+       unsigned short                  drive_timeout_count;
+       unsigned char                   reserved[53];
+       unsigned char                   chksum;
+} __attribute__ ((packed)) ata_smart_errorlog_t;
+
+typedef struct ata_smart_selftestlog_struct_s {
+       unsigned char                   selftestnumber;
+       unsigned char                   selfteststatus;
+       unsigned short                  timestamp;
+       unsigned char                   selftestfailurecheckpoint;
+       unsigned int                    lbafirstfailure;
+       unsigned char                   vendorspecific[15];
+} __attribute__ ((packed)) ata_smart_selftestlog_struct_t;
+
+typedef struct ata_smart_selftestlog_s {
+       unsigned short                  revnumber;
+       ata_smart_selftestlog_struct_t  selftest_struct[21];
+       unsigned char                   vendorspecific[2];
+       unsigned char                   mostrecenttest;
+       unsigned char                   resevered[2];
+       unsigned char                   chksum;
+} __attribute__ ((packed)) ata_smart_selftestlog_t;
+
+#endif /* _LINUX_HDSMART_H */
diff --git a/xen-2.4.16/include/xeno/ide.h b/xen-2.4.16/include/xeno/ide.h
new file mode 100644 (file)
index 0000000..dacfd89
--- /dev/null
@@ -0,0 +1,1105 @@
+#ifndef _IDE_H
+#define _IDE_H
+/*
+ *  linux/include/linux/ide.h
+ *
+ *  Copyright (C) 1994-1998  Linus Torvalds & authors
+ */
+
+#include <xeno/config.h>
+#include <xeno/init.h>
+#include <xeno/ioport.h>
+#include <xeno/hdreg.h>
+#include <xeno/hdsmart.h>
+#include <xeno/major.h>
+#include <xeno/blkdev.h>
+/*#include <xeno/proc_fs.h>*/
+/*#include <xeno/devfs_fs_kernel.h>*/
+#include <asm/hdreg.h>
+
+/*
+ * This is the multiple IDE interface driver, as evolved from hd.c.
+ * It supports up to four IDE interfaces, on one or more IRQs (usually 14 & 15).
+ * There can be up to two drives per interface, as per the ATA-2 spec.
+ *
+ * Primary i/f:    ide0: major=3;  (hda)         minor=0; (hdb)         minor=64
+ * Secondary i/f:  ide1: major=22; (hdc or hd1a) minor=0; (hdd or hd1b) minor=64
+ * Tertiary i/f:   ide2: major=33; (hde)         minor=0; (hdf)         minor=64
+ * Quaternary i/f: ide3: major=34; (hdg)         minor=0; (hdh)         minor=64
+ */
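+
+/*
+ * With PARTN_BITS == 6 (below), each unit therefore owns 64 minors:
+ * unit = minor >> PARTN_BITS, partition = minor & PARTN_MASK.
+ * E.g. /dev/hdb2 is major 3, minor 64 + 2 == 66.
+ */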
+
+/******************************************************************************
+ * IDE driver configuration options (play with these as desired):
+ *
+ * REALLY_SLOW_IO can be defined in ide.c and ide-cd.c, if necessary
+ */
+#undef REALLY_FAST_IO                  /* define if ide ports are perfect */
+#define INITIAL_MULT_COUNT     0       /* off=0; on=2,4,8,16,32, etc.. */
+
+#ifndef SUPPORT_SLOW_DATA_PORTS                /* 1 to support slow data ports */
+#define SUPPORT_SLOW_DATA_PORTS        1       /* 0 to reduce kernel size */
+#endif
+#ifndef SUPPORT_VLB_SYNC               /* 1 to support weird 32-bit chips */
+#define SUPPORT_VLB_SYNC       1       /* 0 to reduce kernel size */
+#endif
+#ifndef DISK_RECOVERY_TIME             /* off=0; on=access_delay_time */
+#define DISK_RECOVERY_TIME     0       /*  for hardware that needs it */
+#endif
+#ifndef OK_TO_RESET_CONTROLLER         /* 1 needed for good error recovery */
+#define OK_TO_RESET_CONTROLLER 1       /* 0 for use with AH2372A/B interface */
+#endif
+#ifndef FANCY_STATUS_DUMPS             /* 1 for human-readable drive errors */
+#define FANCY_STATUS_DUMPS     1       /* 0 to reduce kernel size */
+#endif
+
+#ifdef CONFIG_BLK_DEV_CMD640
+#if 0  /* change to 1 when debugging cmd640 problems */
+void cmd640_dump_regs (void);
+#define CMD640_DUMP_REGS cmd640_dump_regs() /* for debugging cmd640 chipset */
+#endif
+#endif  /* CONFIG_BLK_DEV_CMD640 */
+
+#ifndef DISABLE_IRQ_NOSYNC
+#define DISABLE_IRQ_NOSYNC     0
+#endif
+
+/*
+ * IDE_DRIVE_CMD is used to implement many features of the hdparm utility
+ */
+#define IDE_DRIVE_CMD                  99      /* (magic) undef to reduce kernel size */
+
+#define IDE_DRIVE_TASK                 98
+
+/*
+ * IDE_DRIVE_TASKFILE is used to implement many features needed for raw tasks
+ */
+#define IDE_DRIVE_TASKFILE             97
+
+/*
+ *  "No user-serviceable parts" beyond this point  :)
+ *****************************************************************************/
+
+typedef unsigned char  byte;   /* used everywhere */
+
+/*
+ * Probably not wise to fiddle with these
+ */
+#define ERROR_MAX      8       /* Max read/write errors per sector */
+#define ERROR_RESET    3       /* Reset controller every 4th retry */
+#define ERROR_RECAL    1       /* Recalibrate every 2nd retry */
+
+/*
+ * state flags
+ */
+#define DMA_PIO_RETRY  1       /* retrying in PIO */
+
+/*
+ * Ensure that various configuration flags have compatible settings
+ */
+#ifdef REALLY_SLOW_IO
+#undef REALLY_FAST_IO
+#endif
+
+#define HWIF(drive)            ((ide_hwif_t *)((drive)->hwif))
+#define HWGROUP(drive)         ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
+
+/*
+ * Definitions for accessing IDE controller registers
+ */
+#define IDE_NR_PORTS           (10)
+
+#define IDE_DATA_OFFSET                (0)
+#define IDE_ERROR_OFFSET       (1)
+#define IDE_NSECTOR_OFFSET     (2)
+#define IDE_SECTOR_OFFSET      (3)
+#define IDE_LCYL_OFFSET                (4)
+#define IDE_HCYL_OFFSET                (5)
+#define IDE_SELECT_OFFSET      (6)
+#define IDE_STATUS_OFFSET      (7)
+#define IDE_CONTROL_OFFSET     (8)
+#define IDE_IRQ_OFFSET         (9)
+
+#define IDE_FEATURE_OFFSET     IDE_ERROR_OFFSET
+#define IDE_COMMAND_OFFSET     IDE_STATUS_OFFSET
+
+#define IDE_DATA_OFFSET_HOB    (0)
+#define IDE_ERROR_OFFSET_HOB   (1)
+#define IDE_NSECTOR_OFFSET_HOB (2)
+#define IDE_SECTOR_OFFSET_HOB  (3)
+#define IDE_LCYL_OFFSET_HOB    (4)
+#define IDE_HCYL_OFFSET_HOB    (5)
+#define IDE_SELECT_OFFSET_HOB  (6)
+#define IDE_CONTROL_OFFSET_HOB (7)
+
+#define IDE_FEATURE_OFFSET_HOB IDE_ERROR_OFFSET_HOB
+
+#define IDE_DATA_REG           (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG          (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG                (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG         (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG           (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG           (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG         (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG         (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG                (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+#define IDE_IRQ_REG            (HWIF(drive)->io_ports[IDE_IRQ_OFFSET])
+
+#define IDE_DATA_REG_HOB       (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG_HOB      (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG_HOB    (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG_HOB     (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG_HOB       (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG_HOB       (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG_HOB     (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG_HOB     (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG_HOB    (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+
+#define IDE_FEATURE_REG                IDE_ERROR_REG
+#define IDE_COMMAND_REG                IDE_STATUS_REG
+#define IDE_ALTSTATUS_REG      IDE_CONTROL_REG
+#define IDE_IREASON_REG                IDE_NSECTOR_REG
+#define IDE_BCOUNTL_REG                IDE_LCYL_REG
+#define IDE_BCOUNTH_REG                IDE_HCYL_REG
+
+#define GET_ERR()              IN_BYTE(IDE_ERROR_REG)
+#define GET_STAT()             IN_BYTE(IDE_STATUS_REG)
+#define GET_ALTSTAT()          IN_BYTE(IDE_CONTROL_REG)
+#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good))
+#define BAD_R_STAT             (BUSY_STAT   | ERR_STAT)
+#define BAD_W_STAT             (BAD_R_STAT  | WRERR_STAT)
+#define BAD_STAT               (BAD_R_STAT  | DRQ_STAT)
+#define DRIVE_READY            (READY_STAT  | SEEK_STAT)
+#define DATA_READY             (DRQ_STAT)
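+
+/*
+ * Example: OK_STAT(stat, DRIVE_READY, BAD_W_STAT) is non-zero exactly
+ * when READY and SEEK are set while BUSY, WRERR and ERR are all clear.
+ */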
+
+/*
+ * Some more useful definitions
+ */
+#define IDE_MAJOR_NAME "hd"    /* the same for all i/f; see also genhd.c */
+#define MAJOR_NAME     IDE_MAJOR_NAME
+#define PARTN_BITS     6       /* number of minor dev bits for partitions */
+#define PARTN_MASK     ((1<<PARTN_BITS)-1)     /* a useful bit mask */
+#define MAX_DRIVES     2       /* per interface; 2 assumed by lots of code */
+#define CASCADE_DRIVES 8       /* per interface; 8|2 assumed by lots of code */
+#define SECTOR_SIZE    512
+#define SECTOR_WORDS   (SECTOR_SIZE / 4)       /* number of 32bit words per sector */
+#define IDE_LARGE_SEEK(b1,b2,t)        (((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
+#define IDE_MIN(a,b)   ((a)<(b) ? (a):(b))
+#define IDE_MAX(a,b)   ((a)>(b) ? (a):(b))
+
+#ifndef SPLIT_WORD
+#  define SPLIT_WORD(W,HB,LB) ((HB)=(W>>8), (LB)=(W-((W>>8)<<8)))
+#endif
+#ifndef MAKE_WORD
+#  define MAKE_WORD(W,HB,LB) ((W)=((HB<<8)+LB))
+#endif
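+
+/* Example: SPLIT_WORD(0x1234, hb, lb) leaves hb == 0x12, lb == 0x34;
+   MAKE_WORD(w, 0x12, 0x34) reassembles w == 0x1234. */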
+
+
+/*
+ * Timeouts for various operations:
+ */
+#define WAIT_DRQ       (5*HZ/100)      /* 50msec - spec allows up to 20ms */
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+#define WAIT_READY     (5*HZ)          /* 5sec - some laptops are very slow */
+#else
+#define WAIT_READY     (3*HZ/100)      /* 30msec - should be instantaneous */
+#endif /* CONFIG_APM || CONFIG_APM_MODULE */
+#define WAIT_PIDENTIFY (10*HZ) /* 10sec  - should be less than 3ms (?), provided all ATAPI CD trays are closed at boot */
+#define WAIT_WORSTCASE (30*HZ) /* 30sec  - worst case when spinning up */
+#define WAIT_CMD       (10*HZ) /* 10sec  - maximum wait for an IRQ to happen */
+#define WAIT_MIN_SLEEP (2*HZ/100)      /* 20msec - minimum sleep time */
+
+#define SELECT_DRIVE(hwif,drive)                               \
+{                                                              \
+       if (hwif->selectproc)                                   \
+               hwif->selectproc(drive);                        \
+       OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \
+}
+
+#define SELECT_INTERRUPT(hwif,drive)                           \
+{                                                              \
+       if (hwif->intrproc)                                     \
+               hwif->intrproc(drive);                          \
+       else                                                    \
+               OUT_BYTE((drive)->ctl|2, hwif->io_ports[IDE_CONTROL_OFFSET]);   \
+}
+
+#define SELECT_MASK(hwif,drive,mask)                           \
+{                                                              \
+       if (hwif->maskproc)                                     \
+               hwif->maskproc(drive,mask);                     \
+}
+
+#define SELECT_READ_WRITE(hwif,drive,func)                     \
+{                                                              \
+       if (hwif->rwproc)                                       \
+               hwif->rwproc(drive,func);                       \
+}
+
+#define QUIRK_LIST(hwif,drive)                                 \
+{                                                              \
+       if (hwif->quirkproc)                                    \
+               (drive)->quirk_list = hwif->quirkproc(drive);   \
+}
+
+#define HOST(hwif,chipset)                                     \
+{                                                              \
+       return ((hwif)->chipset == chipset) ? 1 : 0;            \
+}
+
+#define IDE_DEBUG(lineno) \
+       printk("%s,%s,line=%d\n", __FILE__, __FUNCTION__, (lineno))
+
+/*
+ * Check for an interrupt and acknowledge the interrupt status
+ */
+struct hwif_s;
+typedef int (ide_ack_intr_t)(struct hwif_s *);
+
+#ifndef NO_DMA
+#define NO_DMA  255
+#endif
+
+/*
+ * hwif_chipset_t is used to keep track of the specific hardware
+ * chipset used by each IDE interface, if known.
+ */
+typedef enum { ide_unknown,    ide_generic,    ide_pci,
+               ide_cmd640,     ide_dtc2278,    ide_ali14xx,
+               ide_qd65xx,     ide_umc8672,    ide_ht6560b,
+               ide_pdc4030,    ide_rz1000,     ide_trm290,
+               ide_cmd646,     ide_cy82c693,   ide_4drives,
+               ide_pmac,       ide_etrax100
+} hwif_chipset_t;
+
+/*
+ * Structure to hold all information about the location of this port
+ */
+typedef struct hw_regs_s {
+       ide_ioreg_t     io_ports[IDE_NR_PORTS]; /* task file registers */
+       int             irq;                    /* our irq number */
+       int             dma;                    /* our dma entry */
+       ide_ack_intr_t  *ack_intr;              /* acknowledge interrupt */
+       void            *priv;                  /* interface specific data */
+       hwif_chipset_t  chipset;
+} hw_regs_t;
+
+/*
+ * Register new hardware with ide
+ */
+int ide_register_hw(hw_regs_t *hw, struct hwif_s **hwifp);
+
+/*
+ * Set up hw_regs_t structure before calling ide_register_hw (optional)
+ */
+void ide_setup_ports(  hw_regs_t *hw,
+                       ide_ioreg_t base,
+                       int *offsets,
+                       ide_ioreg_t ctrl,
+                       ide_ioreg_t intr,
+                       ide_ack_intr_t *ack_intr,
+                       int irq);
+
+#include <asm/ide.h>
+
+/*
+ * If the arch-dependent ide.h did not declare/define any OUT_BYTE
+ * or IN_BYTE functions, we make some defaults here.
+ */
+
+#ifndef HAVE_ARCH_OUT_BYTE
+#ifdef REALLY_FAST_IO
+#define OUT_BYTE(b,p)          outb((b),(p))
+#define OUT_WORD(w,p)          outw((w),(p))
+#else
+#define OUT_BYTE(b,p)          outb_p((b),(p))
+#define OUT_WORD(w,p)          outw_p((w),(p))
+#endif
+#endif
+
+#ifndef HAVE_ARCH_IN_BYTE
+#ifdef REALLY_FAST_IO
+#define IN_BYTE(p)             (byte)inb(p)
+#define IN_WORD(p)             (short)inw(p)
+#else
+#define IN_BYTE(p)             (byte)inb_p(p)
+#define IN_WORD(p)             (short)inw_p(p)
+#endif
+#endif
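+
+/*
+ * Sketch of an arch-specific override (hypothetical helper; a port's
+ * <asm/ide.h> would define its own accessors and set the guard):
+ *
+ *     #define HAVE_ARCH_OUT_BYTE
+ *     #define OUT_BYTE(b,p)   my_port_writeb((b),(p))
+ */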
+
+/*
+ * Now for the data we need to maintain per-drive:  ide_drive_t
+ */
+
+#define ide_scsi       0x21
+#define ide_disk       0x20
+#define ide_optical    0x7
+#define ide_cdrom      0x5
+#define ide_tape       0x1
+#define ide_floppy     0x0
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned set_geometry   : 1;    /* respecify drive geometry */
+               unsigned recalibrate    : 1;    /* seek to cyl 0      */
+               unsigned set_multmode   : 1;    /* set multmode count */
+               unsigned set_tune       : 1;    /* tune interface for drive */
+               unsigned reserved       : 4;    /* unused */
+       } b;
+} special_t;
+
+typedef struct ide_drive_s {
+       request_queue_t          queue; /* request queue */
+       struct ide_drive_s      *next;  /* circular list of hwgroup drives */
+       unsigned long sleep;            /* sleep until this time */
+       unsigned long service_start;    /* time we started last request */
+       unsigned long service_time;     /* service time of last request */
+       unsigned long timeout;          /* max time to wait for irq */
+       special_t       special;        /* special action flags */
+       byte     keep_settings;         /* restore settings after drive reset */
+       byte     using_dma;             /* disk is using dma for read/write */
+       byte     retry_pio;             /* retrying dma capable host in pio */
+       byte     state;                 /* retry state */
+       byte     waiting_for_dma;       /* dma currently in progress */
+       byte     unmask;                /* flag: okay to unmask other irqs */
+       byte     slow;                  /* flag: slow data port */
+       byte     bswap;                 /* flag: byte swap data */
+       byte     dsc_overlap;           /* flag: DSC overlap */
+       byte     nice1;                 /* flag: give potential excess bandwidth */
+       unsigned present        : 1;    /* drive is physically present */
+       unsigned noprobe        : 1;    /* from:  hdx=noprobe */
+       unsigned busy           : 1;    /* currently doing revalidate_disk() */
+       unsigned removable      : 1;    /* 1 if need to do check_media_change */
+       unsigned forced_geom    : 1;    /* 1 if hdx=c,h,s was given at boot */
+       unsigned no_unmask      : 1;    /* disallow setting unmask bit */
+       unsigned no_io_32bit    : 1;    /* disallow enabling 32bit I/O */
+       unsigned nobios         : 1;    /* flag: do not probe bios for drive */
+       unsigned revalidate     : 1;    /* request revalidation */
+       unsigned atapi_overlap  : 1;    /* flag: ATAPI overlap (not supported) */
+       unsigned nice0          : 1;    /* flag: give obvious excess bandwidth */
+       unsigned nice2          : 1;    /* flag: give a share in our own bandwidth */
+       unsigned doorlocking    : 1;    /* flag: for removable only: door lock/unlock works */
+       unsigned autotune       : 2;    /* 1=autotune, 2=noautotune, 0=default */
+       unsigned remap_0_to_1   : 2;    /* 0=remap if ezdrive, 1=remap, 2=noremap */
+       unsigned ata_flash      : 1;    /* 1=present, 0=default */
+       unsigned        addressing;     /* : 2; 0=28-bit, 1=48-bit, 2=64-bit */
+       byte            scsi;           /* 0=default, 1=skip current ide-subdriver for ide-scsi emulation */
+       byte            media;          /* disk, cdrom, tape, floppy, ... */
+       select_t        select;         /* basic drive/head select reg value */
+       byte            ctl;            /* "normal" value for IDE_CONTROL_REG */
+       byte            ready_stat;     /* min status value for drive ready */
+       byte            mult_count;     /* current multiple sector setting */
+       byte            mult_req;       /* requested multiple sector setting */
+       byte            tune_req;       /* requested drive tuning setting */
+       byte            io_32bit;       /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */
+       byte            bad_wstat;      /* used for ignoring WRERR_STAT */
+       byte            nowerr;         /* used for ignoring WRERR_STAT */
+       byte            sect0;          /* offset of first sector for DM6:DDO */
+       unsigned int    usage;          /* current "open()" count for drive */
+       byte            head;           /* "real" number of heads */
+       byte            sect;           /* "real" sectors per track */
+       byte            bios_head;      /* BIOS/fdisk/LILO number of heads */
+       byte            bios_sect;      /* BIOS/fdisk/LILO sectors per track */
+       unsigned int    bios_cyl;       /* BIOS/fdisk/LILO number of cyls */
+       unsigned int    cyl;            /* "real" number of cyls */
+       unsigned long   capacity;       /* total number of sectors */
+       unsigned long long capacity48;  /* total number of sectors */
+       unsigned int    drive_data;     /* for use by tuneproc/selectproc as needed */
+       void              *hwif;        /* actually (ide_hwif_t *) */
+    /*wait_queue_head_t wqueue;*/      /* used to wait for drive in open() */
+       struct hd_driveid *id;          /* drive model identification info */
+       struct hd_struct  *part;        /* drive partition table */
+       char            name[4];        /* drive name, such as "hda" */
+       void            *driver;        /* (ide_driver_t *) */
+       void            *driver_data;   /* extra driver data */
+    /*devfs_handle_t   de;     */      /* directory for device */
+       struct proc_dir_entry *proc;    /* /proc/ide/ directory entry */
+       void            *settings;      /* /proc/ide/ drive settings */
+       char            driver_req[10]; /* requests specific driver */
+       int             last_lun;       /* last logical unit */
+       int             forced_lun;     /* if hdxlun was given at boot */
+       int             lun;            /* logical unit */
+       int             crc_count;      /* crc counter to reduce drive speed */
+       byte            quirk_list;     /* drive is considered quirky if set for a specific host */
+       byte            suspend_reset;  /* drive suspend mode flag, soft-reset recovers */
+       byte            init_speed;     /* transfer rate set at boot */
+       byte            current_speed;  /* current transfer rate set */
+       byte            dn;             /* now in widespread use */
+       byte            wcache;         /* status of write cache */
+       byte            acoustic;       /* acoustic management */
+       unsigned int    failures;       /* current failure count */
+       unsigned int    max_failures;   /* maximum allowed failure count */
+} ide_drive_t;
+
+/*
+ * An ide_dmaproc_t() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA.  All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case the caller
+ * should either try again later, or revert to PIO for the current request.
+ */
+typedef enum { ide_dma_read,   ide_dma_write,          ide_dma_begin,
+               ide_dma_end,    ide_dma_check,          ide_dma_on,
+               ide_dma_off,    ide_dma_off_quietly,    ide_dma_test_irq,
+               ide_dma_bad_drive,                      ide_dma_good_drive,
+               ide_dma_verbose,                        ide_dma_retune,
+               ide_dma_lostirq,                        ide_dma_timeout
+} ide_dma_action_t;
+
+typedef int (ide_dmaproc_t)(ide_dma_action_t, ide_drive_t *);
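+
+/*
+ * A minimal dmaproc sketch for a hypothetical chipset (illustrative
+ * only; the generic ide_dmaproc() is declared further below under
+ * CONFIG_BLK_DEV_IDEDMA):
+ *
+ *     static int mychip_dmaproc(ide_dma_action_t func, ide_drive_t *drive)
+ *     {
+ *             switch (func) {
+ *             case ide_dma_read:
+ *             case ide_dma_write:
+ *                     return 1;  (DMA could not start; caller falls back to PIO)
+ *             default:
+ *                     return ide_dmaproc(func, drive);
+ *             }
+ *     }
+ */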
+
+/*
+ * An ide_ideproc_t() performs CPU-polled transfers to/from a drive.
+ * Arguments are: the drive, the buffer pointer, and the length (in bytes
+ * or words, depending on whether it is an IDE or ATAPI call).
+ *
+ * If it is not defined for a controller, the standard code from ide.c
+ * is used.
+ *
+ * Controllers which are not memory-mapped in the standard way must
+ * override that mechanism using this function.
+ */
+typedef enum { ideproc_ide_input_data,    ideproc_ide_output_data,
+              ideproc_atapi_input_bytes, ideproc_atapi_output_bytes
+} ide_ide_action_t;
+
+typedef void (ide_ideproc_t)(ide_ide_action_t, ide_drive_t *, void *, unsigned int);
+
+/*
+ * An ide_tuneproc_t() is used to set the speed of an IDE interface
+ * to a particular PIO mode.  The "byte" parameter is used
+ * to select the PIO mode by number (0,1,2,3,4,5), and a value of 255
+ * indicates that the interface driver should "auto-tune" the PIO mode
+ * according to the drive capabilities in drive->id.
+ *
+ * Not all interface types support tuning, and not all of those
+ * support all possible PIO settings.  They may silently ignore
+ * or round values as they see fit.
+ */
+typedef void (ide_tuneproc_t) (ide_drive_t *, byte);
+typedef int (ide_speedproc_t) (ide_drive_t *, byte);
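+
+/*
+ * A tuneproc sketch (hypothetical interface): 255 means "auto-tune",
+ * anything else selects a PIO mode directly.
+ *
+ *     static void mychip_tuneproc(ide_drive_t *drive, byte pio)
+ *     {
+ *             if (pio == 255)
+ *                     pio = 2;  (a real driver would inspect drive->id here)
+ *             program the chipset timing registers for the chosen mode...
+ *     }
+ */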
+
+/*
+ * This is used to provide support for strange interfaces
+ */
+typedef void (ide_selectproc_t) (ide_drive_t *);
+typedef void (ide_resetproc_t) (ide_drive_t *);
+typedef int (ide_quirkproc_t) (ide_drive_t *);
+typedef void (ide_intrproc_t) (ide_drive_t *);
+typedef void (ide_maskproc_t) (ide_drive_t *, int);
+typedef void (ide_rw_proc_t) (ide_drive_t *, ide_dma_action_t);
+
+/*
+ * ide soft-power support
+ */
+typedef int (ide_busproc_t) (ide_drive_t *, int);
+
+#define IDE_CHIPSET_PCI_MASK   \
+    ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
+#define IDE_CHIPSET_IS_PCI(c)  ((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+typedef struct ide_pci_devid_s {
+       unsigned short  vid;
+       unsigned short  did;
+} ide_pci_devid_t;
+
+#define IDE_PCI_DEVID_NULL     ((ide_pci_devid_t){0,0})
+#define IDE_PCI_DEVID_EQ(a,b)  (a.vid == b.vid && a.did == b.did)
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+
+typedef struct hwif_s {
+       struct hwif_s   *next;          /* for linked-list in ide_hwgroup_t */
+       void            *hwgroup;       /* actually (ide_hwgroup_t *) */
+       ide_ioreg_t     io_ports[IDE_NR_PORTS]; /* task file registers */
+       hw_regs_t       hw;             /* Hardware info */
+       ide_drive_t     drives[MAX_DRIVES];     /* drive info */
+       struct gendisk  *gd;            /* gendisk structure */
+       ide_tuneproc_t  *tuneproc;      /* routine to tune PIO mode for drives */
+       ide_speedproc_t *speedproc;     /* routine to retune DMA modes for drives */
+       ide_selectproc_t *selectproc;   /* tweaks hardware to select drive */
+       ide_resetproc_t *resetproc;     /* routine to reset controller after a disk reset */
+       ide_intrproc_t  *intrproc;      /* special interrupt handling for shared pci interrupts */
+       ide_maskproc_t  *maskproc;      /* special host masking for drive selection */
+       ide_quirkproc_t *quirkproc;     /* check host's drive quirk list */
+       ide_rw_proc_t   *rwproc;        /* adjust timing based upon rq->cmd direction */
+       ide_ideproc_t   *ideproc;       /* CPU-polled transfer routine */
+       ide_dmaproc_t   *dmaproc;       /* dma read/write/abort routine */
+       unsigned int    *dmatable_cpu;  /* dma physical region descriptor table (cpu view) */
+       dma_addr_t      dmatable_dma;   /* dma physical region descriptor table (dma view) */
+       struct scatterlist *sg_table;   /* Scatter-gather list used to build the above */
+       int sg_nents;                   /* Current number of entries in it */
+       int sg_dma_direction;           /* dma transfer direction */
+       int sg_dma_active;              /* is it in use */
+       struct hwif_s   *mate;          /* other hwif from same PCI chip */
+       unsigned long   dma_base;       /* base addr for dma ports */
+       unsigned        dma_extra;      /* extra addr for dma ports */
+       unsigned long   config_data;    /* for use by chipset-specific code */
+       unsigned long   select_data;    /* for use by chipset-specific code */
+       struct proc_dir_entry *proc;    /* /proc/ide/ directory entry */
+       int             irq;            /* our irq number */
+       byte            major;          /* our major number */
+       char            name[6];        /* name of interface, eg. "ide0" */
+       byte            index;          /* 0 for ide0; 1 for ide1; ... */
+       hwif_chipset_t  chipset;        /* sub-module for tuning.. */
+       unsigned        noprobe    : 1; /* don't probe for this interface */
+       unsigned        present    : 1; /* this interface exists */
+       unsigned        serialized : 1; /* serialized operation with mate hwif */
+       unsigned        sharing_irq: 1; /* 1 = sharing irq with another hwif */
+       unsigned        reset      : 1; /* reset after probe */
+       unsigned        autodma    : 1; /* automatically try to enable DMA at boot */
+       unsigned        udma_four  : 1; /* 1=ATA-66 capable, 0=default */
+       byte            channel;        /* for dual-port chips: 0=primary, 1=secondary */
+#ifdef CONFIG_BLK_DEV_IDEPCI
+       struct pci_dev  *pci_dev;       /* for pci chipsets */
+       ide_pci_devid_t pci_devid;      /* for pci chipsets: {VID,DID} */
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+#if (DISK_RECOVERY_TIME > 0)
+       unsigned long   last_time;      /* time when previous rq was done */
+#endif
+       byte            straight8;      /* Alan's straight 8 check */
+       void            *hwif_data;     /* extra hwif data */
+       ide_busproc_t   *busproc;       /* driver soft-power interface */
+       byte            bus_state;      /* power state of the IDE bus */
+} ide_hwif_t;
+
+/*
+ * Status returned from various ide_ functions
+ */
+typedef enum {
+       ide_stopped,    /* no drive operation was started */
+       ide_started     /* a drive operation was started, and a handler was set */
+} ide_startstop_t;
+
+/*
+ *  internal ide interrupt handler type
+ */
+typedef ide_startstop_t (ide_pre_handler_t)(ide_drive_t *, struct request *);
+typedef ide_startstop_t (ide_handler_t)(ide_drive_t *);
+typedef ide_startstop_t (ide_post_handler_t)(ide_drive_t *);
+
+/*
+ * when ide_timer_expiry fires, invoke a handler of this type
+ * to decide what to do.
+ */
+typedef int (ide_expiry_t)(ide_drive_t *);
+
+typedef struct hwgroup_s {
+       ide_handler_t           *handler;/* irq handler, if active */
+       volatile int            busy;   /* BOOL: protects all fields below */
+       int                     sleeping; /* BOOL: wake us up on timer expiry */
+       ide_drive_t             *drive; /* current drive */
+       ide_hwif_t              *hwif;  /* ptr to current hwif in linked-list */
+       struct request          *rq;    /* current request */
+       struct timer_list       timer;  /* failsafe timer */
+       struct request          wrq;    /* local copy of current write rq */
+       unsigned long           poll_timeout;   /* timeout value during long polls */
+       ide_expiry_t            *expiry;        /* queried upon timeouts */
+} ide_hwgroup_t;
+
+/* structure attached to the request for IDE_TASK_CMDS */
+
+/*
+ * configurable drive settings
+ */
+
+#define TYPE_INT       0
+#define TYPE_INTA      1
+#define TYPE_BYTE      2
+#define TYPE_SHORT     3
+
+#define SETTING_READ   (1 << 0)
+#define SETTING_WRITE  (1 << 1)
+#define SETTING_RW     (SETTING_READ | SETTING_WRITE)
+
+typedef int (ide_procset_t)(ide_drive_t *, int);
+typedef struct ide_settings_s {
+       char                    *name;
+       int                     rw;
+       int                     read_ioctl;
+       int                     write_ioctl;
+       int                     data_type;
+       int                     min;
+       int                     max;
+       int                     mul_factor;
+       int                     div_factor;
+       void                    *data;
+       ide_procset_t           *set;
+       int                     auto_remove;
+       struct ide_settings_s   *next;
+} ide_settings_t;
+
+void ide_add_setting(ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set);
+void ide_remove_setting(ide_drive_t *drive, char *name);
+ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name);
+int ide_read_setting(ide_drive_t *t, ide_settings_t *setting);
+int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val);
+void ide_add_generic_settings(ide_drive_t *drive);
+
+#if 0
+/*
+ * /proc/ide interface
+ */
+typedef struct {
+       const char      *name;
+       mode_t          mode;
+       read_proc_t     *read_proc;
+       write_proc_t    *write_proc;
+} ide_proc_entry_t;
+#endif
+
+#ifdef CONFIG_PROC_FS
+void proc_ide_create(void);
+void proc_ide_destroy(void);
+void recreate_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_device(ide_hwif_t *, ide_drive_t *);
+void destroy_proc_ide_drives(ide_hwif_t *);
+void create_proc_ide_interfaces(void);
+void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data);
+void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p);
+read_proc_t proc_ide_read_capacity;
+read_proc_t proc_ide_read_geometry;
+
+/*
+ * Standard exit stuff:
+ */
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \
+{                                      \
+       len -= off;                     \
+       if (len < count) {              \
+               *eof = 1;               \
+               if (len <= 0)           \
+                       return 0;       \
+       } else                          \
+               len = count;            \
+       *start = page + off;            \
+       return len;                     \
+}
+#else
+#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
+#endif
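+
+/*
+ * A read_proc sketch using the exit macro above (hypothetical entry;
+ * the 2.4-style read_proc_t signature is assumed):
+ *
+ *     static int proc_ide_read_example(char *page, char **start, off_t off,
+ *                                      int count, int *eof, void *data)
+ *     {
+ *             ide_drive_t *drive = (ide_drive_t *) data;
+ *             int len = sprintf(page, "%s\n", drive->name);
+ *             PROC_IDE_READ_RETURN(page, start, off, count, eof, len);
+ *     }
+ */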
+
+/*
+ * Subdrivers support.
+ */
+#define IDE_SUBDRIVER_VERSION  1
+
+typedef int            (ide_cleanup_proc)(ide_drive_t *);
+typedef int            (ide_standby_proc)(ide_drive_t *);
+typedef int            (ide_flushcache_proc)(ide_drive_t *);
+typedef ide_startstop_t        (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long);
+typedef void           (ide_end_request_proc)(byte, ide_hwgroup_t *);
+typedef int            (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
+typedef int            (ide_open_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef void           (ide_release_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef int            (ide_check_media_change_proc)(ide_drive_t *);
+typedef void           (ide_revalidate_proc)(ide_drive_t *);
+typedef void           (ide_pre_reset_proc)(ide_drive_t *);
+typedef unsigned long  (ide_capacity_proc)(ide_drive_t *);
+typedef ide_startstop_t        (ide_special_proc)(ide_drive_t *);
+typedef void           (ide_setting_proc)(ide_drive_t *);
+typedef int            (ide_reinit_proc)(ide_drive_t *);
+typedef void           (ata_prebuilder_proc)(ide_drive_t *);
+typedef void           (atapi_prebuilder_proc)(ide_drive_t *);
+
+typedef struct ide_driver_s {
+       const char                      *name;
+       const char                      *version;
+       byte                            media;
+       unsigned busy                   : 1;
+       unsigned supports_dma           : 1;
+       unsigned supports_dsc_overlap   : 1;
+       ide_cleanup_proc                *cleanup;
+       ide_standby_proc                *standby;
+       ide_flushcache_proc             *flushcache;
+       ide_do_request_proc             *do_request;
+       ide_end_request_proc            *end_request;
+       ide_ioctl_proc                  *ioctl;
+       ide_open_proc                   *open;
+       ide_release_proc                *release;
+       ide_check_media_change_proc     *media_change;
+       ide_revalidate_proc             *revalidate;
+       ide_pre_reset_proc              *pre_reset;
+       ide_capacity_proc               *capacity;
+       ide_special_proc                *special;
+    /*ide_proc_entry_t         *proc;*/
+       ide_reinit_proc                 *reinit;
+       ata_prebuilder_proc             *ata_prebuilder;
+       atapi_prebuilder_proc           *atapi_prebuilder;
+} ide_driver_t;
+
+#define DRIVER(drive)          ((ide_driver_t *)((drive)->driver))
+
+/*
+ * IDE modules.
+ */
+#define IDE_CHIPSET_MODULE             0       /* not supported yet */
+#define IDE_PROBE_MODULE               1
+#define IDE_DRIVER_MODULE              2
+
+typedef int    (ide_module_init_proc)(void);
+
+typedef struct ide_module_s {
+       int                             type;
+       ide_module_init_proc            *init;
+       void                            *info;
+       struct ide_module_s             *next;
+} ide_module_t;
+
+/*
+ * ide_hwifs[] is the master data structure used to keep track
+ * of just about everything in ide.c.  Whenever possible, routines
+ * should be using pointers to a drive (ide_drive_t *) or
+ * pointers to a hwif (ide_hwif_t *), rather than indexing this
+ * structure directly (the allocation/layout may change!).
+ *
+ */
+#ifndef _IDE_C
+extern ide_hwif_t      ide_hwifs[];            /* master data repository */
+extern ide_module_t    *ide_modules;
+extern ide_module_t    *ide_probe;
+#endif
+extern int noautodma;
+
+/*
+ * We need blk.h, but we replace its end_request by our own version.
+ */
+#define IDE_DRIVER             /* Toggle some magic bits in blk.h */
+#define LOCAL_END_REQUEST      /* Don't generate end_request in blk.h */
+#include <xeno/blk.h>
+
+void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup);
+
+/*
+ * This is used for (nearly) all data transfers from/to the IDE interface
+ * FIXME for 2.5: pass a pointer instead of using memcpy.
+ */
+void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+
+/*
+ * This is used for (nearly) all ATAPI data transfers from/to the IDE interface
+ * FIXME for 2.5: pass a pointer instead of using memcpy.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+
+int drive_is_ready (ide_drive_t *drive);
+
+/*
+ * This is used on exit from the driver, to designate the next irq handler
+ * and also to start the safety timer.
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry);
+
+/*
+ * Error reporting, in human readable form (luxurious, but a memory hog).
+ */
+byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * ide_error() takes action based on the error returned by the controller.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
+
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler);
+
+/*
+ * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
+ * removing leading/trailing blanks and compressing internal blanks.
+ * It is primarily used to tidy up the model name/number fields as
+ * returned by the WIN_[P]IDENTIFY commands.
+ */
+void ide_fixstring (byte *s, const int bytecount, const int byteswap);
+
+/*
+ * This routine busy-waits for the drive status to be not "busy".
+ * It then checks the status for all of the "good" bits and none
+ * of the "bad" bits, and if all is okay it returns 0.  All other
+ * cases return 1 after doing "*startstop = ide_error()", and the
+ * caller should return the updated value of "startstop" in this case.
+ * "startstop" is unchanged when the function returns 0;
+ */
+int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
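+
+/*
+ * Typical call pattern (sketch), per the comment above:
+ *
+ *     ide_startstop_t startstop;
+ *     if (ide_wait_stat(&startstop, drive, DRIVE_READY, BAD_STAT, WAIT_READY))
+ *             return startstop;  (error path: ide_error() has already run)
+ *     the drive is now ready; "startstop" was left untouched
+ */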
+
+int ide_wait_noerr (ide_drive_t *drive, byte good, byte bad, unsigned long timeout);
+
+/*
+ * This routine is called from the partition-table code in genhd.c
+ * to "convert" a drive to a logical geometry with fewer than 1024 cyls.
+ */
+int ide_xlate_1024 (kdev_t, int, int, const char *);
+
+/*
+ * Convert a kdev_t into the corresponding ide_drive_t pointer.
+ */
+ide_drive_t *get_info_ptr (kdev_t i_rdev);
+
+/*
+ * Return the current idea about the total capacity of this drive.
+ */
+unsigned long current_capacity (ide_drive_t *drive);
+
+/*
+ * Start a reset operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t ide_do_reset (ide_drive_t *);
+
+/*
+ * Restart an operation for an IDE interface.
+ * The caller should return immediately after invoking this.
+ */
+ide_startstop_t restart_request (ide_drive_t *);
+
+/*
+ * This function is intended to be used prior to invoking ide_do_drive_cmd().
+ */
+void ide_init_drive_cmd (struct request *rq);
+
+/*
+ * "action" parameter type for ide_do_drive_cmd() below.
+ */
+typedef enum {
+       ide_wait,       /* insert rq at end of list, and wait for it */
+       ide_next,       /* insert rq immediately after current request */
+       ide_preempt,    /* insert rq in front of current request */
+       ide_end         /* insert rq at end of list, but don't wait for it */
+} ide_action_t;
+
+/*
+ * This function issues a special IDE device request
+ * onto the request queue.
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed.  This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed.  As above,
+ * This is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action);
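+
+/*
+ * Sketch of the ioctl-style (ide_wait) usage described above:
+ *
+ *     struct request rq;
+ *     ide_init_drive_cmd(&rq);
+ *     fill in rq.buffer and the command bytes as the subdriver requires...
+ *     return ide_do_drive_cmd(drive, &rq, ide_wait);  (sleeps until done)
+ */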
+
+/*
+ * Clean up after success/failure of an explicit drive cmd.
+ * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_CMD)
+ * or (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASK_MASK).
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err);
+
+/*
+ * Issue an ATA command and wait for completion.
+ * Use for implementing commands in the kernel.
+ */
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf);
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf);
+typedef struct ide_task_s {
+       task_ioreg_t            tfRegister[8];
+       task_ioreg_t            hobRegister[8];
+       ide_reg_valid_t         tf_out_flags;
+       ide_reg_valid_t         tf_in_flags;
+       int                     data_phase;
+       int                     command_type;
+       ide_pre_handler_t       *prehandler;
+       ide_handler_t           *handler;
+       ide_post_handler_t      *posthandler;
+       void                    *special;       /* valid_t generally */
+       struct request          *rq;            /* copy of request */
+       unsigned long           block;          /* copy of block */
+} ide_task_t;
+
+typedef struct pkt_task_s {
+       task_ioreg_t            tfRegister[8];
+       int                     data_phase;
+       int                     command_type;
+       ide_handler_t           *handler;
+       void                    *special;
+       struct request          *rq;            /* copy of request */
+       unsigned long           block;          /* copy of block */
+} pkt_task_t;
+
+/*
+ * Taskfile I/O (for disks only, for now).
+ */
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+/*
+ * Builds request from ide_ioctl
+ */
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler);
+
+/*
+ * Special Flagged Register Validation Caller
+ */
+// ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+ide_startstop_t set_multmode_intr (ide_drive_t *drive);
+ide_startstop_t set_geometry_intr (ide_drive_t *drive);
+ide_startstop_t recal_intr (ide_drive_t *drive);
+ide_startstop_t task_no_data_intr (ide_drive_t *drive);
+ide_startstop_t task_in_intr (ide_drive_t *drive);
+ide_startstop_t task_mulin_intr (ide_drive_t *drive);
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq);
+ide_startstop_t task_out_intr (ide_drive_t *drive);
+ide_startstop_t task_mulout_intr (ide_drive_t *drive);
+void ide_init_drive_taskfile (struct request *rq);
+
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf);
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *cmd, byte *buf);
+
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+/* Expects args to be a full set of TF registers; parses the command type */
+int ide_cmd_type_parser (ide_task_t *args);
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+
+void ide_delay_50ms (void);
+int system_bus_clock(void);
+
+byte ide_auto_reduce_xfer (ide_drive_t *drive);
+int ide_driveid_update (ide_drive_t *drive);
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args);
+int ide_config_drive_speed (ide_drive_t *drive, byte speed);
+byte eighty_ninty_three (ide_drive_t *drive);
+int set_transfer (ide_drive_t *drive, ide_task_t *args);
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz).  This is used for calculating interface PIO timings.
+ * The default is 40 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ */
+int ide_system_bus_speed (void);
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multwrite operation.
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount);
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout);
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev);
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks,
+ * but they never have a slave unit, and they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ */
+int drive_is_flashcard (ide_drive_t *drive);
+
+int ide_spin_wait_hwgroup (ide_drive_t *drive);
+void ide_timer_expiry (unsigned long data);
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs);
+void do_ide_request (request_queue_t * q);
+void ide_init_subdrivers (void);
+
+#ifndef _IDE_C
+extern struct block_device_operations ide_fops[];
+/*extern ide_proc_entry_t generic_subdriver_entries[];*/
+#endif
+
+int ide_reinit_drive (ide_drive_t *drive);
+
+#ifdef _IDE_C
+#ifdef CONFIG_BLK_DEV_IDE
+int ideprobe_init (void);
+#endif /* CONFIG_BLK_DEV_IDE */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+int idedisk_reinit (ide_drive_t *drive);
+int idedisk_init (void);
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+int ide_cdrom_reinit (ide_drive_t *drive);
+int ide_cdrom_init (void);
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+int idetape_reinit (ide_drive_t *drive);
+int idetape_init (void);
+#endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+int idefloppy_reinit (ide_drive_t *drive);
+int idefloppy_init (void);
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDESCSI
+int idescsi_reinit (ide_drive_t *drive);
+int idescsi_init (void);
+#endif /* CONFIG_BLK_DEV_IDESCSI */
+#endif /* _IDE_C */
+
+int ide_register_module (ide_module_t *module);
+void ide_unregister_module (ide_module_t *module);
+ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n);
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version);
+int ide_unregister_subdriver (ide_drive_t *drive);
+int ide_replace_subdriver(ide_drive_t *drive, const char *driver);
+
+#ifdef CONFIG_BLK_DEV_IDEPCI
+#define ON_BOARD               1
+#define NEVER_BOARD            0
+#ifdef CONFIG_BLK_DEV_OFFBOARD
+#  define OFF_BOARD            ON_BOARD
+#else /* CONFIG_BLK_DEV_OFFBOARD */
+#  define OFF_BOARD            NEVER_BOARD
+#endif /* CONFIG_BLK_DEV_OFFBOARD */
+
+unsigned long ide_find_free_region (unsigned short size) __init;
+void ide_scan_pcibus (int scan_direction) __init;
+#endif
+#ifdef CONFIG_BLK_DEV_IDEDMA
+#define BAD_DMA_DRIVE          0
+#define GOOD_DMA_DRIVE         1
+int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func);
+void ide_destroy_dmatable (ide_drive_t *drive);
+ide_startstop_t ide_dma_intr (ide_drive_t *drive);
+int check_drive_lists (ide_drive_t *drive, int good_bad);
+int report_drive_dmaing (ide_drive_t *drive);
+int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive);
+int ide_release_dma (ide_hwif_t *hwif);
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dmabase, unsigned int num_ports) __init;
+unsigned long ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name) __init;
+#endif
+
+void hwif_unregister (ide_hwif_t *hwif);
+
+void export_ide_init_queue (ide_drive_t *drive);
+byte export_probe_for_drive (ide_drive_t *drive);
+
+#endif /* _IDE_H */
diff --git a/xen-2.4.16/include/xeno/if.h b/xen-2.4.16/include/xeno/if.h
new file mode 100644 (file)
index 0000000..8d3fc2b
--- /dev/null
@@ -0,0 +1,141 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Global definitions for the INET interface module.
+ *
+ * Version:    @(#)if.h        1.0.2   04/18/93
+ *
+ * Authors:    Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988
+ *             Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_H
+#define _LINUX_IF_H
+
+#include <linux/types.h>               /* for "__kernel_caddr_t" et al */
+#include <linux/socket.h>              /* for "struct sockaddr" et al  */
+
+/* Standard interface flags (netdevice->flags). */
+#define        IFF_UP          0x1             /* interface is up              */
+#define        IFF_BROADCAST   0x2             /* broadcast address valid      */
+#define        IFF_DEBUG       0x4             /* turn on debugging            */
+#define        IFF_LOOPBACK    0x8             /* is a loopback net            */
+#define        IFF_POINTOPOINT 0x10            /* interface has p-p link       */
+#define        IFF_NOTRAILERS  0x20            /* avoid use of trailers        */
+#define        IFF_RUNNING     0x40            /* resources allocated          */
+#define        IFF_NOARP       0x80            /* no ARP protocol              */
+#define        IFF_PROMISC     0x100           /* receive all packets          */
+#define        IFF_ALLMULTI    0x200           /* receive all multicast packets*/
+
+#define IFF_MASTER     0x400           /* master of a load balancer    */
+#define IFF_SLAVE      0x800           /* slave of a load balancer     */
+
+#define IFF_MULTICAST  0x1000          /* Supports multicast           */
+
+#define IFF_VOLATILE   (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING)
+
+#define IFF_PORTSEL    0x2000          /* can set media type           */
+#define IFF_AUTOMEDIA  0x4000          /* auto media select active     */
+#define IFF_DYNAMIC    0x8000          /* dialup device with changing addresses*/
+
+/* Private (from user) interface flags (netdevice->priv_flags). */
+#define IFF_802_1Q_VLAN 0x1             /* 802.1Q VLAN device.          */
+
+/*
+ *     Device mapping structure. I'd just gone off and designed a 
+ *     beautiful scheme using only loadable modules with arguments
+ *     for driver options and along come the PCMCIA people 8)
+ *
+ *     Ah well. The get() side of this is good for WDSETUP, and it'll
+ *     be handy for debugging things. The set side is fine for now and
+ *     being very small might be worth keeping for clean configuration.
+ */
+
+struct ifmap 
+{
+       unsigned long mem_start;
+       unsigned long mem_end;
+       unsigned short base_addr; 
+       unsigned char irq;
+       unsigned char dma;
+       unsigned char port;
+       /* 3 bytes spare */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's.  All interface ioctl's must have parameter
+ * definitions which begin with ifr_name.  The
+ * remainder may be interface specific.
+ */
+
+struct ifreq 
+{
+#define IFHWADDRLEN    6
+#define        IFNAMSIZ        16
+       union
+       {
+               char    ifrn_name[IFNAMSIZ];            /* if name, e.g. "en0" */
+       } ifr_ifrn;
+       
+       union {
+               struct  sockaddr ifru_addr;
+               struct  sockaddr ifru_dstaddr;
+               struct  sockaddr ifru_broadaddr;
+               struct  sockaddr ifru_netmask;
+               struct  sockaddr ifru_hwaddr;
+               short   ifru_flags;
+               int     ifru_ivalue;
+               int     ifru_mtu;
+               struct  ifmap ifru_map;
+               char    ifru_slave[IFNAMSIZ];   /* Just fits the size */
+               char    ifru_newname[IFNAMSIZ];
+               char *  ifru_data;
+       } ifr_ifru;
+};
+
+#define ifr_name       ifr_ifrn.ifrn_name      /* interface name       */
+#define ifr_hwaddr     ifr_ifru.ifru_hwaddr    /* MAC address          */
+#define        ifr_addr        ifr_ifru.ifru_addr      /* address              */
+#define        ifr_dstaddr     ifr_ifru.ifru_dstaddr   /* other end of p-p lnk */
+#define        ifr_broadaddr   ifr_ifru.ifru_broadaddr /* broadcast address    */
+#define        ifr_netmask     ifr_ifru.ifru_netmask   /* interface net mask   */
+#define        ifr_flags       ifr_ifru.ifru_flags     /* flags                */
+#define        ifr_metric      ifr_ifru.ifru_ivalue    /* metric               */
+#define        ifr_mtu         ifr_ifru.ifru_mtu       /* mtu                  */
+#define ifr_map                ifr_ifru.ifru_map       /* device map           */
+#define ifr_slave      ifr_ifru.ifru_slave     /* slave device         */
+#define        ifr_data        ifr_ifru.ifru_data      /* for use by interface */
+#define ifr_ifindex    ifr_ifru.ifru_ivalue    /* interface index      */
+#define ifr_bandwidth  ifr_ifru.ifru_ivalue    /* link bandwidth       */
+#define ifr_qlen       ifr_ifru.ifru_ivalue    /* Queue length         */
+#define ifr_newname    ifr_ifru.ifru_newname   /* New name             */
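+
+/*
+ * Typical use of the access macros (user-level sketch; "fd" is assumed
+ * to be an open socket descriptor):
+ *
+ *     struct ifreq ifr;
+ *     strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
+ *     if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0 && (ifr.ifr_flags & IFF_UP))
+ *             the interface is up...
+ */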
+
+/*
+ * Structure used in SIOCGIFCONF request.
+ * Used to retrieve interface configuration
+ * for machine (useful for programs which
+ * must know all networks accessible).
+ */
+
+struct ifconf 
+{
+       int     ifc_len;                        /* size of buffer       */
+       union 
+       {
+               char *                  ifcu_buf;
+               struct  ifreq           *ifcu_req;
+       } ifc_ifcu;
+};
+#define        ifc_buf ifc_ifcu.ifcu_buf               /* buffer address       */
+#define        ifc_req ifc_ifcu.ifcu_req               /* array of structures  */
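+
+/*
+ * SIOCGIFCONF sketch (user-level, illustrative; "fd" is an open socket):
+ *
+ *     char buf[4096];
+ *     struct ifconf ifc;
+ *     ifc.ifc_len = sizeof(buf);
+ *     ifc.ifc_buf = buf;
+ *     if (ioctl(fd, SIOCGIFCONF, &ifc) == 0) {
+ *             int n = ifc.ifc_len / sizeof(struct ifreq);
+ *             ifc.ifc_req[0..n-1] now describe the configured interfaces
+ *     }
+ */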
+
+
+#endif /* _LINUX_IF_H */
diff --git a/xen-2.4.16/include/xeno/if_ether.h b/xen-2.4.16/include/xeno/if_ether.h
new file mode 100644 (file)
index 0000000..b64559d
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Global definitions for the Ethernet IEEE 802.3 interface.
+ *
+ * Version:    @(#)if_ether.h  1.0.1a  02/08/94
+ *
+ * Author:     Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Donald Becker, <becker@super.org>
+ *             Alan Cox, <alan@redhat.com>
+ *             Steve Whitehouse, <gw7rrm@eeshack3.swan.ac.uk>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_ETHER_H
+#define _LINUX_IF_ETHER_H
+
+/*
+ *     IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
+ *     and FCS/CRC (frame check sequence). 
+ */
+
+#define ETH_ALEN       6               /* Octets in one ethernet addr   */
+#define ETH_HLEN       14              /* Total octets in header.       */
+#define ETH_ZLEN       60              /* Min. octets in frame sans FCS */
+#define ETH_DATA_LEN   1500            /* Max. octets in payload        */
+#define ETH_FRAME_LEN  1514            /* Max. octets in frame sans FCS */
+
+/*
+ *     These are the defined Ethernet Protocol ID's.
+ */
+
+#define ETH_P_LOOP     0x0060          /* Ethernet Loopback packet     */
+#define ETH_P_PUP      0x0200          /* Xerox PUP packet             */
+#define ETH_P_PUPAT    0x0201          /* Xerox PUP Addr Trans packet  */
+#define ETH_P_IP       0x0800          /* Internet Protocol packet     */
+#define ETH_P_X25      0x0805          /* CCITT X.25                   */
+#define ETH_P_ARP      0x0806          /* Address Resolution packet    */
+#define        ETH_P_BPQ       0x08FF          /* G8BPQ AX.25 Ethernet Packet  [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IEEEPUP  0x0a00          /* Xerox IEEE802.3 PUP packet */
+#define ETH_P_IEEEPUPAT        0x0a01          /* Xerox IEEE802.3 PUP Addr Trans packet */
+#define ETH_P_DEC       0x6000          /* DEC Assigned proto           */
+#define ETH_P_DNA_DL    0x6001          /* DEC DNA Dump/Load            */
+#define ETH_P_DNA_RC    0x6002          /* DEC DNA Remote Console       */
+#define ETH_P_DNA_RT    0x6003          /* DEC DNA Routing              */
+#define ETH_P_LAT       0x6004          /* DEC LAT                      */
+#define ETH_P_DIAG      0x6005          /* DEC Diagnostics              */
+#define ETH_P_CUST      0x6006          /* DEC Customer use             */
+#define ETH_P_SCA       0x6007          /* DEC Systems Comms Arch       */
+#define ETH_P_RARP      0x8035         /* Reverse Addr Res packet      */
+#define ETH_P_ATALK    0x809B          /* Appletalk DDP                */
+#define ETH_P_AARP     0x80F3          /* Appletalk AARP               */
+#define ETH_P_8021Q    0x8100          /* 802.1Q VLAN Extended Header  */
+#define ETH_P_IPX      0x8137          /* IPX over DIX                 */
+#define ETH_P_IPV6     0x86DD          /* IPv6 over bluebook           */
+#define ETH_P_PPP_DISC 0x8863          /* PPPoE discovery messages     */
+#define ETH_P_PPP_SES  0x8864          /* PPPoE session messages       */
+#define ETH_P_ATMMPOA  0x884c          /* MultiProtocol Over ATM       */
+#define ETH_P_ATMFATE  0x8884          /* Frame-based ATM Transport
+                                        * over Ethernet
+                                        */
+
+/*
+ *     Non DIX types. Won't clash for 1500 types.
+ */
+#define ETH_P_802_3    0x0001          /* Dummy type for 802.3 frames  */
+#define ETH_P_AX25     0x0002          /* Dummy protocol id for AX.25  */
+#define ETH_P_ALL      0x0003          /* Every packet (be careful!!!) */
+#define ETH_P_802_2    0x0004          /* 802.2 frames                 */
+#define ETH_P_SNAP     0x0005          /* Internal only                */
+#define ETH_P_DDCMP     0x0006          /* DEC DDCMP: Internal only     */
+#define ETH_P_WAN_PPP   0x0007          /* Dummy type for WAN PPP frames*/
+#define ETH_P_PPP_MP    0x0008          /* Dummy type for PPP MP frames */
+#define ETH_P_LOCALTALK 0x0009         /* Localtalk pseudo type        */
+#define ETH_P_PPPTALK  0x0010          /* Dummy type for Atalk over PPP*/
+#define ETH_P_TR_802_2 0x0011          /* 802.2 frames                 */
+#define ETH_P_MOBITEX  0x0015          /* Mobitex (kaz@cafe.net)       */
+#define ETH_P_CONTROL  0x0016          /* Card specific control frames */
+#define ETH_P_IRDA     0x0017          /* Linux-IrDA                   */
+#define ETH_P_ECONET   0x0018          /* Acorn Econet                 */
+
+/*
+ *     This is an Ethernet frame header.
+ */
+struct ethhdr 
+{
+       unsigned char   h_dest[ETH_ALEN];       /* destination eth addr */
+       unsigned char   h_source[ETH_ALEN];     /* source ether addr    */
+       unsigned short  h_proto;                /* packet type ID field */
+};
+
+#endif /* _LINUX_IF_ETHER_H */
diff --git a/xen-2.4.16/include/xeno/if_packet.h b/xen-2.4.16/include/xeno/if_packet.h
new file mode 100644 (file)
index 0000000..b925585
--- /dev/null
@@ -0,0 +1,102 @@
+#ifndef __LINUX_IF_PACKET_H
+#define __LINUX_IF_PACKET_H
+
+struct sockaddr_pkt
+{
+       unsigned short spkt_family;
+       unsigned char spkt_device[14];
+       unsigned short spkt_protocol;
+};
+
+struct sockaddr_ll
+{
+       unsigned short  sll_family;
+       unsigned short  sll_protocol;
+       int             sll_ifindex;
+       unsigned short  sll_hatype;
+       unsigned char   sll_pkttype;
+       unsigned char   sll_halen;
+       unsigned char   sll_addr[8];
+};
+
+/* Packet types */
+
+#define PACKET_HOST            0               /* To us                */
+#define PACKET_BROADCAST       1               /* To all               */
+#define PACKET_MULTICAST       2               /* To group             */
+#define PACKET_OTHERHOST       3               /* To someone else      */
+#define PACKET_OUTGOING                4               /* Outgoing of any type */
+/* These are invisible to user level */
+#define PACKET_LOOPBACK                5               /* MC/BRD frame looped back */
+#define PACKET_FASTROUTE       6               /* Fastrouted frame     */
+
+/* Packet socket options */
+
+#define PACKET_ADD_MEMBERSHIP          1
+#define PACKET_DROP_MEMBERSHIP         2
+#define PACKET_RECV_OUTPUT             3
+/* Value 4 is still used by obsolete turbo-packet. */
+#define PACKET_RX_RING                 5
+#define PACKET_STATISTICS              6
+#define PACKET_COPY_THRESH             7
+
+struct tpacket_stats
+{
+       unsigned int    tp_packets;
+       unsigned int    tp_drops;
+};
+
+struct tpacket_hdr
+{
+       unsigned long   tp_status;
+#define TP_STATUS_KERNEL       0
+#define TP_STATUS_USER         1
+#define TP_STATUS_COPY         2
+#define TP_STATUS_LOSING       4
+#define TP_STATUS_CSUMNOTREADY 8
+       unsigned int    tp_len;
+       unsigned int    tp_snaplen;
+       unsigned short  tp_mac;
+       unsigned short  tp_net;
+       unsigned int    tp_sec;
+       unsigned int    tp_usec;
+};
+
+#define TPACKET_ALIGNMENT      16
+#define TPACKET_ALIGN(x)       (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1))
+#define TPACKET_HDRLEN         (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll))
+
+/*
+   Frame structure:
+
+   - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
+   - struct tpacket_hdr
+   - pad to TPACKET_ALIGNMENT=16
+   - struct sockaddr_ll
+   - Gap, chosen so that packet data (Start+tp_net) aligns to TPACKET_ALIGNMENT=16
+   - Start+tp_mac: [ Optional MAC header ]
+   - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
+   - Pad to align to TPACKET_ALIGNMENT=16
+ */
+
+struct tpacket_req
+{
+       unsigned int    tp_block_size;  /* Minimal size of contiguous block */
+       unsigned int    tp_block_nr;    /* Number of blocks */
+       unsigned int    tp_frame_size;  /* Size of frame */
+       unsigned int    tp_frame_nr;    /* Total number of frames */
+};
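+
+/*
+ * Ring setup sketch following the frame layout above (hypothetical
+ * sizes; 1514 is the maximum Ethernet frame length, and the request
+ * would be passed to setsockopt() with SOL_PACKET/PACKET_RX_RING):
+ *
+ *     struct tpacket_req req;
+ *     req.tp_block_size = 4096;
+ *     req.tp_frame_size = TPACKET_ALIGN(TPACKET_HDRLEN + 1514);
+ *     req.tp_block_nr   = 64;
+ *     req.tp_frame_nr   = (req.tp_block_size / req.tp_frame_size)
+ *                         * req.tp_block_nr;
+ */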
+
+struct packet_mreq
+{
+       int             mr_ifindex;
+       unsigned short  mr_type;
+       unsigned short  mr_alen;
+       unsigned char   mr_address[8];
+};
+
+#define PACKET_MR_MULTICAST    0
+#define PACKET_MR_PROMISC      1
+#define PACKET_MR_ALLMULTI     2
+
+#endif
diff --git a/xen-2.4.16/include/xeno/init.h b/xen-2.4.16/include/xeno/init.h
new file mode 100644 (file)
index 0000000..5c4477f
--- /dev/null
@@ -0,0 +1,170 @@
+#ifndef _LINUX_INIT_H
+#define _LINUX_INIT_H
+
+#include <linux/config.h>
+
+/* These macros are used to mark some functions or 
+ * initialized data (doesn't apply to uninitialized data)
+ * as `initialization' functions. The kernel can take this
+ * as a hint that the function is used only during the initialization
+ * phase, and free up the used memory resources afterwards.
+ *
+ * Usage:
+ * For functions:
+ * 
+ * You should add __init immediately before the function name, like:
+ *
+ * static void __init initme(int x, int y)
+ * {
+ *    extern int z; z = x * y;
+ * }
+ *
+ * If the function has a prototype somewhere, you can also add
+ * __init between the closing parenthesis of the prototype and the semicolon:
+ *
+ * extern int initialize_foobar_device(int, int, int) __init;
+ *
+ * For initialized data:
+ * You should insert __initdata between the variable name and the equals
+ * sign, followed by the value, e.g.:
+ *
+ * static int init_variable __initdata = 0;
+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ *
+ * Don't forget to initialize data not at file scope, i.e. within a function,
+ * as gcc otherwise puts the data into the bss section and not into the init
+ * section.
+ * 
+ * Also note that this data cannot be "const".
+ */
+
+#ifndef MODULE
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used for initialization calls..
+ */
+typedef int (*initcall_t)(void);
+typedef void (*exitcall_t)(void);
+
+extern initcall_t __initcall_start, __initcall_end;
+
+#define __initcall(fn)                                                         \
+       static initcall_t __initcall_##fn __init_call = fn
+#define __exitcall(fn)                                                         \
+       static exitcall_t __exitcall_##fn __exit_call = fn
+
+/*
+ * Used for kernel command line parameter setup
+ */
+struct kernel_param {
+       const char *str;
+       int (*setup_func)(char *);
+};
+
+extern struct kernel_param __setup_start, __setup_end;
+
+#define __setup(str, fn)                                                               \
+       static char __setup_str_##fn[] __initdata = str;                                \
+       static struct kernel_param __setup_##fn __attribute__((unused)) __initsetup = { __setup_str_##fn, fn }
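+
+/*
+ * __setup() sketch (hypothetical "mydrv=" boot parameter):
+ *
+ *     static int __init mydrv_setup(char *str)
+ *     {
+ *             parse str here...
+ *             return 1;
+ *     }
+ *     __setup("mydrv=", mydrv_setup);
+ */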
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init         __attribute__ ((__section__ (".text.init")))
+#define __exit         __attribute__ ((unused, __section__(".text.exit")))
+#define __initdata     __attribute__ ((__section__ (".data.init")))
+#define __exitdata     __attribute__ ((unused, __section__ (".data.exit")))
+#define __initsetup    __attribute__ ((unused,__section__ (".setup.init")))
+#define __init_call    __attribute__ ((unused,__section__ (".initcall.init")))
+#define __exit_call    __attribute__ ((unused,__section__ (".exitcall.exit")))
+
+/* For assembly routines */
+#define __INIT         .section        ".text.init","ax"
+#define __FINIT                .previous
+#define __INITDATA     .section        ".data.init","aw"
+
+/**
+ * module_init() - driver initialization entry point
+ * @x: function to be run at kernel boot time or module insertion
+ * 
+ * module_init() will add the driver initialization routine in
+ * the "__initcall.int" code segment if the driver is checked as
+ * "y" or static, or else it will wrap the driver initialization
+ * routine with init_module() which is used by insmod and
+ * modprobe when the driver is used as a module.
+ */
+#define module_init(x) __initcall(x);
+
+/**
+ * module_exit() - driver exit entry point
+ * @x: function to be run when driver is removed
+ * 
+ * module_exit() will wrap the driver clean-up code
+ * with cleanup_module() when used with rmmod when
+ * the driver is a module.  If the driver is statically
+ * compiled into the kernel, module_exit() has no effect.
+ */
+#define module_exit(x) __exitcall(x);
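+
+/*
+ * Illustrative sketch (comment only): typical driver usage of the two
+ * macros above; foo_init()/foo_exit() are invented names.
+ *
+ * static int __init foo_init(void)
+ * {
+ *     return 0;               // 0 on success, negative on failure
+ * }
+ *
+ * static void __exit foo_exit(void)
+ * {
+ *     // undo whatever foo_init() set up
+ * }
+ *
+ * module_init(foo_init);
+ * module_exit(foo_exit);
+ */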
+
+#else
+
+#define __init
+#define __exit
+#define __initdata
+#define __exitdata
+#define __initcall(fn)
+/* For assembly routines */
+#define __INIT
+#define __FINIT
+#define __INITDATA
+
+/* These macros create a dummy inline: gcc 2.9x does not count alias
+ as usage, hence the `unused function' warning when __init functions
+ are declared static. We use the dummy __*_module_inline functions
+ both to kill the warning and check the type of the init/cleanup
+ function. */
+typedef int (*__init_module_func_t)(void);
+typedef void (*__cleanup_module_func_t)(void);
+#define module_init(x) \
+       int init_module(void) __attribute__((alias(#x))); \
+       static inline __init_module_func_t __init_module_inline(void) \
+       { return x; }
+#define module_exit(x) \
+       void cleanup_module(void) __attribute__((alias(#x))); \
+       static inline __cleanup_module_func_t __cleanup_module_inline(void) \
+       { return x; }
+
+#define __setup(str,func) /* nothing */
+
+#endif
+
+#ifdef CONFIG_HOTPLUG
+#define __devinit
+#define __devinitdata
+#define __devexit
+#define __devexitdata
+#else
+#define __devinit __init
+#define __devinitdata __initdata
+#define __devexit __exit
+#define __devexitdata __exitdata
+#endif
+
+/* Functions marked as __devexit may be discarded at kernel link time, depending
+   on config options.  Newer versions of binutils detect references from
+   retained sections to discarded sections and flag an error.  Pointers to
+   __devexit functions must use __devexit_p(function_name), the wrapper will
+   insert either the function_name or NULL, depending on the config options.
+ */
+#if defined(MODULE) || defined(CONFIG_HOTPLUG)
+#define __devexit_p(x) x
+#else
+#define __devexit_p(x) NULL
+#endif
+
+#endif /* _LINUX_INIT_H */
diff --git a/xen-2.4.16/include/xeno/interrupt.h b/xen-2.4.16/include/xeno/interrupt.h
new file mode 100644 (file)
index 0000000..0102f3e
--- /dev/null
@@ -0,0 +1,267 @@
+/* interrupt.h */
+#ifndef _LINUX_INTERRUPT_H
+#define _LINUX_INTERRUPT_H
+
+#include <linux/config.h>
+//#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/cache.h>
+
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/ptrace.h>
+
+struct irqaction {
+       void (*handler)(int, void *, struct pt_regs *);
+       unsigned long flags;
+       unsigned long mask;
+       const char *name;
+       void *dev_id;
+       struct irqaction *next;
+};
+
+
+/* Who gets which entry in bh_base.  Things that will occur most often
+   should come first. */
+   
+enum {
+       TIMER_BH = 0,
+       TQUEUE_BH,
+       DIGI_BH,
+       SERIAL_BH,
+       RISCOM8_BH,
+       SPECIALIX_BH,
+       AURORA_BH,
+       ESP_BH,
+       SCSI_BH,
+       IMMEDIATE_BH,
+       CYCLADES_BH,
+       CM206_BH,
+       JS_BH,
+       MACSERIAL_BH,
+       ISICOM_BH
+};
+
+#include <asm/hardirq.h>
+#include <asm/softirq.h>
+
+
+
+/* PLEASE avoid allocating new softirqs unless you really need very
+   high-frequency threaded job scheduling. For almost all purposes
+   tasklets are more than enough; e.g. all serial device BHs et
+   al. should be converted to tasklets, not to softirqs.
+ */
+
+enum
+{
+       HI_SOFTIRQ=0,
+       NET_TX_SOFTIRQ,
+       NET_RX_SOFTIRQ,
+       TASKLET_SOFTIRQ
+};
+
+/* softirq mask and active fields moved to irq_cpustat_t in
+ * asm/hardirq.h to get better cache usage.  KAO
+ */
+
+struct softirq_action
+{
+       void    (*action)(struct softirq_action *);
+       void    *data;
+};
+
+asmlinkage void do_softirq(void);
+extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+extern void softirq_init(void);
+#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0)
+extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr));
+extern void FASTCALL(raise_softirq(unsigned int nr));
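+
+/*
+ * Illustrative sketch (comment only), mirroring how the network stack
+ * uses this interface; net_tx_action()/net_dev_init() are assumed
+ * names, not declared in this header:
+ *
+ * static void net_tx_action(struct softirq_action *h)
+ * {
+ *     // transmit queued packets; runs in softirq context
+ * }
+ *
+ * static void __init net_dev_init(void)
+ * {
+ *     open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
+ * }
+ *
+ * // from an interrupt handler, on the local CPU:
+ * //     cpu_raise_softirq(smp_processor_id(), NET_TX_SOFTIRQ);
+ */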
+
+
+
+/* Tasklets --- multithreaded analogue of BHs.
+
+   The main feature distinguishing them from generic softirqs: a given
+   tasklet runs on only one CPU at a time.
+
+   The main feature distinguishing them from BHs: different tasklets
+   may run simultaneously on different CPUs.
+
+   Properties:
+   * If tasklet_schedule() is called, the tasklet is guaranteed
+     to be executed on some cpu at least once after this.
+   * If the tasklet is already scheduled, but its execution has not yet
+     started, it will be executed only once.
+   * If the tasklet is already running on another CPU (or schedule is called
+     from the tasklet itself), it is rescheduled for later.
+   * A tasklet is strictly serialized with respect to itself, but not
+     with respect to other tasklets. If a client needs inter-tasklet
+     synchronization, it must provide it with spinlocks.
+ */
+
+struct tasklet_struct
+{
+       struct tasklet_struct *next;
+       unsigned long state;
+       atomic_t count;
+       void (*func)(unsigned long);
+       unsigned long data;
+};
+
+#define DECLARE_TASKLET(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }
+
+#define DECLARE_TASKLET_DISABLED(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
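+
+/*
+ * Illustrative sketch (comment only): declaring a tasklet and
+ * scheduling it from an interrupt handler.  The foo_* names are
+ * invented for illustration.
+ *
+ * static void foo_do_work(unsigned long data)
+ * {
+ *     // deferred work: runs in softirq context, one CPU at a time
+ * }
+ *
+ * static DECLARE_TASKLET(foo_tasklet, foo_do_work, 0);
+ *
+ * static void foo_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ * {
+ *     // acknowledge the hardware, then defer the heavy lifting:
+ *     tasklet_schedule(&foo_tasklet);
+ * }
+ */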
+
+
+enum
+{
+       TASKLET_STATE_SCHED,    /* Tasklet is scheduled for execution */
+       TASKLET_STATE_RUN       /* Tasklet is running (SMP only) */
+};
+
+struct tasklet_head
+{
+       struct tasklet_struct *list;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct tasklet_head tasklet_vec[NR_CPUS];
+extern struct tasklet_head tasklet_hi_vec[NR_CPUS];
+
+#ifdef CONFIG_SMP
+static inline int tasklet_trylock(struct tasklet_struct *t)
+{
+       return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock(struct tasklet_struct *t)
+{
+       smp_mb__before_clear_bit(); 
+       clear_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock_wait(struct tasklet_struct *t)
+{
+       while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+}
+#else
+#define tasklet_trylock(t) 1
+#define tasklet_unlock_wait(t) do { } while (0)
+#define tasklet_unlock(t) do { } while (0)
+#endif
+
+extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_schedule(struct tasklet_struct *t)
+{
+       if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+               __tasklet_schedule(t);
+}
+
+extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_hi_schedule(struct tasklet_struct *t)
+{
+       if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+               __tasklet_hi_schedule(t);
+}
+
+
+static inline void tasklet_disable_nosync(struct tasklet_struct *t)
+{
+       atomic_inc(&t->count);
+       smp_mb__after_atomic_inc();
+}
+
+static inline void tasklet_disable(struct tasklet_struct *t)
+{
+       tasklet_disable_nosync(t);
+       tasklet_unlock_wait(t);
+       smp_mb();
+}
+
+static inline void tasklet_enable(struct tasklet_struct *t)
+{
+       smp_mb__before_atomic_dec();
+       atomic_dec(&t->count);
+}
+
+static inline void tasklet_hi_enable(struct tasklet_struct *t)
+{
+       smp_mb__before_atomic_dec();
+       atomic_dec(&t->count);
+}
+
+extern void tasklet_kill(struct tasklet_struct *t);
+extern void tasklet_init(struct tasklet_struct *t,
+                        void (*func)(unsigned long), unsigned long data);
+
+#ifdef CONFIG_SMP
+
+#define SMP_TIMER_NAME(name) name##__thr
+
+#define SMP_TIMER_DEFINE(name, task) \
+DECLARE_TASKLET(task, name##__thr, 0); \
+static void name (unsigned long dummy) \
+{ \
+       tasklet_schedule(&(task)); \
+}
+
+#else /* CONFIG_SMP */
+
+#define SMP_TIMER_NAME(name) name
+#define SMP_TIMER_DEFINE(name, task)
+
+#endif /* CONFIG_SMP */
+
+
+/* Old BH definitions */
+
+extern struct tasklet_struct bh_task_vec[];
+
+/* It is exported _ONLY_ for wait_on_irq(). */
+extern spinlock_t global_bh_lock;
+
+static inline void mark_bh(int nr)
+{
+       tasklet_hi_schedule(bh_task_vec+nr);
+}
+
+extern void init_bh(int nr, void (*routine)(void));
+extern void remove_bh(int nr);
+
+
+/*
+ * Autoprobing for irqs:
+ *
+ * probe_irq_on() and probe_irq_off() provide robust primitives
+ * for accurate IRQ probing during kernel initialization.  They are
+ * reasonably simple to use, are not "fooled" by spurious interrupts,
+ * and, unlike other attempts at IRQ probing, they do not get hung on
+ * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards).
+ *
+ * For reasonably foolproof probing, use them as follows:
+ *
+ * 1. clear and/or mask the device's internal interrupt.
+ * 2. sti();
+ * 3. irqs = probe_irq_on();      // "take over" all unassigned idle IRQs
+ * 4. enable the device and cause it to trigger an interrupt.
+ * 5. wait for the device to interrupt, using non-intrusive polling or a delay.
+ * 6. irq = probe_irq_off(irqs);  // get IRQ number, 0=none, negative=multiple
+ * 7. service the device to clear its pending interrupt.
+ * 8. loop again if paranoia is required.
+ *
+ * probe_irq_on() returns a mask of allocated irq's.
+ *
+ * probe_irq_off() takes the mask as a parameter,
+ * and returns the irq number which occurred,
+ * or zero if none occurred, or a negative irq number
+ * if more than one irq occurred.
+ */
+extern unsigned long probe_irq_on(void);       /* returns 0 on failure */
+extern int probe_irq_off(unsigned long);       /* returns 0 or negative on failure */
+extern unsigned int probe_irq_mask(unsigned long);     /* returns mask of ISA interrupts */
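+
+/*
+ * Illustrative sketch (comment only) of the recipe above; the foo_*
+ * device helpers and the delay length are invented:
+ *
+ * unsigned long mask;
+ * int irq;
+ *
+ * foo_mask_irq(dev);           // 1. quiesce the device
+ * sti();                       // 2.
+ * mask = probe_irq_on();       // 3.
+ * foo_trigger_irq(dev);        // 4.
+ * udelay(100);                 // 5. wait for the interrupt
+ * irq = probe_irq_off(mask);   // 6.
+ * foo_ack_irq(dev);            // 7.
+ * if (irq <= 0)
+ *     irq = 0;                 // 8. none/multiple seen: retry or give up
+ */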
+
+#endif
diff --git a/xen-2.4.16/include/xeno/ioctl.h b/xen-2.4.16/include/xeno/ioctl.h
new file mode 100644 (file)
index 0000000..aa91eb3
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _LINUX_IOCTL_H
+#define _LINUX_IOCTL_H
+
+#include <asm/ioctl.h>
+
+#endif /* _LINUX_IOCTL_H */
+
diff --git a/xen-2.4.16/include/xeno/ioport.h b/xen-2.4.16/include/xeno/ioport.h
new file mode 100644 (file)
index 0000000..7c35699
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * ioport.h    Definitions of routines for detecting, reserving and
+ *             allocating system resources.
+ *
+ * Authors:    Linus Torvalds
+ */
+
+#ifndef _LINUX_IOPORT_H
+#define _LINUX_IOPORT_H
+
+/*
+ * Resources are tree-like, allowing
+ * nesting etc..
+ */
+struct resource {
+       const char *name;
+       unsigned long start, end;
+       unsigned long flags;
+       struct resource *parent, *sibling, *child;
+};
+
+struct resource_list {
+       struct resource_list *next;
+       struct resource *res;
+       struct pci_dev *dev;
+};
+
+/*
+ * IO resources have these defined flags.
+ */
+#define IORESOURCE_BITS                0x000000ff      /* Bus-specific bits */
+
+#define IORESOURCE_IO          0x00000100      /* Resource type */
+#define IORESOURCE_MEM         0x00000200
+#define IORESOURCE_IRQ         0x00000400
+#define IORESOURCE_DMA         0x00000800
+
+#define IORESOURCE_PREFETCH    0x00001000      /* No side effects */
+#define IORESOURCE_READONLY    0x00002000
+#define IORESOURCE_CACHEABLE   0x00004000
+#define IORESOURCE_RANGELENGTH 0x00008000
+#define IORESOURCE_SHADOWABLE  0x00010000
+#define IORESOURCE_BUS_HAS_VGA 0x00080000
+
+#define IORESOURCE_UNSET       0x20000000
+#define IORESOURCE_AUTO                0x40000000
+#define IORESOURCE_BUSY                0x80000000      /* Driver has marked this resource busy */
+
+/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_IRQ_HIGHEDGE                (1<<0)
+#define IORESOURCE_IRQ_LOWEDGE         (1<<1)
+#define IORESOURCE_IRQ_HIGHLEVEL       (1<<2)
+#define IORESOURCE_IRQ_LOWLEVEL                (1<<3)
+
+/* ISA PnP DMA specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_DMA_TYPE_MASK       (3<<0)
+#define IORESOURCE_DMA_8BIT            (0<<0)
+#define IORESOURCE_DMA_8AND16BIT       (1<<0)
+#define IORESOURCE_DMA_16BIT           (2<<0)
+
+#define IORESOURCE_DMA_MASTER          (1<<2)
+#define IORESOURCE_DMA_BYTE            (1<<3)
+#define IORESOURCE_DMA_WORD            (1<<4)
+
+#define IORESOURCE_DMA_SPEED_MASK      (3<<6)
+#define IORESOURCE_DMA_COMPATIBLE      (0<<6)
+#define IORESOURCE_DMA_TYPEA           (1<<6)
+#define IORESOURCE_DMA_TYPEB           (2<<6)
+#define IORESOURCE_DMA_TYPEF           (3<<6)
+
+/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */
+#define IORESOURCE_MEM_WRITEABLE       (1<<0)  /* dup: IORESOURCE_READONLY */
+#define IORESOURCE_MEM_CACHEABLE       (1<<1)  /* dup: IORESOURCE_CACHEABLE */
+#define IORESOURCE_MEM_RANGELENGTH     (1<<2)  /* dup: IORESOURCE_RANGELENGTH */
+#define IORESOURCE_MEM_TYPE_MASK       (3<<3)
+#define IORESOURCE_MEM_8BIT            (0<<3)
+#define IORESOURCE_MEM_16BIT           (1<<3)
+#define IORESOURCE_MEM_8AND16BIT       (2<<3)
+#define IORESOURCE_MEM_SHADOWABLE      (1<<5)  /* dup: IORESOURCE_SHADOWABLE */
+#define IORESOURCE_MEM_EXPANSIONROM    (1<<6)
+
+/* PC/ISA/whatever - the normal PC address spaces: IO and memory */
+extern struct resource ioport_resource;
+extern struct resource iomem_resource;
+
+extern int get_resource_list(struct resource *, char *buf, int size);
+
+extern int check_resource(struct resource *root, unsigned long, unsigned long);
+extern int request_resource(struct resource *root, struct resource *new);
+extern int release_resource(struct resource *new);
+extern int allocate_resource(struct resource *root, struct resource *new,
+                            unsigned long size,
+                            unsigned long min, unsigned long max,
+                            unsigned long align,
+                            void (*alignf)(void *, struct resource *, unsigned long),
+                            void *alignf_data);
+
+/* Convenience shorthand with allocation */
+#define request_region(start,n,name)   __request_region(&ioport_resource, (start), (n), (name))
+#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
+
+extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name);
+
+/* Compatibility cruft */
+#define check_region(start,n)  __check_region(&ioport_resource, (start), (n))
+#define release_region(start,n)        __release_region(&ioport_resource, (start), (n))
+#define check_mem_region(start,n)      __check_region(&iomem_resource, (start), (n))
+#define release_mem_region(start,n)    __release_region(&iomem_resource, (start), (n))
+
+extern int __check_region(struct resource *, unsigned long, unsigned long);
+extern void __release_region(struct resource *, unsigned long, unsigned long);
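+
+/*
+ * Illustrative sketch (comment only): claiming an I/O port range
+ * before driving the hardware, then releasing it on teardown.  The
+ * base, extent and name are invented for illustration.
+ *
+ * if (!request_region(0x300, 8, "foo"))
+ *     return -EBUSY;           // ports already owned by someone else
+ *
+ * // ... program the device through inb()/outb() on 0x300..0x307 ...
+ *
+ * release_region(0x300, 8);
+ */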
+
+#define get_ioport_list(buf)   get_resource_list(&ioport_resource, buf, PAGE_SIZE)
+#define get_mem_list(buf)      get_resource_list(&iomem_resource, buf, PAGE_SIZE)
+
+#define HAVE_AUTOIRQ
+extern void autoirq_setup(int waittime);
+extern int autoirq_report(int waittime);
+
+#endif /* _LINUX_IOPORT_H */
diff --git a/xen-2.4.16/include/xeno/irq.h b/xen-2.4.16/include/xeno/irq.h
new file mode 100644 (file)
index 0000000..7342491
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef __irq_h
+#define __irq_h
+
+#include <xeno/config.h>
+#include <xeno/spinlock.h>
+#include <asm/ptrace.h>
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS 1       /* IRQ handler active - do not enter! */
+#define IRQ_DISABLED   2       /* IRQ disabled - do not enter! */
+#define IRQ_PENDING    4       /* IRQ pending - replay on enable */
+#define IRQ_REPLAY     8       /* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT 16      /* IRQ is being autodetected */
+#define IRQ_WAITING    32      /* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL      64      /* IRQ level triggered */
+#define IRQ_MASKED     128     /* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU    256     /* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe about the low-level hardware. 
+ */
+struct hw_interrupt_type {
+       const char * typename;
+       unsigned int (*startup)(unsigned int irq);
+       void (*shutdown)(unsigned int irq);
+       void (*enable)(unsigned int irq);
+       void (*disable)(unsigned int irq);
+       void (*ack)(unsigned int irq);
+       void (*end)(unsigned int irq);
+       void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type  hw_irq_controller;
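+
+/*
+ * Illustrative sketch (comment only): a do-nothing controller in the
+ * spirit of the no_irq_type descriptor declared below; all names here
+ * are invented for illustration.
+ *
+ * static unsigned int null_startup(unsigned int irq) { return 0; }
+ * static void null_noop(unsigned int irq) { }
+ *
+ * static hw_irq_controller null_irq_type = {
+ *     "none",         // typename
+ *     null_startup,   // startup
+ *     null_noop,      // shutdown
+ *     null_noop,      // enable
+ *     null_noop,      // disable
+ *     null_noop,      // ack
+ *     null_noop,      // end
+ *     NULL            // set_affinity
+ * };
+ */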
+
+#include <asm/irq.h>
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+       unsigned int status;            /* IRQ status */
+       hw_irq_controller *handler;
+       struct irqaction *action;       /* IRQ action list */
+       unsigned int depth;             /* nested irq disables */
+       spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+
+#endif /* __irq_h */
diff --git a/xen-2.4.16/include/xeno/irq_cpustat.h b/xen-2.4.16/include/xeno/irq_cpustat.h
new file mode 100644 (file)
index 0000000..6466554
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef __irq_cpustat_h
+#define __irq_cpustat_h
+
+/*
+ * Contains default mappings for irq_cpustat_t, used by almost every
+ * architecture.  Some arch (like s390) have per cpu hardware pages and
+ * they define their own mappings for irq_stat.
+ *
+ * Keith Owens <kaos@ocs.com.au> July 2000.
+ */
+
+#include <xeno/config.h>
+
+/*
+ * Simple wrappers reducing source bloat.  Define all irq_stat fields
+ * here, even ones that are arch dependent.  That way we get common
+ * definitions instead of differing sets for each arch.
+ */
+
+extern irq_cpustat_t irq_stat[];                       /* defined in asm/hardirq.h */
+
+#ifdef CONFIG_SMP
+#define __IRQ_STAT(cpu, member)        (irq_stat[cpu].member)
+#else
+#define __IRQ_STAT(cpu, member)        ((void)(cpu), irq_stat[0].member)
+#endif 
+
+  /* arch independent irq_stat fields */
+#define softirq_pending(cpu)   __IRQ_STAT((cpu), __softirq_pending)
+#define local_irq_count(cpu)   __IRQ_STAT((cpu), __local_irq_count)
+#define local_bh_count(cpu)    __IRQ_STAT((cpu), __local_bh_count)
+#define syscall_count(cpu)     __IRQ_STAT((cpu), __syscall_count)
+
+#endif /* __irq_cpustat_h */
diff --git a/xen-2.4.16/include/xeno/kdev_t.h b/xen-2.4.16/include/xeno/kdev_t.h
new file mode 100644 (file)
index 0000000..9d85cba
--- /dev/null
@@ -0,0 +1,123 @@
+#ifndef _LINUX_KDEV_T_H
+#define _LINUX_KDEV_T_H
+#if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+/*
+As a preparation for the introduction of larger device numbers,
+we introduce a type kdev_t to hold them. No information about
+this type is known outside of this include file.
+
+Objects of type kdev_t designate a device. Outside of the kernel
+the corresponding things are objects of type dev_t - usually an
+integral type with the device major and minor in the high and low
+bits, respectively. Conversion is done by
+
+extern kdev_t to_kdev_t(int);
+
+It is up to the various file systems to decide how objects of type
+dev_t are stored on disk.
+The only other points of contact between the kernel and the outside
+world are the system calls stat and mknod, new versions of which will
+eventually have to be used in libc.
+
+[Unfortunately, the floppy control ioctls fail to hide the internal
+kernel structures, and the fd_device field of a struct floppy_drive_struct
+is user-visible. So, it remains a dev_t for the moment, with some ugly
+conversions in floppy.c.]
+
+Inside the kernel, we aim for a kdev_t type that is a pointer
+to a structure with information about the device (like major,
+minor, size, blocksize, sectorsize, name, read-only flag,
+struct file_operations etc.).
+
+However, for the time being we let kdev_t be almost the same as dev_t:
+
+typedef struct { unsigned short major, minor; } kdev_t;
+
+Admissible operations on an object of type kdev_t:
+- passing it along
+- comparing it for equality with another such object
+- storing it in ROOT_DEV, inode->i_dev, inode->i_rdev, sb->s_dev,
+  bh->b_dev, req->rq_dev, de->dc_dev, tty->device
+- using its bit pattern as argument in a hash function
+- finding its major and minor
+- complaining about it
+
+An object of type kdev_t is created only by the function MKDEV(),
+with the single exception of the constant 0 (no device).
+
+Right now the other information mentioned above is usually found
+in static arrays indexed by major or major,minor.
+
+An obstacle to immediately using
+    typedef struct { ... (* lots of information *) } *kdev_t
+is the case of mknod used to create a block device that the
+kernel doesn't know about at present (but first learns about
+when some module is inserted).
+
+aeb - 950811
+*/
+
+/* Since MINOR(dev) is used as index in static arrays,
+   the kernel is not quite ready yet for larger minors.
+   However, everything runs fine with an arbitrary kdev_t type. */
+
+#define MINORBITS      8
+#define MINORMASK      ((1U << MINORBITS) - 1)
+
+typedef unsigned short kdev_t;
+
+#define MAJOR(dev)     ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)     ((unsigned int) ((dev) & MINORMASK))
+#define HASHDEV(dev)   ((unsigned int) (dev))
+#define NODEV          0
+#define MKDEV(ma,mi)   (((ma) << MINORBITS) | (mi))
+#define B_FREE         0xffff          /* yuk */
+
+extern const char * kdevname(kdev_t);  /* note: returns pointer to static data! */
+
+/* 2.5.x compatibility */
+#define mk_kdev(a,b)   MKDEV(a,b)
+#define major(d)       MAJOR(d)
+#define minor(d)       MINOR(d)
+#define kdev_same(a,b) ((a) == (b))
+#define kdev_none(d)   (!(d))
+#define kdev_val(d)    ((unsigned int)(d))
+#define val_to_kdev(d) ((kdev_t)(d))
+
+/*
+As long as device numbers in the outside world have 16 bits only,
+we use these conversions.
+*/
+
+static inline unsigned int kdev_t_to_nr(kdev_t dev) {
+       return (MAJOR(dev)<<8) | MINOR(dev);
+}
+
+static inline kdev_t to_kdev_t(int dev)
+{
+       int major, minor;
+#if 0
+       major = (dev >> 16);
+       if (!major) {
+               major = (dev >> 8);
+               minor = (dev & 0xff);
+       } else
+               minor = (dev & 0xffff);
+#else
+       major = (dev >> 8);
+       minor = (dev & 0xff);
+#endif
+       return MKDEV(major, minor);
+}
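+
+/*
+ * Illustrative sketch (comment only): round-tripping an external
+ * 16-bit device number through kdev_t (the values are arbitrary):
+ *
+ * int nr = (3 << 8) | 1;       // major 3 (IDE0), minor 1
+ * kdev_t dev = to_kdev_t(nr);
+ *
+ * MAJOR(dev)        == 3
+ * MINOR(dev)        == 1
+ * kdev_t_to_nr(dev) == nr
+ */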
+
+#else /* __KERNEL__ || _LVM_H_INCLUDE */
+
+/*
+Some programs want their definitions of MAJOR and MINOR and MKDEV
+from the kernel sources. These must be the externally visible ones.
+*/
+#define MAJOR(dev)     ((dev)>>8)
+#define MINOR(dev)     ((dev) & 0xff)
+#define MKDEV(ma,mi)   ((ma)<<8 | (mi))
+#endif /* __KERNEL__ || _LVM_H_INCLUDE */
+#endif
diff --git a/xen-2.4.16/include/xeno/lib.h b/xen-2.4.16/include/xeno/lib.h
new file mode 100644 (file)
index 0000000..cd40d11
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef __LIB_H__
+#define __LIB_H__
+
+#include <stdarg.h>
+#include <xeno/types.h>
+
+#ifndef NDEBUG
+#define ASSERT(_p) \
+    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", \
+                          #_p , __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+#define reserve_bootmem(_p,_l) \
+printk("Memory Reservation 0x%lx, %lu bytes\n", (_p), (_l))
+
+/* lib.c */
+int memcmp(const void * cs,const void * ct,size_t count);
+void * memcpy(void * dest,const void *src,size_t count);
+int strncmp(const char * cs,const char * ct,size_t count);
+int strcmp(const char * cs,const char * ct);
+char * strcpy(char * dest,const char *src);
+char * strncpy(char * dest,const char *src,size_t count);
+void * memset(void * s,int c,size_t count);
+size_t strnlen(const char * s, size_t count);
+size_t strlen(const char * s);
+char * strchr(const char *,int);
+char * strstr(const char * s1,const char * s2);
+unsigned long str_to_quad(unsigned char *s);
+unsigned char *quad_to_str(unsigned long q, unsigned char *s);
+
+/* kernel.c */
+#define printk printf
+void printf (const char *format, ...);
+void cls(void);
+void panic(const char *format, ...);
+
+/* vsprintf.c */
+extern int sprintf(char * buf, const char * fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+extern int vsprintf(char *buf, const char *, va_list);
+extern int snprintf(char * buf, size_t size, const char * fmt, ...)
+       __attribute__ ((format (printf, 3, 4)));
+extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+extern int sscanf(const char *, const char *, ...)
+       __attribute__ ((format (scanf,2,3)));
+extern int vsscanf(const char *, const char *, va_list);
+long simple_strtol(const char *cp,char **endp,unsigned int base);
+long long simple_strtoll(const char *cp,char **endp,unsigned int base);
+
+#endif /* __LIB_H__ */
diff --git a/xen-2.4.16/include/xeno/list.h b/xen-2.4.16/include/xeno/list.h
new file mode 100644 (file)
index 0000000..4124a9a
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+       struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+       struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries. 
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_add(struct list_head * new,
+       struct list_head * prev,
+       struct list_head * next)
+{
+       next->prev = new;
+       new->next = next;
+       new->prev = prev;
+       prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static __inline__ void __list_del(struct list_head * prev,
+                                 struct list_head * next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this; the entry is
+ * in an undefined state.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+       INIT_LIST_HEAD(entry); 
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+       return head->next == head;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+       struct list_head *first = list->next;
+
+       if (first != list) {
+               struct list_head *last = list->prev;
+               struct list_head *at = head->next;
+
+               first->prev = head;
+               head->next = first;
+
+               last->next = at;
+               at->prev = last;
+       }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:       the &struct list_head pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each       -       iterate over a list
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each(pos, head) \
+       for (pos = (head)->next; pos != (head); pos = pos->next)
+               
+/**
+ * list_for_each_safe  -       iterate over a list safe against removal of list entry
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @n:         another &struct list_head to use as temporary storage
+ * @head:      the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+       for (pos = (head)->next, n = pos->next; pos != (head); \
+               pos = n, n = pos->next)
+
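+/*
+ * Illustrative sketch (comment only): embedding a list_head in a
+ * structure and walking the list; struct foo and foo_list are
+ * invented names.
+ *
+ * struct foo {
+ *     int value;
+ *     struct list_head link;
+ * };
+ *
+ * static LIST_HEAD(foo_list);
+ *
+ * list_add_tail(&f->link, &foo_list);  // queue a struct foo *f
+ *
+ * struct list_head *pos;
+ * list_for_each(pos, &foo_list) {
+ *     struct foo *f = list_entry(pos, struct foo, link);
+ *     // f->value is now usable
+ * }
+ */
+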
+#endif
diff --git a/xen-2.4.16/include/xeno/major.h b/xen-2.4.16/include/xeno/major.h
new file mode 100644 (file)
index 0000000..b30f88b
--- /dev/null
@@ -0,0 +1,199 @@
+#ifndef _LINUX_MAJOR_H
+#define _LINUX_MAJOR_H
+
+#include <xeno/kdev_t.h>
+
+/*
+ * This file has definitions for major device numbers.
+ * For the device number assignments, see Documentation/devices.txt.
+ */
+
+/* limits */
+
+/*
+ * Important: Don't change this to 256.  Major number 255 is and must be
+ * reserved for future expansion into a larger dev_t space.
+ */
+#define MAX_CHRDEV     255
+#define MAX_BLKDEV     255
+
+#define UNNAMED_MAJOR  0
+#define MEM_MAJOR      1
+#define RAMDISK_MAJOR  1
+#define FLOPPY_MAJOR   2
+#define PTY_MASTER_MAJOR 2
+#define IDE0_MAJOR     3
+#define PTY_SLAVE_MAJOR 3
+#define HD_MAJOR       IDE0_MAJOR
+#define TTY_MAJOR      4
+#define TTYAUX_MAJOR   5
+#define LP_MAJOR       6
+#define VCS_MAJOR      7
+#define LOOP_MAJOR     7
+#define SCSI_DISK0_MAJOR 8
+#define SCSI_TAPE_MAJOR        9
+#define MD_MAJOR        9
+#define MISC_MAJOR     10
+#define SCSI_CDROM_MAJOR 11
+#define QIC02_TAPE_MAJOR 12
+#define XT_DISK_MAJOR  13
+#define SOUND_MAJOR    14
+#define CDU31A_CDROM_MAJOR 15
+#define JOYSTICK_MAJOR 15
+#define GOLDSTAR_CDROM_MAJOR 16
+#define OPTICS_CDROM_MAJOR 17
+#define SANYO_CDROM_MAJOR 18
+#define CYCLADES_MAJOR  19
+#define CYCLADESAUX_MAJOR 20
+#define MITSUMI_X_CDROM_MAJOR 20
+#define MFM_ACORN_MAJOR 21     /* ARM Linux /dev/mfm */
+#define SCSI_GENERIC_MAJOR 21
+#define Z8530_MAJOR 34
+#define DIGI_MAJOR 23
+#define IDE1_MAJOR     22
+#define DIGICU_MAJOR 22
+#define MITSUMI_CDROM_MAJOR 23
+#define CDU535_CDROM_MAJOR 24
+#define STL_SERIALMAJOR 24
+#define MATSUSHITA_CDROM_MAJOR 25
+#define STL_CALLOUTMAJOR 25
+#define MATSUSHITA_CDROM2_MAJOR 26
+#define QIC117_TAPE_MAJOR 27
+#define MATSUSHITA_CDROM3_MAJOR 27
+#define MATSUSHITA_CDROM4_MAJOR 28
+#define STL_SIOMEMMAJOR 28
+#define ACSI_MAJOR     28
+#define AZTECH_CDROM_MAJOR 29
+#define GRAPHDEV_MAJOR 29      /* SparcLinux & Linux/68k /dev/fb */
+#define SHMIQ_MAJOR    85      /* Linux/mips, SGI /dev/shmiq */
+#define CM206_CDROM_MAJOR 32
+#define IDE2_MAJOR     33
+#define IDE3_MAJOR     34
+#define XPRAM_MAJOR     35      /* expanded storage on S/390 = "slow ram" */
+                                /* proposed by Peter                      */
+#define NETLINK_MAJOR  36
+#define PS2ESDI_MAJOR  36
+#define IDETAPE_MAJOR  37
+#define Z2RAM_MAJOR    37
+#define APBLOCK_MAJOR   38   /* AP1000 Block device */
+#define DDV_MAJOR       39   /* AP1000 DDV block device */
+#define NBD_MAJOR      43   /* Network block device    */
+#define RISCOM8_NORMAL_MAJOR 48
+#define DAC960_MAJOR   48      /* 48..55 */
+#define RISCOM8_CALLOUT_MAJOR 49
+#define MKISS_MAJOR    55
+#define DSP56K_MAJOR    55   /* DSP56001 processor device */
+
+#define IDE4_MAJOR     56
+#define IDE5_MAJOR     57
+
+#define LVM_BLK_MAJOR  58      /* Logical Volume Manager */
+
+#define SCSI_DISK1_MAJOR       65
+#define SCSI_DISK2_MAJOR       66
+#define SCSI_DISK3_MAJOR       67
+#define SCSI_DISK4_MAJOR       68
+#define SCSI_DISK5_MAJOR       69
+#define SCSI_DISK6_MAJOR       70
+#define SCSI_DISK7_MAJOR       71
+
+
+#define COMPAQ_SMART2_MAJOR    72
+#define COMPAQ_SMART2_MAJOR1   73
+#define COMPAQ_SMART2_MAJOR2   74
+#define COMPAQ_SMART2_MAJOR3   75
+#define COMPAQ_SMART2_MAJOR4   76
+#define COMPAQ_SMART2_MAJOR5   77
+#define COMPAQ_SMART2_MAJOR6   78
+#define COMPAQ_SMART2_MAJOR7   79
+
+#define SPECIALIX_NORMAL_MAJOR 75
+#define SPECIALIX_CALLOUT_MAJOR 76
+
+#define COMPAQ_CISS_MAJOR      104
+#define COMPAQ_CISS_MAJOR1     105
+#define COMPAQ_CISS_MAJOR2      106
+#define COMPAQ_CISS_MAJOR3      107
+#define COMPAQ_CISS_MAJOR4      108
+#define COMPAQ_CISS_MAJOR5      109
+#define COMPAQ_CISS_MAJOR6      110
+#define COMPAQ_CISS_MAJOR7      111
+
+#define ATARAID_MAJOR          114
+
+#define DASD_MAJOR      94     /* Official assignations from Peter */
+
+#define MDISK_MAJOR     95     /* Official assignations from Peter */
+
+#define I2O_MAJOR              80      /* 80->87 */
+
+#define IDE6_MAJOR     88
+#define IDE7_MAJOR     89
+#define IDE8_MAJOR     90
+#define IDE9_MAJOR     91
+
+#define UBD_MAJOR      98
+
+#define AURORA_MAJOR 79
+
+#define JSFD_MAJOR     99
+
+#define PHONE_MAJOR    100
+
+#define LVM_CHAR_MAJOR 109     /* Logical Volume Manager */
+
+#define        UMEM_MAJOR      116     /* http://www.umem.com/ Battery Backed RAM */
+
+#define RTF_MAJOR      150
+#define RAW_MAJOR      162
+
+#define USB_ACM_MAJOR          166
+#define USB_ACM_AUX_MAJOR      167
+#define USB_CHAR_MAJOR         180
+
+#define UNIX98_PTY_MASTER_MAJOR        128
+#define UNIX98_PTY_MAJOR_COUNT 8
+#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
+
+#define VXVM_MAJOR             199     /* VERITAS volume i/o driver    */
+#define VXSPEC_MAJOR           200     /* VERITAS volume config driver */
+#define VXDMP_MAJOR            201     /* VERITAS volume multipath driver */
+
+#define MSR_MAJOR              202
+#define CPUID_MAJOR            203
+
+#define OSST_MAJOR     206     /* OnStream-SCx0 SCSI tape */
+
+#define IBM_TTY3270_MAJOR       227    /* Official allocations now */
+#define IBM_FS3270_MAJOR        228
+
+/*
+ * Tests for SCSI devices.
+ */
+
+#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \
+  ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR))
+  
+#define SCSI_BLK_MAJOR(M) \
+  (SCSI_DISK_MAJOR(M)  \
+   || (M) == SCSI_CDROM_MAJOR)
+
+static __inline__ int scsi_blk_major(int m) {
+       return SCSI_BLK_MAJOR(m);
+}
+
+/*
+ * Tests for IDE devices
+ */
+#define IDE_DISK_MAJOR(M)      ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \
+                               (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \
+                               (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \
+                               (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \
+                               (M) == IDE8_MAJOR || (M) == IDE9_MAJOR)
+
+static __inline__ int ide_blk_major(int m)
+{
+       return IDE_DISK_MAJOR(m);
+}
+
+#endif
diff --git a/xen-2.4.16/include/xeno/mii.h b/xen-2.4.16/include/xeno/mii.h
new file mode 100644 (file)
index 0000000..9439135
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * linux/mii.h: definitions for MII-compatible transceivers
+ * Originally drivers/net/sunhme.h.
+ *
+ * Copyright (C) 1996, 1999, 2001 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef __LINUX_MII_H__
+#define __LINUX_MII_H__
+
+#include <linux/types.h>
+
+/* Generic MII registers. */
+
+#define MII_BMCR            0x00        /* Basic mode control register */
+#define MII_BMSR            0x01        /* Basic mode status register  */
+#define MII_PHYSID1         0x02        /* PHYS ID 1                   */
+#define MII_PHYSID2         0x03        /* PHYS ID 2                   */
+#define MII_ADVERTISE       0x04        /* Advertisement control reg   */
+#define MII_LPA             0x05        /* Link partner ability reg    */
+#define MII_EXPANSION       0x06        /* Expansion register          */
+#define MII_DCOUNTER        0x12        /* Disconnect counter          */
+#define MII_FCSCOUNTER      0x13        /* False carrier counter       */
+#define MII_NWAYTEST        0x14        /* N-way auto-neg test reg     */
+#define MII_RERRCOUNTER     0x15        /* Receive error counter       */
+#define MII_SREVISION       0x16        /* Silicon revision            */
+#define MII_RESV1           0x17        /* Reserved...                 */
+#define MII_LBRERROR        0x18        /* Lpback, rx, bypass error    */
+#define MII_PHYADDR         0x19        /* PHY address                 */
+#define MII_RESV2           0x1a        /* Reserved...                 */
+#define MII_TPISTATUS       0x1b        /* TPI status for 10mbps       */
+#define MII_NCONFIG         0x1c        /* Network interface config    */
+
+/* Basic mode control register. */
+#define BMCR_RESV               0x007f  /* Unused...                   */
+#define BMCR_CTST               0x0080  /* Collision test              */
+#define BMCR_FULLDPLX           0x0100  /* Full duplex                 */
+#define BMCR_ANRESTART          0x0200  /* Auto negotiation restart    */
+#define BMCR_ISOLATE            0x0400  /* Disconnect DP83840 from MII */
+#define BMCR_PDOWN              0x0800  /* Powerdown the DP83840       */
+#define BMCR_ANENABLE           0x1000  /* Enable auto negotiation     */
+#define BMCR_SPEED100           0x2000  /* Select 100Mbps              */
+#define BMCR_LOOPBACK           0x4000  /* TXD loopback bits           */
+#define BMCR_RESET              0x8000  /* Reset the DP83840           */
+
+/* Basic mode status register. */
+#define BMSR_ERCAP              0x0001  /* Ext-reg capability          */
+#define BMSR_JCD                0x0002  /* Jabber detected             */
+#define BMSR_LSTATUS            0x0004  /* Link status                 */
+#define BMSR_ANEGCAPABLE        0x0008  /* Able to do auto-negotiation */
+#define BMSR_RFAULT             0x0010  /* Remote fault detected       */
+#define BMSR_ANEGCOMPLETE       0x0020  /* Auto-negotiation complete   */
+#define BMSR_RESV               0x07c0  /* Unused...                   */
+#define BMSR_10HALF             0x0800  /* Can do 10mbps, half-duplex  */
+#define BMSR_10FULL             0x1000  /* Can do 10mbps, full-duplex  */
+#define BMSR_100HALF            0x2000  /* Can do 100mbps, half-duplex */
+#define BMSR_100FULL            0x4000  /* Can do 100mbps, full-duplex */
+#define BMSR_100BASE4           0x8000  /* Can do 100mbps, 4k packets  */
+
+/* Advertisement control register. */
+#define ADVERTISE_SLCT          0x001f  /* Selector bits               */
+#define ADVERTISE_CSMA          0x0001  /* Only selector supported     */
+#define ADVERTISE_10HALF        0x0020  /* Try for 10mbps half-duplex  */
+#define ADVERTISE_10FULL        0x0040  /* Try for 10mbps full-duplex  */
+#define ADVERTISE_100HALF       0x0080  /* Try for 100mbps half-duplex */
+#define ADVERTISE_100FULL       0x0100  /* Try for 100mbps full-duplex */
+#define ADVERTISE_100BASE4      0x0200  /* Try for 100mbps 4k packets  */
+#define ADVERTISE_RESV          0x1c00  /* Unused...                   */
+#define ADVERTISE_RFAULT        0x2000  /* Say we can detect faults    */
+#define ADVERTISE_LPACK         0x4000  /* Ack link partners response  */
+#define ADVERTISE_NPAGE         0x8000  /* Next page bit               */
+
+#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \
+                       ADVERTISE_100HALF | ADVERTISE_100FULL)
+
+/* Link partner ability register. */
+#define LPA_SLCT                0x001f  /* Same as advertise selector  */
+#define LPA_10HALF              0x0020  /* Can do 10mbps half-duplex   */
+#define LPA_10FULL              0x0040  /* Can do 10mbps full-duplex   */
+#define LPA_100HALF             0x0080  /* Can do 100mbps half-duplex  */
+#define LPA_100FULL             0x0100  /* Can do 100mbps full-duplex  */
+#define LPA_100BASE4            0x0200  /* Can do 100mbps 4k packets   */
+#define LPA_RESV                0x1c00  /* Unused...                   */
+#define LPA_RFAULT              0x2000  /* Link partner faulted        */
+#define LPA_LPACK               0x4000  /* Link partner acked us       */
+#define LPA_NPAGE               0x8000  /* Next page bit               */
+
+#define LPA_DUPLEX             (LPA_10FULL | LPA_100FULL)
+#define LPA_100                        (LPA_100FULL | LPA_100HALF | LPA_100BASE4)
+
+/* Expansion register for auto-negotiation. */
+#define EXPANSION_NWAY          0x0001  /* Can do N-way auto-nego      */
+#define EXPANSION_LCWP          0x0002  /* Got new RX page code word   */
+#define EXPANSION_ENABLENPAGE   0x0004  /* This enables npage words    */
+#define EXPANSION_NPCAPABLE     0x0008  /* Link partner supports npage */
+#define EXPANSION_MFAULTS       0x0010  /* Multiple faults detected    */
+#define EXPANSION_RESV          0xffe0  /* Unused...                   */
+
+/* N-way test register. */
+#define NWAYTEST_RESV1          0x00ff  /* Unused...                   */
+#define NWAYTEST_LOOPBACK       0x0100  /* Enable loopback for N-way   */
+#define NWAYTEST_RESV2          0xfe00  /* Unused...                   */
+
+/* This structure is used in all SIOCxMIIxxx ioctl calls */
+struct mii_ioctl_data {
+       u16             phy_id;
+       u16             reg_num;
+       u16             val_in;
+       u16             val_out;
+};
+
+
+/**
+ * mii_nway_result
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * Given a set of MII abilities, checks each bit and returns the
+ * currently supported media, in the priority order defined by
+ * IEEE 802.3u.  We use LPA_xxx constants but note this is not the
+ * value of LPA solely, as described above.
+ *
+ * The one exception to IEEE 802.3u is that 100baseT4 is placed
+ * between 100T-full and 100T-half.  If your phy does not support
+ * 100T4 this is fine.  If your phy places 100T4 elsewhere in the
+ * priority order, you will need to roll your own function.
+ */
+static inline unsigned int mii_nway_result (unsigned int negotiated)
+{
+       unsigned int ret;
+
+       if (negotiated & LPA_100FULL)
+               ret = LPA_100FULL;
+       else if (negotiated & LPA_100BASE4)
+               ret = LPA_100BASE4;
+       else if (negotiated & LPA_100HALF)
+               ret = LPA_100HALF;
+       else if (negotiated & LPA_10FULL)
+               ret = LPA_10FULL;
+       else
+               ret = LPA_10HALF;
+
+       return ret;
+}
+
+/**
+ * mii_duplex
+ * @duplex_lock: Non-zero if duplex is locked at full
+ * @negotiated: value of MII ANAR and'd with ANLPAR
+ *
+ * A small helper function for a common case.  Returns one
+ * if the media is operating or locked at full duplex, and
+ * returns zero otherwise.
+ */
+static inline unsigned int mii_duplex (unsigned int duplex_lock,
+                                      unsigned int negotiated)
+{
+       if (duplex_lock)
+               return 1;
+       if (mii_nway_result(negotiated) & LPA_DUPLEX)
+               return 1;
+       return 0;
+}
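+
+/*
+ * Illustrative sketch (comment only): deciding the link mode once
+ * autonegotiation has completed.  mii_read() is an assumed PHY
+ * register accessor, not part of this header.
+ *
+ * unsigned int advertise  = mii_read(dev, phy_id, MII_ADVERTISE);
+ * unsigned int lpa        = mii_read(dev, phy_id, MII_LPA);
+ * unsigned int negotiated = advertise & lpa;
+ *
+ * int full_duplex = mii_duplex(0, negotiated);
+ * int speed_100   = (mii_nway_result(negotiated) & LPA_100) != 0;
+ */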
+
+
+#endif /* __LINUX_MII_H__ */
diff --git a/xen-2.4.16/include/xeno/mm.h b/xen-2.4.16/include/xeno/mm.h
new file mode 100644 (file)
index 0000000..8a547ea
--- /dev/null
@@ -0,0 +1,109 @@
+
+#ifndef __XENO_MM_H__
+#define __XENO_MM_H__
+
+#include <xeno/config.h>
+#include <asm/atomic.h>
+#include <xeno/list.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+/* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
+
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA       0x01
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT      0x10    /* Can wait and reschedule? */
+#define __GFP_HIGH      0x20    /* Should access emergency pools? */
+#define __GFP_IO        0x40    /* Can start low memory physical IO? */
+#define __GFP_HIGHIO    0x80    /* Can start high mem physical IO? */
+#define __GFP_FS        0x100   /* Can call down to low-level FS? */
+
+#define GFP_ATOMIC      (__GFP_HIGH)
+#define GFP_KERNEL      (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+
+/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
+   platforms, used as appropriate on others */
+
+#define GFP_DMA         __GFP_DMA
+
+
+/******************************************************************************
+ * The following is for page_alloc.c.
+ */
+
+void init_page_allocator(unsigned long min, unsigned long max);
+unsigned long __get_free_pages(int mask, int order);
+void __free_pages(unsigned long p, int order);
+#define get_free_page(_m) (__get_free_pages((_m),0))
+#define __get_free_page(_m) (__get_free_pages((_m),0))
+#define free_pages(_p,_o) (__free_pages(_p,_o))
+#define free_page(_p) (__free_pages(_p,0))
+
+
+/******************************************************************************
+ * The following is the array of page info. One entry per page owned
+ * by the hypervisor, indexed from `mem_map', just like Linux.
+ *
+ * 12.11.02. We no longer use struct page or mem_map; these are replaced
+ * by struct pfn_info and frame_table respectively. Boris Dragovic
+ */
+
+/*
+ * This is still fatter than I'd like. Do we need the count?
+ * Do we need the flags? The list at least seems req'd by slab.c.
+ */
+typedef struct pfn_info {
+    struct list_head list;      /* ->mapping has some page lists. */
+    unsigned long next;         /* used for threading pages belonging */
+    unsigned long prev;         /* to same domain */
+    unsigned long flags;        /* atomic flags. */
+    unsigned long tot_count;    /* Total domain usage count. */
+    unsigned long type_count;   /* pagetable/dir, or domain-writeable refs. */
+} frame_table_t;
+
+#define get_page_tot(p)                 ((p)->tot_count++)
+#define put_page_tot(p)                 (--(p)->tot_count)
+#define page_tot_count(p)       ((p)->tot_count)
+#define set_page_tot_count(p,v)  ((p)->tot_count = v)
+
+#define get_page_type(p)        ((p)->type_count++)
+#define put_page_type(p)        (--(p)->type_count)
+#define page_type_count(p)      ((p)->type_count)
+#define set_page_type_count(p,v) ((p)->type_count = v)
+
+#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */
+/* hypervisor flags (domain == 0) */
+#define PG_slab               24
+/* domain flags (domain != 0) */
+/*
+ * NB. The following three flags are MUTUALLY EXCLUSIVE!
+ * At most one can be true at any point, and 'type_count' counts how many
+ * references exist of the current type. A change in type can only occur
+ * when type_count == 0.
+ */
+#define PG_type_mask        (7<<25) /* bits 25-27 */
+#define PGT_none            (0<<25) /* no special uses of this page */
+#define PGT_l1_page_table   (1<<25) /* using this page as an L1 page table? */
+#define PGT_l2_page_table   (2<<25) /* using this page as an L2 page table? */
+#define PGT_l3_page_table   (3<<25) /* using this page as an L3 page table? */
+#define PGT_l4_page_table   (4<<25) /* using this page as an L4 page table? */
+#define PGT_writeable_page  (7<<25) /* has writable mappings of this page? */
+
+#define PageSlab(page)         test_bit(PG_slab, &(page)->flags)
+#define PageSetSlab(page)      set_bit(PG_slab, &(page)->flags)
+#define PageClearSlab(page)    clear_bit(PG_slab, &(page)->flags)
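+
+/*
+ * Illustrative sketch (comment only): taking a typed reference on a
+ * frame before treating it as an L1 page table.  Locking and error
+ * handling are elided, and `pfn' is an assumed variable.
+ *
+ * frame_table_t *page = frame_table + pfn;
+ *
+ * if ( page_type_count(page) == 0 )   // type may only change when 0
+ *     page->flags = (page->flags & ~PG_type_mask) | PGT_l1_page_table;
+ * if ( (page->flags & PG_type_mask) == PGT_l1_page_table )
+ *     get_page_type(page);            // one more reference of this type
+ */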
+
+/* The array of struct pfn_info,  
+ * free pfn list and number of free pfns in the free list
+ */
+extern frame_table_t * frame_table;
+extern unsigned long frame_table_size;
+extern struct list_head free_list;
+extern unsigned int free_pfns;
+unsigned long init_frametable(unsigned long nr_pages);
+
+/* Part of the domain API. */
+int do_process_page_updates(page_update_request_t *updates, int count);
+
+#endif /* __XENO_MM_H__ */
diff --git a/xen-2.4.16/include/xeno/module.h b/xen-2.4.16/include/xeno/module.h
new file mode 100644 (file)
index 0000000..5e8ce69
--- /dev/null
@@ -0,0 +1,417 @@
+/*
+ * Dynamic loading of modules into the kernel.
+ *
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
+ */
+
+#ifndef _LINUX_MODULE_H
+#define _LINUX_MODULE_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#ifdef __GENKSYMS__
+#  define _set_ver(sym) sym
+#  undef  MODVERSIONS
+#  define MODVERSIONS
+#else /* ! __GENKSYMS__ */
+# if !defined(MODVERSIONS) && defined(EXPORT_SYMTAB)
+#   define _set_ver(sym) sym
+#   include <linux/modversions.h>
+# endif
+#endif /* __GENKSYMS__ */
+
+#include <asm/atomic.h>
+
+/* Don't need to bring in all of uaccess.h just for this decl.  */
+struct exception_table_entry;
+
+/* Used by get_kernel_syms, which is obsolete.  */
+struct kernel_sym
+{
+       unsigned long value;
+       char name[60];          /* should have been 64-sizeof(long); oh well */
+};
+
+struct module_symbol
+{
+       unsigned long value;
+       const char *name;
+};
+
+struct module_ref
+{
+       struct module *dep;     /* "parent" pointer */
+       struct module *ref;     /* "child" pointer */
+       struct module_ref *next_ref;
+};
+
+/* TBD */
+struct module_persist;
+
+struct module
+{
+       unsigned long size_of_struct;   /* == sizeof(module) */
+       struct module *next;
+       const char *name;
+       unsigned long size;
+
+       union
+       {
+               atomic_t usecount;
+               long pad;
+       } uc;                           /* Needs to keep its size - so says rth */
+
+       unsigned long flags;            /* AUTOCLEAN et al */
+
+       unsigned nsyms;
+       unsigned ndeps;
+
+       struct module_symbol *syms;
+       struct module_ref *deps;
+       struct module_ref *refs;
+       int (*init)(void);
+       void (*cleanup)(void);
+       const struct exception_table_entry *ex_table_start;
+       const struct exception_table_entry *ex_table_end;
+#ifdef __alpha__
+       unsigned long gp;
+#endif
+       /* Members past this point are extensions to the basic
+          module support and are optional.  Use mod_member_present()
+          to examine them.  */
+       const struct module_persist *persist_start;
+       const struct module_persist *persist_end;
+       int (*can_unload)(void);
+       int runsize;                    /* In modutils, not currently used */
+       const char *kallsyms_start;     /* All symbols for kernel debugging */
+       const char *kallsyms_end;
+       const char *archdata_start;     /* arch specific data for module */
+       const char *archdata_end;
+       const char *kernel_data;        /* Reserved for kernel internal use */
+};
+
+struct module_info
+{
+       unsigned long addr;
+       unsigned long size;
+       unsigned long flags;
+       long usecount;
+};
+
+/* Bits of module.flags.  */
+
+#define MOD_UNINITIALIZED      0
+#define MOD_RUNNING            1
+#define MOD_DELETED            2
+#define MOD_AUTOCLEAN          4
+#define MOD_VISITED            8
+#define MOD_USED_ONCE          16
+#define MOD_JUST_FREED         32
+#define MOD_INITIALIZING       64
+
+/* Values for query_module's which.  */
+
+#define QM_MODULES     1
+#define QM_DEPS                2
+#define QM_REFS                3
+#define QM_SYMBOLS     4
+#define QM_INFO                5
+
+/* Can the module be queried? */
+#define MOD_CAN_QUERY(mod) (((mod)->flags & (MOD_RUNNING | MOD_INITIALIZING)) && !((mod)->flags & MOD_DELETED))
+
+/* When struct module is extended, we must test whether the new member
+   is present in the header received from insmod before we can use it.  
+   This function returns true if the member is present.  */
+
+#define mod_member_present(mod,member)                                         \
+       ((unsigned long)(&((struct module *)0L)->member + 1)            \
+        <= (mod)->size_of_struct)
+
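+/*
+ * Illustrative sketch (comment only): checking an optional member
+ * before using it, just as __MOD_IN_USE below does for can_unload:
+ *
+ * if (mod_member_present(mod, kallsyms_start) &&
+ *     mod->kallsyms_start != NULL) {
+ *     // the insmod that loaded this module supplied kallsyms data
+ * }
+ */
+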
+/*
+ * Ditto for archdata.  Assumes mod->archdata_start and mod->archdata_end
+ * are validated elsewhere.
+ */
+#define mod_archdata_member_present(mod, type, member)                 \
+       (((unsigned long)(&((type *)0L)->member) +                      \
+         sizeof(((type *)0L)->member)) <=                              \
+        ((mod)->archdata_end - (mod)->archdata_start))
+        
+
+/* Check if an address p with number of entries n is within the body of module m */
+#define mod_bound(p, n, m) ((unsigned long)(p) >= ((unsigned long)(m) + ((m)->size_of_struct)) && \
+                (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size)
+
+/* Backwards compatibility definition.  */
+
+#define GET_USE_COUNT(module)  (atomic_read(&(module)->uc.usecount))
+
+/* Poke the use count of a module.  */
+
+#define __MOD_INC_USE_COUNT(mod)                                       \
+       (atomic_inc(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED|MOD_USED_ONCE)
+#define __MOD_DEC_USE_COUNT(mod)                                       \
+       (atomic_dec(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED)
+#define __MOD_IN_USE(mod)                                              \
+       (mod_member_present((mod), can_unload) && (mod)->can_unload     \
+        ? (mod)->can_unload() : atomic_read(&(mod)->uc.usecount))
+
+/* Indirect stringification.  */
+
+#define __MODULE_STRING_1(x)   #x
+#define __MODULE_STRING(x)     __MODULE_STRING_1(x)
+
+/* Generic inter module communication.
+ *
+ * NOTE: This interface is intended for small amounts of data that are
+ *       passed between two objects and either or both of the objects
+ *       might be compiled as modules.  Do not overuse this interface.
+ *
+ *       If more than two objects need to communicate then you probably
+ *       need a specific interface instead of abusing this generic
+ *       interface.  If both objects are *always* built into the kernel
+ *       then a global extern variable is good enough, you do not need
+ *       this interface.
+ *
+ * Keith Owens <kaos@ocs.com.au> 28 Oct 2000.
+ */
+
+#ifdef __KERNEL__
+#define HAVE_INTER_MODULE
+extern void inter_module_register(const char *, struct module *, const void *);
+extern void inter_module_unregister(const char *);
+extern const void *inter_module_get(const char *);
+extern const void *inter_module_get_request(const char *, const char *);
+extern void inter_module_put(const char *);
+
+struct inter_module_entry {
+       struct list_head list;
+       const char *im_name;
+       struct module *owner;
+       const void *userdata;
+};
+
+#if 0
+extern int try_inc_mod_count(struct module *mod);
+#else
+static inline int try_inc_mod_count(struct module * mod)
+{
+    if ( mod ) __MOD_INC_USE_COUNT(mod);
+    return 1;
+}
+#endif
+#endif /* __KERNEL__ */
+
+#if defined(MODULE) && !defined(__GENKSYMS__)
+
+/* Embedded module documentation macros.  */
+
+/* For documentation purposes only.  */
+
+#define MODULE_AUTHOR(name)                                               \
+const char __module_author[] __attribute__((section(".modinfo"))) =       \
+"author=" name
+
+#define MODULE_DESCRIPTION(desc)                                          \
+const char __module_description[] __attribute__((section(".modinfo"))) =   \
+"description=" desc
+
+/* Could potentially be used by kmod...  */
+
+#define MODULE_SUPPORTED_DEVICE(dev)                                      \
+const char __module_device[] __attribute__((section(".modinfo"))) =       \
+"device=" dev
+
+/* Used to verify parameters given to the module.  The TYPE arg should
+   be a string in the following format:
+       [min[-max]]{b,h,i,l,s}
+   The MIN and MAX specifiers delimit the length of the array.  If MAX
+   is omitted, it defaults to MIN; if both are omitted, the default is 1.
+   The final character is a type specifier:
+       b       byte
+       h       short
+       i       int
+       l       long
+       s       string
+*/
+
+#define MODULE_PARM(var,type)                  \
+const char __module_parm_##var[]               \
+__attribute__((section(".modinfo"))) =         \
+"parm_" __MODULE_STRING(var) "=" type
+
+#define MODULE_PARM_DESC(var,desc)             \
+const char __module_parm_desc_##var[]          \
+__attribute__((section(".modinfo"))) =         \
+"parm_desc_" __MODULE_STRING(var) "=" desc
+
+/*
+ * MODULE_DEVICE_TABLE exports information about devices
+ * currently supported by this module.  A device type, such as PCI,
+ * is a C-like identifier passed as the first arg to this macro.
+ * The second macro arg is the variable containing the device
+ * information being made public.
+ *
+ * The following is a list of known device types (arg 1),
+ * and the C types which are to be passed as arg 2.
+ * pci - struct pci_device_id - List of PCI ids supported by this module
+ * isapnp - struct isapnp_device_id - List of ISA PnP ids supported by this module
+ * usb - struct usb_device_id - List of USB ids supported by this module
+ */
+#define MODULE_GENERIC_TABLE(gtype,name)       \
+static const unsigned long __module_##gtype##_size \
+  __attribute__ ((unused)) = sizeof(struct gtype##_id); \
+static const struct gtype##_id * __module_##gtype##_table \
+  __attribute__ ((unused)) = name
+
+/*
+ * The following license idents are currently accepted as indicating free
+ * software modules
+ *
+ *     "GPL"                           [GNU Public License v2 or later]
+ *     "GPL and additional rights"     [GNU Public License v2 rights and more]
+ *     "Dual BSD/GPL"                  [GNU Public License v2 or BSD license choice]
+ *     "Dual MPL/GPL"                  [GNU Public License v2 or Mozilla license choice]
+ *
+ * The following other idents are available
+ *
+ *     "Proprietary"                   [Non free products]
+ *
+ * There are dual licensed components, but when running with Linux it is the
+ * GPL that is relevant, so this is a non-issue. Similarly, LGPL linked with GPL
+ * is a GPL combined work.
+ *
+ * This exists for several reasons
+ * 1.  So modinfo can show license info for users wanting to vet that their
+ *     setup is free
+ * 2.  So the community can ignore bug reports including proprietary modules
+ * 3.  So vendors can do likewise based on their own policies
+ */
+#define MODULE_LICENSE(license)        \
+static const char __module_license[] __attribute__((section(".modinfo"))) =   \
+"license=" license
+
+/* Define the module variable, and usage macros.  */
+extern struct module __this_module;
+
+#define THIS_MODULE            (&__this_module)
+#define MOD_INC_USE_COUNT      __MOD_INC_USE_COUNT(THIS_MODULE)
+#define MOD_DEC_USE_COUNT      __MOD_DEC_USE_COUNT(THIS_MODULE)
+#define MOD_IN_USE             __MOD_IN_USE(THIS_MODULE)
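+
+/* Illustrative use (editor's sketch): a hypothetical driver pins the
+ * module across open/release so it cannot be unloaded while in use. */
+#if 0  /* example only */
+static int example_open(void)
+{
+       MOD_INC_USE_COUNT;      /* module now reports "in use" */
+       return 0;
+}
+
+static void example_release(void)
+{
+       MOD_DEC_USE_COUNT;
+}
+#endif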
+
+#if 0
+#include <linux/version.h>
+static const char __module_kernel_version[] __attribute__((section(".modinfo"))) =
+"kernel_version=" UTS_RELEASE;
+#ifdef MODVERSIONS
+static const char __module_using_checksums[] __attribute__((section(".modinfo"))) =
+"using_checksums=1";
+#endif
+#endif
+
+#else /* MODULE */
+
+#define MODULE_AUTHOR(name)
+#define MODULE_LICENSE(license)
+#define MODULE_DESCRIPTION(desc)
+#define MODULE_SUPPORTED_DEVICE(name)
+#define MODULE_PARM(var,type)
+#define MODULE_PARM_DESC(var,desc)
+
+/* Create a dummy reference to the table to suppress gcc unused warnings.  Put
+ * the reference in the .data.exit section which is discarded when code is built
+ * in, so the reference does not bloat the running kernel.  Note: cannot be
+ * const, other exit data may be writable.
+ */
+#define MODULE_GENERIC_TABLE(gtype,name) \
+static const struct gtype##_id * __module_##gtype##_table \
+  __attribute__ ((unused, __section__(".data.exit"))) = name
+
+#ifndef __GENKSYMS__
+
+#define THIS_MODULE            NULL
+#define MOD_INC_USE_COUNT      do { } while (0)
+#define MOD_DEC_USE_COUNT      do { } while (0)
+#define MOD_IN_USE             1
+
+extern struct module *module_list;
+
+#endif /* !__GENKSYMS__ */
+
+#endif /* MODULE */
+
+#define MODULE_DEVICE_TABLE(type,name)         \
+  MODULE_GENERIC_TABLE(type##_device,name)
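+
+/* Illustrative use (editor's sketch): exporting a hypothetical PCI ID
+ * table; the vendor/device IDs are made up. */
+#if 0  /* example only */
+static struct pci_device_id example_pci_tbl[] = {
+       { 0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+       { 0, }                                  /* terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, example_pci_tbl);
+#endif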
+
+/* Export a symbol either from the kernel or a module.
+
+   In the kernel, the symbol is added to the kernel's global symbol table.
+
+   In a module, it controls which variables are exported.  If no
+   variables are explicitly exported, the action is controlled by the
+   insmod -[xX] flags.  Otherwise, only the variables listed are exported.
+   This obviates the need for the old register_symtab() function.  */
+
+#if defined(__GENKSYMS__)
+
+/* We want the EXPORT_SYMBOL tag left intact for recognition.  */
+
+#elif !defined(CONFIG_MODULES)
+
+#define __EXPORT_SYMBOL(sym,str)
+#define EXPORT_SYMBOL(var)
+#define EXPORT_SYMBOL_NOVERS(var)
+#define EXPORT_SYMBOL_GPL(var)
+
+#elif !defined(EXPORT_SYMTAB)
+
+#define __EXPORT_SYMBOL(sym,str)   error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL(var)        error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_NOVERS(var)  error this_object_must_be_defined_as_export_objs_in_the_Makefile
+#define EXPORT_SYMBOL_GPL(var)  error this_object_must_be_defined_as_export_objs_in_the_Makefile
+
+#else
+
+#define __EXPORT_SYMBOL(sym, str)                      \
+const char __kstrtab_##sym[]                           \
+__attribute__((section(".kstrtab"))) = str;            \
+const struct module_symbol __ksymtab_##sym             \
+__attribute__((section("__ksymtab"))) =                        \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#define __EXPORT_SYMBOL_GPL(sym, str)                  \
+const char __kstrtab_##sym[]                           \
+__attribute__((section(".kstrtab"))) = "GPLONLY_" str; \
+const struct module_symbol __ksymtab_##sym             \
+__attribute__((section("__ksymtab"))) =                        \
+{ (unsigned long)&sym, __kstrtab_##sym }
+
+#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)
+#define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+#define EXPORT_SYMBOL_GPL(var)  __EXPORT_SYMBOL_GPL(var, __MODULE_STRING(var))
+#else
+#define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#define EXPORT_SYMBOL_GPL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+#endif
+
+#define EXPORT_SYMBOL_NOVERS(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+
+#endif /* __GENKSYMS__ */
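+
+/* Illustrative use (editor's sketch): making a hypothetical helper
+ * visible to modules. */
+#if 0  /* example only */
+int example_helper(int arg)
+{
+       return arg * 2;
+}
+EXPORT_SYMBOL(example_helper);
+#endif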
+
+#ifdef MODULE
+/* Force a module to export no symbols.  */
+#define EXPORT_NO_SYMBOLS  __asm__(".section __ksymtab\n.previous")
+#else
+#define EXPORT_NO_SYMBOLS
+#endif /* MODULE */
+
+#ifdef CONFIG_MODULES
+#define SET_MODULE_OWNER(some_struct) do { (some_struct)->owner = THIS_MODULE; } while (0)
+#else
+#define SET_MODULE_OWNER(some_struct) do { } while (0)
+#endif
+
+#endif /* _LINUX_MODULE_H */
diff --git a/xen-2.4.16/include/xeno/multiboot.h b/xen-2.4.16/include/xeno/multiboot.h
new file mode 100644 (file)
index 0000000..a61117b
--- /dev/null
@@ -0,0 +1,81 @@
+/* multiboot.h - the header for Multiboot */
+/* Copyright (C) 1999, 2001  Free Software Foundation, Inc.
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifndef __ELF__
+#error "Build on a 32-bit ELF system"
+#endif
+
+/* The magic number passed by a Multiboot-compliant boot loader. */
+#define MULTIBOOT_BOOTLOADER_MAGIC     0x2BADB002
+
+/* The symbol table for a.out.  */
+typedef struct aout_symbol_table
+{
+  unsigned long tabsize;
+  unsigned long strsize;
+  unsigned long addr;
+  unsigned long reserved;
+} aout_symbol_table_t;
+
+/* The section header table for ELF.  */
+typedef struct elf_section_header_table
+{
+  unsigned long num;
+  unsigned long size;
+  unsigned long addr;
+  unsigned long shndx;
+} elf_section_header_table_t;
+
+/* The Multiboot information.  */
+typedef struct multiboot_info
+{
+  unsigned long flags;
+  unsigned long mem_lower;
+  unsigned long mem_upper;
+  unsigned long boot_device;
+  unsigned long cmdline;
+  unsigned long mods_count;
+  unsigned long mods_addr;
+  union
+  {
+    aout_symbol_table_t aout_sym;
+    elf_section_header_table_t elf_sec;
+  } u;
+  unsigned long mmap_length;
+  unsigned long mmap_addr;
+} multiboot_info_t;
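+
+/* Illustrative use (editor's sketch): an entry point should verify the
+ * magic value and test mbi->flags before trusting any field; the helper
+ * functions below are hypothetical. */
+#if 0  /* example only */
+void example_start(unsigned long magic, multiboot_info_t *mbi)
+{
+       if (magic != MULTIBOOT_BOOTLOADER_MAGIC)
+               return;                 /* not loaded by a Multiboot loader */
+       if (mbi->flags & 0x01)          /* bit 0: mem_lower/mem_upper valid */
+               use_memory_bounds(mbi->mem_lower, mbi->mem_upper);
+       if (mbi->flags & 0x04)          /* bit 2: cmdline valid */
+               parse_cmdline((char *)mbi->cmdline);
+}
+#endif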
+
+/* The module structure.  */
+typedef struct module
+{
+  unsigned long mod_start;
+  unsigned long mod_end;
+  unsigned long string;
+  unsigned long reserved;
+} module_t;
+
+/* The memory map.  Be careful: in the Multiboot spec, offset 0 of each
+   entry is base_addr_low; the size field (which does not count itself)
+   sits just before it, so this struct places size first.  */
+typedef struct memory_map
+{
+  unsigned long size;
+  unsigned long base_addr_low;
+  unsigned long base_addr_high;
+  unsigned long length_low;
+  unsigned long length_high;
+  unsigned long type;
+} memory_map_t;
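+
+/* Illustrative iteration (editor's sketch): because an entry's size field
+ * does not count itself, step to the next entry with size + 4; claim_ram()
+ * is hypothetical.  Type 1 marks available RAM. */
+#if 0  /* example only */
+void example_walk_mmap(multiboot_info_t *mbi)
+{
+       memory_map_t *mmap;
+
+       for (mmap = (memory_map_t *)mbi->mmap_addr;
+            (unsigned long)mmap < mbi->mmap_addr + mbi->mmap_length;
+            mmap = (memory_map_t *)((unsigned long)mmap
+                                    + mmap->size + sizeof(mmap->size)))
+               if (mmap->type == 1)
+                       claim_ram(mmap->base_addr_low, mmap->length_low);
+}
+#endif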
diff --git a/xen-2.4.16/include/xeno/netdevice.h b/xen-2.4.16/include/xeno/netdevice.h
new file mode 100644 (file)
index 0000000..51b063a
--- /dev/null
@@ -0,0 +1,688 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions for the Interfaces handler.
+ *
+ * Version:    @(#)dev.h       1.0.10  08/12/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *             Donald J. Becker, <becker@cesdis.gsfc.nasa.gov>
+ *             Alan Cox, <Alan.Cox@linux.org>
+ *             Bjorn Ekwall. <bj0rn@blox.se>
+ *              Pekka Riikonen <priikone@poseidon.pspt.fi>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ *             Moved to /usr/include/linux for NET3
+ */
+#ifndef _LINUX_NETDEVICE_H
+#define _LINUX_NETDEVICE_H
+
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/sched.h>
+
+#include <asm/atomic.h>
+#include <asm/cache.h>
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+
+struct divert_blk;
+
+#define HAVE_ALLOC_NETDEV              /* feature macro: alloc_xxxdev
+                                          functions are available. */
+
+#define NET_XMIT_SUCCESS       0
+#define NET_XMIT_DROP          1       /* skb dropped                  */
+#define NET_XMIT_CN            2       /* congestion notification      */
+#define NET_XMIT_POLICED       3       /* skb is shot by police        */
+#define NET_XMIT_BYPASS                4       /* packet does not leave via dequeue;
+                                          (TC use only - dev_queue_xmit
+                                          returns this as NET_XMIT_SUCCESS) */
+
+/* Backlog congestion levels */
+#define NET_RX_SUCCESS         0   /* keep 'em coming, baby */
+#define NET_RX_DROP            1  /* packet dropped */
+#define NET_RX_CN_LOW          2   /* storm alert, just in case */
+#define NET_RX_CN_MOD          3   /* Storm on its way! */
+#define NET_RX_CN_HIGH         4   /* The storm is here */
+#define NET_RX_BAD             5  /* packet dropped due to kernel error */
+
+#define net_xmit_errno(e)      ((e) != NET_XMIT_CN ? -ENOBUFS : 0)
+
+#endif
+
+#define MAX_ADDR_LEN   8               /* Largest hardware address length */
+
+/*
+ *     Compute the worst case header length according to the protocols
+ *     used.
+ */
+#if !defined(CONFIG_AX25) && !defined(CONFIG_AX25_MODULE) && !defined(CONFIG_TR)
+#define LL_MAX_HEADER  32
+#else
+#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#define LL_MAX_HEADER  96
+#else
+#define LL_MAX_HEADER  48
+#endif
+#endif
+
+#if !defined(CONFIG_NET_IPIP) && \
+    !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE)
+#define MAX_HEADER LL_MAX_HEADER
+#else
+#define MAX_HEADER (LL_MAX_HEADER + 48)
+#endif
+
+/*
+ *     Network device statistics. Akin to the 2.0 ether stats but
+ *     with byte counters.
+ */
+struct net_device_stats
+{
+       unsigned long   rx_packets;             /* total packets received       */
+       unsigned long   tx_packets;             /* total packets transmitted    */
+       unsigned long   rx_bytes;               /* total bytes received         */
+       unsigned long   tx_bytes;               /* total bytes transmitted      */
+       unsigned long   rx_errors;              /* bad packets received         */
+       unsigned long   tx_errors;              /* packet transmit problems     */
+       unsigned long   rx_dropped;             /* no space in linux buffers    */
+       unsigned long   tx_dropped;             /* no space available in linux  */
+       unsigned long   multicast;              /* multicast packets received   */
+       unsigned long   collisions;
+
+       /* detailed rx_errors: */
+       unsigned long   rx_length_errors;
+       unsigned long   rx_over_errors;         /* receiver ring buff overflow  */
+       unsigned long   rx_crc_errors;          /* recved pkt with crc error    */
+       unsigned long   rx_frame_errors;        /* recv'd frame alignment error */
+       unsigned long   rx_fifo_errors;         /* recv'r fifo overrun          */
+       unsigned long   rx_missed_errors;       /* receiver missed packet       */
+
+       /* detailed tx_errors */
+       unsigned long   tx_aborted_errors;
+       unsigned long   tx_carrier_errors;
+       unsigned long   tx_fifo_errors;
+       unsigned long   tx_heartbeat_errors;
+       unsigned long   tx_window_errors;
+       
+       /* for cslip etc */
+       unsigned long   rx_compressed;
+       unsigned long   tx_compressed;
+};
+
+
+/* Media selection options. */
+enum {
+        IF_PORT_UNKNOWN = 0,
+        IF_PORT_10BASE2,
+        IF_PORT_10BASET,
+        IF_PORT_AUI,
+        IF_PORT_100BASET,
+        IF_PORT_100BASETX,
+        IF_PORT_100BASEFX
+};
+
+#ifdef __KERNEL__
+
+extern const char *if_port_text[];
+
+#include <linux/cache.h>
+#include <linux/skbuff.h>
+
+struct neighbour;
+struct neigh_parms;
+struct sk_buff;
+
+struct netif_rx_stats
+{
+       unsigned total;
+       unsigned dropped;
+       unsigned time_squeeze;
+       unsigned throttled;
+       unsigned fastroute_hit;
+       unsigned fastroute_success;
+       unsigned fastroute_defer;
+       unsigned fastroute_deferred_out;
+       unsigned fastroute_latency_reduction;
+       unsigned cpu_collision;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct netif_rx_stats netdev_rx_stat[];
+
+
+/*
+ *     We tag multicasts with these structures.
+ */
+struct dev_mc_list
+{      
+       struct dev_mc_list      *next;
+       __u8                    dmi_addr[MAX_ADDR_LEN];
+       unsigned char           dmi_addrlen;
+       int                     dmi_users;
+       int                     dmi_gusers;
+};
+
+struct hh_cache
+{
+       struct hh_cache *hh_next;       /* Next entry                        */
+       atomic_t        hh_refcnt;      /* number of users                   */
+       unsigned short  hh_type;        /* protocol identifier, e.g. ETH_P_IP
+                                         *  NOTE:  For VLANs, this will be the
+                                         *  encapsulated type. --BLG
+                                         */
+       int             hh_len;         /* length of header */
+       int             (*hh_output)(struct sk_buff *skb);
+       rwlock_t        hh_lock;
+       /* cached hardware header; allow for machine alignment needs.        */
+       unsigned long   hh_data[16/sizeof(unsigned long)];
+};
+
+/* These flag bits are private to the generic network queueing
+ * layer, they may not be explicitly referenced by any other
+ * code.
+ */
+
+enum netdev_state_t
+{
+       __LINK_STATE_XOFF=0,
+       __LINK_STATE_START,
+       __LINK_STATE_PRESENT,
+       __LINK_STATE_SCHED,
+       __LINK_STATE_NOCARRIER
+};
+
+
+/*
+ *     The DEVICE structure.
+ *     Actually, this whole structure is a big mistake.  It mixes I/O
+ *     data with strictly "high-level" data, and it has to know about
+ *     almost every data structure used in the INET module.
+ *
+ *     FIXME: cleanup struct net_device such that network protocol info
+ *     moves out.
+ */
+
+struct net_device
+{
+       /*
+        * This is the first field of the "visible" part of this structure
+        * (i.e. as seen by users in the "Space.c" file).  It is the name
+        * the interface.
+        */
+       char                    name[IFNAMSIZ];
+
+       /*
+        *      I/O specific fields
+        *      FIXME: Merge these and struct ifmap into one
+        */
+       unsigned long           rmem_end;       /* shmem "recv" end     */
+       unsigned long           rmem_start;     /* shmem "recv" start   */
+       unsigned long           mem_end;        /* shared mem end       */
+       unsigned long           mem_start;      /* shared mem start     */
+       unsigned long           base_addr;      /* device I/O address   */
+       unsigned int            irq;            /* device IRQ number    */
+
+       /*
+        *      Some hardware also needs these fields, but they are not
+        *      part of the usual set specified in Space.c.
+        */
+
+       unsigned char           if_port;        /* Selectable AUI, TP,..*/
+       unsigned char           dma;            /* DMA channel          */
+
+       unsigned long           state;
+
+       struct net_device       *next;
+       
+       /* The device initialization function. Called only once. */
+       int                     (*init)(struct net_device *dev);
+
+       /* ------- Fields preinitialized in Space.c finish here ------- */
+
+       struct net_device       *next_sched;
+
+       /* Interface index. Unique device identifier    */
+       int                     ifindex;
+       int                     iflink;
+
+
+       struct net_device_stats* (*get_stats)(struct net_device *dev);
+       struct iw_statistics*   (*get_wireless_stats)(struct net_device *dev);
+
+       /*
+        * This marks the end of the "visible" part of the structure. All
+        * fields hereafter are internal to the system, and may change at
+        * will (read: may be cleaned up at will).
+        */
+
+       /* These may be needed for future network-power-down code. */
+       unsigned long           trans_start;    /* Time (in jiffies) of last Tx */
+       unsigned long           last_rx;        /* Time of last Rx      */
+
+       unsigned short          flags;  /* interface flags (a la BSD)   */
+       unsigned short          gflags;
+        unsigned short          priv_flags; /* Like 'flags' but invisible to userspace. */
+        unsigned short          unused_alignment_fixer; /* Because we need priv_flags,
+                                                         * and we want to be 32-bit aligned.
+                                                         */
+
+       unsigned                mtu;    /* interface MTU value          */
+       unsigned short          type;   /* interface hardware type      */
+       unsigned short          hard_header_len;        /* hardware hdr length  */
+       void                    *priv;  /* pointer to private data      */
+
+       struct net_device       *master; /* Pointer to master device of a group,
+                                         * which this device is member of.
+                                         */
+
+       /* Interface address info. */
+       unsigned char           broadcast[MAX_ADDR_LEN];        /* hw bcast add */
+       unsigned char           dev_addr[MAX_ADDR_LEN]; /* hw address   */
+       unsigned char           addr_len;       /* hardware address length      */
+
+       struct dev_mc_list      *mc_list;       /* Multicast mac addresses      */
+       int                     mc_count;       /* Number of installed mcasts   */
+       int                     promiscuity;
+       int                     allmulti;
+
+       int                     watchdog_timeo;
+       struct timer_list       watchdog_timer;
+
+       /* Protocol specific pointers */
+       
+       void                    *atalk_ptr;     /* AppleTalk link       */
+       void                    *ip_ptr;        /* IPv4 specific data   */  
+       void                    *dn_ptr;        /* DECnet specific data */
+       void                    *ip6_ptr;       /* IPv6 specific data */
+       void                    *ec_ptr;        /* Econet specific data */
+
+       struct Qdisc            *qdisc;
+       struct Qdisc            *qdisc_sleeping;
+       struct Qdisc            *qdisc_list;
+       struct Qdisc            *qdisc_ingress;
+       unsigned long           tx_queue_len;   /* Max frames per queue allowed */
+
+       /* hard_start_xmit synchronizer */
+       spinlock_t              xmit_lock;
+       /* CPU id of the processor currently inside hard_start_xmit,
+          or -1 if nobody is there.
+        */
+       int                     xmit_lock_owner;
+       /* device queue lock */
+       spinlock_t              queue_lock;
+       /* Number of references to this device */
+       atomic_t                refcnt;
+       /* The flag marking that the device is unregistered, but still held by a user */
+       int                     deadbeaf;
+
+       /* Net device features */
+       int                     features;
+#define NETIF_F_SG             1       /* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM                2       /* Can checksum only TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM                4       /* Does not require checksum. E.g. loopback. */
+#define NETIF_F_HW_CSUM                8       /* Can checksum all the packets. */
+#define NETIF_F_DYNALLOC       16      /* Self-destructing device. */
+#define NETIF_F_HIGHDMA                32      /* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST       64      /* Scatter/gather IO. */
+
+       /* Called after device is detached from network. */
+       void                    (*uninit)(struct net_device *dev);
+       /* Called after last user reference disappears. */
+       void                    (*destructor)(struct net_device *dev);
+
+       /* Pointers to interface service routines.      */
+       int                     (*open)(struct net_device *dev);
+       int                     (*stop)(struct net_device *dev);
+       int                     (*hard_start_xmit) (struct sk_buff *skb,
+                                                   struct net_device *dev);
+       int                     (*hard_header) (struct sk_buff *skb,
+                                               struct net_device *dev,
+                                               unsigned short type,
+                                               void *daddr,
+                                               void *saddr,
+                                               unsigned len);
+       int                     (*rebuild_header)(struct sk_buff *skb);
+#define HAVE_MULTICAST                  
+       void                    (*set_multicast_list)(struct net_device *dev);
+#define HAVE_SET_MAC_ADDR               
+       int                     (*set_mac_address)(struct net_device *dev,
+                                                  void *addr);
+#define HAVE_PRIVATE_IOCTL
+       int                     (*do_ioctl)(struct net_device *dev,
+                                           struct ifreq *ifr, int cmd);
+#define HAVE_SET_CONFIG
+       int                     (*set_config)(struct net_device *dev,
+                                             struct ifmap *map);
+#define HAVE_HEADER_CACHE
+       int                     (*hard_header_cache)(struct neighbour *neigh,
+                                                    struct hh_cache *hh);
+       void                    (*header_cache_update)(struct hh_cache *hh,
+                                                      struct net_device *dev,
+                                                      unsigned char *  haddr);
+#define HAVE_CHANGE_MTU
+       int                     (*change_mtu)(struct net_device *dev, int new_mtu);
+
+#define HAVE_TX_TIMEOUT
+       void                    (*tx_timeout) (struct net_device *dev);
+
+       int                     (*hard_header_parse)(struct sk_buff *skb,
+                                                    unsigned char *haddr);
+       int                     (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
+//     int                     (*accept_fastpath)(struct net_device *, struct dst_entry*);
+
+       /* open/release and usage marking */
+       struct module *owner;
+
+       /* bridge stuff */
+       struct net_bridge_port  *br_port;
+};
+
+
+struct packet_type 
+{
+       unsigned short          type;   /* This is really htons(ether_type).    */
+       struct net_device               *dev;   /* NULL is wildcarded here              */
+       int                     (*func) (struct sk_buff *, struct net_device *,
+                                        struct packet_type *);
+       void                    *data;  /* Private to the packet type           */
+       struct packet_type      *next;
+};
+
+
+#include <linux/interrupt.h>
+//#include <linux/notifier.h>
+
+extern struct net_device               loopback_dev;           /* The loopback */
+extern struct net_device               *dev_base;              /* All devices */
+extern rwlock_t                                dev_base_lock;          /* Device list lock */
+
+extern int                     netdev_boot_setup_add(char *name, struct ifmap *map);
+extern int                     netdev_boot_setup_check(struct net_device *dev);
+extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+extern void            dev_add_pack(struct packet_type *pt);
+extern void            dev_remove_pack(struct packet_type *pt);
+extern int             dev_get(const char *name);
+extern struct net_device       *dev_get_by_name(const char *name);
+extern struct net_device       *__dev_get_by_name(const char *name);
+extern struct net_device       *dev_alloc(const char *name, int *err);
+extern int             dev_alloc_name(struct net_device *dev, const char *name);
+extern int             dev_open(struct net_device *dev);
+extern int             dev_close(struct net_device *dev);
+extern int             dev_queue_xmit(struct sk_buff *skb);
+extern int             register_netdevice(struct net_device *dev);
+extern int             unregister_netdevice(struct net_device *dev);
+//extern int           register_netdevice_notifier(struct notifier_block *nb);
+//extern int           unregister_netdevice_notifier(struct notifier_block *nb);
+extern int             dev_new_index(void);
+extern struct net_device       *dev_get_by_index(int ifindex);
+extern struct net_device       *__dev_get_by_index(int ifindex);
+extern int             dev_restart(struct net_device *dev);
+
+typedef int gifconf_func_t(struct net_device * dev, char * bufptr, int len);
+extern int             register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+static inline int unregister_gifconf(unsigned int family)
+{
+       return register_gifconf(family, 0);
+}
+
+/*
+ * Incoming packets are placed on per-cpu queues so that
+ * no locking is needed.
+ */
+
+struct softnet_data
+{
+       int                     throttle;
+       int                     cng_level;
+       int                     avg_blog;
+       struct sk_buff_head     input_pkt_queue;
+       struct net_device       *output_queue;
+       struct sk_buff          *completion_queue;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+
+extern struct softnet_data softnet_data[NR_CPUS];
+
+#define HAVE_NETIF_QUEUE
+
+static inline void __netif_schedule(struct net_device *dev)
+{
+       if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
+               unsigned long flags;
+               int cpu = smp_processor_id();
+
+               local_irq_save(flags);
+               dev->next_sched = softnet_data[cpu].output_queue;
+               softnet_data[cpu].output_queue = dev;
+               cpu_raise_softirq(cpu, NET_TX_SOFTIRQ);
+               local_irq_restore(flags);
+       }
+}
+
+static inline void netif_schedule(struct net_device *dev)
+{
+       if (!test_bit(__LINK_STATE_XOFF, &dev->state))
+               __netif_schedule(dev);
+}
+
+static inline void netif_start_queue(struct net_device *dev)
+{
+       clear_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline void netif_wake_queue(struct net_device *dev)
+{
+       if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+               __netif_schedule(dev);
+}
+
+static inline void netif_stop_queue(struct net_device *dev)
+{
+       set_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_queue_stopped(struct net_device *dev)
+{
+       return test_bit(__LINK_STATE_XOFF, &dev->state);
+}
+
+static inline int netif_running(struct net_device *dev)
+{
+       return test_bit(__LINK_STATE_START, &dev->state);
+}
+
+/* Use this variant when it is known for sure that it
+ * is executing from interrupt context.
+ */
+static inline void dev_kfree_skb_irq(struct sk_buff *skb)
+{
+       if (atomic_dec_and_test(&skb->users)) {
+               int cpu = smp_processor_id();
+               unsigned long flags;
+
+               local_irq_save(flags);
+               skb->next = softnet_data[cpu].completion_queue;
+               softnet_data[cpu].completion_queue = skb;
+               cpu_raise_softirq(cpu, NET_TX_SOFTIRQ);
+               local_irq_restore(flags);
+       }
+}
+
+/* Use this variant in places where it could be invoked
+ * either from interrupt or non-interrupt context.
+ */
+static inline void dev_kfree_skb_any(struct sk_buff *skb)
+{
+       if (in_irq())
+               dev_kfree_skb_irq(skb);
+       else
+               dev_kfree_skb(skb);
+}
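+
+/* Illustrative use (editor's sketch): the usual driver transmit pattern.
+ * The ring_* helpers are hypothetical; the point is stopping the queue
+ * when full and freeing completed skbs with the _irq variant. */
+#if 0  /* example only */
+static int example_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       ring_enqueue(dev->priv, skb);
+       if (ring_full(dev->priv))
+               netif_stop_queue(dev);          /* throttle the stack */
+       return 0;
+}
+
+static void example_tx_interrupt(struct net_device *dev)
+{
+       struct sk_buff *skb;
+
+       while ((skb = ring_dequeue_done(dev->priv)) != NULL)
+               dev_kfree_skb_irq(skb);         /* safe in IRQ context */
+       if (netif_queue_stopped(dev) && !ring_full(dev->priv))
+               netif_wake_queue(dev);          /* restart transmission */
+}
+#endif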
+
+extern void            net_call_rx_atomic(void (*fn)(void));
+#define HAVE_NETIF_RX 1
+extern int             netif_rx(struct sk_buff *skb);
+extern int             dev_ioctl(unsigned int cmd, void *);
+extern int             dev_change_flags(struct net_device *, unsigned);
+extern void            dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
+
+extern void            dev_init(void);
+
+extern int             netdev_nit;
+
+/* Post buffer to the network code from _non interrupt_ context.
+ * see net/core/dev.c for netif_rx description.
+ */
+static inline int netif_rx_ni(struct sk_buff *skb)
+{
+       int err = netif_rx(skb);
+       if (softirq_pending(smp_processor_id()))
+               do_softirq();
+       return err;
+}
+
+static inline void dev_init_buffers(struct net_device *dev)
+{
+       /* WILL BE REMOVED IN 2.5.0 */
+}
+
+extern int netdev_finish_unregister(struct net_device *dev);
+
+static inline void dev_put(struct net_device *dev)
+{
+       if (atomic_dec_and_test(&dev->refcnt))
+               netdev_finish_unregister(dev);
+}
+
+#define __dev_put(dev) atomic_dec(&(dev)->refcnt)
+#define dev_hold(dev) atomic_inc(&(dev)->refcnt)
+
+/* Carrier loss detection, dial on demand. The functions netif_carrier_on
+ * and _off may be called from IRQ context, but it is the caller
+ * who is responsible for serialization of these calls.
+ */
+
+static inline int netif_carrier_ok(struct net_device *dev)
+{
+       return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+extern void __netdev_watchdog_up(struct net_device *dev);
+
+static inline void netif_carrier_on(struct net_device *dev)
+{
+       clear_bit(__LINK_STATE_NOCARRIER, &dev->state);
+       if (netif_running(dev))
+               __netdev_watchdog_up(dev);
+}
+
+static inline void netif_carrier_off(struct net_device *dev)
+{
+       set_bit(__LINK_STATE_NOCARRIER, &dev->state);
+}
+
+/* Hot-plugging. */
+static inline int netif_device_present(struct net_device *dev)
+{
+       return test_bit(__LINK_STATE_PRESENT, &dev->state);
+}
+
+static inline void netif_device_detach(struct net_device *dev)
+{
+       if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
+           netif_running(dev)) {
+               netif_stop_queue(dev);
+       }
+}
+
+static inline void netif_device_attach(struct net_device *dev)
+{
+       if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
+           netif_running(dev)) {
+               netif_wake_queue(dev);
+               __netdev_watchdog_up(dev);
+       }
+}
+
+/*
+ * Network interface message level settings
+ */
+#define HAVE_NETIF_MSG 1
+
+enum {
+       NETIF_MSG_DRV           = 0x0001,
+       NETIF_MSG_PROBE         = 0x0002,
+       NETIF_MSG_LINK          = 0x0004,
+       NETIF_MSG_TIMER         = 0x0008,
+       NETIF_MSG_IFDOWN        = 0x0010,
+       NETIF_MSG_IFUP          = 0x0020,
+       NETIF_MSG_RX_ERR        = 0x0040,
+       NETIF_MSG_TX_ERR        = 0x0080,
+       NETIF_MSG_TX_QUEUED     = 0x0100,
+       NETIF_MSG_INTR          = 0x0200,
+       NETIF_MSG_TX_DONE       = 0x0400,
+       NETIF_MSG_RX_STATUS     = 0x0800,
+       NETIF_MSG_PKTDATA       = 0x1000,
+};
+
+#define netif_msg_drv(p)       ((p)->msg_enable & NETIF_MSG_DRV)
+#define netif_msg_probe(p)     ((p)->msg_enable & NETIF_MSG_PROBE)
+#define netif_msg_link(p)      ((p)->msg_enable & NETIF_MSG_LINK)
+#define netif_msg_timer(p)     ((p)->msg_enable & NETIF_MSG_TIMER)
+#define netif_msg_ifdown(p)    ((p)->msg_enable & NETIF_MSG_IFDOWN)
+#define netif_msg_ifup(p)      ((p)->msg_enable & NETIF_MSG_IFUP)
+#define netif_msg_rx_err(p)    ((p)->msg_enable & NETIF_MSG_RX_ERR)
+#define netif_msg_tx_err(p)    ((p)->msg_enable & NETIF_MSG_TX_ERR)
+#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED)
+#define netif_msg_intr(p)      ((p)->msg_enable & NETIF_MSG_INTR)
+#define netif_msg_tx_done(p)   ((p)->msg_enable & NETIF_MSG_TX_DONE)
+#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS)
+#define netif_msg_pktdata(p)   ((p)->msg_enable & NETIF_MSG_PKTDATA)
+
+/* These functions live elsewhere (in drivers/net/net_init.c) but are related */
+
+extern void            ether_setup(struct net_device *dev);
+extern void            fddi_setup(struct net_device *dev);
+extern void            tr_setup(struct net_device *dev);
+extern void            fc_setup(struct net_device *dev);
+extern void            fc_freedev(struct net_device *dev);
+/* Support for loadable net-drivers */
+extern int             register_netdev(struct net_device *dev);
+extern void            unregister_netdev(struct net_device *dev);
+/* Functions used for multicast support */
+extern void            dev_mc_upload(struct net_device *dev);
+extern int             dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
+extern int             dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
+extern void            dev_mc_discard(struct net_device *dev);
+extern void            dev_set_promiscuity(struct net_device *dev, int inc);
+extern void            dev_set_allmulti(struct net_device *dev, int inc);
+extern void            netdev_state_change(struct net_device *dev);
+/* Load a device via the kmod */
+extern void            dev_load(const char *name);
+extern void            dev_mcast_init(void);
+extern int             netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev));
+extern void            netdev_unregister_fc(int bit);
+extern int             netdev_max_backlog;
+extern unsigned long   netdev_fc_xoff;
+extern atomic_t netdev_dropping;
+extern int             netdev_set_master(struct net_device *dev, struct net_device *master);
+extern struct sk_buff * skb_checksum_help(struct sk_buff *skb);
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_DEV_H */
diff --git a/xen-2.4.16/include/xeno/pci.h b/xen-2.4.16/include/xeno/pci.h
new file mode 100644 (file)
index 0000000..15e51d2
--- /dev/null
@@ -0,0 +1,761 @@
+/*
+ *     $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $
+ *
+ *     PCI defines and function prototypes
+ *     Copyright 1994, Drew Eckhardt
+ *     Copyright 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ *     For more information, please consult the following manuals (look at
+ *     http://www.pcisig.com/ for how to get them):
+ *
+ *     PCI BIOS Specification
+ *     PCI Local Bus Specification
+ *     PCI to PCI Bridge Specification
+ *     PCI System Design Guide
+ */
+
+#ifndef LINUX_PCI_H
+#define LINUX_PCI_H
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID          0x00    /* 16 bits */
+#define PCI_DEVICE_ID          0x02    /* 16 bits */
+#define PCI_COMMAND            0x04    /* 16 bits */
+#define  PCI_COMMAND_IO                0x1     /* Enable response in I/O space */
+#define  PCI_COMMAND_MEMORY    0x2     /* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER    0x4     /* Enable bus mastering */
+#define  PCI_COMMAND_SPECIAL   0x8     /* Enable response to special cycles */
+#define  PCI_COMMAND_INVALIDATE        0x10    /* Use memory write and invalidate */
+#define  PCI_COMMAND_VGA_PALETTE 0x20  /* Enable palette snooping */
+#define  PCI_COMMAND_PARITY    0x40    /* Enable parity checking */
+#define  PCI_COMMAND_WAIT      0x80    /* Enable address/data stepping */
+#define  PCI_COMMAND_SERR      0x100   /* Enable SERR */
+#define  PCI_COMMAND_FAST_BACK 0x200   /* Enable back-to-back writes */
+
+#define PCI_STATUS             0x06    /* 16 bits */
+#define  PCI_STATUS_CAP_LIST   0x10    /* Support Capability List */
+#define  PCI_STATUS_66MHZ      0x20    /* Support 66 MHz PCI 2.1 bus */
+#define  PCI_STATUS_UDF                0x40    /* Support User Definable Features [obsolete] */
+#define  PCI_STATUS_FAST_BACK  0x80    /* Accept fast-back to back */
+#define  PCI_STATUS_PARITY     0x100   /* Detected parity error */
+#define  PCI_STATUS_DEVSEL_MASK        0x600   /* DEVSEL timing */
+#define  PCI_STATUS_DEVSEL_FAST        0x000   
+#define  PCI_STATUS_DEVSEL_MEDIUM 0x200
+#define  PCI_STATUS_DEVSEL_SLOW 0x400
+#define  PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
+#define  PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of target abort */
+#define  PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
+#define  PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
+#define  PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION     0x08    /* High 24 bits are class, low 8
+                                          revision */
+#define PCI_REVISION_ID         0x08    /* Revision ID */
+#define PCI_CLASS_PROG          0x09    /* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE        0x0a    /* Device class */
+
+#define PCI_CACHE_LINE_SIZE    0x0c    /* 8 bits */
+#define PCI_LATENCY_TIMER      0x0d    /* 8 bits */
+#define PCI_HEADER_TYPE                0x0e    /* 8 bits */
+#define  PCI_HEADER_TYPE_NORMAL        0
+#define  PCI_HEADER_TYPE_BRIDGE 1
+#define  PCI_HEADER_TYPE_CARDBUS 2
+
+#define PCI_BIST               0x0f    /* 8 bits */
+#define PCI_BIST_CODE_MASK     0x0f    /* Return result */
+#define PCI_BIST_START         0x40    /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE       0x80    /* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of 
+ * 0xffffffff to the register, and reading it back.  Only 
+ * 1 bits are decoded.
+ */
+#define PCI_BASE_ADDRESS_0     0x10    /* 32 bits */
+#define PCI_BASE_ADDRESS_1     0x14    /* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2     0x18    /* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3     0x1c    /* 32 bits */
+#define PCI_BASE_ADDRESS_4     0x20    /* 32 bits */
+#define PCI_BASE_ADDRESS_5     0x24    /* 32 bits */
+#define  PCI_BASE_ADDRESS_SPACE        0x01    /* 0 = memory, 1 = I/O */
+#define  PCI_BASE_ADDRESS_SPACE_IO 0x01
+#define  PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
+#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define  PCI_BASE_ADDRESS_MEM_TYPE_32  0x00    /* 32 bit address */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_1M  0x02    /* Below 1M [obsolete] */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_64  0x04    /* 64 bit address */
+#define  PCI_BASE_ADDRESS_MEM_PREFETCH 0x08    /* prefetchable? */
+#define  PCI_BASE_ADDRESS_MEM_MASK     (~0x0fUL)
+#define  PCI_BASE_ADDRESS_IO_MASK      (~0x03UL)
+/* bit 1 is reserved if address_space = 1 */
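+
+/* Illustrative BAR sizing (editor's sketch of the probe described above):
+ * write all ones, read back the mask, restore the register, and derive
+ * the decoded size.  Assumes the 2.4 pci_{read,write}_config_dword API. */
+#if 0  /* example only */
+static u32 example_bar_size(struct pci_dev *dev, int reg)
+{
+       u32 orig, mask;
+
+       pci_read_config_dword(dev, reg, &orig);
+       pci_write_config_dword(dev, reg, ~0);
+       pci_read_config_dword(dev, reg, &mask);
+       pci_write_config_dword(dev, reg, orig); /* restore */
+
+       if (orig & PCI_BASE_ADDRESS_SPACE_IO)
+               mask &= PCI_BASE_ADDRESS_IO_MASK;
+       else
+               mask &= PCI_BASE_ADDRESS_MEM_MASK;
+       return ~mask + 1;                       /* size of decoded range */
+}
+#endif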
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS                0x28
+#define PCI_SUBSYSTEM_VENDOR_ID        0x2c
+#define PCI_SUBSYSTEM_ID       0x2e  
+#define PCI_ROM_ADDRESS                0x30    /* Bits 31..11 are address, 10..1 reserved */
+#define  PCI_ROM_ADDRESS_ENABLE        0x01
+#define PCI_ROM_ADDRESS_MASK   (~0x7ffUL)
+
+#define PCI_CAPABILITY_LIST    0x34    /* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE     0x3c    /* 8 bits */
+#define PCI_INTERRUPT_PIN      0x3d    /* 8 bits */
+#define PCI_MIN_GNT            0x3e    /* 8 bits */
+#define PCI_MAX_LAT            0x3f    /* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS                0x18    /* Primary bus number */
+#define PCI_SECONDARY_BUS      0x19    /* Secondary bus number */
+#define PCI_SUBORDINATE_BUS    0x1a    /* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER  0x1b    /* Latency timer for secondary interface */
+#define PCI_IO_BASE            0x1c    /* I/O range behind the bridge */
+#define PCI_IO_LIMIT           0x1d
+#define  PCI_IO_RANGE_TYPE_MASK        0x0fUL  /* I/O bridging type */
+#define  PCI_IO_RANGE_TYPE_16  0x00
+#define  PCI_IO_RANGE_TYPE_32  0x01
+#define  PCI_IO_RANGE_MASK     (~0x0fUL)
+#define PCI_SEC_STATUS         0x1e    /* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE                0x20    /* Memory range behind */
+#define PCI_MEMORY_LIMIT       0x22
+#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_MEMORY_RANGE_MASK (~0x0fUL)
+#define PCI_PREF_MEMORY_BASE   0x24    /* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT  0x26
+#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_PREF_RANGE_TYPE_32        0x00
+#define  PCI_PREF_RANGE_TYPE_64        0x01
+#define  PCI_PREF_RANGE_MASK   (~0x0fUL)
+#define PCI_PREF_BASE_UPPER32  0x28    /* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32 0x2c
+#define PCI_IO_BASE_UPPER16    0x30    /* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16   0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b is reserved */
+#define PCI_ROM_ADDRESS1       0x38    /* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL     0x3e
+#define  PCI_BRIDGE_CTL_PARITY 0x01    /* Enable parity detection on secondary interface */
+#define  PCI_BRIDGE_CTL_SERR   0x02    /* The same for SERR forwarding */
+#define  PCI_BRIDGE_CTL_NO_ISA 0x04    /* Disable bridging of ISA ports */
+#define  PCI_BRIDGE_CTL_VGA    0x08    /* Forward VGA addresses */
+#define  PCI_BRIDGE_CTL_MASTER_ABORT 0x20  /* Report master aborts */
+#define  PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define  PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+/* Header type 2 (CardBus bridges) */
+#define PCI_CB_CAPABILITY_LIST 0x14
+/* 0x15 reserved */
+#define PCI_CB_SEC_STATUS      0x16    /* Secondary status */
+#define PCI_CB_PRIMARY_BUS     0x18    /* PCI bus number */
+#define PCI_CB_CARD_BUS                0x19    /* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS 0x1a    /* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER   0x1b    /* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0   0x1c
+#define PCI_CB_MEMORY_LIMIT_0  0x20
+#define PCI_CB_MEMORY_BASE_1   0x24
+#define PCI_CB_MEMORY_LIMIT_1  0x28
+#define PCI_CB_IO_BASE_0       0x2c
+#define PCI_CB_IO_BASE_0_HI    0x2e
+#define PCI_CB_IO_LIMIT_0      0x30
+#define PCI_CB_IO_LIMIT_0_HI   0x32
+#define PCI_CB_IO_BASE_1       0x34
+#define PCI_CB_IO_BASE_1_HI    0x36
+#define PCI_CB_IO_LIMIT_1      0x38
+#define PCI_CB_IO_LIMIT_1_HI   0x3a
+#define  PCI_CB_IO_RANGE_MASK  (~0x03UL)
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL  0x3e
+#define  PCI_CB_BRIDGE_CTL_PARITY      0x01    /* Similar to standard bridge control register */
+#define  PCI_CB_BRIDGE_CTL_SERR                0x02
+#define  PCI_CB_BRIDGE_CTL_ISA         0x04
+#define  PCI_CB_BRIDGE_CTL_VGA         0x08
+#define  PCI_CB_BRIDGE_CTL_MASTER_ABORT        0x20
+#define  PCI_CB_BRIDGE_CTL_CB_RESET    0x40    /* CardBus reset */
+#define  PCI_CB_BRIDGE_CTL_16BIT_INT   0x80    /* Enable interrupt for 16-bit cards */
+#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
+#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define  PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
+#define PCI_CB_SUBSYSTEM_ID    0x42
+#define PCI_CB_LEGACY_MODE_BASE        0x44    /* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID                0       /* Capability ID */
+#define  PCI_CAP_ID_PM         0x01    /* Power Management */
+#define  PCI_CAP_ID_AGP                0x02    /* Accelerated Graphics Port */
+#define  PCI_CAP_ID_VPD                0x03    /* Vital Product Data */
+#define  PCI_CAP_ID_SLOTID     0x04    /* Slot Identification */
+#define  PCI_CAP_ID_MSI                0x05    /* Message Signalled Interrupts */
+#define  PCI_CAP_ID_CHSWP      0x06    /* CompactPCI HotSwap */
+#define PCI_CAP_LIST_NEXT      1       /* Next capability in the list */
+#define PCI_CAP_FLAGS          2       /* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF         4
+
+/* Power Management Registers */
+
+#define PCI_PM_PMC              2       /* PM Capabilities Register */
+#define  PCI_PM_CAP_VER_MASK   0x0007  /* Version */
+#define  PCI_PM_CAP_PME_CLOCK  0x0008  /* PME clock required */
+#define  PCI_PM_CAP_RESERVED    0x0010  /* Reserved field */
+#define  PCI_PM_CAP_DSI                0x0020  /* Device specific initialization */
+#define  PCI_PM_CAP_AUX_POWER  0x01C0  /* Auxiliary power support mask */
+#define  PCI_PM_CAP_D1         0x0200  /* D1 power state support */
+#define  PCI_PM_CAP_D2         0x0400  /* D2 power state support */
+#define  PCI_PM_CAP_PME                0x0800  /* PME pin supported */
+#define  PCI_PM_CAP_PME_MASK    0xF800  /* PME Mask of all supported states */
+#define  PCI_PM_CAP_PME_D0      0x0800  /* PME# from D0 */
+#define  PCI_PM_CAP_PME_D1      0x1000  /* PME# from D1 */
+#define  PCI_PM_CAP_PME_D2      0x2000  /* PME# from D2 */
+#define  PCI_PM_CAP_PME_D3      0x4000  /* PME# from D3 (hot) */
+#define  PCI_PM_CAP_PME_D3cold  0x8000  /* PME# from D3 (cold) */
+#define PCI_PM_CTRL            4       /* PM control and status register */
+#define  PCI_PM_CTRL_STATE_MASK        0x0003  /* Current power state (D0 to D3) */
+#define  PCI_PM_CTRL_PME_ENABLE        0x0100  /* PME pin enable */
+#define  PCI_PM_CTRL_DATA_SEL_MASK     0x1e00  /* Data select (??) */
+#define  PCI_PM_CTRL_DATA_SCALE_MASK   0x6000  /* Data scale (??) */
+#define  PCI_PM_CTRL_PME_STATUS        0x8000  /* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS  6       /* PPB support extensions (??) */
+#define  PCI_PM_PPB_B2_B3      0x40    /* Stop clock when in D3hot (??) */
+#define  PCI_PM_BPCC_ENABLE    0x80    /* Bus power/clock control enable (??) */
+#define PCI_PM_DATA_REGISTER   7       /* (??) */
+#define PCI_PM_SIZEOF          8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION                2       /* BCD version number */
+#define PCI_AGP_RFU            3       /* Rest of capability flags */
+#define PCI_AGP_STATUS         4       /* Status register */
+#define  PCI_AGP_STATUS_RQ_MASK        0xff000000      /* Maximum number of requests - 1 */
+#define  PCI_AGP_STATUS_SBA    0x0200  /* Sideband addressing supported */
+#define  PCI_AGP_STATUS_64BIT  0x0020  /* 64-bit addressing supported */
+#define  PCI_AGP_STATUS_FW     0x0010  /* FW transfers supported */
+#define  PCI_AGP_STATUS_RATE4  0x0004  /* 4x transfer rate supported */
+#define  PCI_AGP_STATUS_RATE2  0x0002  /* 2x transfer rate supported */
+#define  PCI_AGP_STATUS_RATE1  0x0001  /* 1x transfer rate supported */
+#define PCI_AGP_COMMAND                8       /* Control register */
+#define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
+#define  PCI_AGP_COMMAND_SBA   0x0200  /* Sideband addressing enabled */
+#define  PCI_AGP_COMMAND_AGP   0x0100  /* Allow processing of AGP transactions */
+#define  PCI_AGP_COMMAND_64BIT 0x0020  /* Allow processing of 64-bit addresses */
+#define  PCI_AGP_COMMAND_FW    0x0010  /* Force FW transfers */
+#define  PCI_AGP_COMMAND_RATE4 0x0004  /* Use 4x rate */
+#define  PCI_AGP_COMMAND_RATE2 0x0002  /* Use 2x rate */
+#define  PCI_AGP_COMMAND_RATE1 0x0001  /* Use 1x rate */
+#define PCI_AGP_SIZEOF         12
+
+/* Slot Identification */
+
+#define PCI_SID_ESR            2       /* Expansion Slot Register */
+#define  PCI_SID_ESR_NSLOTS    0x1f    /* Number of expansion slots available */
+#define  PCI_SID_ESR_FIC       0x20    /* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR     3       /* Chassis Number */
+
+/* Message Signalled Interrupts registers */
+
+#define PCI_MSI_FLAGS          2       /* Various flags */
+#define  PCI_MSI_FLAGS_64BIT   0x80    /* 64-bit addresses allowed */
+#define  PCI_MSI_FLAGS_QSIZE   0x70    /* Message queue size configured */
+#define  PCI_MSI_FLAGS_QMASK   0x0e    /* Maximum queue size available */
+#define  PCI_MSI_FLAGS_ENABLE  0x01    /* MSI feature enabled */
+#define PCI_MSI_RFU            3       /* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO     4       /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI     8       /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32                8       /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_DATA_64                12      /* 16 bits of data for 64-bit devices */
+
+/* Include the ID list */
+
+#include <linux/pci_ids.h>
+
+/*
+ * The PCI interface treats multi-function devices as independent
+ * devices.  The slot/function address of each device is encoded
+ * in a single byte as follows:
+ *
+ *     7:3 = slot
+ *     2:0 = function
+ */
+#define PCI_DEVFN(slot,func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn)                (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn)                ((devfn) & 0x07)
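+
+/* Worked example (editor's note): slot 3, function 1 gives
+ * PCI_DEVFN(3,1) == (3 << 3) | 1 == 0x19, and PCI_SLOT(0x19) == 3,
+ * PCI_FUNC(0x19) == 1 recover the two fields. */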
+
+/* Ioctls for /proc/bus/pci/X/Y nodes. */
+#define PCIIOC_BASE            ('P' << 24 | 'C' << 16 | 'I' << 8)
+#define PCIIOC_CONTROLLER      (PCIIOC_BASE | 0x00)    /* Get controller for PCI device. */
+#define PCIIOC_MMAP_IS_IO      (PCIIOC_BASE | 0x01)    /* Set mmap state to I/O space. */
+#define PCIIOC_MMAP_IS_MEM     (PCIIOC_BASE | 0x02)    /* Set mmap state to MEM space. */
+#define PCIIOC_WRITE_COMBINE   (PCIIOC_BASE | 0x03)    /* Enable/disable write-combining. */
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/config.h>
+#include <linux/ioport.h>
+#include <linux/list.h>
+#include <linux/errno.h>
+
+/* File state for mmap()s on /proc/bus/pci/X/Y */
+enum pci_mmap_state {
+       pci_mmap_io,
+       pci_mmap_mem
+};
+
+/* This defines the direction arg to the DMA mapping routines. */
+#define PCI_DMA_BIDIRECTIONAL  0
+#define PCI_DMA_TODEVICE       1
+#define PCI_DMA_FROMDEVICE     2
+#define PCI_DMA_NONE           3
+
+#define DEVICE_COUNT_COMPATIBLE        4
+#define DEVICE_COUNT_IRQ       2
+#define DEVICE_COUNT_DMA       2
+#define DEVICE_COUNT_RESOURCE  12
+
+#define PCI_ANY_ID (~0)
+
+#define pci_present pcibios_present
+
+
+#define pci_for_each_dev_reverse(dev) \
+       for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev))
+
+#define pci_for_each_bus(bus) \
+       for(bus = pci_bus_b(pci_root_buses.next); bus != pci_bus_b(&pci_root_buses); bus = pci_bus_b(bus->node.next))
+
+/*
+ * The pci_dev structure is used to describe both PCI and ISAPnP devices.
+ */
+struct pci_dev {
+       struct list_head global_list;   /* node in list of all PCI devices */
+       struct list_head bus_list;      /* node in per-bus list */
+       struct pci_bus  *bus;           /* bus this device is on */
+       struct pci_bus  *subordinate;   /* bus this device bridges to */
+
+       void            *sysdata;       /* hook for sys-specific extension */
+       struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+
+       unsigned int    devfn;          /* encoded device & function index */
+       unsigned short  vendor;
+       unsigned short  device;
+       unsigned short  subsystem_vendor;
+       unsigned short  subsystem_device;
+       unsigned int    class;          /* 3 bytes: (base,sub,prog-if) */
+       u8              hdr_type;       /* PCI header type (`multi' flag masked out) */
+       u8              rom_base_reg;   /* which config register controls the ROM */
+
+       struct pci_driver *driver;      /* which driver has allocated this device */
+       void            *driver_data;   /* data private to the driver */
+       u64             dma_mask;       /* Mask of the bits of bus address this
+                                          device implements.  Normally this is
+                                          0xffffffff.  You only need to change
+                                          this if your device has broken DMA
+                                          or supports 64-bit transfers.  */
+
+       u32             current_state;  /* Current operating state. In ACPI-speak,
+                                          this is D0-D3, D0 being fully functional,
+                                          and D3 being off. */
+
+       /* device is compatible with these IDs */
+       unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
+       unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
+
+       /*
+        * Instead of touching interrupt line and base address registers
+        * directly, use the values stored here. They might be different!
+        */
+       unsigned int    irq;
+       struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+       struct resource dma_resource[DEVICE_COUNT_DMA];
+       struct resource irq_resource[DEVICE_COUNT_IRQ];
+
+       char            name[80];       /* device name */
+       char            slot_name[8];   /* slot name */
+       int             active;         /* ISAPnP: device is active */
+       int             ro;             /* ISAPnP: read only */
+       unsigned short  regs;           /* ISAPnP: supported registers */
+
+       int (*prepare)(struct pci_dev *dev);    /* ISAPnP hooks */
+       int (*activate)(struct pci_dev *dev);
+       int (*deactivate)(struct pci_dev *dev);
+};
+
+#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
+#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
+
+/*
+ *  For PCI devices, the region numbers are assigned this way:
+ *
+ *     0-5     standard PCI regions
+ *     6       expansion ROM
+ *     7-10    bridges: address space assigned to buses behind the bridge
+ */
+
+#define PCI_ROM_RESOURCE 6
+#define PCI_BRIDGE_RESOURCES 7
+#define PCI_NUM_RESOURCES 11
+  
+#define PCI_REGION_FLAG_MASK 0x0fU     /* These bits of resource flags tell us the PCI region flags */
+
+struct pci_bus {
+       struct list_head node;          /* node in list of buses */
+       struct pci_bus  *parent;        /* parent bus this bridge is on */
+       struct list_head children;      /* list of child buses */
+       struct list_head devices;       /* list of devices on this bus */
+       struct pci_dev  *self;          /* bridge device as seen by parent */
+       struct resource *resource[4];   /* address space routed to this bus */
+
+       struct pci_ops  *ops;           /* configuration access functions */
+       void            *sysdata;       /* hook for sys-specific extension */
+       struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
+
+       unsigned char   number;         /* bus number */
+       unsigned char   primary;        /* number of primary bridge */
+       unsigned char   secondary;      /* number of secondary bridge */
+       unsigned char   subordinate;    /* max number of subordinate buses */
+
+       char            name[48];
+       unsigned short  vendor;
+       unsigned short  device;
+       unsigned int    serial;         /* serial number */
+       unsigned char   pnpver;         /* Plug & Play version */
+       unsigned char   productver;     /* product version */
+       unsigned char   checksum;       /* if zero - checksum passed */
+       unsigned char   pad1;
+};
+
+#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
+
+extern struct list_head pci_root_buses;        /* list of all known PCI buses */
+extern struct list_head pci_devices;   /* list of all devices */
+
+/*
+ * Error values that may be returned by PCI functions.
+ */
+#define PCIBIOS_SUCCESSFUL             0x00
+#define PCIBIOS_FUNC_NOT_SUPPORTED     0x81
+#define PCIBIOS_BAD_VENDOR_ID          0x83
+#define PCIBIOS_DEVICE_NOT_FOUND       0x86
+#define PCIBIOS_BAD_REGISTER_NUMBER    0x87
+#define PCIBIOS_SET_FAILED             0x88
+#define PCIBIOS_BUFFER_TOO_SMALL       0x89
+
+/* Low-level architecture-dependent routines */
+
+struct pci_ops {
+       int (*read_byte)(struct pci_dev *, int where, u8 *val);
+       int (*read_word)(struct pci_dev *, int where, u16 *val);
+       int (*read_dword)(struct pci_dev *, int where, u32 *val);
+       int (*write_byte)(struct pci_dev *, int where, u8 val);
+       int (*write_word)(struct pci_dev *, int where, u16 val);
+       int (*write_dword)(struct pci_dev *, int where, u32 val);
+};
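+
+#if 0
+/* Illustrative sketch, not part of the imported header: one way an
+ * architecture can back these hooks is PCI configuration mechanism #1
+ * (ports 0xCF8/0xCFC).  EXAMPLE_CONFIG_CMD and the example_* names are
+ * made up for the example. */
+#define EXAMPLE_CONFIG_CMD(dev, where) \
+       (0x80000000 | ((dev)->bus->number << 16) | ((dev)->devfn << 8) | ((where) & ~3))
+
+static int example_read_byte(struct pci_dev *dev, int where, u8 *val)
+{
+       outl(EXAMPLE_CONFIG_CMD(dev, where), 0xCF8);    /* select the register */
+       *val = inb(0xCFC + (where & 3));                /* fetch the byte */
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops example_pci_ops = {
+       read_byte:      example_read_byte,
+       /* ...and likewise for the other five accessors */
+};
+#endif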
+
+struct pbus_set_ranges_data
+{
+       int found_vga;
+       unsigned long io_start, io_end;
+       unsigned long mem_start, mem_end;
+};
+
+struct pci_device_id {
+       unsigned int vendor, device;            /* Vendor and device ID or PCI_ANY_ID */
+       unsigned int subvendor, subdevice;      /* Subsystem IDs or PCI_ANY_ID */
+       unsigned int class, class_mask;         /* (class,subclass,prog-if) triplet */
+       unsigned long driver_data;              /* Data private to the driver */
+};
+
+struct pci_driver {
+       struct list_head node;
+       char *name;
+       const struct pci_device_id *id_table;   /* NULL if wants all devices */
+       int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);   /* New device inserted */
+       void (*remove) (struct pci_dev *dev);   /* Device removed (NULL if not a hot-plug capable driver) */
+       int  (*save_state) (struct pci_dev *dev, u32 state);    /* Save Device Context */
+       int  (*suspend)(struct pci_dev *dev, u32 state);        /* Device suspended */
+       int  (*resume) (struct pci_dev *dev);                   /* Device woken up */
+       int  (*enable_wake) (struct pci_dev *dev, u32 state, int enable);   /* Enable wake event */
+};
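+
+#if 0
+/* Illustrative sketch, not part of the imported header: a minimal driver
+ * built on this interface.  All example_* names are made up; the ID pair
+ * is the Realtek 8139 from <xeno/pci_ids.h>. */
+static struct pci_device_id example_ids[] = {
+       { PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139,
+         PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+       { 0, }                          /* terminate the list */
+};
+
+static int example_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+       if (pci_enable_device(dev))     /* wake the device, assign resources */
+               return -EIO;
+       pci_set_master(dev);            /* allow bus-master DMA */
+       return 0;
+}
+
+static struct pci_driver example_driver = {
+       name:           "example",
+       id_table:       example_ids,
+       probe:          example_probe,
+};
+#endif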
+
+
+/* these external functions are only available when PCI support is enabled */
+#ifdef CONFIG_PCI
+
+#define pci_for_each_dev(dev) \
+       for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next))
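+
+#if 0
+/* Illustrative sketch, not part of the imported header: walking every
+ * known device with the macro above (example_list_devices is a made-up
+ * name). */
+static void example_list_devices(void)
+{
+       struct pci_dev *dev;
+
+       pci_for_each_dev(dev)
+               printk("%s: vendor %04x device %04x\n",
+                      dev->slot_name, dev->vendor, dev->device);
+}
+#endif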
+
+void pcibios_init(void);
+void pcibios_fixup_bus(struct pci_bus *);
+int pcibios_enable_device(struct pci_dev *);
+char *pcibios_setup (char *str);
+
+/* Used only when drivers/pci/setup.c is used */
+void pcibios_align_resource(void *, struct resource *, unsigned long);
+void pcibios_update_resource(struct pci_dev *, struct resource *,
+                            struct resource *, int);
+void pcibios_update_irq(struct pci_dev *, int irq);
+void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *);
+
+/* Backward compatibility, don't use in new code! */
+
+int pcibios_present(void);
+int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn,
+                             unsigned char where, unsigned char *val);
+int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn,
+                             unsigned char where, unsigned short *val);
+int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn,
+                              unsigned char where, unsigned int *val);
+int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn,
+                              unsigned char where, unsigned char val);
+int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn,
+                              unsigned char where, unsigned short val);
+int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn,
+                               unsigned char where, unsigned int val);
+int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn);
+int pcibios_find_device (unsigned short vendor, unsigned short dev_id,
+                        unsigned short index, unsigned char *bus,
+                        unsigned char *dev_fn);
+
+/* Generic PCI functions used internally */
+
+void pci_init(void);
+int pci_bus_exists(const struct list_head *list, int nr);
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
+struct pci_bus *pci_alloc_primary_bus(int bus);
+struct pci_dev *pci_scan_slot(struct pci_dev *temp);
+int pci_proc_attach_device(struct pci_dev *dev);
+int pci_proc_detach_device(struct pci_dev *dev);
+int pci_proc_attach_bus(struct pci_bus *bus);
+int pci_proc_detach_bus(struct pci_bus *bus);
+void pci_name_device(struct pci_dev *dev);
+char *pci_class_name(u32 class);
+void pci_read_bridge_bases(struct pci_bus *child);
+struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
+int pci_setup_device(struct pci_dev *dev);
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+
+/* Generic PCI functions exported to card drivers */
+
+struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
+struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device,
+                                unsigned int ss_vendor, unsigned int ss_device,
+                                const struct pci_dev *from);
+struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from);
+struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
+int pci_find_capability (struct pci_dev *dev, int cap);
+
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
+
+int pci_enable_device(struct pci_dev *dev);
+void pci_disable_device(struct pci_dev *dev);
+void pci_set_master(struct pci_dev *dev);
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_assign_resource(struct pci_dev *dev, int i);
+
+/* Power management related routines */
+int pci_save_state(struct pci_dev *dev, u32 *buffer);
+int pci_restore_state(struct pci_dev *dev, u32 *buffer);
+int pci_set_power_state(struct pci_dev *dev, int state);
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
+
+/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+
+int pci_claim_resource(struct pci_dev *, int);
+void pci_assign_unassigned_resources(void);
+void pdev_enable_device(struct pci_dev *);
+void pdev_sort_resources(struct pci_dev *, struct resource_list *, u32);
+unsigned long pci_bridge_check_io(struct pci_dev *);
+void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
+                   int (*)(struct pci_dev *, u8, u8));
+#define HAVE_PCI_REQ_REGIONS
+int pci_request_regions(struct pci_dev *, char *);
+void pci_release_regions(struct pci_dev *);
+
+/* New-style probing supporting hot-pluggable devices */
+int pci_register_driver(struct pci_driver *);
+void pci_unregister_driver(struct pci_driver *);
+void pci_insert_device(struct pci_dev *, struct pci_bus *);
+void pci_remove_device(struct pci_dev *);
+struct pci_driver *pci_dev_driver(const struct pci_dev *);
+const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
+void pci_announce_device_to_drivers(struct pci_dev *);
+unsigned int pci_do_scan_bus(struct pci_bus *bus);
+struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
+
+#if 0
+/* kmem_cache style wrapper around pci_alloc_consistent() */
+struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev,
+               size_t size, size_t align, size_t allocation, int flags);
+void pci_pool_destroy (struct pci_pool *pool);
+
+void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle);
+void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr);
+#endif
+
+#endif /* CONFIG_PCI */
+
+/* Include architecture-dependent settings and functions */
+
+#include <asm/pci.h>
+
+/*
+ *  If the system does not have PCI, clearly these return errors.  Define
+ *  these as simple inline functions to avoid hair in drivers.
+ */
+
+#ifndef CONFIG_PCI
+static inline int pcibios_present(void) { return 0; }
+static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn) 
+{      return PCIBIOS_DEVICE_NOT_FOUND; }
+
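+/* Generate matching no-op config-space accessors for the !CONFIG_PCI
+ * case: each pcibios_/pci_ read/write stub just reports that the
+ * operation is not supported. */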
+#define _PCI_NOP(o,s,t) \
+       static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \
+               { return PCIBIOS_FUNC_NOT_SUPPORTED; } \
+       static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \
+               { return PCIBIOS_FUNC_NOT_SUPPORTED; }
+#define _PCI_NOP_ALL(o,x)      _PCI_NOP(o,byte,u8 x) \
+                               _PCI_NOP(o,word,u16 x) \
+                               _PCI_NOP(o,dword,u32 x)
+_PCI_NOP_ALL(read, *)
+_PCI_NOP_ALL(write,)
+
+static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device,
+unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline void pci_set_master(struct pci_dev *dev) { }
+static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
+static inline void pci_disable_device(struct pci_dev *dev) { }
+static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;}
+static inline int pci_register_driver(struct pci_driver *drv) { return 0;}
+static inline void pci_unregister_driver(struct pci_driver *drv) { }
+static inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; }
+static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; }
+static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
+
+/* Power management related routines */
+static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; }
+static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; }
+
+#define pci_for_each_dev(dev) \
+       for(dev = NULL; 0; )
+
+#else
+
+/*
+ * a helper function to ensure correct pci_driver setup and
+ * cleanup for commonly-encountered hotplug/modular cases
+ *
+ * This MUST stay in a header, as it checks for -DMODULE
+ */
+static inline int pci_module_init(struct pci_driver *drv)
+{
+       int rc = pci_register_driver (drv);
+
+       if (rc > 0)
+               return 0;
+
+       /* iff CONFIG_HOTPLUG and built into kernel, we should
+        * leave the driver around for future hotplug events.
+        * For the module case, a hotplug daemon of some sort
+        * should load a module in response to an insert event. */
+#if defined(CONFIG_HOTPLUG) && !defined(MODULE)
+       if (rc == 0)
+               return 0;
+#else
+       if (rc == 0)
+               rc = -ENODEV;           
+#endif
+
+       /* if we get here, we need to clean up pci driver instance
+        * and return some sort of error */
+       pci_unregister_driver (drv);
+       
+       return rc;
+}
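+
+#if 0
+/* Illustrative sketch, not part of the imported header: the usual module
+ * boilerplate built on the helper above ("example_driver" as in the
+ * earlier pci_driver sketch). */
+static int __init example_init(void)
+{
+       return pci_module_init(&example_driver);
+}
+
+static void __exit example_exit(void)
+{
+       pci_unregister_driver(&example_driver);
+}
+
+module_init(example_init);
+module_exit(example_exit);
+#endif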
+
+#endif /* !CONFIG_PCI */
+
+/* these helpers provide future and backwards compatibility
+ * for accessing popular PCI BAR info */
+#define pci_resource_start(dev,bar)   ((dev)->resource[(bar)].start)
+#define pci_resource_end(dev,bar)     ((dev)->resource[(bar)].end)
+#define pci_resource_flags(dev,bar)   ((dev)->resource[(bar)].flags)
+#define pci_resource_len(dev,bar) \
+       ((pci_resource_start((dev),(bar)) == 0 &&       \
+         pci_resource_end((dev),(bar)) ==              \
+         pci_resource_start((dev),(bar))) ? 0 :        \
+                                                       \
+        (pci_resource_end((dev),(bar)) -               \
+         pci_resource_start((dev),(bar)) + 1))
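+
+#if 0
+/* Illustrative sketch, not part of the imported header: claiming and
+ * mapping BAR 1 of a device with the helpers above.  Error handling is
+ * trimmed and example_map_bar1 is a made-up name. */
+static void *example_map_bar1(struct pci_dev *dev)
+{
+       if (!(pci_resource_flags(dev, 1) & IORESOURCE_MEM))
+               return NULL;            /* expected a memory BAR */
+       if (pci_request_regions(dev, "example"))
+               return NULL;            /* region already claimed */
+       return ioremap(pci_resource_start(dev, 1),
+                      pci_resource_len(dev, 1));
+}
+#endif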
+
+/* Similar to the helpers above, these manipulate per-pci_dev
+ * driver-specific data.  Currently stored as pci_dev::driver_data,
+ * a void pointer, but it is not present on older kernels.
+ */
+static inline void *pci_get_drvdata (struct pci_dev *pdev)
+{
+       return pdev->driver_data;
+}
+
+static inline void pci_set_drvdata (struct pci_dev *pdev, void *data)
+{
+       pdev->driver_data = data;
+}
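+
+#if 0
+/* Illustrative sketch, not part of the imported header: the usual pairing
+ * of the two accessors above.  struct example_priv and the example_*
+ * names are made up. */
+static int example_attach(struct pci_dev *dev, const struct pci_device_id *id)
+{
+       struct example_priv *priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+
+       if (!priv)
+               return -ENOMEM;
+       pci_set_drvdata(dev, priv);     /* stash per-device state */
+       return 0;
+}
+
+static void example_detach(struct pci_dev *dev)
+{
+       kfree(pci_get_drvdata(dev));    /* retrieve and free it on removal */
+}
+#endif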
+
+/*
+ *  The world is not perfect and supplies us with broken PCI devices.
+ *  For at least a part of these bugs we need a work-around, so both
+ *  generic (drivers/pci/quirks.c) and per-architecture code can define
+ *  fixup hooks to be called for particular buggy devices.
+ */
+
+struct pci_fixup {
+       int pass;
+       u16 vendor, device;                     /* You can use PCI_ANY_ID here of course */
+       void (*hook)(struct pci_dev *dev);
+};
+
+extern struct pci_fixup pcibios_fixups[];
+
+#define PCI_FIXUP_HEADER       1               /* Called immediately after reading configuration header */
+#define PCI_FIXUP_FINAL                2               /* Final phase of device fixups */
+
+void pci_fixup_device(int pass, struct pci_dev *dev);
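+
+#if 0
+/* Illustrative sketch, not part of the imported header: how architecture
+ * code might populate the fixup table declared above.  quirk_example is
+ * a made-up hook. */
+static void quirk_example(struct pci_dev *dev)
+{
+       dev->resource[0].flags = 0;     /* e.g. hide a bogus BAR */
+}
+
+struct pci_fixup pcibios_fixups[] = {
+       { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, quirk_example },
+       { 0 }
+};
+#endif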
+
+extern int pci_pci_problems;
+#define PCIPCI_FAIL            1
+#define PCIPCI_TRITON          2
+#define PCIPCI_NATOMA          4
+#define PCIPCI_VIAETBF         8
+#define PCIPCI_VSFX            16
+
+#endif /* __KERNEL__ */
+#endif /* LINUX_PCI_H */
diff --git a/xen-2.4.16/include/xeno/pci_ids.h b/xen-2.4.16/include/xeno/pci_ids.h
new file mode 100644 (file)
index 0000000..4149c50
--- /dev/null
@@ -0,0 +1,1803 @@
+/*
+ *     PCI Class, Vendor and Device IDs
+ *
+ *     Please keep sorted.
+ */
+
+/* Device classes and subclasses */
+
+#define PCI_CLASS_NOT_DEFINED          0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA      0x0001
+
+#define PCI_BASE_CLASS_STORAGE         0x01
+#define PCI_CLASS_STORAGE_SCSI         0x0100
+#define PCI_CLASS_STORAGE_IDE          0x0101
+#define PCI_CLASS_STORAGE_FLOPPY       0x0102
+#define PCI_CLASS_STORAGE_IPI          0x0103
+#define PCI_CLASS_STORAGE_RAID         0x0104
+#define PCI_CLASS_STORAGE_OTHER                0x0180
+
+#define PCI_BASE_CLASS_NETWORK         0x02
+#define PCI_CLASS_NETWORK_ETHERNET     0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING   0x0201
+#define PCI_CLASS_NETWORK_FDDI         0x0202
+#define PCI_CLASS_NETWORK_ATM          0x0203
+#define PCI_CLASS_NETWORK_OTHER                0x0280
+
+#define PCI_BASE_CLASS_DISPLAY         0x03
+#define PCI_CLASS_DISPLAY_VGA          0x0300
+#define PCI_CLASS_DISPLAY_XGA          0x0301
+#define PCI_CLASS_DISPLAY_3D           0x0302
+#define PCI_CLASS_DISPLAY_OTHER                0x0380
+
+#define PCI_BASE_CLASS_MULTIMEDIA      0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO     0x0400
+#define PCI_CLASS_MULTIMEDIA_AUDIO     0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE     0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER     0x0480
+
+#define PCI_BASE_CLASS_MEMORY          0x05
+#define PCI_CLASS_MEMORY_RAM           0x0500
+#define PCI_CLASS_MEMORY_FLASH         0x0501
+#define PCI_CLASS_MEMORY_OTHER         0x0580
+
+#define PCI_BASE_CLASS_BRIDGE          0x06
+#define PCI_CLASS_BRIDGE_HOST          0x0600
+#define PCI_CLASS_BRIDGE_ISA           0x0601
+#define PCI_CLASS_BRIDGE_EISA          0x0602
+#define PCI_CLASS_BRIDGE_MC            0x0603
+#define PCI_CLASS_BRIDGE_PCI           0x0604
+#define PCI_CLASS_BRIDGE_PCMCIA                0x0605
+#define PCI_CLASS_BRIDGE_NUBUS         0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS       0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY       0x0608
+#define PCI_CLASS_BRIDGE_OTHER         0x0680
+
+#define PCI_BASE_CLASS_COMMUNICATION   0x07
+#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM  0x0703
+#define PCI_CLASS_COMMUNICATION_OTHER  0x0780
+
+#define PCI_BASE_CLASS_SYSTEM          0x08
+#define PCI_CLASS_SYSTEM_PIC           0x0800
+#define PCI_CLASS_SYSTEM_DMA           0x0801
+#define PCI_CLASS_SYSTEM_TIMER         0x0802
+#define PCI_CLASS_SYSTEM_RTC           0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG   0x0804
+#define PCI_CLASS_SYSTEM_OTHER         0x0880
+
+#define PCI_BASE_CLASS_INPUT           0x09
+#define PCI_CLASS_INPUT_KEYBOARD       0x0900
+#define PCI_CLASS_INPUT_PEN            0x0901
+#define PCI_CLASS_INPUT_MOUSE          0x0902
+#define PCI_CLASS_INPUT_SCANNER                0x0903
+#define PCI_CLASS_INPUT_GAMEPORT       0x0904
+#define PCI_CLASS_INPUT_OTHER          0x0980
+
+#define PCI_BASE_CLASS_DOCKING         0x0a
+#define PCI_CLASS_DOCKING_GENERIC      0x0a00
+#define PCI_CLASS_DOCKING_OTHER                0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR       0x0b
+#define PCI_CLASS_PROCESSOR_386                0x0b00
+#define PCI_CLASS_PROCESSOR_486                0x0b01
+#define PCI_CLASS_PROCESSOR_PENTIUM    0x0b02
+#define PCI_CLASS_PROCESSOR_ALPHA      0x0b10
+#define PCI_CLASS_PROCESSOR_POWERPC    0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS       0x0b30
+#define PCI_CLASS_PROCESSOR_CO         0x0b40
+
+#define PCI_BASE_CLASS_SERIAL          0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE      0x0c00
+#define PCI_CLASS_SERIAL_ACCESS                0x0c01
+#define PCI_CLASS_SERIAL_SSA           0x0c02
+#define PCI_CLASS_SERIAL_USB           0x0c03
+#define PCI_CLASS_SERIAL_FIBER         0x0c04
+#define PCI_CLASS_SERIAL_SMBUS         0x0c05
+
+#define PCI_BASE_CLASS_INTELLIGENT     0x0e
+#define PCI_CLASS_INTELLIGENT_I2O      0x0e00
+
+#define PCI_BASE_CLASS_SATELLITE       0x0f
+#define PCI_CLASS_SATELLITE_TV         0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO      0x0f01
+#define PCI_CLASS_SATELLITE_VOICE      0x0f03
+#define PCI_CLASS_SATELLITE_DATA       0x0f04
+
+#define PCI_BASE_CLASS_CRYPT           0x10
+#define PCI_CLASS_CRYPT_NETWORK                0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT  0x1001
+#define PCI_CLASS_CRYPT_OTHER          0x1080
+
+#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11
+#define PCI_CLASS_SP_DPIO              0x1100
+#define PCI_CLASS_SP_OTHER             0x1180
+
+#define PCI_CLASS_OTHERS               0xff
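+
+#if 0
+/* Illustrative sketch, not part of the imported header: the class values
+ * above are 16 bits (base << 8 | subclass), while pci_dev::class also
+ * carries the prog-if byte, so callers shift left by 8 when searching
+ * (example_find_ethernet is a made-up name). */
+static void example_find_ethernet(void)
+{
+       struct pci_dev *dev = NULL;
+
+       while ((dev = pci_find_class(PCI_CLASS_NETWORK_ETHERNET << 8, dev)) != NULL)
+               printk("ethernet device at %s\n", dev->slot_name);
+}
+#endif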
+
+/* Vendors and devices.  Sort key: vendor first, device next. */
+
+#define PCI_VENDOR_ID_DYNALINK         0x0675
+#define PCI_DEVICE_ID_DYNALINK_IS64PH  0x1702
+
+#define PCI_VENDOR_ID_BERKOM                   0x0871
+#define PCI_DEVICE_ID_BERKOM_A1T               0xffa1
+#define PCI_DEVICE_ID_BERKOM_T_CONCEPT         0xffa2
+#define PCI_DEVICE_ID_BERKOM_A4T               0xffa4
+#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO     0xffa8
+
+#define PCI_VENDOR_ID_COMPAQ           0x0e11
+#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508
+#define PCI_DEVICE_ID_COMPAQ_1280      0x3033
+#define PCI_DEVICE_ID_COMPAQ_TRIFLEX   0x4000
+#define PCI_DEVICE_ID_COMPAQ_6010      0x6010
+#define PCI_DEVICE_ID_COMPAQ_TACHYON   0xa0fc
+#define PCI_DEVICE_ID_COMPAQ_SMART2P   0xae10
+#define PCI_DEVICE_ID_COMPAQ_NETEL100  0xae32
+#define PCI_DEVICE_ID_COMPAQ_NETEL10   0xae34
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3I 0xae35
+#define PCI_DEVICE_ID_COMPAQ_NETEL100D 0xae40
+#define PCI_DEVICE_ID_COMPAQ_NETEL100PI        0xae43
+#define PCI_DEVICE_ID_COMPAQ_NETEL100I 0xb011
+#define PCI_DEVICE_ID_COMPAQ_CISS      0xb060
+#define PCI_DEVICE_ID_COMPAQ_CISSB     0xb178
+#define PCI_DEVICE_ID_COMPAQ_THUNDER   0xf130
+#define PCI_DEVICE_ID_COMPAQ_NETFLEX3B 0xf150
+
+#define PCI_VENDOR_ID_NCR              0x1000
+#define PCI_VENDOR_ID_LSI_LOGIC                0x1000
+#define PCI_DEVICE_ID_NCR_53C810       0x0001
+#define PCI_DEVICE_ID_NCR_53C820       0x0002
+#define PCI_DEVICE_ID_NCR_53C825       0x0003
+#define PCI_DEVICE_ID_NCR_53C815       0x0004
+#define PCI_DEVICE_ID_LSI_53C810AP     0x0005
+#define PCI_DEVICE_ID_NCR_53C860       0x0006
+#define PCI_DEVICE_ID_LSI_53C1510      0x000a
+#define PCI_DEVICE_ID_NCR_53C896       0x000b
+#define PCI_DEVICE_ID_NCR_53C895       0x000c
+#define PCI_DEVICE_ID_NCR_53C885       0x000d
+#define PCI_DEVICE_ID_NCR_53C875       0x000f
+#define PCI_DEVICE_ID_NCR_53C1510      0x0010
+#define PCI_DEVICE_ID_LSI_53C895A      0x0012
+#define PCI_DEVICE_ID_LSI_53C875A      0x0013
+#define PCI_DEVICE_ID_LSI_53C1010_33   0x0020
+#define PCI_DEVICE_ID_LSI_53C1010_66   0x0021
+#define PCI_DEVICE_ID_LSI_53C1030      0x0030
+#define PCI_DEVICE_ID_LSI_53C1035      0x0040
+#define PCI_DEVICE_ID_NCR_53C875J      0x008f
+#define PCI_DEVICE_ID_LSI_FC909                0x0621
+#define PCI_DEVICE_ID_LSI_FC929                0x0622
+#define PCI_DEVICE_ID_LSI_FC929_LAN    0x0623
+#define PCI_DEVICE_ID_LSI_FC919                0x0624
+#define PCI_DEVICE_ID_LSI_FC919_LAN    0x0625
+#define PCI_DEVICE_ID_NCR_YELLOWFIN    0x0701
+#define PCI_DEVICE_ID_LSI_61C102       0x0901
+#define PCI_DEVICE_ID_LSI_63C815       0x1000
+
+#define PCI_VENDOR_ID_ATI              0x1002
+/* Mach64 */
+#define PCI_DEVICE_ID_ATI_68800                0x4158
+#define PCI_DEVICE_ID_ATI_215CT222     0x4354
+#define PCI_DEVICE_ID_ATI_210888CX     0x4358
+#define PCI_DEVICE_ID_ATI_215ET222     0x4554
+/* Mach64 / Rage */
+#define PCI_DEVICE_ID_ATI_215GB                0x4742
+#define PCI_DEVICE_ID_ATI_215GD                0x4744
+#define PCI_DEVICE_ID_ATI_215GI                0x4749
+#define PCI_DEVICE_ID_ATI_215GP                0x4750
+#define PCI_DEVICE_ID_ATI_215GQ                0x4751
+#define PCI_DEVICE_ID_ATI_215XL                0x4752
+#define PCI_DEVICE_ID_ATI_215GT                0x4754
+#define PCI_DEVICE_ID_ATI_215GTB       0x4755
+#define PCI_DEVICE_ID_ATI_215_IV       0x4756
+#define PCI_DEVICE_ID_ATI_215_IW       0x4757
+#define PCI_DEVICE_ID_ATI_210888GX     0x4758
+#define PCI_DEVICE_ID_ATI_215_IZ       0x475A
+#define PCI_DEVICE_ID_ATI_215_LB       0x4c42
+#define PCI_DEVICE_ID_ATI_215_LD       0x4c44
+#define PCI_DEVICE_ID_ATI_215_LG       0x4c47
+#define PCI_DEVICE_ID_ATI_215_LI       0x4c49
+#define PCI_DEVICE_ID_ATI_215_LM       0x4c4D
+#define PCI_DEVICE_ID_ATI_215_LN       0x4c4E
+#define PCI_DEVICE_ID_ATI_215_LR       0x4c52
+#define PCI_DEVICE_ID_ATI_215_LS       0x4c53
+#define PCI_DEVICE_ID_ATI_264_LT       0x4c54
+/* Mach64 VT */
+#define PCI_DEVICE_ID_ATI_264VT                0x5654
+#define PCI_DEVICE_ID_ATI_264VU                0x5655
+#define PCI_DEVICE_ID_ATI_264VV                0x5656
+/* Rage128 Pro GL */
+#define PCI_DEVICE_ID_ATI_Rage128_PA   0x5041
+#define PCI_DEVICE_ID_ATI_Rage128_PB   0x5042
+#define PCI_DEVICE_ID_ATI_Rage128_PC   0x5043
+#define PCI_DEVICE_ID_ATI_Rage128_PD   0x5044
+#define PCI_DEVICE_ID_ATI_Rage128_PE   0x5045
+#define PCI_DEVICE_ID_ATI_RAGE128_PF   0x5046
+/* Rage128 Pro VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_PG   0x5047
+#define PCI_DEVICE_ID_ATI_RAGE128_PH   0x5048
+#define PCI_DEVICE_ID_ATI_RAGE128_PI   0x5049
+#define PCI_DEVICE_ID_ATI_RAGE128_PJ   0x504A
+#define PCI_DEVICE_ID_ATI_RAGE128_PK   0x504B
+#define PCI_DEVICE_ID_ATI_RAGE128_PL   0x504C
+#define PCI_DEVICE_ID_ATI_RAGE128_PM   0x504D
+#define PCI_DEVICE_ID_ATI_RAGE128_PN   0x504E
+#define PCI_DEVICE_ID_ATI_RAGE128_PO   0x504F
+#define PCI_DEVICE_ID_ATI_RAGE128_PP   0x5050
+#define PCI_DEVICE_ID_ATI_RAGE128_PQ   0x5051
+#define PCI_DEVICE_ID_ATI_RAGE128_PR   0x5052
+#define PCI_DEVICE_ID_ATI_RAGE128_TR   0x5452
+#define PCI_DEVICE_ID_ATI_RAGE128_PS   0x5053
+#define PCI_DEVICE_ID_ATI_RAGE128_PT   0x5054
+#define PCI_DEVICE_ID_ATI_RAGE128_PU   0x5055
+#define PCI_DEVICE_ID_ATI_RAGE128_PV   0x5056
+#define PCI_DEVICE_ID_ATI_RAGE128_PW   0x5057
+#define PCI_DEVICE_ID_ATI_RAGE128_PX   0x5058
+/* Rage128 GL */
+#define PCI_DEVICE_ID_ATI_RAGE128_RE   0x5245
+#define PCI_DEVICE_ID_ATI_RAGE128_RF   0x5246
+#define PCI_DEVICE_ID_ATI_RAGE128_RG   0x534b
+#define PCI_DEVICE_ID_ATI_RAGE128_RH   0x534c
+#define PCI_DEVICE_ID_ATI_RAGE128_RI   0x534d
+/* Rage128 VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_RK   0x524b
+#define PCI_DEVICE_ID_ATI_RAGE128_RL   0x524c
+#define PCI_DEVICE_ID_ATI_RAGE128_RM   0x5345
+#define PCI_DEVICE_ID_ATI_RAGE128_RN   0x5346
+#define PCI_DEVICE_ID_ATI_RAGE128_RO   0x5347
+/* Rage128 M3 */
+#define PCI_DEVICE_ID_ATI_RAGE128_LE   0x4c45
+#define PCI_DEVICE_ID_ATI_RAGE128_LF   0x4c46
+/* Rage128 Pro Ultra */
+#define PCI_DEVICE_ID_ATI_RAGE128_U1   0x5446
+#define PCI_DEVICE_ID_ATI_RAGE128_U2   0x544C
+#define PCI_DEVICE_ID_ATI_RAGE128_U3   0x5452
+/* Radeon M4 */
+#define PCI_DEVICE_ID_ATI_RADEON_LE    0x4d45
+#define PCI_DEVICE_ID_ATI_RADEON_LF    0x4d46
+/* Radeon NV-100 */
+#define PCI_DEVICE_ID_ATI_RADEON_N1    0x5159
+#define PCI_DEVICE_ID_ATI_RADEON_N2    0x515a
+/* Radeon */
+#define PCI_DEVICE_ID_ATI_RADEON_RA    0x5144
+#define PCI_DEVICE_ID_ATI_RADEON_RB    0x5145
+#define PCI_DEVICE_ID_ATI_RADEON_RC    0x5146
+#define PCI_DEVICE_ID_ATI_RADEON_RD    0x5147
+
+#define PCI_VENDOR_ID_VLSI             0x1004
+#define PCI_DEVICE_ID_VLSI_82C592      0x0005
+#define PCI_DEVICE_ID_VLSI_82C593      0x0006
+#define PCI_DEVICE_ID_VLSI_82C594      0x0007
+#define PCI_DEVICE_ID_VLSI_82C597      0x0009
+#define PCI_DEVICE_ID_VLSI_82C541      0x000c
+#define PCI_DEVICE_ID_VLSI_82C543      0x000d
+#define PCI_DEVICE_ID_VLSI_82C532      0x0101
+#define PCI_DEVICE_ID_VLSI_82C534      0x0102
+#define PCI_DEVICE_ID_VLSI_82C535      0x0104
+#define PCI_DEVICE_ID_VLSI_82C147      0x0105
+#define PCI_DEVICE_ID_VLSI_VAS96011    0x0702
+
+#define PCI_VENDOR_ID_ADL              0x1005
+#define PCI_DEVICE_ID_ADL_2301         0x2301
+
+#define PCI_VENDOR_ID_NS               0x100b
+#define PCI_DEVICE_ID_NS_87415         0x0002
+#define PCI_DEVICE_ID_NS_87560_LIO     0x000e
+#define PCI_DEVICE_ID_NS_87560_USB     0x0012
+#define PCI_DEVICE_ID_NS_83815         0x0020
+#define PCI_DEVICE_ID_NS_83820         0x0022
+#define PCI_DEVICE_ID_NS_87410         0xd001
+
+#define PCI_VENDOR_ID_TSENG            0x100c
+#define PCI_DEVICE_ID_TSENG_W32P_2     0x3202
+#define PCI_DEVICE_ID_TSENG_W32P_b     0x3205
+#define PCI_DEVICE_ID_TSENG_W32P_c     0x3206
+#define PCI_DEVICE_ID_TSENG_W32P_d     0x3207
+#define PCI_DEVICE_ID_TSENG_ET6000     0x3208
+
+#define PCI_VENDOR_ID_WEITEK           0x100e
+#define PCI_DEVICE_ID_WEITEK_P9000     0x9001
+#define PCI_DEVICE_ID_WEITEK_P9100     0x9100
+
+#define PCI_VENDOR_ID_DEC              0x1011
+#define PCI_DEVICE_ID_DEC_BRD          0x0001
+#define PCI_DEVICE_ID_DEC_TULIP                0x0002
+#define PCI_DEVICE_ID_DEC_TGA          0x0004
+#define PCI_DEVICE_ID_DEC_TULIP_FAST   0x0009
+#define PCI_DEVICE_ID_DEC_TGA2         0x000D
+#define PCI_DEVICE_ID_DEC_FDDI         0x000F
+#define PCI_DEVICE_ID_DEC_TULIP_PLUS   0x0014
+#define PCI_DEVICE_ID_DEC_21142                0x0019
+#define PCI_DEVICE_ID_DEC_21052                0x0021
+#define PCI_DEVICE_ID_DEC_21150                0x0022
+#define PCI_DEVICE_ID_DEC_21152                0x0024
+#define PCI_DEVICE_ID_DEC_21153                0x0025
+#define PCI_DEVICE_ID_DEC_21154                0x0026
+#define PCI_DEVICE_ID_DEC_21285                0x1065
+#define PCI_DEVICE_ID_COMPAQ_42XX      0x0046
+
+#define PCI_VENDOR_ID_CIRRUS           0x1013
+#define PCI_DEVICE_ID_CIRRUS_7548      0x0038
+#define PCI_DEVICE_ID_CIRRUS_5430      0x00a0
+#define PCI_DEVICE_ID_CIRRUS_5434_4    0x00a4
+#define PCI_DEVICE_ID_CIRRUS_5434_8    0x00a8
+#define PCI_DEVICE_ID_CIRRUS_5436      0x00ac
+#define PCI_DEVICE_ID_CIRRUS_5446      0x00b8
+#define PCI_DEVICE_ID_CIRRUS_5480      0x00bc
+#define PCI_DEVICE_ID_CIRRUS_5462      0x00d0
+#define PCI_DEVICE_ID_CIRRUS_5464      0x00d4
+#define PCI_DEVICE_ID_CIRRUS_5465      0x00d6
+#define PCI_DEVICE_ID_CIRRUS_6729      0x1100
+#define PCI_DEVICE_ID_CIRRUS_6832      0x1110
+#define PCI_DEVICE_ID_CIRRUS_7542      0x1200
+#define PCI_DEVICE_ID_CIRRUS_7543      0x1202
+#define PCI_DEVICE_ID_CIRRUS_7541      0x1204
+
+#define PCI_VENDOR_ID_IBM              0x1014
+#define PCI_DEVICE_ID_IBM_FIRE_CORAL   0x000a
+#define PCI_DEVICE_ID_IBM_TR           0x0018
+#define PCI_DEVICE_ID_IBM_82G2675      0x001d
+#define PCI_DEVICE_ID_IBM_MCA          0x0020
+#define PCI_DEVICE_ID_IBM_82351                0x0022
+#define PCI_DEVICE_ID_IBM_PYTHON       0x002d
+#define PCI_DEVICE_ID_IBM_SERVERAID    0x002e
+#define PCI_DEVICE_ID_IBM_TR_WAKE      0x003e
+#define PCI_DEVICE_ID_IBM_MPIC         0x0046
+#define PCI_DEVICE_ID_IBM_3780IDSP     0x007d
+#define PCI_DEVICE_ID_IBM_CHUKAR       0x0096
+#define PCI_DEVICE_ID_IBM_CPC710_PCI64 0x00fc
+#define PCI_DEVICE_ID_IBM_CPC710_PCI32 0x0105
+#define        PCI_DEVICE_ID_IBM_405GP         0x0156
+#define PCI_DEVICE_ID_IBM_SERVERAIDI960        0x01bd
+#define PCI_DEVICE_ID_IBM_MPIC_2       0xffff
+
+#define PCI_VENDOR_ID_COMPEX2          0x101a // pci.ids says "AT&T GIS (NCR)"
+#define PCI_DEVICE_ID_COMPEX2_100VG    0x0005
+
+#define PCI_VENDOR_ID_WD               0x101c
+#define PCI_DEVICE_ID_WD_7197          0x3296
+#define PCI_DEVICE_ID_WD_90C           0xc24a
+
+#define PCI_VENDOR_ID_AMI              0x101e
+#define PCI_DEVICE_ID_AMI_MEGARAID3    0x1960
+#define PCI_DEVICE_ID_AMI_MEGARAID     0x9010
+#define PCI_DEVICE_ID_AMI_MEGARAID2    0x9060
+
+#define PCI_VENDOR_ID_AMD              0x1022
+#define PCI_DEVICE_ID_AMD_LANCE                0x2000
+#define PCI_DEVICE_ID_AMD_LANCE_HOME   0x2001
+#define PCI_DEVICE_ID_AMD_SCSI         0x2020
+#define PCI_DEVICE_ID_AMD_FE_GATE_7006 0x7006
+#define PCI_DEVICE_ID_AMD_FE_GATE_7007 0x7007
+#define PCI_DEVICE_ID_AMD_FE_GATE_700C 0x700C
+#define PCI_DEVICE_ID_AMD_FE_GATE_700D 0x700D
+#define PCI_DEVICE_ID_AMD_FE_GATE_700E 0x700E
+#define PCI_DEVICE_ID_AMD_FE_GATE_700F 0x700F
+#define PCI_DEVICE_ID_AMD_COBRA_7400   0x7400
+#define PCI_DEVICE_ID_AMD_COBRA_7401   0x7401
+#define PCI_DEVICE_ID_AMD_COBRA_7403   0x7403
+#define PCI_DEVICE_ID_AMD_COBRA_7404   0x7404
+#define PCI_DEVICE_ID_AMD_VIPER_7408   0x7408
+#define PCI_DEVICE_ID_AMD_VIPER_7409   0x7409
+#define PCI_DEVICE_ID_AMD_VIPER_740B   0x740B
+#define PCI_DEVICE_ID_AMD_VIPER_740C   0x740C
+#define PCI_DEVICE_ID_AMD_VIPER_7410   0x7410
+#define PCI_DEVICE_ID_AMD_VIPER_7411   0x7411
+#define PCI_DEVICE_ID_AMD_VIPER_7413   0x7413
+#define PCI_DEVICE_ID_AMD_VIPER_7414   0x7414
+#define PCI_DEVICE_ID_AMD_VIPER_7440   0x7440
+#define PCI_DEVICE_ID_AMD_VIPER_7441   0x7441
+#define PCI_DEVICE_ID_AMD_VIPER_7443   0x7443
+#define PCI_DEVICE_ID_AMD_VIPER_7448   0x7448
+#define PCI_DEVICE_ID_AMD_VIPER_7449   0x7449
+
+#define PCI_VENDOR_ID_TRIDENT          0x1023
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX        0x2000
+#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX        0x2001
+#define PCI_DEVICE_ID_TRIDENT_9320     0x9320
+#define PCI_DEVICE_ID_TRIDENT_9388     0x9388
+#define PCI_DEVICE_ID_TRIDENT_9397     0x9397
+#define PCI_DEVICE_ID_TRIDENT_939A     0x939A
+#define PCI_DEVICE_ID_TRIDENT_9520     0x9520
+#define PCI_DEVICE_ID_TRIDENT_9525     0x9525
+#define PCI_DEVICE_ID_TRIDENT_9420     0x9420
+#define PCI_DEVICE_ID_TRIDENT_9440     0x9440
+#define PCI_DEVICE_ID_TRIDENT_9660     0x9660
+#define PCI_DEVICE_ID_TRIDENT_9750     0x9750
+#define PCI_DEVICE_ID_TRIDENT_9850     0x9850
+#define PCI_DEVICE_ID_TRIDENT_9880     0x9880
+#define PCI_DEVICE_ID_TRIDENT_8400     0x8400
+#define PCI_DEVICE_ID_TRIDENT_8420     0x8420
+#define PCI_DEVICE_ID_TRIDENT_8500     0x8500
+
+#define PCI_VENDOR_ID_AI               0x1025
+#define PCI_DEVICE_ID_AI_M1435         0x1435
+
+#define PCI_VENDOR_ID_DELL              0x1028
+
+#define PCI_VENDOR_ID_MATROX           0x102B
+#define PCI_DEVICE_ID_MATROX_MGA_2     0x0518
+#define PCI_DEVICE_ID_MATROX_MIL       0x0519
+#define PCI_DEVICE_ID_MATROX_MYS       0x051A
+#define PCI_DEVICE_ID_MATROX_MIL_2     0x051b
+#define PCI_DEVICE_ID_MATROX_MIL_2_AGP 0x051f
+#define PCI_DEVICE_ID_MATROX_MGA_IMP   0x0d10
+#define PCI_DEVICE_ID_MATROX_G100_MM   0x1000
+#define PCI_DEVICE_ID_MATROX_G100_AGP  0x1001
+#define PCI_DEVICE_ID_MATROX_G200_PCI  0x0520
+#define PCI_DEVICE_ID_MATROX_G200_AGP  0x0521
+#define        PCI_DEVICE_ID_MATROX_G400       0x0525
+#define PCI_DEVICE_ID_MATROX_G550      0x2527
+#define PCI_DEVICE_ID_MATROX_VIA       0x4536
+
+#define PCI_VENDOR_ID_CT               0x102c
+#define PCI_DEVICE_ID_CT_65545         0x00d8
+#define PCI_DEVICE_ID_CT_65548         0x00dc
+#define PCI_DEVICE_ID_CT_65550         0x00e0
+#define PCI_DEVICE_ID_CT_65554         0x00e4
+#define PCI_DEVICE_ID_CT_65555         0x00e5
+
+#define PCI_VENDOR_ID_MIRO             0x1031
+#define PCI_DEVICE_ID_MIRO_36050       0x5601
+
+#define PCI_VENDOR_ID_NEC              0x1033
+#define PCI_DEVICE_ID_NEC_PCX2         0x0046
+#define PCI_DEVICE_ID_NEC_NILE4                0x005a
+#define PCI_DEVICE_ID_NEC_VRC5476       0x009b
+#define PCI_DEVICE_ID_NEC_VRC5477_AC97  0x00a6
+
+#define PCI_VENDOR_ID_FD               0x1036
+#define PCI_DEVICE_ID_FD_36C70         0x0000
+
+#define PCI_VENDOR_ID_SI               0x1039
+#define PCI_DEVICE_ID_SI_5591_AGP      0x0001
+#define PCI_DEVICE_ID_SI_6202          0x0002
+#define PCI_DEVICE_ID_SI_503           0x0008
+#define PCI_DEVICE_ID_SI_ACPI          0x0009
+#define PCI_DEVICE_ID_SI_5597_VGA      0x0200
+#define PCI_DEVICE_ID_SI_6205          0x0205
+#define PCI_DEVICE_ID_SI_501           0x0406
+#define PCI_DEVICE_ID_SI_496           0x0496
+#define PCI_DEVICE_ID_SI_300           0x0300
+#define PCI_DEVICE_ID_SI_315H          0x0310
+#define PCI_DEVICE_ID_SI_315           0x0315
+#define PCI_DEVICE_ID_SI_315PRO                0x0325
+#define PCI_DEVICE_ID_SI_530           0x0530
+#define PCI_DEVICE_ID_SI_540           0x0540
+#define PCI_DEVICE_ID_SI_550           0x0550
+#define PCI_DEVICE_ID_SI_540_VGA       0x5300
+#define PCI_DEVICE_ID_SI_550_VGA       0x5315
+#define PCI_DEVICE_ID_SI_601           0x0601
+#define PCI_DEVICE_ID_SI_620           0x0620
+#define PCI_DEVICE_ID_SI_630           0x0630
+#define PCI_DEVICE_ID_SI_635           0x0635
+#define PCI_DEVICE_ID_SI_640           0x0640
+#define PCI_DEVICE_ID_SI_645           0x0645
+#define PCI_DEVICE_ID_SI_650           0x0650
+#define PCI_DEVICE_ID_SI_730           0x0730
+#define PCI_DEVICE_ID_SI_630_VGA       0x6300
+#define PCI_DEVICE_ID_SI_730_VGA       0x7300
+#define PCI_DEVICE_ID_SI_735           0x0735
+#define PCI_DEVICE_ID_SI_740           0x0740
+#define PCI_DEVICE_ID_SI_745           0x0745
+#define PCI_DEVICE_ID_SI_750           0x0750
+#define PCI_DEVICE_ID_SI_900           0x0900
+#define PCI_DEVICE_ID_SI_5107          0x5107
+#define PCI_DEVICE_ID_SI_5300          0x5300
+#define PCI_DEVICE_ID_SI_5511          0x5511
+#define PCI_DEVICE_ID_SI_5513          0x5513
+#define PCI_DEVICE_ID_SI_5571          0x5571
+#define PCI_DEVICE_ID_SI_5591          0x5591
+#define PCI_DEVICE_ID_SI_5597          0x5597
+#define PCI_DEVICE_ID_SI_5598          0x5598
+#define PCI_DEVICE_ID_SI_5600          0x5600
+#define PCI_DEVICE_ID_SI_6300          0x6300
+#define PCI_DEVICE_ID_SI_6306          0x6306
+#define PCI_DEVICE_ID_SI_6326          0x6326
+#define PCI_DEVICE_ID_SI_7001          0x7001
+#define PCI_DEVICE_ID_SI_7016          0x7016
+
+#define PCI_VENDOR_ID_HP               0x103c
+#define PCI_DEVICE_ID_HP_DONNER_GFX    0x1008
+#define PCI_DEVICE_ID_HP_TACHYON       0x1028
+#define PCI_DEVICE_ID_HP_TACHLITE      0x1029
+#define PCI_DEVICE_ID_HP_J2585A                0x1030
+#define PCI_DEVICE_ID_HP_J2585B                0x1031
+#define PCI_DEVICE_ID_HP_SAS           0x1048
+#define PCI_DEVICE_ID_HP_DIVA1         0x1049
+#define PCI_DEVICE_ID_HP_DIVA2         0x104A
+#define PCI_DEVICE_ID_HP_SP2_0         0x104B
+#define PCI_DEVICE_ID_HP_ZX1_SBA       0x1229
+#define PCI_DEVICE_ID_HP_ZX1_IOC       0x122a
+#define PCI_DEVICE_ID_HP_ZX1_LBA       0x122e
+
+#define PCI_VENDOR_ID_PCTECH           0x1042
+#define PCI_DEVICE_ID_PCTECH_RZ1000    0x1000
+#define PCI_DEVICE_ID_PCTECH_RZ1001    0x1001
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_0 0x3000
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_1 0x3010
+#define PCI_DEVICE_ID_PCTECH_SAMURAI_IDE 0x3020
+
+#define PCI_VENDOR_ID_ASUSTEK          0x1043
+#define PCI_DEVICE_ID_ASUSTEK_0675     0x0675
+
+#define PCI_VENDOR_ID_DPT              0x1044
+#define PCI_DEVICE_ID_DPT              0xa400
+
+#define PCI_VENDOR_ID_OPTI             0x1045
+#define PCI_DEVICE_ID_OPTI_92C178      0xc178
+#define PCI_DEVICE_ID_OPTI_82C557      0xc557
+#define PCI_DEVICE_ID_OPTI_82C558      0xc558
+#define PCI_DEVICE_ID_OPTI_82C621      0xc621
+#define PCI_DEVICE_ID_OPTI_82C700      0xc700
+#define PCI_DEVICE_ID_OPTI_82C701      0xc701
+#define PCI_DEVICE_ID_OPTI_82C814      0xc814
+#define PCI_DEVICE_ID_OPTI_82C822      0xc822
+#define PCI_DEVICE_ID_OPTI_82C861      0xc861
+#define PCI_DEVICE_ID_OPTI_82C825      0xd568
+
+#define PCI_VENDOR_ID_ELSA             0x1048
+#define PCI_DEVICE_ID_ELSA_MICROLINK   0x1000
+#define PCI_DEVICE_ID_ELSA_QS3000      0x3000
+
+#define PCI_VENDOR_ID_SGS              0x104a
+#define PCI_DEVICE_ID_SGS_2000         0x0008
+#define PCI_DEVICE_ID_SGS_1764         0x0009
+
+#define PCI_VENDOR_ID_BUSLOGIC               0x104B
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER    0x1040
+#define PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT     0x8130
+
+#define PCI_VENDOR_ID_TI               0x104c
+#define PCI_DEVICE_ID_TI_TVP4010       0x3d04
+#define PCI_DEVICE_ID_TI_TVP4020       0x3d07
+#define PCI_DEVICE_ID_TI_1130          0xac12
+#define PCI_DEVICE_ID_TI_1031          0xac13
+#define PCI_DEVICE_ID_TI_1131          0xac15
+#define PCI_DEVICE_ID_TI_1250          0xac16
+#define PCI_DEVICE_ID_TI_1220          0xac17
+#define PCI_DEVICE_ID_TI_1221          0xac19
+#define PCI_DEVICE_ID_TI_1210          0xac1a
+#define PCI_DEVICE_ID_TI_1410          0xac50
+#define PCI_DEVICE_ID_TI_1450          0xac1b
+#define PCI_DEVICE_ID_TI_1225          0xac1c
+#define PCI_DEVICE_ID_TI_1251A         0xac1d
+#define PCI_DEVICE_ID_TI_1211          0xac1e
+#define PCI_DEVICE_ID_TI_1251B         0xac1f
+#define PCI_DEVICE_ID_TI_4410          0xac41
+#define PCI_DEVICE_ID_TI_4451          0xac42
+#define PCI_DEVICE_ID_TI_1420          0xac51
+
+#define PCI_VENDOR_ID_SONY             0x104d
+#define PCI_DEVICE_ID_SONY_CXD3222     0x8039
+
+#define PCI_VENDOR_ID_OAK              0x104e
+#define PCI_DEVICE_ID_OAK_OTI107       0x0107
+
+/* Winbond have two vendor IDs! See 0x10ad as well */
+#define PCI_VENDOR_ID_WINBOND2         0x1050
+#define PCI_DEVICE_ID_WINBOND2_89C940  0x0940
+#define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a
+#define PCI_DEVICE_ID_WINBOND2_6692    0x6692
+
+#define PCI_VENDOR_ID_ANIGMA           0x1051
+#define PCI_DEVICE_ID_ANIGMA_MC145575  0x0100
+  
+#define PCI_VENDOR_ID_EFAR             0x1055
+#define PCI_DEVICE_ID_EFAR_SLC90E66_1  0x9130
+#define PCI_DEVICE_ID_EFAR_SLC90E66_0  0x9460
+#define PCI_DEVICE_ID_EFAR_SLC90E66_2  0x9462
+#define PCI_DEVICE_ID_EFAR_SLC90E66_3  0x9463
+
+#define PCI_VENDOR_ID_MOTOROLA         0x1057
+#define PCI_VENDOR_ID_MOTOROLA_OOPS    0x1507
+#define PCI_DEVICE_ID_MOTOROLA_MPC105  0x0001
+#define PCI_DEVICE_ID_MOTOROLA_MPC106  0x0002
+#define PCI_DEVICE_ID_MOTOROLA_RAVEN   0x4801
+#define PCI_DEVICE_ID_MOTOROLA_FALCON  0x4802
+#define PCI_DEVICE_ID_MOTOROLA_HAWK    0x4803
+#define PCI_DEVICE_ID_MOTOROLA_CPX8216 0x4806
+
+#define PCI_VENDOR_ID_PROMISE          0x105a
+#define PCI_DEVICE_ID_PROMISE_20265    0x0d30
+#define PCI_DEVICE_ID_PROMISE_20267    0x4d30
+#define PCI_DEVICE_ID_PROMISE_20246    0x4d33
+#define PCI_DEVICE_ID_PROMISE_20262    0x4d38
+#define PCI_DEVICE_ID_PROMISE_20263    0x0D38
+#define PCI_DEVICE_ID_PROMISE_20268    0x4d68
+#define PCI_DEVICE_ID_PROMISE_20270    0x6268
+#define PCI_DEVICE_ID_PROMISE_20269    0x4d69
+#define PCI_DEVICE_ID_PROMISE_20271    0x6269
+#define PCI_DEVICE_ID_PROMISE_20275    0x1275
+#define PCI_DEVICE_ID_PROMISE_20276    0x5275
+#define PCI_DEVICE_ID_PROMISE_20277    0x7275
+#define PCI_DEVICE_ID_PROMISE_5300     0x5300
+
+#define PCI_VENDOR_ID_N9               0x105d
+#define PCI_DEVICE_ID_N9_I128          0x2309
+#define PCI_DEVICE_ID_N9_I128_2                0x2339
+#define PCI_DEVICE_ID_N9_I128_T2R      0x493d
+
+#define PCI_VENDOR_ID_UMC              0x1060
+#define PCI_DEVICE_ID_UMC_UM8673F      0x0101
+#define PCI_DEVICE_ID_UMC_UM8891A      0x0891
+#define PCI_DEVICE_ID_UMC_UM8886BF     0x673a
+#define PCI_DEVICE_ID_UMC_UM8886A      0x886a
+#define PCI_DEVICE_ID_UMC_UM8881F      0x8881
+#define PCI_DEVICE_ID_UMC_UM8886F      0x8886
+#define PCI_DEVICE_ID_UMC_UM9017F      0x9017
+#define PCI_DEVICE_ID_UMC_UM8886N      0xe886
+#define PCI_DEVICE_ID_UMC_UM8891N      0xe891
+
+#define PCI_VENDOR_ID_X                        0x1061
+#define PCI_DEVICE_ID_X_AGX016         0x0001
+
+#define PCI_VENDOR_ID_MYLEX            0x1069
+#define PCI_DEVICE_ID_MYLEX_DAC960_P   0x0001
+#define PCI_DEVICE_ID_MYLEX_DAC960_PD  0x0002
+#define PCI_DEVICE_ID_MYLEX_DAC960_PG  0x0010
+#define PCI_DEVICE_ID_MYLEX_DAC960_LA  0x0020
+#define PCI_DEVICE_ID_MYLEX_DAC960_LP  0x0050
+#define PCI_DEVICE_ID_MYLEX_DAC960_BA  0xBA56
+
+#define PCI_VENDOR_ID_PICOP            0x1066
+#define PCI_DEVICE_ID_PICOP_PT86C52X   0x0001
+#define PCI_DEVICE_ID_PICOP_PT80C524   0x8002
+
+#define PCI_VENDOR_ID_APPLE            0x106b
+#define PCI_DEVICE_ID_APPLE_BANDIT     0x0001
+#define PCI_DEVICE_ID_APPLE_GC         0x0002
+#define PCI_DEVICE_ID_APPLE_HYDRA      0x000e
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW   0x0018
+#define PCI_DEVICE_ID_APPLE_KL_USB     0x0019
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP  0x0020
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021
+#define PCI_DEVICE_ID_APPLE_KEYLARGO   0x0022
+#define PCI_DEVICE_ID_APPLE_UNI_N_GMACP        0x0024
+#define PCI_DEVICE_ID_APPLE_KEYLARGO_P 0x0025
+#define PCI_DEVICE_ID_APPLE_KL_USB_P   0x0026
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P        0x0027
+#define PCI_DEVICE_ID_APPLE_UNI_N_AGP15        0x002d
+#define PCI_DEVICE_ID_APPLE_UNI_N_FW2  0x0030
+
+#define PCI_VENDOR_ID_YAMAHA           0x1073
+#define PCI_DEVICE_ID_YAMAHA_724       0x0004
+#define PCI_DEVICE_ID_YAMAHA_724F      0x000d
+#define PCI_DEVICE_ID_YAMAHA_740       0x000a
+#define PCI_DEVICE_ID_YAMAHA_740C      0x000c
+#define PCI_DEVICE_ID_YAMAHA_744       0x0010
+#define PCI_DEVICE_ID_YAMAHA_754       0x0012
+
+#define PCI_VENDOR_ID_NEXGEN           0x1074
+#define PCI_DEVICE_ID_NEXGEN_82C501    0x4e78
+
+#define PCI_VENDOR_ID_QLOGIC           0x1077
+#define PCI_DEVICE_ID_QLOGIC_ISP1020   0x1020
+#define PCI_DEVICE_ID_QLOGIC_ISP1022   0x1022
+#define PCI_DEVICE_ID_QLOGIC_ISP2100   0x2100
+#define PCI_DEVICE_ID_QLOGIC_ISP2200   0x2200
+
+#define PCI_VENDOR_ID_CYRIX            0x1078
+#define PCI_DEVICE_ID_CYRIX_5510       0x0000
+#define PCI_DEVICE_ID_CYRIX_PCI_MASTER 0x0001
+#define PCI_DEVICE_ID_CYRIX_5520       0x0002
+#define PCI_DEVICE_ID_CYRIX_5530_LEGACY        0x0100
+#define PCI_DEVICE_ID_CYRIX_5530_SMI   0x0101
+#define PCI_DEVICE_ID_CYRIX_5530_IDE   0x0102
+#define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103
+#define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104
+
+#define PCI_VENDOR_ID_LEADTEK          0x107d
+#define PCI_DEVICE_ID_LEADTEK_805      0x0000
+
+#define PCI_VENDOR_ID_INTERPHASE       0x107e
+#define PCI_DEVICE_ID_INTERPHASE_5526  0x0004
+#define PCI_DEVICE_ID_INTERPHASE_55x6  0x0005
+#define PCI_DEVICE_ID_INTERPHASE_5575  0x0008
+
+#define PCI_VENDOR_ID_CONTAQ           0x1080
+#define PCI_DEVICE_ID_CONTAQ_82C599    0x0600
+#define PCI_DEVICE_ID_CONTAQ_82C693    0xc693
+
+#define PCI_VENDOR_ID_FOREX            0x1083
+
+#define PCI_VENDOR_ID_OLICOM           0x108d
+#define PCI_DEVICE_ID_OLICOM_OC3136    0x0001
+#define PCI_DEVICE_ID_OLICOM_OC2315    0x0011
+#define PCI_DEVICE_ID_OLICOM_OC2325    0x0012
+#define PCI_DEVICE_ID_OLICOM_OC2183    0x0013
+#define PCI_DEVICE_ID_OLICOM_OC2326    0x0014
+#define PCI_DEVICE_ID_OLICOM_OC6151    0x0021
+
+#define PCI_VENDOR_ID_SUN              0x108e
+#define PCI_DEVICE_ID_SUN_EBUS         0x1000
+#define PCI_DEVICE_ID_SUN_HAPPYMEAL    0x1001
+#define PCI_DEVICE_ID_SUN_RIO_EBUS     0x1100
+#define PCI_DEVICE_ID_SUN_RIO_GEM      0x1101
+#define PCI_DEVICE_ID_SUN_RIO_1394     0x1102
+#define PCI_DEVICE_ID_SUN_RIO_USB      0x1103
+#define PCI_DEVICE_ID_SUN_GEM          0x2bad
+#define PCI_DEVICE_ID_SUN_SIMBA                0x5000
+#define PCI_DEVICE_ID_SUN_PBM          0x8000
+#define PCI_DEVICE_ID_SUN_SCHIZO       0x8001
+#define PCI_DEVICE_ID_SUN_SABRE                0xa000
+#define PCI_DEVICE_ID_SUN_HUMMINGBIRD  0xa001
+
+#define PCI_VENDOR_ID_CMD              0x1095
+#define PCI_DEVICE_ID_CMD_640          0x0640
+#define PCI_DEVICE_ID_CMD_643          0x0643
+#define PCI_DEVICE_ID_CMD_646          0x0646
+#define PCI_DEVICE_ID_CMD_647          0x0647
+#define PCI_DEVICE_ID_CMD_648          0x0648
+#define PCI_DEVICE_ID_CMD_649          0x0649
+#define PCI_DEVICE_ID_CMD_670          0x0670
+#define PCI_DEVICE_ID_CMD_680          0x0680
+
+#define PCI_VENDOR_ID_VISION           0x1098
+#define PCI_DEVICE_ID_VISION_QD8500    0x0001
+#define PCI_DEVICE_ID_VISION_QD8580    0x0002
+
+#define PCI_VENDOR_ID_BROOKTREE                0x109e
+#define PCI_DEVICE_ID_BROOKTREE_848    0x0350
+#define PCI_DEVICE_ID_BROOKTREE_849A   0x0351
+#define PCI_DEVICE_ID_BROOKTREE_878_1  0x036e
+#define PCI_DEVICE_ID_BROOKTREE_878    0x0878
+#define PCI_DEVICE_ID_BROOKTREE_8474   0x8474
+
+#define PCI_VENDOR_ID_SIERRA           0x10a8
+#define PCI_DEVICE_ID_SIERRA_STB       0x0000
+
+#define PCI_VENDOR_ID_SGI              0x10a9
+#define PCI_DEVICE_ID_SGI_IOC3         0x0003
+
+#define PCI_VENDOR_ID_ACC              0x10aa
+#define PCI_DEVICE_ID_ACC_2056         0x0000
+
+#define PCI_VENDOR_ID_WINBOND          0x10ad
+#define PCI_DEVICE_ID_WINBOND_83769    0x0001
+#define PCI_DEVICE_ID_WINBOND_82C105   0x0105
+#define PCI_DEVICE_ID_WINBOND_83C553   0x0565
+
+#define PCI_VENDOR_ID_DATABOOK         0x10b3
+#define PCI_DEVICE_ID_DATABOOK_87144   0xb106
+
+#define PCI_VENDOR_ID_PLX              0x10b5
+#define PCI_DEVICE_ID_PLX_R685         0x1030
+#define PCI_DEVICE_ID_PLX_ROMULUS      0x106a
+#define PCI_DEVICE_ID_PLX_SPCOM800     0x1076
+#define PCI_DEVICE_ID_PLX_1077         0x1077
+#define PCI_DEVICE_ID_PLX_SPCOM200     0x1103
+#define PCI_DEVICE_ID_PLX_DJINN_ITOO   0x1151
+#define PCI_DEVICE_ID_PLX_R753         0x1152
+#define PCI_DEVICE_ID_PLX_9050         0x9050
+#define PCI_DEVICE_ID_PLX_9060         0x9060
+#define PCI_DEVICE_ID_PLX_9060ES       0x906E
+#define PCI_DEVICE_ID_PLX_9060SD       0x906D
+#define PCI_DEVICE_ID_PLX_9080         0x9080
+#define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001
+
+#define PCI_VENDOR_ID_MADGE            0x10b6
+#define PCI_DEVICE_ID_MADGE_MK2                0x0002
+#define PCI_DEVICE_ID_MADGE_C155S      0x1001
+
+#define PCI_VENDOR_ID_3COM             0x10b7
+#define PCI_DEVICE_ID_3COM_3C985       0x0001
+#define PCI_DEVICE_ID_3COM_3C339       0x3390
+#define PCI_DEVICE_ID_3COM_3C590       0x5900
+#define PCI_DEVICE_ID_3COM_3C595TX     0x5950
+#define PCI_DEVICE_ID_3COM_3C595T4     0x5951
+#define PCI_DEVICE_ID_3COM_3C595MII    0x5952
+#define PCI_DEVICE_ID_3COM_3C900TPO    0x9000
+#define PCI_DEVICE_ID_3COM_3C900COMBO  0x9001
+#define PCI_DEVICE_ID_3COM_3C905TX     0x9050
+#define PCI_DEVICE_ID_3COM_3C905T4     0x9051
+#define PCI_DEVICE_ID_3COM_3C905B_TX   0x9055
+
+#define PCI_VENDOR_ID_SMC              0x10b8
+#define PCI_DEVICE_ID_SMC_EPIC100      0x0005
+
+#define PCI_VENDOR_ID_AL               0x10b9
+#define PCI_DEVICE_ID_AL_M1445         0x1445
+#define PCI_DEVICE_ID_AL_M1449         0x1449
+#define PCI_DEVICE_ID_AL_M1451         0x1451
+#define PCI_DEVICE_ID_AL_M1461         0x1461
+#define PCI_DEVICE_ID_AL_M1489         0x1489
+#define PCI_DEVICE_ID_AL_M1511         0x1511
+#define PCI_DEVICE_ID_AL_M1513         0x1513
+#define PCI_DEVICE_ID_AL_M1521         0x1521
+#define PCI_DEVICE_ID_AL_M1523         0x1523
+#define PCI_DEVICE_ID_AL_M1531         0x1531
+#define PCI_DEVICE_ID_AL_M1533         0x1533
+#define PCI_DEVICE_ID_AL_M1541         0x1541
+#define PCI_DEVICE_ID_AL_M1621          0x1621
+#define PCI_DEVICE_ID_AL_M1631          0x1631
+#define PCI_DEVICE_ID_AL_M1641          0x1641
+#define PCI_DEVICE_ID_AL_M1644          0x1644
+#define PCI_DEVICE_ID_AL_M1647          0x1647
+#define PCI_DEVICE_ID_AL_M1651          0x1651
+#define PCI_DEVICE_ID_AL_M1543         0x1543
+#define PCI_DEVICE_ID_AL_M3307         0x3307
+#define PCI_DEVICE_ID_AL_M4803         0x5215
+#define PCI_DEVICE_ID_AL_M5219         0x5219
+#define PCI_DEVICE_ID_AL_M5229         0x5229
+#define PCI_DEVICE_ID_AL_M5237         0x5237
+#define PCI_DEVICE_ID_AL_M5243         0x5243
+#define PCI_DEVICE_ID_AL_M5451         0x5451
+#define PCI_DEVICE_ID_AL_M7101         0x7101
+
+#define PCI_VENDOR_ID_MITSUBISHI       0x10ba
+
+#define PCI_VENDOR_ID_SURECOM          0x10bd
+#define PCI_DEVICE_ID_SURECOM_NE34     0x0e34
+
+#define PCI_VENDOR_ID_NEOMAGIC         0x10c8
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2070 0x0001
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128V 0x0002
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZV 0x0003
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2160 0x0004
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICMEDIA_256AV       0x0005
+#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZVPLUS   0x0083
+
+#define PCI_VENDOR_ID_ASP              0x10cd
+#define PCI_DEVICE_ID_ASP_ABP940       0x1200
+#define PCI_DEVICE_ID_ASP_ABP940U      0x1300
+#define PCI_DEVICE_ID_ASP_ABP940UW     0x2300
+
+#define PCI_VENDOR_ID_MACRONIX         0x10d9
+#define PCI_DEVICE_ID_MACRONIX_MX98713 0x0512
+#define PCI_DEVICE_ID_MACRONIX_MX987x5 0x0531
+
+#define PCI_VENDOR_ID_TCONRAD          0x10da
+#define PCI_DEVICE_ID_TCONRAD_TOKENRING        0x0508
+
+#define PCI_VENDOR_ID_CERN             0x10dc
+#define PCI_DEVICE_ID_CERN_SPSB_PMC    0x0001
+#define PCI_DEVICE_ID_CERN_SPSB_PCI    0x0002
+#define PCI_DEVICE_ID_CERN_HIPPI_DST   0x0021
+#define PCI_DEVICE_ID_CERN_HIPPI_SRC   0x0022
+
+#define PCI_VENDOR_ID_NVIDIA                   0x10de
+#define PCI_DEVICE_ID_NVIDIA_TNT               0x0020
+#define PCI_DEVICE_ID_NVIDIA_TNT2              0x0028
+#define PCI_DEVICE_ID_NVIDIA_UTNT2             0x0029
+#define PCI_DEVICE_ID_NVIDIA_VTNT2             0x002C
+#define PCI_DEVICE_ID_NVIDIA_UVTNT2            0x002D
+#define PCI_DEVICE_ID_NVIDIA_ITNT2             0x00A0
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR       0x0100
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR       0x0101
+#define PCI_DEVICE_ID_NVIDIA_QUADRO            0x0103
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX       0x0110
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2      0x0111
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO       0x0112
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR       0x0113
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS      0x0150
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2     0x0151
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA    0x0152
+#define PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO       0x0153
+#define PCI_DEVICE_ID_NVIDIA_IGEFORCE2         0x01a0
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3          0x0200
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1                0x0201
+#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_2                0x0202
+#define PCI_DEVICE_ID_NVIDIA_QUADRO_DDC                0x0203
+
+#define PCI_VENDOR_ID_IMS              0x10e0
+#define PCI_DEVICE_ID_IMS_8849         0x8849
+#define PCI_DEVICE_ID_IMS_TT128                0x9128
+#define PCI_DEVICE_ID_IMS_TT3D         0x9135
+
+#define PCI_VENDOR_ID_TEKRAM2          0x10e1
+#define PCI_DEVICE_ID_TEKRAM2_690c     0x690c
+
+#define PCI_VENDOR_ID_TUNDRA           0x10e3
+#define PCI_DEVICE_ID_TUNDRA_CA91C042  0x0000
+
+#define PCI_VENDOR_ID_AMCC             0x10e8
+#define PCI_DEVICE_ID_AMCC_MYRINET     0x8043
+#define PCI_DEVICE_ID_AMCC_PARASTATION 0x8062
+#define PCI_DEVICE_ID_AMCC_S5933       0x807d
+#define PCI_DEVICE_ID_AMCC_S5933_HEPC3 0x809c
+
+#define PCI_VENDOR_ID_INTERG           0x10ea
+#define PCI_DEVICE_ID_INTERG_1680      0x1680
+#define PCI_DEVICE_ID_INTERG_1682      0x1682
+#define PCI_DEVICE_ID_INTERG_2000      0x2000
+#define PCI_DEVICE_ID_INTERG_2010      0x2010
+#define PCI_DEVICE_ID_INTERG_5000      0x5000
+#define PCI_DEVICE_ID_INTERG_5050      0x5050
+
+#define PCI_VENDOR_ID_REALTEK          0x10ec
+#define PCI_DEVICE_ID_REALTEK_8029     0x8029
+#define PCI_DEVICE_ID_REALTEK_8129     0x8129
+#define PCI_DEVICE_ID_REALTEK_8139     0x8139
+
+#define PCI_VENDOR_ID_XILINX           0x10ee
+#define PCI_DEVICE_ID_TURBOPAM         0x4020
+
+#define PCI_VENDOR_ID_TRUEVISION       0x10fa
+#define PCI_DEVICE_ID_TRUEVISION_T1000 0x000c
+
+#define PCI_VENDOR_ID_INIT             0x1101
+#define PCI_DEVICE_ID_INIT_320P                0x9100
+#define PCI_DEVICE_ID_INIT_360P                0x9500
+
+#define PCI_VENDOR_ID_CREATIVE         0x1102 // duplicate: ECTIVA
+#define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002
+
+#define PCI_VENDOR_ID_ECTIVA           0x1102 // duplicate: CREATIVE
+#define PCI_DEVICE_ID_ECTIVA_EV1938    0x8938
+
+#define PCI_VENDOR_ID_TTI              0x1103
+#define PCI_DEVICE_ID_TTI_HPT343       0x0003
+#define PCI_DEVICE_ID_TTI_HPT366       0x0004
+#define PCI_DEVICE_ID_TTI_HPT372       0x0005
+#define PCI_DEVICE_ID_TTI_HPT302       0x0006
+#define PCI_DEVICE_ID_TTI_HPT371       0x0007
+#define PCI_DEVICE_ID_TTI_HPT374       0x0008
+
+#define PCI_VENDOR_ID_VIA              0x1106
+#define PCI_DEVICE_ID_VIA_8363_0       0x0305
+#define PCI_DEVICE_ID_VIA_8371_0       0x0391
+#define PCI_DEVICE_ID_VIA_8501_0       0x0501
+#define PCI_DEVICE_ID_VIA_82C505       0x0505
+#define PCI_DEVICE_ID_VIA_82C561       0x0561
+#define PCI_DEVICE_ID_VIA_82C586_1     0x0571
+#define PCI_DEVICE_ID_VIA_82C576       0x0576
+#define PCI_DEVICE_ID_VIA_82C585       0x0585
+#define PCI_DEVICE_ID_VIA_82C586_0     0x0586
+#define PCI_DEVICE_ID_VIA_82C595       0x0595
+#define PCI_DEVICE_ID_VIA_82C596       0x0596
+#define PCI_DEVICE_ID_VIA_82C597_0     0x0597
+#define PCI_DEVICE_ID_VIA_82C598_0     0x0598
+#define PCI_DEVICE_ID_VIA_8601_0       0x0601
+#define PCI_DEVICE_ID_VIA_8605_0       0x0605
+#define PCI_DEVICE_ID_VIA_82C680       0x0680
+#define PCI_DEVICE_ID_VIA_82C686       0x0686
+#define PCI_DEVICE_ID_VIA_82C691       0x0691
+#define PCI_DEVICE_ID_VIA_82C693       0x0693
+#define PCI_DEVICE_ID_VIA_82C693_1     0x0698
+#define PCI_DEVICE_ID_VIA_82C926       0x0926
+#define PCI_DEVICE_ID_VIA_82C576_1     0x1571
+#define PCI_DEVICE_ID_VIA_82C595_97    0x1595
+#define PCI_DEVICE_ID_VIA_82C586_2     0x3038
+#define PCI_DEVICE_ID_VIA_82C586_3     0x3040
+#define PCI_DEVICE_ID_VIA_6305         0x3044
+#define PCI_DEVICE_ID_VIA_82C596_3     0x3050
+#define PCI_DEVICE_ID_VIA_82C596B_3    0x3051
+#define PCI_DEVICE_ID_VIA_82C686_4     0x3057
+#define PCI_DEVICE_ID_VIA_82C686_5     0x3058
+#define PCI_DEVICE_ID_VIA_8233_5       0x3059
+#define PCI_DEVICE_ID_VIA_8233_7       0x3065
+#define PCI_DEVICE_ID_VIA_82C686_6     0x3068
+#define PCI_DEVICE_ID_VIA_8233_0       0x3074
+#define PCI_DEVICE_ID_VIA_8633_0       0x3091
+#define PCI_DEVICE_ID_VIA_8367_0       0x3099
+#define PCI_DEVICE_ID_VIA_8622         0x3102
+#define PCI_DEVICE_ID_VIA_8233C_0      0x3109
+#define PCI_DEVICE_ID_VIA_8361         0x3112
+#define PCI_DEVICE_ID_VIA_8233A                0x3147
+#define PCI_DEVICE_ID_VIA_86C100A      0x6100
+#define PCI_DEVICE_ID_VIA_8231         0x8231
+#define PCI_DEVICE_ID_VIA_8231_4       0x8235
+#define PCI_DEVICE_ID_VIA_8365_1       0x8305
+#define PCI_DEVICE_ID_VIA_8371_1       0x8391
+#define PCI_DEVICE_ID_VIA_8501_1       0x8501
+#define PCI_DEVICE_ID_VIA_82C597_1     0x8597
+#define PCI_DEVICE_ID_VIA_82C598_1     0x8598
+#define PCI_DEVICE_ID_VIA_8601_1       0x8601
+#define PCI_DEVICE_ID_VIA_8505_1       0x8605
+#define PCI_DEVICE_ID_VIA_8633_1       0xB091
+#define PCI_DEVICE_ID_VIA_8367_1       0xB099
+
+#define PCI_VENDOR_ID_SIEMENS           0x110A
+#define PCI_DEVICE_ID_SIEMENS_DSCC4     0x2102
+
+#define PCI_VENDOR_ID_SMC2             0x1113
+#define PCI_DEVICE_ID_SMC2_1211TX      0x1211
+
+#define PCI_VENDOR_ID_VORTEX           0x1119
+#define PCI_DEVICE_ID_VORTEX_GDT60x0   0x0000
+#define PCI_DEVICE_ID_VORTEX_GDT6000B  0x0001
+#define PCI_DEVICE_ID_VORTEX_GDT6x10   0x0002
+#define PCI_DEVICE_ID_VORTEX_GDT6x20   0x0003
+#define PCI_DEVICE_ID_VORTEX_GDT6530   0x0004
+#define PCI_DEVICE_ID_VORTEX_GDT6550   0x0005
+#define PCI_DEVICE_ID_VORTEX_GDT6x17   0x0006
+#define PCI_DEVICE_ID_VORTEX_GDT6x27   0x0007
+#define PCI_DEVICE_ID_VORTEX_GDT6537   0x0008
+#define PCI_DEVICE_ID_VORTEX_GDT6557   0x0009
+#define PCI_DEVICE_ID_VORTEX_GDT6x15   0x000a
+#define PCI_DEVICE_ID_VORTEX_GDT6x25   0x000b
+#define PCI_DEVICE_ID_VORTEX_GDT6535   0x000c
+#define PCI_DEVICE_ID_VORTEX_GDT6555   0x000d
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP 0x0100
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP 0x0101
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP 0x0102
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP 0x0103
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP 0x0104
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP 0x0105
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP1        0x0110
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP1        0x0111
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP1        0x0112
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP1        0x0113
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP1        0x0114
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP1        0x0115
+#define PCI_DEVICE_ID_VORTEX_GDT6x17RP2        0x0120
+#define PCI_DEVICE_ID_VORTEX_GDT6x27RP2        0x0121
+#define PCI_DEVICE_ID_VORTEX_GDT6537RP2        0x0122
+#define PCI_DEVICE_ID_VORTEX_GDT6557RP2        0x0123
+#define PCI_DEVICE_ID_VORTEX_GDT6x11RP2        0x0124
+#define PCI_DEVICE_ID_VORTEX_GDT6x21RP2        0x0125
+
+#define PCI_VENDOR_ID_EF               0x111a
+#define PCI_DEVICE_ID_EF_ATM_FPGA      0x0000
+#define PCI_DEVICE_ID_EF_ATM_ASIC      0x0002
+
+#define PCI_VENDOR_ID_IDT              0x111d
+#define PCI_DEVICE_ID_IDT_IDT77201     0x0001
+
+#define PCI_VENDOR_ID_FORE             0x1127
+#define PCI_DEVICE_ID_FORE_PCA200PC    0x0210
+#define PCI_DEVICE_ID_FORE_PCA200E     0x0300
+
+#define PCI_VENDOR_ID_IMAGINGTECH      0x112f
+#define PCI_DEVICE_ID_IMAGINGTECH_ICPCI        0x0000
+
+#define PCI_VENDOR_ID_PHILIPS          0x1131
+#define PCI_DEVICE_ID_PHILIPS_SAA7145  0x7145
+#define PCI_DEVICE_ID_PHILIPS_SAA7146  0x7146
+#define PCI_DEVICE_ID_PHILIPS_SAA9730  0x9730
+
+#define PCI_VENDOR_ID_EICON            0x1133
+#define PCI_DEVICE_ID_EICON_DIVA20PRO  0xe001
+#define PCI_DEVICE_ID_EICON_DIVA20     0xe002
+#define PCI_DEVICE_ID_EICON_DIVA20PRO_U        0xe003
+#define PCI_DEVICE_ID_EICON_DIVA20_U   0xe004
+#define PCI_DEVICE_ID_EICON_DIVA201    0xe005
+#define PCI_DEVICE_ID_EICON_MAESTRA    0xe010
+#define PCI_DEVICE_ID_EICON_MAESTRAQ   0xe012
+#define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013
+#define PCI_DEVICE_ID_EICON_MAESTRAP   0xe014
+#define PCI_VENDOR_ID_CYCLONE          0x113c
+#define PCI_DEVICE_ID_CYCLONE_SDK      0x0001
+
+#define PCI_VENDOR_ID_ALLIANCE         0x1142
+#define PCI_DEVICE_ID_ALLIANCE_PROMOTIO        0x3210
+#define PCI_DEVICE_ID_ALLIANCE_PROVIDEO        0x6422
+#define PCI_DEVICE_ID_ALLIANCE_AT24    0x6424
+#define PCI_DEVICE_ID_ALLIANCE_AT3D    0x643d
+
+#define PCI_VENDOR_ID_SYSKONNECT       0x1148
+#define PCI_DEVICE_ID_SYSKONNECT_FP    0x4000
+#define PCI_DEVICE_ID_SYSKONNECT_TR    0x4200
+#define PCI_DEVICE_ID_SYSKONNECT_GE    0x4300
+
+#define PCI_VENDOR_ID_VMIC             0x114a
+#define PCI_DEVICE_ID_VMIC_VME         0x7587
+
+#define PCI_VENDOR_ID_DIGI             0x114f
+#define PCI_DEVICE_ID_DIGI_EPC         0x0002
+#define PCI_DEVICE_ID_DIGI_RIGHTSWITCH 0x0003
+#define PCI_DEVICE_ID_DIGI_XEM         0x0004
+#define PCI_DEVICE_ID_DIGI_XR          0x0005
+#define PCI_DEVICE_ID_DIGI_CX          0x0006
+#define PCI_DEVICE_ID_DIGI_XRJ         0x0009
+#define PCI_DEVICE_ID_DIGI_EPCJ                0x000a
+#define PCI_DEVICE_ID_DIGI_XR_920      0x0027
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070
+#define PCI_DEVICE_ID_DIGI_DF_M_E      0x0071
+#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072
+#define PCI_DEVICE_ID_DIGI_DF_M_A      0x0073
+
+#define PCI_VENDOR_ID_MUTECH           0x1159
+#define PCI_DEVICE_ID_MUTECH_MV1000    0x0001
+
+#define PCI_VENDOR_ID_XIRCOM           0x115d
+#define PCI_DEVICE_ID_XIRCOM_X3201_ETH 0x0003
+#define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103
+
+#define PCI_VENDOR_ID_RENDITION                0x1163
+#define PCI_DEVICE_ID_RENDITION_VERITE 0x0001
+#define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000
+
+#define PCI_VENDOR_ID_SERVERWORKS        0x1166
+#define PCI_DEVICE_ID_SERVERWORKS_HE     0x0008
+#define PCI_DEVICE_ID_SERVERWORKS_LE     0x0009
+#define PCI_DEVICE_ID_SERVERWORKS_CIOB30  0x0010
+#define PCI_DEVICE_ID_SERVERWORKS_CMIC_HE 0x0011
+#define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4   0x0200
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5   0x0201
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6    0x0203
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213
+#define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE    0x0225
+#define PCI_DEVICE_ID_SERVERWORKS_GCLE2   0x0227
+#define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
+
+#define PCI_VENDOR_ID_SBE              0x1176
+#define PCI_DEVICE_ID_SBE_WANXL100     0x0301
+#define PCI_DEVICE_ID_SBE_WANXL200     0x0302
+#define PCI_DEVICE_ID_SBE_WANXL400     0x0104
+
+#define PCI_VENDOR_ID_TOSHIBA          0x1179
+#define PCI_DEVICE_ID_TOSHIBA_601      0x0601
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC95  0x060a
+#define PCI_DEVICE_ID_TOSHIBA_TOPIC97  0x060f
+
+#define PCI_VENDOR_ID_TOSHIBA_2                0x102f
+#define PCI_DEVICE_ID_TOSHIBA_TX3927   0x000a
+#define PCI_DEVICE_ID_TOSHIBA_TC35815CF        0x0030
+#define PCI_DEVICE_ID_TOSHIBA_TX4927   0x0180
+
+#define PCI_VENDOR_ID_RICOH            0x1180
+#define PCI_DEVICE_ID_RICOH_RL5C465    0x0465
+#define PCI_DEVICE_ID_RICOH_RL5C466    0x0466
+#define PCI_DEVICE_ID_RICOH_RL5C475    0x0475
+#define PCI_DEVICE_ID_RICOH_RL5C476    0x0476
+#define PCI_DEVICE_ID_RICOH_RL5C478    0x0478
+
+#define PCI_VENDOR_ID_ARTOP            0x1191
+#define PCI_DEVICE_ID_ARTOP_ATP8400    0x0004
+#define PCI_DEVICE_ID_ARTOP_ATP850UF   0x0005
+#define PCI_DEVICE_ID_ARTOP_ATP860     0x0006
+#define PCI_DEVICE_ID_ARTOP_ATP860R    0x0007
+#define PCI_DEVICE_ID_ARTOP_ATP865     0x0008
+#define PCI_DEVICE_ID_ARTOP_ATP865R    0x0009
+#define PCI_DEVICE_ID_ARTOP_AEC7610    0x8002
+#define PCI_DEVICE_ID_ARTOP_AEC7612UW  0x8010
+#define PCI_DEVICE_ID_ARTOP_AEC7612U   0x8020
+#define PCI_DEVICE_ID_ARTOP_AEC7612S   0x8030
+#define PCI_DEVICE_ID_ARTOP_AEC7612D   0x8040
+#define PCI_DEVICE_ID_ARTOP_AEC7612SUW 0x8050
+#define PCI_DEVICE_ID_ARTOP_8060       0x8060
+
+#define PCI_VENDOR_ID_ZEITNET          0x1193
+#define PCI_DEVICE_ID_ZEITNET_1221     0x0001
+#define PCI_DEVICE_ID_ZEITNET_1225     0x0002
+
+#define PCI_VENDOR_ID_OMEGA            0x119b
+#define PCI_DEVICE_ID_OMEGA_82C092G    0x1221
+
+#define PCI_VENDOR_ID_FUJITSU_ME       0x119e
+#define PCI_DEVICE_ID_FUJITSU_FS155    0x0001
+#define PCI_DEVICE_ID_FUJITSU_FS50     0x0003
+
+#define PCI_SUBVENDOR_ID_KEYSPAN       0x11a9
+#define PCI_SUBDEVICE_ID_KEYSPAN_SX2   0x5334
+
+#define PCI_VENDOR_ID_GALILEO          0x11ab
+#define PCI_DEVICE_ID_GALILEO_GT64011  0x4146
+#define PCI_DEVICE_ID_GALILEO_GT64111  0x4146
+#define PCI_DEVICE_ID_GALILEO_GT96100  0x9652
+#define PCI_DEVICE_ID_GALILEO_GT96100A 0x9653
+
+#define PCI_VENDOR_ID_LITEON           0x11ad
+#define PCI_DEVICE_ID_LITEON_LNE100TX  0x0002
+
+#define PCI_VENDOR_ID_V3               0x11b0
+#define PCI_DEVICE_ID_V3_V960          0x0001
+#define PCI_DEVICE_ID_V3_V350          0x0001
+#define PCI_DEVICE_ID_V3_V961          0x0002
+#define PCI_DEVICE_ID_V3_V351          0x0002
+
+#define PCI_VENDOR_ID_NP               0x11bc
+#define PCI_DEVICE_ID_NP_PCI_FDDI      0x0001
+
+#define PCI_VENDOR_ID_ATT              0x11c1
+#define PCI_DEVICE_ID_ATT_L56XMF       0x0440
+#define PCI_DEVICE_ID_ATT_VENUS_MODEM  0x480
+
+#define PCI_VENDOR_ID_SPECIALIX                0x11cb
+#define PCI_DEVICE_ID_SPECIALIX_IO8    0x2000
+#define PCI_DEVICE_ID_SPECIALIX_XIO    0x4000
+#define PCI_DEVICE_ID_SPECIALIX_RIO    0x8000
+#define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
+
+#define PCI_VENDOR_ID_AURAVISION       0x11d1
+#define PCI_DEVICE_ID_AURAVISION_VXP524        0x01f7
+
+#define PCI_VENDOR_ID_ANALOG_DEVICES   0x11d4
+#define PCI_DEVICE_ID_AD1889JS         0x1889
+
+#define PCI_VENDOR_ID_IKON             0x11d5
+#define PCI_DEVICE_ID_IKON_10115       0x0115
+#define PCI_DEVICE_ID_IKON_10117       0x0117
+
+#define PCI_VENDOR_ID_ZORAN            0x11de
+#define PCI_DEVICE_ID_ZORAN_36057      0x6057
+#define PCI_DEVICE_ID_ZORAN_36120      0x6120
+
+#define PCI_VENDOR_ID_KINETIC          0x11f4
+#define PCI_DEVICE_ID_KINETIC_2915     0x2915
+
+#define PCI_VENDOR_ID_COMPEX           0x11f6
+#define PCI_DEVICE_ID_COMPEX_ENET100VG4        0x0112
+#define PCI_DEVICE_ID_COMPEX_RL2000    0x1401
+
+#define PCI_VENDOR_ID_RP               0x11fe
+#define PCI_DEVICE_ID_RP32INTF         0x0001
+#define PCI_DEVICE_ID_RP8INTF          0x0002
+#define PCI_DEVICE_ID_RP16INTF         0x0003
+#define PCI_DEVICE_ID_RP4QUAD          0x0004
+#define PCI_DEVICE_ID_RP8OCTA          0x0005
+#define PCI_DEVICE_ID_RP8J             0x0006
+#define PCI_DEVICE_ID_RPP4             0x000A
+#define PCI_DEVICE_ID_RPP8             0x000B
+#define PCI_DEVICE_ID_RP8M             0x000C
+
+#define PCI_VENDOR_ID_CYCLADES         0x120e
+#define PCI_DEVICE_ID_CYCLOM_Y_Lo      0x0100
+#define PCI_DEVICE_ID_CYCLOM_Y_Hi      0x0101
+#define PCI_DEVICE_ID_CYCLOM_4Y_Lo     0x0102
+#define PCI_DEVICE_ID_CYCLOM_4Y_Hi     0x0103
+#define PCI_DEVICE_ID_CYCLOM_8Y_Lo     0x0104
+#define PCI_DEVICE_ID_CYCLOM_8Y_Hi     0x0105
+#define PCI_DEVICE_ID_CYCLOM_Z_Lo      0x0200
+#define PCI_DEVICE_ID_CYCLOM_Z_Hi      0x0201
+#define PCI_DEVICE_ID_PC300_RX_2       0x0300
+#define PCI_DEVICE_ID_PC300_RX_1       0x0301
+#define PCI_DEVICE_ID_PC300_TE_2       0x0310
+#define PCI_DEVICE_ID_PC300_TE_1       0x0311
+
+#define PCI_VENDOR_ID_ESSENTIAL                0x120f
+#define PCI_DEVICE_ID_ESSENTIAL_ROADRUNNER     0x0001
+
+#define PCI_VENDOR_ID_O2               0x1217
+#define PCI_DEVICE_ID_O2_6729          0x6729
+#define PCI_DEVICE_ID_O2_6730          0x673a
+#define PCI_DEVICE_ID_O2_6832          0x6832
+#define PCI_DEVICE_ID_O2_6836          0x6836
+
+#define PCI_VENDOR_ID_3DFX             0x121a
+#define PCI_DEVICE_ID_3DFX_VOODOO      0x0001
+#define PCI_DEVICE_ID_3DFX_VOODOO2     0x0002
+#define PCI_DEVICE_ID_3DFX_BANSHEE     0x0003
+#define PCI_DEVICE_ID_3DFX_VOODOO3     0x0005
+
+#define PCI_VENDOR_ID_SIGMADES         0x1236
+#define PCI_DEVICE_ID_SIGMADES_6425    0x6401
+
+#define PCI_VENDOR_ID_CCUBE            0x123f
+
+#define PCI_VENDOR_ID_AVM              0x1244
+#define PCI_DEVICE_ID_AVM_B1           0x0700
+#define PCI_DEVICE_ID_AVM_C4           0x0800
+#define PCI_DEVICE_ID_AVM_A1           0x0a00
+#define PCI_DEVICE_ID_AVM_A1_V2                0x0e00
+#define PCI_DEVICE_ID_AVM_C2           0x1100
+#define PCI_DEVICE_ID_AVM_T1           0x1200
+
+#define PCI_VENDOR_ID_DIPIX            0x1246
+
+#define PCI_VENDOR_ID_STALLION         0x124d
+#define PCI_DEVICE_ID_STALLION_ECHPCI832 0x0000
+#define PCI_DEVICE_ID_STALLION_ECHPCI864 0x0002
+#define PCI_DEVICE_ID_STALLION_EIOPCI  0x0003
+
+#define PCI_VENDOR_ID_OPTIBASE         0x1255
+#define PCI_DEVICE_ID_OPTIBASE_FORGE   0x1110
+#define PCI_DEVICE_ID_OPTIBASE_FUSION  0x1210
+#define PCI_DEVICE_ID_OPTIBASE_VPLEX   0x2110
+#define PCI_DEVICE_ID_OPTIBASE_VPLEXCC 0x2120
+#define PCI_DEVICE_ID_OPTIBASE_VQUEST  0x2130
+
+#define PCI_VENDOR_ID_ESS              0x125d
+#define PCI_DEVICE_ID_ESS_ESS1968      0x1968
+#define PCI_DEVICE_ID_ESS_AUDIOPCI     0x1969
+#define PCI_DEVICE_ID_ESS_ESS1978      0x1978
+
+#define PCI_VENDOR_ID_SATSAGEM         0x1267
+#define PCI_DEVICE_ID_SATSAGEM_NICCY   0x1016
+#define PCI_DEVICE_ID_SATSAGEM_PCR2101 0x5352
+#define PCI_DEVICE_ID_SATSAGEM_TELSATTURBO 0x5a4b
+
+#define PCI_VENDOR_ID_HUGHES           0x1273
+#define PCI_DEVICE_ID_HUGHES_DIRECPC   0x0002
+
+#define PCI_VENDOR_ID_ENSONIQ          0x1274
+#define PCI_DEVICE_ID_ENSONIQ_CT5880   0x5880
+#define PCI_DEVICE_ID_ENSONIQ_ES1370   0x5000
+#define PCI_DEVICE_ID_ENSONIQ_ES1371   0x1371
+
+#define PCI_VENDOR_ID_ROCKWELL         0x127A
+
+#define PCI_VENDOR_ID_ITE              0x1283
+#define PCI_DEVICE_ID_ITE_IT8172G      0x8172
+#define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801
+#define PCI_DEVICE_ID_ITE_8872         0x8872
+
+#define PCI_DEVICE_ID_ITE_IT8330G_0    0xe886
+
+/* formerly Platform Tech */
+#define PCI_VENDOR_ID_ESS_OLD          0x1285
+#define PCI_DEVICE_ID_ESS_ESS0100      0x0100
+
+#define PCI_VENDOR_ID_ALTEON           0x12ae
+#define PCI_DEVICE_ID_ALTEON_ACENIC    0x0001
+
+#define PCI_VENDOR_ID_USR              0x12B9
+
+#define PCI_SUBVENDOR_ID_CONNECT_TECH                  0x12c4
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232          0x0001
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232          0x0002
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232          0x0003
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485          0x0004
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4      0x0005
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485          0x0006
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2      0x0007
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485          0x0008
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6      0x0009
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1       0x000A
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1       0x000B
+
+#define PCI_VENDOR_ID_PICTUREL         0x12c5
+#define PCI_DEVICE_ID_PICTUREL_PCIVST  0x0081
+
+#define PCI_VENDOR_ID_NVIDIA_SGS       0x12d2
+#define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018
+
+#define PCI_SUBVENDOR_ID_CHASE_PCIFAST         0x12E0
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST4                0x0031
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST8                0x0021
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16       0x0011
+#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16FMC    0x0041
+#define PCI_SUBVENDOR_ID_CHASE_PCIRAS          0x124D
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS4         0xF001
+#define PCI_SUBDEVICE_ID_CHASE_PCIRAS8         0xF010
+
+#define PCI_VENDOR_ID_AUREAL           0x12eb
+#define PCI_DEVICE_ID_AUREAL_VORTEX_1  0x0001
+#define PCI_DEVICE_ID_AUREAL_VORTEX_2  0x0002
+
+#define PCI_VENDOR_ID_CBOARDS          0x1307
+#define PCI_DEVICE_ID_CBOARDS_DAS1602_16 0x0001
+
+#define PCI_VENDOR_ID_SIIG             0x131f
+#define PCI_DEVICE_ID_SIIG_1S_10x_550  0x1000
+#define PCI_DEVICE_ID_SIIG_1S_10x_650  0x1001
+#define PCI_DEVICE_ID_SIIG_1S_10x_850  0x1002
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_550        0x1010
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_650        0x1011
+#define PCI_DEVICE_ID_SIIG_1S1P_10x_850        0x1012
+#define PCI_DEVICE_ID_SIIG_1P_10x      0x1020
+#define PCI_DEVICE_ID_SIIG_2P_10x      0x1021
+#define PCI_DEVICE_ID_SIIG_2S_10x_550  0x1030
+#define PCI_DEVICE_ID_SIIG_2S_10x_650  0x1031
+#define PCI_DEVICE_ID_SIIG_2S_10x_850  0x1032
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_550        0x1034
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_650        0x1035
+#define PCI_DEVICE_ID_SIIG_2S1P_10x_850        0x1036
+#define PCI_DEVICE_ID_SIIG_4S_10x_550  0x1050
+#define PCI_DEVICE_ID_SIIG_4S_10x_650  0x1051
+#define PCI_DEVICE_ID_SIIG_4S_10x_850  0x1052
+#define PCI_DEVICE_ID_SIIG_1S_20x_550  0x2000
+#define PCI_DEVICE_ID_SIIG_1S_20x_650  0x2001
+#define PCI_DEVICE_ID_SIIG_1S_20x_850  0x2002
+#define PCI_DEVICE_ID_SIIG_1P_20x      0x2020
+#define PCI_DEVICE_ID_SIIG_2P_20x      0x2021
+#define PCI_DEVICE_ID_SIIG_2S_20x_550  0x2030
+#define PCI_DEVICE_ID_SIIG_2S_20x_650  0x2031
+#define PCI_DEVICE_ID_SIIG_2S_20x_850  0x2032
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_550        0x2040
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_650        0x2041
+#define PCI_DEVICE_ID_SIIG_2P1S_20x_850        0x2042
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_550        0x2010
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_650        0x2011
+#define PCI_DEVICE_ID_SIIG_1S1P_20x_850        0x2012
+#define PCI_DEVICE_ID_SIIG_4S_20x_550  0x2050
+#define PCI_DEVICE_ID_SIIG_4S_20x_650  0x2051
+#define PCI_DEVICE_ID_SIIG_4S_20x_850  0x2052
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_550        0x2060
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_650        0x2061
+#define PCI_DEVICE_ID_SIIG_2S1P_20x_850        0x2062
+
+#define PCI_VENDOR_ID_DOMEX            0x134a
+#define PCI_DEVICE_ID_DOMEX_DMX3191D   0x0001
+
+#define PCI_VENDOR_ID_QUATECH          0x135C
+#define PCI_DEVICE_ID_QUATECH_QSC100   0x0010
+#define PCI_DEVICE_ID_QUATECH_DSC100   0x0020
+#define PCI_DEVICE_ID_QUATECH_DSC200   0x0030
+#define PCI_DEVICE_ID_QUATECH_QSC200   0x0040
+#define PCI_DEVICE_ID_QUATECH_ESC100D  0x0050
+#define PCI_DEVICE_ID_QUATECH_ESC100M  0x0060
+
+#define PCI_VENDOR_ID_SEALEVEL         0x135e
+#define PCI_DEVICE_ID_SEALEVEL_U530    0x7101
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM2  0x7201
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM422        0x7402
+#define PCI_DEVICE_ID_SEALEVEL_UCOMM232        0x7202
+#define PCI_DEVICE_ID_SEALEVEL_COMM4   0x7401
+#define PCI_DEVICE_ID_SEALEVEL_COMM8   0x7801
+
+#define PCI_VENDOR_ID_HYPERCOPE                0x1365
+#define PCI_DEVICE_ID_HYPERCOPE_PLX    0x9050
+#define PCI_SUBDEVICE_ID_HYPERCOPE_OLD_ERGO    0x0104
+#define PCI_SUBDEVICE_ID_HYPERCOPE_ERGO                0x0106
+#define PCI_SUBDEVICE_ID_HYPERCOPE_METRO       0x0107
+#define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2      0x0108
+#define PCI_SUBDEVICE_ID_HYPERCOPE_PLEXUS      0x0109
+
+#define PCI_VENDOR_ID_KAWASAKI         0x136b
+#define PCI_DEVICE_ID_MCHIP_KL5A72002  0xff01
+
+#define PCI_VENDOR_ID_LMC              0x1376
+#define PCI_DEVICE_ID_LMC_HSSI         0x0003
+#define PCI_DEVICE_ID_LMC_DS3          0x0004
+#define PCI_DEVICE_ID_LMC_SSI          0x0005
+#define PCI_DEVICE_ID_LMC_T1           0x0006
+
+#define PCI_VENDOR_ID_NETGEAR          0x1385
+#define PCI_DEVICE_ID_NETGEAR_GA620    0x620a
+#define PCI_DEVICE_ID_NETGEAR_GA622    0x622a
+
+#define PCI_VENDOR_ID_APPLICOM         0x1389
+#define PCI_DEVICE_ID_APPLICOM_PCIGENERIC 0x0001
+#define PCI_DEVICE_ID_APPLICOM_PCI2000IBS_CAN 0x0002
+#define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003
+
+#define PCI_VENDOR_ID_MOXA             0x1393
+#define PCI_DEVICE_ID_MOXA_C104                0x1040
+#define PCI_DEVICE_ID_MOXA_C168                0x1680
+#define PCI_DEVICE_ID_MOXA_CP204J      0x2040
+#define PCI_DEVICE_ID_MOXA_C218                0x2180
+#define PCI_DEVICE_ID_MOXA_C320                0x3200
+
+#define PCI_VENDOR_ID_CCD              0x1397
+#define PCI_DEVICE_ID_CCD_2BD0         0x2bd0
+#define PCI_DEVICE_ID_CCD_B000         0xb000
+#define PCI_DEVICE_ID_CCD_B006         0xb006
+#define PCI_DEVICE_ID_CCD_B007         0xb007
+#define PCI_DEVICE_ID_CCD_B008         0xb008
+#define PCI_DEVICE_ID_CCD_B009         0xb009
+#define PCI_DEVICE_ID_CCD_B00A         0xb00a
+#define PCI_DEVICE_ID_CCD_B00B         0xb00b
+#define PCI_DEVICE_ID_CCD_B00C         0xb00c
+#define PCI_DEVICE_ID_CCD_B100         0xb100
+
+#define PCI_VENDOR_ID_3WARE            0x13C1
+#define PCI_DEVICE_ID_3WARE_1000       0x1000
+
+#define PCI_VENDOR_ID_ABOCOM           0x13D1
+#define PCI_DEVICE_ID_ABOCOM_2BD1       0x2BD1
+
+#define PCI_VENDOR_ID_CMEDIA           0x13f6
+#define PCI_DEVICE_ID_CMEDIA_CM8338A   0x0100
+#define PCI_DEVICE_ID_CMEDIA_CM8338B   0x0101
+#define PCI_DEVICE_ID_CMEDIA_CM8738    0x0111
+#define PCI_DEVICE_ID_CMEDIA_CM8738B   0x0112
+
+#define PCI_VENDOR_ID_LAVA             0x1407
+#define PCI_DEVICE_ID_LAVA_DSERIAL     0x0100 /* 2x 16550 */
+#define PCI_DEVICE_ID_LAVA_QUATRO_A    0x0101 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUATRO_B    0x0102 /* 2x 16550, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_A      0x0180 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_OCTO_B      0x0181 /* 4x 16550A, half of 8 port */
+#define PCI_DEVICE_ID_LAVA_PORT_PLUS   0x0200 /* 2x 16650 */
+#define PCI_DEVICE_ID_LAVA_QUAD_A      0x0201 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_QUAD_B      0x0202 /* 2x 16650, half of 4 port */
+#define PCI_DEVICE_ID_LAVA_SSERIAL     0x0500 /* 1x 16550 */
+#define PCI_DEVICE_ID_LAVA_PORT_650    0x0600 /* 1x 16650 */
+#define PCI_DEVICE_ID_LAVA_PARALLEL    0x8000
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_A  0x8002 /* The Lava Dual Parallel is */
+#define PCI_DEVICE_ID_LAVA_DUAL_PAR_B  0x8003 /* two PCI devices on a card */
+#define PCI_DEVICE_ID_LAVA_BOCA_IOPPAR 0x8800
+
+#define PCI_VENDOR_ID_TIMEDIA          0x1409
+#define PCI_DEVICE_ID_TIMEDIA_1889     0x7168
+
+#define PCI_VENDOR_ID_OXSEMI           0x1415
+#define PCI_DEVICE_ID_OXSEMI_12PCI840  0x8403
+#define PCI_DEVICE_ID_OXSEMI_16PCI954  0x9501
+#define PCI_DEVICE_ID_OXSEMI_16PCI95N  0x9511
+#define PCI_DEVICE_ID_OXSEMI_16PCI954PP        0x9513
+#define PCI_DEVICE_ID_OXSEMI_16PCI952  0x9521
+
+#define PCI_VENDOR_ID_AIRONET          0x14b9
+#define PCI_DEVICE_ID_AIRONET_4800_1   0x0001
+#define PCI_DEVICE_ID_AIRONET_4800     0x4500 // values switched?  see
+#define PCI_DEVICE_ID_AIRONET_4500     0x4800 // drivers/net/aironet4500_card.c
+
+#define PCI_VENDOR_ID_TITAN            0x14D2
+#define PCI_DEVICE_ID_TITAN_010L       0x8001
+#define PCI_DEVICE_ID_TITAN_100L       0x8010
+#define PCI_DEVICE_ID_TITAN_110L       0x8011
+#define PCI_DEVICE_ID_TITAN_200L       0x8020
+#define PCI_DEVICE_ID_TITAN_210L       0x8021
+#define PCI_DEVICE_ID_TITAN_400L       0x8040
+#define PCI_DEVICE_ID_TITAN_800L       0x8080
+#define PCI_DEVICE_ID_TITAN_100                0xA001
+#define PCI_DEVICE_ID_TITAN_200                0xA005
+#define PCI_DEVICE_ID_TITAN_400                0xA003
+#define PCI_DEVICE_ID_TITAN_800B       0xA004
+
+#define PCI_VENDOR_ID_PANACOM          0x14d4
+#define PCI_DEVICE_ID_PANACOM_QUADMODEM        0x0400
+#define PCI_DEVICE_ID_PANACOM_DUALMODEM        0x0402
+
+#define PCI_VENDOR_ID_AFAVLAB          0x14db
+#define PCI_DEVICE_ID_AFAVLAB_P028     0x2180
+
+#define PCI_VENDOR_ID_BROADCOM         0x14e4
+#define PCI_DEVICE_ID_TIGON3_5700      0x1644
+#define PCI_DEVICE_ID_TIGON3_5701      0x1645
+#define PCI_DEVICE_ID_TIGON3_5702      0x1646
+#define PCI_DEVICE_ID_TIGON3_5703      0x1647
+#define PCI_DEVICE_ID_TIGON3_5702FE    0x164d
+#define PCI_DEVICE_ID_TIGON3_5702X     0x16a6
+#define PCI_DEVICE_ID_TIGON3_5703X     0x16a7
+
+#define PCI_VENDOR_ID_SYBA             0x1592
+#define PCI_DEVICE_ID_SYBA_2P_EPP      0x0782
+#define PCI_DEVICE_ID_SYBA_1P_ECP      0x0783
+
+#define PCI_VENDOR_ID_MORETON          0x15aa
+#define PCI_DEVICE_ID_RASTEL_2PORT     0x2000
+
+#define PCI_VENDOR_ID_ZOLTRIX          0x15b0
+#define PCI_DEVICE_ID_ZOLTRIX_2BD0     0x2bd0 
+
+#define PCI_VENDOR_ID_PDC              0x15e9
+#define PCI_DEVICE_ID_PDC_1841         0x1841
+
+#define PCI_VENDOR_ID_ALTIMA           0x173b
+#define PCI_DEVICE_ID_ALTIMA_AC1000    0x03e8
+#define PCI_DEVICE_ID_ALTIMA_AC9100    0x03ea
+
+#define PCI_VENDOR_ID_SYMPHONY         0x1c1c
+#define PCI_DEVICE_ID_SYMPHONY_101     0x0001
+
+#define PCI_VENDOR_ID_TEKRAM           0x1de1
+#define PCI_DEVICE_ID_TEKRAM_DC290     0xdc29
+
+#define PCI_VENDOR_ID_3DLABS           0x3d3d
+#define PCI_DEVICE_ID_3DLABS_300SX     0x0001
+#define PCI_DEVICE_ID_3DLABS_500TX     0x0002
+#define PCI_DEVICE_ID_3DLABS_DELTA     0x0003
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA  0x0004
+#define PCI_DEVICE_ID_3DLABS_MX                0x0006
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007
+#define PCI_DEVICE_ID_3DLABS_GAMMA     0x0008
+#define PCI_DEVICE_ID_3DLABS_PERMEDIA2V        0x0009
+
+#define PCI_VENDOR_ID_AVANCE           0x4005
+#define PCI_DEVICE_ID_AVANCE_ALG2064   0x2064
+#define PCI_DEVICE_ID_AVANCE_2302      0x2302
+
+#define PCI_VENDOR_ID_AKS              0x416c
+#define PCI_DEVICE_ID_AKS_ALADDINCARD  0x0100
+#define PCI_DEVICE_ID_AKS_CPC          0x0200
+
+#define PCI_VENDOR_ID_NETVIN           0x4a14
+#define PCI_DEVICE_ID_NETVIN_NV5000SC  0x5000
+
+#define PCI_VENDOR_ID_S3               0x5333
+#define PCI_DEVICE_ID_S3_PLATO_PXS     0x0551
+#define PCI_DEVICE_ID_S3_ViRGE         0x5631
+#define PCI_DEVICE_ID_S3_TRIO          0x8811
+#define PCI_DEVICE_ID_S3_AURORA64VP    0x8812
+#define PCI_DEVICE_ID_S3_TRIO64UVP     0x8814
+#define PCI_DEVICE_ID_S3_ViRGE_VX      0x883d
+#define PCI_DEVICE_ID_S3_868           0x8880
+#define PCI_DEVICE_ID_S3_928           0x88b0
+#define PCI_DEVICE_ID_S3_864_1         0x88c0
+#define PCI_DEVICE_ID_S3_864_2         0x88c1
+#define PCI_DEVICE_ID_S3_964_1         0x88d0
+#define PCI_DEVICE_ID_S3_964_2         0x88d1
+#define PCI_DEVICE_ID_S3_968           0x88f0
+#define PCI_DEVICE_ID_S3_TRIO64V2      0x8901
+#define PCI_DEVICE_ID_S3_PLATO_PXG     0x8902
+#define PCI_DEVICE_ID_S3_ViRGE_DXGX    0x8a01
+#define PCI_DEVICE_ID_S3_ViRGE_GX2     0x8a10
+#define PCI_DEVICE_ID_S3_ViRGE_MX      0x8c01
+#define PCI_DEVICE_ID_S3_ViRGE_MXP     0x8c02
+#define PCI_DEVICE_ID_S3_ViRGE_MXPMV   0x8c03
+#define PCI_DEVICE_ID_S3_SONICVIBES    0xca00
+
+#define PCI_VENDOR_ID_DUNORD           0x5544
+#define PCI_DEVICE_ID_DUNORD_I3000     0x0001
+#define PCI_VENDOR_ID_GENROCO          0x5555
+#define PCI_DEVICE_ID_GENROCO_HFP832   0x0003
+
+#define PCI_VENDOR_ID_DCI              0x6666
+#define PCI_DEVICE_ID_DCI_PCCOM4       0x0001
+#define PCI_DEVICE_ID_DCI_PCCOM8       0x0002
+
+#define PCI_VENDOR_ID_INTEL            0x8086
+#define PCI_DEVICE_ID_INTEL_21145      0x0039
+#define PCI_DEVICE_ID_INTEL_82375      0x0482
+#define PCI_DEVICE_ID_INTEL_82424      0x0483
+#define PCI_DEVICE_ID_INTEL_82378      0x0484
+#define PCI_DEVICE_ID_INTEL_82430      0x0486
+#define PCI_DEVICE_ID_INTEL_82434      0x04a3
+#define PCI_DEVICE_ID_INTEL_I960       0x0960
+#define PCI_DEVICE_ID_INTEL_82562ET    0x1031
+#define PCI_DEVICE_ID_INTEL_82559ER    0x1209
+#define PCI_DEVICE_ID_INTEL_82092AA_0  0x1221
+#define PCI_DEVICE_ID_INTEL_82092AA_1  0x1222
+#define PCI_DEVICE_ID_INTEL_7116       0x1223
+#define PCI_DEVICE_ID_INTEL_82596      0x1226
+#define PCI_DEVICE_ID_INTEL_82865      0x1227
+#define PCI_DEVICE_ID_INTEL_82557      0x1229
+#define PCI_DEVICE_ID_INTEL_82437      0x122d
+#define PCI_DEVICE_ID_INTEL_82371FB_0  0x122e
+#define PCI_DEVICE_ID_INTEL_82371FB_1  0x1230
+#define PCI_DEVICE_ID_INTEL_82371MX    0x1234
+#define PCI_DEVICE_ID_INTEL_82437MX    0x1235
+#define PCI_DEVICE_ID_INTEL_82441      0x1237
+#define PCI_DEVICE_ID_INTEL_82380FB    0x124b
+#define PCI_DEVICE_ID_INTEL_82439      0x1250
+#define PCI_DEVICE_ID_INTEL_80960_RP   0x1960
+#define PCI_DEVICE_ID_INTEL_82371SB_0  0x7000
+#define PCI_DEVICE_ID_INTEL_82371SB_1  0x7010
+#define PCI_DEVICE_ID_INTEL_82371SB_2  0x7020
+#define PCI_DEVICE_ID_INTEL_82437VX    0x7030
+#define PCI_DEVICE_ID_INTEL_82439TX    0x7100
+#define PCI_DEVICE_ID_INTEL_82371AB_0  0x7110
+#define PCI_DEVICE_ID_INTEL_82371AB    0x7111
+#define PCI_DEVICE_ID_INTEL_82371AB_2  0x7112
+#define PCI_DEVICE_ID_INTEL_82371AB_3  0x7113
+#define PCI_DEVICE_ID_INTEL_82801AA_0  0x2410
+#define PCI_DEVICE_ID_INTEL_82801AA_1  0x2411
+#define PCI_DEVICE_ID_INTEL_82801AA_2  0x2412
+#define PCI_DEVICE_ID_INTEL_82801AA_3  0x2413
+#define PCI_DEVICE_ID_INTEL_82801AA_5  0x2415
+#define PCI_DEVICE_ID_INTEL_82801AA_6  0x2416
+#define PCI_DEVICE_ID_INTEL_82801AA_8  0x2418
+#define PCI_DEVICE_ID_INTEL_82801AB_0  0x2420
+#define PCI_DEVICE_ID_INTEL_82801AB_1  0x2421
+#define PCI_DEVICE_ID_INTEL_82801AB_2  0x2422
+#define PCI_DEVICE_ID_INTEL_82801AB_3  0x2423
+#define PCI_DEVICE_ID_INTEL_82801AB_5  0x2425
+#define PCI_DEVICE_ID_INTEL_82801AB_6  0x2426
+#define PCI_DEVICE_ID_INTEL_82801AB_8  0x2428
+#define PCI_DEVICE_ID_INTEL_82801BA_0  0x2440
+#define PCI_DEVICE_ID_INTEL_82801BA_1  0x2442
+#define PCI_DEVICE_ID_INTEL_82801BA_2  0x2443
+#define PCI_DEVICE_ID_INTEL_82801BA_3  0x2444
+#define PCI_DEVICE_ID_INTEL_82801BA_4  0x2445
+#define PCI_DEVICE_ID_INTEL_82801BA_5  0x2446
+#define PCI_DEVICE_ID_INTEL_82801BA_6  0x2448
+#define PCI_DEVICE_ID_INTEL_82801BA_7  0x2449
+#define PCI_DEVICE_ID_INTEL_82801BA_8  0x244a
+#define PCI_DEVICE_ID_INTEL_82801BA_9  0x244b
+#define PCI_DEVICE_ID_INTEL_82801BA_10 0x244c
+#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
+#define PCI_DEVICE_ID_INTEL_82801E_0   0x2450
+#define PCI_DEVICE_ID_INTEL_82801E_2   0x2452
+#define PCI_DEVICE_ID_INTEL_82801E_3   0x2453
+#define PCI_DEVICE_ID_INTEL_82801E_9   0x2459
+#define PCI_DEVICE_ID_INTEL_82801E_11  0x245b
+#define PCI_DEVICE_ID_INTEL_82801E_13  0x245d
+#define PCI_DEVICE_ID_INTEL_82801E_14  0x245e
+#define PCI_DEVICE_ID_INTEL_82801CA_0  0x2480
+#define PCI_DEVICE_ID_INTEL_82801CA_2  0x2482
+#define PCI_DEVICE_ID_INTEL_82801CA_3  0x2483
+#define PCI_DEVICE_ID_INTEL_82801CA_4  0x2484
+#define PCI_DEVICE_ID_INTEL_82801CA_5  0x2485
+#define PCI_DEVICE_ID_INTEL_82801CA_6  0x2486
+#define PCI_DEVICE_ID_INTEL_82801CA_7  0x2487
+#define PCI_DEVICE_ID_INTEL_82801CA_10 0x248a
+#define PCI_DEVICE_ID_INTEL_82801CA_11 0x248b
+#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c
+#define PCI_DEVICE_ID_INTEL_82801DB_0  0x24c0
+#define PCI_DEVICE_ID_INTEL_82801DB_2  0x24c2
+#define PCI_DEVICE_ID_INTEL_82801DB_3  0x24c3
+#define PCI_DEVICE_ID_INTEL_82801DB_4  0x24c4
+#define PCI_DEVICE_ID_INTEL_82801DB_5  0x24c5
+#define PCI_DEVICE_ID_INTEL_82801DB_6  0x24c6
+#define PCI_DEVICE_ID_INTEL_82801DB_7  0x24c7
+#define PCI_DEVICE_ID_INTEL_82801DB_11 0x24cb
+#define PCI_DEVICE_ID_INTEL_82801DB_13 0x24cd
+#define PCI_DEVICE_ID_INTEL_80310      0x530d
+#define PCI_DEVICE_ID_INTEL_82810_MC1  0x7120
+#define PCI_DEVICE_ID_INTEL_82810_IG1  0x7121
+#define PCI_DEVICE_ID_INTEL_82810_MC3  0x7122
+#define PCI_DEVICE_ID_INTEL_82810_IG3  0x7123
+#define PCI_DEVICE_ID_INTEL_82443LX_0  0x7180
+#define PCI_DEVICE_ID_INTEL_82443LX_1  0x7181
+#define PCI_DEVICE_ID_INTEL_82443BX_0  0x7190
+#define PCI_DEVICE_ID_INTEL_82443BX_1  0x7191
+#define PCI_DEVICE_ID_INTEL_82443BX_2  0x7192
+#define PCI_DEVICE_ID_INTEL_82443MX_0  0x7198
+#define PCI_DEVICE_ID_INTEL_82443MX_1  0x7199
+#define PCI_DEVICE_ID_INTEL_82443MX_2  0x719a
+#define PCI_DEVICE_ID_INTEL_82443MX_3  0x719b
+#define PCI_DEVICE_ID_INTEL_82372FB_0  0x7600
+#define PCI_DEVICE_ID_INTEL_82372FB_1  0x7601
+#define PCI_DEVICE_ID_INTEL_82372FB_2  0x7602
+#define PCI_DEVICE_ID_INTEL_82372FB_3  0x7603
+#define PCI_DEVICE_ID_INTEL_82454GX    0x84c4
+#define PCI_DEVICE_ID_INTEL_82450GX    0x84c5
+#define PCI_DEVICE_ID_INTEL_82451NX    0x84ca
+
+#define PCI_VENDOR_ID_COMPUTONE                0x8e0e
+#define PCI_DEVICE_ID_COMPUTONE_IP2EX  0x0291
+#define PCI_DEVICE_ID_COMPUTONE_PG     0x0302
+#define PCI_SUBVENDOR_ID_COMPUTONE     0x8e0e
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG4 0x0001
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG8 0x0002
+#define PCI_SUBDEVICE_ID_COMPUTONE_PG6 0x0003
+
+#define PCI_VENDOR_ID_KTI              0x8e2e
+#define PCI_DEVICE_ID_KTI_ET32P2       0x3000
+
+#define PCI_VENDOR_ID_ADAPTEC          0x9004
+#define PCI_DEVICE_ID_ADAPTEC_7810     0x1078
+#define PCI_DEVICE_ID_ADAPTEC_7821     0x2178
+#define PCI_DEVICE_ID_ADAPTEC_38602    0x3860
+#define PCI_DEVICE_ID_ADAPTEC_7850     0x5078
+#define PCI_DEVICE_ID_ADAPTEC_7855     0x5578
+#define PCI_DEVICE_ID_ADAPTEC_5800     0x5800
+#define PCI_DEVICE_ID_ADAPTEC_3860     0x6038
+#define PCI_DEVICE_ID_ADAPTEC_1480A    0x6075
+#define PCI_DEVICE_ID_ADAPTEC_7860     0x6078
+#define PCI_DEVICE_ID_ADAPTEC_7861     0x6178
+#define PCI_DEVICE_ID_ADAPTEC_7870     0x7078
+#define PCI_DEVICE_ID_ADAPTEC_7871     0x7178
+#define PCI_DEVICE_ID_ADAPTEC_7872     0x7278
+#define PCI_DEVICE_ID_ADAPTEC_7873     0x7378
+#define PCI_DEVICE_ID_ADAPTEC_7874     0x7478
+#define PCI_DEVICE_ID_ADAPTEC_7895     0x7895
+#define PCI_DEVICE_ID_ADAPTEC_7880     0x8078
+#define PCI_DEVICE_ID_ADAPTEC_7881     0x8178
+#define PCI_DEVICE_ID_ADAPTEC_7882     0x8278
+#define PCI_DEVICE_ID_ADAPTEC_7883     0x8378
+#define PCI_DEVICE_ID_ADAPTEC_7884     0x8478
+#define PCI_DEVICE_ID_ADAPTEC_7885     0x8578
+#define PCI_DEVICE_ID_ADAPTEC_7886     0x8678
+#define PCI_DEVICE_ID_ADAPTEC_7887     0x8778
+#define PCI_DEVICE_ID_ADAPTEC_7888     0x8878
+#define PCI_DEVICE_ID_ADAPTEC_1030     0x8b78
+
+#define PCI_VENDOR_ID_ADAPTEC2         0x9005
+#define PCI_DEVICE_ID_ADAPTEC2_2940U2  0x0010
+#define PCI_DEVICE_ID_ADAPTEC2_2930U2  0x0011
+#define PCI_DEVICE_ID_ADAPTEC2_7890B   0x0013
+#define PCI_DEVICE_ID_ADAPTEC2_7890    0x001f
+#define PCI_DEVICE_ID_ADAPTEC2_3940U2  0x0050
+#define PCI_DEVICE_ID_ADAPTEC2_3950U2D 0x0051
+#define PCI_DEVICE_ID_ADAPTEC2_7896    0x005f
+#define PCI_DEVICE_ID_ADAPTEC2_7892A   0x0080
+#define PCI_DEVICE_ID_ADAPTEC2_7892B   0x0081
+#define PCI_DEVICE_ID_ADAPTEC2_7892D   0x0083
+#define PCI_DEVICE_ID_ADAPTEC2_7892P   0x008f
+#define PCI_DEVICE_ID_ADAPTEC2_7899A   0x00c0
+#define PCI_DEVICE_ID_ADAPTEC2_7899B   0x00c1
+#define PCI_DEVICE_ID_ADAPTEC2_7899D   0x00c3
+#define PCI_DEVICE_ID_ADAPTEC2_7899P   0x00cf
+
+#define PCI_VENDOR_ID_ATRONICS         0x907f
+#define PCI_DEVICE_ID_ATRONICS_2015    0x2015
+
+#define PCI_VENDOR_ID_HOLTEK           0x9412
+#define PCI_DEVICE_ID_HOLTEK_6565      0x6565
+
+#define PCI_VENDOR_ID_NETMOS           0x9710
+#define PCI_DEVICE_ID_NETMOS_9735      0x9735
+#define PCI_DEVICE_ID_NETMOS_9835      0x9835
+
+#define PCI_SUBVENDOR_ID_EXSYS         0xd84d
+#define PCI_SUBDEVICE_ID_EXSYS_4014    0x4014
+
+#define PCI_VENDOR_ID_TIGERJET         0xe159
+#define PCI_DEVICE_ID_TIGERJET_300     0x0001
+#define PCI_DEVICE_ID_TIGERJET_100     0x0002
+
+#define PCI_VENDOR_ID_ARK              0xedd8
+#define PCI_DEVICE_ID_ARK_STING                0xa091
+#define PCI_DEVICE_ID_ARK_STINGARK     0xa099
+#define PCI_DEVICE_ID_ARK_2000MT       0xa0a1
+
+#define PCI_VENDOR_ID_MICROGATE                0x13c0
+#define PCI_DEVICE_ID_MICROGATE_USC    0x0010
+#define PCI_DEVICE_ID_MICROGATE_SCC    0x0020
+#define PCI_DEVICE_ID_MICROGATE_SCA    0x0030
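These constants are raw ID values; drivers consume them through PCI match tables rather than using them directly. As a minimal sketch (Linux-2.4-era API; the table name is hypothetical), a driver for the Realtek RTL8139 defined above would declare:

    /* Hypothetical match table: claim any RTL8139 regardless of
     * subsystem IDs.  The zeroed entry terminates the table. */
    static struct pci_device_id rtl8139_pci_tbl[] = {
            { PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139,
              PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
            { 0, }
    };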
diff --git a/xen-2.4.16/include/xeno/pkt_sched.h b/xen-2.4.16/include/xeno/pkt_sched.h
new file mode 100644 (file)
index 0000000..87b1a08
--- /dev/null
@@ -0,0 +1,816 @@
+#ifndef __NET_PKT_SCHED_H
+#define __NET_PKT_SCHED_H
+
+#define PSCHED_GETTIMEOFDAY    1
+#define PSCHED_JIFFIES                 2
+#define PSCHED_CPU             3
+
+#define PSCHED_CLOCK_SOURCE    PSCHED_JIFFIES
+
+#include <linux/config.h>
+#include <linux/pkt_sched.h>
+//#include <net/pkt_cls.h>
+
+#ifdef CONFIG_X86_TSC
+#include <asm/msr.h>
+#endif
+
+struct rtattr;
+struct Qdisc;
+
+struct qdisc_walker
+{
+       int     stop;
+       int     skip;
+       int     count;
+       int     (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
+};
+
+struct Qdisc_class_ops
+{
+       /* Child qdisc manipulation */
+       int                     (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
+       struct Qdisc *          (*leaf)(struct Qdisc *, unsigned long cl);
+
+       /* Class manipulation routines */
+       unsigned long           (*get)(struct Qdisc *, u32 classid);
+       void                    (*put)(struct Qdisc *, unsigned long);
+       int                     (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);
+       int                     (*delete)(struct Qdisc *, unsigned long);
+       void                    (*walk)(struct Qdisc *, struct qdisc_walker * arg);
+
+#if 0
+       /* Filter manipulation */
+       struct tcf_proto **     (*tcf_chain)(struct Qdisc *, unsigned long);
+       unsigned long           (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid);
+       void                    (*unbind_tcf)(struct Qdisc *, unsigned long);
+#endif
+};
+
+struct Qdisc_ops
+{
+       struct Qdisc_ops        *next;
+       struct Qdisc_class_ops  *cl_ops;
+       char                    id[IFNAMSIZ];
+       int                     priv_size;
+
+       int                     (*enqueue)(struct sk_buff *, struct Qdisc *);
+       struct sk_buff *        (*dequeue)(struct Qdisc *);
+       int                     (*requeue)(struct sk_buff *, struct Qdisc *);
+       int                     (*drop)(struct Qdisc *);
+
+       int                     (*init)(struct Qdisc *, struct rtattr *arg);
+       void                    (*reset)(struct Qdisc *);
+       void                    (*destroy)(struct Qdisc *);
+       int                     (*change)(struct Qdisc *, struct rtattr *arg);
+
+       int                     (*dump)(struct Qdisc *, struct sk_buff *);
+};
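To make the ops table concrete, here is a minimal sketch of a classless FIFO qdisc built on these hooks; the "toy_" names are hypothetical, and error handling, statistics, and the optional hooks are omitted:

    /* Enqueue at the tail of the skb queue embedded in struct Qdisc. */
    static int toy_enqueue(struct sk_buff *skb, struct Qdisc *sch)
    {
            __skb_queue_tail(&sch->q, skb);
            return 0;
    }

    /* Dequeue from the head; returns NULL when the queue is empty. */
    static struct sk_buff *toy_dequeue(struct Qdisc *sch)
    {
            return __skb_dequeue(&sch->q);
    }

    /* Requeue puts a packet back at the head, ahead of newer traffic. */
    static int toy_requeue(struct sk_buff *skb, struct Qdisc *sch)
    {
            __skb_queue_head(&sch->q, skb);
            return 0;
    }

    static struct Qdisc_ops toy_qdisc_ops = {
            NULL,           /* next: linked in by register_qdisc() */
            NULL,           /* cl_ops: classless, so none          */
            "toy",          /* id                                  */
            0,              /* priv_size: no private state         */
            toy_enqueue,
            toy_dequeue,
            toy_requeue,
            /* drop, init, reset, destroy, change, dump left unset */
    };

Registration would then go through register_qdisc(&toy_qdisc_ops), declared further below.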
+
+extern rwlock_t qdisc_tree_lock;
+
+struct Qdisc
+{
+       int                     (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
+       struct sk_buff *        (*dequeue)(struct Qdisc *dev);
+       unsigned                flags;
+#define TCQ_F_BUILTIN  1
+#define TCQ_F_THROTTLED        2
+#define TCQ_F_INGRES   4
+       struct Qdisc_ops        *ops;
+       struct Qdisc            *next;
+       u32                     handle;
+       atomic_t                refcnt;
+       struct sk_buff_head     q;
+       struct net_device       *dev;
+
+    //struct tc_stats          stats;
+       int                     (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
+
+       /* This field is deprecated, but it is still used by CBQ
+        * and will live on until a better solution is invented.
+        */
+       struct Qdisc            *__parent;
+
+       char                    data[0];
+};
+
+struct qdisc_rate_table
+{
+    //struct tc_ratespec rate;
+       u32             data[256];
+       struct qdisc_rate_table *next;
+       int             refcnt;
+};
+
+static inline void sch_tree_lock(struct Qdisc *q)
+{
+       write_lock(&qdisc_tree_lock);
+       spin_lock_bh(&q->dev->queue_lock);
+}
+
+static inline void sch_tree_unlock(struct Qdisc *q)
+{
+       spin_unlock_bh(&q->dev->queue_lock);
+       write_unlock(&qdisc_tree_lock);
+}
+
+#if 0
+static inline void tcf_tree_lock(struct tcf_proto *tp)
+{
+       write_lock(&qdisc_tree_lock);
+       spin_lock_bh(&tp->q->dev->queue_lock);
+}
+
+static inline void tcf_tree_unlock(struct tcf_proto *tp)
+{
+       spin_unlock_bh(&tp->q->dev->queue_lock);
+       write_unlock(&qdisc_tree_lock);
+}
+
+static inline unsigned long
+cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl)
+{
+       unsigned long old_cl;
+
+       tcf_tree_lock(tp);
+       old_cl = *clp;
+       *clp = cl;
+       tcf_tree_unlock(tp);
+       return old_cl;
+}
+
+static inline unsigned long
+__cls_set_class(unsigned long *clp, unsigned long cl)
+{
+       unsigned long old_cl;
+
+       old_cl = *clp;
+       *clp = cl;
+       return old_cl;
+}
+#endif
+
+
+/* 
+   Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth.
+
+   A typical IP packet is ~512 bytes, hence:
+
+   0.5 Kbyte / 1 Mbyte/sec = 0.5 msec, so we need a 50 usec timer for
+   10 Mbit ethernet.
+
+   10 msec resolution -> < 50 Kbit/sec.
+
+   The result: a [34]86 is not a good choice for a QoS router :-(
+
+   Things are not so bad, though, because in the most critical places
+   we may use an artificial clock evaluated by integrating the network
+   data flow.
+
+   Note: we do not use fastgettimeofday.
+   The reason is that, when it is not the same thing as gettimeofday,
+   it returns an invalid timestamp, which is not updated while net_bh
+   is active.
+
+   So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums
+   with rdtsc, and PSCHED_JIFFIES on all other architectures, including
+   [34]86 and pentiums without rdtsc.
+   You can use PSCHED_GETTIMEOFDAY on other architectures that have a
+   fast and precise clock source, but it is too expensive.
+ */
+
+/* General note about the internal clock.
+
+   Any clock source returns time intervals measured in units close to
+   1 usec. With source PSCHED_GETTIMEOFDAY these are precisely
+   microseconds; otherwise they are something close but different,
+   chosen to minimize arithmetic cost. The usec/internal-unit ratio,
+   in the form numerator/denominator, may be read from /proc/net/psched.
+ */
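In other words, whichever source is compiled in, scheduler code only ever manipulates timestamps through the macros defined below; an illustrative measurement of an idle interval looks like:

    psched_time_t now, last;  /* 'last' holds a previously taken stamp */
    psched_tdiff_t idle;

    PSCHED_GET_TIME(now);               /* stamp the current time      */
    idle = PSCHED_TDIFF(now, last);     /* interval in internal units, */
                                        /* each close to 1 usec        */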
+
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
+
+typedef struct timeval psched_time_t;
+typedef long           psched_tdiff_t;
+
+#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
+#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))
+
+#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff);
+
+#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */
+
+#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2
+
+typedef u64    psched_time_t;
+typedef long   psched_tdiff_t;
+
+extern psched_time_t   psched_time_base;
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
+
+#if HZ == 100
+#define PSCHED_JSCALE 13
+#elif HZ == 1024
+#define PSCHED_JSCALE 10
+#else
+#define PSCHED_JSCALE 0
+#endif
+
+#define PSCHED_EXPORTLIST_2
+
+#if ~0UL == 0xFFFFFFFF
+
+#define PSCHED_WATCHER unsigned long
+
+extern PSCHED_WATCHER psched_time_mark;
+
+#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE))
+
+#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
+                            EXPORT_SYMBOL(psched_time_mark);
+
+#else
+
+#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies<<PSCHED_JSCALE))
+
+#define PSCHED_EXPORTLIST_1 
+
+#endif
+
+#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
+
+#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU
+
+extern psched_tdiff_t psched_clock_per_hz;
+extern int psched_clock_scale;
+
+#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \
+                            EXPORT_SYMBOL(psched_clock_scale);
+
+#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
+
+#ifdef CONFIG_X86_TSC
+
+#define PSCHED_GET_TIME(stamp) \
+({ u64 __cur; \
+   rdtscll(__cur); \
+   (stamp) = __cur>>psched_clock_scale; \
+})
+
+#define PSCHED_EXPORTLIST_1
+
+#elif defined (__alpha__)
+
+#define PSCHED_WATCHER u32
+
+extern PSCHED_WATCHER psched_time_mark;
+
+#define PSCHED_GET_TIME(stamp) \
+({ u32 __res; \
+   __asm__ __volatile__ ("rpcc %0" : "=r"(__res)); \
+   if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \
+   psched_time_mark = __res; \
+   (stamp) = (psched_time_base + __res)>>psched_clock_scale; \
+})
+
+#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
+                            EXPORT_SYMBOL(psched_time_mark);
+
+#else
+
+#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.
+
+#endif /* ARCH */
+
+#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */
+
+#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
+#define PSCHED_TDIFF(tv1, tv2) \
+({ \
+          int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
+          int __delta = (tv1).tv_usec - (tv2).tv_usec; \
+          if (__delta_sec) { \
+                  switch (__delta_sec) { \
+                  default: \
+                          __delta = 0; \
+                  case 2: \
+                          __delta += 1000000; \
+                  case 1: \
+                          __delta += 1000000; \
+                  } \
+          } \
+          __delta; \
+})
+
+extern int psched_tod_diff(int delta_sec, int bound);
+
+#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
+({ \
+          int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
+          int __delta = (tv1).tv_usec - (tv2).tv_usec; \
+          switch (__delta_sec) { \
+          default: \
+                  __delta = psched_tod_diff(__delta_sec, bound); guard; break; \
+          case 2: \
+                  __delta += 1000000; \
+          case 1: \
+                  __delta += 1000000; \
+          case 0: ; \
+          } \
+          __delta; \
+})
+
+#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
+                               (tv1).tv_sec <= (tv2).tv_sec) || \
+                                (tv1).tv_sec < (tv2).tv_sec)
+
+#define PSCHED_TADD2(tv, delta, tv_res) \
+({ \
+          int __delta = (tv).tv_usec + (delta); \
+          (tv_res).tv_sec = (tv).tv_sec; \
+          if (__delta >= 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \
+          (tv_res).tv_usec = __delta; \
+})
+
+#define PSCHED_TADD(tv, delta) \
+({ \
+          (tv).tv_usec += (delta); \
+          if ((tv).tv_usec >= 1000000) { (tv).tv_sec++; \
+                (tv).tv_usec -= 1000000; } \
+})
+
+/* Set/check that a time is in the "past perfect";
+   this depends on the concrete representation of system time.
+ */
+
+#define PSCHED_SET_PASTPERFECT(t)      ((t).tv_sec = 0)
+#define PSCHED_IS_PASTPERFECT(t)       ((t).tv_sec == 0)
+
+#define        PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
+
+#else
+
+#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
+#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
+({ \
+          long __delta = (tv1) - (tv2); \
+          if ( __delta > (bound)) {  __delta = (bound); guard; } \
+          __delta; \
+})
+
+
+#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
+#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
+#define PSCHED_TADD(tv, delta) ((tv) += (delta))
+#define PSCHED_SET_PASTPERFECT(t)      ((t) = 0)
+#define PSCHED_IS_PASTPERFECT(t)       ((t) == 0)
+#define        PSCHED_AUDIT_TDIFF(t)
+
+#endif
+
+struct tcf_police
+{
+       struct tcf_police *next;
+       int             refcnt;
+       u32             index;
+
+       int             action;
+       int             result;
+       u32             ewma_rate;
+       u32             burst;
+       u32             mtu;
+
+       u32             toks;
+       u32             ptoks;
+       psched_time_t   t_c;
+       spinlock_t      lock;
+       struct qdisc_rate_table *R_tab;
+       struct qdisc_rate_table *P_tab;
+
+    //struct tc_stats  stats;
+};
+
+//extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st);
+extern void tcf_police_destroy(struct tcf_police *p);
+extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est);
+extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
+extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
+
+static inline void tcf_police_release(struct tcf_police *p)
+{
+       if (p && --p->refcnt == 0)
+               tcf_police_destroy(p);
+}
+
+extern struct Qdisc noop_qdisc;
+extern struct Qdisc_ops noop_qdisc_ops;
+extern struct Qdisc_ops pfifo_qdisc_ops;
+extern struct Qdisc_ops bfifo_qdisc_ops;
+
+int register_qdisc(struct Qdisc_ops *qops);
+int unregister_qdisc(struct Qdisc_ops *qops);
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
+struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
+void dev_init_scheduler(struct net_device *dev);
+void dev_shutdown(struct net_device *dev);
+void dev_activate(struct net_device *dev);
+void dev_deactivate(struct net_device *dev);
+void qdisc_reset(struct Qdisc *qdisc);
+void qdisc_destroy(struct Qdisc *qdisc);
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops);
+//int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt);
+//void qdisc_kill_estimator(struct tc_stats *stats);
+//struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab);
+void qdisc_put_rtab(struct qdisc_rate_table *tab);
+int teql_init(void);
+int tc_filter_init(void);
+int pktsched_init(void);
+
+extern int qdisc_restart(struct net_device *dev);
+
+static inline void qdisc_run(struct net_device *dev)
+{
+       while (!netif_queue_stopped(dev) &&
+              qdisc_restart(dev)<0)
+               /* NOTHING */;
+}
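Illustratively, a transmit path drives the queue under the device queue lock, letting qdisc_run() drain packets until the driver stops the queue (a sketch of the idea only, not the full dev_queue_xmit() logic; 'dev' is an already-resolved net_device pointer):

    spin_lock_bh(&dev->queue_lock);
    qdisc_run(dev);                  /* calls qdisc_restart() in a loop */
    spin_unlock_bh(&dev->queue_lock);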
+
+/* Calculate the maximal packet size seen by the hard_start_xmit
+   routine of this device.
+ */
+static inline unsigned psched_mtu(struct net_device *dev)
+{
+       unsigned mtu = dev->mtu;
+       return dev->hard_header ? mtu + dev->hard_header_len : mtu;
+}
+
+
+/******************************************************************************
+ * XXXXXXXXX Rest of this file is real linux/pkt_sched.h XXXXX
+ */
+
+
+/* Logical priority bands, independent of any specific packet scheduler.
+   Every scheduler will map them to real traffic classes if it has no
+   more precise mechanism to classify packets.
+
+   These numbers have no special meaning, though their coincidence
+   with obsolete IPv6 values is not accidental :-). Newer IPv6 drafts
+   preferred full anarchy inspired by the diffserv group.
+
+   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
+   class; as a rule it will actually be handled with more care than
+   filler or even bulk.
+ */
+
+#define TC_PRIO_BESTEFFORT             0
+#define TC_PRIO_FILLER                 1
+#define TC_PRIO_BULK                   2
+#define TC_PRIO_INTERACTIVE_BULK       4
+#define TC_PRIO_INTERACTIVE            6
+#define TC_PRIO_CONTROL                        7
+
+#define TC_PRIO_MAX                    15
+
+/* Generic queue statistics, available for all the elements.
+   Particular schedulers may have also their private records.
+ */
+
+struct tc_stats
+{
+       __u64   bytes;                  /* Number of enqueued bytes */
+       __u32   packets;                /* Number of enqueued packets   */
+       __u32   drops;                  /* Packets dropped because of lack of resources */
+       __u32   overlimits;             /* Number of throttle events when this
+                                        * flow goes out of allocated bandwidth */
+       __u32   bps;                    /* Current flow byte rate */
+       __u32   pps;                    /* Current flow packet rate */
+       __u32   qlen;
+       __u32   backlog;
+#ifdef __KERNEL__
+       spinlock_t *lock;
+#endif
+};
+
+struct tc_estimator
+{
+       char            interval;
+       unsigned char   ewma_log;
+};
+
+/* "Handles"
+   ---------
+
+    All the traffic control objects have 32bit identifiers, or "handles".
+
+    They can be considered as opaque numbers from user API viewpoint,
+    but actually they always consist of two fields: major and
+    minor numbers, which are interpreted by kernel specially,
+    that may be used by applications, though not recommended.
+
+    F.e. qdisc handles always have minor number equal to zero,
+    classes (or flows) have major equal to parent qdisc major, and
+    minor uniquely identifying class inside qdisc.
+
+    Macros to manipulate handles:
+ */
+
+#define TC_H_MAJ_MASK (0xFFFF0000U)
+#define TC_H_MIN_MASK (0x0000FFFFU)
+#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
+#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
+#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))
+
+#define TC_H_UNSPEC    (0U)
+#define TC_H_ROOT      (0xFFFFFFFFU)
+#define TC_H_INGRESS    (0xFFFFFFF1U)
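A worked example of the macros above: the qdisc known to userspace as "1:" has handle 0x00010000 (minor always zero), and its class "1:2" shares the major number:

    u32 qdisc_h = TC_H_MAKE(0x00010000, 0);  /* "1:"  -> 0x00010000 */
    u32 class_h = TC_H_MAKE(qdisc_h, 2);     /* "1:2" -> 0x00010002 */
    /* TC_H_MAJ(class_h) == qdisc_h and TC_H_MIN(class_h) == 2 */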
+
+struct tc_ratespec
+{
+       unsigned char   cell_log;
+       unsigned char   __reserved;
+       unsigned short  feature;
+       short           addend;
+       unsigned short  mpu;
+       __u32           rate;
+};
+
+/* FIFO section */
+
+struct tc_fifo_qopt
+{
+       __u32   limit;  /* Queue length: bytes for bfifo, packets for pfifo */
+};
+
+/* PRIO section */
+
+#define TCQ_PRIO_BANDS 16
+
+struct tc_prio_qopt
+{
+       int     bands;                  /* Number of bands */
+       __u8    priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */
+};
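As an illustration, a conventional three-band configuration maps interactive and control traffic to band 0, best-effort to band 1, and bulk/filler to band 2 (the exact map values here are only indicative):

    struct tc_prio_qopt opt = {
            3,                                   /* bands */
            /* indexed by TC_PRIO_*: BESTEFFORT->1, FILLER/BULK->2,
             * INTERACTIVE/CONTROL->0, everything else to band 1.  */
            { 1, 2, 2, 2, 1, 2, 0, 0,  1, 1, 1, 1, 1, 1, 1, 1 }
    };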
+
+/* CSZ section */
+
+struct tc_csz_qopt
+{
+       int             flows;          /* Maximal number of guaranteed flows */
+       unsigned char   R_log;          /* Fixed point position for round number */
+       unsigned char   delta_log;      /* Log of maximal managed time interval */
+       __u8            priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> CSZ band */
+};
+
+struct tc_csz_copt
+{
+       struct tc_ratespec slice;
+       struct tc_ratespec rate;
+       struct tc_ratespec peakrate;
+       __u32           limit;
+       __u32           buffer;
+       __u32           mtu;
+};
+
+enum
+{
+       TCA_CSZ_UNSPEC,
+       TCA_CSZ_PARMS,
+       TCA_CSZ_RTAB,
+       TCA_CSZ_PTAB,
+};
+
+/* TBF section */
+
+struct tc_tbf_qopt
+{
+       struct tc_ratespec rate;
+       struct tc_ratespec peakrate;
+       __u32           limit;
+       __u32           buffer;
+       __u32           mtu;
+};
+
+enum
+{
+       TCA_TBF_UNSPEC,
+       TCA_TBF_PARMS,
+       TCA_TBF_RTAB,
+       TCA_TBF_PTAB,
+};
+
+
+/* TEQL section */
+
+/* TEQL does not require any parameters */
+
+/* SFQ section */
+
+struct tc_sfq_qopt
+{
+       unsigned        quantum;        /* Bytes per round allocated to flow */
+       int             perturb_period; /* Period of hash perturbation */
+       __u32           limit;          /* Maximal packets in queue */
+       unsigned        divisor;        /* Hash divisor  */
+       unsigned        flows;          /* Maximal number of flows  */
+};
+
+/*
+ *  NOTE: limit, divisor and flows are hardwired in the code at the moment:
+ *
+ *     limit=flows=128, divisor=1024;
+ *
+ *     The only reason for this is efficiency; the parameters can be
+ *     changed at compile time.
+ */
+
+/* RED section */
+
+enum
+{
+       TCA_RED_UNSPEC,
+       TCA_RED_PARMS,
+       TCA_RED_STAB,
+};
+
+struct tc_red_qopt
+{
+       __u32           limit;          /* HARD maximal queue length (bytes)    */
+       __u32           qth_min;        /* Min average length threshold (bytes) */
+       __u32           qth_max;        /* Max average length threshold (bytes) */
+       unsigned char   Wlog;           /* log(W)               */
+       unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
+       unsigned char   Scell_log;      /* cell size for idle damping */
+       unsigned char   flags;
+#define TC_RED_ECN     1
+};
+
+struct tc_red_xstats
+{
+       __u32           early;          /* Early drops */
+       __u32           pdrop;          /* Drops due to queue limits */
+       __u32           other;          /* Drops due to drop() calls */
+       __u32           marked;         /* Marked packets */
+};
+
+/* GRED section */
+
+#define MAX_DPs 16
+
+enum
+{
+       TCA_GRED_UNSPEC,
+       TCA_GRED_PARMS,
+       TCA_GRED_STAB,
+       TCA_GRED_DPS,
+};
+
+#define TCA_SET_OFF TCA_GRED_PARMS
+struct tc_gred_qopt
+{
+       __u32           limit;          /* HARD maximal queue length (bytes) */
+       __u32           qth_min;        /* Min average length threshold (bytes) */
+       __u32           qth_max;        /* Max average length threshold (bytes) */
+       __u32           DP;             /* up to 2^32 DPs */
+       __u32           backlog;        
+       __u32           qave;   
+       __u32           forced; 
+       __u32           early;  
+       __u32           other;  
+       __u32           pdrop;  
+
+       unsigned char   Wlog;           /* log(W)               */
+       unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
+       unsigned char   Scell_log;      /* cell size for idle damping */
+       __u8            prio;           /* prio of this VQ */
+       __u32   packets;
+       __u32   bytesin;
+};
+/* gred setup */
+struct tc_gred_sopt
+{
+       __u32           DPs;
+       __u32           def_DP;
+       __u8            grio;
+};
+
+/* CBQ section */
+
+#define TC_CBQ_MAXPRIO         8
+#define TC_CBQ_MAXLEVEL                8
+#define TC_CBQ_DEF_EWMA                5
+
+struct tc_cbq_lssopt
+{
+       unsigned char   change;
+       unsigned char   flags;
+#define TCF_CBQ_LSS_BOUNDED    1
+#define TCF_CBQ_LSS_ISOLATED   2
+       unsigned char   ewma_log;
+       unsigned char   level;
+#define TCF_CBQ_LSS_FLAGS      1
+#define TCF_CBQ_LSS_EWMA       2
+#define TCF_CBQ_LSS_MAXIDLE    4
+#define TCF_CBQ_LSS_MINIDLE    8
+#define TCF_CBQ_LSS_OFFTIME    0x10
+#define TCF_CBQ_LSS_AVPKT      0x20
+       __u32           maxidle;
+       __u32           minidle;
+       __u32           offtime;
+       __u32           avpkt;
+};
+
+struct tc_cbq_wrropt
+{
+       unsigned char   flags;
+       unsigned char   priority;
+       unsigned char   cpriority;
+       unsigned char   __reserved;
+       __u32           allot;
+       __u32           weight;
+};
+
+struct tc_cbq_ovl
+{
+       unsigned char   strategy;
+#define        TC_CBQ_OVL_CLASSIC      0
+#define        TC_CBQ_OVL_DELAY        1
+#define        TC_CBQ_OVL_LOWPRIO      2
+#define        TC_CBQ_OVL_DROP         3
+#define        TC_CBQ_OVL_RCLASSIC     4
+       unsigned char   priority2;
+       __u32           penalty;
+};
+
+struct tc_cbq_police
+{
+       unsigned char   police;
+       unsigned char   __res1;
+       unsigned short  __res2;
+};
+
+struct tc_cbq_fopt
+{
+       __u32           split;
+       __u32           defmap;
+       __u32           defchange;
+};
+
+struct tc_cbq_xstats
+{
+       __u32           borrows;
+       __u32           overactions;
+       __s32           avgidle;
+       __s32           undertime;
+};
+
+enum
+{
+       TCA_CBQ_UNSPEC,
+       TCA_CBQ_LSSOPT,
+       TCA_CBQ_WRROPT,
+       TCA_CBQ_FOPT,
+       TCA_CBQ_OVL_STRATEGY,
+       TCA_CBQ_RATE,
+       TCA_CBQ_RTAB,
+       TCA_CBQ_POLICE,
+};
+
+#define TCA_CBQ_MAX    TCA_CBQ_POLICE
+
+/* dsmark section */
+
+enum {
+       TCA_DSMARK_UNSPEC,
+       TCA_DSMARK_INDICES,
+       TCA_DSMARK_DEFAULT_INDEX,
+       TCA_DSMARK_SET_TC_INDEX,
+       TCA_DSMARK_MASK,
+       TCA_DSMARK_VALUE
+};
+
+#define TCA_DSMARK_MAX TCA_DSMARK_VALUE
+
+/* ATM  section */
+
+enum {
+       TCA_ATM_UNSPEC,
+       TCA_ATM_FD,             /* file/socket descriptor */
+       TCA_ATM_PTR,            /* pointer to descriptor - later */
+       TCA_ATM_HDR,            /* LL header */
+       TCA_ATM_EXCESS,         /* excess traffic class (0 for CLP)  */
+       TCA_ATM_ADDR,           /* PVC address (for output only) */
+       TCA_ATM_STATE           /* VC state (ATM_VS_*; for output only) */
+};
+
+#define TCA_ATM_MAX    TCA_ATM_STATE
+
+#endif
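
As an aside on the RED parameters defined above: a configuration tool fills struct tc_red_qopt and hands it to the kernel inside a TCA_RED_PARMS attribute. The sketch below is illustrative only and not part of the patch; every numeric value is an assumption, not a recommendation.

    /* Illustrative RED setup; assumes struct tc_red_qopt and TC_RED_ECN
     * from the header above are in scope. */
    static void red_example_opts(struct tc_red_qopt *opt)
    {
        opt->limit     = 100 * 1024; /* hard cap on queued bytes (drop beyond) */
        opt->qth_min   = 10 * 1024;  /* begin probabilistic drops here         */
        opt->qth_max   = 30 * 1024;  /* drop with max probability above this   */
        opt->Wlog      = 9;          /* EWMA weight 1/2^9 for the average      */
        opt->Plog      = 17;         /* encodes P_max/(qth_max-qth_min)        */
        opt->Scell_log = 10;         /* cell size for idle damping             */
        opt->flags     = TC_RED_ECN; /* mark ECN-capable packets, don't drop   */
    }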
diff --git a/xen-2.4.16/include/xeno/prefetch.h b/xen-2.4.16/include/xeno/prefetch.h
new file mode 100644 (file)
index 0000000..8d7d3ff
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ *  Generic cache management functions. Everything is arch-specific,  
+ *  but this header exists to make sure the defines/functions can be
+ *  used in a generic way.
+ *
+ *  2000-11-13  Arjan van de Ven   <arjan@fenrus.demon.nl>
+ *
+ */
+
+#ifndef _LINUX_PREFETCH_H
+#define _LINUX_PREFETCH_H
+
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+/*
+       prefetch(x) attempts to pre-emptively get the memory pointed to
+       by address "x" into the CPU L1 cache. 
+       prefetch(x) should not cause any kind of exception, prefetch(0) is
+       specifically ok.
+
+       prefetch() should be defined by the architecture, if not, the 
+       #define below provides a no-op define.  
+       
+       There are 3 prefetch() macros:
+       
+       prefetch(x)     - prefetches the cacheline at "x" for read
+       prefetchw(x)    - prefetches the cacheline at "x" for write
+       spin_lock_prefetch(x) - prefetches the spinlock *x for taking
+       
+       there is also PREFETCH_STRIDE which is the architecture-preferred 
+       "lookahead" size for prefetching streamed operations.
+       
+*/
+
+/*
+ *     These cannot be do{}while(0) macros. See the mental gymnastics in
+ *     the loop macro.
+ */
+#ifndef ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_PREFETCHW
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x) {;}
+#endif
+
+#ifndef ARCH_HAS_SPINLOCK_PREFETCH
+#define ARCH_HAS_SPINLOCK_PREFETCH
+#define spin_lock_prefetch(x) prefetchw(x)
+#endif
+
+#ifndef PREFETCH_STRIDE
+#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
+#endif
+
+#endif
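
The usual idiom is to issue prefetch() one element ahead while walking a pointer-chained structure; since prefetch(0) is explicitly allowed, the last iteration needs no guard. A minimal sketch with a hypothetical node type:

    #include <xeno/prefetch.h>

    struct node { struct node *next; int payload; };  /* hypothetical */

    static int sum_list(struct node *head)
    {
        int sum = 0;
        struct node *n;
        for (n = head; n != NULL; n = n->next) {
            prefetch(n->next);  /* hint next node into L1; NULL is safe */
            sum += n->payload;
        }
        return sum;
    }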
diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h
new file mode 100644 (file)
index 0000000..ae3457c
--- /dev/null
@@ -0,0 +1,222 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/spinlock.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <xeno/smp.h>
+#include <asm/processor.h>
+#include <asm/current.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/dom0_ops.h>
+
+extern unsigned long volatile jiffies;
+extern rwlock_t tasklist_lock;
+
+#include <xeno/spinlock.h>
+
+struct mm_struct {
+    unsigned long cpu_vm_mask;
+    pagetable_t  pagetable;
+};
+
+extern struct mm_struct init_mm;
+#define IDLE0_MM                                                    \
+{                                                                   \
+    cpu_vm_mask: 0,                                                 \
+    pagetable:   mk_pagetable((unsigned long)idle0_pg_table)        \
+}
+
+#define _HYP_EVENT_NEED_RESCHED 0
+#define _HYP_EVENT_NET_RX       1
+#define _HYP_EVENT_DIE          2
+
+#define PF_DONEFPUINIT  0x1  /* Has the FPU been initialised for this task? */
+#define PF_USEDFPU      0x2  /* Has this task used the FPU since last save? */
+#define PF_GUEST_STTS   0x4  /* Has the guest OS requested 'stts'?          */
+
+#include <xeno/vif.h>
+#include <xeno/block.h>
+
+struct task_struct {
+    int processor;
+    int state, hyp_events;
+    unsigned int domain;
+
+    /* An unsafe pointer into a shared data area. */
+    shared_info_t *shared_info;
+    
+    /* Pointer to this guest's virtual interfaces. */
+    /* network */
+    net_ring_t *net_ring_base;
+    net_vif_t *net_vif_list[MAX_GUEST_VIFS];
+    int num_net_vifs;
+    /* block io */
+    blk_ring_t *blk_ring_base;
+
+    int has_cpu, policy, counter;
+
+    struct list_head run_list;
+    
+    struct mm_struct mm;
+
+    mm_segment_t addr_limit;        /* thread address space:
+                                       0-0xBFFFFFFF for user-thread
+                                       0-0xFFFFFFFF for kernel-thread
+                                     */
+
+    /*
+     * active_mm stays for now. It's entangled in the tricky TLB flushing
+     * stuff which I haven't addressed yet. It stays until I'm man enough
+     * to venture in.
+     */
+    struct mm_struct *active_mm;
+    struct thread_struct thread;
+    struct task_struct *prev_task, *next_task;
+       
+    /* Index into frame_table, threading together the pages that
+     * belong to this domain.
+     */
+    unsigned long pg_head;
+    unsigned long pg_tail;
+    unsigned int tot_pages;
+
+    unsigned long flags;
+};
+
+#define TASK_RUNNING            0
+#define TASK_INTERRUPTIBLE      1
+#define TASK_UNINTERRUPTIBLE    2
+#define TASK_STOPPED            4
+#define TASK_DYING              8
+
+#define SCHED_YIELD             0x10
+
+#include <asm/uaccess.h> /* for KERNEL_DS */
+
+#define IDLE0_TASK(_t)           \
+{                                \
+    processor:   0,              \
+    domain:      IDLE_DOMAIN_ID, \
+    state:       TASK_RUNNING,   \
+    has_cpu:     0,              \
+    mm:          IDLE0_MM,       \
+    addr_limit:  KERNEL_DS,      \
+    active_mm:   &idle0_task.mm, \
+    thread:      INIT_THREAD,    \
+    prev_task:   &(_t),          \
+    next_task:   &(_t)           \
+}
+
+#define IDLE_DOMAIN_ID   (~0)
+#define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID)
+
+#ifndef IDLE0_TASK_SIZE
+#define IDLE0_TASK_SIZE        2048*sizeof(long)
+#endif
+
+union task_union {
+    struct task_struct task;
+    unsigned long stack[IDLE0_TASK_SIZE/sizeof(long)];
+};
+
+extern union task_union idle0_task_union;
+extern struct task_struct first_task_struct;
+
+extern struct task_struct *do_newdomain(void);
+extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params);
+
+struct task_struct *find_domain_by_id(unsigned int dom);
+extern void release_task(struct task_struct *);
+extern void kill_domain(void);
+extern void kill_domain_with_errmsg(const char *err);
+extern long kill_other_domain(unsigned int dom);
+
+/* arch/process.c */
+void new_thread(struct task_struct *p,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+/* Linux puts these here for some reason! */
+extern int request_irq(unsigned int,
+                       void (*handler)(int, void *, struct pt_regs *),
+                       unsigned long, const char *, void *);
+extern void free_irq(unsigned int, void *);
+
+extern unsigned long wait_init_idle;
+#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
+
+#define set_current_state(_s) do { current->state = (_s); } while (0)
+#define MAX_SCHEDULE_TIMEOUT LONG_MAX
+long schedule_timeout(long timeout);
+asmlinkage void schedule(void);
+
+void reschedule(struct task_struct *p);
+
+typedef struct schedule_data_st
+{
+    spinlock_t lock;
+    struct list_head runqueue;
+    struct task_struct *prev, *curr;
+} __cacheline_aligned schedule_data_t;
+extern schedule_data_t schedule_data[NR_CPUS];
+
+static inline void __add_to_runqueue(struct task_struct * p)
+{
+    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+
+static inline void __move_last_runqueue(struct task_struct * p)
+{
+    list_del(&p->run_list);
+    list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+
+static inline void __move_first_runqueue(struct task_struct * p)
+{
+    list_del(&p->run_list);
+    list_add(&p->run_list, &schedule_data[p->processor].runqueue);
+}
+
+static inline void __del_from_runqueue(struct task_struct * p)
+{
+    list_del(&p->run_list);
+    p->run_list.next = NULL;
+}
+
+static inline int __task_on_runqueue(struct task_struct *p)
+{
+    return (p->run_list.next != NULL);
+}
+
+int wake_up(struct task_struct *p);
+
+#define signal_pending(_p) ((_p)->hyp_events || \
+                            (_p)->shared_info->events)
+
+void domain_init(void);
+
+void cpu_idle(void);
+
+#define REMOVE_LINKS(p) do { \
+        (p)->next_task->prev_task = (p)->prev_task; \
+        (p)->prev_task->next_task = (p)->next_task; \
+        } while (0)
+
+#define SET_LINKS(p) do { \
+        (p)->next_task = &idle0_task; \
+        (p)->prev_task = idle0_task.prev_task; \
+        idle0_task.prev_task->next_task = (p); \
+        idle0_task.prev_task = (p); \
+        } while (0)
+
+extern void update_process_times(int user);
+
+#endif
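
To illustrate the runqueue helpers above: each CPU's runqueue is guarded by its schedule_data[].lock, so a wake-up path would take that lock, mark the task runnable, and link it in if absent. This is a hedged sketch of the intended usage, not the actual wake_up() implementation:

    static void make_runnable(struct task_struct *p)  /* illustrative */
    {
        unsigned long flags;
        spinlock_t *lock = &schedule_data[p->processor].lock;

        spin_lock_irqsave(lock, flags);
        p->state = TASK_RUNNING;
        if (!__task_on_runqueue(p))  /* run_list.next is NULL when off-queue */
            __add_to_runqueue(p);
        spin_unlock_irqrestore(lock, flags);
    }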
diff --git a/xen-2.4.16/include/xeno/skbuff.h b/xen-2.4.16/include/xeno/skbuff.h
new file mode 100644 (file)
index 0000000..4a540d8
--- /dev/null
@@ -0,0 +1,1139 @@
+/*
+ *     Definitions for the 'struct sk_buff' memory handlers.
+ *
+ *     Authors:
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *             Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/lib.h>
+//#include <linux/kernel.h>
+//#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/cache.h>
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+//#include <linux/highmem.h>
+
+#define HAVE_ALLOC_SKB         /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB     /* Ditto 8)                */
+#define SLAB_SKB               /* Slabified skbuffs       */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X)      (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
+#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
+#define SKB_MAX_HEAD(X)                (SKB_MAX_ORDER((X),0))
+#define SKB_MAX_ALLOC          (SKB_MAX_ORDER(0,2))
+
+/* A. Checksumming of received packets by device.
+ *
+ *     NONE: device failed to checksum this packet.
+ *             skb->csum is undefined.
+ *
+ *     UNNECESSARY: device parsed the packet and verified the checksum.
+ *             skb->csum is undefined.
+ *           It is a bad option, but, unfortunately, many vendors do this.
+ *           Apparently with the secret goal of selling you a new device
+ *           when you add a new protocol to your host. F.e. IPv6. 8)
+ *
+ *     HW: the most generic way. Device supplied checksum of _all_
+ *         the packet as seen by netif_rx in skb->csum.
+ *         NOTE: Even if device supports only some protocols, but
+ *         is able to produce some skb->csum, it MUST use HW,
+ *         not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ *     NONE: skb is checksummed by protocol or csum is not required.
+ *
+ *     HW: device is required to csum packet as seen by hard_start_xmit
+ *     from skb->h.raw to the end and to record the checksum
+ *     at skb->h.raw+skb->csum.
+ *
+ *     Device must show its capabilities in dev->features, set
+ *     at device setup time.
+ *     NETIF_F_HW_CSUM - a clever device; it is able to checksum
+ *                       everything.
+ *     NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ *     NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ *                       TCP/UDP over IPv4. Sigh. Vendors like this
+ *                       way for an unknown reason. Though, see comment above
+ *                       about CHECKSUM_UNNECESSARY. 8)
+ *
+ *     Any questions? No questions, good.              --ANK
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void**)&arg)-1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+#ifdef CONFIG_NETFILTER
+struct nf_conntrack {
+       atomic_t use;
+       void (*destroy)(struct nf_conntrack *);
+};
+
+struct nf_ct_info {
+       struct nf_conntrack *master;
+};
+#endif
+
+struct sk_buff_head {
+       /* These two members must be first. */
+       struct sk_buff  * next;
+       struct sk_buff  * prev;
+
+       __u32           qlen;
+       spinlock_t      lock;
+};
+
+struct sk_buff;
+
+#define MAX_SKB_FRAGS 6
+
+typedef struct skb_frag_struct skb_frag_t;
+
+struct skb_frag_struct
+{
+       struct pfn_info *page;
+       __u16 page_offset;
+       __u16 size;
+};
+
+/* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+struct skb_shared_info {
+       atomic_t        dataref;
+       unsigned int    nr_frags;
+       struct sk_buff  *frag_list;
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+};
+
+struct sk_buff {
+       /* These two members must be first. */
+       struct sk_buff  * next;                 /* Next buffer in list                          */
+       struct sk_buff  * prev;                 /* Previous buffer in list                      */
+
+       struct sk_buff_head * list;             /* List we are on                               */
+       struct sock     *sk;                    /* Socket we are owned by                       */
+       struct timeval  stamp;                  /* Time we arrived                              */
+       struct net_device       *dev;           /* Device we arrived on/are leaving by          */
+
+       /* Transport layer header */
+       union
+       {
+               struct tcphdr   *th;
+               struct udphdr   *uh;
+               struct icmphdr  *icmph;
+               struct igmphdr  *igmph;
+               struct iphdr    *ipiph;
+               struct spxhdr   *spxh;
+               unsigned char   *raw;
+       } h;
+
+       /* Network layer header */
+       union
+       {
+               struct iphdr    *iph;
+               struct ipv6hdr  *ipv6h;
+               struct arphdr   *arph;
+               struct ipxhdr   *ipxh;
+               unsigned char   *raw;
+       } nh;
+  
+       /* Link layer header */
+       union 
+       {       
+               struct ethhdr   *ethernet;
+               unsigned char   *raw;
+       } mac;
+
+//     struct  dst_entry *dst;
+
+       /* 
+        * This is the control buffer. It is free to use for every
+        * layer. Please put your private variables there. If you
+        * want to keep them across layers you have to do a skb_clone()
+        * first. This is owned by whoever has the skb queued ATM.
+        */ 
+       char            cb[48];  
+
+       unsigned int    len;                    /* Length of actual data                        */
+       unsigned int    data_len;
+       unsigned int    csum;                   /* Checksum                                     */
+       unsigned char   __unused,               /* Dead field, may be reused                    */
+                       cloned,                 /* head may be cloned (check refcnt to be sure). */
+                       pkt_type,               /* Packet class                                 */
+                       ip_summed;              /* Driver fed us an IP checksum                 */
+       __u32           priority;               /* Packet queueing priority                     */
+       atomic_t        users;                  /* User count - see datagram.c,tcp.c            */
+       unsigned short  protocol;               /* Packet protocol from driver.                 */
+       unsigned short  security;               /* Security level of packet                     */
+       unsigned int    truesize;               /* Buffer size                                  */
+
+       unsigned char   *head;                  /* Head of buffer                               */
+       unsigned char   *data;                  /* Data head pointer                            */
+       unsigned char   *tail;                  /* Tail pointer                                 */
+       unsigned char   *end;                   /* End pointer                                  */
+
+       void            (*destructor)(struct sk_buff *);        /* Destruct function            */
+#ifdef CONFIG_NETFILTER
+       /* Can be used for communication between hooks. */
+        unsigned long  nfmark;
+       /* Cache info */
+       __u32           nfcache;
+       /* Associated connection, if any */
+       struct nf_ct_info *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+        unsigned int nf_debug;
+#endif
+#endif /*CONFIG_NETFILTER*/
+
+#if defined(CONFIG_HIPPI)
+       union{
+               __u32   ifield;
+       } private;
+#endif
+
+#ifdef CONFIG_NET_SCHED
+       __u32           tc_index;               /* traffic control index */
+#endif
+};
+
+#define SK_WMEM_MAX    65535
+#define SK_RMEM_MAX    65535
+
+#ifdef __KERNEL__
+/*
+ *     Handling routines are only of interest to the kernel
+ */
+#include <linux/slab.h>
+
+#include <asm/system.h>
+
+extern void                    __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *                alloc_skb(unsigned int size, int priority);
+extern void                    kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *                skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff *                skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff *                pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int                     pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
+extern struct sk_buff *                skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
+extern struct sk_buff *                skb_copy_expand(const struct sk_buff *skb, 
+                                               int newheadroom,
+                                               int newtailroom,
+                                               int priority);
+#define dev_kfree_skb(a)       kfree_skb(a)
+extern void    skb_over_panic(struct sk_buff *skb, int len, void *here);
+extern void    skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+/* Internal */
+#define skb_shinfo(SKB)                ((struct skb_shared_info *)((SKB)->end))
+
+/**
+ *     skb_queue_empty - check if a queue is empty
+ *     @list: queue head
+ *
+ *     Returns true if the queue is empty, false otherwise.
+ */
+static inline int skb_queue_empty(struct sk_buff_head *list)
+{
+       return (list->next == (struct sk_buff *) list);
+}
+
+/**
+ *     skb_get - reference buffer
+ *     @skb: buffer to reference
+ *
+ *     Makes another reference to a socket buffer and returns a pointer
+ *     to the buffer.
+ */
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+       atomic_inc(&skb->users);
+       return skb;
+}
+
+/*
+ * If users==1, we are the only owner and can avoid redundant
+ * atomic change.
+ */
+/**
+ *     kfree_skb - free an sk_buff
+ *     @skb: buffer to free
+ *
+ *     Drop a reference to the buffer and free it if the usage count has
+ *     hit zero.
+ */
+static inline void kfree_skb(struct sk_buff *skb)
+{
+       if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+               __kfree_skb(skb);
+}
+
+/* Use this if you didn't touch the skb state [for fast switching] */
+static inline void kfree_skb_fast(struct sk_buff *skb)
+{
+       if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+               kfree_skbmem(skb);      
+}
+
+/**
+ *     skb_cloned - is the buffer a clone
+ *     @skb: buffer to check
+ *
+ *     Returns true if the buffer was generated with skb_clone() and is
+ *     one of multiple shared copies of the buffer. Cloned buffers are
+ *     shared data so must not be written to under normal circumstances.
+ */
+
+static inline int skb_cloned(struct sk_buff *skb)
+{
+       return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ *     skb_shared - is the buffer shared
+ *     @skb: buffer to check
+ *
+ *     Returns true if more than one person has a reference to this
+ *     buffer.
+ */
+static inline int skb_shared(struct sk_buff *skb)
+{
+       return (atomic_read(&skb->users) != 1);
+}
+
+/** 
+ *     skb_share_check - check if buffer is shared and if so clone it
+ *     @skb: buffer to check
+ *     @pri: priority for memory allocation
+ *     
+ *     If the buffer is shared the buffer is cloned and the old copy
+ *     drops a reference. A new clone with a single reference is returned.
+ *     If the buffer is not shared the original buffer is returned. When
+ *     being called from interrupt context or with spinlocks held, @pri
+ *     must be %GFP_ATOMIC.
+ *
+ *     NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+       if (skb_shared(skb)) {
+               struct sk_buff *nskb;
+               nskb = skb_clone(skb, pri);
+               kfree_skb(skb);
+               return nskb;
+       }
+       return skb;
+}
+
+
+/*
+ *     Copy shared buffers into a new sk_buff. We effectively do COW on
+ *     packets to handle cases where we have a local reader and forward
+ *     and a couple of other messy ones. The normal one is tcpdumping
+ *     a packet that's being forwarded.
+ */
+/**
+ *     skb_unshare - make a copy of a shared buffer
+ *     @skb: buffer to check
+ *     @pri: priority for memory allocation
+ *
+ *     If the socket buffer is a clone then this function creates a new
+ *     copy of the data, drops a reference count on the old copy and returns
+ *     the new copy with the reference count at 1. If the buffer is not a clone
+ *     the original buffer is returned. When called with a spinlock held or
+ *     from interrupt context, @pri must be %GFP_ATOMIC.
+ *
+ *     %NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+       struct sk_buff *nskb;
+       if(!skb_cloned(skb))
+               return skb;
+       nskb=skb_copy(skb, pri);
+       kfree_skb(skb);         /* Free our shared copy */
+       return nskb;
+}
+
+/**
+ *     skb_peek
+ *     @list_: list to peek at
+ *
+ *     Peek an &sk_buff. Unlike most other operations you _MUST_
+ *     be careful with this one. A peek leaves the buffer on the
+ *     list and someone else may run off with it. You must hold
+ *     the appropriate locks or have a private queue to do this.
+ *
+ *     Returns %NULL for an empty list or a pointer to the head element.
+ *     The reference count is not incremented and the reference is therefore
+ *     volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+       struct sk_buff *list = ((struct sk_buff *)list_)->next;
+       if (list == (struct sk_buff *)list_)
+               list = NULL;
+       return list;
+}
+
+/**
+ *     skb_peek_tail
+ *     @list_: list to peek at
+ *
+ *     Peek an &sk_buff. Unlike most other operations you _MUST_
+ *     be careful with this one. A peek leaves the buffer on the
+ *     list and someone else may run off with it. You must hold
+ *     the appropriate locks or have a private queue to do this.
+ *
+ *     Returns %NULL for an empty list or a pointer to the tail element.
+ *     The reference count is not incremented and the reference is therefore
+ *     volatile. Use with caution.
+ */
+
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+       struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+       if (list == (struct sk_buff *)list_)
+               list = NULL;
+       return list;
+}
+
+/**
+ *     skb_queue_len   - get queue length
+ *     @list_: list to measure
+ *
+ *     Return the length of an &sk_buff queue. 
+ */
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+       return(list_->qlen);
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+       spin_lock_init(&list->lock);
+       list->prev = (struct sk_buff *)list;
+       list->next = (struct sk_buff *)list;
+       list->qlen = 0;
+}
+
+/*
+ *     Insert an sk_buff at the start of a list.
+ *
+ *     The "__skb_xxxx()" functions are the non-atomic ones that
+ *     can only be called with interrupts disabled.
+ */
+
+/**
+ *     __skb_queue_head - queue a buffer at the list head
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the start of a list. This function takes no locks
+ *     and you must therefore hold required locks before calling it.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */    
+static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       struct sk_buff *prev, *next;
+
+       newsk->list = list;
+       list->qlen++;
+       prev = (struct sk_buff *)list;
+       next = prev->next;
+       newsk->next = next;
+       newsk->prev = prev;
+       next->prev = newsk;
+       prev->next = newsk;
+}
+
+
+/**
+ *     skb_queue_head - queue a buffer at the list head
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the start of the list. This function takes the
+ *     list lock and can be used safely with other locking &sk_buff
+ *     functions.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */    
+
+static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_queue_head(list, newsk);
+       spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *     __skb_queue_tail - queue a buffer at the list tail
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the end of a list. This function takes no locks
+ *     and you must therefore hold required locks before calling it.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */    
+
+static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       struct sk_buff *prev, *next;
+
+       newsk->list = list;
+       list->qlen++;
+       next = (struct sk_buff *)list;
+       prev = next->prev;
+       newsk->next = next;
+       newsk->prev = prev;
+       next->prev = newsk;
+       prev->next = newsk;
+}
+
+/**
+ *     skb_queue_tail - queue a buffer at the list tail
+ *     @list: list to use
+ *     @newsk: buffer to queue
+ *
+ *     Queue a buffer at the tail of the list. This function takes the
+ *     list lock and can be used safely with other locking &sk_buff
+ *     functions.
+ *
+ *     A buffer cannot be placed on two lists at the same time.
+ */    
+
+static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&list->lock, flags);
+       __skb_queue_tail(list, newsk);
+       spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ *     __skb_dequeue - remove from the head of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the head of the list. This function does not take any locks
+ *     so must be used with appropriate locks held only. The head item is
+ *     returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+       struct sk_buff *next, *prev, *result;
+
+       prev = (struct sk_buff *) list;
+       next = prev->next;
+       result = NULL;
+       if (next != prev) {
+               result = next;
+               next = next->next;
+               list->qlen--;
+               next->prev = prev;
+               prev->next = next;
+               result->next = NULL;
+               result->prev = NULL;
+               result->list = NULL;
+       }
+       return result;
+}
+
+/**
+ *     skb_dequeue - remove from the head of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the head of the list. The list lock is taken so the function
+ *     may be used safely with other locking list functions. The head item is
+ *     returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+       unsigned long flags;
+       struct sk_buff *result;
+
+       spin_lock_irqsave(&list->lock, flags);
+       result = __skb_dequeue(list);
+       spin_unlock_irqrestore(&list->lock, flags);
+       return result;
+}
+
+/*
+ *     Insert a packet on a list.
+ */
+
+static inline void __skb_insert(struct sk_buff *newsk,
+       struct sk_buff * prev, struct sk_buff *next,
+       struct sk_buff_head * list)
+{
+       newsk->next = next;
+       newsk->prev = prev;
+       next->prev = newsk;
+       prev->next = newsk;
+       newsk->list = list;
+       list->qlen++;
+}
+
+/**
+ *     skb_insert      -       insert a buffer
+ *     @old: buffer to insert before
+ *     @newsk: buffer to insert
+ *
+ *     Place a packet before a given packet in a list. The list locks are taken
+ *     and this function is atomic with respect to other list locked calls
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&old->list->lock, flags);
+       __skb_insert(newsk, old->prev, old, old->list);
+       spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ *     Place a packet after a given packet in a list.
+ */
+
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+       __skb_insert(newsk, old, old->next, old->list);
+}
+
+/**
+ *     skb_append      -       append a buffer
+ *     @old: buffer to insert after
+ *     @newsk: buffer to insert
+ *
+ *     Place a packet after a given packet in a list. The list locks are taken
+ *     and this function is atomic with respect to other list locked calls.
+ *     A buffer cannot be placed on two lists at the same time.
+ */
+
+
+static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&old->list->lock, flags);
+       __skb_append(old, newsk);
+       spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * remove sk_buff from list. _Must_ be called atomically, and with
+ * the list known..
+ */
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+       struct sk_buff * next, * prev;
+
+       list->qlen--;
+       next = skb->next;
+       prev = skb->prev;
+       skb->next = NULL;
+       skb->prev = NULL;
+       skb->list = NULL;
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ *     skb_unlink      -       remove a buffer from a list
+ *     @skb: buffer to remove
+ *
+ *     Remove a packet from a list. The list locks are taken
+ *     and this function is atomic with respect to other list locked calls.
+ *     
+ *     Works even without knowing the list it is sitting on, which can be 
+ *     handy at times. It also means that THE LIST MUST EXIST when you 
+ *     unlink. Thus a list must have its contents unlinked before it is
+ *     destroyed.
+ */
+
+static inline void skb_unlink(struct sk_buff *skb)
+{
+       struct sk_buff_head *list = skb->list;
+
+       if(list) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&list->lock, flags);
+               if(skb->list == list)
+                       __skb_unlink(skb, skb->list);
+               spin_unlock_irqrestore(&list->lock, flags);
+       }
+}
+
+/* XXX: more streamlined implementation */
+
+/**
+ *     __skb_dequeue_tail - remove from the tail of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the tail of the list. This function does not take any locks
+ *     so must be used with appropriate locks held only. The tail item is
+ *     returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+       struct sk_buff *skb = skb_peek_tail(list); 
+       if (skb)
+               __skb_unlink(skb, list);
+       return skb;
+}
+
+/**
+ *     skb_dequeue_tail - remove from the tail of the queue
+ *     @list: list to dequeue from
+ *
+ *     Remove the tail of the list. The list lock is taken so the function
+ *     may be used safely with other locking list functions. The tail item is
+ *     returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+       unsigned long flags;
+       struct sk_buff *result;
+
+       spin_lock_irqsave(&list->lock, flags);
+       result = __skb_dequeue_tail(list);
+       spin_unlock_irqrestore(&list->lock, flags);
+       return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+       return skb->data_len;
+}
+
+static inline int skb_headlen(const struct sk_buff *skb)
+{
+       return skb->len - skb->data_len;
+}
+
+#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
+#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
+
+/*
+ *     Add data to an sk_buff
+ */
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+       unsigned char *tmp=skb->tail;
+       SKB_LINEAR_ASSERT(skb);
+       skb->tail+=len;
+       skb->len+=len;
+       return tmp;
+}
+
+/**
+ *     skb_put - add data to a buffer
+ *     @skb: buffer to use 
+ *     @len: amount of data to add
+ *
+ *     This function extends the used data area of the buffer. If this would
+ *     exceed the total buffer size the kernel will panic. A pointer to the
+ *     first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+       unsigned char *tmp=skb->tail;
+       SKB_LINEAR_ASSERT(skb);
+       skb->tail+=len;
+       skb->len+=len;
+       if(skb->tail>skb->end) {
+               skb_over_panic(skb, len, current_text_addr());
+       }
+       return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+       skb->data-=len;
+       skb->len+=len;
+       return skb->data;
+}
+
+/**
+ *     skb_push - add data to the start of a buffer
+ *     @skb: buffer to use 
+ *     @len: amount of data to add
+ *
+ *     This function extends the used data area of the buffer at the buffer
+ *     start. If this would exceed the total buffer headroom the kernel will
+ *     panic. A pointer to the first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+       skb->data-=len;
+       skb->len+=len;
+       if(skb->data<skb->head) {
+               skb_under_panic(skb, len, current_text_addr());
+       }
+       return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+       skb->len-=len;
+       if (skb->len < skb->data_len)
+               BUG();
+       return  skb->data+=len;
+}
+
+/**
+ *     skb_pull - remove data from the start of a buffer
+ *     @skb: buffer to use 
+ *     @len: amount of data to remove
+ *
+ *     This function removes data from the start of a buffer, returning
+ *     the memory to the headroom. A pointer to the next data in the buffer
+ *     is returned. Once the data has been pulled future pushes will overwrite
+ *     the old data.
+ */
+
+static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
+{      
+       if (len > skb->len)
+               return NULL;
+       return __skb_pull(skb,len);
+}
+
+extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+       if (len > skb_headlen(skb) &&
+           __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
+               return NULL;
+       skb->len -= len;
+       return  skb->data += len;
+}
+
+static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
+{      
+       if (len > skb->len)
+               return NULL;
+       return __pskb_pull(skb,len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+       if (len <= skb_headlen(skb))
+               return 1;
+       if (len > skb->len)
+               return 0;
+       return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
+}
+
+/**
+ *     skb_headroom - bytes at buffer head
+ *     @skb: buffer to check
+ *
+ *     Return the number of bytes of free space at the head of an &sk_buff.
+ */
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+       return skb->data-skb->head;
+}
+
+/**
+ *     skb_tailroom - bytes at buffer end
+ *     @skb: buffer to check
+ *
+ *     Return the number of bytes of free space at the tail of an sk_buff
+ */
+
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+       return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
+}
+
+/**
+ *     skb_reserve - adjust headroom
+ *     @skb: buffer to alter
+ *     @len: bytes to move
+ *
+ *     Increase the headroom of an empty &sk_buff by reducing the tail
+ *     room. This is only allowed for an empty buffer.
+ */
+
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+       skb->data+=len;
+       skb->tail+=len;
+}
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+       if (!skb->data_len) {
+               skb->len = len;
+               skb->tail = skb->data+len;
+       } else {
+               ___pskb_trim(skb, len, 0);
+       }
+}
+
+/**
+ *     skb_trim - remove end from a buffer
+ *     @skb: buffer to alter
+ *     @len: new length
+ *
+ *     Cut the length of a buffer down by removing data from the tail. If
+ *     the buffer is already under the length specified it is not modified.
+ */
+
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+       if (skb->len > len) {
+               __skb_trim(skb, len);
+       }
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+       if (!skb->data_len) {
+               skb->len = len;
+               skb->tail = skb->data+len;
+               return 0;
+       } else {
+               return ___pskb_trim(skb, len, 1);
+       }
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+       if (len < skb->len)
+               return __pskb_trim(skb, len);
+       return 0;
+}
+
+/**
+ *     skb_orphan - orphan a buffer
+ *     @skb: buffer to orphan
+ *
+ *     If a buffer currently has an owner then we call the owner's
+ *     destructor function and make the @skb unowned. The buffer continues
+ *     to exist but is no longer charged to its former owner.
+ */
+
+
+static inline void skb_orphan(struct sk_buff *skb)
+{
+       if (skb->destructor)
+               skb->destructor(skb);
+       skb->destructor = NULL;
+       skb->sk = NULL;
+}
+
+/**
+ *     skb_queue_purge - empty a list
+ *     @list: list to empty
+ *
+ *     Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *     the list and one reference dropped. This function takes the list
+ *     lock and is atomic with respect to other list locking functions.
+ */
+
+
+static inline void skb_queue_purge(struct sk_buff_head *list)
+{
+       struct sk_buff *skb;
+       while ((skb=skb_dequeue(list))!=NULL)
+               kfree_skb(skb);
+}
+
+/**
+ *     __skb_queue_purge - empty a list
+ *     @list: list to empty
+ *
+ *     Delete all buffers on an &sk_buff list. Each buffer is removed from
+ *     the list and one reference dropped. This function does not take the
+ *     list lock and the caller must hold the relevant locks to use it.
+ */
+
+
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+       struct sk_buff *skb;
+       while ((skb=__skb_dequeue(list))!=NULL)
+               kfree_skb(skb);
+}
+
+/**
+ *     __dev_alloc_skb - allocate an skbuff for sending
+ *     @length: length to allocate
+ *     @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ *     Allocate a new &sk_buff and assign it a usage count of one. The
+ *     buffer has unspecified headroom built in. Users should allocate
+ *     the headroom they think they need without accounting for the
+ *     built in space. The built in space is used for optimisations.
+ *
+ *     %NULL is returned if there is no free memory.
+ */
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+                                             int gfp_mask)
+{
+       struct sk_buff *skb;
+
+       skb = alloc_skb(length+16, gfp_mask);
+       if (skb)
+               skb_reserve(skb,16);
+       return skb;
+}
+
+/**
+ *     dev_alloc_skb - allocate an skbuff for sending
+ *     @length: length to allocate
+ *
+ *     Allocate a new &sk_buff and assign it a usage count of one. The
+ *     buffer has unspecified headroom built in. Users should allocate
+ *     the headroom they think they need without accounting for the
+ *     built in space. The built in space is used for optimisations.
+ *
+ *     %NULL is returned if there is no free memory. Although this function
+ *     allocates memory it can be called from an interrupt.
+ */
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+       return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+
+/**
+ *     skb_cow - copy header of skb when it is required
+ *     @skb: buffer to cow
+ *     @headroom: needed headroom
+ *
+ *     If the skb passed lacks sufficient headroom or its data part
+ *     is shared, data is reallocated. If reallocation fails, an error
+ *     is returned and original skb is not changed.
+ *
+ *     The result is skb with writable area skb->head...skb->tail
+ *     and at least @headroom of space at head.
+ */
+
+static inline int
+skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+       int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+       if (delta < 0)
+               delta = 0;
+
+       if (delta || skb_cloned(skb))
+               return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
+       return 0;
+}
+
+/**
+ *     skb_linearize - convert paged skb to linear one
+ *     @skb: buffer to linearize
+ *     @gfp: allocation mode
+ *
+ *     If there is no free memory -ENOMEM is returned, otherwise zero
+ *     is returned and the old skb data released.  */
+int skb_linearize(struct sk_buff *skb, int gfp);
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+       return page_address(frag->page);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+}
+
+#define skb_queue_walk(queue, skb) \
+               for (skb = (queue)->next;                       \
+                    (skb != (struct sk_buff *)(queue));        \
+                    skb=skb->next)
+
+
+extern struct sk_buff *                skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
+extern int                     skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
+extern int                     skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
+extern void                    skb_free_datagram(struct sock * sk, struct sk_buff *skb);
+
+extern unsigned int            skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
+extern int                     skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+extern unsigned int            skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
+extern void                    skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+#ifdef CONFIG_NETFILTER
+static inline void
+nf_conntrack_put(struct nf_ct_info *nfct)
+{
+       if (nfct && atomic_dec_and_test(&nfct->master->use))
+               nfct->master->destroy(nfct->master);
+}
+static inline void
+nf_conntrack_get(struct nf_ct_info *nfct)
+{
+       if (nfct)
+               atomic_inc(&nfct->master->use);
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SKBUFF_H */
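
A short sketch of the allocation helpers above, in the classic receive-path shape: dev_alloc_skb() builds in 16 bytes of headroom, an extra skb_reserve(skb, 2) offsets the 14-byte Ethernet header so the IP header lands 16-byte aligned, and skb_put() opens room for the payload. Illustrative only; memcpy and the frame source are assumed available to the caller.

    static struct sk_buff *rx_build_skb(const void *frame, unsigned int len)
    {
        struct sk_buff *skb = dev_alloc_skb(len + 2);  /* GFP_ATOMIC inside */
        if (skb == NULL)
            return NULL;
        skb_reserve(skb, 2);                    /* align the IP header  */
        memcpy(skb_put(skb, len), frame, len);  /* extend tail, copy in */
        return skb;
    }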
diff --git a/xen-2.4.16/include/xeno/slab.h b/xen-2.4.16/include/xeno/slab.h
new file mode 100644 (file)
index 0000000..c7aadff
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@nextd.demon.co.uk)
+ */
+
+#if    !defined(_LINUX_SLAB_H)
+#define        _LINUX_SLAB_H
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include       <xeno/mm.h>
+#include       <xeno/cache.h>
+
+/* flags for kmem_cache_alloc() */
+#define        SLAB_NOFS               GFP_NOFS
+#define        SLAB_NOIO               GFP_NOIO
+#define SLAB_NOHIGHIO          GFP_NOHIGHIO
+#define        SLAB_ATOMIC             GFP_ATOMIC
+#define        SLAB_USER               GFP_USER
+#define        SLAB_KERNEL             GFP_KERNEL
+#define        SLAB_NFS                GFP_NFS
+#define        SLAB_DMA                GFP_DMA
+
+#define SLAB_LEVEL_MASK                (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
+#define        SLAB_NO_GROW            0x00001000UL    /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator has been built with
+ * SLAB_DEBUG_SUPPORT.
+ */
+#define        SLAB_DEBUG_FREE         0x00000100UL    /* Perform (expensive) checks on free */
+#define        SLAB_DEBUG_INITIAL      0x00000200UL    /* Call constructor (as verifier) */
+#define        SLAB_RED_ZONE           0x00000400UL    /* Red zone objs in a cache */
+#define        SLAB_POISON             0x00000800UL    /* Poison objects */
+#define        SLAB_NO_REAP            0x00001000UL    /* never reap from the cache */
+#define        SLAB_HWCACHE_ALIGN      0x00002000UL    /* align objs on h/w cache lines */
+#define SLAB_CACHE_DMA         0x00004000UL    /* use GFP_DMA memory */
+
+/* flags passed to a constructor func */
+#define        SLAB_CTOR_CONSTRUCTOR   0x001UL         /* if not set, then destructor */
+#define SLAB_CTOR_ATOMIC       0x002UL         /* tell constructor it can't sleep */
+#define        SLAB_CTOR_VERIFY        0x004UL         /* tell constructor it's a verify call */
+
+/* prototypes */
+extern void kmem_cache_init(void);
+extern void kmem_cache_sizes_init(unsigned long);
+
+extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags);
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
+                                      void (*)(void *, kmem_cache_t *, unsigned long),
+                                      void (*)(void *, kmem_cache_t *, unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, int);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+
+extern void *kmalloc(size_t, int);
+extern void kfree(const void *);
+
+extern int FASTCALL(kmem_cache_reap(int));
+#if 0
+extern int slabinfo_read_proc(char *page, char **start, off_t off,
+                                int count, int *eof, void *data);
+extern int slabinfo_write_proc(struct file *file, const char *buffer,
+                          unsigned long count, void *data);
+#endif
+
+/* System wide caches */
+extern kmem_cache_t    *vm_area_cachep;
+extern kmem_cache_t    *mm_cachep;
+extern kmem_cache_t    *names_cachep;
+extern kmem_cache_t    *files_cachep;
+extern kmem_cache_t    *filp_cachep;
+extern kmem_cache_t    *dquot_cachep;
+extern kmem_cache_t    *bh_cachep;
+extern kmem_cache_t    *fs_cachep;
+extern kmem_cache_t    *sigact_cachep;
+
+
+#endif /* _LINUX_SLAB_H */
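
The prototypes above imply the standard slab pattern: create a named cache once at initialisation, then serve fixed-size objects from it. A minimal sketch; the object type and cache name are hypothetical, and constructor/destructor are left NULL.

    struct conn { int id; void *ring; };  /* hypothetical object */
    static kmem_cache_t *conn_cachep;

    static int conn_cache_init(void)
    {
        conn_cachep = kmem_cache_create("conn_cache", sizeof(struct conn),
                                        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
        return (conn_cachep != NULL) ? 0 : -1;
    }

    static struct conn *conn_alloc(void)
    {
        return kmem_cache_alloc(conn_cachep, SLAB_KERNEL);
    }

    static void conn_free(struct conn *c)
    {
        kmem_cache_free(conn_cachep, c);
    }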
diff --git a/xen-2.4.16/include/xeno/smp.h b/xen-2.4.16/include/xeno/smp.h
new file mode 100644 (file)
index 0000000..7860266
--- /dev/null
@@ -0,0 +1,88 @@
+#ifndef __LINUX_SMP_H
+#define __LINUX_SMP_H
+
+/*
+ *     Generic SMP support
+ *             Alan Cox. <alan@redhat.com>
+ */
+
+#include <xeno/config.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+
+/*
+ * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
+ * (defined in asm header):
+ */ 
+
+/*
+ * stops all CPUs but the current one:
+ */
+extern void smp_send_stop(void);
+
+extern void FASTCALL(smp_send_event_check_mask(unsigned long cpu_mask));
+#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
+
+
+/*
+ * Boot processor call to load the other CPUs
+ */
+extern void smp_boot_cpus(void);
+
+/*
+ * Processor call in. Must hold processors until ..
+ */
+extern void smp_callin(void);
+
+/*
+ * Multiprocessors may now schedule
+ */
+extern void smp_commence(void);
+
+/*
+ * Call a function on all other processors
+ */
+extern int smp_call_function (void (*func) (void *info), void *info,
+                             int retry, int wait);
+
+/*
+ * True once the per process idle is forked
+ */
+extern int smp_threads_ready;
+
+extern int smp_num_cpus;
+
+extern volatile unsigned long smp_msg_data;
+extern volatile int smp_src_cpu;
+extern volatile int smp_msg_id;
+
+#define MSG_ALL_BUT_SELF       0x8000  /* Assume <32768 CPU's */
+#define MSG_ALL                        0x8001
+
+#define MSG_INVALIDATE_TLB     0x0001  /* Remote processor TLB invalidate */
+#define MSG_STOP_CPU           0x0002  /* Sent to shut down slave CPU's
+                                        * when rebooting
+                                        */
+#define MSG_RESCHEDULE         0x0003  /* Reschedule request from master CPU*/
+#define MSG_CALL_FUNCTION       0x0004  /* Call function on all other CPUs */
+
+#else
+
+/*
+ *     These macros fold the SMP functionality into a single CPU system
+ */
+#define smp_num_cpus                           1
+#define smp_processor_id()                     0
+#define hard_smp_processor_id()                        0
+#define smp_threads_ready                      1
+#define kernel_lock()
+#define cpu_logical_map(cpu)                   0
+#define cpu_number_map(cpu)                    0
+#define smp_call_function(func,info,retry,wait)        ({ 0; })
+#define cpu_online_map                         1
+
+#endif
+#endif
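
A sketch of smp_call_function() as declared above: run a handler on every other CPU, waiting for completion, then cover the calling CPU by hand, since the callee set excludes it. The handler is hypothetical; on UP builds the macro above folds the cross-call into a no-op.

    static void flush_local_state(void *info)  /* hypothetical handler */
    {
        /* per-CPU work goes here */
    }

    static void flush_all_cpus(void)
    {
        /* retry=1, wait=1: block until all other CPUs have run it */
        smp_call_function(flush_local_state, NULL, 1, 1);
        flush_local_state(NULL);  /* the calling CPU is not included */
    }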
diff --git a/xen-2.4.16/include/xeno/socket.h b/xen-2.4.16/include/xeno/socket.h
new file mode 100644 (file)
index 0000000..92e7b93
--- /dev/null
@@ -0,0 +1,136 @@
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+//#include <asm/socket.h>                      /* arch-dependent defines       */
+#include <linux/sockios.h>             /* the SIOCxxx I/O controls     */
+//#include <linux/uio.h>                       /* iovec support                */
+#include <linux/types.h>               /* pid_t                        */
+
+typedef unsigned short sa_family_t;
+
+/*
+ *     1003.1g requires sa_family_t and that sa_data is char.
+ */
+struct sockaddr {
+       sa_family_t     sa_family;      /* address family, AF_xxx       */
+       char            sa_data[14];    /* 14 bytes of protocol address */
+};
+
+/* Supported address families. */
+#define AF_UNSPEC      0
+#define AF_UNIX                1       /* Unix domain sockets          */
+#define AF_LOCAL       1       /* POSIX name for AF_UNIX       */
+#define AF_INET                2       /* Internet IP Protocol         */
+#define AF_AX25                3       /* Amateur Radio AX.25          */
+#define AF_IPX         4       /* Novell IPX                   */
+#define AF_APPLETALK   5       /* AppleTalk DDP                */
+#define AF_NETROM      6       /* Amateur Radio NET/ROM        */
+#define AF_BRIDGE      7       /* Multiprotocol bridge         */
+#define AF_ATMPVC      8       /* ATM PVCs                     */
+#define AF_X25         9       /* Reserved for X.25 project    */
+#define AF_INET6       10      /* IP version 6                 */
+#define AF_ROSE                11      /* Amateur Radio X.25 PLP       */
+#define AF_DECnet      12      /* Reserved for DECnet project  */
+#define AF_NETBEUI     13      /* Reserved for 802.2LLC project*/
+#define AF_SECURITY    14      /* Security callback pseudo AF */
+#define AF_KEY         15      /* PF_KEY key management API */
+#define AF_NETLINK     16
+#define AF_ROUTE       AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET      17      /* Packet family                */
+#define AF_ASH         18      /* Ash                          */
+#define AF_ECONET      19      /* Acorn Econet                 */
+#define AF_ATMSVC      20      /* ATM SVCs                     */
+#define AF_SNA         22      /* Linux SNA Project (nutters!) */
+#define AF_IRDA                23      /* IRDA sockets                 */
+#define AF_PPPOX       24      /* PPPoX sockets                */
+#define AF_WANPIPE     25      /* Wanpipe API Sockets */
+#define AF_BLUETOOTH   31      /* Bluetooth sockets            */
+#define AF_MAX         32      /* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC      AF_UNSPEC
+#define PF_UNIX                AF_UNIX
+#define PF_LOCAL       AF_LOCAL
+#define PF_INET                AF_INET
+#define PF_AX25                AF_AX25
+#define PF_IPX         AF_IPX
+#define PF_APPLETALK   AF_APPLETALK
+#define        PF_NETROM       AF_NETROM
+#define PF_BRIDGE      AF_BRIDGE
+#define PF_ATMPVC      AF_ATMPVC
+#define PF_X25         AF_X25
+#define PF_INET6       AF_INET6
+#define PF_ROSE                AF_ROSE
+#define PF_DECnet      AF_DECnet
+#define PF_NETBEUI     AF_NETBEUI
+#define PF_SECURITY    AF_SECURITY
+#define PF_KEY         AF_KEY
+#define PF_NETLINK     AF_NETLINK
+#define PF_ROUTE       AF_ROUTE
+#define PF_PACKET      AF_PACKET
+#define PF_ASH         AF_ASH
+#define PF_ECONET      AF_ECONET
+#define PF_ATMSVC      AF_ATMSVC
+#define PF_SNA         AF_SNA
+#define PF_IRDA                AF_IRDA
+#define PF_PPPOX       AF_PPPOX
+#define PF_WANPIPE     AF_WANPIPE
+#define PF_BLUETOOTH   AF_BLUETOOTH
+#define PF_MAX         AF_MAX
+
+/* Maximum queue length specifiable by listen.  */
+#define SOMAXCONN      128
+
+/* Flags we can use with send and recv.
+   Added for 1003.1g; not all are supported yet.
+ */
+#define MSG_OOB                1
+#define MSG_PEEK       2
+#define MSG_DONTROUTE  4
+#define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC     8
+#define MSG_PROBE      0x10    /* Do not send; only probe path, e.g. for MTU */
+#define MSG_TRUNC      0x20
+#define MSG_DONTWAIT   0x40    /* Non-blocking I/O             */
+#define MSG_EOR         0x80   /* End of record */
+#define MSG_WAITALL    0x100   /* Wait for a full request */
+#define MSG_FIN         0x200
+#define MSG_SYN                0x400
+#define MSG_CONFIRM    0x800   /* Confirm path validity */
+#define MSG_RST                0x1000
+#define MSG_ERRQUEUE   0x2000  /* Fetch message from error queue */
+#define MSG_NOSIGNAL   0x4000  /* Do not generate SIGPIPE */
+#define MSG_MORE       0x8000  /* Sender will send more */
+
+#define MSG_EOF         MSG_FIN
+
+
+/* setsockopt(2) levels. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP         0
+/* #define SOL_ICMP    1       No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP                6
+#define SOL_UDP                17
+#define SOL_IPV6       41
+#define SOL_ICMPV6     58
+#define SOL_RAW                255
+#define SOL_IPX                256
+#define SOL_AX25       257
+#define SOL_ATALK      258
+#define SOL_NETROM     259
+#define SOL_ROSE       260
+#define SOL_DECNET     261
+#define        SOL_X25         262
+#define SOL_PACKET     263
+#define SOL_ATM                264     /* ATM layer (cell level) */
+#define SOL_AAL                265     /* ATM Adaptation Layer (packet level) */
+#define SOL_IRDA        266
+
+/* IPX options */
+#define IPX_TYPE       1
+
+#endif /* not kernel and not glibc */
+#endif /* _LINUX_SOCKET_H */
diff --git a/xen-2.4.16/include/xeno/sockios.h b/xen-2.4.16/include/xeno/sockios.h
new file mode 100644 (file)
index 0000000..81f99a1
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions of the socket-level I/O control calls.
+ *
+ * Version:    @(#)sockios.h   1.0.2   03/09/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_SOCKIOS_H
+#define _LINUX_SOCKIOS_H
+
+//#include <asm/sockios.h>
+
+/* Linux-specific socket ioctls */
+#define SIOCINQ                FIONREAD
+#define SIOCOUTQ       TIOCOUTQ
+
+/* Routing table calls. */
+#define SIOCADDRT      0x890B          /* add routing table entry      */
+#define SIOCDELRT      0x890C          /* delete routing table entry   */
+#define SIOCRTMSG      0x890D          /* call to routing system       */
+
+/* Socket configuration controls. */
+#define SIOCGIFNAME    0x8910          /* get iface name               */
+#define SIOCSIFLINK    0x8911          /* set iface channel            */
+#define SIOCGIFCONF    0x8912          /* get iface list               */
+#define SIOCGIFFLAGS   0x8913          /* get flags                    */
+#define SIOCSIFFLAGS   0x8914          /* set flags                    */
+#define SIOCGIFADDR    0x8915          /* get PA address               */
+#define SIOCSIFADDR    0x8916          /* set PA address               */
+#define SIOCGIFDSTADDR 0x8917          /* get remote PA address        */
+#define SIOCSIFDSTADDR 0x8918          /* set remote PA address        */
+#define SIOCGIFBRDADDR 0x8919          /* get broadcast PA address     */
+#define SIOCSIFBRDADDR 0x891a          /* set broadcast PA address     */
+#define SIOCGIFNETMASK 0x891b          /* get network PA mask          */
+#define SIOCSIFNETMASK 0x891c          /* set network PA mask          */
+#define SIOCGIFMETRIC  0x891d          /* get metric                   */
+#define SIOCSIFMETRIC  0x891e          /* set metric                   */
+#define SIOCGIFMEM     0x891f          /* get memory address (BSD)     */
+#define SIOCSIFMEM     0x8920          /* set memory address (BSD)     */
+#define SIOCGIFMTU     0x8921          /* get MTU size                 */
+#define SIOCSIFMTU     0x8922          /* set MTU size                 */
+#define SIOCSIFNAME    0x8923          /* set interface name */
+#define        SIOCSIFHWADDR   0x8924          /* set hardware address         */
+#define SIOCGIFENCAP   0x8925          /* get/set encapsulations       */
+#define SIOCSIFENCAP   0x8926          
+#define SIOCGIFHWADDR  0x8927          /* Get hardware address         */
+#define SIOCGIFSLAVE   0x8929          /* Driver slaving support       */
+#define SIOCSIFSLAVE   0x8930
+#define SIOCADDMULTI   0x8931          /* Multicast address lists      */
+#define SIOCDELMULTI   0x8932
+#define SIOCGIFINDEX   0x8933          /* name -> if_index mapping     */
+#define SIOGIFINDEX    SIOCGIFINDEX    /* misprint compatibility :-)   */
+#define SIOCSIFPFLAGS  0x8934          /* set/get extended flags set   */
+#define SIOCGIFPFLAGS  0x8935
+#define SIOCDIFADDR    0x8936          /* delete PA address            */
+#define        SIOCSIFHWBROADCAST      0x8937  /* set hardware broadcast addr  */
+#define SIOCGIFCOUNT   0x8938          /* get number of devices */
+
+#define SIOCGIFBR      0x8940          /* Bridging support             */
+#define SIOCSIFBR      0x8941          /* Set bridging options         */
+
+#define SIOCGIFTXQLEN  0x8942          /* Get the tx queue length      */
+#define SIOCSIFTXQLEN  0x8943          /* Set the tx queue length      */
+
+#define SIOCGIFDIVERT  0x8944          /* Frame diversion support */
+#define SIOCSIFDIVERT  0x8945          /* Set frame diversion options */
+
+#define SIOCETHTOOL    0x8946          /* Ethtool interface            */
+
+#define SIOCGMIIPHY    0x8947          /* Get address of MII PHY in use. */
+#define SIOCGMIIREG    0x8948          /* Read MII PHY register.       */
+#define SIOCSMIIREG    0x8949          /* Write MII PHY register.      */
+
+/* ARP cache control calls. */
+                   /* 0x8950 - 0x8952: obsolete calls, don't re-use */
+#define SIOCDARP       0x8953          /* delete ARP table entry       */
+#define SIOCGARP       0x8954          /* get ARP table entry          */
+#define SIOCSARP       0x8955          /* set ARP table entry          */
+
+/* RARP cache control calls. */
+#define SIOCDRARP      0x8960          /* delete RARP table entry      */
+#define SIOCGRARP      0x8961          /* get RARP table entry         */
+#define SIOCSRARP      0x8962          /* set RARP table entry         */
+
+/* Driver configuration calls */
+
+#define SIOCGIFMAP     0x8970          /* Get device parameters        */
+#define SIOCSIFMAP     0x8971          /* Set device parameters        */
+
+/* DLCI configuration calls */
+
+#define SIOCADDDLCI    0x8980          /* Create new DLCI device       */
+#define SIOCDELDLCI    0x8981          /* Delete DLCI device           */
+
+#define SIOCGIFVLAN    0x8982          /* 802.1Q VLAN support          */
+#define SIOCSIFVLAN    0x8983          /* Set 802.1Q VLAN options      */
+
+/* bonding calls */
+
+#define SIOCBONDENSLAVE        0x8990          /* enslave a device to the bond */
+#define SIOCBONDRELEASE 0x8991         /* release a slave from the bond */
+#define SIOCBONDSETHWADDR      0x8992  /* set the hw addr of the bond  */
+#define SIOCBONDSLAVEINFOQUERY 0x8993   /* rtn info about slave state   */
+#define SIOCBONDINFOQUERY      0x8994  /* rtn info about bond state    */
+#define SIOCBONDCHANGEACTIVE   0x8995   /* update to a new active slave */
+                       
+/* Device private ioctl calls */
+
+/*
+ *     These 16 ioctls are available to devices via the do_ioctl() device
+ *     vector. Each device should include this file and redefine these names
+ *     as their own. Because these are device dependent it is a good idea
+ *     _NOT_ to issue them to random objects and hope.
+ *
+ *     THESE IOCTLS ARE _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X -DaveM
+ */
+#define SIOCDEVPRIVATE 0x89F0  /* to 89FF */
+
+/*
+ *     These 16 ioctl calls are protocol private
+ */
+#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#endif /* _LINUX_SOCKIOS_H */
diff --git a/xen-2.4.16/include/xeno/spinlock.h b/xen-2.4.16/include/xeno/spinlock.h
new file mode 100644 (file)
index 0000000..08f2eb6
--- /dev/null
@@ -0,0 +1,142 @@
+#ifndef __LINUX_SPINLOCK_H
+#define __LINUX_SPINLOCK_H
+
+#include <xeno/config.h>
+#include <asm/system.h>
+
+/*
+ * These are the generic versions of the spinlocks and read-write
+ * locks..
+ */
+#define spin_lock_irqsave(lock, flags)         do { local_irq_save(flags);       spin_lock(lock); } while (0)
+#define spin_lock_irq(lock)                    do { local_irq_disable();         spin_lock(lock); } while (0)
+#define spin_lock_bh(lock)                     do { local_bh_disable();          spin_lock(lock); } while (0)
+
+#define read_lock_irqsave(lock, flags)         do { local_irq_save(flags);       read_lock(lock); } while (0)
+#define read_lock_irq(lock)                    do { local_irq_disable();         read_lock(lock); } while (0)
+#define read_lock_bh(lock)                     do { local_bh_disable();          read_lock(lock); } while (0)
+
+#define write_lock_irqsave(lock, flags)                do { local_irq_save(flags);      write_lock(lock); } while (0)
+#define write_lock_irq(lock)                   do { local_irq_disable();        write_lock(lock); } while (0)
+#define write_lock_bh(lock)                    do { local_bh_disable();         write_lock(lock); } while (0)
+
+#define spin_unlock_irqrestore(lock, flags)    do { spin_unlock(lock);  local_irq_restore(flags); } while (0)
+#define spin_unlock_irq(lock)                  do { spin_unlock(lock);  local_irq_enable();       } while (0)
+#define spin_unlock_bh(lock)                   do { spin_unlock(lock);  local_bh_enable();        } while (0)
+
+#define read_unlock_irqrestore(lock, flags)    do { read_unlock(lock);  local_irq_restore(flags); } while (0)
+#define read_unlock_irq(lock)                  do { read_unlock(lock);  local_irq_enable();       } while (0)
+#define read_unlock_bh(lock)                   do { read_unlock(lock);  local_bh_enable();        } while (0)
+
+#define write_unlock_irqrestore(lock, flags)   do { write_unlock(lock); local_irq_restore(flags); } while (0)
+#define write_unlock_irq(lock)                 do { write_unlock(lock); local_irq_enable();       } while (0)
+#define write_unlock_bh(lock)                  do { write_unlock(lock); local_bh_enable();        } while (0)
+#define spin_trylock_bh(lock)                  ({ int __r; local_bh_disable();\
+                                               __r = spin_trylock(lock);      \
+                                               if (!__r) local_bh_enable();   \
+                                               __r; })
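+/*
+ * A minimal usage sketch of the irqsave variants (demo_lock is a
+ * hypothetical example lock): they save and restore the caller's
+ * interrupt state, so they are safe for data shared with interrupt
+ * handlers.
+ *
+ *     static spinlock_t demo_lock = SPIN_LOCK_UNLOCKED;
+ *     unsigned long flags;
+ *
+ *     spin_lock_irqsave(&demo_lock, flags);
+ *     ... touch data that is also used from interrupt context ...
+ *     spin_unlock_irqrestore(&demo_lock, flags);
+ */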
+
+#ifdef CONFIG_SMP
+#include <asm/spinlock.h>
+
+#elif !defined(spin_lock_init) /* !SMP and spin_lock_init not previously
+                                  defined (e.g. by including asm/spinlock.h) */
+
+#define DEBUG_SPINLOCKS        0       /* 0 == no debugging, 1 == maintain lock state, 2 == full debug */
+
+#if (DEBUG_SPINLOCKS < 1)
+
+#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
+#define ATOMIC_DEC_AND_LOCK
+
+/*
+ * Your basic spinlocks, allowing only a single CPU anywhere
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+  typedef struct { } spinlock_t;
+  #define SPIN_LOCK_UNLOCKED (spinlock_t) { }
+#else
+  typedef struct { int gcc_is_buggy; } spinlock_t;
+  #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+#endif
+
+#define spin_lock_init(lock)   do { } while(0)
+#define spin_lock(lock)                (void)(lock) /* Not "unused variable". */
+#define spin_is_locked(lock)   (0)
+#define spin_trylock(lock)     ({1; })
+#define spin_unlock_wait(lock) do { } while(0)
+#define spin_unlock(lock)      do { } while(0)
+
+#elif (DEBUG_SPINLOCKS < 2)
+
+typedef struct {
+       volatile unsigned long lock;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+
+#define spin_lock_init(x)      do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock)   (test_bit(0,(lock)))
+#define spin_trylock(lock)     (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x)           do { (x)->lock = 1; } while (0)
+#define spin_unlock_wait(x)    do { } while (0)
+#define spin_unlock(x)         do { (x)->lock = 0; } while (0)
+
+#else /* (DEBUG_SPINLOCKS >= 2) */
+
+typedef struct {
+       volatile unsigned long lock;
+       volatile unsigned int babble;
+       const char *module;
+} spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0, 25, __BASE_FILE__ }
+
+/*#include <linux/kernel.h>*/
+
+#define spin_lock_init(x)      do { (x)->lock = 0; } while (0)
+#define spin_is_locked(lock)   (test_bit(0,(lock)))
+#define spin_trylock(lock)     (!test_and_set_bit(0,(lock)))
+
+#define spin_lock(x)           do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0)
+#define spin_unlock_wait(x)    do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0)
+#define spin_unlock(x)         do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0)
+
+#endif /* DEBUG_SPINLOCKS */
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * Most gcc versions have a nasty bug with empty initializers.
+ */
+#if (__GNUC__ > 2)
+  typedef struct { } rwlock_t;
+  #define RW_LOCK_UNLOCKED (rwlock_t) { }
+#else
+  typedef struct { int gcc_is_buggy; } rwlock_t;
+  #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
+#endif
+
+#define rwlock_init(lock)      do { } while(0)
+#define read_lock(lock)                (void)(lock) /* Not "unused variable". */
+#define read_unlock(lock)      do { } while(0)
+#define write_lock(lock)       (void)(lock) /* Not "unused variable". */
+#define write_unlock(lock)     do { } while(0)
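+
+/*
+ * Sketch of the "mixed" scheme described above (demo_rwlock is a
+ * hypothetical example lock): readers, even in interrupt context, may
+ * take the plain read_lock(), but a writer must use an irq-safe write
+ * lock so an interrupt reader cannot spin against an interrupted writer
+ * on its own CPU.
+ *
+ *     static rwlock_t demo_rwlock = RW_LOCK_UNLOCKED;
+ *
+ *     reader (may run in an interrupt handler):
+ *         read_lock(&demo_rwlock);   ... read ...   read_unlock(&demo_rwlock);
+ *
+ *     writer (process context):
+ *         unsigned long flags;
+ *         write_lock_irqsave(&demo_rwlock, flags);
+ *         ... modify ...
+ *         write_unlock_irqrestore(&demo_rwlock, flags);
+ */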
+
+#endif /* !SMP */
+
+/* "lock on reference count zero" */
+#ifndef ATOMIC_DEC_AND_LOCK
+#include <asm/atomic.h>
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+#endif
+
+#endif /* __LINUX_SPINLOCK_H */
diff --git a/xen-2.4.16/include/xeno/time.h b/xen-2.4.16/include/xeno/time.h
new file mode 100644 (file)
index 0000000..33837c5
--- /dev/null
@@ -0,0 +1,30 @@
+/******************************************************************************
+ * time.h
+ */
+
+#ifndef __XENO_TIME_H__
+#define __XENO_TIME_H__
+
+#include <xeno/types.h>
+#include <asm/ptrace.h>
+
+struct timeval {
+    long            tv_sec;         /* seconds */
+    long            tv_usec;        /* microseconds */
+};
+  
+struct timezone {
+    int     tz_minuteswest; /* minutes west of Greenwich */
+    int     tz_dsttime;     /* type of dst correction */
+};
+
+#ifdef __KERNEL__
+extern void do_gettimeofday(struct timeval *tv);
+extern void do_settimeofday(struct timeval *tv);
+extern void get_fast_time(struct timeval *tv);
+extern void (*do_get_fast_time)(struct timeval *);
+#endif
+
+extern void do_timer(struct pt_regs *regs);
+
+#endif /* __XENO_TIME_H__ */
diff --git a/xen-2.4.16/include/xeno/timer.h b/xen-2.4.16/include/xeno/timer.h
new file mode 100644 (file)
index 0000000..c4f01ad
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ */
+struct timer_list {
+       struct list_head list;
+       unsigned long expires;
+       unsigned long data;
+       void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t)      del_timer(t)
+#define sync_timers()          do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expires field of an
+ * active timer (if the timer is inactive it will be activated).
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known not to be pending (i.e., in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+       timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+       return timer->list.next != NULL;
+}
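+
+/*
+ * A short usage sketch tying the above together (my_timer, my_timeout
+ * and the one-second delay are hypothetical):
+ *
+ *     static void my_timeout(unsigned long data)
+ *     {
+ *             ... "data" tells us which of several timeouts fired ...
+ *     }
+ *
+ *     struct timer_list my_timer;
+ *
+ *     init_timer(&my_timer);
+ *     my_timer.expires  = jiffies + HZ;       -- one second from now
+ *     my_timer.data     = 0;
+ *     my_timer.function = my_timeout;
+ *     add_timer(&my_timer);
+ *
+ * To push the expiry back, use mod_timer(&my_timer, jiffies + 2*HZ);
+ * to cancel, use del_timer(&my_timer) (or del_timer_sync() on SMP).
+ */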
+
+/*
+ *     These inlines deal with timer wrapping correctly. You are 
+ *     strongly encouraged to use them:
+ *     1. Because people otherwise forget
+ *     2. Because if the timer wrap changes in future you won't have to
+ *        alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ */
+#define time_after(a,b)                ((long)(b) - (long)(a) < 0)
+#define time_before(a,b)       time_after(b,a)
+
+#define time_after_eq(a,b)     ((long)(a) - (long)(b) >= 0)
+#define time_before_eq(a,b)    time_after_eq(b,a)
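+
+/*
+ * Worked example: just after a jiffies wrap, a = 0x00000005 and
+ * b = 0xfffffffa (from just before the wrap). Then, with a 32-bit long,
+ * (long)(b) - (long)(a) == -11 < 0, so time_after(a,b) is correctly
+ * true even though a < b as unsigned values.
+ */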
+
+#endif
diff --git a/xen-2.4.16/include/xeno/timex.h b/xen-2.4.16/include/xeno/timex.h
new file mode 100644 (file)
index 0000000..3a00a26
--- /dev/null
@@ -0,0 +1,291 @@
+/*****************************************************************************
+ *                                                                           *
+ * Copyright (c) David L. Mills 1993                                         *
+ *                                                                           *
+ * Permission to use, copy, modify, and distribute this software and its     *
+ * documentation for any purpose and without fee is hereby granted, provided *
+ * that the above copyright notice appears in all copies and that both the   *
+ * copyright notice and this permission notice appear in supporting          *
+ * documentation, and that the name University of Delaware not be used in    *
+ * advertising or publicity pertaining to distribution of the software       *
+ * without specific, written prior permission.  The University of Delaware   *
+ * makes no representations about the suitability of this software for any   *
+ * purpose.  It is provided "as is" without express or implied warranty.     *
+ *                                                                           *
+ *****************************************************************************/
+
+/*
+ * Modification history timex.h
+ *
+ * 29 Dec 97   Russell King
+ *     Moved CLOCK_TICK_RATE, CLOCK_TICK_FACTOR and FINETUNE to asm/timex.h
+ *     for ARM machines
+ *
+ *  9 Jan 97    Adrian Sun
+ *      Shifted LATCH define to allow access to alpha machines.
+ *
+ * 26 Sep 94   David L. Mills
+ *     Added defines for hybrid phase/frequency-lock loop.
+ *
+ * 19 Mar 94   David L. Mills
+ *     Moved defines from kernel routines to header file and added new
+ *     defines for PPS phase-lock loop.
+ *
+ * 20 Feb 94   David L. Mills
+ *     Revised status codes and structures for external clock and PPS
+ *     signal discipline.
+ *
+ * 28 Nov 93   David L. Mills
+ *     Adjusted parameters to improve stability and increase poll
+ *     interval.
+ *
+ * 17 Sep 93    David L. Mills
+ *      Created file $NTP/include/sys/timex.h
+ * 07 Oct 93    Torsten Duwe
+ *      Derived linux/timex.h
+ * 1995-08-13    Torsten Duwe
+ *      kernel PLL updated to 1994-12-13 specs (rfc-1589)
+ * 1997-08-30    Ulrich Windl
+ *      Added new constant NTP_PHASE_LIMIT
+ */
+#ifndef _LINUX_TIMEX_H
+#define _LINUX_TIMEX_H
+
+#include <asm/param.h>
+
+/*
+ * The following defines establish the engineering parameters of the PLL
+ * model. The HZ variable establishes the timer interrupt frequency, 100 Hz
+ * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the
+ * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the
+ * nearest power of two in order to avoid hardware multiply operations.
+ */
+#if HZ >= 12 && HZ < 24
+# define SHIFT_HZ      4
+#elif HZ >= 24 && HZ < 48
+# define SHIFT_HZ      5
+#elif HZ >= 48 && HZ < 96
+# define SHIFT_HZ      6
+#elif HZ >= 96 && HZ < 192
+# define SHIFT_HZ      7
+#elif HZ >= 192 && HZ < 384
+# define SHIFT_HZ      8
+#elif HZ >= 384 && HZ < 768
+# define SHIFT_HZ      9
+#elif HZ >= 768 && HZ < 1536
+# define SHIFT_HZ      10
+#else
+# error You lose.
+#endif
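+
+/*
+ * For example, with the common i386 value HZ = 100 the 96 <= HZ < 192
+ * branch applies, giving SHIFT_HZ = 7 (2^7 = 128, the power of two
+ * nearest to 100).
+ */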
+
+/*
+ * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
+ * for a slightly underdamped convergence characteristic. SHIFT_KH
+ * establishes the damping of the FLL and is chosen by wisdom and black
+ * art.
+ *
+ * MAXTC establishes the maximum time constant of the PLL. With the
+ * SHIFT_KG and SHIFT_KF values given and a time constant range from
+ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
+ * respectively.
+ */
+#define SHIFT_KG 6             /* phase factor (shift) */
+#define SHIFT_KF 16            /* PLL frequency factor (shift) */
+#define SHIFT_KH 2             /* FLL frequency factor (shift) */
+#define MAXTC 6                        /* maximum time constant (shift) */
+
+/*
+ * The SHIFT_SCALE define establishes the decimal point of the time_phase
+ * variable which serves as an extension to the low-order bits of the
+ * system clock variable. The SHIFT_UPDATE define establishes the decimal
+ * point of the time_offset variable which represents the current offset
+ * with respect to standard time. The FINEUSEC define represents 1 usec in
+ * scaled units.
+ *
+ * SHIFT_USEC defines the scaling (shift) of the time_freq and
+ * time_tolerance variables, which represent the current frequency
+ * offset and maximum frequency tolerance.
+ *
+ * FINEUSEC is 1 us in SHIFT_UPDATE units of the time_phase variable.
+ */
+#define SHIFT_SCALE 22         /* phase scale (shift) */
+#define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */
+#define SHIFT_USEC 16          /* frequency offset scale (shift) */
+#define FINEUSEC (1L << SHIFT_SCALE) /* 1 us in phase units */
+
+#define MAXPHASE 512000L        /* max phase error (us) */
+#define MAXFREQ (512L << SHIFT_USEC)  /* max frequency error (ppm) */
+#define MAXTIME (200L << PPS_AVG) /* max PPS error (jitter) (200 us) */
+#define MINSEC 16L              /* min interval between updates (s) */
+#define MAXSEC 1200L            /* max interval between updates (s) */
+#define        NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */
+
+/*
+ * The following defines are used only if a pulse-per-second (PPS)
+ * signal is available and connected via a modem control lead, such as
+ * produced by the optional ppsclock feature incorporated in the Sun
+ * asynch driver. They establish the design parameters of the frequency-
+ * lock loop used to discipline the CPU clock oscillator to the PPS
+ * signal.
+ *
+ * PPS_AVG is the averaging factor for the frequency loop, as well as
+ * the time and frequency dispersion.
+ *
+ * PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum
+ * calibration intervals, respectively, in seconds as a power of two.
+ *
+ * PPS_VALID is the maximum interval before the PPS signal is considered
+ * invalid and protocol updates used directly instead.
+ *
+ * MAXGLITCH is the maximum interval before a time offset of more than
+ * MAXTIME is believed.
+ */
+#define PPS_AVG 2              /* pps averaging constant (shift) */
+#define PPS_SHIFT 2            /* min interval duration (s) (shift) */
+#define PPS_SHIFTMAX 8         /* max interval duration (s) (shift) */
+#define PPS_VALID 120          /* pps signal watchdog max (s) */
+#define MAXGLITCH 30           /* pps signal glitch max (s) */
+
+/*
+ * Pick up the architecture specific timex specifications
+ */
+#include <asm/timex.h>
+
+/* LATCH is used in the interval timer and ftape setup. */
+#define LATCH  ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
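+
+/* Worked example (assuming the i8253/i8254 PIT input clock, i.e.
+   CLOCK_TICK_RATE = 1193180 Hz): with HZ = 100,
+   LATCH = (1193180 + 50) / 100 = 11932, the divisor programmed into
+   the timer chip, rounded to the nearest count. */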
+
+/*
+ * syscall interface - used (mainly by NTP daemon)
+ * to discipline kernel clock oscillator
+ */
+struct timex {
+       unsigned int modes;     /* mode selector */
+       long offset;            /* time offset (usec) */
+       long freq;              /* frequency offset (scaled ppm) */
+       long maxerror;          /* maximum error (usec) */
+       long esterror;          /* estimated error (usec) */
+       int status;             /* clock command/status */
+       long constant;          /* pll time constant */
+       long precision;         /* clock precision (usec) (read only) */
+       long tolerance;         /* clock frequency tolerance (ppm)
+                                * (read only)
+                                */
+       struct timeval time;    /* (read only) */
+       long tick;              /* (modified) usecs between clock ticks */
+
+       long ppsfreq;           /* pps frequency (scaled ppm) (ro) */
+       long jitter;            /* pps jitter (us) (ro) */
+       int shift;              /* interval duration (s) (shift) (ro) */
+       long stabil;            /* pps stability (scaled ppm) (ro) */
+       long jitcnt;            /* jitter limit exceeded (ro) */
+       long calcnt;            /* calibration intervals (ro) */
+       long errcnt;            /* calibration errors (ro) */
+       long stbcnt;            /* stability limit exceeded (ro) */
+
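+       /* Unnamed bit-fields: padding reserved for future expansion. */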
+       int  :32; int  :32; int  :32; int  :32;
+       int  :32; int  :32; int  :32; int  :32;
+       int  :32; int  :32; int  :32; int  :32;
+};
+
+/*
+ * Mode codes (timex.mode)
+ */
+#define ADJ_OFFSET             0x0001  /* time offset */
+#define ADJ_FREQUENCY          0x0002  /* frequency offset */
+#define ADJ_MAXERROR           0x0004  /* maximum time error */
+#define ADJ_ESTERROR           0x0008  /* estimated time error */
+#define ADJ_STATUS             0x0010  /* clock status */
+#define ADJ_TIMECONST          0x0020  /* pll time constant */
+#define ADJ_TICK               0x4000  /* tick value */
+#define ADJ_OFFSET_SINGLESHOT  0x8001  /* old-fashioned adjtime */
+
+/* xntp 3.4 compatibility names */
+#define MOD_OFFSET     ADJ_OFFSET
+#define MOD_FREQUENCY  ADJ_FREQUENCY
+#define MOD_MAXERROR   ADJ_MAXERROR
+#define MOD_ESTERROR   ADJ_ESTERROR
+#define MOD_STATUS     ADJ_STATUS
+#define MOD_TIMECONST  ADJ_TIMECONST
+#define MOD_CLKB       ADJ_TICK
+#define MOD_CLKA       ADJ_OFFSET_SINGLESHOT /* 0x8000 in original */
+
+
+/*
+ * Status codes (timex.status)
+ */
+#define STA_PLL                0x0001  /* enable PLL updates (rw) */
+#define STA_PPSFREQ    0x0002  /* enable PPS freq discipline (rw) */
+#define STA_PPSTIME    0x0004  /* enable PPS time discipline (rw) */
+#define STA_FLL                0x0008  /* select frequency-lock mode (rw) */
+
+#define STA_INS                0x0010  /* insert leap (rw) */
+#define STA_DEL                0x0020  /* delete leap (rw) */
+#define STA_UNSYNC     0x0040  /* clock unsynchronized (rw) */
+#define STA_FREQHOLD   0x0080  /* hold frequency (rw) */
+
+#define STA_PPSSIGNAL  0x0100  /* PPS signal present (ro) */
+#define STA_PPSJITTER  0x0200  /* PPS signal jitter exceeded (ro) */
+#define STA_PPSWANDER  0x0400  /* PPS signal wander exceeded (ro) */
+#define STA_PPSERROR   0x0800  /* PPS signal calibration error (ro) */
+
+#define STA_CLOCKERR   0x1000  /* clock hardware fault (ro) */
+
+#define STA_RONLY (STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | \
+    STA_PPSERROR | STA_CLOCKERR) /* read-only bits */
+
+/*
+ * Clock states (time_state)
+ */
+#define TIME_OK                0       /* clock synchronized, no leap second */
+#define TIME_INS       1       /* insert leap second */
+#define TIME_DEL       2       /* delete leap second */
+#define TIME_OOP       3       /* leap second in progress */
+#define TIME_WAIT      4       /* leap second has occurred */
+#define TIME_ERROR     5       /* clock not synchronized */
+#define TIME_BAD       TIME_ERROR /* bw compat */
+
+#ifdef __KERNEL__
+/*
+ * kernel variables
+ * Note: maximum error = NTP synch distance = dispersion + delay / 2;
+ * estimated error = NTP dispersion.
+ */
+extern long tick;                      /* timer interrupt period */
+extern int tickadj;                    /* amount of adjustment per tick */
+
+/*
+ * phase-lock loop variables
+ */
+extern int time_state;         /* clock status */
+extern int time_status;                /* clock synchronization status bits */
+extern long time_offset;       /* time adjustment (us) */
+extern long time_constant;     /* pll time constant */
+extern long time_tolerance;    /* frequency tolerance (ppm) */
+extern long time_precision;    /* clock precision (us) */
+extern long time_maxerror;     /* maximum error */
+extern long time_esterror;     /* estimated error */
+
+extern long time_phase;                /* phase offset (scaled us) */
+extern long time_freq;         /* frequency offset (scaled ppm) */
+extern long time_adj;          /* tick adjust (scaled 1 / HZ) */
+extern long time_reftime;      /* time at last adjustment (s) */
+
+extern long time_adjust;       /* The amount of adjtime left */
+
+/* interface variables pps->timer interrupt */
+extern long pps_offset;                /* pps time offset (us) */
+extern long pps_jitter;                /* time dispersion (jitter) (us) */
+extern long pps_freq;          /* frequency offset (scaled ppm) */
+extern long pps_stabil;                /* frequency dispersion (scaled ppm) */
+extern long pps_valid;         /* pps signal watchdog counter */
+
+/* interface variables pps->adjtimex */
+extern int pps_shift;          /* interval duration (s) (shift) */
+extern long pps_jitcnt;                /* jitter limit exceeded */
+extern long pps_calcnt;                /* calibration intervals */
+extern long pps_errcnt;                /* calibration errors */
+extern long pps_stbcnt;                /* stability limit exceeded */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_TIMEX_H */
diff --git a/xen-2.4.16/include/xeno/tqueue.h b/xen-2.4.16/include/xeno/tqueue.h
new file mode 100644 (file)
index 0000000..4a730f0
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * tqueue.h --- task queue handling for Linux.
+ *
+ * Mostly based on a proposed bottom-half replacement code written by
+ * Kai Petzke, wpp@marie.physik.tu-berlin.de.
+ *
+ * Modified for use in the Linux kernel by Theodore Ts'o,
+ * tytso@mit.edu.  Any bugs are my fault, not Kai's.
+ *
+ * The original comment follows below.
+ */
+
+#ifndef _LINUX_TQUEUE_H
+#define _LINUX_TQUEUE_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <asm/bitops.h>
+#include <asm/system.h>
+
+/*
+ * New proposed "bottom half" handlers:
+ * (C) 1994 Kai Petzke, wpp@marie.physik.tu-berlin.de
+ *
+ * Advantages:
+ * - Bottom halves are implemented as a linked list.  You can have as many
+ *   of them as you want.
+ * - No more scanning of a bit field is required upon call of a bottom half.
+ * - Support for chained bottom half lists.  The run_task_queue() function can be
+ *   used as a bottom half handler.  This is for example useful for bottom
+ *   halves that want to be delayed until the next clock tick.
+ *
+ * Notes:
+ * - Bottom halves are called in the reverse order that they were linked into
+ *   the list.
+ */
+
+struct tq_struct {
+       struct list_head list;          /* linked list of active bh's */
+       unsigned long sync;             /* must be initialized to zero */
+       void (*routine)(void *);        /* function to call */
+       void *data;                     /* argument to function */
+};
+
+/*
+ * Emit code to initialise a tq_struct's routine and data pointers
+ */
+#define PREPARE_TQUEUE(_tq, _routine, _data)                   \
+       do {                                                    \
+               (_tq)->routine = _routine;                      \
+               (_tq)->data = _data;                            \
+       } while (0)
+
+/*
+ * Emit code to initialise all of a tq_struct
+ */
+#define INIT_TQUEUE(_tq, _routine, _data)                      \
+       do {                                                    \
+               INIT_LIST_HEAD(&(_tq)->list);                   \
+               (_tq)->sync = 0;                                \
+               PREPARE_TQUEUE((_tq), (_routine), (_data));     \
+       } while (0)
+
+typedef struct list_head task_queue;
+
+#define DECLARE_TASK_QUEUE(q)  LIST_HEAD(q)
+#define TQ_ACTIVE(q)           (!list_empty(&q))
+
+extern task_queue tq_timer, tq_immediate, tq_disk;
+
+/*
+ * To implement your own list of active bottom halves, use the following
+ * two definitions:
+ *
+ * DECLARE_TASK_QUEUE(my_tqueue);
+ * struct tq_struct my_task = {
+ *     routine: (void (*)(void *)) my_routine,
+ *     data: &my_data
+ * };
+ *
+ * To activate a bottom half on a list, use:
+ *
+ *     queue_task(&my_task, &my_tqueue);
+ *
+ * To later run the queued tasks use
+ *
+ *     run_task_queue(&my_tqueue);
+ *
+ * This allows you to do deferred processing.  For example, you could
+ * have a task queue called tq_timer, which is executed within the timer
+ * interrupt.
+ */
+
+extern spinlock_t tqueue_lock;
+
+/*
+ * Queue a task on a tq.  Return non-zero if it was successfully
+ * added.
+ */
+static inline int queue_task(struct tq_struct *bh_pointer, task_queue *bh_list)
+{
+       int ret = 0;
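+       /* The atomic test-and-set on "sync" ensures a task sits on at most
+        * one queue at a time; queueing it again before it runs is a no-op. */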
+       if (!test_and_set_bit(0,&bh_pointer->sync)) {
+               unsigned long flags;
+               spin_lock_irqsave(&tqueue_lock, flags);
+               list_add_tail(&bh_pointer->list, bh_list);
+               spin_unlock_irqrestore(&tqueue_lock, flags);
+               ret = 1;
+       }
+       return ret;
+}
+
+/*
+ * Call all "bottom halfs" on a given list.
+ */
+
+extern void __run_task_queue(task_queue *list);
+
+static inline void run_task_queue(task_queue *list)
+{
+       if (TQ_ACTIVE(*list))
+               __run_task_queue(list);
+}
+
+#endif /* _LINUX_TQUEUE_H */
diff --git a/xen-2.4.16/include/xeno/types.h b/xen-2.4.16/include/xeno/types.h
new file mode 100644 (file)
index 0000000..6a270f3
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef __TYPES_H__
+#define __TYPES_H__
+
+#include <asm/types.h>
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+#define INT_MAX         ((int)(~0U>>1))
+#define INT_MIN         (-INT_MAX - 1)
+#define UINT_MAX        (~0U)
+#define LONG_MAX        ((long)(~0UL>>1))
+#define LONG_MIN        (-LONG_MAX - 1)
+#define ULONG_MAX       (~0UL)
+
+typedef unsigned int size_t;
+
+/* bsd */
+typedef unsigned char           u_char;
+typedef unsigned short          u_short;
+typedef unsigned int            u_int;
+typedef unsigned long           u_long;
+
+/* sysv */
+typedef unsigned char           unchar;
+typedef unsigned short          ushort;
+typedef unsigned int            uint;
+typedef unsigned long           ulong;
+
+#endif /* __TYPES_H__ */
diff --git a/xen-2.4.16/include/xeno/vif.h b/xen-2.4.16/include/xeno/vif.h
new file mode 100644 (file)
index 0000000..3ef0f1f
--- /dev/null
@@ -0,0 +1,36 @@
+/* vif.h
+ * 
+ * This is the hypervisor end of the network code.  The net_ring structure
+ * stored in each vif is placed on a shared page to interact with the guest VM.
+ */
+
+/* virtual network interface struct and associated defines. */
+/* net_vif_st is the larger struct that describes a virtual network interface.
+ * It contains a pointer to the net_ring_t structure that needs to be on a 
+ * shared page between the hypervisor and guest.  The vif struct is private 
+ * to the hypervisor and is used primarily as a container to allow routing 
+ * and interface administration.  This define should eventually be moved to 
+ * a non-shared interface file, as it is of no relevance to the guest.
+ */
+
+#include <hypervisor-ifs/network.h>
+#include <xeno/skbuff.h>
+
+typedef struct net_vif_st {
+    net_ring_t  *net_ring;
+    int          id;
+    struct sk_buff_head skb_list;
+    // rules table goes here in next revision.
+} net_vif_t;
+
+/* VIF-related defines. */
+#define MAX_GUEST_VIFS    2 // each VIF is a small overhead in task_struct
+#define MAX_SYSTEM_VIFS 256 // trying to avoid dynamic allocation 
+
+/* vif globals */
+extern int sys_vif_count;
+
+/* vif prototypes */
+net_ring_t *create_net_vif(int domain);
+void destroy_net_vif(struct task_struct *p);
+
diff --git a/xen-2.4.16/net/Makefile b/xen-2.4.16/net/Makefile
new file mode 100644 (file)
index 0000000..e9a8eba
--- /dev/null
@@ -0,0 +1,8 @@
+
+include $(BASEDIR)/Rules.mk
+
+default: $(OBJS)
+       $(LD) -r -o network.o $(OBJS)
+
+clean:
+       rm -f *.o *~ core
diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c
new file mode 100644 (file)
index 0000000..160aedc
--- /dev/null
@@ -0,0 +1,2007 @@
+/*
+ *     NET3    Protocol independent device support routines.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/lib.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/brlock.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pkt_sched.h>
+
+#include <linux/event.h>
+
+#define BUG_TRAP ASSERT
+#define notifier_call_chain(_a,_b,_c) ((void)0)
+#define rtmsg_ifinfo(_a,_b,_c) ((void)0)
+#define rtnl_lock() ((void)0)
+#define rtnl_unlock() ((void)0)
+#define dst_init() ((void)0)
+
+struct net_device *the_dev = NULL;
+
+/*
+ *     Device drivers call our routines to queue packets here. We empty the
+ *     queue in the local softnet handler.
+ */
+struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
+
+
+/*****************************************************************************************
+
+                           Device Interface Subroutines
+
+******************************************************************************************/
+
+/**
+ *     __dev_get_by_name       - find a device by its name 
+ *     @name: name to find
+ *
+ *     Find an interface by name. Must be called under RTNL semaphore
+ *     or @dev_base_lock. If the name is found a pointer to the device
+ *     is returned. If the name is not found then %NULL is returned. The
+ *     reference counters are not incremented so the caller must be
+ *     careful with locks.
+ */
+
+struct net_device *__dev_get_by_name(const char *name)
+{
+       struct net_device *dev;
+
+       for (dev = dev_base; dev != NULL; dev = dev->next) {
+               if (strncmp(dev->name, name, IFNAMSIZ) == 0)
+                       return dev;
+       }
+       return NULL;
+}
+
+/**
+ *     dev_get_by_name         - find a device by its name
+ *     @name: name to find
+ *
+ *     Find an interface by name. This can be called from any 
+ *     context and does its own locking. The returned handle has
+ *     the usage count incremented and the caller must use dev_put() to
+ *     release it when it is no longer needed. %NULL is returned if no
+ *     matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+       struct net_device *dev;
+
+       read_lock(&dev_base_lock);
+       dev = __dev_get_by_name(name);
+       if (dev)
+               dev_hold(dev);
+       read_unlock(&dev_base_lock);
+       return dev;
+}
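+
+/*
+ * Typical use of the reference-counted variant (a sketch; "eth0" is
+ * just an example name):
+ *
+ *     struct net_device *dev = dev_get_by_name("eth0");
+ *     if (dev) {
+ *             ... use dev ...
+ *             dev_put(dev);   -- drop the reference taken above
+ *     }
+ */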
+
+/* 
+   Return value is changed to int to prevent illegal usage in future.
+   It is still legal to use it to check for device existence.
+
+   User should understand that the result returned by this function
+   is meaningless if it was not issued under the rtnl semaphore.
+ */
+
+/**
+ *     dev_get -       test if a device exists
+ *     @name:  name to test for
+ *
+ *     Test if a name exists. Returns true if the name is found. In order
+ *     to be sure the name is not allocated or removed during the test the
+ *     caller must hold the rtnl semaphore.
+ *
+ *     This function primarily exists for back compatibility with older
+ *     drivers. 
+ */
+int dev_get(const char *name)
+{
+       struct net_device *dev;
+
+       read_lock(&dev_base_lock);
+       dev = __dev_get_by_name(name);
+       read_unlock(&dev_base_lock);
+       return dev != NULL;
+}
+
+/**
+ *     __dev_get_by_index - find a device by its ifindex
+ *     @ifindex: index of device
+ *
+ *     Search for an interface by index. Returns %NULL if the device
+ *     is not found or a pointer to the device. The device has not
+ *     had its reference counter increased so the caller must be careful
+ *     about locking. The caller must hold either the RTNL semaphore
+ *     or @dev_base_lock.
+ */
+
+struct net_device * __dev_get_by_index(int ifindex)
+{
+       struct net_device *dev;
+
+       for (dev = dev_base; dev != NULL; dev = dev->next) {
+               if (dev->ifindex == ifindex)
+                       return dev;
+       }
+       return NULL;
+}
+
+
+/**
+ *     dev_get_by_index - find a device by its ifindex
+ *     @ifindex: index of device
+ *
+ *     Search for an interface by index. Returns NULL if the device
+ *     is not found or a pointer to the device. The device returned has 
+ *     had a reference added and the pointer is safe until the user calls
+ *     dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_index(int ifindex)
+{
+       struct net_device *dev;
+
+       read_lock(&dev_base_lock);
+       dev = __dev_get_by_index(ifindex);
+       if (dev)
+               dev_hold(dev);
+       read_unlock(&dev_base_lock);
+       return dev;
+}
+
+/**
+ *     dev_getbyhwaddr - find a device by its hardware address
+ *     @type: media type of device
+ *     @ha: hardware address
+ *
+ *     Search for an interface by MAC address. Returns NULL if the device
+ *     is not found or a pointer to the device. The caller must hold the
+ *     rtnl semaphore. The returned device has not had its ref count increased
+ *     and the caller must therefore be careful about locking
+ *
+ *     BUGS:
+ *     If the API was consistent this would be __dev_get_by_hwaddr
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+       struct net_device *dev;
+
+       for (dev = dev_base; dev != NULL; dev = dev->next) {
+               if (dev->type == type &&
+                   memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+                       return dev;
+       }
+       return NULL;
+}
+
+/**
+ *     dev_alloc_name - allocate a name for a device
+ *     @dev: device 
+ *     @name: name format string
+ *
+ *     Passed a format string, e.g. "lt%d", it will try to find a suitable
+ *     id. Not efficient for many devices, not called a lot. The caller
+ *     must hold the dev_base or rtnl lock while allocating the name and
+ *     adding the device in order to avoid duplicates. Returns the number
+ *     of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+       int i;
+       char buf[32];
+       char *p;
+
+       /*
+        * Verify the string as this thing may have come from
+        * the user.  There must be either one "%d" and no other "%"
+        * characters, or no "%" characters at all.
+        */
+       p = strchr(name, '%');
+       if (p && (p[1] != 'd' || strchr(p+2, '%')))
+               return -EINVAL;
+
+       /*
+        * If you need over 100 please also fix the algorithm...
+        */
+       for (i = 0; i < 100; i++) {
+               snprintf(buf,sizeof(buf),name,i);
+               if (__dev_get_by_name(buf) == NULL) {
+                       strcpy(dev->name, buf);
+                       return i;
+               }
+       }
+       return -ENFILE; /* Over 100 of the things .. bail out! */
+}
+
+/**
+ *     dev_alloc - allocate a network device and name
+ *     @name: name format string
+ *     @err: error return pointer
+ *
+ *     Passed a format string, e.g. "lt%d", it will allocate a network device
+ *     and space for the name. %NULL is returned if no memory is available.
+ *     If the allocation succeeds then the name is assigned and the 
+ *     device pointer returned. %NULL is returned if the name allocation
+ *     failed. The cause of an error is returned as a negative errno code
+ *     in the variable @err points to.
+ *
+ *     The caller must hold the @dev_base or RTNL locks when doing this in
+ *     order to avoid duplicate name allocations.
+ */
+
+struct net_device *dev_alloc(const char *name, int *err)
+{
+       struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL);
+       if (dev == NULL) {
+               *err = -ENOBUFS;
+               return NULL;
+       }
+       memset(dev, 0, sizeof(struct net_device));
+       *err = dev_alloc_name(dev, name);
+       if (*err < 0) {
+               kfree(dev);
+               return NULL;
+       }
+       return dev;
+}
+
+/**
+ *     netdev_state_change - device changes state
+ *     @dev: device to cause notification
+ *
+ *     Called to indicate a device has changed state. This function calls
+ *     the notifier chains for netdev_chain and sends a NEWLINK message
+ *     to the routing socket.
+ */
+void netdev_state_change(struct net_device *dev)
+{
+       if (dev->flags&IFF_UP) {
+               notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+               rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
+       }
+}
+
+
+#ifdef CONFIG_KMOD
+
+/**
+ *     dev_load        - load a network module
+ *     @name: name of interface
+ *
+ *     If a network interface is not present and the process has suitable
+ *     privileges this function loads the module. If module loading is not
+ *     available in this kernel then it becomes a nop.
+ */
+
+void dev_load(const char *name)
+{
+       if (!dev_get(name) && capable(CAP_SYS_MODULE))
+               request_module(name);
+}
+
+#else
+
+extern inline void dev_load(const char *unused){;}
+
+#endif
+
+static int default_rebuild_header(struct sk_buff *skb)
+{
+       printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!");
+       kfree_skb(skb);
+       return 1;
+}
+
+/**
+ *     dev_open        - prepare an interface for use. 
+ *     @dev:   device to open
+ *
+ *     Takes a device from down to up state. The device's private open
+ *     function is invoked and then the multicast lists are loaded. Finally
+ *     the device is moved into the up state and a %NETDEV_UP message is
+ *     sent to the netdev notifier chain.
+ *
+ *     Calling this function on an active interface is a nop. On a failure
+ *     a negative errno code is returned.
+ */
+int dev_open(struct net_device *dev)
+{
+       int ret = 0;
+
+       /*
+        *      Is it already up?
+        */
+
+       if (dev->flags&IFF_UP)
+               return 0;
+
+       /*
+        *      Is it even present?
+        */
+       if (!netif_device_present(dev))
+               return -ENODEV;
+
+       /*
+        *      Call device private open method
+        */
+       if (try_inc_mod_count(dev->owner)) {
+               if (dev->open) {
+                       ret = dev->open(dev);
+                       if (ret != 0 && dev->owner)
+                               __MOD_DEC_USE_COUNT(dev->owner);
+               }
+       } else {
+               ret = -ENODEV;
+       }
+
+       /*
+        *      If it went open OK then:
+        */
+        
+       if (ret == 0) 
+       {
+               /*
+                *      Set the flags.
+                */
+               dev->flags |= IFF_UP;
+
+               set_bit(__LINK_STATE_START, &dev->state);
+
+               /*
+                *      Initialize multicasting status 
+                */
+               dev_mc_upload(dev);
+
+               /*
+                *      Wakeup transmit queue engine
+                */
+               dev_activate(dev);
+
+               /*
+                *      ... and announce new interface.
+                */
+               notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+       }
+       return(ret);
+}
+
+
+/**
+ *     dev_close - shutdown an interface.
+ *     @dev: device to shutdown
+ *
+ *     This function moves an active device into down state. A 
+ *     %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ *     is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ *     chain.
+ */
+int dev_close(struct net_device *dev)
+{
+       if (!(dev->flags&IFF_UP))
+               return 0;
+
+       /*
+        *      Tell people we are going down, so that they can
+        *      prepare for its death while the device is still operating.
+        */
+       notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+       dev_deactivate(dev);
+
+       clear_bit(__LINK_STATE_START, &dev->state);
+
+       /*
+        *      Call the device specific close. This cannot fail.
+        *      Only if device is UP
+        *
+        *      We allow it to be called even after a DETACH hot-plug
+        *      event.
+        */
+        
+       if (dev->stop)
+               dev->stop(dev);
+
+       /*
+        *      Device is now down.
+        */
+
+       dev->flags &= ~IFF_UP;
+
+       /*
+        *      Tell people we are down
+        */
+       notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+       /*
+        * Drop the module refcount
+        */
+       if (dev->owner)
+               __MOD_DEC_USE_COUNT(dev->owner);
+
+       return(0);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know that:
+ * 1. An IOMMU is present and allows mapping all the memory, or
+ * 2. No high memory really exists on this machine.
+ */
+
+static inline int
+illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+       int i;
+
+       if (dev->features&NETIF_F_HIGHDMA)
+               return 0;
+
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+               if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
+                       return 1;
+
+       return 0;
+}
+#else
+#define illegal_highdma(dev, skb)      (0)
+#endif
+
+/**
+ *     dev_queue_xmit - transmit a buffer
+ *     @skb: buffer to transmit
+ *     
+ *     Queue a buffer for transmission to a network device. The caller must
+ *     have set the device and priority and built the buffer before calling this 
+ *     function. The function can be called from an interrupt.
+ *
+ *     A negative errno code is returned on a failure. A success does not
+ *     guarantee the frame will be transmitted as it may be dropped due
+ *     to congestion or traffic shaping.
+ */
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       struct Qdisc  *q;
+
+       if (skb_shinfo(skb)->frag_list &&
+           !(dev->features&NETIF_F_FRAGLIST) &&
+           skb_linearize(skb, GFP_ATOMIC) != 0) {
+               kfree_skb(skb);
+               return -ENOMEM;
+       }
+
+       /* Fragmented skb is linearized if device does not support SG,
+        * or if at least one of fragments is in highmem and device
+        * does not support DMA from it.
+        */
+       if (skb_shinfo(skb)->nr_frags &&
+           (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+           skb_linearize(skb, GFP_ATOMIC) != 0) {
+               kfree_skb(skb);
+               return -ENOMEM;
+       }
+
+       /* Grab device queue */
+       spin_lock_bh(&dev->queue_lock);
+       q = dev->qdisc;
+       if (q->enqueue) {
+               int ret = q->enqueue(skb, q);
+
+               qdisc_run(dev);
+
+               spin_unlock_bh(&dev->queue_lock);
+               return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
+       }
+
+       /* The device has no queue. Common case for software devices:
+          loopback, all sorts of tunnels...
+
+          Really, it is unlikely that xmit_lock protection is necessary here.
+          (e.g. loopback and IP tunnels are clean, ignoring statistics counters.)
+          However, it is possible that they rely on the protection
+          made by us here.
+
+          Check this and shoot the lock. It is not prone to deadlocks.
+          Either shoot the noqueue qdisc; it is even simpler 8)
+        */
+       if (dev->flags&IFF_UP) {
+               int cpu = smp_processor_id();
+
+               if (dev->xmit_lock_owner != cpu) {
+                       spin_unlock(&dev->queue_lock);
+                       spin_lock(&dev->xmit_lock);
+                       dev->xmit_lock_owner = cpu;
+
+                       if (!netif_queue_stopped(dev)) {
+                               if (dev->hard_start_xmit(skb, dev) == 0) {
+                                       dev->xmit_lock_owner = -1;
+                                       spin_unlock_bh(&dev->xmit_lock);
+                                       return 0;
+                               }
+                       }
+                       dev->xmit_lock_owner = -1;
+                       spin_unlock_bh(&dev->xmit_lock);
+                       kfree_skb(skb);
+                       return -ENETDOWN;
+               } else {
+                       /* Recursion is detected! It is possible, unfortunately */
+               }
+       }
+       spin_unlock_bh(&dev->queue_lock);
+
+       kfree_skb(skb);
+       return -ENETDOWN;
+}
+
+
+/*=======================================================================
+                       Receiver routines
+  =======================================================================*/
+
+int netdev_max_backlog = 300;
+/* These numbers are selected based on intuition and some
+ * experimentation; if you have a more scientific way of doing this,
+ * please go ahead and fix things.
+ */
+int no_cong_thresh = 10;
+int no_cong = 20;
+int lo_cong = 100;
+int mod_cong = 290;
+
+struct netif_rx_stats netdev_rx_stat[NR_CPUS];
+
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+atomic_t netdev_dropping = ATOMIC_INIT(0);
+static unsigned long netdev_fc_mask = 1;
+unsigned long netdev_fc_xoff = 0;
+spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
+
+static struct
+{
+       void (*stimul)(struct net_device *);
+       struct net_device *dev;
+} netdev_fc_slots[BITS_PER_LONG];
+
+int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
+{
+       int bit = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&netdev_fc_lock, flags);
+       if (netdev_fc_mask != ~0UL) {
+               bit = ffz(netdev_fc_mask);
+               netdev_fc_slots[bit].stimul = stimul;
+               netdev_fc_slots[bit].dev = dev;
+               set_bit(bit, &netdev_fc_mask);
+               clear_bit(bit, &netdev_fc_xoff);
+       }
+       spin_unlock_irqrestore(&netdev_fc_lock, flags);
+       return bit;
+}
+
+void netdev_unregister_fc(int bit)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&netdev_fc_lock, flags);
+       if (bit > 0) {
+               netdev_fc_slots[bit].stimul = NULL;
+               netdev_fc_slots[bit].dev = NULL;
+               clear_bit(bit, &netdev_fc_mask);
+               clear_bit(bit, &netdev_fc_xoff);
+       }
+       spin_unlock_irqrestore(&netdev_fc_lock, flags);
+}
+
+static void netdev_wakeup(void)
+{
+       unsigned long xoff;
+
+       spin_lock(&netdev_fc_lock);
+       xoff = netdev_fc_xoff;
+       netdev_fc_xoff = 0;
+       while (xoff) {
+               int i = ffz(~xoff);
+               xoff &= ~(1<<i);
+               netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
+       }
+       spin_unlock(&netdev_fc_lock);
+}
+#endif
+
+static void get_sample_stats(int cpu)
+{
+       int blog = softnet_data[cpu].input_pkt_queue.qlen;
+       int avg_blog = softnet_data[cpu].avg_blog;
+
+       avg_blog = (avg_blog >> 1) + (blog >> 1);
+
+       if (avg_blog > mod_cong) {
+               /* Above moderate congestion levels. */
+               softnet_data[cpu].cng_level = NET_RX_CN_HIGH;
+       } else if (avg_blog > lo_cong) {
+               softnet_data[cpu].cng_level = NET_RX_CN_MOD;
+       } else if (avg_blog > no_cong) {
+               softnet_data[cpu].cng_level = NET_RX_CN_LOW;
+       } else {
+               /* No congestion. */
+               softnet_data[cpu].cng_level = NET_RX_SUCCESS;
+       }
+
+       softnet_data[cpu].avg_blog = avg_blog;
+}
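+
+/*
+ * Worked example of the averaging step above: it is an EWMA with weight
+ * 1/2, avg' = avg/2 + backlog/2. With avg_blog = 120 and an instantaneous
+ * backlog of 40, the new avg_blog is 60 + 20 = 80, which maps to
+ * NET_RX_CN_LOW under the default thresholds (no_cong = 20, lo_cong = 100).
+ */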
+
+
+/**
+ *     netif_rx        -       post buffer to the network code
+ *     @skb: buffer to post
+ *
+ *     This function receives a packet from a device driver and queues it for
+ *     the upper (protocol) levels to process.  It always succeeds. The buffer
+ *     may be dropped during processing for congestion control or by the
+ *     protocol layers.
+ *
+ *     Return values:
+ *     NET_RX_SUCCESS  (no congestion)
+ *     NET_RX_CN_LOW   (low congestion)
+ *     NET_RX_CN_MOD   (moderate congestion)
+ *     NET_RX_CN_HIGH  (high congestion)
+ *     NET_RX_DROP     (packet was dropped)
+ */
+
+int netif_rx(struct sk_buff *skb)
+{
+#ifdef CONFIG_SMP
+        unsigned long cpu_mask;
+#endif
+        struct task_struct *p;
+        unsigned int dest_dom;
+       int this_cpu = smp_processor_id();
+       struct softnet_data *queue;
+       unsigned long flags;
+
+       if (skb->stamp.tv_sec == 0)
+               get_fast_time(&skb->stamp);
+
+       /* The code is arranged so that the path is shortest when the
+          CPU is congested but still operating.
+        */
+       queue = &softnet_data[this_cpu];
+
+       local_irq_save(flags);
+        
+       netdev_rx_stat[this_cpu].total++;
+
+        skb->h.raw = skb->nh.raw = skb->data;
+        
+        if ( skb->len < 2 ) goto drop;
+        switch ( ntohs(skb->mac.ethernet->h_proto) )
+        {
+        case ETH_P_ARP:
+            if ( skb->len < 28 ) goto drop;
+            dest_dom = ntohl(*(unsigned long *)
+                             (skb->nh.raw + 24));
+            break;
+        case ETH_P_IP:
+            if ( skb->len < 20 ) goto drop;
+            dest_dom = ntohl(*(unsigned long *)
+                             (skb->nh.raw + 16));
+            break;
+        default:
+            goto drop;
+        }
+        
+        if ( (dest_dom < opt_ipbase) ||
+             (dest_dom > (opt_ipbase + 16)) )
+            goto drop;
+        
+        dest_dom -= opt_ipbase;
+        
+        read_lock(&tasklist_lock);
+        p = &idle0_task;
+        do {
+            if ( p->domain != dest_dom ) continue;
+            skb_queue_tail(&p->net_vif_list[0]->skb_list, skb); // vfr will fix.
+            cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
+            read_unlock(&tasklist_lock);
+            goto found;
+        }
+        while ( (p = p->next_task) != &idle0_task );
+        read_unlock(&tasklist_lock);
+        goto drop;
+
+ found:
+#if 0
+        __skb_queue_tail(&queue->input_pkt_queue,skb);
+        /* Runs from irqs or BH's, no need to wake BH */
+        cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
+        local_irq_restore(flags);
+        get_sample_stats(this_cpu);
+        return softnet_data[this_cpu].cng_level;
+#else
+        hyp_event_notify(cpu_mask);
+        local_irq_restore(flags);
+        return 0;
+#endif
+
+drop:
+       netdev_rx_stat[this_cpu].dropped++;
+       local_irq_restore(flags);
+
+       kfree_skb(skb);
+       return NET_RX_DROP;
+}
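+
+/*
+ * Hypothetical caller sketch: a driver receive path allocates an skb,
+ * copies the frame in, and hands it to netif_rx(). The names below
+ * (example_rx, frame, len) are illustrative only.
+ */
+#if 0
+static void example_rx(struct net_device *dev, void *frame, unsigned int len)
+{
+       struct sk_buff *skb = dev_alloc_skb(len);
+       if (skb == NULL)
+               return;                    /* drop on allocation failure */
+       skb->dev = dev;
+       memcpy(skb_put(skb, len), frame, len);
+       skb->protocol = eth_type_trans(skb, dev);
+       netif_rx(skb);                     /* queue for the upper layers */
+}
+#endif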
+
+/* Deliver skb to an old protocol, which is not well threaded
+   or which does not understand shared skbs.
+ */
+static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
+{
+       static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
+       int ret = NET_RX_DROP;
+
+
+       if (!last) {
+               skb = skb_clone(skb, GFP_ATOMIC);
+               if (skb == NULL)
+                       return ret;
+       }
+       if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
+               kfree_skb(skb);
+               return ret;
+       }
+
+       /* The (correct) assumption is that old protocols
+          did not depend on BHs other than NET_BH and TIMER_BH.
+        */
+
+       /* Emulate NET_BH with special spinlock */
+       spin_lock(&net_bh_lock);
+
+       /* Disable timers and wait for completion of all running timers */
+       tasklet_disable(bh_task_vec+TIMER_BH);
+
+       ret = pt->func(skb, skb->dev, pt);
+
+       tasklet_hi_enable(bh_task_vec+TIMER_BH);
+       spin_unlock(&net_bh_lock);
+       return ret;
+}
+
+static void net_tx_action(struct softirq_action *h)
+{
+       int cpu = smp_processor_id();
+
+       if (softnet_data[cpu].completion_queue) {
+               struct sk_buff *clist;
+
+               local_irq_disable();
+               clist = softnet_data[cpu].completion_queue;
+               softnet_data[cpu].completion_queue = NULL;
+               local_irq_enable();
+
+               while (clist != NULL) {
+                       struct sk_buff *skb = clist;
+                       clist = clist->next;
+
+                       BUG_TRAP(atomic_read(&skb->users) == 0);
+                       __kfree_skb(skb);
+               }
+       }
+
+       if (softnet_data[cpu].output_queue) {
+               struct net_device *head;
+
+               local_irq_disable();
+               head = softnet_data[cpu].output_queue;
+               softnet_data[cpu].output_queue = NULL;
+               local_irq_enable();
+
+               while (head != NULL) {
+                       struct net_device *dev = head;
+                       head = head->next_sched;
+
+                       smp_mb__before_clear_bit();
+                       clear_bit(__LINK_STATE_SCHED, &dev->state);
+
+                       if (spin_trylock(&dev->queue_lock)) {
+                               qdisc_run(dev);
+                               spin_unlock(&dev->queue_lock);
+                       } else {
+                               netif_schedule(dev);
+                       }
+               }
+       }
+}
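+
+/*
+ * Sketch (assumes the stock 2.4 helpers): a driver's TX-complete interrupt
+ * frees skbs with dev_kfree_skb_irq(), which chains them onto the per-CPU
+ * completion_queue drained by net_tx_action() above.
+ */
+#if 0
+static void example_tx_complete(struct net_device *dev, struct sk_buff *skb)
+{
+       dev_kfree_skb_irq(skb);     /* defer the free to NET_TX_SOFTIRQ */
+       netif_wake_queue(dev);      /* allow further transmissions      */
+}
+#endif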
+
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
+#endif
+
+static __inline__ int handle_bridge(struct sk_buff *skb,
+                                    struct packet_type *pt_prev)
+{
+       int ret = NET_RX_DROP;
+
+       if (pt_prev) {
+               if (!pt_prev->data)
+                       ret = deliver_to_old_ones(pt_prev, skb, 0);
+               else {
+                       atomic_inc(&skb->users);
+                       ret = pt_prev->func(skb, skb->dev, pt_prev);
+               }
+       }
+
+#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+       br_handle_frame_hook(skb);
+#endif
+       return ret;
+}
+
+
+#ifdef CONFIG_NET_DIVERT
+static inline void handle_diverter(struct sk_buff *skb)
+{
+       /* if diversion is supported on device, then divert */
+       if (skb->dev->divert && skb->dev->divert->divert)
+               divert_frame(skb);
+}
+#endif   /* CONFIG_NET_DIVERT */
+
+
+void flush_rx_queue(void)
+{
+    struct sk_buff *skb;
+    shared_info_t *s = current->shared_info;
+    net_ring_t *net_ring;
+    unsigned int i, nvif;
+    rx_entry_t rx;
+
+    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
+
+    for (nvif = 0; nvif < current->num_net_vifs; nvif++)
+    {
+        net_ring = current->net_vif_list[nvif]->net_ring;
+        while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
+                        != NULL )
+        {
+            /*
+             * Write the virtual MAC address into the destination field
+             * of the ethernet packet, and do the same for ARP reply
+             * packets. This is easy because the virtual MAC address
+             * is always 00-00-00-00-00-00.
+             */
+            memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
+            if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
+                memset(skb->nh.raw + 18, 0, ETH_ALEN);
+
+            i = net_ring->rx_cons;
+            if ( i != net_ring->rx_prod )
+            {
+                if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) )
+                {
+                    if ( (skb->len + ETH_HLEN) < rx.size )
+                        rx.size = skb->len + ETH_HLEN;
+                    copy_to_user((void *)rx.addr, skb->mac.raw, rx.size);
+                    copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx));
+                }
+                net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
+                if ( net_ring->rx_cons == net_ring->rx_event )
+                    set_bit(_EVENT_NET_RX, &s->events);
+            }
+            kfree_skb(skb);
+        }
+    }
+}
+
+
+/*
+ *     Map an interface index to its name (SIOCGIFNAME)
+ */
+
+/*
+ *     We need this ioctl for efficient implementation of the
+ *     if_indextoname() function required by the IPv6 API.  Without
+ *     it, we would have to search all the interfaces to find a
+ *     match.  --pb
+ */
+
+static int dev_ifname(struct ifreq *arg)
+{
+       struct net_device *dev;
+       struct ifreq ifr;
+
+       /*
+        *      Fetch the caller's info block. 
+        */
+       
+       if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+               return -EFAULT;
+
+       read_lock(&dev_base_lock);
+       dev = __dev_get_by_index(ifr.ifr_ifindex);
+       if (!dev) {
+               read_unlock(&dev_base_lock);
+               return -ENODEV;
+       }
+
+       strcpy(ifr.ifr_name, dev->name);
+       read_unlock(&dev_base_lock);
+
+       if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+               return -EFAULT;
+       return 0;
+}
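+
+/*
+ * Hedged user-space sketch of SIOCGIFNAME (hypothetical helper; compile
+ * against libc, not this file, and assume the usual socket ioctl path):
+ * fill in ifr_ifindex and the kernel returns the interface name.
+ */
+#if 0
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <string.h>
+
+int example_indextoname(int sock, int ifindex, char *name)
+{
+       struct ifreq ifr;
+       ifr.ifr_ifindex = ifindex;
+       if (ioctl(sock, SIOCGIFNAME, &ifr) < 0)
+               return -1;
+       strcpy(name, ifr.ifr_name);   /* NUL-terminated, at most IFNAMSIZ */
+       return 0;
+}
+#endif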
+
+
+/**
+ *     netdev_set_master       -       set up master/slave pair
+ *     @slave: slave device
+ *     @master: new master device
+ *
+ *     Changes the master device of the slave. Pass %NULL to break the
+ *     bonding. The caller must hold the RTNL semaphore. On a failure
+ *     a negative errno code is returned. On success the reference counts
+ *     are adjusted, %RTM_NEWLINK is sent to the routing socket and the
+ *     function returns zero.
+ */
+int netdev_set_master(struct net_device *slave, struct net_device *master)
+{
+       struct net_device *old = slave->master;
+
+       if (master) {
+               if (old)
+                       return -EBUSY;
+               dev_hold(master);
+       }
+
+       br_write_lock_bh(BR_NETPROTO_LOCK);
+       slave->master = master;
+       br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+       if (old)
+               dev_put(old);
+
+       if (master)
+               slave->flags |= IFF_SLAVE;
+       else
+               slave->flags &= ~IFF_SLAVE;
+
+       rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
+       return 0;
+}
+
+/**
+ *     dev_set_promiscuity     - update promiscuity count on a device
+ *     @dev: device
+ *     @inc: modifier
+ *
+ *     Add or remove promiscuity from a device. While the count in the device
+ *     remains above zero the interface remains promiscuous. Once it hits zero
+ *     the device reverts back to normal filtering operation. A negative inc
+ *     value is used to drop promiscuity on the device.
+ */
+void dev_set_promiscuity(struct net_device *dev, int inc)
+{
+       unsigned short old_flags = dev->flags;
+
+       dev->flags |= IFF_PROMISC;
+       if ((dev->promiscuity += inc) == 0)
+               dev->flags &= ~IFF_PROMISC;
+       if (dev->flags^old_flags) {
+#ifdef CONFIG_NET_FASTROUTE
+               if (dev->flags&IFF_PROMISC) {
+                       netdev_fastroute_obstacles++;
+                       dev_clear_fastroute(dev);
+               } else
+                       netdev_fastroute_obstacles--;
+#endif
+               dev_mc_upload(dev);
+               printk(KERN_INFO "device %s %s promiscuous mode\n",
+                      dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left");
+       }
+}
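+
+/*
+ * Hypothetical caller sketch: a capture facility takes a promiscuity
+ * reference while active and drops it symmetrically when done. Because the
+ * count is per-device, nested users compose correctly.
+ */
+#if 0
+static void example_capture(struct net_device *dev, int on)
+{
+       dev_set_promiscuity(dev, on ? 1 : -1);
+}
+#endif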
+
+/**
+ *     dev_set_allmulti        - update allmulti count on a device
+ *     @dev: device
+ *     @inc: modifier
+ *
+ *     Add or remove reception of all multicast frames on a device. While the
+ *     count in the device remains above zero the interface remains listening
+ *     to all multicast frames. Once it hits zero the device reverts back to
+ *     normal filtering operation. A negative @inc value is used to drop the
+ *     counter when releasing a resource needing all multicasts.
+ */
+
+void dev_set_allmulti(struct net_device *dev, int inc)
+{
+       unsigned short old_flags = dev->flags;
+
+       dev->flags |= IFF_ALLMULTI;
+       if ((dev->allmulti += inc) == 0)
+               dev->flags &= ~IFF_ALLMULTI;
+       if (dev->flags^old_flags)
+               dev_mc_upload(dev);
+}
+
+int dev_change_flags(struct net_device *dev, unsigned flags)
+{
+       int ret;
+       int old_flags = dev->flags;
+
+       /*
+        *      Set the flags on our device.
+        */
+
+       dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC|
+                              IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) |
+                                      (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI));
+
+       /*
+        *      Load in the correct multicast list now the flags have changed.
+        */                             
+
+       dev_mc_upload(dev);
+
+       /*
+        *      Has the user downed the interface? We handle IFF_UP ourselves
+        *      according to user attempts to set it, rather than blindly
+        *      setting it.
+        */
+
+       ret = 0;
+       if ((old_flags^flags)&IFF_UP)   /* Bit is different  ? */
+       {
+               ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
+
+               if (ret == 0) 
+                       dev_mc_upload(dev);
+       }
+
+       if (dev->flags&IFF_UP &&
+           ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE)))
+               notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
+
+       if ((flags^dev->gflags)&IFF_PROMISC) {
+               int inc = (flags&IFF_PROMISC) ? +1 : -1;
+               dev->gflags ^= IFF_PROMISC;
+               dev_set_promiscuity(dev, inc);
+       }
+
+       /* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
+          is important. Some (broken) drivers set IFF_PROMISC when
+          IFF_ALLMULTI is requested, without asking us and without
+          reporting it.
+        */
+       if ((flags^dev->gflags)&IFF_ALLMULTI) {
+               int inc = (flags&IFF_ALLMULTI) ? +1 : -1;
+               dev->gflags ^= IFF_ALLMULTI;
+               dev_set_allmulti(dev, inc);
+       }
+
+       if (old_flags^dev->flags)
+               rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags);
+
+       return ret;
+}
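+
+/*
+ * Sketch (hypothetical caller): bring an interface up by setting IFF_UP;
+ * dev_change_flags() calls dev_open()/dev_close() as needed and sends the
+ * RTM_NEWLINK message itself.
+ */
+#if 0
+static int example_if_up(struct net_device *dev)
+{
+       return dev_change_flags(dev, dev->flags | IFF_UP);
+}
+#endif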
+
+/*
+ *     Perform the SIOCxIFxxx calls. 
+ */
+static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+{
+       struct net_device *dev;
+       int err;
+
+       if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
+               return -ENODEV;
+
+       switch(cmd) 
+       {
+               case SIOCGIFFLAGS:      /* Get interface flags */
+                       ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING))
+                               |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI));
+                       if (netif_running(dev) && netif_carrier_ok(dev))
+                               ifr->ifr_flags |= IFF_RUNNING;
+                       return 0;
+
+               case SIOCSIFFLAGS:      /* Set interface flags */
+                       return dev_change_flags(dev, ifr->ifr_flags);
+               
+               case SIOCGIFMETRIC:     /* Get the metric on the interface (currently unused) */
+                       ifr->ifr_metric = 0;
+                       return 0;
+                       
+               case SIOCSIFMETRIC:     /* Set the metric on the interface (currently unused) */
+                       return -EOPNOTSUPP;
+       
+               case SIOCGIFMTU:        /* Get the MTU of a device */
+                       ifr->ifr_mtu = dev->mtu;
+                       return 0;
+       
+               case SIOCSIFMTU:        /* Set the MTU of a device */
+                       if (ifr->ifr_mtu == dev->mtu)
+                               return 0;
+
+                       /*
+                        *      MTU must be positive.
+                        */
+                        
+                       if (ifr->ifr_mtu<0)
+                               return -EINVAL;
+
+                       if (!netif_device_present(dev))
+                               return -ENODEV;
+
+                       if (dev->change_mtu)
+                               err = dev->change_mtu(dev, ifr->ifr_mtu);
+                       else {
+                               dev->mtu = ifr->ifr_mtu;
+                               err = 0;
+                       }
+                       if (!err && dev->flags&IFF_UP)
+                               notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev);
+                       return err;
+
+               case SIOCGIFHWADDR:
+                       memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN);
+                       ifr->ifr_hwaddr.sa_family=dev->type;
+                       return 0;
+                               
+               case SIOCSIFHWADDR:
+                       if (dev->set_mac_address == NULL)
+                               return -EOPNOTSUPP;
+                       if (ifr->ifr_hwaddr.sa_family!=dev->type)
+                               return -EINVAL;
+                       if (!netif_device_present(dev))
+                               return -ENODEV;
+                       err = dev->set_mac_address(dev, &ifr->ifr_hwaddr);
+                       if (!err)
+                               notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+                       return err;
+                       
+               case SIOCSIFHWBROADCAST:
+                       if (ifr->ifr_hwaddr.sa_family!=dev->type)
+                               return -EINVAL;
+                       memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN);
+                       notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
+                       return 0;
+
+               case SIOCGIFMAP:
+                       ifr->ifr_map.mem_start=dev->mem_start;
+                       ifr->ifr_map.mem_end=dev->mem_end;
+                       ifr->ifr_map.base_addr=dev->base_addr;
+                       ifr->ifr_map.irq=dev->irq;
+                       ifr->ifr_map.dma=dev->dma;
+                       ifr->ifr_map.port=dev->if_port;
+                       return 0;
+                       
+               case SIOCSIFMAP:
+                       if (dev->set_config) {
+                               if (!netif_device_present(dev))
+                                       return -ENODEV;
+                               return dev->set_config(dev,&ifr->ifr_map);
+                       }
+                       return -EOPNOTSUPP;
+                       
+               case SIOCADDMULTI:
+                       if (dev->set_multicast_list == NULL ||
+                           ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
+                               return -EINVAL;
+                       if (!netif_device_present(dev))
+                               return -ENODEV;
+                       dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1);
+                       return 0;
+
+               case SIOCDELMULTI:
+                       if (dev->set_multicast_list == NULL ||
+                           ifr->ifr_hwaddr.sa_family!=AF_UNSPEC)
+                               return -EINVAL;
+                       if (!netif_device_present(dev))
+                               return -ENODEV;
+                       dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1);
+                       return 0;
+
+               case SIOCGIFINDEX:
+                       ifr->ifr_ifindex = dev->ifindex;
+                       return 0;
+
+               case SIOCGIFTXQLEN:
+                       ifr->ifr_qlen = dev->tx_queue_len;
+                       return 0;
+
+               case SIOCSIFTXQLEN:
+                       if (ifr->ifr_qlen<0)
+                               return -EINVAL;
+                       dev->tx_queue_len = ifr->ifr_qlen;
+                       return 0;
+
+               case SIOCSIFNAME:
+                       if (dev->flags&IFF_UP)
+                               return -EBUSY;
+                       if (__dev_get_by_name(ifr->ifr_newname))
+                               return -EEXIST;
+                       memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ);
+                       dev->name[IFNAMSIZ-1] = 0;
+                       notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+                       return 0;
+
+#ifdef WIRELESS_EXT
+               case SIOCGIWSTATS:
+                       return dev_iwstats(dev, ifr);
+#endif /* WIRELESS_EXT */
+
+               /*
+                *      Unknown or private ioctl
+                */
+
+               default:
+                       if ((cmd >= SIOCDEVPRIVATE &&
+                           cmd <= SIOCDEVPRIVATE + 15) ||
+                           cmd == SIOCBONDENSLAVE ||
+                           cmd == SIOCBONDRELEASE ||
+                           cmd == SIOCBONDSETHWADDR ||
+                           cmd == SIOCBONDSLAVEINFOQUERY ||
+                           cmd == SIOCBONDINFOQUERY ||
+                           cmd == SIOCBONDCHANGEACTIVE ||
+                           cmd == SIOCETHTOOL ||
+                           cmd == SIOCGMIIPHY ||
+                           cmd == SIOCGMIIREG ||
+                           cmd == SIOCSMIIREG) {
+                               if (dev->do_ioctl) {
+                                       if (!netif_device_present(dev))
+                                               return -ENODEV;
+                                       return dev->do_ioctl(dev, ifr, cmd);
+                               }
+                               return -EOPNOTSUPP;
+                       }
+
+#ifdef WIRELESS_EXT
+                       if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+                               if (dev->do_ioctl) {
+                                       if (!netif_device_present(dev))
+                                               return -ENODEV;
+                                       return dev->do_ioctl(dev, ifr, cmd);
+                               }
+                               return -EOPNOTSUPP;
+                       }
+#endif /* WIRELESS_EXT */
+
+       }
+       return -EINVAL;
+}
+
+/*
+ *     This function handles all "interface"-type I/O control requests. The actual
+ *     'doing' part of this is dev_ifsioc above.
+ */
+
+/**
+ *     dev_ioctl       -       network device ioctl
+ *     @cmd: command to issue
+ *     @arg: pointer to a struct ifreq in user space
+ *
+ *     Issue ioctl functions to devices. This is normally called by the
+ *     user space syscall interfaces but can sometimes be useful for 
+ *     other purposes. The return value is the return from the syscall if
+ *     positive or a negative errno code on error.
+ */
+
+int dev_ioctl(unsigned int cmd, void *arg)
+{
+       struct ifreq ifr;
+       int ret;
+       char *colon;
+
+       /* One special case: SIOCGIFCONF takes an ifconf argument
+          and requires a shared lock, because it sleeps writing
+          to user space. It is not supported here.
+        */
+
+       if (cmd == SIOCGIFCONF) {
+               return -ENOSYS;
+       }
+       if (cmd == SIOCGIFNAME) {
+               return dev_ifname((struct ifreq *)arg);
+       }
+
+       if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+               return -EFAULT;
+
+       ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+       colon = strchr(ifr.ifr_name, ':');
+       if (colon)
+               *colon = 0;
+
+       /*
+        *      See which interface the caller is talking about. 
+        */
+        
+       switch(cmd) 
+       {
+               /*
+                *      These ioctl calls:
+                *      - can be done by all.
+                *      - are atomic and do not require locking.
+                *      - return a value.
+                */
+                
+               case SIOCGIFFLAGS:
+               case SIOCGIFMETRIC:
+               case SIOCGIFMTU:
+               case SIOCGIFHWADDR:
+               case SIOCGIFSLAVE:
+               case SIOCGIFMAP:
+               case SIOCGIFINDEX:
+               case SIOCGIFTXQLEN:
+                       dev_load(ifr.ifr_name);
+                       read_lock(&dev_base_lock);
+                       ret = dev_ifsioc(&ifr, cmd);
+                       read_unlock(&dev_base_lock);
+                       if (!ret) {
+                               if (colon)
+                                       *colon = ':';
+                               if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+                                       return -EFAULT;
+                       }
+                       return ret;
+
+               /*
+                *      These ioctl calls:
+                *      - require superuser power.
+                *      - require strict serialization.
+                *      - return a value
+                */
+                
+               case SIOCETHTOOL:
+               case SIOCGMIIPHY:
+               case SIOCGMIIREG:
+                       if (!capable(CAP_NET_ADMIN))
+                               return -EPERM;
+                       dev_load(ifr.ifr_name);
+                       dev_probe_lock();
+                       rtnl_lock();
+                       ret = dev_ifsioc(&ifr, cmd);
+                       rtnl_unlock();
+                       dev_probe_unlock();
+                       if (!ret) {
+                               if (colon)
+                                       *colon = ':';
+                               if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+                                       return -EFAULT;
+                       }
+                       return ret;
+
+               /*
+                *      These ioctl calls:
+                *      - require superuser power.
+                *      - require strict serialization.
+                *      - do not return a value
+                */
+                
+               case SIOCSIFFLAGS:
+               case SIOCSIFMETRIC:
+               case SIOCSIFMTU:
+               case SIOCSIFMAP:
+               case SIOCSIFHWADDR:
+               case SIOCSIFSLAVE:
+               case SIOCADDMULTI:
+               case SIOCDELMULTI:
+               case SIOCSIFHWBROADCAST:
+               case SIOCSIFTXQLEN:
+               case SIOCSIFNAME:
+               case SIOCSMIIREG:
+               case SIOCBONDENSLAVE:
+               case SIOCBONDRELEASE:
+               case SIOCBONDSETHWADDR:
+               case SIOCBONDSLAVEINFOQUERY:
+               case SIOCBONDINFOQUERY:
+               case SIOCBONDCHANGEACTIVE:
+                       if (!capable(CAP_NET_ADMIN))
+                               return -EPERM;
+                       dev_load(ifr.ifr_name);
+                       dev_probe_lock();
+                       rtnl_lock();
+                       ret = dev_ifsioc(&ifr, cmd);
+                       rtnl_unlock();
+                       dev_probe_unlock();
+                       return ret;
+       
+               case SIOCGIFMEM:
+                       /* Get the per device memory space. We can add this but currently
+                          do not support it */
+               case SIOCSIFMEM:
+                       /* Set the per device memory buffer space. Not applicable in our case */
+               case SIOCSIFLINK:
+                       return -EINVAL;
+
+               /*
+                *      Unknown or private ioctl.
+                */     
+                
+               default:
+                       if (cmd >= SIOCDEVPRIVATE &&
+                           cmd <= SIOCDEVPRIVATE + 15) {
+                               dev_load(ifr.ifr_name);
+                               dev_probe_lock();
+                               rtnl_lock();
+                               ret = dev_ifsioc(&ifr, cmd);
+                               rtnl_unlock();
+                               dev_probe_unlock();
+                               if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+                                       return -EFAULT;
+                               return ret;
+                       }
+#ifdef WIRELESS_EXT
+                       /* Take care of Wireless Extensions */
+                       if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+                               /* If command is `set a parameter', or
+                                * `get the encoding parameters', check if
+                                * the user has the right to do it */
+                               if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) {
+                                       if(!capable(CAP_NET_ADMIN))
+                                               return -EPERM;
+                               }
+                               dev_load(ifr.ifr_name);
+                               rtnl_lock();
+                               ret = dev_ifsioc(&ifr, cmd);
+                               rtnl_unlock();
+                               if (!ret && IW_IS_GET(cmd) &&
+                                   copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+                                       return -EFAULT;
+                               return ret;
+                       }
+#endif /* WIRELESS_EXT */
+                       return -EINVAL;
+       }
+}
+
+
+/**
+ *     dev_new_index   -       allocate an ifindex
+ *
+ *     Returns a suitable unique value for a new device interface
+ *     number.  The caller must hold the rtnl semaphore or the
+ *     dev_base_lock to be sure it remains unique.
+ */
+int dev_new_index(void)
+{
+       static int ifindex;
+       for (;;) {
+               if (++ifindex <= 0)
+                       ifindex=1;
+               if (__dev_get_by_index(ifindex) == NULL)
+                       return ifindex;
+       }
+}
+
+static int dev_boot_phase = 1;
+
+/**
+ *     register_netdevice      - register a network device
+ *     @dev: device to register
+ *     
+ *     Take a completed network device structure and add it to the kernel
+ *     interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
+ *     chain. 0 is returned on success. A negative errno code is returned
+ *     on a failure to set up the device, or if the name is a duplicate.
+ *
+ *     Callers must hold the rtnl semaphore.  See the comment at the
+ *     end of Space.c for details about the locking.  You may want
+ *     register_netdev() instead of this.
+ *
+ *     BUGS:
+ *     The locking appears insufficient to guarantee two parallel registers
+ *     will not get the same name.
+ */
+
+int net_dev_init(void);
+
+int register_netdevice(struct net_device *dev)
+{
+       struct net_device *d, **dp;
+#ifdef CONFIG_NET_DIVERT
+       int ret;
+#endif
+
+       spin_lock_init(&dev->queue_lock);
+       spin_lock_init(&dev->xmit_lock);
+       dev->xmit_lock_owner = -1;
+#ifdef CONFIG_NET_FASTROUTE
+       dev->fastpath_lock=RW_LOCK_UNLOCKED;
+#endif
+
+       if (dev_boot_phase)
+               net_dev_init();
+
+#ifdef CONFIG_NET_DIVERT
+       ret = alloc_divert_blk(dev);
+       if (ret)
+               return ret;
+#endif /* CONFIG_NET_DIVERT */
+       
+       dev->iflink = -1;
+
+       /* Init, if this function is available */
+       if (dev->init && dev->init(dev) != 0) {
+#ifdef CONFIG_NET_DIVERT
+               free_divert_blk(dev);
+#endif
+               return -EIO;
+       }
+
+       dev->ifindex = dev_new_index();
+       if (dev->iflink == -1)
+               dev->iflink = dev->ifindex;
+
+       /* Check for existence, and append to tail of chain */
+       for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
+               if (d == dev || strcmp(d->name, dev->name) == 0) {
+#ifdef CONFIG_NET_DIVERT
+                       free_divert_blk(dev);
+#endif
+                       return -EEXIST;
+               }
+       }
+       /*
+        *      Install a nil rebuild_header routine; it should never be
+        *      called and serves only as a bug trap.
+        */
+
+       if (dev->rebuild_header == NULL)
+               dev->rebuild_header = default_rebuild_header;
+
+       /*
+        *      Default initial state at registry is that the
+        *      device is present.
+        */
+
+       set_bit(__LINK_STATE_PRESENT, &dev->state);
+
+       dev->next = NULL;
+       dev_init_scheduler(dev);
+       write_lock_bh(&dev_base_lock);
+       *dp = dev;
+       dev_hold(dev);
+       dev->deadbeaf = 0;
+       write_unlock_bh(&dev_base_lock);
+
+       /* Notify protocols, that a new device appeared. */
+       notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+
+       return 0;
+}
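+
+/*
+ * Minimal registration sketch (hypothetical driver init; per the comment
+ * above, the caller is assumed to hold the rtnl semaphore).
+ */
+#if 0
+static int example_register(struct net_device *dev)
+{
+       strcpy(dev->name, "xeth0");      /* hypothetical fixed name         */
+       return register_netdevice(dev);  /* 0 on success, -errno on failure */
+}
+#endif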
+
+/**
+ *     netdev_finish_unregister - complete unregistration
+ *     @dev: device
+ *
+ *     Destroy and free a dead device. A value of zero is returned on
+ *     success.
+ */
+int netdev_finish_unregister(struct net_device *dev)
+{
+       BUG_TRAP(dev->ip_ptr==NULL);
+       BUG_TRAP(dev->ip6_ptr==NULL);
+       BUG_TRAP(dev->dn_ptr==NULL);
+
+       if (!dev->deadbeaf) {
+               printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name);
+               return 0;
+       }
+#ifdef NET_REFCNT_DEBUG
+       printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name,
+              (dev->features & NETIF_F_DYNALLOC)?"":", old style");
+#endif
+       if (dev->destructor)
+               dev->destructor(dev);
+       if (dev->features & NETIF_F_DYNALLOC)
+               kfree(dev);
+       return 0;
+}
+
+/**
+ *     unregister_netdevice - remove device from the kernel
+ *     @dev: device
+ *
+ *     This function shuts down a device interface and removes it
+ *     from the kernel tables. On success 0 is returned, on a failure
+ *     a negative errno code is returned.
+ *
+ *     Callers must hold the rtnl semaphore.  See the comment at the
+ *     end of Space.c for details about the locking.  You may want
+ *     unregister_netdev() instead of this.
+ */
+
+int unregister_netdevice(struct net_device *dev)
+{
+       unsigned long now, warning_time;
+       struct net_device *d, **dp;
+
+       /* If device is running, close it first. */
+       if (dev->flags & IFF_UP)
+               dev_close(dev);
+
+       BUG_TRAP(dev->deadbeaf==0);
+       dev->deadbeaf = 1;
+
+       /* And unlink it from device chain. */
+       for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+               if (d == dev) {
+                       write_lock_bh(&dev_base_lock);
+                       *dp = d->next;
+                       write_unlock_bh(&dev_base_lock);
+                       break;
+               }
+       }
+       if (d == NULL) {
+               printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev);
+               return -ENODEV;
+       }
+
+       /* Synchronize to net_rx_action. */
+       br_write_lock_bh(BR_NETPROTO_LOCK);
+       br_write_unlock_bh(BR_NETPROTO_LOCK);
+
+       if (dev_boot_phase == 0) {
+
+               /* Shutdown queueing discipline. */
+               dev_shutdown(dev);
+
+               /* Notify protocols, that we are about to destroy
+                  this device. They should clean all the things.
+                */
+               notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+
+               /*
+                *      Flush the multicast chain
+                */
+               dev_mc_discard(dev);
+       }
+
+       if (dev->uninit)
+               dev->uninit(dev);
+
+       /* Notifier chain MUST detach us from master device. */
+       BUG_TRAP(dev->master==NULL);
+
+#ifdef CONFIG_NET_DIVERT
+       free_divert_blk(dev);
+#endif
+
+       if (dev->features & NETIF_F_DYNALLOC) {
+#ifdef NET_REFCNT_DEBUG
+               if (atomic_read(&dev->refcnt) != 1)
+                       printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1);
+#endif
+               dev_put(dev);
+               return 0;
+       }
+
+       /* Last reference is our one */
+       if (atomic_read(&dev->refcnt) == 1) {
+               dev_put(dev);
+               return 0;
+       }
+
+#ifdef NET_REFCNT_DEBUG
+       printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt));
+#endif
+
+       /* EXPLANATION. If dev->refcnt is not now 1 (our own reference)
+          it means that someone in the kernel still has a reference
+          to this device and we cannot release it.
+
+          "New style" devices have destructors, hence we can return from this
+          function and the destructor will do all the work later.  As of
+          kernel 2.4.0 there are very few "new style" devices.
+
+          "Old style" devices expect that the device is free of any references
+          upon exit from this function.
+          We cannot return from this function until all such references have
+          fallen away.  This is because the caller of this function will probably
+          immediately kfree(*dev) and then be unloaded via sys_delete_module.
+
+          So, we linger until all references fall away.  The duration of the
+          linger is basically unbounded! It is driven by, for example, the
+          current setting of sysctl_ipfrag_time.
+
+          After 1 second, we start to rebroadcast unregister notifications
+          in the hope that careless clients will release the device.
+
+        */
+
+       now = warning_time = jiffies;
+       while (atomic_read(&dev->refcnt) != 1) {
+               if ((jiffies - now) > 1*HZ) {
+                       /* Rebroadcast unregister notification */
+                       notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+               }
+                mdelay(250);
+               if ((jiffies - warning_time) > 10*HZ) {
+                       printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
+                                       "become free. Usage count = %d\n",
+                                       dev->name, atomic_read(&dev->refcnt));
+                       warning_time = jiffies;
+               }
+       }
+       dev_put(dev);
+       return 0;
+}
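+
+/*
+ * Teardown sketch matching register_netdevice() above; as documented, the
+ * caller is assumed to take the rtnl semaphore around the call.
+ */
+#if 0
+static void example_unregister(struct net_device *dev)
+{
+       rtnl_lock();
+       unregister_netdevice(dev);   /* may linger until refcnt drops to 1 */
+       rtnl_unlock();
+}
+#endif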
+
+
+/*
+ *     Initialize the DEV module. At boot time this walks the device list and
+ *     unhooks any devices that fail to initialise (normally hardware not 
+ *     present) and leaves us with a valid list of present and active devices.
+ *
+ */
+
+extern void net_device_init(void);
+extern void ip_auto_config(void);
+#ifdef CONFIG_NET_DIVERT
+extern void dv_init(void);
+#endif /* CONFIG_NET_DIVERT */
+
+
+/*
+ *       Callers must hold the rtnl semaphore.  See the comment at the
+ *       end of Space.c for details about the locking.
+ */
+int __init net_dev_init(void)
+{
+       struct net_device *dev, **dp;
+       int i;
+
+       if (!dev_boot_phase)
+               return 0;
+
+        /*
+         * KAF: this was done in socket_init, but that top-half stuff is gone.
+         */
+        skb_init();
+
+       /*
+        *      Initialise the packet receive queues.
+        */
+
+       for (i = 0; i < NR_CPUS; i++) {
+               struct softnet_data *queue;
+
+               queue = &softnet_data[i];
+                skb_queue_head_init(&queue->input_pkt_queue);
+               queue->throttle = 0;
+               queue->cng_level = 0;
+               queue->avg_blog = 10; /* arbitrary non-zero */
+               queue->completion_queue = NULL;
+       }
+       
+       /*
+        *      Add the devices.
+        *      If the call to dev->init fails, the dev is removed
+        *      from the chain, disconnecting the device until the
+        *      next reboot.
+        *
+        *      NB: at boot phase networking is dead; no locking is
+        *      required. But we still take dev_base_lock for sanity.
+        */
+
+       dp = &dev_base;
+       while ((dev = *dp) != NULL) {
+               spin_lock_init(&dev->queue_lock);
+               spin_lock_init(&dev->xmit_lock);
+
+               dev->xmit_lock_owner = -1;
+               dev->iflink = -1;
+               dev_hold(dev);
+
+               /*
+                * Allocate name. If the init() fails
+                * the name will be reissued correctly.
+                */
+               if (strchr(dev->name, '%'))
+                       dev_alloc_name(dev, dev->name);
+
+               if (dev->init && dev->init(dev)) {
+                       /*
+                        * It failed to come up. It will be unhooked later.
+                        * dev_alloc_name can now advance to the next
+                        * suitable name, which is checked next.
+                        */
+                       dev->deadbeaf = 1;
+                       dp = &dev->next;
+               } else {
+                       dp = &dev->next;
+                       dev->ifindex = dev_new_index();
+                       if (dev->iflink == -1)
+                               dev->iflink = dev->ifindex;
+                       if (dev->rebuild_header == NULL)
+                               dev->rebuild_header = default_rebuild_header;
+                       dev_init_scheduler(dev);
+                       set_bit(__LINK_STATE_PRESENT, &dev->state);
+               }
+       }
+
+       /*
+        * Unhook devices that failed to come up
+        */
+       dp = &dev_base;
+       while ((dev = *dp) != NULL) {
+               if (dev->deadbeaf) {
+                       write_lock_bh(&dev_base_lock);
+                       *dp = dev->next;
+                       write_unlock_bh(&dev_base_lock);
+                       dev_put(dev);
+               } else {
+                       dp = &dev->next;
+               }
+       }
+
+       dev_boot_phase = 0;
+
+       open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
+       //open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+       dst_init();
+       dev_mcast_init();
+
+#ifdef CONFIG_NET_SCHED
+       pktsched_init();
+#endif
+
+       /*
+        *      Initialise network devices
+        */
+        
+       net_device_init();
+
+       return 0;
+}
+
+
+/*
+ * do_net_update:
+ * 
+ * Called from guest OS to notify updates to its transmit and/or receive
+ * descriptor rings.
+ */
+#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
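+/*
+ * These masks assume the ring sizes are powers of two, so index arithmetic
+ * wraps without a divide: e.g. for a 256-entry TX ring,
+ * TX_RING_INC(255) == 0 and TX_RING_ADD(254, 3) == 1.
+ */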
+long do_net_update(void)
+{
+    shared_info_t *shared = current->shared_info;    
+    net_ring_t *net_ring = current->net_ring_base;
+    unsigned int i;
+    struct sk_buff *skb;
+    tx_entry_t tx;
+
+    for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) )
+    {
+        if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+            continue;
+
+        if ( TX_RING_INC(i) == net_ring->tx_event )
+            set_bit(_EVENT_NET_TX, &shared->events);
+
+        skb = alloc_skb(tx.size, GFP_KERNEL);
+        if ( skb == NULL ) continue;
+        skb_put(skb, tx.size);
+        if ( copy_from_user(skb->data, (void *)tx.addr, tx.size) )
+        {
+            kfree_skb(skb);
+            continue;
+        }
+        skb->dev = the_dev;
+
+        if ( skb->len < 16 )
+        {
+            kfree_skb(skb);
+            continue;
+        }
+
+        memcpy(skb->data + ETH_ALEN, skb->dev->dev_addr, ETH_ALEN);
+        
+        switch ( ntohs(*(unsigned short *)(skb->data + 12)) )
+        {
+        case ETH_P_ARP:
+            skb->protocol = __constant_htons(ETH_P_ARP);
+            if ( skb->len < 42 ) break;
+            memcpy(skb->data + 22, skb->dev->dev_addr, 6);
+            break;
+        case ETH_P_IP:
+            skb->protocol = __constant_htons(ETH_P_IP);
+            break;
+        default:
+            kfree_skb(skb);
+            skb = NULL;
+            break;
+        }
+
+        if ( skb != NULL )
+        {
+            skb_get(skb); /* get a reference for non-local delivery */
+            skb->protocol = eth_type_trans(skb, skb->dev);
+            if ( netif_rx(skb) == 0 )
+            {
+                /* Give up non-local reference. Packet delivered locally. */
+                kfree_skb(skb);
+            }
+            else
+            {
+                /* Pass the non-local reference to the net device. */
+                skb_push(skb, skb->dev->hard_header_len);
+                dev_queue_xmit(skb);
+            }
+        }
+    }
+
+    net_ring->tx_cons = i;
+
+    return 0;
+}
+
+
+int setup_network_devices(void)
+{
+    int ret;
+    struct net_device *dev = dev_get_by_name("eth0");
+
+    if ( dev == NULL ) 
+    {
+        printk("Could not find device eth0\n");
+        return 0;
+    }
+
+    ret = dev_open(dev);
+    if ( ret != 0 )
+    {
+        printk("Error opening device eth0 for use (%d)\n", ret);
+        return 0;
+    }
+    printk("Device eth0 opened and ready for use\n");
+    the_dev = dev;
+
+    return 1;
+}
+
diff --git a/xen-2.4.16/net/dev_mcast.c b/xen-2.4.16/net/dev_mcast.c
new file mode 100644 (file)
index 0000000..d7d2ae3
--- /dev/null
@@ -0,0 +1,276 @@
+/*
+ *     Linux NET3:     Multicast List maintenance. 
+ *
+ *     Authors:
+ *             Tim Kordas <tjk@nostromo.eeap.cwru.edu> 
+ *             Richard Underwood <richard@wuzz.demon.co.uk>
+ *
+ *     Stir fried together from the IP multicast and CAP patches above
+ *             Alan Cox <Alan.Cox@linux.org>   
+ *
+ *     Fixes:
+ *             Alan Cox        :       Update the device on a real delete
+ *                                     rather than any time but...
+ *             Alan Cox        :       IFF_ALLMULTI support.
+ *             Alan Cox        :       New format set_multicast_list() calls.
+ *             Gleb Natapov    :       Remove dev_mc_lock.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h> 
+#include <linux/lib.h> 
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+//#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+//#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+//#include <linux/proc_fs.h>
+#include <linux/init.h>
+//#include <net/ip.h>
+//#include <net/route.h>
+#include <linux/skbuff.h>
+//#include <net/sock.h>
+//#include <net/arp.h>
+
+
+/*
+ *     Device multicast list maintenance. 
+ *
+ *     This is used both by IP and by the user level maintenance functions. 
+ *     Unlike BSD we maintain a usage count on a given multicast address so 
+ *     that a casual user application can add/delete multicasts used by 
+ *     protocols without doing damage to the protocols when it deletes the
+ *     entries. It also helps IP as it tracks overlapping maps.
+ *
+ *     Device mc lists are changed from BH context, at least if IPv6 is
+ *     enabled, so they must be BH protected.
+ *
+ *     We block accesses to device mc filters with dev->xmit_lock.
+ */
+
+/*
+ *     Update the multicast list into the physical NIC controller.
+ */
+static void __dev_mc_upload(struct net_device *dev)
+{
+       /* Don't do anything until the interface is up
+        * [dev_open will call this function, so the list will
+        * stay sane].
+        */
+
+       if (!(dev->flags&IFF_UP))
+               return;
+
+       /*
+        *      Devices with no set multicast or which have been
+        *      detached don't get set.
+        */
+
+       if (dev->set_multicast_list == NULL ||
+           !netif_device_present(dev))
+               return;
+
+       dev->set_multicast_list(dev);
+}
+
+void dev_mc_upload(struct net_device *dev)
+{
+       spin_lock_bh(&dev->xmit_lock);
+       __dev_mc_upload(dev);
+       spin_unlock_bh(&dev->xmit_lock);
+}
+
+/*
+ *     Delete a device level multicast
+ */
+int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
+{
+       int err = 0;
+       struct dev_mc_list *dmi, **dmip;
+
+       spin_lock_bh(&dev->xmit_lock);
+
+       for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
+               /*
+                *      Find the entry we want to delete. The device could
+                *      have variable length entries so check these too.
+                */
+               if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+                   alen == dmi->dmi_addrlen) {
+                       if (glbl) {
+                               int old_glbl = dmi->dmi_gusers;
+                               dmi->dmi_gusers = 0;
+                               if (old_glbl == 0)
+                                       break;
+                       }
+                       if (--dmi->dmi_users)
+                               goto done;
+
+                       /*
+                        *      Last user. So delete the entry.
+                        */
+                       *dmip = dmi->next;
+                       dev->mc_count--;
+
+                       kfree(dmi);
+
+                       /*
+                        *      We have altered the list, so the filter
+                        *      loaded into the card is now wrong. Fix it.
+                        */
+                       __dev_mc_upload(dev);
+                       
+                       spin_unlock_bh(&dev->xmit_lock);
+                       return 0;
+               }
+       }
+       err = -ENOENT;
+done:
+       spin_unlock_bh(&dev->xmit_lock);
+       return err;
+}
+
+/*
+ *     Add a device level multicast
+ */
+int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
+{
+       int err = 0;
+       struct dev_mc_list *dmi, *dmi1;
+
+       dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
+
+       spin_lock_bh(&dev->xmit_lock);
+       for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
+               if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
+                   dmi->dmi_addrlen == alen) {
+                       if (glbl) {
+                               int old_glbl = dmi->dmi_gusers;
+                               dmi->dmi_gusers = 1;
+                               if (old_glbl)
+                                       goto done;
+                       }
+                       dmi->dmi_users++;
+                       goto done;
+               }
+       }
+
+       if ((dmi = dmi1) == NULL) {
+               spin_unlock_bh(&dev->xmit_lock);
+               return -ENOMEM;
+       }
+       memcpy(dmi->dmi_addr, addr, alen);
+       dmi->dmi_addrlen = alen;
+       dmi->next = dev->mc_list;
+       dmi->dmi_users = 1;
+       dmi->dmi_gusers = glbl ? 1 : 0;
+       dev->mc_list = dmi;
+       dev->mc_count++;
+
+       __dev_mc_upload(dev);
+       
+       spin_unlock_bh(&dev->xmit_lock);
+       return 0;
+
+done:
+       spin_unlock_bh(&dev->xmit_lock);
+       if (dmi1)
+               kfree(dmi1);
+       return err;
+}
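+
+/*
+ * Hypothetical caller sketch: a protocol pins a link-level multicast
+ * address while it needs it and releases it symmetrically; the usage
+ * count above means overlapping users do no damage to each other.
+ */
+#if 0
+static void example_join(struct net_device *dev)
+{
+       static unsigned char mc[ETH_ALEN] = {0x01,0x00,0x5e,0x00,0x00,0x01};
+
+       dev_mc_add(dev, mc, ETH_ALEN, 0);    /* take a reference on addr */
+       /* ... receive multicast traffic ... */
+       dev_mc_delete(dev, mc, ETH_ALEN, 0); /* release it symmetrically */
+}
+#endif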
+
+/*
+ *     Discard multicast list when a device is downed
+ */
+
+void dev_mc_discard(struct net_device *dev)
+{
+       spin_lock_bh(&dev->xmit_lock);
+       
+       while (dev->mc_list != NULL) {
+               struct dev_mc_list *tmp = dev->mc_list;
+               dev->mc_list = tmp->next;
+               if (tmp->dmi_users > tmp->dmi_gusers)
+                       printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users);
+               kfree(tmp);
+       }
+       dev->mc_count = 0;
+
+       spin_unlock_bh(&dev->xmit_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+static int dev_mc_read_proc(char *buffer, char **start, off_t offset,
+                           int length, int *eof, void *data)
+{
+       off_t pos = 0, begin = 0;
+       struct dev_mc_list *m;
+       int len = 0;
+       struct net_device *dev;
+
+       read_lock(&dev_base_lock);
+       for (dev = dev_base; dev; dev = dev->next) {
+               spin_lock_bh(&dev->xmit_lock);
+               for (m = dev->mc_list; m; m = m->next) {
+                       int i;
+
+                       len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex,
+                                      dev->name, m->dmi_users, m->dmi_gusers);
+
+                       for (i = 0; i < m->dmi_addrlen; i++)
+                               len += sprintf(buffer+len, "%02x", m->dmi_addr[i]);
+
+                       len += sprintf(buffer+len, "\n");
+
+                       pos = begin + len;
+                       if (pos < offset) {
+                               len = 0;
+                               begin = pos;
+                       }
+                       if (pos > offset + length) {
+                               spin_unlock_bh(&dev->xmit_lock);
+                               goto done;
+                       }
+               }
+               spin_unlock_bh(&dev->xmit_lock);
+       }
+       *eof = 1;
+
+done:
+       read_unlock(&dev_base_lock);
+       *start = buffer + (offset - begin);
+       len -= (offset - begin);
+       if (len > length)
+               len = length;
+       if (len < 0)
+               len = 0;
+       return len;
+}
+#endif
+
+void __init dev_mcast_init(void)
+{
+#ifdef CONFIG_PROC_FS
+       create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL);
+#endif
+}
+
diff --git a/xen-2.4.16/net/eth.c b/xen-2.4.16/net/eth.c
new file mode 100644 (file)
index 0000000..d982eef
--- /dev/null
@@ -0,0 +1,242 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Ethernet-type device handling.
+ *
+ * Version:    @(#)eth.c       1.0.7   05/25/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *             Florian  La Roche, <rzsfl@rz.uni-sb.de>
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * 
+ * Fixes:
+ *             Mr Linux        : Arp problems
+ *             Alan Cox        : Generic queue tidyup (very tiny here)
+ *             Alan Cox        : eth_header ntohs should be htons
+ *             Alan Cox        : eth_rebuild_header missing an htons and
+ *                               minor other things.
+ *             Tegge           : Arp bug fixes. 
+ *             Florian         : Removed many unnecessary functions, code cleanup
+ *                               and changes for new arp and skbuff.
+ *             Alan Cox        : Redid header building to reflect new format.
+ *             Alan Cox        : ARP only when compiled with CONFIG_INET
+ *             Greg Page       : 802.2 and SNAP stuff.
+ *             Alan Cox        : MAC layer pointers/new format.
+ *             Paul Gortmaker  : eth_copy_and_sum shouldn't csum padding.
+ *             Alan Cox        : Protect against forwarding explosions with
+ *                               older network drivers and IFF_ALLMULTI.
+ *     Christer Weinigel       : Better rebuild header message.
+ *             Andrew Morton    : 26Feb01: kill ether_setup() - use netdev_boot_setup().
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+//#include <linux/in.h>
+//#include <linux/inet.h>
+//#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/init.h>
+//#include <net/dst.h>
+//#include <net/arp.h>
+//#include <net/sock.h>
+//#include <net/ipv6.h>
+//#include <net/ip.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+//#include <asm/checksum.h>
+
+//extern int __init netdev_boot_setup(char *str);
+
+//__setup("ether=", netdev_boot_setup);
+
+/*
+ *      Create the Ethernet MAC header for an arbitrary protocol layer 
+ *
+ *     saddr=NULL      means use device source address
+ *     daddr=NULL      means leave destination address (e.g. unresolved ARP)
+ */
+
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+          void *daddr, void *saddr, unsigned len)
+{
+       struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN);
+
+       /* 
+        *      Set the protocol type. For a packet of type ETH_P_802_3 we put the length
+        *      in here instead. It is up to the 802.2 layer to carry protocol information.
+        */
+       
+       if(type!=ETH_P_802_3) 
+               eth->h_proto = htons(type);
+       else
+               eth->h_proto = htons(len);
+
+       /*
+        *      Set the source hardware address. 
+        */
+        
+       if(saddr)
+               memcpy(eth->h_source,saddr,dev->addr_len);
+       else
+               memcpy(eth->h_source,dev->dev_addr,dev->addr_len);
+
+       /*
+        *      Anyway, the loopback-device should never use this function... 
+        */
+
+       if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) 
+       {
+               memset(eth->h_dest, 0, dev->addr_len);
+               return(dev->hard_header_len);
+       }
+       
+       if(daddr)
+       {
+               memcpy(eth->h_dest,daddr,dev->addr_len);
+               return dev->hard_header_len;
+       }
+       
+       return -dev->hard_header_len;
+}
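+
+/*
+ *     A protocol layer normally reaches this routine through the device's
+ *     hard_header hook. A sketch of such a call (names illustrative):
+ *
+ *             if (dev->hard_header(skb, dev, ETH_P_IP,
+ *                                  NULL, NULL, skb->len) < 0)
+ *                     ... destination unknown: header left incomplete
+ *                         until address resolution fills in h_dest ...
+ */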
+
+
+/*
+ *     Rebuild the Ethernet MAC header. This is called after an ARP
+ *     (or in future other address resolution) has completed on this
+ *     sk_buff. We now let ARP fill in the other fields.
+ *
+ *     This routine CANNOT use cached dst->neigh!
+ *     Really, it is used only when dst->neigh is wrong.
+ */
+
+int eth_rebuild_header(struct sk_buff *skb)
+{
+       struct ethhdr *eth = (struct ethhdr *)skb->data;
+       struct net_device *dev = skb->dev;
+
+       switch (eth->h_proto)
+       {
+#ifdef CONFIG_INET
+       case __constant_htons(ETH_P_IP):
+               return arp_find(eth->h_dest, skb);
+#endif 
+       default:
+               printk(KERN_DEBUG
+                      "%s: unable to resolve type %X addresses.\n", 
+                      dev->name, (int)eth->h_proto);
+               
+               memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+               break;
+       }
+
+       return 0;
+}
+
+
+/*
+ *     Determine the packet's protocol ID. The rule here is that we 
+ *     assume 802.3 if the type field is short enough to be a length.
+ *     This is normal practice and works for any 'now in use' protocol.
+ */
+unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
+{
+       struct ethhdr *eth;
+       unsigned char *rawp;
+       
+       skb->mac.raw=skb->data;
+       skb_pull(skb,dev->hard_header_len);
+       eth= skb->mac.ethernet;
+       
+       if(*eth->h_dest&1)
+       {
+               if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
+                       skb->pkt_type=PACKET_BROADCAST;
+               else
+                       skb->pkt_type=PACKET_MULTICAST;
+       }
+       
+       /*
+        *      This ALLMULTI check should have become redundant by 1.4,
+        *      so don't forget to remove it.
+        *
+        *      It seems it never was removed: all the silly devices
+        *      seem to set IFF_PROMISC.
+        */
+        
+       else if(1 /*dev->flags&IFF_PROMISC*/)
+       {
+               if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
+                       skb->pkt_type=PACKET_OTHERHOST;
+       }
+       
+       if (ntohs(eth->h_proto) >= 1536)
+               return eth->h_proto;
+               
+       rawp = skb->data;
+       
+       /*
+        *      This is a magic hack to spot IPX packets. Older Novell breaks
+        *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
+        *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+        *      won't work for fault tolerant netware but does for the rest.
+        */
+       if (*(unsigned short *)rawp == 0xFFFF)
+               return htons(ETH_P_802_3);
+               
+       /*
+        *      Real 802.2 LLC
+        */
+       return htons(ETH_P_802_2);
+}
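+
+/*
+ *     On receive, a driver typically classifies each frame with this
+ *     routine before handing it to the stack; a sketch, assuming the
+ *     usual netif_rx() entry point:
+ *
+ *             skb->dev = dev;
+ *             skb->protocol = eth_type_trans(skb, dev);
+ *             netif_rx(skb);
+ */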
+
+int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
+{
+       struct ethhdr *eth = skb->mac.ethernet;
+       memcpy(haddr, eth->h_source, ETH_ALEN);
+       return ETH_ALEN;
+}
+
+int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+{
+#if 0
+       unsigned short type = hh->hh_type;
+       struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2);
+       struct net_device *dev = neigh->dev;
+
+       if (type == __constant_htons(ETH_P_802_3))
+               return -1;
+
+       eth->h_proto = type;
+       memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+       memcpy(eth->h_dest, neigh->ha, dev->addr_len);
+       hh->hh_len = ETH_HLEN;
+#endif
+       return 0;
+}
+
+/*
+ * Called by Address Resolution module to notify changes in address.
+ */
+
+void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr)
+{
+       memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len);
+}
diff --git a/xen-2.4.16/net/sch_generic.c b/xen-2.4.16/net/sch_generic.c
new file mode 100644 (file)
index 0000000..135142f
--- /dev/null
@@ -0,0 +1,525 @@
+/*
+ * net/sched/sch_generic.c     Generic packet scheduler routines.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *              Jamal Hadi Salim, <hadi@nortelnetworks.com> 990601
+ *              - Ingress support
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/lib.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+//#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+//#include <linux/rtnetlink.h>
+#include <linux/init.h>
+//#include <net/sock.h>
+#include <linux/pkt_sched.h>
+
+extern int net_ratelimit(void);
+#define BUG_TRAP ASSERT
+
+/* Main transmission queue. */
+
+/* Main qdisc structure lock.
+
+   Modifications to data participating in scheduling must additionally
+   be protected with the dev->queue_lock spinlock.
+
+   The idea is the following:
+   - enqueue and dequeue are serialized via the top-level device
+     spinlock dev->queue_lock.
+   - tree walking is protected by read_lock(&qdisc_tree_lock)
+     and this lock is used only in process context.
+   - updates to the tree are made only under the rtnl semaphore,
+     hence the write lock may be taken without disabling local BHs.
+
+   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+ */
+rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED;
+
+/* 
+   dev->queue_lock serializes queue accesses for this device
+   AND dev->qdisc pointer itself.
+
+   dev->xmit_lock serializes accesses to device driver.
+
+   dev->queue_lock and dev->xmit_lock are mutually exclusive,
+   if one is grabbed, another must be free.
+ */
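+
+/* E.g. code that needs both locks must take them in that order; a sketch
+   mirroring dev_init_scheduler() below:
+
+       write_lock(&qdisc_tree_lock);
+       spin_lock_bh(&dev->queue_lock);
+       ...
+       spin_unlock_bh(&dev->queue_lock);
+       write_unlock(&qdisc_tree_lock);
+ */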
+
+
+/* Kick device.
+   Note, that this procedure can be called by a watchdog timer, so that
+   we do not check dev->tbusy flag here.
+
+   Returns:   0  - queue is empty.
+             >0  - queue is not empty, but throttled.
+             <0  - queue is not empty. Device is throttled, if dev->tbusy != 0.
+
+   NOTE: Called under dev->queue_lock with locally disabled BH.
+*/
+
+int qdisc_restart(struct net_device *dev)
+{
+       struct Qdisc *q = dev->qdisc;
+       struct sk_buff *skb;
+
+       /* Dequeue packet */
+       if ((skb = q->dequeue(q)) != NULL) {
+               if (spin_trylock(&dev->xmit_lock)) {
+                       /* Remember that the driver is grabbed by us. */
+                       dev->xmit_lock_owner = smp_processor_id();
+
+                       /* And release queue */
+                       spin_unlock(&dev->queue_lock);
+
+                       if (!netif_queue_stopped(dev)) {
+#if 0
+                               if (netdev_nit)
+                                       dev_queue_xmit_nit(skb, dev);
+#endif
+
+                               if (dev->hard_start_xmit(skb, dev) == 0) {
+                                       dev->xmit_lock_owner = -1;
+                                       spin_unlock(&dev->xmit_lock);
+
+                                       spin_lock(&dev->queue_lock);
+                                       return -1;
+                               }
+                       }
+
+                       /* Release the driver */
+                       dev->xmit_lock_owner = -1;
+                       spin_unlock(&dev->xmit_lock);
+                       spin_lock(&dev->queue_lock);
+                       q = dev->qdisc;
+               } else {
+                       /* So, someone grabbed the driver. */
+
+                       /* It may be a transient configuration error
+                          when hard_start_xmit() recurses. We detect
+                          it by checking the xmit owner and drop the
+                          packet when a dead loop is detected.
+                        */
+                       if (dev->xmit_lock_owner == smp_processor_id()) {
+                               kfree_skb(skb);
+                               if (net_ratelimit())
+                                       printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+                               return -1;
+                       }
+                       netdev_rx_stat[smp_processor_id()].cpu_collision++;
+               }
+
+               /* Device kicked us out :(
+                  This is possible in the following cases:
+
+                  0. driver is locked
+                  1. fastroute is enabled
+                  2. device cannot determine busy state
+                     before start of transmission (f.e. dialout)
+                  3. device is buggy (ppp)
+                */
+
+               q->ops->requeue(skb, q);
+               netif_schedule(dev);
+               return 1;
+       }
+       return q->q.qlen;
+}
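+
+/* The transmit path typically spins on qdisc_restart() until the queue
+   drains or the device throttles; roughly (a sketch of the usual
+   qdisc_run()-style caller, under dev->queue_lock):
+
+       spin_lock_bh(&dev->queue_lock);
+       while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
+               ;
+       spin_unlock_bh(&dev->queue_lock);
+ */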
+
+static void dev_watchdog(unsigned long arg)
+{
+       struct net_device *dev = (struct net_device *)arg;
+
+       spin_lock(&dev->xmit_lock);
+       if (dev->qdisc != &noop_qdisc) {
+               if (netif_device_present(dev) &&
+                   netif_running(dev) &&
+                   netif_carrier_ok(dev)) {
+                       if (netif_queue_stopped(dev) &&
+                           (jiffies - dev->trans_start) > dev->watchdog_timeo) {
+                               printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name);
+                               dev->tx_timeout(dev);
+                       }
+                       if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+                               dev_hold(dev);
+               }
+       }
+       spin_unlock(&dev->xmit_lock);
+
+       dev_put(dev);
+}
+
+static void dev_watchdog_init(struct net_device *dev)
+{
+       init_timer(&dev->watchdog_timer);
+       dev->watchdog_timer.data = (unsigned long)dev;
+       dev->watchdog_timer.function = dev_watchdog;
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+       if (dev->tx_timeout) {
+               if (dev->watchdog_timeo <= 0)
+                       dev->watchdog_timeo = 5*HZ;
+               if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo))
+                       dev_hold(dev);
+       }
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+       spin_lock_bh(&dev->xmit_lock);
+       __netdev_watchdog_up(dev);
+       spin_unlock_bh(&dev->xmit_lock);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+       spin_lock_bh(&dev->xmit_lock);
+       if (del_timer(&dev->watchdog_timer))
+               __dev_put(dev);
+       spin_unlock_bh(&dev->xmit_lock);
+}
+
+/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
+   under all circumstances. It is difficult to invent anything faster or
+   cheaper.
+ */
+
+static int
+noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+{
+       kfree_skb(skb);
+       return NET_XMIT_CN;
+}
+
+static struct sk_buff *
+noop_dequeue(struct Qdisc * qdisc)
+{
+       return NULL;
+}
+
+static int
+noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+       if (net_ratelimit())
+               printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name);
+       kfree_skb(skb);
+       return NET_XMIT_CN;
+}
+
+struct Qdisc_ops noop_qdisc_ops =
+{
+       NULL,
+       NULL,
+       "noop",
+       0,
+
+       noop_enqueue,
+       noop_dequeue,
+       noop_requeue,
+};
+
+struct Qdisc noop_qdisc =
+{
+       noop_enqueue,
+       noop_dequeue,
+       TCQ_F_BUILTIN,
+       &noop_qdisc_ops,        
+};
+
+
+struct Qdisc_ops noqueue_qdisc_ops =
+{
+       NULL,
+       NULL,
+       "noqueue",
+       0,
+
+       noop_enqueue,
+       noop_dequeue,
+       noop_requeue,
+
+};
+
+struct Qdisc noqueue_qdisc =
+{
+       NULL,
+       noop_dequeue,
+       TCQ_F_BUILTIN,
+       &noqueue_qdisc_ops,
+};
+
+
+static const u8 prio2band[TC_PRIO_MAX+1] =
+{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };
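+
+/* E.g., assuming the usual TC_PRIO_* values from the pkt_sched headers:
+   TC_PRIO_INTERACTIVE (6) maps to band 0, the highest-priority band,
+   while best-effort traffic (priority 0) lands in band 1. */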
+
+/* 3-band FIFO queue: old style, but should be a bit faster than
+   generic prio+fifo combination.
+ */
+
+static int
+pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+       struct sk_buff_head *list;
+
+       list = ((struct sk_buff_head*)qdisc->data) +
+               prio2band[skb->priority&TC_PRIO_MAX];
+
+       if (list->qlen <= skb->dev->tx_queue_len) {
+               __skb_queue_tail(list, skb);
+               qdisc->q.qlen++;
+               return 0;
+       }
+       //qdisc->stats.drops++;
+       kfree_skb(skb);
+       return NET_XMIT_DROP;
+}
+
+static struct sk_buff *
+pfifo_fast_dequeue(struct Qdisc* qdisc)
+{
+       int prio;
+       struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+       struct sk_buff *skb;
+
+       for (prio = 0; prio < 3; prio++, list++) {
+               skb = __skb_dequeue(list);
+               if (skb) {
+                       qdisc->q.qlen--;
+                       return skb;
+               }
+       }
+       return NULL;
+}
+
+static int
+pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+       struct sk_buff_head *list;
+
+       list = ((struct sk_buff_head*)qdisc->data) +
+               prio2band[skb->priority&TC_PRIO_MAX];
+
+       __skb_queue_head(list, skb);
+       qdisc->q.qlen++;
+       return 0;
+}
+
+static void
+pfifo_fast_reset(struct Qdisc* qdisc)
+{
+       int prio;
+       struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data);
+
+       for (prio=0; prio < 3; prio++)
+               skb_queue_purge(list+prio);
+       qdisc->q.qlen = 0;
+}
+
+static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
+{
+       int i;
+       struct sk_buff_head *list;
+
+       list = ((struct sk_buff_head*)qdisc->data);
+
+       for (i=0; i<3; i++)
+               skb_queue_head_init(list+i);
+
+       return 0;
+}
+
+static struct Qdisc_ops pfifo_fast_ops =
+{
+       NULL,
+       NULL,
+       "pfifo_fast",
+       3 * sizeof(struct sk_buff_head),
+
+       pfifo_fast_enqueue,
+       pfifo_fast_dequeue,
+       pfifo_fast_requeue,
+       NULL,
+
+       pfifo_fast_init,
+       pfifo_fast_reset,
+};
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+{
+       struct Qdisc *sch;
+       int size = sizeof(*sch) + ops->priv_size;
+
+       sch = kmalloc(size, GFP_KERNEL);
+       if (!sch)
+               return NULL;
+       memset(sch, 0, size);
+
+       skb_queue_head_init(&sch->q);
+       sch->ops = ops;
+       sch->enqueue = ops->enqueue;
+       sch->dequeue = ops->dequeue;
+       sch->dev = dev;
+       //sch->stats.lock = &dev->queue_lock;
+       atomic_set(&sch->refcnt, 1);
+       if (!ops->init || ops->init(sch, NULL) == 0)
+               return sch;
+
+       kfree(sch);
+       return NULL;
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_reset(struct Qdisc *qdisc)
+{
+       struct Qdisc_ops *ops = qdisc->ops;
+
+       if (ops->reset)
+               ops->reset(qdisc);
+}
+
+/* Under dev->queue_lock and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+       struct Qdisc_ops *ops = qdisc->ops;
+       struct net_device *dev;
+
+       if (!atomic_dec_and_test(&qdisc->refcnt))
+               return;
+
+       dev = qdisc->dev;
+
+#ifdef CONFIG_NET_SCHED
+       if (dev) {
+               struct Qdisc *q, **qp;
+               for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) {
+                       if (q == qdisc) {
+                               *qp = q->next;
+                               break;
+                       }
+               }
+       }
+#ifdef CONFIG_NET_ESTIMATOR
+       qdisc_kill_estimator(&qdisc->stats);
+#endif
+#endif
+       if (ops->reset)
+               ops->reset(qdisc);
+       if (ops->destroy)
+               ops->destroy(qdisc);
+       if (!(qdisc->flags&TCQ_F_BUILTIN))
+               kfree(qdisc);
+}
+
+
+void dev_activate(struct net_device *dev)
+{
+       /* No queueing discipline is attached to the device;
+          create a default one: pfifo_fast for devices which
+          need queueing, and noqueue_qdisc for virtual
+          interfaces.
+        */
+
+       if (dev->qdisc_sleeping == &noop_qdisc) {
+               struct Qdisc *qdisc;
+               if (dev->tx_queue_len) {
+                       qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
+                       if (qdisc == NULL) {
+                               printk(KERN_INFO "%s: activation failed\n", dev->name);
+                               return;
+                       }
+               } else {
+                       qdisc =  &noqueue_qdisc;
+               }
+               write_lock(&qdisc_tree_lock);
+               dev->qdisc_sleeping = qdisc;
+               write_unlock(&qdisc_tree_lock);
+       }
+
+       spin_lock_bh(&dev->queue_lock);
+       if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) {
+               dev->trans_start = jiffies;
+               dev_watchdog_up(dev);
+       }
+       spin_unlock_bh(&dev->queue_lock);
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+       struct Qdisc *qdisc;
+
+       spin_lock_bh(&dev->queue_lock);
+       qdisc = dev->qdisc;
+       dev->qdisc = &noop_qdisc;
+
+       qdisc_reset(qdisc);
+
+       spin_unlock_bh(&dev->queue_lock);
+
+       dev_watchdog_down(dev);
+
+       while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
+            current->policy |= SCHED_YIELD;
+            schedule();
+       }
+
+       spin_unlock_wait(&dev->xmit_lock);
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+       write_lock(&qdisc_tree_lock);
+       spin_lock_bh(&dev->queue_lock);
+       dev->qdisc = &noop_qdisc;
+       spin_unlock_bh(&dev->queue_lock);
+       dev->qdisc_sleeping = &noop_qdisc;
+       dev->qdisc_list = NULL;
+       write_unlock(&qdisc_tree_lock);
+
+       dev_watchdog_init(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+       struct Qdisc *qdisc;
+
+       write_lock(&qdisc_tree_lock);
+       spin_lock_bh(&dev->queue_lock);
+       qdisc = dev->qdisc_sleeping;
+       dev->qdisc = &noop_qdisc;
+       dev->qdisc_sleeping = &noop_qdisc;
+       qdisc_destroy(qdisc);
+#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
+        if ((qdisc = dev->qdisc_ingress) != NULL) {
+               dev->qdisc_ingress = NULL;
+               qdisc_destroy(qdisc);
+        }
+#endif
+       BUG_TRAP(dev->qdisc_list == NULL);
+       BUG_TRAP(!timer_pending(&dev->watchdog_timer));
+       dev->qdisc_list = NULL;
+       spin_unlock_bh(&dev->queue_lock);
+       write_unlock(&qdisc_tree_lock);
+}
diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c
new file mode 100644 (file)
index 0000000..7ed619c
--- /dev/null
@@ -0,0 +1,1217 @@
+/*
+ *     Routines having to do with the 'struct sk_buff' memory handlers.
+ *
+ *     Authors:        Alan Cox <iiitac@pyr.swan.ac.uk>
+ *                     Florian La Roche <rzsfl@rz.uni-sb.de>
+ *
+ *     Version:        $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
+ *
+ *     Fixes:  
+ *             Alan Cox        :       Fixed the worst of the load balancer bugs.
+ *             Dave Platt      :       Interrupt stacking fix.
+ *     Richard Kooijman        :       Timestamp fixes.
+ *             Alan Cox        :       Changed buffer format.
+ *             Alan Cox        :       destructor hook for AF_UNIX etc.
+ *             Linus Torvalds  :       Better skb_clone.
+ *             Alan Cox        :       Added skb_copy.
+ *             Alan Cox        :       Added all the changed routines Linus
+ *                                     only put in the headers
+ *             Ray VanTassle   :       Fixed --skb->lock in free
+ *             Alan Cox        :       skb_copy copy arp field
+ *             Andi Kleen      :       slabified it.
+ *
+ *     NOTE:
+ *             The __skb_ routines should be called with interrupts 
+ *     disabled, or you better be *real* sure that the operation is atomic 
+ *     with respect to whatever list is being frobbed (e.g. via lock_sock()
+ *     or via disabling bottom half handlers, etc).
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *     The functions in this file will not compile correctly with gcc 2.4.x
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+//#include <linux/in.h>
+//#include <linux/inet.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+//#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+//#include <linux/highmem.h>
+
+//#include <net/ip.h>
+//#include <net/protocol.h>
+//#include <net/dst.h>
+//#include <net/tcp.h>
+//#include <net/udp.h>
+//#include <net/sock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#define BUG_TRAP ASSERT
+
+#define put_page(_p) ((void)0) /* XXXX KAF */
+#define get_page(_p) ((void)0)
+
+int sysctl_hot_list_len = 128;
+
+static kmem_cache_t *skbuff_head_cache;
+
+static union {
+       struct sk_buff_head     list;
+       char                    pad[SMP_CACHE_BYTES];
+} skb_head_pool[NR_CPUS];
+
+/*
+ *     Keep out-of-line to prevent kernel bloat.
+ *     __builtin_return_address is not used because it is not always
+ *     reliable. 
+ */
+
+/**
+ *     skb_over_panic  -       private function
+ *     @skb: buffer
+ *     @sz: size
+ *     @here: address
+ *
+ *     Out of line support code for skb_put(). Not user callable.
+ */
+void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+{
+       printk("skput:over: %p:%d put:%d dev:%s", 
+               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+       BUG();
+}
+
+/**
+ *     skb_under_panic -       private function
+ *     @skb: buffer
+ *     @sz: size
+ *     @here: address
+ *
+ *     Out of line support code for skb_push(). Not user callable.
+ */
+
+void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+{
+        printk("skput:under: %p:%d put:%d dev:%s",
+                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
+       BUG();
+}
+
+static __inline__ struct sk_buff *skb_head_from_pool(void)
+{
+       struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+       if (skb_queue_len(list)) {
+               struct sk_buff *skb;
+               unsigned long flags;
+
+               local_irq_save(flags);
+               skb = __skb_dequeue(list);
+               local_irq_restore(flags);
+               return skb;
+       }
+       return NULL;
+}
+
+static __inline__ void skb_head_to_pool(struct sk_buff *skb)
+{
+       struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
+
+       if (skb_queue_len(list) < sysctl_hot_list_len) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               __skb_queue_head(list, skb);
+               local_irq_restore(flags);
+
+               return;
+       }
+       kmem_cache_free(skbuff_head_cache, skb);
+}
+
+
+/*     Allocate a new skbuff. We do this ourselves so we can fill in a few
+ *     'private' fields and also do memory statistics to find all the
+ *     [BEEP] leaks.
+ * 
+ */
+
+/**
+ *     alloc_skb       -       allocate a network buffer
+ *     @size: size to allocate
+ *     @gfp_mask: allocation mask
+ *
+ *     Allocate a new &sk_buff. The returned buffer has no headroom and a
+ *     tailroom of @size bytes. The object has a reference count of one.
+ *     Returns the buffer on success, or %NULL on failure.
+ *
+ *     Buffers may only be allocated from interrupts using a @gfp_mask of
+ *     %GFP_ATOMIC.
+ */
+struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
+{
+       struct sk_buff *skb;
+       u8 *data;
+
+       if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+               static int count = 0;
+               if (++count < 5) {
+                       printk(KERN_ERR "alloc_skb called nonatomically "
+                              "from interrupt %p\n", NET_CALLER(size));
+                       BUG();
+               }
+               gfp_mask &= ~__GFP_WAIT;
+       }
+
+       /* Get the HEAD */
+       skb = skb_head_from_pool();
+       if (skb == NULL) {
+               skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+               if (skb == NULL)
+                       goto nohead;
+       }
+
+       /* Get the DATA. Size must match skb_add_mtu(). */
+       size = SKB_DATA_ALIGN(size);
+       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+       if (data == NULL)
+               goto nodata;
+
+       /* XXX: does not include slab overhead */ 
+       skb->truesize = size + sizeof(struct sk_buff);
+
+       /* Load the data pointers. */
+       skb->head = data;
+       skb->data = data;
+       skb->tail = data;
+       skb->end = data + size;
+
+       /* Set up other state */
+       skb->len = 0;
+       skb->cloned = 0;
+       skb->data_len = 0;
+
+       atomic_set(&skb->users, 1); 
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
+       return skb;
+
+nodata:
+       skb_head_to_pool(skb);
+nohead:
+       return NULL;
+}
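+
+/*
+ *     Typical driver-side usage (a sketch): reserve headroom for the
+ *     link-level header, then append payload with skb_put():
+ *
+ *             struct sk_buff *skb = alloc_skb(len + ETH_HLEN, GFP_ATOMIC);
+ *             if (skb != NULL) {
+ *                     skb_reserve(skb, ETH_HLEN);
+ *                     memcpy(skb_put(skb, len), data, len);
+ *             }
+ */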
+
+
+/*
+ *     Slab constructor for a skb head. 
+ */ 
+static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
+                                 unsigned long flags)
+{
+       struct sk_buff *skb = p;
+
+       skb->next = NULL;
+       skb->prev = NULL;
+       skb->list = NULL;
+       skb->sk = NULL;
+       skb->stamp.tv_sec=0;    /* No idea about time */
+       skb->dev = NULL;
+//     skb->dst = NULL;
+       memset(skb->cb, 0, sizeof(skb->cb));
+       skb->pkt_type = PACKET_HOST;    /* Default type */
+       skb->ip_summed = 0;
+       skb->priority = 0;
+       skb->security = 0;      /* By default packets are insecure */
+       skb->destructor = NULL;
+
+#ifdef CONFIG_NETFILTER
+       skb->nfmark = skb->nfcache = 0;
+       skb->nfct = NULL;
+#ifdef CONFIG_NETFILTER_DEBUG
+       skb->nf_debug = 0;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+       skb->tc_index = 0;
+#endif
+}
+
+static void skb_drop_fraglist(struct sk_buff *skb)
+{
+       struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+       skb_shinfo(skb)->frag_list = NULL;
+
+       do {
+               struct sk_buff *this = list;
+               list = list->next;
+               kfree_skb(this);
+       } while (list);
+}
+
+static void skb_clone_fraglist(struct sk_buff *skb)
+{
+       struct sk_buff *list;
+
+       for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
+               skb_get(list);
+}
+
+static void skb_release_data(struct sk_buff *skb)
+{
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+                               put_page(skb_shinfo(skb)->frags[i].page);
+               }
+
+               if (skb_shinfo(skb)->frag_list)
+                       skb_drop_fraglist(skb);
+
+               kfree(skb->head);
+       }
+}
+
+/*
+ *     Free the memory of an skbuff without cleaning its state.
+ */
+void kfree_skbmem(struct sk_buff *skb)
+{
+       skb_release_data(skb);
+       skb_head_to_pool(skb);
+}
+
+/**
+ *     __kfree_skb - private function 
+ *     @skb: buffer
+ *
+ *     Free an sk_buff. Release anything attached to the buffer. 
+ *     Clean the state. This is an internal helper function. Users should
+ *     always call kfree_skb().
+ */
+
+void __kfree_skb(struct sk_buff *skb)
+{
+       if (skb->list) {
+               printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
+                      "on a list (from %p).\n", NET_CALLER(skb));
+               BUG();
+       }
+
+//     dst_release(skb->dst);
+       if(skb->destructor) {
+               if (in_irq()) {
+                       printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
+                               NET_CALLER(skb));
+               }
+               skb->destructor(skb);
+       }
+#ifdef CONFIG_NETFILTER
+       nf_conntrack_put(skb->nfct);
+#endif
+       skb_headerinit(skb, NULL, 0);  /* clean state */
+       kfree_skbmem(skb);
+}
+
+/**
+ *     skb_clone       -       duplicate an sk_buff
+ *     @skb: buffer to clone
+ *     @gfp_mask: allocation priority
+ *
+ *     Duplicate an &sk_buff. The new one is not owned by a socket. Both
+ *     copies share the same packet data but not structure. The new
+ *     buffer has a reference count of 1. If the allocation fails the 
+ *     function returns %NULL otherwise the new buffer is returned.
+ *     
+ *     If this function is called from an interrupt, @gfp_mask must be
+ *     %GFP_ATOMIC.
+ */
+
+struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
+{
+       struct sk_buff *n;
+
+       n = skb_head_from_pool();
+       if (!n) {
+               n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+               if (!n)
+                       return NULL;
+       }
+
+#define C(x) n->x = skb->x
+
+       n->next = n->prev = NULL;
+       n->list = NULL;
+       n->sk = NULL;
+       C(stamp);
+       C(dev);
+       C(h);
+       C(nh);
+       C(mac);
+//     C(dst);
+//     dst_clone(n->dst);
+       memcpy(n->cb, skb->cb, sizeof(skb->cb));
+       C(len);
+       C(data_len);
+       C(csum);
+       n->cloned = 1;
+       C(pkt_type);
+       C(ip_summed);
+       C(priority);
+       atomic_set(&n->users, 1);
+       C(protocol);
+       C(security);
+       C(truesize);
+       C(head);
+       C(data);
+       C(tail);
+       C(end);
+       n->destructor = NULL;
+#ifdef CONFIG_NETFILTER
+       C(nfmark);
+       C(nfcache);
+       C(nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+       C(nf_debug);
+#endif
+#endif /*CONFIG_NETFILTER*/
+#if defined(CONFIG_HIPPI)
+       C(private);
+#endif
+#ifdef CONFIG_NET_SCHED
+       C(tc_index);
+#endif
+
+       atomic_inc(&(skb_shinfo(skb)->dataref));
+       skb->cloned = 1;
+#ifdef CONFIG_NETFILTER
+       nf_conntrack_get(skb->nfct);
+#endif
+       return n;
+}
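+
+/*
+ *     Sketch of the clone/copy distinction for callers:
+ *
+ *             nskb = skb_clone(skb, GFP_ATOMIC);   - header private, data shared
+ *             cskb = skb_copy(skb, GFP_ATOMIC);    - fully private, data writable
+ */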
+
+static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+       /*
+        *      Shift between the two data areas in bytes
+        */
+       unsigned long offset = new->data - old->data;
+
+       new->list=NULL;
+       new->sk=NULL;
+       new->dev=old->dev;
+       new->priority=old->priority;
+       new->protocol=old->protocol;
+//     new->dst=dst_clone(old->dst);
+       new->h.raw=old->h.raw+offset;
+       new->nh.raw=old->nh.raw+offset;
+       new->mac.raw=old->mac.raw+offset;
+       memcpy(new->cb, old->cb, sizeof(old->cb));
+       atomic_set(&new->users, 1);
+       new->pkt_type=old->pkt_type;
+       new->stamp=old->stamp;
+       new->destructor = NULL;
+       new->security=old->security;
+#ifdef CONFIG_NETFILTER
+       new->nfmark=old->nfmark;
+       new->nfcache=old->nfcache;
+       new->nfct=old->nfct;
+       nf_conntrack_get(new->nfct);
+#ifdef CONFIG_NETFILTER_DEBUG
+       new->nf_debug=old->nf_debug;
+#endif
+#endif
+#ifdef CONFIG_NET_SCHED
+       new->tc_index = old->tc_index;
+#endif
+}
+
+/**
+ *     skb_copy        -       create private copy of an sk_buff
+ *     @skb: buffer to copy
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and its data. This is used when the
+ *     caller wishes to modify the data and needs a private copy of the 
+ *     data to alter. Returns %NULL on failure or the pointer to the buffer
+ *     on success. The returned buffer has a reference count of 1.
+ *
+ *     As a by-product this function converts a non-linear &sk_buff into a
+ *     linear one, so that the &sk_buff becomes completely private and the
+ *     caller may modify all the data of the returned buffer. This means
+ *     the function is not recommended in circumstances where only the
+ *     header is going to be modified. Use pskb_copy() instead.
+ */
+struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
+{
+       struct sk_buff *n;
+       int headerlen = skb->data-skb->head;
+
+       /*
+        *      Allocate the copy buffer
+        */
+       n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+       if(n==NULL)
+               return NULL;
+
+       /* Set the data pointer */
+       skb_reserve(n,headerlen);
+       /* Set the tail pointer and length */
+       skb_put(n,skb->len);
+       n->csum = skb->csum;
+       n->ip_summed = skb->ip_summed;
+
+       if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
+               BUG();
+
+       copy_skb_header(n, skb);
+
+       return n;
+}
+
+/* Keep head the same: replace data */
+int skb_linearize(struct sk_buff *skb, int gfp_mask)
+{
+       unsigned int size;
+       u8 *data;
+       long offset;
+       int headerlen = skb->data - skb->head;
+       int expand = (skb->tail+skb->data_len) - skb->end;
+
+       if (skb_shared(skb))
+               BUG();
+
+       if (expand <= 0)
+               expand = 0;
+
+       size = (skb->end - skb->head + expand);
+       size = SKB_DATA_ALIGN(size);
+       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+       if (data == NULL)
+               return -ENOMEM;
+
+       /* Copy entire thing */
+       if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
+               BUG();
+
+       /* Offset between the two in bytes */
+       offset = data - skb->head;
+
+       /* Free old data. */
+       skb_release_data(skb);
+
+       skb->head = data;
+       skb->end  = data + size;
+
+       /* Set up new pointers */
+       skb->h.raw += offset;
+       skb->nh.raw += offset;
+       skb->mac.raw += offset;
+       skb->tail += offset;
+       skb->data += offset;
+
+       /* Set up shinfo */
+       atomic_set(&(skb_shinfo(skb)->dataref), 1);
+       skb_shinfo(skb)->nr_frags = 0;
+       skb_shinfo(skb)->frag_list = NULL;
+
+       /* We are no longer a clone, even if we were. */
+       skb->cloned = 0;
+
+       skb->tail += skb->data_len;
+       skb->data_len = 0;
+       return 0;
+}
+
+
+/**
+ *     pskb_copy       -       create copy of an sk_buff with private head.
+ *     @skb: buffer to copy
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and the part of its data located
+ *     in the header. Fragmented data remain shared. This is used when
+ *     the caller wishes to modify only the header of an &sk_buff and
+ *     needs a private copy of the header to alter. Returns %NULL on
+ *     failure or the pointer to the buffer on success.
+ *     The returned buffer has a reference count of 1.
+ */
+
+struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
+{
+       struct sk_buff *n;
+
+       /*
+        *      Allocate the copy buffer
+        */
+       n=alloc_skb(skb->end - skb->head, gfp_mask);
+       if(n==NULL)
+               return NULL;
+
+       /* Set the data pointer */
+       skb_reserve(n,skb->data-skb->head);
+       /* Set the tail pointer and length */
+       skb_put(n,skb_headlen(skb));
+       /* Copy the bytes */
+       memcpy(n->data, skb->data, n->len);
+       n->csum = skb->csum;
+       n->ip_summed = skb->ip_summed;
+
+       n->data_len = skb->data_len;
+       n->len = skb->len;
+
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
+                       get_page(skb_shinfo(n)->frags[i].page);
+               }
+               skb_shinfo(n)->nr_frags = i;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
+               skb_clone_fraglist(n);
+       }
+
+       copy_skb_header(n, skb);
+
+       return n;
+}
+
+/**
+ *     pskb_expand_head - reallocate header of &sk_buff
+ *     @skb: buffer to reallocate
+ *     @nhead: room to add at head
+ *     @ntail: room to add at tail
+ *     @gfp_mask: allocation priority
+ *
+ *     Expands (or creates an identical copy, if @nhead and @ntail are zero)
+ *     the header of the skb. The &sk_buff itself is not changed and MUST
+ *     have a reference count of 1. Returns zero on success, or an error
+ *     code if expansion failed; in that case the &sk_buff is unchanged.
+ *
+ *     All the pointers pointing into skb header may change and must be
+ *     reloaded after call to this function.
+ */
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
+{
+       int i;
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
+
+       if (skb_shared(skb))
+               BUG();
+
+       size = SKB_DATA_ALIGN(size);
+
+       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
+       if (data == NULL)
+               goto nodata;
+
+       /* Copy only real data... and, alas, header. This should be
+        * optimized for the cases when header is void. */
+       memcpy(data+nhead, skb->head, skb->tail-skb->head);
+       memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
+
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
+               get_page(skb_shinfo(skb)->frags[i].page);
+
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
+
+       skb_release_data(skb);
+
+       off = (data+nhead) - skb->head;
+
+       skb->head = data;
+       skb->end  = data+size;
+
+       skb->data += off;
+       skb->tail += off;
+       skb->mac.raw += off;
+       skb->h.raw += off;
+       skb->nh.raw += off;
+       skb->cloned = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
+       return 0;
+
+nodata:
+       return -ENOMEM;
+}
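+
+/*
+ *     Because the head may move, cached pointers into the buffer must be
+ *     recomputed after a successful call; e.g. (a sketch, nhead illustrative):
+ *
+ *             if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC) == 0)
+ *                     eth = (struct ethhdr *)skb->data;   - reload: head moved
+ */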
+
+/* Make private copy of skb with writable head and some headroom */
+
+struct sk_buff *
+skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
+{
+       struct sk_buff *skb2;
+       int delta = headroom - skb_headroom(skb);
+
+       if (delta <= 0)
+               return pskb_copy(skb, GFP_ATOMIC);
+
+       skb2 = skb_clone(skb, GFP_ATOMIC);
+       if (skb2 == NULL ||
+           !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
+               return skb2;
+
+       kfree_skb(skb2);
+       return NULL;
+}
+
+
+/**
+ *     skb_copy_expand -       copy and expand sk_buff
+ *     @skb: buffer to copy
+ *     @newheadroom: new free bytes at head
+ *     @newtailroom: new free bytes at tail
+ *     @gfp_mask: allocation priority
+ *
+ *     Make a copy of both an &sk_buff and its data and while doing so 
+ *     allocate additional space.
+ *
+ *     This is used when the caller wishes to modify the data and needs a 
+ *     private copy of the data to alter as well as more space for new fields.
+ *     Returns %NULL on failure or the pointer to the buffer
+ *     on success. The returned buffer has a reference count of 1.
+ *
+ *     You must pass %GFP_ATOMIC as the allocation priority if this function
+ *     is called from an interrupt.
+ */
+
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+                               int newheadroom,
+                               int newtailroom,
+                               int gfp_mask)
+{
+       struct sk_buff *n;
+
+       /*
+        *      Allocate the copy buffer
+        */
+        
+       n=alloc_skb(newheadroom + skb->len + newtailroom,
+                   gfp_mask);
+       if(n==NULL)
+               return NULL;
+
+       skb_reserve(n,newheadroom);
+
+       /* Set the tail pointer and length */
+       skb_put(n,skb->len);
+
+       /* Copy the data only. */
+       if (skb_copy_bits(skb, 0, n->data, skb->len))
+               BUG();
+
+       copy_skb_header(n, skb);
+       return n;
+}
+
+/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
+ * If realloc==0 and trimming is impossible without changing the data,
+ * we BUG().
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+       int offset = skb_headlen(skb);
+       int nfrags = skb_shinfo(skb)->nr_frags;
+       int i;
+
+       for (i=0; i<nfrags; i++) {
+               int end = offset + skb_shinfo(skb)->frags[i].size;
+               if (end > len) {
+                       if (skb_cloned(skb)) {
+                               if (!realloc)
+                                       BUG();
+                               if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+                                       return -ENOMEM;
+                       }
+                       if (len <= offset) {
+                               put_page(skb_shinfo(skb)->frags[i].page);
+                               skb_shinfo(skb)->nr_frags--;
+                       } else {
+                               skb_shinfo(skb)->frags[i].size = len-offset;
+                       }
+               }
+               offset = end;
+       }
+
+       if (offset < len) {
+               skb->data_len -= skb->len - len;
+               skb->len = len;
+       } else {
+               if (len <= skb_headlen(skb)) {
+                       skb->len = len;
+                       skb->data_len = 0;
+                       skb->tail = skb->data + len;
+                       if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+                               skb_drop_fraglist(skb);
+               } else {
+                       skb->data_len -= skb->len - len;
+                       skb->len = len;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ *     __pskb_pull_tail - advance tail of skb header 
+ *     @skb: buffer to reallocate
+ *     @delta: number of bytes to advance tail
+ *
+ *     The function makes sense only on a fragmented &sk_buff:
+ *     it expands the header, moving its tail forward and copying the
+ *     necessary data from the fragmented part.
+ *
+ *     The &sk_buff MUST have a reference count of 1.
+ *
+ *     Returns %NULL (and the &sk_buff does not change) if the pull
+ *     failed, or the value of the new tail of the skb on success.
+ *
+ *     All the pointers pointing into skb header may change and must be
+ *     reloaded after call to this function.
+ */
+
+/* Moves the tail of the skb head forward, copying data from the
+ * fragmented part when necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+       int i, k, eat;
+
+       /* If the skb does not have enough free space at the tail, get a
+        * new one plus 128 bytes for future expansion. If we have enough
+        * room at the tail, reallocate without expansion only if the skb
+        * is cloned.
+        */
+       eat = (skb->tail+delta) - skb->end;
+
+       if (eat > 0 || skb_cloned(skb)) {
+               if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
+                       return NULL;
+       }
+
+       if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+               BUG();
+
+       /* Optimization: no fragments, no reason to pre-estimate the
+        * size of pulled pages. Superb.
+        */
+       if (skb_shinfo(skb)->frag_list == NULL)
+               goto pull_pages;
+
+       /* Estimate size of pulled pages. */
+       eat = delta;
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+               if (skb_shinfo(skb)->frags[i].size >= eat)
+                       goto pull_pages;
+               eat -= skb_shinfo(skb)->frags[i].size;
+       }
+
+       /* If we need to update the frag list, we are in trouble.
+        * Certainly, it is possible to add an offset to the skb data,
+        * but taking into account that pulling is expected to be a
+        * very rare operation, it is worth fighting against further
+        * bloating of the skb head and crucifying ourselves here instead.
+        * Pure masochism, indeed. 8)8)
+        */
+       if (eat) {
+               struct sk_buff *list = skb_shinfo(skb)->frag_list;
+               struct sk_buff *clone = NULL;
+               struct sk_buff *insp = NULL;
+
+               do {
+                       if (list == NULL)
+                               BUG();
+
+                       if (list->len <= eat) {
+                               /* Eaten as whole. */
+                               eat -= list->len;
+                               list = list->next;
+                               insp = list;
+                       } else {
+                               /* Eaten partially. */
+
+                               if (skb_shared(list)) {
+                                       /* Sucks! We need to fork list. :-( */
+                                       clone = skb_clone(list, GFP_ATOMIC);
+                                       if (clone == NULL)
+                                               return NULL;
+                                       insp = list->next;
+                                       list = clone;
+                               } else {
+                                       /* This may be pulled without
+                                        * problems. */
+                                       insp = list;
+                               }
+                               if (pskb_pull(list, eat) == NULL) {
+                                       if (clone)
+                                               kfree_skb(clone);
+                                       return NULL;
+                               }
+                               break;
+                       }
+               } while (eat);
+
+               /* Free pulled out fragments. */
+               while ((list = skb_shinfo(skb)->frag_list) != insp) {
+                       skb_shinfo(skb)->frag_list = list->next;
+                       kfree_skb(list);
+               }
+               /* And insert new clone at head. */
+               if (clone) {
+                       clone->next = list;
+                       skb_shinfo(skb)->frag_list = clone;
+               }
+       }
+       /* Success! Now we may commit changes to skb data. */
+
+pull_pages:
+       eat = delta;
+       k = 0;
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+               if (skb_shinfo(skb)->frags[i].size <= eat) {
+                       put_page(skb_shinfo(skb)->frags[i].page);
+                       eat -= skb_shinfo(skb)->frags[i].size;
+               } else {
+                       skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
+                       if (eat) {
+                               skb_shinfo(skb)->frags[k].page_offset += eat;
+                               skb_shinfo(skb)->frags[k].size -= eat;
+                               eat = 0;
+                       }
+                       k++;
+               }
+       }
+       skb_shinfo(skb)->nr_frags = k;
+
+       skb->tail += delta;
+       skb->data_len -= delta;
+
+       return skb->tail;
+}
+
+/* Copy some data bits from skb to kernel buffer. */
+
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
+{
+       int i, copy;
+       int start = skb->len - skb->data_len;
+
+       if (offset > (int)skb->len-len)
+               goto fault;
+
+       /* Copy header. */
+       if ((copy = start-offset) > 0) {
+               if (copy > len)
+                       copy = len;
+               memcpy(to, skb->data + offset, copy);
+               if ((len -= copy) == 0)
+                       return 0;
+               offset += copy;
+               to += copy;
+       }
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset+len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end-offset) > 0) {
+                       u8 *vaddr;
+
+                       if (copy > len)
+                               copy = len;
+
+                       vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+                       memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
+                              offset-start, copy);
+                       kunmap_skb_frag(vaddr);
+
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
+                       to += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list;
+
+               for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+                       int end;
+
+                       BUG_TRAP(start <= offset+len);
+
+                       end = start + list->len;
+                       if ((copy = end-offset) > 0) {
+                               if (copy > len)
+                                       copy = len;
+                               if (skb_copy_bits(list, offset-start, to, copy))
+                                       goto fault;
+                               if ((len -= copy) == 0)
+                                       return 0;
+                               offset += copy;
+                               to += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (len == 0)
+               return 0;
+
+fault:
+       return -EFAULT;
+}
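+
+/*
+ *     E.g. to pull the first bytes of a possibly fragmented skb into a
+ *     flat buffer (a sketch):
+ *
+ *             u8 hdr[64];
+ *             if (skb_copy_bits(skb, 0, hdr, sizeof(hdr)) < 0)
+ *                     ... the skb is shorter than 64 bytes ...
+ */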
+
+/* Checksum skb data. */
+
+#if 0
+
+unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
+{
+       int i, copy;
+       int start = skb->len - skb->data_len;
+       int pos = 0;
+
+       /* Checksum header. */
+       if ((copy = start-offset) > 0) {
+               if (copy > len)
+                       copy = len;
+               csum = csum_partial(skb->data+offset, copy, csum);
+               if ((len -= copy) == 0)
+                       return csum;
+               offset += copy;
+               pos = copy;
+       }
+
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset+len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end-offset) > 0) {
+                       unsigned int csum2;
+                       u8 *vaddr;
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap_skb_frag(frag);
+                       csum2 = csum_partial(vaddr + frag->page_offset +
+                                            offset-start, copy, 0);
+                       kunmap_skb_frag(vaddr);
+                       csum = csum_block_add(csum, csum2, pos);
+                       if (!(len -= copy))
+                               return csum;
+                       offset += copy;
+                       pos += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list;
+
+               for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+                       int end;
+
+                       BUG_TRAP(start <= offset+len);
+
+                       end = start + list->len;
+                       if ((copy = end-offset) > 0) {
+                               unsigned int csum2;
+                               if (copy > len)
+                                       copy = len;
+                               csum2 = skb_checksum(list, offset-start, copy, 0);
+                               csum = csum_block_add(csum, csum2, pos);
+                               if ((len -= copy) == 0)
+                                       return csum;
+                               offset += copy;
+                               pos += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (len == 0)
+               return csum;
+
+       BUG();
+       return csum;
+}
+
+/* Both of above in one bottle. */
+
+unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
+{
+       int i, copy;
+       int start = skb->len - skb->data_len;
+       int pos = 0;
+
+       /* Copy header. */
+       if ((copy = start-offset) > 0) {
+               if (copy > len)
+                       copy = len;
+               csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
+               if ((len -= copy) == 0)
+                       return csum;
+               offset += copy;
+               to += copy;
+               pos = copy;
+       }
+
+       for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               BUG_TRAP(start <= offset+len);
+
+               end = start + skb_shinfo(skb)->frags[i].size;
+               if ((copy = end-offset) > 0) {
+                       unsigned int csum2;
+                       u8 *vaddr;
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap_skb_frag(frag);
+                       csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
+                                                     offset-start, to, copy, 0);
+                       kunmap_skb_frag(vaddr);
+                       csum = csum_block_add(csum, csum2, pos);
+                       if (!(len -= copy))
+                               return csum;
+                       offset += copy;
+                       to += copy;
+                       pos += copy;
+               }
+               start = end;
+       }
+
+       if (skb_shinfo(skb)->frag_list) {
+               struct sk_buff *list;
+
+               for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
+                       unsigned int csum2;
+                       int end;
+
+                       BUG_TRAP(start <= offset+len);
+
+                       end = start + list->len;
+                       if ((copy = end-offset) > 0) {
+                               if (copy > len)
+                                       copy = len;
+                               csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
+                               csum = csum_block_add(csum, csum2, pos);
+                               if ((len -= copy) == 0)
+                                       return csum;
+                               offset += copy;
+                               to += copy;
+                               pos += copy;
+                       }
+                       start = end;
+               }
+       }
+       if (len == 0)
+               return csum;
+
+       BUG();
+       return csum;
+}
+
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
+{
+       unsigned int csum;
+       long csstart;
+
+       if (skb->ip_summed == CHECKSUM_HW)
+               csstart = skb->h.raw - skb->data;
+       else
+               csstart = skb->len - skb->data_len;
+
+       if (csstart > skb->len - skb->data_len)
+               BUG();
+
+       memcpy(to, skb->data, csstart);
+
+       csum = 0;
+       if (csstart != skb->len)
+               csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
+                               skb->len-csstart, 0);
+
+       if (skb->ip_summed == CHECKSUM_HW) {
+               long csstuff = csstart + skb->csum;
+
+               *((unsigned short *)(to + csstuff)) = csum_fold(csum);
+       }
+}
+
+#endif
+
+#if 0
+/* 
+ *     Tune the memory allocator for a new MTU size.
+ */
+void skb_add_mtu(int mtu)
+{
+       /* Must match allocation in alloc_skb */
+       mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
+
+       kmem_add_cache_size(mtu);
+}
+#endif
+
+void __init skb_init(void)
+{
+       int i;
+
+       skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+                                             sizeof(struct sk_buff),
+                                             0,
+                                             SLAB_HWCACHE_ALIGN,
+                                             skb_headerinit, NULL);
+       if (!skbuff_head_cache)
+               panic("cannot create skbuff cache");
+
+       for (i=0; i<NR_CPUS; i++)
+               skb_queue_head_init(&skb_head_pool[i].list);
+}
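
A minimal sketch, not part of this changeset: the `#if 0` routines above walk
an skb's three regions -- linear header, page fragments, then the frag_list --
accumulating 32-bit partial sums via csum_partial()/csum_block_add() and
leaving the final 16-bit fold to csum_fold(). The fold they ultimately compute,
shown here over a plain contiguous buffer rather than a fragmented skb, is an
illustration only, not the kernel's optimized implementation:

#include <stddef.h>
#include <stdint.h>

/* One's-complement Internet checksum of a contiguous buffer. */
static uint16_t inet_csum(const void *buf, size_t len)
{
    const uint8_t *p = buf;
    uint32_t sum = 0;

    while (len > 1) {                 /* sum network-order 16-bit words */
        sum += ((uint32_t)p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len)                          /* pad a trailing odd byte with zero */
        sum += (uint32_t)p[0] << 8;
    while (sum >> 16)                 /* fold carries, as csum_fold() does */
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;            /* one's complement of the sum */
}
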
diff --git a/xen-2.4.16/net/utils.c b/xen-2.4.16/net/utils.c
new file mode 100644 (file)
index 0000000..4cf312a
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ *     Generic address resolution entity
+ *
+ *     Authors:
+ *     net_random Alan Cox
+ *     net_ratelimit Andi Kleen
+ *
+ *     Created by Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/lib.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/types.h>
+//#include <linux/kernel.h>
+#include <linux/sched.h>
+//#include <linux/string.h>
+#include <linux/mm.h>
+
+static unsigned long net_rand_seed = 152L;
+
+unsigned long net_random(void)
+{
+       net_rand_seed = net_rand_seed*69069L + 1;
+       return net_rand_seed ^ jiffies;
+}
+
+void net_srandom(unsigned long entropy)
+{
+       net_rand_seed ^= entropy;
+       net_random();
+}
+
+int net_msg_cost = 5*HZ;
+int net_msg_burst = 10*5*HZ;
+
+/* 
+ * This enforces a rate limit: not more than one kernel message
+ * every 5 seconds, to make a denial-of-service attack impossible.
+ *
+ * All warning printk()s should be guarded by this function. 
+ */ 
+int net_ratelimit(void)
+{
+       static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+       static unsigned long toks = 10*5*HZ;
+       static unsigned long last_msg; 
+       static int missed;
+       unsigned long flags;
+       unsigned long now = jiffies;
+
+       spin_lock_irqsave(&ratelimit_lock, flags);
+       toks += now - last_msg;
+       last_msg = now;
+       if (toks > net_msg_burst)
+               toks = net_msg_burst;
+       if (toks >= net_msg_cost) {
+               int lost = missed;
+               missed = 0;
+               toks -= net_msg_cost;
+               spin_unlock_irqrestore(&ratelimit_lock, flags);
+               if (lost)
+                       printk(KERN_WARNING "NET: %d messages suppressed.\n", lost);
+               return 1;
+       }
+       missed++;
+       spin_unlock_irqrestore(&ratelimit_lock, flags);
+       return 0;
+}
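
net_ratelimit() above is a token bucket: tokens accrue one per jiffy, capped
at net_msg_burst, and each message spends net_msg_cost -- 5*HZ -- of them, so
output settles to one message every five seconds with a burst allowance of
ten. A sketch of the guarded-printk pattern the comment prescribes; the call
site and message text are hypothetical:

if (net_ratelimit())
    printk(KERN_WARNING "%s: dropping malformed packet\n", dev->name);
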
diff --git a/xen-2.4.16/tools/Makefile b/xen-2.4.16/tools/Makefile
new file mode 100644 (file)
index 0000000..ccf535a
--- /dev/null
@@ -0,0 +1,6 @@
+
+elf-reloc: elf-reloc.c
+       gcc -O2 -Wall -o $@ $<
+
+clean:
+       rm -f elf-reloc *~ core
diff --git a/xen-2.4.16/tools/elf-reloc.c b/xen-2.4.16/tools/elf-reloc.c
new file mode 100644 (file)
index 0000000..19a839e
--- /dev/null
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * elf-reloc.c
+ * 
+ * Usage: elf-reloc <old base> <new base> <image>
+ * 
+ * Relocates <image> from <old base> address to <new base> address by
+ * frobbing the ELF headers. Segment contents are unmodified!
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef unsigned long  Elf32_Addr;
+typedef unsigned short Elf32_Half;
+typedef unsigned long  Elf32_Off;
+typedef unsigned long  Elf32_Word;
+
+typedef struct {
+    unsigned char e_ident[16];
+    Elf32_Half    e_type;
+    Elf32_Half    e_machine;
+    Elf32_Word    e_version;
+    Elf32_Addr    e_entry;
+    Elf32_Off     e_phoff;
+    Elf32_Off     e_shoff;
+    Elf32_Word    e_flags;
+    Elf32_Half    e_ehsize;
+    Elf32_Half    e_phentsize;
+    Elf32_Half    e_phnum;
+    Elf32_Half    e_shentsize;
+    Elf32_Half    e_shnum;
+    Elf32_Half    e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct {
+    Elf32_Word    p_type;
+    Elf32_Off     p_offset;
+    Elf32_Addr    p_vaddr;
+    Elf32_Addr    p_paddr;
+    Elf32_Word    p_filesz;
+    Elf32_Word    p_memsz;
+    Elf32_Word    p_flags;
+    Elf32_Word    p_align;
+} Elf32_Phdr;
+
+#define offsetof(_f,_p) ((unsigned long)&(((_p *)0)->_f))
+
+
+/* Add @reloc_distance to address at offset @off in file @fp. */
+void reloc(FILE *fp, long off, unsigned long reloc_distance)
+{
+    unsigned long base;
+    fseek(fp, off, SEEK_SET);
+    fread(&base, sizeof(base), 1, fp);
+    base += reloc_distance;
+    fseek(fp, off, SEEK_SET);
+    fwrite(&base, sizeof(base), 1, fp);
+
+}
+
+
+int main(int argc, char **argv)
+{
+    unsigned long old_base, new_base, reloc_distance;
+    long virt_section, phys_section;
+    char *image_name;
+    FILE *fp;
+    Elf32_Off phoff;
+    Elf32_Half phnum, phentsz;
+    int i;
+
+    if ( argc != 4 )
+    {
+        fprintf(stderr, "Usage: elf-reloc <old base> <new base> <image>\n");
+        return(1);
+    }
+
+    old_base = strtoul(argv[1], NULL, 16);
+    new_base = strtoul(argv[2], NULL, 16);
+    image_name = argv[3];
+
+    printf("Relocating `%s' from 0x%08lX to 0x%08lX\n",
+           image_name, old_base, new_base);
+
+    fp = fopen(image_name, "rb+");
+    if ( !fp )
+    {
+        fprintf(stderr, "Failed to load image!\n");
+        return(1);
+    }
+
+    reloc_distance = new_base - old_base;
+
+    /* First frob the entry address. */
+    reloc(fp, offsetof(e_entry, Elf32_Ehdr), reloc_distance);
+
+    fseek(fp, offsetof(e_phoff, Elf32_Ehdr), SEEK_SET);
+    fread(&phoff, sizeof(phoff), 1, fp);
+    fseek(fp, offsetof(e_phnum, Elf32_Ehdr), SEEK_SET);
+    fread(&phnum, sizeof(phnum), 1, fp);
+    fseek(fp, offsetof(e_phentsize, Elf32_Ehdr), SEEK_SET);
+    fread(&phentsz, sizeof(phentsz), 1, fp);
+
+    virt_section = (long)phoff + offsetof(p_vaddr, Elf32_Phdr);
+    phys_section = (long)phoff + offsetof(p_paddr, Elf32_Phdr);
+    for ( i = 0; i < phnum; i++ )
+    {
+        reloc(fp, phys_section, reloc_distance);
+        reloc(fp, virt_section, reloc_distance);
+        phys_section += phentsz;
+        virt_section += phentsz;
+    }
+
+    fclose(fp);
+
+    return(0);
+}
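
Note that the local offsetof above takes (field, type), the reverse of the
standard <stddef.h> macro, and that the Elf32_* typedefs assume 32-bit longs.
Under those assumptions the offsets the tool patches match the standard ELF32
layout; a hypothetical self-check, written against the tool's own macro and
not part of the original source:

#include <assert.h>

static void check_offsets(void)
{
    /* e_ident[16] + e_type(2) + e_machine(2) + e_version(4) == 24 */
    assert(offsetof(e_entry, Elf32_Ehdr) == 24);
    assert(offsetof(p_vaddr, Elf32_Phdr) == 8);
    assert(offsetof(p_paddr, Elf32_Phdr) == 12);
}
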
diff --git a/xenolinux-2.4.16-sparse/Makefile b/xenolinux-2.4.16-sparse/Makefile
new file mode 100644 (file)
index 0000000..bf7463b
--- /dev/null
@@ -0,0 +1,580 @@
+VERSION = 2
+PATCHLEVEL = 4
+SUBLEVEL = 16
+EXTRAVERSION = -kdb-xeno
+
+KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
+
+HW_ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
+ifndef ARCH
+ARCH := $(HW_ARCH)
+endif
+KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//")
+
+CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
+         else if [ -x /bin/bash ]; then echo /bin/bash; \
+         else echo sh; fi ; fi)
+TOPDIR := $(shell /bin/pwd)
+
+HPATH          = $(TOPDIR)/include
+FINDHPATH      = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net
+
+HOSTCC         = gcc
+HOSTCFLAGS     = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
+
+CROSS_COMPILE  =
+
+#
+# Include the make variables (CC, etc...)
+#
+
+AS             = $(CROSS_COMPILE)as
+LD             = $(CROSS_COMPILE)ld
+CC             = $(CROSS_COMPILE)gcc
+CPP            = $(CC) -E
+AR             = $(CROSS_COMPILE)ar
+NM             = $(CROSS_COMPILE)nm
+STRIP          = $(CROSS_COMPILE)strip
+OBJCOPY                = $(CROSS_COMPILE)objcopy
+OBJDUMP                = $(CROSS_COMPILE)objdump
+MAKEFILES      = $(TOPDIR)/.config
+GENKSYMS       = /sbin/genksyms
+DEPMOD         = /sbin/depmod
+KALLSYMS       = /sbin/kallsyms
+MODFLAGS       = -DMODULE
+CFLAGS_KERNEL  =
+PERL           = perl
+AWK            = awk
+TMPPREFIX      =
+
+export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
+       CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
+       CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK
+
+all:   do-it-all
+
+#
+# Make "config" the default target if there is no configuration file or
+# "depend" the target if there is no top-level dependency information.
+#
+
+ifeq (.config,$(wildcard .config))
+include .config
+ifeq (.depend,$(wildcard .depend))
+include .depend
+do-it-all:     Version vmlinux
+else
+CONFIGURATION = depend
+do-it-all:     depend
+endif
+else
+CONFIGURATION = config
+do-it-all:     config
+endif
+
+#
+# INSTALL_PATH specifies where to place the updated kernel and system map
+# images.  Uncomment if you want to place them anywhere other than root.
+#
+
+#export        INSTALL_PATH=/boot
+
+#
+# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+# relocations required by build roots.  This is not defined in the
+# makefile, but the argument can be passed to make if needed.
+#
+
+MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+export MODLIB
+
+#
+# standard CFLAGS
+#
+
+CPPFLAGS := -D__KERNEL__ -I$(HPATH)
+CPPFLAGS += $(patsubst %,-I%,$(CROSS_COMPILE_INC))
+
+CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \
+         -fno-strict-aliasing -fno-common
+ifndef CONFIG_FRAME_POINTER
+CFLAGS += -fomit-frame-pointer
+endif
+AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
+
+#
+# ROOT_DEV specifies the default root-device when making the image.
+# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
+# the default of FLOPPY is used by 'build'.
+# This is i386 specific.
+#
+
+export ROOT_DEV = CURRENT
+
+#
+# If you want to preset the SVGA mode, uncomment the next line and
+# set SVGA_MODE to whatever number you want.
+# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
+# The number is the same as you would ordinarily press at bootup.
+# This is i386 specific.
+#
+
+export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA
+
+#
+# If you want the RAM disk device, define this to be the size in blocks.
+# This is i386 specific.
+#
+
+#export RAMDISK = -DRAMDISK=512
+
+CORE_FILES     =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o
+NETWORKS       =net/network.o
+
+LIBS           =$(TOPDIR)/lib/lib.a
+SUBDIRS                =kernel drivers mm fs net ipc lib
+
+ifeq ($(CONFIG_KDB),y)
+CORE_FILES     += kdb/kdb.o
+SUBDIRS                += kdb
+endif
+
+DRIVERS-n :=
+DRIVERS-y :=
+DRIVERS-m :=
+DRIVERS-  :=
+
+DRIVERS-$(CONFIG_ACPI) += drivers/acpi/acpi.o
+DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o
+DRIVERS-y += drivers/char/char.o \
+       drivers/block/block.o \
+       drivers/misc/misc.o \
+       drivers/net/net.o \
+       drivers/media/media.o
+DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o
+DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o
+DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a
+DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a
+DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o
+DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.o
+DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o
+DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o
+DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o
+DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o
+DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o
+DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o
+DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o
+
+ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),)
+DRIVERS-y += drivers/cdrom/driver.o
+endif
+
+DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o
+DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o
+DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o
+DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o
+DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o
+DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o
+DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o
+DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a
+DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o
+DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o
+DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
+DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o
+DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o
+DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a
+DRIVERS-$(CONFIG_VT) += drivers/video/video.o
+DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a
+DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o
+DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a
+DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o
+DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o
+DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o
+DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o
+DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o
+DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o
+DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o
+DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o
+DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o
+
+DRIVERS := $(DRIVERS-y)
+
+
+# files removed with 'make clean'
+CLEAN_FILES = \
+       kernel/ksyms.lst include/linux/compile.h \
+       vmlinux System.map \
+       $(TMPPREFIX).tmp* \
+       drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \
+       drivers/char/conmakehash \
+       drivers/char/drm/*-mod.c \
+       drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \
+       drivers/zorro/devlist.h drivers/zorro/gen-devlist \
+       drivers/sound/bin2hex drivers/sound/hex2hex \
+       drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \
+       drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \
+       drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \
+       drivers/scsi/aic7xxx/aicasm/y.tab.h \
+       drivers/scsi/aic7xxx/aicasm/aicasm \
+       drivers/scsi/53c700-mem.c \
+       net/khttpd/make_times_h \
+       net/khttpd/times.h \
+       submenu*
+# directories removed with 'make clean'
+CLEAN_DIRS = \
+       modules
+
+# files removed with 'make mrproper'
+MRPROPER_FILES = \
+       include/linux/autoconf.h include/linux/version.h \
+       drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \
+       drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \
+       drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \
+       drivers/net/hamradio/soundmodem/gentbl \
+       drivers/sound/*_boot.h drivers/sound/.*.boot \
+       drivers/sound/msndinit.c \
+       drivers/sound/msndperm.c \
+       drivers/sound/pndsperm.c \
+       drivers/sound/pndspini.c \
+       drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \
+       .version .config* config.in config.old \
+       scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \
+       scripts/lxdialog/*.o scripts/lxdialog/lxdialog \
+       .menuconfig.log \
+       include/asm \
+       kdb/gen-kdb_cmds.c \
+       .hdepend scripts/mkdep scripts/split-include scripts/docproc \
+       $(TOPDIR)/include/linux/modversions.h \
+       kernel.spec
+
+# directories removed with 'make mrproper'
+MRPROPER_DIRS = \
+       include/config \
+       $(TOPDIR)/include/linux/modules
+
+
+include arch/$(ARCH)/Makefile
+
+export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL
+
+export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS
+
+.S.s:
+       $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $<
+.S.o:
+       $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $<
+
+Version: dummy
+       @rm -f include/linux/compile.h
+
+boot: vmlinux
+       @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot
+
+LD_VMLINUX     := $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \
+                       --start-group \
+                       $(CORE_FILES) \
+                       $(DRIVERS) \
+                       $(NETWORKS) \
+                       $(LIBS) \
+                       --end-group
+ifeq ($(CONFIG_KALLSYMS),y)
+LD_VMLINUX_KALLSYMS    := $(TMPPREFIX).tmp_kallsyms3.o
+else
+LD_VMLINUX_KALLSYMS    :=
+endif
+
+vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs
+       @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms
+
+.PHONY:        kallsyms
+
+kallsyms:
+ifeq ($(CONFIG_KALLSYMS),y)
+       @echo kallsyms pass 1
+       $(LD_VMLINUX) -o $(TMPPREFIX).tmp_vmlinux1
+       @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux1 > $(TMPPREFIX).tmp_kallsyms1.o
+       @echo kallsyms pass 2
+       @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms1.o -o $(TMPPREFIX).tmp_vmlinux2
+       @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux2 > $(TMPPREFIX).tmp_kallsyms2.o
+       @echo kallsyms pass 3
+       @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms2.o -o $(TMPPREFIX).tmp_vmlinux3
+       @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux3 > $(TMPPREFIX).tmp_kallsyms3.o
+endif
+       $(LD_VMLINUX) $(LD_VMLINUX_KALLSYMS) -o $(TMPPREFIX)vmlinux
+ifneq ($(TMPPREFIX),)
+       mv $(TMPPREFIX)vmlinux vmlinux
+endif
+       $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
+       @rm -f $(TMPPREFIX).tmp_vmlinux* $(TMPPREFIX).tmp_kallsyms*
+
+symlinks:
+       rm -f include/asm
+       ( cd include ; ln -sf asm-$(ARCH) asm)
+       @if [ ! -d include/linux/modules ]; then \
+               mkdir include/linux/modules; \
+       fi
+
+oldconfig: symlinks
+       $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in
+
+xconfig: symlinks
+       $(MAKE) -C scripts kconfig.tk
+       wish -f scripts/kconfig.tk
+
+menuconfig: include/linux/version.h symlinks
+       $(MAKE) -C scripts/lxdialog all
+       $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in
+
+config: symlinks
+       $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in
+
+include/config/MARKER: scripts/split-include include/linux/autoconf.h
+       scripts/split-include include/linux/autoconf.h include/config
+       @ touch include/config/MARKER
+
+linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS))
+
+$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER
+       $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@)
+
+$(TOPDIR)/include/linux/version.h: include/linux/version.h
+$(TOPDIR)/include/linux/compile.h: include/linux/compile.h
+
+newversion:
+       . scripts/mkversion > .tmpversion
+       @mv -f .tmpversion .version
+
+include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion
+       @echo -n \#define UTS_VERSION \"\#`cat .version` > .ver
+       @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver; fi
+       @if [ -f .name ]; then  echo -n \-`cat .name` >> .ver; fi
+       @echo ' '`date`'"' >> .ver
+       @echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver
+       @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver
+       @echo \#define LINUX_COMPILE_HOST \"`hostname`\" >> .ver
+       @if [ -x /bin/dnsdomainname ]; then \
+          echo \#define LINUX_COMPILE_DOMAIN \"`dnsdomainname`\"; \
+        elif [ -x /bin/domainname ]; then \
+          echo \#define LINUX_COMPILE_DOMAIN \"`domainname`\"; \
+        else \
+          echo \#define LINUX_COMPILE_DOMAIN ; \
+        fi >> .ver
+       @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver
+       @mv -f .ver $@
+
+include/linux/version.h: ./Makefile
+       @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver
+       @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver
+       @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver
+       @mv -f .ver $@
+
+init/version.o: init/version.c include/linux/compile.h include/config/MARKER
+       $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(HW_ARCH)"' -c -o init/version.o init/version.c
+
+init/main.o: init/main.c include/config/MARKER
+       $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -c -o $*.o $<
+
+fs lib mm ipc kernel drivers net: dummy
+       $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@)
+
+TAGS: dummy
+       etags `find include/asm-$(ARCH) -name '*.h'`
+       find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a
+       find $(SUBDIRS) init -name '*.[ch]' | xargs etags -a
+
+# Exuberant ctags works better with -I
+tags: dummy
+       CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \
+       ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \
+       find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \
+       find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a
+
+ifdef CONFIG_MODULES
+ifdef CONFIG_MODVERSIONS
+MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h
+endif
+
+.PHONY: modules
+modules: $(patsubst %, _mod_%, $(SUBDIRS))
+
+.PHONY: $(patsubst %, _mod_%, $(SUBDIRS))
+$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER
+       $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules
+
+.PHONY: modules_install
+modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post
+
+.PHONY: _modinst_
+_modinst_:
+       @rm -rf $(MODLIB)/kernel
+       @rm -f $(MODLIB)/build
+       @mkdir -p $(MODLIB)/kernel
+       @ln -s $(TOPDIR) $(MODLIB)/build
+
+# If System.map exists, run depmod.  This deliberately does not have a
+# dependency on System.map since that would run the dependency tree on
+# vmlinux.  This depmod is only for convenience to give the initial
+# boot a modules.dep even before / is mounted read-write.  However the
+# boot script depmod is the master version.
+ifeq "$(strip $(INSTALL_MOD_PATH))" ""
+depmod_opts    :=
+else
+depmod_opts    := -b $(INSTALL_MOD_PATH) -r
+endif
+.PHONY: _modinst_post
+_modinst_post: _modinst_post_pcmcia
+#      if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+
+# Backwards compatibility symlinks for people still using old versions
+# of pcmcia-cs with hard coded pathnames on insmod.  Remove
+# _modinst_post_pcmcia for kernel 2.4.1.
+.PHONY: _modinst_post_pcmcia
+_modinst_post_pcmcia:
+       cd $(MODLIB); \
+       mkdir -p pcmcia; \
+       find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia
+
+.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS))
+$(patsubst %, _modinst_%, $(SUBDIRS)) :
+       $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install
+
+# modules disabled....
+
+else
+modules modules_install: dummy
+       @echo
+       @echo "The present kernel configuration has modules disabled."
+       @echo "Type 'make config' and enable loadable module support."
+       @echo "Then build a kernel with module support enabled."
+       @echo
+       @exit 1
+endif
+
+clean: archclean
+       find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \
+               | grep -v lxdialog/ | xargs rm -f
+       rm -f $(CLEAN_FILES)
+       rm -rf $(CLEAN_DIRS)
+       $(MAKE) -C Documentation/DocBook clean
+
+mrproper: clean archmrproper
+       find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f
+       rm -f $(MRPROPER_FILES)
+       rm -rf $(MRPROPER_DIRS)
+       $(MAKE) -C Documentation/DocBook mrproper
+
+distclean: mrproper
+       rm -f core `find . \( -not -type d \) -and \
+               \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+               -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+               -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags
+
+backup: mrproper
+       cd .. && tar cf - linux/ | gzip -9 > backup.gz
+       sync
+
+sgmldocs: 
+       chmod 755 $(TOPDIR)/scripts/docgen
+       chmod 755 $(TOPDIR)/scripts/gen-all-syms
+       chmod 755 $(TOPDIR)/scripts/kernel-doc
+       $(MAKE) -C $(TOPDIR)/Documentation/DocBook books
+
+psdocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook ps
+
+pdfdocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook pdf
+
+htmldocs: sgmldocs
+       $(MAKE) -C Documentation/DocBook html
+
+sums:
+       find . -type f -print | sort | xargs sum > .SUMS
+
+dep-files: scripts/mkdep archdep include/linux/version.h
+       scripts/mkdep -- init/*.c > .depend
+       scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend
+       $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)"
+ifdef CONFIG_MODVERSIONS
+       $(MAKE) update-modverfile
+endif
+
+ifdef CONFIG_MODVERSIONS
+MODVERFILE := $(TOPDIR)/include/linux/modversions.h
+else
+MODVERFILE :=
+endif
+export MODVERFILE
+
+depend dep: dep-files
+
+checkconfig:
+       find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl
+
+checkhelp:
+       find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl
+
+checkincludes:
+       find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl
+
+ifdef CONFIGURATION
+..$(CONFIGURATION):
+       @echo
+       @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'"
+       @echo
+       $(MAKE) $(CONFIGURATION)
+       @echo
+       @echo "Successful. Try re-making (ignore the error that follows)"
+       @echo
+       exit 1
+
+#dummy: ..$(CONFIGURATION)
+dummy:
+
+else
+
+dummy:
+
+endif
+
+include Rules.make
+
+#
+# This generates dependencies for the .h files.
+#
+
+scripts/mkdep: scripts/mkdep.c
+       $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c
+
+scripts/split-include: scripts/split-include.c
+       $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c
+
+#
+# RPM target
+#
+#      If you do a make spec before packing the tarball you can rpm -ta it
+#
+spec:
+       . scripts/mkspec >kernel.spec
+
+#
+#      Build a tar ball, generate an rpm from it and pack the result
+#      There are two bits of magic here
+#      1) The use of /. to avoid tar packing just the symlink
+#      2) Removing the .dep files as they have source paths in them that
+#         will become invalid
+#
+rpm:   clean spec
+       find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f
+       set -e; \
+       cd $(TOPDIR)/.. ; \
+       ln -sf $(TOPDIR) $(KERNELPATH) ; \
+       tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \
+       rm $(KERNELPATH) ; \
+       cd $(TOPDIR) ; \
+       . scripts/mkversion > .version ; \
+       rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \
+       rm $(TOPDIR)/../$(KERNELPATH).tar.gz
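
The include/linux/version.h rule above packs the release into
LINUX_VERSION_CODE as (VERSION << 16) + (PATCHLEVEL << 8) + SUBLEVEL -- for
2.4.16 that is 132112 -- and emits the matching KERNEL_VERSION() macro. The
conventional consumer-side check looks like this; the error text is
illustrative only:

#include <linux/version.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 16)
#error "this code expects a 2.4.16-based tree"
#endif
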
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/Makefile
new file mode 100644 (file)
index 0000000..ffc378e
--- /dev/null
@@ -0,0 +1,98 @@
+#
+# xeno/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep", for cleaning up and making dependencies for
+# this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@geocities.com>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+#
+
+LD=$(CROSS_COMPILE)ld -m elf_i386
+OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S
+LDFLAGS=-e stext
+LINKFLAGS =-T $(TOPDIR)/arch/xeno/vmlinux.lds $(LDFLAGS)
+
+CFLAGS += -pipe
+
+# prevent gcc from keeping the stack 16 byte aligned
+CFLAGS += $(shell if $(CC) -mpreferred-stack-boundary=2 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mpreferred-stack-boundary=2"; fi)
+
+ifdef CONFIG_M686
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MPENTIUMIII
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MPENTIUM4
+CFLAGS += -march=i686
+endif
+
+ifdef CONFIG_MK7
+CFLAGS += $(shell if $(CC) -march=athlon -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=athlon"; else echo "-march=i686 -malign-functions=4"; fi) 
+endif
+
+HEAD := arch/xeno/kernel/head.o arch/xeno/kernel/init_task.o
+
+SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib
+SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network
+SUBDIRS += arch/xeno/drivers/dom0 arch/xeno/drivers/block
+
+CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o
+CORE_FILES += arch/xeno/drivers/console/con.o
+CORE_FILES += arch/xeno/drivers/block/blk.o
+CORE_FILES += arch/xeno/drivers/network/net.o
+CORE_FILES += arch/xeno/drivers/dom0/dom0.o
+LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a
+
+arch/xeno/kernel: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kernel
+
+arch/xeno/mm: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/mm
+
+arch/xeno/drivers/console: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/console
+
+arch/xeno/drivers/network: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/network
+
+arch/xeno/drivers/block: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/block
+
+arch/xeno/drivers/dom0: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/dom0
+
+ifdef CONFIG_KDB
+arch/xeno/kdb: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kdb
+endif
+
+MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
+vmlinux: arch/xeno/vmlinux.lds
+
+FORCE: ;
+
+.PHONY: bzImage compressed clean archclean archmrproper archdep
+
+bzImage: vmlinux
+       @$(MAKEBOOT) image.gz
+
+archclean:
+       @$(MAKEBOOT) clean
+
+archmrproper:
+
+archdep:
+       @$(MAKEBOOT) dep
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile
new file mode 100644 (file)
index 0000000..252daf5
--- /dev/null
@@ -0,0 +1,22 @@
+#
+# arch/xeno/boot/Makefile
+#
+
+image.gz: image
+       gzip -f -9 < $< > $@
+
+image: $(TOPDIR)/vmlinux
+       # Guest OS header -- first 8 bytes are identifier 'XenoGues'.
+       echo -e -n 'XenoGues' >$@ 
+       # Guest OS header -- next 4 bytes are load address (0xC0000000).
+       echo -e -n '\000\000\000\300' >>$@
+       $(OBJCOPY) $< image.body
+       # Guest OS header is immediately followed by raw OS image.
+       # Start address must be at byte 0.
+       cat image.body >>$@
+       rm -f image.body
+
+dep:
+
+clean:
+       rm -f image image.gz
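
The image rule above prepends a fixed 12-byte guest-OS header to the raw
objcopy output: the 8-byte magic 'XenoGues', then the load address 0xC0000000,
which is what the escaped bytes '\000\000\000\300' spell in x86 little-endian
order. Viewed as a C struct -- the struct name is hypothetical, the on-disk
format is just these twelve bytes:

#include <stdint.h>

struct xeno_guest_header {
    char     magic[8];     /* "XenoGues", no NUL terminator */
    uint32_t load_addr;    /* 0xC0000000, stored little-endian */
};
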
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/config.in b/xenolinux-2.4.16-sparse/arch/xeno/config.in
new file mode 100644 (file)
index 0000000..f6e48b4
--- /dev/null
@@ -0,0 +1,135 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+#
+mainmenu_name "Linux Kernel Configuration"
+
+define_bool CONFIG_XENO y
+
+define_bool CONFIG_X86 y
+define_bool CONFIG_ISA y
+define_bool CONFIG_SBUS n
+
+define_bool CONFIG_UID16 y
+
+mainmenu_option next_comment
+comment 'Code maturity level options'
+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL
+endmenu
+
+mainmenu_option next_comment
+comment 'Loadable module support'
+bool 'Enable loadable module support' CONFIG_MODULES
+if [ "$CONFIG_MODULES" = "y" ]; then
+   bool '  Set version information on all module symbols' CONFIG_MODVERSIONS
+   bool '  Kernel module loader' CONFIG_KMOD
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'Processor type and features'
+choice 'Processor family' \
+       "Pentium-Pro/Celeron/Pentium-II         CONFIG_M686 \
+        Pentium-III/Celeron(Coppermine)        CONFIG_MPENTIUMIII \
+        Pentium-4                              CONFIG_MPENTIUM4 \
+        Athlon/Duron/K7                        CONFIG_MK7" Pentium-Pro
+
+   define_bool CONFIG_X86_WP_WORKS_OK y
+   define_bool CONFIG_X86_INVLPG y
+   define_bool CONFIG_X86_CMPXCHG y
+   define_bool CONFIG_X86_XADD y
+   define_bool CONFIG_X86_BSWAP y
+   define_bool CONFIG_X86_POPAD_OK y
+   define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+   define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+   define_bool CONFIG_X86_GOOD_APIC y
+   define_bool CONFIG_X86_PGE y
+   define_bool CONFIG_X86_USE_PPRO_CHECKSUM y
+   define_bool CONFIG_X86_TSC y
+
+if [ "$CONFIG_M686" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUMIII" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 5
+fi
+if [ "$CONFIG_MPENTIUM4" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 7
+fi
+if [ "$CONFIG_MK7" = "y" ]; then
+   define_int  CONFIG_X86_L1_CACHE_SHIFT 6
+   define_bool CONFIG_X86_USE_3DNOW y
+fi
+
+choice 'High Memory Support' \
+       "off    CONFIG_NOHIGHMEM \
+        4GB    CONFIG_HIGHMEM4G \
+        64GB   CONFIG_HIGHMEM64G" off
+if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+fi
+if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_X86_PAE y
+fi
+
+#bool 'Symmetric multi-processing support' CONFIG_SMP
+#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+#   define_bool CONFIG_HAVE_DEC_LOCK y
+#fi
+endmenu
+
+mainmenu_option next_comment
+comment 'General setup'
+
+bool 'Networking support' CONFIG_NET
+
+bool 'System V IPC' CONFIG_SYSVIPC
+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
+bool 'Sysctl support' CONFIG_SYSCTL
+if [ "$CONFIG_PROC_FS" = "y" ]; then
+   choice 'Kernel core (/proc/kcore) format' \
+       "ELF            CONFIG_KCORE_ELF        \
+        A.OUT          CONFIG_KCORE_AOUT" ELF
+fi
+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+
+endmenu
+
+if [ "$CONFIG_NET" = "y" ]; then
+   source net/Config.in
+fi
+
+source drivers/block/Config.in
+define_bool CONFIG_BLK_DEV_IDE_MODES n
+define_bool CONFIG_BLK_DEV_HD n
+
+source fs/Config.in
+
+mainmenu_option next_comment
+comment 'Kernel hacking'
+
+bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
+if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
+   bool '  Debug high memory support' CONFIG_DEBUG_HIGHMEM
+   bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
+   bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
+   bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
+   bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+   bool '  Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+   bool '  Built-in Kernel Debugger support' CONFIG_KDB
+   dep_tristate '    KDB modules' CONFIG_KDB_MODULES $CONFIG_KDB
+   if [ "$CONFIG_KDB" = "y" ]; then
+      bool '    KDB off by default' CONFIG_KDB_OFF
+      comment '  Load all symbols for debugging is required for KDB'
+      define_bool CONFIG_KALLSYMS y
+   else
+      bool '  Load all symbols for debugging' CONFIG_KALLSYMS
+   fi
+   bool '  Compile the kernel with frame pointers' CONFIG_FRAME_POINTER
+fi
+
+endmenu
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/defconfig b/xenolinux-2.4.16-sparse/arch/xeno/defconfig
new file mode 100644 (file)
index 0000000..b278dec
--- /dev/null
@@ -0,0 +1,391 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+
+#
+# Code maturity level options
+#
+# CONFIG_EXPERIMENTAL is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# Processor type and features
+#
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+CONFIG_M686=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MCYRIXIII is not set
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_X86_TSC=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_PGE=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+# CONFIG_TOSHIBA is not set
+# CONFIG_I8K is not set
+# CONFIG_MICROCODE is not set
+# CONFIG_X86_MSR is not set
+# CONFIG_X86_CPUID is not set
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
+# CONFIG_MATH_EMULATION is not set
+# CONFIG_MTRR is not set
+# CONFIG_SMP is not set
+# CONFIG_X86_UP_APIC is not set
+
+#
+# General setup
+#
+CONFIG_NET=y
+# CONFIG_PCI is not set
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_HOTPLUG is not set
+# CONFIG_PCMCIA is not set
+# CONFIG_HOTPLUG_PCI is not set
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+CONFIG_BINFMT_AOUT=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_PM is not set
+# CONFIG_APM_IGNORE_USER_SUSPEND is not set
+# CONFIG_APM_DO_ENABLE is not set
+# CONFIG_APM_CPU_IDLE is not set
+# CONFIG_APM_DISPLAY_BLANK is not set
+# CONFIG_APM_RTC_IS_GMT is not set
+# CONFIG_APM_ALLOW_INTS is not set
+# CONFIG_APM_REAL_MODE_POWER_OFF is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK=y
+CONFIG_RTNETLINK=y
+# CONFIG_NETLINK_DEV is not set
+# CONFIG_NETFILTER is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+
+#
+#  
+#
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# ATA/IDE/MFM/RLL support
+#
+# CONFIG_IDE is not set
+# CONFIG_BLK_DEV_IDE_MODES is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION_BOOT is not set
+# CONFIG_FUSION_ISENSE is not set
+# CONFIG_FUSION_CTL is not set
+# CONFIG_FUSION_LAN is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Network device support
+#
+# CONFIG_NETDEVICES is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# IrDA (infrared) support
+#
+# CONFIG_IRDA is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input core support
+#
+# CONFIG_INPUT is not set
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_UNIX98_PTYS is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+# CONFIG_MOUSE is not set
+
+#
+# Joysticks
+#
+# CONFIG_INPUT_GAMEPORT is not set
+
+#
+# Input core support is needed for gameports
+#
+
+#
+# Input core support is needed for joysticks
+#
+# CONFIG_QIC02_TAPE is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_AUTOFS4_FS=y
+# CONFIG_EXT3_FS is not set
+# CONFIG_JBD is not set
+# CONFIG_FAT_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_TMPFS is not set
+# CONFIG_RAMFS is not set
+# CONFIG_ISO9660_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+CONFIG_SUNRPC=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_ZISOFS_FS is not set
+# CONFIG_ZLIB_FS_INFLATE is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_SMB_NLS is not set
+# CONFIG_NLS is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+
+#
+# USB Controllers
+#
+
+#
+# USB Device Class drivers
+#
+
+#
+# USB Human Interface Devices (HID)
+#
+
+#
+#   Input core support is needed for USB HID
+#
+
+#
+# USB Imaging devices
+#
+
+#
+# USB Multimedia devices
+#
+
+#
+#   Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network adaptors
+#
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+
+#
+# USB Miscellaneous drivers
+#
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_HIGHMEM=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_IOVIRT=y
+# CONFIG_MAGIC_SYSRQ is not set
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_KDB is not set
+CONFIG_KALLSYMS=y
+CONFIG_FRAME_POINTER=y
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile
new file mode 100644 (file)
index 0000000..9361a01
--- /dev/null
@@ -0,0 +1,3 @@
+O_TARGET := blk.o
+obj-y := block.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c
new file mode 100644 (file)
index 0000000..bf7d416
--- /dev/null
@@ -0,0 +1,392 @@
+/******************************************************************************
+ * block.c
+ * 
+ * Virtual block driver for XenoLinux.
+ * 
+ * adapted from network.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <asm/hypervisor-ifs/block.h>
+
+#ifdef UNDEFINED
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <net/sock.h>
+
+#define BLK_TX_IRQ _EVENT_BLK_TX
+#define BLK_RX_IRQ _EVENT_BLK_RX
+
+#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
+#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
+
+#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
+
+#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */
+
+
+
+int        network_probe(struct net_device *dev);
+static int  network_open(struct net_device *dev);
+static int  network_start_xmit(struct sk_buff *skb, struct net_device *dev);
+static int  network_close(struct net_device *dev);
+static struct net_device_stats *network_get_stats(struct net_device *dev);
+static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void network_free_rx_buffers(struct net_device *dev);
+
+static struct net_device dev_net_xeno;
+
+/*
+ * RX RING:   RX_IDX <= rx_cons <= rx_prod
+ * TX RING:   TX_IDX <= tx_cons <= tx_prod
+ * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
+ */
+struct net_private
+{
+    struct net_device_stats stats;
+    struct sk_buff **tx_skb_ring;
+    struct sk_buff **rx_skb_ring;
+    atomic_t tx_entries;
+    unsigned int rx_idx, tx_idx, tx_full;
+    net_ring_t *net_ring;
+    spinlock_t tx_lock;
+};
+
+int __init network_probe(struct net_device *dev)
+{
+    SET_MODULE_OWNER(dev);
+
+    memcpy(dev->dev_addr, "\xFE\xFD\x00\x00\x00\x00", 6);
+
+    dev->open = network_open;
+    dev->hard_start_xmit = network_start_xmit;
+    dev->stop = network_close;
+    dev->get_stats = network_get_stats;
+
+    ether_setup(dev);
+    
+    return 0;
+}
+
+
+static int network_open(struct net_device *dev)
+{
+    struct net_private *np;
+    int error;
+
+    np = kmalloc(sizeof(struct net_private), GFP_KERNEL);
+    if ( np == NULL ) 
+    {
+        printk(KERN_WARNING "%s: No memory for private data\n", dev->name);
+        return -ENOMEM;
+    }
+    memset(np, 0, sizeof(struct net_private));
+    dev->priv = np;
+
+    spin_lock_init(&np->tx_lock);
+
+    atomic_set(&np->tx_entries, 0);
+
+    np->net_ring  = start_info.net_rings;
+    np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
+    np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
+    np->net_ring->tx_ring = NULL;
+    np->net_ring->rx_ring = NULL;
+
+    np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), 
+                                  GFP_KERNEL);
+    np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), 
+                                  GFP_KERNEL);
+    if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
+         (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+    {
+        printk(KERN_WARNING "%s: Could not allocate ring memory\n", dev->name);
+        error = -ENOBUFS;
+        goto fail;
+    }
+
+    network_alloc_rx_buffers(dev);
+
+    error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate receive interrupt\n",
+               dev->name);
+        goto fail;
+    }
+
+    error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n",
+               dev->name);
+        free_irq(NET_RX_IRQ, dev);
+        goto fail;
+    }
+
+    printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);
+
+    netif_start_queue(dev);
+
+    MOD_INC_USE_COUNT;
+
+    return 0;
+
+ fail:
+    if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
+    if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
+    if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
+    if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
+    kfree(np);
+    return error;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned long flags;
+
+    spin_lock_irqsave(&np->tx_lock, flags);
+
+    for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) )
+    {
+        skb = np->tx_skb_ring[i];
+        dev_kfree_skb_any(skb);
+        atomic_dec(&np->tx_entries);
+    }
+
+    np->tx_idx = i;
+
+    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
+    {
+        np->tx_full = 0;
+        netif_wake_queue(dev);
+    }
+
+    spin_unlock_irqrestore(&np->tx_lock, flags);
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
+
+    for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
+    {
+        skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( skb == NULL ) break;
+        skb->dev = dev;
+        skb_reserve(skb, 2); /* word align the IP header */
+        np->rx_skb_ring[i] = skb;
+        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
+        np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
+    }
+
+    np->net_ring->rx_prod = i;
+
+    np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
+
+    HYPERVISOR_net_update();
+}
+
+
+static void network_free_rx_buffers(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;    
+
+    for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
+    {
+        skb = np->rx_skb_ring[i];
+        dev_kfree_skb(skb);
+    }
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = (struct net_private *)dev->priv;
+
+    if ( np->tx_full )
+    {
+        printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name);
+        netif_stop_queue(dev);
+        return -ENOBUFS;
+    }
+
+    i = np->net_ring->tx_prod;
+    np->tx_skb_ring[i] = skb;
+    np->net_ring->tx_ring[i].addr = (unsigned long)skb->data;
+    np->net_ring->tx_ring[i].size = skb->len;
+    np->net_ring->tx_prod = TX_RING_INC(i);
+    atomic_inc(&np->tx_entries);
+
+    np->stats.tx_bytes += skb->len;
+    np->stats.tx_packets++;
+
+    spin_lock_irq(&np->tx_lock);
+    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
+    {
+        np->tx_full = 1;
+        netif_stop_queue(dev);
+        np->net_ring->tx_event = TX_RING_ADD(np->tx_idx,
+                                           atomic_read(&np->tx_entries) >> 1);
+    }
+    else
+    {
+        /* Avoid unnecessary tx interrupts. */
+        np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod);
+    }
+    spin_unlock_irq(&np->tx_lock);
+
+    /* Must do this after setting tx_event: race with updates of tx_cons. */
+    network_tx_buf_gc(dev);
+
+    HYPERVISOR_net_update();
+
+    return 0;
+}
+
+
+static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    unsigned int i;
+    struct net_device *dev = (struct net_device *)dev_id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    
+ again:
+    for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
+    {
+        skb = np->rx_skb_ring[i];
+        skb_put(skb, np->net_ring->rx_ring[i].size);
+        skb->protocol = eth_type_trans(skb, dev);
+        np->stats.rx_packets++;
+        np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
+        netif_rx(skb);
+        dev->last_rx = jiffies;
+    }
+
+    np->rx_idx = i;
+
+    network_alloc_rx_buffers(dev);
+    
+    /* Deal with hypervisor racing our resetting of rx_event. */
+    smp_mb();
+    if ( np->net_ring->rx_cons != i ) goto again;
+}
+
+
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    struct net_device *dev = (struct net_device *)dev_id;
+    network_tx_buf_gc(dev);
+}
+
+
+static int network_close(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+
+    netif_stop_queue(dev);
+    free_irq(NET_RX_IRQ, dev);
+    free_irq(NET_TX_IRQ, dev);
+    network_free_rx_buffers(dev);
+    kfree(np->net_ring->rx_ring);
+    kfree(np->net_ring->tx_ring);
+    kfree(np->rx_skb_ring);
+    kfree(np->tx_skb_ring);
+    kfree(np);
+    MOD_DEC_USE_COUNT;
+    return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+    struct net_private *np = (struct net_private *)dev->priv;
+    return &np->stats;
+}
+
+
+static int __init init_module(void)
+{
+    memset(&dev_net_xeno, 0, sizeof(dev_net_xeno));
+    strcpy(dev_net_xeno.name, "eth%d");
+    dev_net_xeno.init = network_probe;
+    return (register_netdev(&dev_net_xeno) != 0) ? -EIO : 0;
+}
+
+
+static void __exit cleanup_module(void)
+{
+    unregister_netdev(&dev_net_xeno);
+}
+
+#endif /* UNDEFINED */
+
+
+static void block_initialize(void)
+{
+  blk_ring_t *blk_ring = start_info.blk_ring;
+
+  if ( blk_ring == NULL ) return;
+
+  blk_ring->tx_prod = blk_ring->tx_cons = blk_ring->tx_event = 0;
+  blk_ring->rx_prod = blk_ring->rx_cons = blk_ring->rx_event = 0;
+  blk_ring->tx_ring = NULL;
+  blk_ring->rx_ring = NULL;
+}
+
+
+/*
+ * init_module() initializes the xeno block device driver.
+ */
+
+static int __init init_module(void)
+{
+  block_initialize();
+  printk("XenoLinux Virtual Block Device Driver installed\n");
+  return 0;
+}
+
+static void __exit cleanup_module(void)
+{
+  printk("XenoLinux Virtual Block Device Driver uninstalled\n");
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
+
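
The dormant #ifdef UNDEFINED network code above steps its producer/consumer
indices with the mask-based TX_RING_INC/RX_RING_INC macros, which wrap
correctly only because the ring sizes are powers of two. A minimal sketch of
the idiom, with an assumed size rather than the driver's actual constants:

#define RING_SIZE 256                          /* must be a power of two */
#define RING_INC(i)    (((i) + 1) & (RING_SIZE - 1))
#define RING_ADD(i, j) (((i) + (j)) & (RING_SIZE - 1))

/* Indices wrap automatically: RING_INC(255) == 0, RING_ADD(250, 10) == 4. */
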
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile
new file mode 100644 (file)
index 0000000..5a0e7b3
--- /dev/null
@@ -0,0 +1,3 @@
+O_TARGET := con.o
+obj-y := console.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c
new file mode 100644 (file)
index 0000000..11548f8
--- /dev/null
@@ -0,0 +1,204 @@
+/******************************************************************************
+ * console.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/hypervisor.h>
+
+/******************** Kernel console driver ********************************/
+
+static void kconsole_write(struct console *co, const char *s, unsigned count)
+{
+#define STRLEN 256
+    static char str[STRLEN];
+    static int pos = 0;
+    int len;
+    
+    /* We buffer output until we see a newline, or until the buffer is full. */
+    while ( count != 0 )
+    {
+        len = ((STRLEN - pos) > count) ? count : STRLEN - pos;
+        memcpy(str + pos, s, len);
+        pos   += len;
+        s     += len;
+        count -= len;
+        if ( (pos == STRLEN) || (str[pos-1] == '\n') )
+        {
+            (void)HYPERVISOR_console_write(str, pos);
+            pos = 0;
+        }
+    }
+}
+
+static kdev_t kconsole_device(struct console *c)
+{
+    /*
+     * This is the magic that binds our "struct console" to our
+     * "tty_struct", defined below.
+     */
+    return MKDEV(TTY_MAJOR, 0);
+}
+
+static struct console kconsole_info = {
+    name:              "xenocon",
+    write:             kconsole_write,
+    device:             kconsole_device,
+    flags:             CON_PRINTBUFFER,
+    index:             -1,
+};
+
+void xeno_console_init(void)
+{
+    register_console(&kconsole_info);
+}
+
+
+/******************** Initial /dev/console *********************************/
+
+
+static struct tty_driver console_driver;
+static int console_refcount;
+static struct tty_struct *console_table[1];
+static struct termios *console_termios[1];
+static struct termios *console_termios_locked[1];
+
+static int console_write_room(struct tty_struct *tty)
+{
+    return INT_MAX;
+}
+
+static int console_chars_in_buffer(struct tty_struct *tty)
+{
+    return 0;
+}
+
+static inline int console_xmit(int ch)
+{
+    char _ch = ch;
+    kconsole_write(NULL, &_ch, 1);
+    return 1;
+}
+
+static int console_write(struct tty_struct *tty, int from_user,
+                       const u_char * buf, int count)
+{
+    int i;
+
+    if ( from_user && verify_area(VERIFY_READ, buf, count) )
+    {
+        return -EINVAL;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        char ch;
+        if ( from_user )
+        {
+            __get_user(ch, buf + i);
+        }
+        else
+        {
+            ch = buf[i];
+        }
+        console_xmit(ch);
+    }
+    return i;
+}
+
+static void console_put_char(struct tty_struct *tty, u_char ch)
+{
+    console_xmit(ch);
+}
+
+static int console_open(struct tty_struct *tty, struct file *filp)
+{
+    int line;
+
+    MOD_INC_USE_COUNT;
+    line = MINOR(tty->device) - tty->driver.minor_start;
+    if ( line )
+    {
+        MOD_DEC_USE_COUNT;
+        return -ENODEV;
+    }
+
+    tty->driver_data = NULL;
+
+    return 0;
+}
+
+static void console_close(struct tty_struct *tty, struct file *filp)
+{
+    MOD_DEC_USE_COUNT;
+}
+
+static int __init console_ini(void)
+{
+    memset(&console_driver, 0, sizeof(struct tty_driver));
+    console_driver.magic           = TTY_DRIVER_MAGIC;
+    console_driver.driver_name     = "xeno_console";
+    console_driver.name            = "console";
+    console_driver.major           = TTY_MAJOR;
+    console_driver.minor_start     = 0;
+    console_driver.num             = 1;
+    console_driver.type            = TTY_DRIVER_TYPE_SERIAL;
+    console_driver.subtype         = SERIAL_TYPE_NORMAL;
+    console_driver.init_termios    = tty_std_termios;
+    console_driver.flags           = TTY_DRIVER_REAL_RAW;
+    console_driver.refcount        = &console_refcount;
+    console_driver.table           = console_table;
+    console_driver.termios         = console_termios;
+    console_driver.termios_locked  = console_termios_locked;
+    /* Functions */
+    console_driver.open            = console_open;
+    console_driver.close           = console_close;
+    console_driver.write           = console_write;
+    console_driver.write_room      = console_write_room;
+    console_driver.put_char        = console_put_char;
+    console_driver.chars_in_buffer = console_chars_in_buffer;
+
+    if ( tty_register_driver(&console_driver) )
+    {
+        printk(KERN_ERR "Couldn't register Xeno console driver\n");
+    }
+    else
+    {
+        printk("Xeno console successfully installed\n");
+    }
+
+    return 0;
+}
+
+static void __exit console_fin(void)
+{
+    int ret;
+
+    ret = tty_unregister_driver(&console_driver);
+    if ( ret != 0 )
+    {
+        printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret);
+    }
+}
+
+module_init(console_ini);
+module_exit(console_fin);
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile
new file mode 100644 (file)
index 0000000..38ba617
--- /dev/null
@@ -0,0 +1,3 @@
+O_TARGET := dom0.o
+obj-y := dom0_core.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c
new file mode 100644 (file)
index 0000000..7121f89
--- /dev/null
@@ -0,0 +1,129 @@
+/******************************************************************************
+ * dom0_core.c
+ * 
+ * Interface to privileged domain-0 commands.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+#include "dom0_ops.h"
+
+static struct proc_dir_entry *proc_dom0;
+
+static unsigned char readbuf[1204];
+
+static int dom0_read_proc(char *page, char **start, off_t off,
+                          int count, int *eof, void *data)
+{
+    strcpy(page, readbuf);
+    *readbuf = '\0';
+    *eof = 1;
+    *start = page;
+    return strlen(page);
+}
+
+
+static int dom0_write_proc(struct file *file, const char *buffer, 
+                           u_long count, void *data)
+{
+    dom0_op_t op;
+    unsigned char c;
+    unsigned int val;
+    unsigned char result[20];
+    int len = count, ret;
+
+    while ( count )
+    {
+        c = *buffer++;
+        count--;
+        val = 0;
+        if ( c == 'N' )
+        {
+            op.cmd = DOM0_NEWDOMAIN;
+            while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+            {
+                val *= 10;
+                val += c - '0';
+                buffer++; count--;
+            }      
+            op.u.newdomain.memory_kb = val;
+            val = 0;
+            if (count && (*buffer == ',')) 
+            { 
+                buffer++; count--;
+                while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+                {
+                    val *= 10;
+                    val += c - '0';
+                    buffer++; count--;
+                }
+            } 
+            else 
+            {
+                val = 1; /* default to 1 vif */
+            }
+            op.u.newdomain.num_vifs = val;
+            ret = HYPERVISOR_dom0_op(&op);
+        }
+        else if ( c == 'K' )
+        {
+            op.cmd = DOM0_KILLDOMAIN;
+            while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+            {
+                val *= 10;
+                val += c - '0';
+                buffer++; count--;
+            }        
+            op.u.killdomain.domain = val;
+            ret = HYPERVISOR_dom0_op(&op);
+        }
+        else
+        {
+            ret = -ENOSYS;
+        }
+        
+        sprintf(result, "%d\n", ret);
+        strcat(readbuf, result);
+
+        /* Skip to end of line, guarding against unsigned underflow of count. */
+        while ( count && (*buffer++ != '\n') ) count--;
+        if ( count ) count--;
+    }
+
+    return len;
+}
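+
+/*
+ * Illustrative usage from a domain-0 shell (derived from the parser above;
+ * the numbers are examples, not fixed values):
+ *
+ *   echo "N16384,2" > /proc/dom0    (new domain: 16384kB memory, 2 vifs)
+ *   echo "K5"       > /proc/dom0    (kill domain 5)
+ *   cat /proc/dom0                  (read back per-command result codes)
+ */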
+
+
+static int __init init_module(void)
+{
+    *readbuf = '\0';
+    proc_dom0 = create_proc_entry ("dom0", 0600, &proc_root);
+    if ( proc_dom0 != NULL )
+    {
+        proc_dom0->owner      = THIS_MODULE;
+        proc_dom0->nlink      = 1;
+        proc_dom0->read_proc  = dom0_read_proc;
+        proc_dom0->write_proc = dom0_write_proc;
+        printk("Successfully installed domain-0 control interface\n");
+    }
+    return 0;
+}
+
+
+static void __exit cleanup_module(void)
+{
+    if ( proc_dom0 == NULL ) return;
+    remove_proc_entry("dom0", &proc_root);
+    proc_dom0 = NULL;
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h
new file mode 100644 (file)
index 0000000..118202a
--- /dev/null
@@ -0,0 +1,32 @@
+/******************************************************************************
+ * dom0_ops.h
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#define DOM0_NEWDOMAIN   0
+#define DOM0_KILLDOMAIN  1
+
+typedef struct dom0_newdomain_st
+{
+    unsigned int memory_kb;
+    unsigned int num_vifs;
+} dom0_newdomain_t;
+
+typedef struct dom0_killdomain_st
+{
+    unsigned int domain;
+} dom0_killdomain_t;
+
+typedef struct dom0_op_st
+{
+    unsigned long cmd;
+    union
+    {
+        dom0_newdomain_t newdomain;
+        dom0_killdomain_t killdomain;
+    }
+    u;
+} dom0_op_t;
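+
+/*
+ * A minimal sketch of issuing an op, mirroring the real caller in
+ * dom0_core.c:
+ *
+ *   dom0_op_t op;
+ *   op.cmd = DOM0_NEWDOMAIN;
+ *   op.u.newdomain.memory_kb = 16384;
+ *   op.u.newdomain.num_vifs  = 1;
+ *   ret = HYPERVISOR_dom0_op(&op);
+ */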
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile
new file mode 100644 (file)
index 0000000..b44a288
--- /dev/null
@@ -0,0 +1,3 @@
+O_TARGET := net.o
+obj-y := network.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
new file mode 100644 (file)
index 0000000..7e1825f
--- /dev/null
@@ -0,0 +1,374 @@
+/******************************************************************************
+ * network.c
+ * 
+ * Virtual network driver for XenoLinux.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <net/sock.h>
+
+#define NET_TX_IRQ _EVENT_NET_TX
+#define NET_RX_IRQ _EVENT_NET_RX
+
+#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
+#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
+
+#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
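+
+/* These masks assume TX_RING_SIZE and RX_RING_SIZE are powers of two:
+ * ((_i)+1) & (SIZE-1) only wraps the index correctly in that case. */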
+
+#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */
+
+static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void network_free_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+static struct list_head dev_list;
+
+/*
+ * RX RING:   RX_IDX <= rx_cons <= rx_prod
+ * TX RING:   TX_IDX <= tx_cons <= tx_prod
+ * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
+ */
+struct net_private
+{
+    struct list_head list;
+    struct net_device *dev;
+
+    struct net_device_stats stats;
+    struct sk_buff **tx_skb_ring;
+    struct sk_buff **rx_skb_ring;
+    atomic_t tx_entries;
+    unsigned int rx_idx, tx_idx, tx_full;
+    net_ring_t *net_ring;
+    spinlock_t tx_lock;
+};
+
+static int network_open(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    int error;
+
+    np->rx_idx = np->tx_idx = np->tx_full = 0;
+
+    memset(&np->stats, 0, sizeof(np->stats));
+
+    spin_lock_init(&np->tx_lock);
+
+    atomic_set(&np->tx_entries, 0);
+
+    np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
+    np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
+    np->net_ring->tx_ring = NULL;
+    np->net_ring->rx_ring = NULL;
+
+    np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), 
+                                  GFP_KERNEL);
+    np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), 
+                                  GFP_KERNEL);
+    if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
+         (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+    {
+        printk(KERN_WARNING "%s: Could not allocate ring memory\n", dev->name);
+        error = -ENOBUFS;
+        goto fail;
+    }
+
+    network_alloc_rx_buffers(dev);
+
+    error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate receive interrupt\n",
+               dev->name);
+        goto fail;
+    }
+
+    error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n",
+               dev->name);
+        free_irq(NET_RX_IRQ, dev);
+        goto fail;
+    }
+
+    printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);
+
+    netif_start_queue(dev);
+
+    MOD_INC_USE_COUNT;
+
+    return 0;
+
+ fail:
+    if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
+    if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
+    if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
+    if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
+    /* np is embedded in the netdev allocation (alloc_etherdev), so it
+       must not be kfree'd here. */
+    return error;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned long flags;
+
+    spin_lock_irqsave(&np->tx_lock, flags);
+
+    for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) )
+    {
+        skb = np->tx_skb_ring[i];
+        dev_kfree_skb_any(skb);
+        atomic_dec(&np->tx_entries);
+    }
+
+    np->tx_idx = i;
+
+    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
+    {
+        np->tx_full = 0;
+        netif_wake_queue(dev);
+    }
+
+    spin_unlock_irqrestore(&np->tx_lock, flags);
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
+
+    for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
+    {
+        skb = dev_alloc_skb(RX_BUF_SIZE);
+        if ( skb == NULL ) break;
+        skb->dev = dev;
+        skb_reserve(skb, 2); /* word align the IP header */
+        np->rx_skb_ring[i] = skb;
+        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
+        np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
+    }
+
+    np->net_ring->rx_prod = i;
+
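+    /* rx_event appears to be the consumer index at which the hypervisor
+       should next notify us (mirroring tx_event in network_start_xmit
+       below), so request an event for the very next filled buffer. */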
+    np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
+
+    HYPERVISOR_net_update();
+}
+
+
+static void network_free_rx_buffers(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;    
+
+    for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
+    {
+        skb = np->rx_skb_ring[i];
+        dev_kfree_skb(skb);
+    }
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = (struct net_private *)dev->priv;
+
+    if ( np->tx_full )
+    {
+        printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name);
+        netif_stop_queue(dev);
+        return -ENOBUFS;
+    }
+
+    i = np->net_ring->tx_prod;
+    np->tx_skb_ring[i] = skb;
+    np->net_ring->tx_ring[i].addr = (unsigned long)skb->data;
+    np->net_ring->tx_ring[i].size = skb->len;
+    np->net_ring->tx_prod = TX_RING_INC(i);
+    atomic_inc(&np->tx_entries);
+
+    np->stats.tx_bytes += skb->len;
+    np->stats.tx_packets++;
+
+    spin_lock_irq(&np->tx_lock);
+    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
+    {
+        np->tx_full = 1;
+        netif_stop_queue(dev);
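+        /* Request a tx event once roughly half the outstanding buffers
+           have been consumed, so the queue can restart in good time. */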
+        np->net_ring->tx_event = TX_RING_ADD(np->tx_idx,
+                                           atomic_read(&np->tx_entries) >> 1);
+    }
+    else
+    {
+        /* Avoid unnecessary tx interrupts. */
+        np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod);
+    }
+    spin_unlock_irq(&np->tx_lock);
+
+    /* Must do this after setting tx_event: race with updates of tx_cons. */
+    network_tx_buf_gc(dev);
+
+    HYPERVISOR_net_update();
+
+    return 0;
+}
+
+
+static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    unsigned int i;
+    struct net_device *dev = (struct net_device *)dev_id;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    
+ again:
+    for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
+    {
+        skb = np->rx_skb_ring[i];
+        skb_put(skb, np->net_ring->rx_ring[i].size);
+        skb->protocol = eth_type_trans(skb, dev);
+        np->stats.rx_packets++;
+        np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
+        netif_rx(skb);
+        dev->last_rx = jiffies;
+    }
+
+    np->rx_idx = i;
+
+    network_alloc_rx_buffers(dev);
+    
+    /* Deal with hypervisor racing our resetting of rx_event. */
+    smp_mb();
+    if ( np->net_ring->rx_cons != i ) goto again;
+}
+
+
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    struct net_device *dev = (struct net_device *)dev_id;
+    network_tx_buf_gc(dev);
+}
+
+
+static int network_close(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+
+    netif_stop_queue(dev);
+    free_irq(NET_RX_IRQ, dev);
+    free_irq(NET_TX_IRQ, dev);
+    network_free_rx_buffers(dev);
+    kfree(np->net_ring->rx_ring);
+    kfree(np->net_ring->tx_ring);
+    kfree(np->rx_skb_ring);
+    kfree(np->tx_skb_ring);
+    MOD_DEC_USE_COUNT;
+    return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+    struct net_private *np = (struct net_private *)dev->priv;
+    return &np->stats;
+}
+
+
+static int __init init_module(void)
+{
+    int i, err;
+    struct net_device *dev;
+    struct net_private *np;
+
+    INIT_LIST_HEAD(&dev_list);
+
+    for ( i = 0; i < start_info.num_net_rings; i++ )
+    {
+        dev = alloc_etherdev(sizeof(struct net_private));
+        if ( dev == NULL )
+        {
+            err = -ENOMEM;
+            goto fail;
+        }
+
+        np = dev->priv;
+        np->net_ring = start_info.net_rings + i;
+
+        SET_MODULE_OWNER(dev);
+        dev->open            = network_open;
+        dev->hard_start_xmit = network_start_xmit;
+        dev->stop            = network_close;
+        dev->get_stats       = network_get_stats;
+
+        if ( (err = register_netdev(dev)) != 0 )
+        {
+            kfree(dev);
+            goto fail;
+        }
+
+        np->dev = dev;
+        list_add(&np->list, &dev_list);
+    }
+
+    return 0;
+
+ fail:
+    cleanup_module();
+    return err;
+}
+
+
+static void cleanup_module(void)
+{
+    struct net_private *np;
+    struct net_device *dev;
+
+    while ( !list_empty(&dev_list) )
+    {
+        np = list_entry(dev_list.next, struct net_private, list);
+        list_del(&np->list);
+        dev = np->dev;
+        unregister_netdev(dev);
+        kfree(dev);
+    }
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile
new file mode 100644 (file)
index 0000000..ea830e8
--- /dev/null
@@ -0,0 +1,15 @@
+
+.S.o:
+       $(CC) $(AFLAGS) -traditional -c $< -o $*.o
+
+all: kernel.o head.o init_task.o
+
+O_TARGET := kernel.o
+
+export-objs := i386_ksyms.o
+
+obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
+       ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
+       i386_ksyms.o i387.o hypervisor.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S
new file mode 100644 (file)
index 0000000..b888ae7
--- /dev/null
@@ -0,0 +1,717 @@
+/*
+ *  linux/arch/i386/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ *     ptrace needs to have all regs on the stack.
+ *     if the order here is changed, it needs to be
+ *     updated in fork.c:copy_process, signal.c:do_signal,
+ *     ptrace.c and ptrace.h
+ *
+ *      0(%esp) - %ebx
+ *      4(%esp) - %ecx
+ *      8(%esp) - %edx
+ *       C(%esp) - %esi
+ *     10(%esp) - %edi
+ *     14(%esp) - %ebp
+ *     18(%esp) - %eax
+ *     1C(%esp) - %ds
+ *     20(%esp) - %es
+ *     24(%esp) - orig_eax
+ *     28(%esp) - %eip
+ *     2C(%esp) - %cs
+ *     30(%esp) - %eflags
+ *     34(%esp) - %oldesp
+ *     38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/config.h>
+#include <linux/sys.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+
+EBX            = 0x00
+ECX            = 0x04
+EDX            = 0x08
+ESI            = 0x0C
+EDI            = 0x10
+EBP            = 0x14
+EAX            = 0x18
+DS             = 0x1C
+ES             = 0x20
+ORIG_EAX       = 0x24
+EIP            = 0x28
+CS             = 0x2C
+EFLAGS         = 0x30
+OLDESP         = 0x34
+OLDSS          = 0x38
+
+CF_MASK                = 0x00000001
+IF_MASK                = 0x00000200
+NT_MASK                = 0x00004000
+
+/*
+ * these are offsets into the task-struct.
+ */
+state          =  0
+flags          =  4
+sigpending     =  8
+addr_limit     = 12
+exec_domain    = 16
+need_resched   = 20
+tsk_ptrace     = 24
+processor      = 52
+
+ENOSYS = 38
+
+
+#define SAVE_ALL \
+       cld; \
+       pushl %es; \
+       pushl %ds; \
+       pushl %eax; \
+       pushl %ebp; \
+       pushl %edi; \
+       pushl %esi; \
+       pushl %edx; \
+       pushl %ecx; \
+       pushl %ebx; \
+       movl $(__KERNEL_DS),%edx; \
+       movl %edx,%ds; \
+       movl %edx,%es;
+
+#define RESTORE_ALL    \
+       popl %ebx;      \
+       popl %ecx;      \
+       popl %edx;      \
+       popl %esi;      \
+       popl %edi;      \
+       popl %ebp;      \
+       popl %eax;      \
+1:     popl %ds;       \
+2:     popl %es;       \
+       addl $4,%esp;   \
+3:     iret;           \
+.section .fixup,"ax";  \
+4:     movl $0,(%esp); \
+       jmp 1b;         \
+5:     movl $0,(%esp); \
+       jmp 2b;         \
+6:     pushl %ss;      \
+       popl %ds;       \
+       pushl %ss;      \
+       popl %es;       \
+       pushl $11;      \
+        call do_exit;  \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,4b;    \
+       .long 2b,5b;    \
+       .long 3b,6b;    \
+.previous
+
+#define GET_CURRENT(reg) \
+       movl $-8192, reg; \
+       andl %esp, reg
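+# (The task struct sits at the base of the 8KB-aligned kernel stack, so
+#  masking %esp with -8192 yields the current task pointer.)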
+
+
+ENTRY(ret_from_fork)
+       pushl %ebx
+       call SYMBOL_NAME(schedule_tail)
+       addl $4, %esp
+       GET_CURRENT(%ebx)
+       testb $0x02,tsk_ptrace(%ebx)    # PT_TRACESYS
+       jne tracesys_exit
+       jmp     ret_from_sys_call
+
+#if defined(CONFIG_KDB)
+ENTRY(kdb_call)
+       pushl %eax              # save orig EAX
+       SAVE_ALL
+       pushl %esp              # struct pt_regs
+       pushl $0                # error_code
+       pushl $7                # KDB_REASON_ENTRY
+       call  SYMBOL_NAME(kdb)
+       addl  $12,%esp          # remove args
+       RESTORE_ALL
+#endif
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+ENTRY(system_call)
+       pushl %eax                      # save orig_eax
+       SAVE_ALL
+       GET_CURRENT(%ebx)
+        testb $0x02,tsk_ptrace(%ebx)   # PT_TRACESYS
+       jne tracesys
+       cmpl $(NR_syscalls),%eax
+       jae badsys        
+        call *SYMBOL_NAME(sys_call_table)(,%eax,4)
+       movl %eax,EAX(%esp)             # save the return value
+ENTRY(ret_from_sys_call)
+        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
+        xorl %eax,%eax
+        movl %eax,4(%esi)               # need_resched and signals atomic test
+ret_syscall_tests:      
+        cmpl $0,need_resched(%ebx)
+       jne reschedule
+       cmpl $0,sigpending(%ebx)
+       je   safesti                    # ensure need_resched updates are seen
+signal_return:
+       btsl %eax,4(%esi)               # reenable event callbacks
+       movl %esp,%eax
+       xorl %edx,%edx
+       call SYMBOL_NAME(do_signal)
+       jmp  ret_from_sys_call
+
+        ALIGN
+restore_all:
+       RESTORE_ALL
+
+       ALIGN
+tracesys:
+       movl $-ENOSYS,EAX(%esp)
+       call SYMBOL_NAME(syscall_trace)
+       movl ORIG_EAX(%esp),%eax
+       cmpl $(NR_syscalls),%eax
+       jae tracesys_exit
+       call *SYMBOL_NAME(sys_call_table)(,%eax,4)
+       movl %eax,EAX(%esp)             # save the return value
+tracesys_exit:
+       call SYMBOL_NAME(syscall_trace)
+       jmp ret_from_sys_call
+badsys:
+       movl $-ENOSYS,EAX(%esp)
+       jmp ret_from_sys_call
+
+       ALIGN
+ENTRY(ret_from_intr)
+       GET_CURRENT(%ebx)
+ret_from_exception:
+       movb CS(%esp),%al
+       testl $2,%eax
+       jne ret_from_sys_call
+       jmp restore_all
+
+       ALIGN
+reschedule:
+        btsl %eax,4(%esi)             # reenable event callbacks
+       call SYMBOL_NAME(schedule)
+       jmp ret_from_sys_call
+
+ENTRY(divide_error)
+       pushl $0                # no error code
+       pushl $ SYMBOL_NAME(do_divide_error)
+       ALIGN
+error_code:
+       pushl %ds
+       pushl %eax
+       xorl %eax,%eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl %eax                       # eax = -1
+       pushl %ecx
+       pushl %ebx
+       GET_CURRENT(%ebx)
+       cld
+       movl %es,%ecx
+       movl ORIG_EAX(%esp), %esi       # get the error code
+       movl ES(%esp), %edi             # get the function address
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       movl %esp,%edx
+       pushl %esi                      # push the error code
+       pushl %edx                      # push the pt_regs pointer
+       movl $(__KERNEL_DS),%edx
+       movl %edx,%ds
+       movl %edx,%es
+       call *%edi
+       addl $8,%esp
+       jmp ret_from_exception
+
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+        pushl %eax
+        SAVE_ALL
+        GET_CURRENT(%ebx)
+        movl EIP(%esp),%eax
+        cmpl $scrit,%eax
+        jb   11f
+        cmpl $ecrit,%eax
+        jb   critical_region_fixup
+11:     push %esp
+        call do_hypervisor_callback
+        add  $4,%esp
+        movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi
+        xorl %eax,%eax
+        movb CS(%esp),%cl
+       test $2,%cl          # slow return to ring 2 or 3
+       jne  ret_syscall_tests
+safesti:btsl %eax,4(%esi)     # reenable event callbacks
+scrit:  /**** START OF CRITICAL REGION ****/
+        cmpl %eax,(%esi)
+        jne  14f              # process more events if necessary...
+        RESTORE_ALL
+14:     btrl %eax,4(%esi)
+        jmp  11b
+ecrit:  /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame. 
+critical_region_fixup:
+        addl $critical_fixup_table-scrit,%eax
+        movzbl (%eax),%eax    # %eax contains num bytes popped
+        mov  %esp,%esi
+        add  %eax,%esi        # %esi points at end of src region
+        mov  %esp,%edi
+        add  $0x34,%edi       # %edi points at end of dst region
+        mov  %eax,%ecx
+        shr  $2,%ecx          # convert bytes to words
+        je   16f              # skip loop if nothing to copy
+15:     subl $4,%esi          # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b
+16:     movl %edi,%esp        # final %edi is top of merged stack
+        jmp  11b
+        
+critical_fixup_table:        
+        .byte 0x00,0x00                       # cmpl %eax,(%esi)
+        .byte 0x00,0x00                       # jne  14f
+        .byte 0x00                            # pop  %ebx
+        .byte 0x04                            # pop  %ecx
+        .byte 0x08                            # pop  %edx
+        .byte 0x0c                            # pop  %esi
+        .byte 0x10                            # pop  %edi
+        .byte 0x14                            # pop  %ebp
+        .byte 0x18                            # pop  %eax
+        .byte 0x1c                            # pop  %ds
+        .byte 0x20                            # pop  %es
+        .byte 0x24,0x24,0x24                  # add  $4,%esp
+        .byte 0x28                            # iret
+        .byte 0x00,0x00,0x00,0x00             # btrl %eax,4(%esi)
+        .byte 0x00,0x00                       # jmp  11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:      pop  %ds
+2:      pop  %es
+3:      iret
+.section .fixup,"ax";  \
+4:     movl $0,(%esp); \
+       jmp 1b;         \
+5:     movl $0,(%esp); \
+       jmp 2b;         \
+6:     pushl %ss;      \
+       popl %ds;       \
+       pushl %ss;      \
+       popl %es;       \
+       pushl $11;      \
+       call do_exit;   \
+.previous;             \
+.section __ex_table,"a";\
+       .align 4;       \
+       .long 1b,4b;    \
+       .long 2b,5b;    \
+       .long 3b,6b;    \
+.previous
+        
+ENTRY(coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_error)
+       jmp error_code
+
+ENTRY(simd_coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+       jmp error_code
+
+ENTRY(device_not_available)
+       pushl $-1               # mark this as an int
+       SAVE_ALL
+       GET_CURRENT(%ebx)
+       call SYMBOL_NAME(math_state_restore)
+       jmp ret_from_exception
+
+ENTRY(debug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_debug)
+       jmp error_code
+
+ENTRY(int3)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_int3)
+       jmp error_code
+
+ENTRY(overflow)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_overflow)
+       jmp error_code
+
+ENTRY(bounds)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_bounds)
+       jmp error_code
+
+ENTRY(invalid_op)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_invalid_op)
+       jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+       jmp error_code
+
+ENTRY(double_fault)
+       pushl $ SYMBOL_NAME(do_double_fault)
+       jmp error_code
+
+ENTRY(invalid_TSS)
+       pushl $ SYMBOL_NAME(do_invalid_TSS)
+       jmp error_code
+
+ENTRY(segment_not_present)
+       pushl $ SYMBOL_NAME(do_segment_not_present)
+       jmp error_code
+
+ENTRY(stack_segment)
+       pushl $ SYMBOL_NAME(do_stack_segment)
+       jmp error_code
+
+ENTRY(general_protection)
+       pushl $ SYMBOL_NAME(do_general_protection)
+       jmp error_code
+
+ENTRY(alignment_check)
+       pushl $ SYMBOL_NAME(do_alignment_check)
+       jmp error_code
+
+#if defined(CONFIG_KDB)
+ENTRY(page_fault_mca)
+       pushl %ecx
+       pushl %edx
+       pushl %eax
+       movl  $473,%ecx
+       rdmsr
+       andl  $0xfffffffe,%eax          /* Disable last branch recording */
+       wrmsr
+       popl  %eax
+       popl  %edx
+       popl  %ecx
+       pushl $ SYMBOL_NAME(do_page_fault)
+       jmp error_code
+#endif
+
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
+ENTRY(page_fault)
+       pushl %ds
+       pushl %eax
+       xorl %eax,%eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl %eax                       # eax = -1
+       pushl %ecx
+       pushl %ebx
+       GET_CURRENT(%ebx)
+       cld
+       movl %es,%ecx
+       movl ORIG_EAX(%esp), %esi       # get the error code
+       movl ES(%esp), %edi             # get the faulting address
+       movl %eax, ORIG_EAX(%esp)
+       movl %ecx, ES(%esp)
+       movl %esp,%edx
+        pushl %edi                      # push the faulting address
+       pushl %esi                      # push the error code
+       pushl %edx                      # push the pt_regs pointer
+       movl $(__KERNEL_DS),%edx
+       movl %edx,%ds
+       movl %edx,%es
+       call SYMBOL_NAME(do_page_fault)
+       addl $12,%esp
+       jmp ret_from_exception
+
+ENTRY(machine_check)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_machine_check)
+       jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+       jmp error_code
+
+.data
+ENTRY(sys_call_table)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* 0  -  old "setup()" system call*/
+       .long SYMBOL_NAME(sys_exit)
+       .long SYMBOL_NAME(sys_fork)
+       .long SYMBOL_NAME(sys_read)
+       .long SYMBOL_NAME(sys_write)
+       .long SYMBOL_NAME(sys_open)             /* 5 */
+       .long SYMBOL_NAME(sys_close)
+       .long SYMBOL_NAME(sys_waitpid)
+       .long SYMBOL_NAME(sys_creat)
+       .long SYMBOL_NAME(sys_link)
+       .long SYMBOL_NAME(sys_unlink)           /* 10 */
+       .long SYMBOL_NAME(sys_execve)
+       .long SYMBOL_NAME(sys_chdir)
+       .long SYMBOL_NAME(sys_time)
+       .long SYMBOL_NAME(sys_mknod)
+       .long SYMBOL_NAME(sys_chmod)            /* 15 */
+       .long SYMBOL_NAME(sys_lchown16)
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old break syscall holder */
+       .long SYMBOL_NAME(sys_stat)
+       .long SYMBOL_NAME(sys_lseek)
+       .long SYMBOL_NAME(sys_getpid)           /* 20 */
+       .long SYMBOL_NAME(sys_mount)
+       .long SYMBOL_NAME(sys_oldumount)
+       .long SYMBOL_NAME(sys_setuid16)
+       .long SYMBOL_NAME(sys_getuid16)
+       .long SYMBOL_NAME(sys_stime)            /* 25 */
+       .long SYMBOL_NAME(sys_ptrace)
+       .long SYMBOL_NAME(sys_alarm)
+       .long SYMBOL_NAME(sys_fstat)
+       .long SYMBOL_NAME(sys_pause)
+       .long SYMBOL_NAME(sys_utime)            /* 30 */
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old stty syscall holder */
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old gtty syscall holder */
+       .long SYMBOL_NAME(sys_access)
+       .long SYMBOL_NAME(sys_nice)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* 35 */                /* old ftime syscall holder */
+       .long SYMBOL_NAME(sys_sync)
+       .long SYMBOL_NAME(sys_kill)
+       .long SYMBOL_NAME(sys_rename)
+       .long SYMBOL_NAME(sys_mkdir)
+       .long SYMBOL_NAME(sys_rmdir)            /* 40 */
+       .long SYMBOL_NAME(sys_dup)
+       .long SYMBOL_NAME(sys_pipe)
+       .long SYMBOL_NAME(sys_times)
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old prof syscall holder */
+       .long SYMBOL_NAME(sys_brk)              /* 45 */
+       .long SYMBOL_NAME(sys_setgid16)
+       .long SYMBOL_NAME(sys_getgid16)
+       .long SYMBOL_NAME(sys_signal)
+       .long SYMBOL_NAME(sys_geteuid16)
+       .long SYMBOL_NAME(sys_getegid16)        /* 50 */
+       .long SYMBOL_NAME(sys_acct)
+       .long SYMBOL_NAME(sys_umount)                                   /* recycled never used phys() */
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old lock syscall holder */
+       .long SYMBOL_NAME(sys_ioctl)
+       .long SYMBOL_NAME(sys_fcntl)            /* 55 */
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old mpx syscall holder */
+       .long SYMBOL_NAME(sys_setpgid)
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old ulimit syscall holder */
+       .long SYMBOL_NAME(sys_olduname)
+       .long SYMBOL_NAME(sys_umask)            /* 60 */
+       .long SYMBOL_NAME(sys_chroot)
+       .long SYMBOL_NAME(sys_ustat)
+       .long SYMBOL_NAME(sys_dup2)
+       .long SYMBOL_NAME(sys_getppid)
+       .long SYMBOL_NAME(sys_getpgrp)          /* 65 */
+       .long SYMBOL_NAME(sys_setsid)
+       .long SYMBOL_NAME(sys_sigaction)
+       .long SYMBOL_NAME(sys_sgetmask)
+       .long SYMBOL_NAME(sys_ssetmask)
+       .long SYMBOL_NAME(sys_setreuid16)       /* 70 */
+       .long SYMBOL_NAME(sys_setregid16)
+       .long SYMBOL_NAME(sys_sigsuspend)
+       .long SYMBOL_NAME(sys_sigpending)
+       .long SYMBOL_NAME(sys_sethostname)
+       .long SYMBOL_NAME(sys_setrlimit)        /* 75 */
+       .long SYMBOL_NAME(sys_old_getrlimit)
+       .long SYMBOL_NAME(sys_getrusage)
+       .long SYMBOL_NAME(sys_gettimeofday)
+       .long SYMBOL_NAME(sys_settimeofday)
+       .long SYMBOL_NAME(sys_getgroups16)      /* 80 */
+       .long SYMBOL_NAME(sys_setgroups16)
+       .long SYMBOL_NAME(old_select)
+       .long SYMBOL_NAME(sys_symlink)
+       .long SYMBOL_NAME(sys_lstat)
+       .long SYMBOL_NAME(sys_readlink)         /* 85 */
+       .long SYMBOL_NAME(sys_uselib)
+       .long SYMBOL_NAME(sys_swapon)
+       .long SYMBOL_NAME(sys_reboot)
+       .long SYMBOL_NAME(old_readdir)
+       .long SYMBOL_NAME(old_mmap)             /* 90 */
+       .long SYMBOL_NAME(sys_munmap)
+       .long SYMBOL_NAME(sys_truncate)
+       .long SYMBOL_NAME(sys_ftruncate)
+       .long SYMBOL_NAME(sys_fchmod)
+       .long SYMBOL_NAME(sys_fchown16)         /* 95 */
+       .long SYMBOL_NAME(sys_getpriority)
+       .long SYMBOL_NAME(sys_setpriority)
+       .long SYMBOL_NAME(sys_ni_syscall)                               /* old profil syscall holder */
+       .long SYMBOL_NAME(sys_statfs)
+       .long SYMBOL_NAME(sys_fstatfs)          /* 100 */
+       .long SYMBOL_NAME(sys_ioperm)
+       .long SYMBOL_NAME(sys_socketcall)
+       .long SYMBOL_NAME(sys_syslog)
+       .long SYMBOL_NAME(sys_setitimer)
+       .long SYMBOL_NAME(sys_getitimer)        /* 105 */
+       .long SYMBOL_NAME(sys_newstat)
+       .long SYMBOL_NAME(sys_newlstat)
+       .long SYMBOL_NAME(sys_newfstat)
+       .long SYMBOL_NAME(sys_uname)
+       .long SYMBOL_NAME(sys_iopl)             /* 110 */
+       .long SYMBOL_NAME(sys_vhangup)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* old "idle" system call */
+       .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */
+       .long SYMBOL_NAME(sys_wait4)
+       .long SYMBOL_NAME(sys_swapoff)          /* 115 */
+       .long SYMBOL_NAME(sys_sysinfo)
+       .long SYMBOL_NAME(sys_ipc)
+       .long SYMBOL_NAME(sys_fsync)
+       .long SYMBOL_NAME(sys_sigreturn)
+       .long SYMBOL_NAME(sys_clone)            /* 120 */
+       .long SYMBOL_NAME(sys_setdomainname)
+       .long SYMBOL_NAME(sys_newuname)
+       .long SYMBOL_NAME(sys_modify_ldt)
+       .long SYMBOL_NAME(sys_adjtimex)
+       .long SYMBOL_NAME(sys_mprotect)         /* 125 */
+       .long SYMBOL_NAME(sys_sigprocmask)
+       .long SYMBOL_NAME(sys_create_module)
+       .long SYMBOL_NAME(sys_init_module)
+       .long SYMBOL_NAME(sys_delete_module)
+       .long SYMBOL_NAME(sys_get_kernel_syms)  /* 130 */
+       .long SYMBOL_NAME(sys_quotactl)
+       .long SYMBOL_NAME(sys_getpgid)
+       .long SYMBOL_NAME(sys_fchdir)
+       .long SYMBOL_NAME(sys_bdflush)
+       .long SYMBOL_NAME(sys_sysfs)            /* 135 */
+       .long SYMBOL_NAME(sys_personality)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* for afs_syscall */
+       .long SYMBOL_NAME(sys_setfsuid16)
+       .long SYMBOL_NAME(sys_setfsgid16)
+       .long SYMBOL_NAME(sys_llseek)           /* 140 */
+       .long SYMBOL_NAME(sys_getdents)
+       .long SYMBOL_NAME(sys_select)
+       .long SYMBOL_NAME(sys_flock)
+       .long SYMBOL_NAME(sys_msync)
+       .long SYMBOL_NAME(sys_readv)            /* 145 */
+       .long SYMBOL_NAME(sys_writev)
+       .long SYMBOL_NAME(sys_getsid)
+       .long SYMBOL_NAME(sys_fdatasync)
+       .long SYMBOL_NAME(sys_sysctl)
+       .long SYMBOL_NAME(sys_mlock)            /* 150 */
+       .long SYMBOL_NAME(sys_munlock)
+       .long SYMBOL_NAME(sys_mlockall)
+       .long SYMBOL_NAME(sys_munlockall)
+       .long SYMBOL_NAME(sys_sched_setparam)
+       .long SYMBOL_NAME(sys_sched_getparam)   /* 155 */
+       .long SYMBOL_NAME(sys_sched_setscheduler)
+       .long SYMBOL_NAME(sys_sched_getscheduler)
+       .long SYMBOL_NAME(sys_sched_yield)
+       .long SYMBOL_NAME(sys_sched_get_priority_max)
+       .long SYMBOL_NAME(sys_sched_get_priority_min)  /* 160 */
+       .long SYMBOL_NAME(sys_sched_rr_get_interval)
+       .long SYMBOL_NAME(sys_nanosleep)
+       .long SYMBOL_NAME(sys_mremap)
+       .long SYMBOL_NAME(sys_setresuid16)
+       .long SYMBOL_NAME(sys_getresuid16)      /* 165 */
+       .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */
+       .long SYMBOL_NAME(sys_query_module)
+       .long SYMBOL_NAME(sys_poll)
+       .long SYMBOL_NAME(sys_nfsservctl)
+       .long SYMBOL_NAME(sys_setresgid16)      /* 170 */
+       .long SYMBOL_NAME(sys_getresgid16)
+       .long SYMBOL_NAME(sys_prctl)
+       .long SYMBOL_NAME(sys_rt_sigreturn)
+       .long SYMBOL_NAME(sys_rt_sigaction)
+       .long SYMBOL_NAME(sys_rt_sigprocmask)   /* 175 */
+       .long SYMBOL_NAME(sys_rt_sigpending)
+       .long SYMBOL_NAME(sys_rt_sigtimedwait)
+       .long SYMBOL_NAME(sys_rt_sigqueueinfo)
+       .long SYMBOL_NAME(sys_rt_sigsuspend)
+       .long SYMBOL_NAME(sys_pread)            /* 180 */
+       .long SYMBOL_NAME(sys_pwrite)
+       .long SYMBOL_NAME(sys_chown16)
+       .long SYMBOL_NAME(sys_getcwd)
+       .long SYMBOL_NAME(sys_capget)
+       .long SYMBOL_NAME(sys_capset)           /* 185 */
+       .long SYMBOL_NAME(sys_sigaltstack)
+       .long SYMBOL_NAME(sys_sendfile)
+       .long SYMBOL_NAME(sys_ni_syscall)               /* streams1 */
+       .long SYMBOL_NAME(sys_ni_syscall)               /* streams2 */
+       .long SYMBOL_NAME(sys_vfork)            /* 190 */
+       .long SYMBOL_NAME(sys_getrlimit)
+       .long SYMBOL_NAME(sys_mmap2)
+       .long SYMBOL_NAME(sys_truncate64)
+       .long SYMBOL_NAME(sys_ftruncate64)
+       .long SYMBOL_NAME(sys_stat64)           /* 195 */
+       .long SYMBOL_NAME(sys_lstat64)
+       .long SYMBOL_NAME(sys_fstat64)
+       .long SYMBOL_NAME(sys_lchown)
+       .long SYMBOL_NAME(sys_getuid)
+       .long SYMBOL_NAME(sys_getgid)           /* 200 */
+       .long SYMBOL_NAME(sys_geteuid)
+       .long SYMBOL_NAME(sys_getegid)
+       .long SYMBOL_NAME(sys_setreuid)
+       .long SYMBOL_NAME(sys_setregid)
+       .long SYMBOL_NAME(sys_getgroups)        /* 205 */
+       .long SYMBOL_NAME(sys_setgroups)
+       .long SYMBOL_NAME(sys_fchown)
+       .long SYMBOL_NAME(sys_setresuid)
+       .long SYMBOL_NAME(sys_getresuid)
+       .long SYMBOL_NAME(sys_setresgid)        /* 210 */
+       .long SYMBOL_NAME(sys_getresgid)
+       .long SYMBOL_NAME(sys_chown)
+       .long SYMBOL_NAME(sys_setuid)
+       .long SYMBOL_NAME(sys_setgid)
+       .long SYMBOL_NAME(sys_setfsuid)         /* 215 */
+       .long SYMBOL_NAME(sys_setfsgid)
+       .long SYMBOL_NAME(sys_pivot_root)
+       .long SYMBOL_NAME(sys_mincore)
+       .long SYMBOL_NAME(sys_madvise)
+       .long SYMBOL_NAME(sys_getdents64)       /* 220 */
+       .long SYMBOL_NAME(sys_fcntl64)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* reserved for TUX */
+       .long SYMBOL_NAME(sys_ni_syscall)       /* Reserved for Security */
+       .long SYMBOL_NAME(sys_gettid)
+       .long SYMBOL_NAME(sys_readahead)        /* 225 */
+
+       .rept NR_syscalls-(.-sys_call_table)/4
+               .long SYMBOL_NAME(sys_ni_syscall)
+       .endr
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S
new file mode 100644 (file)
index 0000000..b1a4e3e
--- /dev/null
@@ -0,0 +1,61 @@
+
+.text
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+/* Offsets in start_info structure */
+#define SHARED_INFO  4
+#define MOD_START   16
+#define MOD_LEN     20
+                
+startup_32:
+        cld
+        
+        lss stack_start,%esp
+
+        /* Copy initrd somewhere safe before it's clobbered by BSS. */
+        mov  MOD_LEN(%esi),%ecx
+        shr  $2,%ecx
+        jz   2f        /* bail from copy loop if no initrd */
+        mov  $SYMBOL_NAME(_end),%edi
+        add  MOD_LEN(%esi),%edi
+        mov  MOD_START(%esi),%eax
+        add  MOD_LEN(%esi),%eax
+1:      sub  $4,%eax
+        sub  $4,%edi
+        mov  (%eax),%ebx
+        mov  %ebx,(%edi)
+        loop 1b
+        mov  %edi,MOD_START(%esi)
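+        # The copy runs from high addresses downwards, so it remains
+        # correct even if the destination overlaps the source from above.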
+                
+        /* Clear BSS first so that there are no surprises... */
+2:      xorl %eax,%eax
+       movl $SYMBOL_NAME(__bss_start),%edi
+       movl $SYMBOL_NAME(_end),%ecx
+       subl %edi,%ecx
+       rep stosb
+
+        /* Copy the necessary stuff from start_info structure. */
+        /* We need to copy shared_info early, so that sti/cli work */
+        mov  SHARED_INFO(%esi),%eax
+        mov  %eax,SYMBOL_NAME(HYPERVISOR_shared_info)
+        mov  $SYMBOL_NAME(start_info_union),%edi
+        mov  $128,%ecx
+        rep movsl
+                
+        jmp SYMBOL_NAME(start_kernel)
+
+ENTRY(stack_start)
+       .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS
+
+.org 0x1000
+ENTRY(empty_zero_page)
+
+.org 0x2000
+ENTRY(stext)
+ENTRY(_stext)
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c
new file mode 100644 (file)
index 0000000..c490871
--- /dev/null
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * hypervisor.c
+ * 
+ * Communication to/from hypervisor.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <asm/atomic.h>
+#include <linux/irq.h>
+#include <asm/hypervisor.h>
+#include <asm/system.h>
+#include <asm/ptrace.h>
+
+static unsigned long event_mask = 0;
+
+void frobb(void) {}
+
+void do_hypervisor_callback(struct pt_regs *regs)
+{
+    unsigned long events, flags;
+    shared_info_t *shared = HYPERVISOR_shared_info;
+
+    do {
+        /* Specialised local_irq_save(). */
+        flags = shared->events_enable;
+        shared->events_enable = 0;
+        barrier();
+
+        events  = xchg(&shared->events, 0);
+        events &= event_mask;
+
+        __asm__ __volatile__ (
+            "   push %1                            ;"
+            "   sub  $4,%%esp                      ;"
+            "   jmp  2f                            ;"
+            "1: btrl %%eax,%0                      ;" /* clear bit     */
+            "   mov  %%eax,(%%esp)                 ;"
+            "   call do_IRQ                        ;" /* do_IRQ(event) */
+            "2: bsfl %0,%%eax                      ;" /* %eax == bit # */
+            "   jnz  1b                            ;"
+            "   add  $8,%%esp                      ;"
+            /* we use %ebx because it is callee-saved */
+            : : "b" (events), "r" (regs)
+            /* clobbered by callback function calls */
+            : "eax", "ecx", "edx", "memory" ); 
+
+        /* Specialised local_irq_restore(). */
+        shared->events_enable = flags;
+        barrier();
+    }
+    while ( shared->events );
+}
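+
+/*
+ * Roughly equivalent C for the inline assembly above (a sketch only --
+ * the asm pushes the event number and the pt_regs pointer before each
+ * call, and 'lowest_set_bit' is illustrative, not a real 2.4 helper):
+ *
+ *   while ( events != 0 )
+ *   {
+ *       int ev = lowest_set_bit(events);   -- bsfl
+ *       events &= ~(1UL << ev);            -- btrl
+ *       do_IRQ(ev, regs);
+ *   }
+ */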
+
+
+
+/*
+ * Define interface to generic handling in irq.c
+ */
+
+static unsigned int startup_hypervisor_event(unsigned int irq)
+{
+    set_bit(irq, &event_mask);
+    return 0;
+}
+
+static void shutdown_hypervisor_event(unsigned int irq)
+{
+    clear_bit(irq, &event_mask);
+}
+
+static void enable_hypervisor_event(unsigned int irq)
+{
+    set_bit(irq, &event_mask);
+}
+
+static void disable_hypervisor_event(unsigned int irq)
+{
+    clear_bit(irq, &event_mask);
+}
+
+static void ack_hypervisor_event(unsigned int irq)
+{
+    if ( !(event_mask & (1<<irq)) )
+    {
+        printk("Unexpected hypervisor event %d\n", irq);
+        atomic_inc(&irq_err_count);
+    }
+}
+
+static void end_hypervisor_event(unsigned int irq)
+{
+}
+
+static struct hw_interrupt_type hypervisor_irq_type = {
+    "Hypervisor-event",
+    startup_hypervisor_event,
+    shutdown_hypervisor_event,
+    enable_hypervisor_event,
+    disable_hypervisor_event,
+    ack_hypervisor_event,
+    end_hypervisor_event,
+    NULL
+};
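+/* (Positional initializer: typename, startup, shutdown, enable, disable,
+ *  ack, end, set_affinity -- the field order of 2.4's hw_interrupt_type.) */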
+
+void __init init_IRQ(void)
+{
+    int i;
+
+    for ( i = 0; i < NR_IRQS; i++ )
+    {
+        irq_desc[i].status  = IRQ_DISABLED;
+        irq_desc[i].action  = 0;
+        irq_desc[i].depth   = 1;
+        irq_desc[i].handler = &hypervisor_irq_type;
+    }
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c
new file mode 100644 (file)
index 0000000..12db771
--- /dev/null
@@ -0,0 +1,144 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/mca.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/hardirq.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern spinlock_t rtc_lock;
+
+#ifdef CONFIG_SMP
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+EXPORT_SYMBOL(MCA_bus);
+EXPORT_SYMBOL(__verify_write);
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(dump_extended_fpu);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
+EXPORT_SYMBOL(probe_irq_mask);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+EXPORT_SYMBOL(get_cmos_time);
+EXPORT_SYMBOL(apm_info);
+
+#ifdef CONFIG_DEBUG_IOVIRT
+EXPORT_SYMBOL(__io_virt_debug);
+#endif
+
+EXPORT_SYMBOL_NOVERS(__down_failed);
+EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
+EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
+EXPORT_SYMBOL_NOVERS(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_generic);
+/* Delay loops */
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL_NOVERS(__get_user_1);
+EXPORT_SYMBOL_NOVERS(__get_user_2);
+EXPORT_SYMBOL_NOVERS(__get_user_4);
+
+EXPORT_SYMBOL(strtok);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(simple_strtol);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__generic_copy_from_user);
+EXPORT_SYMBOL(__generic_copy_to_user);
+EXPORT_SYMBOL(strnlen_user);
+
+#ifdef CONFIG_X86_USE_3DNOW
+EXPORT_SYMBOL(_mmx_memcpy);
+EXPORT_SYMBOL(mmx_clear_page);
+EXPORT_SYMBOL(mmx_copy_page);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(kernel_flag);
+EXPORT_SYMBOL(smp_num_cpus);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL_NOVERS(__write_lock_failed);
+EXPORT_SYMBOL_NOVERS(__read_lock_failed);
+
+/* Global SMP irq stuff */
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(global_irq_holder);
+EXPORT_SYMBOL(__global_cli);
+EXPORT_SYMBOL(__global_sti);
+EXPORT_SYMBOL(__global_save_flags);
+EXPORT_SYMBOL(__global_restore_flags);
+EXPORT_SYMBOL(smp_call_function);
+
+/* TLB flushing */
+EXPORT_SYMBOL(flush_tlb_page);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+#undef memcpy
+#undef memset
+extern void * memset(void *,int,__kernel_size_t);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+EXPORT_SYMBOL_NOVERS(memcpy);
+EXPORT_SYMBOL_NOVERS(memset);
+
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(atomic_dec_and_lock);
+#endif
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+EXPORT_SYMBOL(do_BUG);
+#endif
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c
new file mode 100644 (file)
index 0000000..9b8dfb0
--- /dev/null
@@ -0,0 +1,518 @@
+/*
+ *  linux/arch/i386/kernel/i387.c
+ *
+ *  Copyright (C) 1994 Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *  General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/math_emu.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+#define HAVE_HWFP 1
+
+/*
+ * The _current_ task is using the FPU for the first time,
+ * so initialize it: reset the mxcsr to its default value
+ * if we support XMM instructions, and then remember that
+ * the current task has used the FPU.
+ */
+void init_fpu(void)
+{
+       __asm__("fninit");
+       if ( cpu_has_xmm )
+               load_mxcsr(0x1f80);
+               
+       current->used_math = 1;
+}
+
+/*
+ * FPU lazy state save handling.
+ */
+
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               asm volatile( "fxsave %0 ; fnclex"
+                             : "=m" (tsk->thread.i387.fxsave) );
+       } else {
+               asm volatile( "fnsave %0 ; fwait"
+                             : "=m" (tsk->thread.i387.fsave) );
+       }
+       tsk->flags &= ~PF_USEDFPU;
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+       __save_init_fpu(tsk);
+       stts();
+}
+
+void kernel_fpu_begin(void)
+{
+       struct task_struct *tsk = current;
+
+       if (tsk->flags & PF_USEDFPU) {
+               __save_init_fpu(tsk);
+               return;
+       }
+       clts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               asm volatile( "fxrstor %0"
+                             : : "m" (tsk->thread.i387.fxsave) );
+       } else {
+               asm volatile( "frstor %0"
+                             : : "m" (tsk->thread.i387.fsave) );
+       }
+}
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+{
+       unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+       /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+       tmp = ~twd;
+       tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+       /* and move the valid bits to the lower byte. */
+       tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+       tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+       tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+       return tmp;
+}
+
+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
+{
+       struct _fpxreg *st = NULL;
+       unsigned long twd = (unsigned long) fxsave->twd;
+       unsigned long tag;
+       unsigned long ret = 0xffff0000;
+       int i;
+
+#define FPREG_ADDR(f, n)       ((char *)&(f)->st_space + (n) * 16)
+
+       for ( i = 0 ; i < 8 ; i++ ) {
+               if ( twd & 0x1 ) {
+                       st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+
+                       switch ( st->exponent & 0x7fff ) {
+                       case 0x7fff:
+                               tag = 2;                /* Special */
+                               break;
+                       case 0x0000:
+                               if ( !st->significand[0] &&
+                                    !st->significand[1] &&
+                                    !st->significand[2] &&
+                                    !st->significand[3] ) {
+                                       tag = 1;        /* Zero */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       default:
+                               if ( st->significand[3] & 0x8000 ) {
+                                       tag = 0;        /* Valid */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       }
+               } else {
+                       tag = 3;                        /* Empty */
+               }
+               ret |= (tag << (2 * i));
+               twd = twd >> 1;
+       }
+       return ret;
+}
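The two helpers above convert between the i387 tag word (two bits per register) and the compressed FXSR form (one valid bit per register). A standalone sketch that replays the compression on a hand-built sample value (the value itself is invented for illustration):

    /* Sketch: the bit-twiddling of twd_i387_to_fxsr(), runnable in userspace. */
    #include <stdio.h>

    static unsigned short twd_i387_to_fxsr(unsigned short twd)
    {
        unsigned int tmp = ~twd;                 /* 11 (empty) -> 00 */
        tmp = (tmp | (tmp >> 1)) & 0x5555;       /* 0V0V0V0V0V0V0V0V */
        tmp = (tmp | (tmp >> 1)) & 0x3333;       /* 00VV00VV00VV00VV */
        tmp = (tmp | (tmp >> 2)) & 0x0f0f;       /* 0000VVVV0000VVVV */
        tmp = (tmp | (tmp >> 4)) & 0x00ff;       /* 00000000VVVVVVVV */
        return tmp;
    }

    int main(void)
    {
        /* st0 valid (00), st1 zero (01), st2 special (10), st3..st7 empty (11) */
        unsigned short twd = 0xffe4;
        printf("i387 twd %#x -> fxsr twd %#x\n", twd, twd_i387_to_fxsr(twd));
        /* prints: i387 twd 0xffe4 -> fxsr twd 0x7 */
        return 0;
    }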
+
+/*
+ * FPU state interaction.
+ */
+
+unsigned short get_fpu_cwd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.cwd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.cwd;
+       }
+}
+
+unsigned short get_fpu_swd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.swd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.swd;
+       }
+}
+
+unsigned short get_fpu_twd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.twd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.twd;
+       }
+}
+
+unsigned short get_fpu_mxcsr( struct task_struct *tsk )
+{
+       if ( cpu_has_xmm ) {
+               return tsk->thread.i387.fxsave.mxcsr;
+       } else {
+               return 0x1f80;
+       }
+}
+
+void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.cwd = cwd;
+       } else {
+               tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000);
+       }
+}
+
+void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.swd = swd;
+       } else {
+               tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000);
+       }
+}
+
+void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
+       } else {
+               tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000);
+       }
+}
+
+void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr )
+{
+       if ( cpu_has_xmm ) {
+               tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf);
+       }
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+static inline int convert_fxsr_to_user( struct _fpstate *buf,
+                                       struct i387_fxsave_struct *fxsave )
+{
+       unsigned long env[7];
+       struct _fpreg *to;
+       struct _fpxreg *from;
+       int i;
+
+       env[0] = (unsigned long)fxsave->cwd | 0xffff0000;
+       env[1] = (unsigned long)fxsave->swd | 0xffff0000;
+       env[2] = twd_fxsr_to_i387(fxsave);
+       env[3] = fxsave->fip;
+       env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+       env[5] = fxsave->foo;
+       env[6] = fxsave->fos;
+
+       if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
+               return 1;
+
+       to = &buf->_st[0];
+       from = (struct _fpxreg *) &fxsave->st_space[0];
+       for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+               if ( __copy_to_user( to, from, sizeof(*to) ) )
+                       return 1;
+       }
+       return 0;
+}
+
+static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
+                                         struct _fpstate *buf )
+{
+       unsigned long env[7];
+       struct _fpxreg *to;
+       struct _fpreg *from;
+       int i;
+
+       if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
+               return 1;
+
+       fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+       fxsave->swd = (unsigned short)(env[1] & 0xffff);
+       fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+       fxsave->fip = env[3];
+       fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16);
+       fxsave->fcs = (env[4] & 0xffff);
+       fxsave->foo = env[5];
+       fxsave->fos = env[6];
+
+       to = (struct _fpxreg *) &fxsave->st_space[0];
+       from = &buf->_st[0];
+       for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+               if ( __copy_from_user( to, from, sizeof(*from) ) )
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+
+       unlazy_fpu( tsk );
+       tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+       if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+                            sizeof(struct i387_fsave_struct) ) )
+               return -1;
+       return 1;
+}
+
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       int err = 0;
+
+       unlazy_fpu( tsk );
+
+       if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+               return -1;
+
+       err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+       err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+       if ( err )
+               return -1;
+
+       if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+                            sizeof(struct i387_fxsave_struct) ) )
+               return -1;
+       return 1;
+}
+
+int save_i387( struct _fpstate *buf )
+{
+       if ( !current->used_math )
+               return 0;
+
+       /* This will cause a "finit" to be triggered by the next
+        * attempted FPU operation by the 'current' process.
+        */
+       current->used_math = 0;
+
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       return save_i387_fxsave( buf );
+               } else {
+                       return save_i387_fsave( buf );
+               }
+       } else {
+               return save_i387_soft( &current->thread.i387.soft, buf );
+       }
+}
+
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       clear_fpu( tsk );
+       return __copy_from_user( &tsk->thread.i387.fsave, buf,
+                                sizeof(struct i387_fsave_struct) );
+}
+
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       clear_fpu( tsk );
+       if ( __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+                              sizeof(struct i387_fxsave_struct) ) )
+               return 1;
+       /* mxcsr bit 6 and 31-16 must be zero for security reasons */
+       tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+       return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate *buf )
+{
+       int err;
+
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       err =  restore_i387_fxsave( buf );
+               } else {
+                       err = restore_i387_fsave( buf );
+               }
+       } else {
+               err = restore_i387_soft( &current->thread.i387.soft, buf );
+       }
+       current->used_math = 1;
+       return err;
+}
+
+/*
+ * ptrace request handlers.
+ */
+
+static inline int get_fpregs_fsave( struct user_i387_struct *buf,
+                                   struct task_struct *tsk )
+{
+       return __copy_to_user( buf, &tsk->thread.i387.fsave,
+                              sizeof(struct user_i387_struct) );
+}
+
+static inline int get_fpregs_fxsave( struct user_i387_struct *buf,
+                                    struct task_struct *tsk )
+{
+       return convert_fxsr_to_user( (struct _fpstate *)buf,
+                                    &tsk->thread.i387.fxsave );
+}
+
+int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk )
+{
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       return get_fpregs_fxsave( buf, tsk );
+               } else {
+                       return get_fpregs_fsave( buf, tsk );
+               }
+       } else {
+               return save_i387_soft( &tsk->thread.i387.soft,
+                                      (struct _fpstate *)buf );
+       }
+}
+
+static inline int set_fpregs_fsave( struct task_struct *tsk,
+                                   struct user_i387_struct *buf )
+{
+       return __copy_from_user( &tsk->thread.i387.fsave, buf,
+                                sizeof(struct user_i387_struct) );
+}
+
+static inline int set_fpregs_fxsave( struct task_struct *tsk,
+                                    struct user_i387_struct *buf )
+{
+       return convert_fxsr_from_user( &tsk->thread.i387.fxsave,
+                                      (struct _fpstate *)buf );
+}
+
+int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf )
+{
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       return set_fpregs_fxsave( tsk, buf );
+               } else {
+                       return set_fpregs_fsave( tsk, buf );
+               }
+       } else {
+               return restore_i387_soft( &tsk->thread.i387.soft,
+                                         (struct _fpstate *)buf );
+       }
+}
+
+int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave,
+                                   sizeof(struct user_fxsr_struct) ))
+                       return -EFAULT;
+               return 0;
+       } else {
+               return -EIO;
+       }
+}
+
+int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf )
+{
+       if ( cpu_has_fxsr ) {
+               __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf,
+                                 sizeof(struct user_fxsr_struct) );
+               /* mxcsr bit 6 and 31-16 must be zero for security reasons */
+               tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+               return 0;
+       } else {
+               return -EIO;
+       }
+}
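These two routines are what the ptrace requests for extended FP state land on; -EIO signals a CPU without FXSR. A hedged userspace sketch of the read side, assuming glibc's PTRACE_GETFPXREGS request and struct user_fpxregs_struct layout, and a child already stopped under ptrace:

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <sys/user.h>

    /* Sketch: fetch the tracee's fxsave image; pid must be ptrace-stopped. */
    int dump_child_fpx(pid_t pid)
    {
        struct user_fpxregs_struct fpx;

        if (ptrace(PTRACE_GETFPXREGS, pid, NULL, &fpx) < 0)
            return -1;                 /* EIO here if the CPU lacks FXSR */
        printf("child mxcsr = %#lx\n", (unsigned long)fpx.mxcsr);
        return 0;
    }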
+
+/*
+ * FPU state for core dumps.
+ */
+
+static inline void copy_fpu_fsave( struct task_struct *tsk,
+                                  struct user_i387_struct *fpu )
+{
+       memcpy( fpu, &tsk->thread.i387.fsave,
+               sizeof(struct user_i387_struct) );
+}
+
+static inline void copy_fpu_fxsave( struct task_struct *tsk,
+                                  struct user_i387_struct *fpu )
+{
+       unsigned short *to;
+       unsigned short *from;
+       int i;
+
+       memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) );
+
+       to = (unsigned short *)&fpu->st_space[0];
+       from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
+       for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
+               memcpy( to, from, 5 * sizeof(unsigned short) );
+       }
+}
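The strides in copy_fpu_fxsave() above tell the story: FXSAVE keeps each 80-bit ST register in a 16-byte slot (eight shorts), while the classic user_i387_struct image packs ten bytes (five shorts) per register, back to back. The same repacking on fabricated data, as a userspace sketch:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned short fxsave_st[8][8];    /* 8 regs x 16-byte slots */
        unsigned short packed[8][5];       /* 8 regs x 10 bytes, back to back */
        int i;

        memset(fxsave_st, 0, sizeof(fxsave_st));
        for (i = 0; i < 8; i++)
            fxsave_st[i][4] = 0x3fff + i;  /* fake sign/exponent word */

        for (i = 0; i < 8; i++)            /* the to += 5, from += 8 loop above */
            memcpy(packed[i], fxsave_st[i], 5 * sizeof(unsigned short));

        printf("st1 exponent word after packing: %#x\n", packed[1][4]);
        return 0;
    }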
+
+int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+{
+       int fpvalid;
+       struct task_struct *tsk = current;
+
+       fpvalid = tsk->used_math;
+       if ( fpvalid ) {
+               unlazy_fpu( tsk );
+               if ( cpu_has_fxsr ) {
+                       copy_fpu_fxsave( tsk, fpu );
+               } else {
+                       copy_fpu_fsave( tsk, fpu );
+               }
+       }
+
+       return fpvalid;
+}
+
+int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu )
+{
+       int fpvalid;
+       struct task_struct *tsk = current;
+
+       fpvalid = tsk->used_math && cpu_has_fxsr;
+       if ( fpvalid ) {
+               unlazy_fpu( tsk );
+               memcpy( fpu, &tsk->thread.i387.fxsave,
+                       sizeof(struct user_fxsr_struct) );
+       }
+
+       return fpvalid;
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c
new file mode 100644 (file)
index 0000000..7779809
--- /dev/null
@@ -0,0 +1,33 @@
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union task_union init_task_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_TASK(init_task_union.task) };
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */ 
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c
new file mode 100644 (file)
index 0000000..6bce25a
--- /dev/null
@@ -0,0 +1,19 @@
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+
+asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+    /* No IO permission! */
+    return -EPERM;
+}
+
+
+asmlinkage int sys_iopl(unsigned long unused)
+{
+    /* The hypervisor won't allow it! */
+    return -EPERM;
+}
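Both port-access system calls are refused outright, since a guest must go through the hypervisor rather than drive I/O ports itself. From userspace the effect would look like this (a sketch assuming glibc's <sys/io.h> wrapper; the parallel-port address is just an example):

    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <sys/io.h>

    int main(void)
    {
        if (ioperm(0x378, 3, 1) < 0)       /* try to claim LPT1's ports */
            printf("ioperm: %s\n", strerror(errno));   /* EPERM here */
        return 0;
    }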
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c
new file mode 100644 (file)
index 0000000..7c85590
--- /dev/null
@@ -0,0 +1,1129 @@
+/*
+ *     linux/arch/i386/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/kdb.h>
+
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/irq.h>
+
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs are assumed to be messaging to Pentium local-APICs.)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+       { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+static void register_irq_proc (unsigned int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+    printk("unexpected IRQ trap at vector %02x\n", irq);
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none  disable_none
+#define end_none       enable_none
+
+struct hw_interrupt_type no_irq_type = {
+       "none",
+       startup_none,
+       shutdown_none,
+       enable_none,
+       disable_none,
+       ack_none,
+       end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+int get_irq_list(char *buf)
+{
+       int i, j;
+       struct irqaction * action;
+       char *p = buf;
+
+       p += sprintf(p, "           ");
+       for (j=0; j<smp_num_cpus; j++)
+               p += sprintf(p, "CPU%d       ",j);
+       *p++ = '\n';
+
+       for (i = 0 ; i < NR_IRQS ; i++) {
+               action = irq_desc[i].action;
+               if (!action) 
+                       continue;
+               p += sprintf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               p += sprintf(p, "%10u ", kstat_irqs(i));
+#else
+               for (j = 0; j < smp_num_cpus; j++)
+                       p += sprintf(p, "%10u ",
+                               kstat.irqs[cpu_logical_map(j)][i]);
+#endif
+               p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+               p += sprintf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       p += sprintf(p, ", %s", action->name);
+               *p++ = '\n';
+       }
+       p += sprintf(p, "NMI: ");
+       for (j = 0; j < smp_num_cpus; j++)
+               p += sprintf(p, "%10u ",
+                       nmi_count(cpu_logical_map(j)));
+       p += sprintf(p, "\n");
+#if CONFIG_X86_LOCAL_APIC
+       p += sprintf(p, "LOC: ");
+       for (j = 0; j < smp_num_cpus; j++)
+               p += sprintf(p, "%10u ",
+                       apic_timer_irqs[cpu_logical_map(j)]);
+       p += sprintf(p, "\n");
+#endif
+       p += sprintf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+       p += sprintf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+       return p - buf;
+}
+
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = NO_PROC_ID;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+extern void show_stack(unsigned long* esp);
+
+static void show(char * str)
+{
+       int i;
+       int cpu = smp_processor_id();
+
+       printk("\n%s, CPU %d:\n", str, cpu);
+       printk("irq:  %d [",irqs_running());
+       for(i=0;i < smp_num_cpus;i++)
+               printk(" %d",local_irq_count(i));
+       printk(" ]\nbh:   %d [",spin_is_locked(&global_bh_lock) ? 1 : 0);
+       for(i=0;i < smp_num_cpus;i++)
+               printk(" %d",local_bh_count(i));
+
+       printk(" ]\nStack dumps:");
+       for(i = 0; i < smp_num_cpus; i++) {
+               unsigned long esp;
+               if (i == cpu)
+                       continue;
+               printk("\nCPU %d:",i);
+               esp = init_tss[i].esp0;
+               if (!esp) {
+                       /* tss->esp0 is set to NULL in cpu_init(),
+                        * it's initialized when the cpu returns to user
+                        * space. -- manfreds
+                        */
+                       printk(" <unknown> ");
+                       continue;
+               }
+               esp &= ~(THREAD_SIZE-1);
+               esp += sizeof(struct task_struct);
+               show_stack((void*)esp);
+       }
+       printk("\nCPU %d:",cpu);
+       show_stack(NULL);
+       printk("\n");
+}
+       
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes.  I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * i thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+       int count = MAXCOUNT;
+
+       for (;;) {
+
+               /*
+                * Wait until all interrupts are gone. Wait
+                * for bottom half handlers unless we're
+                * already executing in one..
+                */
+               if (!irqs_running())
+                       if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+                               break;
+
+               /* Duh, we have to loop. Release the lock to avoid deadlocks */
+               clear_bit(0,&global_irq_lock);
+
+               for (;;) {
+                       if (!--count) {
+                               show("wait_on_irq");
+                               count = ~0;
+                       }
+                       __sti();
+                       SYNC_OTHER_CORES(cpu);
+                       __cli();
+                       if (irqs_running())
+                               continue;
+                       if (global_irq_lock)
+                               continue;
+                       if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+                               continue;
+                       if (!test_and_set_bit(0,&global_irq_lock))
+                               break;
+               }
+       }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+       if (irqs_running()) {
+               /* Stupid approach */
+               cli();
+               sti();
+       }
+}
+
+static inline void get_irqlock(int cpu)
+{
+#ifdef CONFIG_KDB
+       static int kdb_rate;
+       if (KDB_IS_RUNNING() && kdb_rate++ < 10)
+               kdb_printf("Warning: get_irqlock on cpu %d while kdb is running, may hang\n", smp_processor_id());
+#endif /* CONFIG_KDB */
+       if (test_and_set_bit(0,&global_irq_lock)) {
+               /* do we already hold the lock? */
+               if ((unsigned char) cpu == global_irq_holder)
+                       return;
+               /* Uhhuh.. Somebody else got it. Wait.. */
+               do {
+                       do {
+                               rep_nop();
+                       } while (test_bit(0,&global_irq_lock));
+               } while (test_and_set_bit(0,&global_irq_lock));         
+       }
+       /*
+        * We also have to make sure that nobody else is running
+        * in an interrupt context.
+        */
+       wait_on_irq(cpu);
+
+       /*
+        * Ok, finally..
+        */
+       global_irq_holder = cpu;
+}
+
+void __global_cli(void)
+{
+    panic("__global_cli");
+}
+
+void __global_sti(void)
+{
+    panic("__global_sti");
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+    panic("__global_save_flags");
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+    panic("__global_restore_flags");
+}
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+       int status;
+       int cpu = smp_processor_id();
+
+       irq_enter(cpu, irq);
+
+       status = 1;     /* Force the "do bottom halves" bit */
+
+       if (!(action->flags & SA_INTERRUPT))
+               __sti();
+
+       do {
+               status |= action->flags;
+               action->handler(irq, action->dev_id, regs);
+               action = action->next;
+       } while (action);
+       if (status & SA_SAMPLE_RANDOM)
+               add_interrupt_randomness(irq);
+       __cli();
+
+       irq_exit(cpu, irq);
+
+       return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock. 
+ */
+/**
+ *     disable_irq_nosync - disable an irq without waiting
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Disables and Enables are
+ *     nested.
+ *     Unlike disable_irq(), this function does not ensure existing
+ *     instances of the IRQ handler have completed before returning.
+ *
+ *     This function may be called from IRQ context.
+ */
+inline void disable_irq_nosync(unsigned int irq)
+{
+       irq_desc_t *desc = irq_desc + irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       if (!desc->depth++) {
+               desc->status |= IRQ_DISABLED;
+               desc->handler->disable(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *     disable_irq - disable an irq and wait for completion
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Enables and Disables are
+ *     nested.
+ *     This function waits for any pending IRQ handlers for this interrupt
+ *     to complete before returning. If you use this function while
+ *     holding a resource the IRQ handler may need you will deadlock.
+ *
+ *     This function may be called - with care - from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+       disable_irq_nosync(irq);
+
+       if (!local_irq_count(smp_processor_id())) {
+               do {
+                       barrier();
+                       cpu_relax();
+               } while (irq_desc[irq].status & IRQ_INPROGRESS);
+       }
+}
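The depth counter makes disable/enable calls nest, as the kerneldoc above says: only the transition from depth 0 masks the line, and only the return to depth 0 unmasks it. A sketch of the resulting contract, in kernel context with headers elided (MY_IRQ and quiesce_device() are placeholders):

    #define MY_IRQ 5   /* placeholder line number */

    static void quiesce_device(void)
    {
        disable_irq(MY_IRQ);    /* depth 0 -> 1: line masked, handlers drained */
        disable_irq(MY_IRQ);    /* depth 1 -> 2: still masked */
        /* ... reprogram the hardware with no handler running ... */
        enable_irq(MY_IRQ);     /* depth 2 -> 1: still masked */
        enable_irq(MY_IRQ);     /* depth 1 -> 0: line live again */
    }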
+
+/**
+ *     enable_irq - enable handling of an irq
+ *     @irq: Interrupt to enable
+ *
+ *     Undoes the effect of one call to disable_irq().  If this
+ *     matches the last disable, processing of interrupts on this
+ *     IRQ line is re-enabled.
+ *
+ *     This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+       irq_desc_t *desc = irq_desc + irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       switch (desc->depth) {
+       case 1: {
+               unsigned int status = desc->status & ~IRQ_DISABLED;
+               desc->status = status;
+               if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+                       desc->status = status | IRQ_REPLAY;
+                       hw_resend_irq(desc->handler,irq);
+               }
+               desc->handler->enable(irq);
+               /* fall-through */
+       }
+       default:
+               desc->depth--;
+               break;
+       case 0:
+               printk("enable_irq(%u) unbalanced from %p\n", irq,
+                      __builtin_return_address(0));
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs)
+{      
+       /* 
+        * We ack quickly, we don't want the irq controller
+        * thinking we're snobs just because some other CPU has
+        * disabled global interrupts (we have already done the
+        * INT_ACK cycles, it's too late to try to pretend to the
+        * controller that we aren't taking the interrupt).
+        *
+        * 0 return value means that this irq is already being
+        * handled by some other CPU. (or is disabled)
+        */
+       int cpu = smp_processor_id();
+       irq_desc_t *desc = irq_desc + irq;
+       struct irqaction * action;
+       unsigned int status;
+
+       kstat.irqs[cpu][irq]++;
+       spin_lock(&desc->lock);
+       desc->handler->ack(irq);
+       /*
+          REPLAY is when Linux resends an IRQ that was dropped earlier
+          WAITING is used by probe to mark irqs that are being tested
+          */
+       status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+       status |= IRQ_PENDING; /* we _want_ to handle it */
+
+       /*
+        * If the IRQ is disabled for whatever reason, we cannot
+        * use the action we have.
+        */
+       action = NULL;
+       if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+               action = desc->action;
+               status &= ~IRQ_PENDING; /* we commit to handling */
+               status |= IRQ_INPROGRESS; /* we are handling it */
+       }
+       desc->status = status;
+
+       /*
+        * If there is no IRQ handler or it was disabled, exit early.
+          Since we set PENDING, if another processor is handling
+          a different instance of this same irq, the other processor
+          will take care of it.
+        */
+       if (!action)
+               goto out;
+
+       /*
+        * Edge triggered interrupts need to remember
+        * pending events.
+        * This applies to any hw interrupts that allow a second
+        * instance of the same irq to arrive while we are in do_IRQ
+        * or in the handler. But the code here only handles the _second_
+        * instance of the irq, not the third or fourth. So it is mostly
+        * useful for irq hardware that does not mask cleanly in an
+        * SMP environment.
+        */
+       for (;;) {
+               spin_unlock(&desc->lock);
+               handle_IRQ_event(irq, regs, action);
+               spin_lock(&desc->lock);
+               
+               if (!(desc->status & IRQ_PENDING))
+                       break;
+               desc->status &= ~IRQ_PENDING;
+       }
+       desc->status &= ~IRQ_INPROGRESS;
+out:
+       /*
+        * The ->end() handler has to deal with interrupts which got
+        * disabled while the handler was running.
+        */
+       desc->handler->end(irq);
+       spin_unlock(&desc->lock);
+
+       if (softirq_pending(cpu))
+               do_softirq();
+
+       return 1;
+}
+
+/**
+ *     request_irq - allocate an interrupt line
+ *     @irq: Interrupt line to allocate
+ *     @handler: Function to be called when the IRQ occurs
+ *     @irqflags: Interrupt type flags
+ *     @devname: An ascii name for the claiming device
+ *     @dev_id: A cookie passed back to the handler function
+ *
+ *     This call allocates interrupt resources and enables the
+ *     interrupt line and IRQ handling. From the point this
+ *     call is made your handler function may be invoked. Since
+ *     your handler function must clear any interrupt the board 
+ *     raises, you must take care both to initialise your hardware
+ *     and to set up the interrupt handler in the right order.
+ *
+ *     Dev_id must be globally unique. Normally the address of the
+ *     device data structure is used as the cookie. Since the handler
+ *     receives this value it makes sense to use it.
+ *
+ *     If your interrupt is shared you must pass a non NULL dev_id
+ *     as this is required when freeing the interrupt.
+ *
+ *     Flags:
+ *
+ *     SA_SHIRQ                Interrupt is shared
+ *
+ *     SA_INTERRUPT            Disable local interrupts while processing
+ *
+ *     SA_SAMPLE_RANDOM        The interrupt can be used for entropy
+ *
+ */
+int request_irq(unsigned int irq, 
+               void (*handler)(int, void *, struct pt_regs *),
+               unsigned long irqflags, 
+               const char * devname,
+               void *dev_id)
+{
+       int retval;
+       struct irqaction * action;
+
+#if 1
+       /*
+        * Sanity-check: shared interrupts should REALLY pass in
+        * a real dev-ID, otherwise we'll have trouble later trying
+        * to figure out which interrupt is which (messes up the
+        * interrupt freeing logic etc).
+        */
+       if (irqflags & SA_SHIRQ) {
+               if (!dev_id)
+                       printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
+       }
+#endif
+
+       if (irq >= NR_IRQS)
+               return -EINVAL;
+       if (!handler)
+               return -EINVAL;
+
+       action = (struct irqaction *)
+                       kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+       if (!action)
+               return -ENOMEM;
+
+       action->handler = handler;
+       action->flags = irqflags;
+       action->mask = 0;
+       action->name = devname;
+       action->next = NULL;
+       action->dev_id = dev_id;
+
+       retval = setup_irq(irq, action);
+       if (retval)
+               kfree(action);
+       return retval;
+}
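A sketch of a driver registering against this API exactly as the kerneldoc above prescribes: a shared line, so SA_SHIRQ is set and a unique dev_id cookie is passed. Kernel context with headers elided; all names here (my_device, my_handler, MY_IRQ) are placeholders:

    #define MY_IRQ 9   /* placeholder */

    static struct my_device { int dummy; } my_dev;

    static void my_handler(int irq, void *dev_id, struct pt_regs *regs)
    {
        struct my_device *dev = dev_id;   /* the cookie passed below */
        (void)dev;   /* a real handler would ack and service the board */
    }

    static int my_attach(void)
    {
        return request_irq(MY_IRQ, my_handler, SA_SHIRQ, "mydev", &my_dev);
    }

    static void my_detach(void)
    {
        free_irq(MY_IRQ, &my_dev);        /* same dev_id picks our action */
    }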
+
+/**
+ *     free_irq - free an interrupt
+ *     @irq: Interrupt line to free
+ *     @dev_id: Device identity to free
+ *
+ *     Remove an interrupt handler. The handler is removed and if the
+ *     interrupt line is no longer in use by any driver it is disabled.
+ *     On a shared IRQ the caller must ensure the interrupt is disabled
+ *     on the card it drives before calling this function. The function
+ *     does not return until any executing interrupts for this IRQ
+ *     have completed.
+ *
+ *     This function may be called from interrupt context. 
+ *
+ *     Bugs: Attempting to free an irq in a handler for the same irq hangs
+ *           the machine.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+       irq_desc_t *desc;
+       struct irqaction **p;
+       unsigned long flags;
+
+       if (irq >= NR_IRQS)
+               return;
+
+       desc = irq_desc + irq;
+       spin_lock_irqsave(&desc->lock,flags);
+       p = &desc->action;
+       for (;;) {
+               struct irqaction * action = *p;
+               if (action) {
+                       struct irqaction **pp = p;
+                       p = &action->next;
+                       if (action->dev_id != dev_id)
+                               continue;
+
+                       /* Found it - now remove it from the list of entries */
+                       *pp = action->next;
+                       if (!desc->action) {
+                               desc->status |= IRQ_DISABLED;
+                               desc->handler->shutdown(irq);
+                       }
+                       spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+                       /* Wait to make sure it's not being used on another CPU */
+                       while (desc->status & IRQ_INPROGRESS) {
+                               barrier();
+                               cpu_relax();
+                       }
+#endif
+                       kfree(action);
+                       return;
+               }
+               printk("Trying to free free IRQ%d\n",irq);
+               spin_unlock_irqrestore(&desc->lock,flags);
+               return;
+       }
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ *     probe_irq_on    - begin an interrupt autodetect
+ *
+ *     Commence probing for an interrupt. The interrupts are scanned
+ *     and a mask of potential interrupt lines is returned.
+ *
+ */
+unsigned long probe_irq_on(void)
+{
+       unsigned int i;
+       irq_desc_t *desc;
+       unsigned long val;
+       unsigned long delay;
+
+       down(&probe_sem);
+       /* 
+        * something may have generated an irq long ago and we want to
+        * flush such a longstanding irq before considering it as spurious. 
+        */
+       for (i = NR_IRQS-1; i > 0; i--)  {
+               desc = irq_desc + i;
+
+               spin_lock_irq(&desc->lock);
+               if (!irq_desc[i].action) 
+                       irq_desc[i].handler->startup(i);
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /* Wait for longstanding interrupts to trigger. */
+       for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+               /* about 20ms delay */ synchronize_irq();
+
+       /*
+        * enable any unassigned irqs
+        * (we must startup again here because if a longstanding irq
+        * happened in the previous stage, it may have masked itself)
+        */
+       for (i = NR_IRQS-1; i > 0; i--) {
+               desc = irq_desc + i;
+
+               spin_lock_irq(&desc->lock);
+               if (!desc->action) {
+                       desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+                       if (desc->handler->startup(i))
+                               desc->status |= IRQ_PENDING;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /*
+        * Wait for spurious interrupts to trigger
+        */
+       for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+               /* about 100ms delay */ synchronize_irq();
+
+       /*
+        * Now filter out any obviously spurious interrupts
+        */
+       val = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       /* It triggered already - consider it spurious. */
+                       if (!(status & IRQ_WAITING)) {
+                               desc->status = status & ~IRQ_AUTODETECT;
+                               desc->handler->shutdown(i);
+                       } else
+                               if (i < 32)
+                                       val |= 1 << i;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+/**
+ *     probe_irq_mask - scan a bitmap of interrupt lines
+ *     @val:   mask of interrupts to consider
+ *
+ *     Scan the ISA bus interrupt lines and return a bitmap of
+ *     active interrupts. The interrupt probe logic state is then
+ *     returned to its previous value.
+ *
+ *     Note: we need to scan all the irq's even though we will
+ *     only return ISA irq numbers - just so that we reset them
+ *     all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+       int i;
+       unsigned int mask;
+
+       mask = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (i < 16 && !(status & IRQ_WAITING))
+                               mask |= 1 << i;
+
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ *     probe_irq_off   - end an interrupt autodetect
+ *     @val: mask of potential interrupts (unused)
+ *
+ *     Scans the unused interrupt lines and returns the line which
+ *     appears to have triggered the interrupt. If no interrupt was
+ *     found then zero is returned. If more than one interrupt is
+ *     found then minus the first candidate is returned to indicate
+ *     there is doubt.
+ *
+ *     The interrupt probe logic state is returned to its previous
+ *     value.
+ *
+ *     BUGS: When used in a module (which arguably shouldn't happen)
+ *     nothing prevents two IRQ probe callers from overlapping. The
+ *     results of this are non-optimal.
+ */
+int probe_irq_off(unsigned long val)
+{
+       int i, irq_found, nr_irqs;
+
+       nr_irqs = 0;
+       irq_found = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (!(status & IRQ_WAITING)) {
+                               if (!nr_irqs)
+                                       irq_found = i;
+                               nr_irqs++;
+                       }
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       if (nr_irqs > 1)
+               irq_found = -irq_found;
+       return irq_found;
+}
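The three probe_* routines form one protocol: arm all unassigned lines, make the board interrupt once, then ask which line fired. A driver-side sketch under that assumption, kernel context with headers elided (enable_device_irq() stands in for the device-specific trigger):

    static int find_device_irq(void)
    {
        unsigned long mask;
        int irq;

        mask = probe_irq_on();       /* arm unassigned lines, settle spurious ones */
        enable_device_irq();         /* placeholder: make the board raise its line */
        udelay(100);
        irq = probe_irq_off(mask);   /* 0 = nothing fired, <0 = more than one did */

        return (irq > 0) ? irq : -1;
    }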
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+       int shared = 0;
+       unsigned long flags;
+       struct irqaction *old, **p;
+       irq_desc_t *desc = irq_desc + irq;
+
+       /*
+        * Some drivers like serial.c use request_irq() heavily,
+        * so we have to be careful not to interfere with a
+        * running system.
+        */
+       if (new->flags & SA_SAMPLE_RANDOM) {
+               /*
+                * This function might sleep, so we want to call it first,
+                * outside of the atomic block.
+                * Yes, this might clear the entropy pool if the wrong
+                * driver is loaded without actually installing a new
+                * handler, but is this really a problem? Only the
+                * sysadmin is able to do this.
+                */
+               rand_initialize_irq(irq);
+       }
+
+       /*
+        * The following block of code has to be executed atomically
+        */
+       spin_lock_irqsave(&desc->lock,flags);
+       p = &desc->action;
+       if ((old = *p) != NULL) {
+               /* Can't share interrupts unless both agree to */
+               if (!(old->flags & new->flags & SA_SHIRQ)) {
+                       spin_unlock_irqrestore(&desc->lock,flags);
+                       return -EBUSY;
+               }
+
+               /* add new interrupt at end of irq queue */
+               do {
+                       p = &old->next;
+                       old = *p;
+               } while (old);
+               shared = 1;
+       }
+
+       *p = new;
+
+       if (!shared) {
+               desc->depth = 0;
+               desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+               desc->handler->startup(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock,flags);
+
+       register_irq_proc(irq);
+       return 0;
+}
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#define HEX_DIGITS 8
+
+static int parse_hex_value (const char *buffer,
+               unsigned long count, unsigned long *ret)
+{
+       unsigned char hexnum [HEX_DIGITS];
+       unsigned long value;
+       int i;
+
+       if (!count)
+               return -EINVAL;
+       if (count > HEX_DIGITS)
+               count = HEX_DIGITS;
+       if (copy_from_user(hexnum, buffer, count))
+               return -EFAULT;
+
+       /*
+        * Parse the first 8 characters as a hex string, any non-hex char
+        * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+        */
+       value = 0;
+
+       for (i = 0; i < count; i++) {
+               unsigned int c = hexnum[i];
+
+               switch (c) {
+                       case '0' ... '9': c -= '0'; break;
+                       case 'a' ... 'f': c -= 'a'-10; break;
+                       case 'A' ... 'F': c -= 'A'-10; break;
+               default:
+                       goto out;
+               }
+               value = (value << 4) | c;
+       }
+out:
+       *ret = value;
+       return 0;
+}
+
+#if CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       if (count < HEX_DIGITS+1)
+               return -EINVAL;
+       return sprintf (page, "%08lx\n", irq_affinity[(long)data]);
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       int irq = (long) data, full_count = count, err;
+       unsigned long new_value;
+
+       if (!irq_desc[irq].handler->set_affinity)
+               return -EIO;
+
+       err = parse_hex_value(buffer, count, &new_value);
+       if (err)
+               return err;
+
+       /*
+        * Do not allow disabling IRQs completely - it's too easy a
+        * way to make the system unusable accidentally :-) At least
+        * one online CPU still has to be targeted.
+        */
+       if (!(new_value & cpu_online_map))
+               return -EINVAL;
+
+       irq_affinity[irq] = new_value;
+       irq_desc[irq].handler->set_affinity(irq, new_value);
+
+       return full_count;
+}
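Together with register_irq_proc() below, this creates a writable /proc/irq/<n>/smp_affinity file that takes an eight-digit hex CPU mask and refuses masks with no online CPU. A userspace sketch pinning a hypothetical IRQ 5 to CPU 0:

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/irq/5/smp_affinity", "w");

        if (!f) {
            perror("fopen");
            return 1;
        }
        fprintf(f, "00000001\n");   /* hex mask: bit 0 = CPU 0 */
        fclose(f);
        return 0;
    }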
+
+#endif
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       unsigned long *mask = (unsigned long *) data;
+       if (count < HEX_DIGITS+1)
+               return -EINVAL;
+       return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       unsigned long *mask = (unsigned long *) data;
+       int full_count = count, err;
+       unsigned long new_value;
+
+       err = parse_hex_value(buffer, count, &new_value);
+       if (err)
+               return err;
+
+       *mask = new_value;
+       return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+       char name [MAX_NAMELEN];
+
+       if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+                       irq_dir[irq])
+               return;
+
+       memset(name, 0, MAX_NAMELEN);
+       sprintf(name, "%d", irq);
+
+       /* create /proc/irq/1234 */
+       irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#if CONFIG_SMP
+       {
+               struct proc_dir_entry *entry;
+
+               /* create /proc/irq/1234/smp_affinity */
+               entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+               if (entry) {
+                       entry->nlink = 1;
+                       entry->data = (void *)(long)irq;
+                       entry->read_proc = irq_affinity_read_proc;
+                       entry->write_proc = irq_affinity_write_proc;
+               }
+
+               smp_affinity_entry[irq] = entry;
+       }
+#endif
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+       struct proc_dir_entry *entry;
+       int i;
+
+       /* create /proc/irq */
+       root_irq_dir = proc_mkdir("irq", 0);
+
+       /* create /proc/irq/prof_cpu_mask */
+       entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+       if (!entry)
+           return;
+
+       entry->nlink = 1;
+       entry->data = (void *)&prof_cpu_mask;
+       entry->read_proc = prof_cpu_mask_read_proc;
+       entry->write_proc = prof_cpu_mask_write_proc;
+
+       /*
+        * Create entries for all existing IRQs.
+        */
+       for (i = 0; i < NR_IRQS; i++)
+               register_irq_proc(i);
+}
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c
new file mode 100644 (file)
index 0000000..6c93943
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+
+/*
+ * XXX KAF (28/7/02): This stuff is only used for DOS emulation, and is
+ * the default way of finding the current TCB in linuxthreads. Supporting
+ * table updates via the hypervisor is feasible, but a hassle: for now,
+ * recompiling linuxthreads is the most sensible option.
+ * 
+ * Oh, this may become an issue depending on what JVM we use for
+ * running the xeno-daemon.
+ */
+
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+    return -ENOSYS;
+}
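With the call stubbed out, anything that uses modify_ldt — DOS emulators, an unpatched linuxthreads — sees ENOSYS. A userspace sketch of the failure (i386, raw syscall via glibc's syscall(2)):

    #include <stdio.h>
    #include <errno.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
        char buf[64];

        if (syscall(SYS_modify_ldt, 0, buf, sizeof(buf)) < 0)
            printf("modify_ldt: %s\n", strerror(errno));   /* ENOSYS here */
        return 0;
    }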
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c
new file mode 100644 (file)
index 0000000..e4c236b
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ *  linux/arch/i386/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kdb.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/desc.h>
+#include <asm/mmu_context.h>
+
+#include <linux/irq.h>
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int hlt_counter;
+
+/*
+ * Power-management idle function, if any.
+ */
+void (*pm_idle)(void);
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+
+void disable_hlt(void)
+{
+    hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+    hlt_counter--;
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+    /* endless idle loop with no priority at all */
+    init_idle();
+    current->nice = 20;
+    current->counter = -100;
+
+    while (1) {
+        while (!current->need_resched)
+            HYPERVISOR_yield();
+        schedule();
+        check_pgt_cache();
+    }
+}
+
+void machine_restart(char * __unused)
+{
+    HYPERVISOR_exit();
+}
+
+void machine_halt(void)
+{
+    HYPERVISOR_exit();
+}
+
+void machine_power_off(void)
+{
+    HYPERVISOR_exit();
+}
+
+extern void show_trace(unsigned long* esp);
+
+void show_regs(struct pt_regs * regs)
+{
+    printk("\n");
+    printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+    printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
+    if (regs->xcs & 2)
+        printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+    printk(" EFLAGS: %08lx    %s\n",regs->eflags, print_tainted());
+    printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+           regs->eax,regs->ebx,regs->ecx,regs->edx);
+    printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+           regs->esi, regs->edi, regs->ebp);
+    printk(" DS: %04x ES: %04x\n",
+           0xffff & regs->xds,0xffff & regs->xes);
+
+    show_trace(&regs->esp);
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+    void * ldt = mm->context.segments;
+
+    /*
+     * free the LDT
+     */
+    if (ldt) {
+        mm->context.segments = NULL;
+        clear_LDT();
+        vfree(ldt);
+    }
+}
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+    long retval, d0;
+
+    __asm__ __volatile__(
+        "movl %%esp,%%esi\n\t"
+        "int $0x80\n\t"                /* Linux/i386 system call */
+        "cmpl %%esp,%%esi\n\t" /* child or parent? */
+        "je 1f\n\t"            /* parent - jump */
+        /* Load the argument into eax, and push it.  That way, it does
+         * not matter whether the called function is compiled with
+         * -mregparm or not.  */
+        "movl %4,%%eax\n\t"
+        "pushl %%eax\n\t"              
+        "call *%5\n\t"         /* call fn */
+        "movl %3,%0\n\t"       /* exit */
+        "int $0x80\n"
+        "1:\t"
+        :"=&a" (retval), "=&S" (d0)
+        :"0" (__NR_clone), "i" (__NR_exit),
+        "r" (arg), "r" (fn),
+        "b" (flags | CLONE_VM)
+        : "memory");
+
+    return retval;
+}
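
For comparison, the userspace analogue of kernel_thread() is the glibc clone(3) wrapper: there the child gets a stack of its own and the wrapper calls fn and exits with its return value, so no stack-pointer comparison is needed. A minimal sketch (the worker body and stack size are illustrative):

    #define _GNU_SOURCE
    #include <sched.h>       /* clone(), CLONE_VM */
    #include <signal.h>      /* SIGCHLD */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    static int worker(void *arg)
    {
        printf("child: arg=%s\n", (char *)arg);
        return 0;                    /* the wrapper turns this into _exit(0) */
    }

    int main(void)
    {
        size_t sz = 64 * 1024;
        char *stack = malloc(sz);

        /* the stack grows down on x86, so pass the top of the block */
        int pid = clone(worker, stack + sz, CLONE_VM | SIGCHLD, "hello");
        if (pid < 0) { perror("clone"); return 1; }

        waitpid(pid, NULL, 0);       /* SIGCHLD makes the child waitable */
        free(stack);
        return 0;
    }
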
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+    /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+    struct task_struct *tsk = current;
+
+    memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+    /*
+     * Forget coprocessor state..
+     */
+    clear_fpu(tsk);
+    tsk->used_math = 0;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+    if (dead_task->mm) {
+        void * ldt = dead_task->mm->context.segments;
+
+        // temporary debugging check
+        if (ldt) {
+            printk("WARNING: dead process %8s still has LDT? <%p>\n",
+                   dead_task->comm, ldt);
+            BUG();
+        }
+    }
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+    struct mm_struct * old_mm;
+    void *old_ldt, *ldt;
+
+    ldt = NULL;
+    old_mm = current->mm;
+    if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
+        /*
+         * Completely new LDT, we initialize it from the parent:
+         */
+        ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+        if (!ldt)
+            printk(KERN_WARNING "ldt allocation failed\n");
+        else
+            memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+    }
+    new_mm->context.segments = ldt;
+    new_mm->context.cpuvalid = ~0UL;   /* valid on all CPUs - they can't have stale data */
+}
+
+/*
+ * Save a segment.
+ */
+#define savesegment(seg,value) \
+       asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+                unsigned long unused,
+                struct task_struct * p, struct pt_regs * regs)
+{
+    struct pt_regs * childregs;
+
+    childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+    struct_cpy(childregs, regs);
+    childregs->eax = 0;
+    childregs->esp = esp;
+
+    p->thread.esp = (unsigned long) childregs;
+    p->thread.esp0 = (unsigned long) (childregs+1);
+
+    p->thread.eip = (unsigned long) ret_from_fork;
+
+    savesegment(fs,p->thread.fs);
+    savesegment(gs,p->thread.gs);
+
+    unlazy_fpu(current);
+    struct_cpy(&p->thread.i387, &current->thread.i387);
+
+    return 0;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+    int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+    dump->magic = CMAGIC;
+    dump->start_code = 0;
+    dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+    dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+    dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+    dump->u_dsize -= dump->u_tsize;
+    dump->u_ssize = 0;
+    for (i = 0; i < 8; i++)
+        dump->u_debugreg[i] = current->thread.debugreg[i];  
+
+    if (dump->start_stack < TASK_SIZE)
+        dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+    dump->regs.ebx = regs->ebx;
+    dump->regs.ecx = regs->ecx;
+    dump->regs.edx = regs->edx;
+    dump->regs.esi = regs->esi;
+    dump->regs.edi = regs->edi;
+    dump->regs.ebp = regs->ebp;
+    dump->regs.eax = regs->eax;
+    dump->regs.ds = regs->xds;
+    dump->regs.es = regs->xes;
+    savesegment(fs,dump->regs.fs);
+    savesegment(gs,dump->regs.gs);
+    dump->regs.orig_eax = regs->orig_eax;
+    dump->regs.eip = regs->eip;
+    dump->regs.cs = regs->xcs;
+    dump->regs.eflags = regs->eflags;
+    dump->regs.esp = regs->esp;
+    dump->regs.ss = regs->xss;
+
+    dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+               __asm__("movl %0,%%db" #register  \
+                       : /* no output */ \
+                       :"r" (thread->debugreg[register]))
+
+/*
+ *     switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPUs, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is something of a red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ */
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+    struct thread_struct *prev = &prev_p->thread,
+        *next = &next_p->thread;
+
+    unlazy_fpu(prev_p);
+
+    HYPERVISOR_set_guest_stack(__KERNEL_DS, next->esp0);
+
+    /*
+     * Save away %fs and %gs. No need to save %es and %ds, as
+     * those are always kernel segments while inside the kernel.
+     */
+    asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+    asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+    /*
+     * Restore %fs and %gs.
+     */
+    loadsegment(fs, next->fs);
+    loadsegment(gs, next->gs);
+
+#if 0
+    /*
+     * Now maybe reload the debug registers
+     */
+    if (next->debugreg[7]){
+        loaddebug(next, 0);
+        loaddebug(next, 1);
+        loaddebug(next, 2);
+        loaddebug(next, 3);
+        /* no 4 and 5 */
+        loaddebug(next, 6);
+        loaddebug(next, 7);
+    }
+#endif
+}
+
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+    return do_fork(SIGCHLD, regs.esp, &regs, 0);
+}
+
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+    unsigned long clone_flags;
+    unsigned long newsp;
+
+    clone_flags = regs.ebx;
+    newsp = regs.ecx;
+    if (!newsp)
+        newsp = regs.esp;
+    return do_fork(clone_flags, newsp, &regs, 0);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+    return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+    int error;
+    char * filename;
+
+    filename = getname((char *) regs.ebx);
+    error = PTR_ERR(filename);
+    if (IS_ERR(filename))
+        goto out;
+    error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
+    if (error == 0)
+        current->ptrace &= ~PT_DTRACE;
+    putname(filename);
+ out:
+    return error;
+}
+
+/*
+ * These bracket the sleeping functions..
+ */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched    ((unsigned long) scheduling_functions_start_here)
+#define last_sched     ((unsigned long) scheduling_functions_end_here)
+
+unsigned long get_wchan(struct task_struct *p)
+{
+    unsigned long ebp, esp, eip;
+    unsigned long stack_page;
+    int count = 0;
+    if (!p || p == current || p->state == TASK_RUNNING)
+        return 0;
+    stack_page = (unsigned long)p;
+    esp = p->thread.esp;
+    if (!stack_page || esp < stack_page || esp > 8188+stack_page)
+        return 0;
+    /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+    ebp = *(unsigned long *) esp;
+    do {
+        if (ebp < stack_page || ebp > 8184+stack_page)
+            return 0;
+        eip = *(unsigned long *) (ebp+4);
+        if (eip < first_sched || eip >= last_sched)
+            return eip;
+        ebp = *(unsigned long *) ebp;
+    } while (count++ < 16);
+    return 0;
+}
+#undef last_sched
+#undef first_sched
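
get_wchan() above finds a sleeping task's wait channel by walking its saved %ebp chain. The same walk can be sketched for the current thread in userspace, assuming frame pointers are kept (-fno-omit-frame-pointer) and the i386-style layout where *fp is the caller's frame pointer and fp[1] the return address; the kernel's range checks against the stack page are omitted here:

    #include <stdio.h>

    static void walk_frames(void)
    {
        /* GCC builtin: frame pointer of the current function */
        unsigned long *fp = __builtin_frame_address(0);
        int count = 0;

        while (fp && count++ < 16) {      /* same 16-frame cap as get_wchan() */
            printf("return address %#lx\n", fp[1]);
            fp = (unsigned long *)fp[0];  /* caller's saved frame pointer */
        }
    }

    int main(void)
    {
        walk_frames();
        return 0;
    }
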
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c
new file mode 100644 (file)
index 0000000..0fe8689
--- /dev/null
@@ -0,0 +1,463 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/*
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/debugreg.h>
+
+/*
+ * does not yet catch signals sent when the child dies;
+ * those are handled in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access, 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/* sets the trap flag. */
+#define TRAP_FLAG 0x100
+
+/*
+ * Offset of eflags on child stack..
+ */
+#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
+
+/*
+ * this routine will get a word off the process's privileged stack.
+ * the offset is how far from the base address stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */   
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+       unsigned char *stack;
+
+       stack = (unsigned char *)task->thread.esp0;
+       stack += offset;
+       return (*((int *)stack));
+}
+
+/*
+ * this routine will put a word on the process's privileged stack.
+ * the offset is how far from the base address stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+       unsigned long data)
+{
+       unsigned char * stack;
+
+       stack = (unsigned char *) task->thread.esp0;
+       stack += offset;
+       *(unsigned long *) stack = data;
+       return 0;
+}
+
+static int putreg(struct task_struct *child,
+       unsigned long regno, unsigned long value)
+{
+       switch (regno >> 2) {
+               case FS:
+                       if (value && (value & 3) != 3)
+                               return -EIO;
+                       child->thread.fs = value;
+                       return 0;
+               case GS:
+                       if (value && (value & 3) != 3)
+                               return -EIO;
+                       child->thread.gs = value;
+                       return 0;
+               case DS:
+               case ES:
+                       if (value && (value & 3) != 3)
+                               return -EIO;
+                       value &= 0xffff;
+                       break;
+               case SS:
+               case CS:
+                       if ((value & 3) != 3)
+                               return -EIO;
+                       value &= 0xffff;
+                       break;
+               case EFL:
+                       value &= FLAG_MASK;
+                       value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
+                       break;
+       }
+       if (regno > GS*4)
+               regno -= 2*4;
+       put_stack_long(child, regno - sizeof(struct pt_regs), value);
+       return 0;
+}
+
+static unsigned long getreg(struct task_struct *child,
+       unsigned long regno)
+{
+       unsigned long retval = ~0UL;
+
+       switch (regno >> 2) {
+               case FS:
+                       retval = child->thread.fs;
+                       break;
+               case GS:
+                       retval = child->thread.gs;
+                       break;
+               case DS:
+               case ES:
+               case SS:
+               case CS:
+                       retval = 0xffff;
+                       /* fall through */
+               default:
+                       if (regno > GS*4)
+                               regno -= 2*4;
+                       regno = regno - sizeof(struct pt_regs);
+                       retval &= get_stack_long(child, regno);
+       }
+       return retval;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{ 
+       long tmp;
+
+       tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
+       put_stack_long(child, EFL_OFFSET, tmp);
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+       struct task_struct *child;
+       struct user * dummy = NULL;
+       int i, ret;
+
+       lock_kernel();
+       ret = -EPERM;
+       if (request == PTRACE_TRACEME) {
+               /* are we already being traced? */
+               if (current->ptrace & PT_PTRACED)
+                       goto out;
+               /* set the ptrace bit in the process flags. */
+               current->ptrace |= PT_PTRACED;
+               ret = 0;
+               goto out;
+       }
+       ret = -ESRCH;
+       read_lock(&tasklist_lock);
+       child = find_task_by_pid(pid);
+       if (child)
+               get_task_struct(child);
+       read_unlock(&tasklist_lock);
+       if (!child)
+               goto out;
+
+       ret = -EPERM;
+       if (pid == 1)           /* you may not mess with init */
+               goto out_tsk;
+
+       if (request == PTRACE_ATTACH) {
+               ret = ptrace_attach(child);
+               goto out_tsk;
+       }
+
+       ret = ptrace_check_attach(child, request == PTRACE_KILL);
+       if (ret < 0)
+               goto out_tsk;
+
+       switch (request) {
+       /* when I and D space are separate, these will need to be fixed. */
+       case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+       case PTRACE_PEEKDATA: {
+               unsigned long tmp;
+               int copied;
+
+               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+               ret = -EIO;
+               if (copied != sizeof(tmp))
+                       break;
+               ret = put_user(tmp,(unsigned long *) data);
+               break;
+       }
+
+       /* read the word at location addr in the USER area. */
+       case PTRACE_PEEKUSR: {
+               unsigned long tmp;
+
+               ret = -EIO;
+               if ((addr & 3) || addr < 0 || 
+                   addr > sizeof(struct user) - 3)
+                       break;
+
+               tmp = 0;  /* Default return condition */
+               if(addr < FRAME_SIZE*sizeof(long))
+                       tmp = getreg(child, addr);
+               if(addr >= (long) &dummy->u_debugreg[0] &&
+                  addr <= (long) &dummy->u_debugreg[7]){
+                       addr -= (long) &dummy->u_debugreg[0];
+                       addr = addr >> 2;
+                       tmp = child->thread.debugreg[addr];
+               }
+               ret = put_user(tmp,(unsigned long *) data);
+               break;
+       }
+
+       /* when I and D space are separate, this will have to be fixed. */
+       case PTRACE_POKETEXT: /* write the word at location addr. */
+       case PTRACE_POKEDATA:
+               ret = 0;
+               if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+                       break;
+               ret = -EIO;
+               break;
+
+       case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+               ret = -EIO;
+               if ((addr & 3) || addr < 0 || 
+                   addr > sizeof(struct user) - 3)
+                       break;
+
+               if (addr < FRAME_SIZE*sizeof(long)) {
+                       ret = putreg(child, addr, data);
+                       break;
+               }
+               /* We need to be very careful here.  We implicitly
+                  want to modify a portion of the task_struct, and we
+                  have to be selective about what portions we allow someone
+                  to modify. */
+
+                 ret = -EIO;
+                 if(addr >= (long) &dummy->u_debugreg[0] &&
+                    addr <= (long) &dummy->u_debugreg[7]){
+
+                         if(addr == (long) &dummy->u_debugreg[4]) break;
+                         if(addr == (long) &dummy->u_debugreg[5]) break;
+                         if(addr < (long) &dummy->u_debugreg[4] &&
+                            ((unsigned long) data) >= TASK_SIZE-3) break;
+                         
+                         if(addr == (long) &dummy->u_debugreg[7]) {
+                                 data &= ~DR_CONTROL_RESERVED;
+                                 for(i=0; i<4; i++)
+                                         if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+                                                 goto out_tsk;
+                         }
+
+                         addr -= (long) &dummy->u_debugreg;
+                         addr = addr >> 2;
+                         child->thread.debugreg[addr] = data;
+                         ret = 0;
+                 }
+                 break;
+
+       case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+       case PTRACE_CONT: { /* restart after signal. */
+               long tmp;
+
+               ret = -EIO;
+               if ((unsigned long) data > _NSIG)
+                       break;
+               if (request == PTRACE_SYSCALL)
+                       child->ptrace |= PT_TRACESYS;
+               else
+                       child->ptrace &= ~PT_TRACESYS;
+               child->exit_code = data;
+       /* make sure the single step bit is not set. */
+               tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET,tmp);
+               wake_up_process(child);
+               ret = 0;
+               break;
+       }
+
+/*
+ * make the child exit.  Best I can do is send it a sigkill. 
+ * perhaps it should be put in the status that it wants to 
+ * exit.
+ */
+       case PTRACE_KILL: {
+               long tmp;
+
+               ret = 0;
+               if (child->state == TASK_ZOMBIE)        /* already dead */
+                       break;
+               child->exit_code = SIGKILL;
+               /* make sure the single step bit is not set. */
+               tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET, tmp);
+               wake_up_process(child);
+               break;
+       }
+
+       case PTRACE_SINGLESTEP: {  /* set the trap flag. */
+               long tmp;
+
+               ret = -EIO;
+               if ((unsigned long) data > _NSIG)
+                       break;
+               child->ptrace &= ~PT_TRACESYS;
+               if ((child->ptrace & PT_DTRACE) == 0) {
+                       /* Spurious delayed TF traps may occur */
+                       child->ptrace |= PT_DTRACE;
+               }
+               tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET, tmp);
+               child->exit_code = data;
+               /* give it a chance to run. */
+               wake_up_process(child);
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_DETACH:
+               /* detach a process that was attached. */
+               ret = ptrace_detach(child, data);
+               break;
+
+       case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE*sizeof(long))) {
+                       ret = -EIO;
+                       break;
+               }
+               for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+                       __put_user(getreg(child, i),(unsigned long *) data);
+                       data += sizeof(long);
+               }
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+               unsigned long tmp;
+               if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) {
+                       ret = -EIO;
+                       break;
+               }
+               for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
+                       __get_user(tmp, (unsigned long *) data);
+                       putreg(child, i, tmp);
+                       data += sizeof(long);
+               }
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_GETFPREGS: { /* Get the child FPU state. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               ret = 0;
+               if ( !child->used_math ) {
+                       /* Simulate an empty FPU. */
+                       set_fpu_cwd(child, 0x037f);
+                       set_fpu_swd(child, 0x0000);
+                       set_fpu_twd(child, 0xffff);
+               }
+               get_fpregs((struct user_i387_struct *)data, child);
+               break;
+       }
+
+       case PTRACE_SETFPREGS: { /* Set the child FPU state. */
+               if (!access_ok(VERIFY_READ, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               child->used_math = 1;
+               set_fpregs(child, (struct user_i387_struct *)data);
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+                              sizeof(struct user_fxsr_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               if ( !child->used_math ) {
+                       /* Simulate an empty FPU. */
+                       set_fpu_cwd(child, 0x037f);
+                       set_fpu_swd(child, 0x0000);
+                       set_fpu_twd(child, 0xffff);
+                       set_fpu_mxcsr(child, 0x1f80);
+               }
+               ret = get_fpxregs((struct user_fxsr_struct *)data, child);
+               break;
+       }
+
+       case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
+               if (!access_ok(VERIFY_READ, (unsigned *)data,
+                              sizeof(struct user_fxsr_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               child->used_math = 1;
+               ret = set_fpxregs(child, (struct user_fxsr_struct *)data);
+               break;
+       }
+
+       case PTRACE_SETOPTIONS: {
+               if (data & PTRACE_O_TRACESYSGOOD)
+                       child->ptrace |= PT_TRACESYSGOOD;
+               else
+                       child->ptrace &= ~PT_TRACESYSGOOD;
+               ret = 0;
+               break;
+       }
+
+       default:
+               ret = -EIO;
+               break;
+       }
+out_tsk:
+       free_task_struct(child);
+out:
+       unlock_kernel();
+       return ret;
+}
+
+asmlinkage void syscall_trace(void)
+{
+       if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) !=
+                       (PT_PTRACED|PT_TRACESYS))
+               return;
+       /* the 0x80 provides a way for the tracing parent to distinguish
+          between a syscall stop and SIGTRAP delivery */
+       current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+                                       ? 0x80 : 0);
+       current->state = TASK_STOPPED;
+       notify_parent(current, SIGCHLD);
+       schedule();
+       /*
+        * this isn't the same as continuing with a signal, but it will do
+        * for normal use.  strace only continues with a signal if the
+        * stopping signal is not SIGTRAP.  -brl
+        */
+       if (current->exit_code) {
+               send_sig(current->exit_code, current, 1);
+               current->exit_code = 0;
+       }
+}
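
A minimal tracer exercising this interface from userspace: PTRACE_TRACEME in the child, then PTRACE_SETOPTIONS and a PTRACE_SYSCALL loop in the parent, watching for the SIGTRAP|0x80 stop that syscall_trace() reports once PT_TRACESYSGOOD is set. Only standard ptrace(2) calls are assumed; error handling is elided:

    #include <signal.h>
    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        pid_t pid = fork();
        if (pid == 0) {                     /* child: ask to be traced */
            ptrace(PTRACE_TRACEME, 0, NULL, NULL);
            execlp("true", "true", NULL);   /* stops with SIGTRAP at exec */
            _exit(127);
        }

        int status;
        waitpid(pid, &status, 0);           /* wait for the exec stop */
        /* request SIGTRAP|0x80 on syscall stops (PT_TRACESYSGOOD above) */
        ptrace(PTRACE_SETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD);

        for (;;) {
            ptrace(PTRACE_SYSCALL, pid, NULL, NULL);  /* run to next syscall */
            waitpid(pid, &status, 0);
            if (WIFEXITED(status))
                break;
            if (WSTOPSIG(status) == (SIGTRAP | 0x80))
                printf("syscall stop\n");   /* entry or exit of a syscall */
        }
        return 0;
    }
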
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c
new file mode 100644 (file)
index 0000000..08ff686
--- /dev/null
@@ -0,0 +1,292 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@redhat.com>
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleepers"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative, we must
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+       wake_up(&sem->wait);
+}
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void __down(struct semaphore * sem)
+{
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       tsk->state = TASK_UNINTERRUPTIBLE;
+       add_wait_queue_exclusive(&sem->wait, &wait);
+
+       spin_lock_irq(&semaphore_lock);
+       sem->sleepers++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irq(&semaphore_lock);
+
+               schedule();
+               tsk->state = TASK_UNINTERRUPTIBLE;
+               spin_lock_irq(&semaphore_lock);
+       }
+       spin_unlock_irq(&semaphore_lock);
+       remove_wait_queue(&sem->wait, &wait);
+       tsk->state = TASK_RUNNING;
+       wake_up(&sem->wait);
+}
+
+int __down_interruptible(struct semaphore * sem)
+{
+       int retval = 0;
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       tsk->state = TASK_INTERRUPTIBLE;
+       add_wait_queue_exclusive(&sem->wait, &wait);
+
+       spin_lock_irq(&semaphore_lock);
+       sem->sleepers++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * With signals pending, this turns into
+                * the trylock failure case - we won't be
+                * sleeping, and we can't get the lock as
+                * it has contention. Just correct the count
+                * and exit.
+                */
+               if (signal_pending(current)) {
+                       retval = -EINTR;
+                       sem->sleepers = 0;
+                       atomic_add(sleepers, &sem->count);
+                       break;
+               }
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock. The
+                * "-1" is because we're still hoping to get
+                * the lock.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irq(&semaphore_lock);
+
+               schedule();
+               tsk->state = TASK_INTERRUPTIBLE;
+               spin_lock_irq(&semaphore_lock);
+       }
+       spin_unlock_irq(&semaphore_lock);
+       tsk->state = TASK_RUNNING;
+       remove_wait_queue(&sem->wait, &wait);
+       wake_up(&sem->wait);
+       return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+       int sleepers;
+       unsigned long flags;
+
+       spin_lock_irqsave(&semaphore_lock, flags);
+       sleepers = sem->sleepers + 1;
+       sem->sleepers = 0;
+
+       /*
+        * Add "everybody else" and us into it. They aren't
+        * playing, because we own the spinlock.
+        */
+       if (!atomic_add_negative(sleepers, &sem->count))
+               wake_up(&sem->wait);
+
+       spin_unlock_irqrestore(&semaphore_lock, flags);
+       return 1;
+}
+
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %ecx contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax when used as a return
+ * value..
+ */
+asm(
+".text\n"
+".align 4\n"
+".globl __down_failed\n"
+"__down_failed:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %eax\n\t"
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+       "popl %eax\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".text\n"
+".align 4\n"
+".globl __down_failed_interruptible\n"
+"__down_failed_interruptible:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down_interruptible\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".text\n"
+".align 4\n"
+".globl __down_failed_trylock\n"
+"__down_failed_trylock:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down_trylock\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".text\n"
+".align 4\n"
+".globl __up_wakeup\n"
+"__up_wakeup:\n\t"
+       "pushl %eax\n\t"
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __up\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+       "popl %eax\n\t"
+       "ret"
+);
+
+/*
+ * rw spinlock fallbacks
+ */
+#if defined(CONFIG_SMP)
+asm(
+"
+.align 4
+.globl __write_lock_failed
+__write_lock_failed:
+       " LOCK "addl    $" RW_LOCK_BIAS_STR ",(%eax)
+1:     rep; nop
+       cmpl    $" RW_LOCK_BIAS_STR ",(%eax)
+       jne     1b
+
+       " LOCK "subl    $" RW_LOCK_BIAS_STR ",(%eax)
+       jnz     __write_lock_failed
+       ret
+
+
+.align 4
+.globl __read_lock_failed
+__read_lock_failed:
+       lock ; incl     (%eax)
+1:     rep; nop
+       cmpl    $1,(%eax)
+       js      1b
+
+       lock ; decl     (%eax)
+       js      __read_lock_failed
+       ret
+"
+);
+#endif
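
The count/sleepers bookkeeping above can be sketched in userspace with C11 atomics, a mutex standing in for semaphore_lock and a condition variable for the wait queue. This is illustrative only: the kernel enqueues itself on the wait queue before re-checking, which a condition variable cannot express exactly:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct {
        atomic_int count;        /* >0: free; <=0: contended (sem->count) */
        int sleepers;            /* guarded by lock, like semaphore_lock */
        pthread_mutex_t lock;
        pthread_cond_t wait;     /* stands in for the kernel wait queue */
    } sema_t;

    static void sema_down(sema_t *s)
    {
        /* fast path: the inline down() these slow paths back up */
        if (atomic_fetch_sub(&s->count, 1) > 0)
            return;

        /* slow path: the logic of __down() */
        pthread_mutex_lock(&s->lock);
        s->sleepers++;
        for (;;) {
            int sleepers = s->sleepers;
            /* fold "everybody else" back into count; a non-negative
               result means we own the semaphore (atomic_add_negative) */
            if (atomic_fetch_add(&s->count, sleepers - 1) + sleepers - 1 >= 0) {
                s->sleepers = 0;
                break;
            }
            s->sleepers = 1;     /* us - see -1 above */
            pthread_cond_wait(&s->wait, &s->lock);
        }
        pthread_mutex_unlock(&s->lock);
        pthread_cond_broadcast(&s->wait);
    }

    static void sema_up(sema_t *s)
    {
        /* up() only takes the slow path if count was negative before */
        if (atomic_fetch_add(&s->count, 1) < 0) {
            pthread_mutex_lock(&s->lock);
            pthread_cond_broadcast(&s->wait);   /* __up() -> wake_up() */
            pthread_mutex_unlock(&s->lock);
        }
    }

    int main(void)
    {
        sema_t s = { 1, 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };
        sema_down(&s);
        sema_up(&s);
        puts("ok");
        return 0;
    }
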
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c
new file mode 100644 (file)
index 0000000..3d4325f
--- /dev/null
@@ -0,0 +1,975 @@
+/*
+ *  linux/arch/i386/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/apm_bios.h>
+#ifdef CONFIG_BLK_DEV_RAM
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/desc.h>
+#include <asm/dma.h>
+#include <asm/mpspec.h>
+#include <asm/mmu_context.h>
+#include <asm/hypervisor.h>
+
+shared_info_t *HYPERVISOR_shared_info;
+
+/*
+ * Machine setup..
+ */
+
+char ignore_irq13;             /* set if exception 16 works */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+
+unsigned long mmu_cr4_features;
+
+/*
+ * Bus types ..
+ */
+#ifdef CONFIG_EISA
+int EISA_bus;
+#endif
+int MCA_bus;
+
+/* for MCA, but anyone else can use it if they want */
+unsigned int machine_id;
+unsigned int machine_submodel_id;
+unsigned int BIOS_revision;
+unsigned int mca_pentium_flag;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct apm_info apm_info;
+struct sys_desc_table_struct {
+    unsigned short length;
+    unsigned char table[0];
+};
+
+unsigned char aux_device_present;
+
+extern int root_mountflags;
+extern char _text, _etext, _edata, _end;
+
+int enable_acpi_smp_table;
+
+/* Raw start-of-day parameters from the hypervisor. */
+union start_info_union start_info_union;
+
+#define COMMAND_LINE_SIZE 256
+static char command_line[COMMAND_LINE_SIZE];
+char saved_command_line[COMMAND_LINE_SIZE];
+
+static void __init parse_mem_cmdline (char ** cmdline_p)
+{
+    char c = ' ', *to = command_line, *from = saved_command_line;
+    int len = 0;
+
+    /* Save unparsed command line copy for /proc/cmdline */
+    memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE);
+    saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+    for (;;) {
+        /*
+         * "mem=nopentium" disables the 4MB page tables.
+         * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+         * to <mem>, overriding the bios size.
+         * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+         * <start> to <start>+<mem>, overriding the bios size.
+         */
+        if (c == ' ' && !memcmp(from, "mem=", 4)) {
+            if (to != command_line)
+                to--;
+            if (!memcmp(from+4, "nopentium", 9)) {
+                from += 9+4;
+            } else if (!memcmp(from+4, "exactmap", 8)) {
+                from += 8+4;
+            } else {
+                (void)memparse(from+4, &from);
+                if (*from == '@')
+                    (void)memparse(from+1, &from);
+            }
+        }
+
+        c = *(from++);
+        if (!c)
+            break;
+        if (COMMAND_LINE_SIZE <= ++len)
+            break;
+        *(to++) = c;
+    }
+    *to = '\0';
+    *cmdline_p = command_line;
+}
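
memparse() here accepts a number with an optional k/K/m/M scale suffix, with '@' separating a region size from its start. A sketch of that suffix handling, assuming only strtoul(); the g/G case is a later addition:

    #include <stdlib.h>

    /* sketch of memparse()-style size parsing */
    static unsigned long parse_size(const char *s, char **end)
    {
        unsigned long val = strtoul(s, end, 0);   /* base 0 handles 0x... */

        switch (**end) {
        case 'G': case 'g': val <<= 30; (*end)++; break;
        case 'M': case 'm': val <<= 20; (*end)++; break;
        case 'K': case 'k': val <<= 10; (*end)++; break;
        }
        return val;
    }

    /* "mem=64M@2G": parse_size("64M@2G", &p) yields 64<<20 and leaves p
       at '@'; a second parse_size(p + 1, &p) yields the 2G region start. */
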
+
+void __init setup_arch(char **cmdline_p)
+{
+    unsigned long start_pfn, max_pfn, max_low_pfn;
+    unsigned long bootmap_size;
+    char str[256]; int strcnt;
+
+    void hypervisor_callback(void);
+    void failsafe_callback(void);
+
+    HYPERVISOR_shared_info->event_address    = 
+        (unsigned long)hypervisor_callback;
+    HYPERVISOR_shared_info->failsafe_address =
+        (unsigned long)failsafe_callback;
+
+    ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
+    memset(&drive_info, 0, sizeof(drive_info));
+    memset(&screen_info, 0, sizeof(screen_info));
+    memset(&apm_info.bios, 0, sizeof(apm_info.bios));
+    aux_device_present = 0;
+
+#ifdef CONFIG_BLK_DEV_RAM
+    rd_image_start = 0;
+    rd_prompt = 0;
+    rd_doload = 0;
+#endif
+
+    root_mountflags &= ~MS_RDONLY;
+    init_mm.start_code = (unsigned long) &_text;
+    init_mm.end_code = (unsigned long) &_etext;
+    init_mm.end_data = (unsigned long) &_edata;
+    init_mm.brk = (unsigned long) &_end;
+
+    parse_mem_cmdline(cmdline_p);
+
+#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
+
+/*
+ * 128MB for vmalloc and initrd
+ */
+#define VMALLOC_RESERVE        (unsigned long)(128 << 20)
+#define MAXMEM         (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
+#define MAXMEM_PFN     PFN_DOWN(MAXMEM)
+#define MAX_NONPAE_PFN (1 << 20)
+
+    /*
+     * partially used pages are not usable - thus
+     * we are rounding upwards:
+     */
+#ifdef CONFIG_BLK_DEV_INITRD
+    if ( start_info.mod_start )
+        start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len));
+    else
+#endif
+    start_pfn = PFN_UP(__pa(&_end));
+    max_pfn = start_info.nr_pages;
+
+    /*
+     * Determine low and high memory ranges:
+     */
+    max_low_pfn = max_pfn;
+    if (max_low_pfn > MAXMEM_PFN) {
+        max_low_pfn = MAXMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+        /* Maximum memory usable is what is directly addressable */
+        printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+               MAXMEM>>20);
+        if (max_pfn > MAX_NONPAE_PFN)
+            printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+        else
+            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+#else /* !CONFIG_HIGHMEM */
+#ifndef CONFIG_X86_PAE
+        if (max_pfn > MAX_NONPAE_PFN) {
+            max_pfn = MAX_NONPAE_PFN;
+            printk(KERN_WARNING "Warning only 4GB will be used.\n");
+            printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+        }
+#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM */
+    }
+
+#ifdef CONFIG_HIGHMEM
+    highstart_pfn = highend_pfn = max_pfn;
+    if (max_pfn > MAXMEM_PFN) {
+        highstart_pfn = MAXMEM_PFN;
+        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+               pages_to_mb(highend_pfn - highstart_pfn));
+    }
+#endif
+
+    /*
+     * Initialize the boot-time allocator, and free up all RAM.
+     * Then reserve space for OS image, and the bootmem bitmap.
+     */
+    bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+    free_bootmem(0, PFN_PHYS(max_pfn));
+    reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1);
+
+    /* Now reserve space for the hypervisor-provided page tables. */
+    {
+        unsigned long *pgd = (unsigned long *)start_info.pt_base;
+        unsigned long  pte;
+        int i;
+        reserve_bootmem(__pa(pgd), PAGE_SIZE);
+        for ( i = 0; i < (0xE0000000UL>>22); i++ )
+        {
+            unsigned long pgde = *pgd++;
+            if ( !(pgde & 1) ) continue;
+            pte = (pgde & PAGE_MASK) - start_info.phys_base;
+            reserve_bootmem(pte, PAGE_SIZE);
+        }
+    }
+    cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+    if (start_info.mod_start) {
+        if ((__pa(start_info.mod_start) + start_info.mod_len) <= 
+            (max_low_pfn << PAGE_SHIFT)) {
+            initrd_start = start_info.mod_start;
+            initrd_end   = initrd_start + start_info.mod_len;
+            initrd_below_start_ok = 1;
+        }
+        else {
+            printk(KERN_ERR "initrd extends beyond end of memory "
+                   "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+                   __pa(start_info.mod_start) + start_info.mod_len,
+                   max_low_pfn << PAGE_SHIFT);
+            initrd_start = 0;
+        }
+    }
+#endif
+
+    paging_init();
+}
+
+static int cachesize_override __initdata = -1;
+static int __init cachesize_setup(char *str)
+{
+    get_option (&str, &cachesize_override);
+    return 1;
+}
+__setup("cachesize=", cachesize_setup);
+
+
+static int __init get_model_name(struct cpuinfo_x86 *c)
+{
+    unsigned int *v;
+    char *p, *q;
+
+    if (cpuid_eax(0x80000000) < 0x80000004)
+        return 0;
+
+    v = (unsigned int *) c->x86_model_id;
+    cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+    cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+    cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+    c->x86_model_id[48] = 0;
+
+    /* Intel chips right-justify this string for some dumb reason;
+       undo that brain damage */
+    p = q = &c->x86_model_id[0];
+    while ( *p == ' ' )
+        p++;
+    if ( p != q ) {
+        while ( *p )
+            *q++ = *p++;
+        while ( q <= &c->x86_model_id[48] )
+            *q++ = '\0';       /* Zero-pad the rest */
+    }
+
+    return 1;
+}
+
+
+static void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+    unsigned int n, dummy, ecx, edx, l2size;
+
+    n = cpuid_eax(0x80000000);
+
+    if (n >= 0x80000005) {
+        cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+        printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+               edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+        c->x86_cache_size=(ecx>>24)+(edx>>24); 
+    }
+
+    if (n < 0x80000006)        /* Some chips just have a large L1. */
+        return;
+
+    ecx = cpuid_ecx(0x80000006);
+    l2size = ecx >> 16;
+
+    /* AMD errata T13 (order #21922) */
+    if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
+        if (c->x86_model == 3 && c->x86_mask == 0)     /* Duron Rev A0 */
+            l2size = 64;
+        if (c->x86_model == 4 &&
+            (c->x86_mask==0 || c->x86_mask==1))        /* Tbird rev A1/A2 */
+            l2size = 256;
+    }
+
+    /* Intel PIII Tualatin. This comes in two flavours.
+     * One has 256kb of cache, the other 512kb. We have no way
+     * to determine which, so we use a boot-time override
+     * for the 512kb model, and assume 256 otherwise.
+     */
+    if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) &&
+        (c->x86_model == 11) && (l2size == 0))
+        l2size = 256;
+
+    /* VIA C3 CPUs (670-68F) need further shifting. */
+    if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) &&
+        ((c->x86_model == 7) || (c->x86_model == 8))) {
+        l2size = l2size >> 8;
+    }
+
+    /* Allow user to override all this if necessary. */
+    if (cachesize_override != -1)
+        l2size = cachesize_override;
+
+    if ( l2size == 0 )
+        return;                /* Again, no L2 cache is possible */
+
+    c->x86_cache_size = l2size;
+
+    printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+           l2size, ecx & 0xFF);
+}
+
+
+static int __init init_amd(struct cpuinfo_x86 *c)
+{
+    int r;
+
+    /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+       3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+    clear_bit(0*32+31, &c->x86_capability);
+       
+    r = get_model_name(c);
+
+    switch(c->x86)
+    {
+    case 6:    /* An Athlon/Duron. We can probably trust the BIOS. */
+        break;         
+    default:
+        panic("Unsupported AMD processor\n");
+    }
+
+    display_cacheinfo(c);
+    return r;
+}
+
+
+static void __init init_intel(struct cpuinfo_x86 *c)
+{
+    char *p = NULL;
+    unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+
+    if (c->cpuid_level > 1) {
+        /* supports eax=2  call */
+        int i, j, n;
+        int regs[4];
+        unsigned char *dp = (unsigned char *)regs;
+
+        /* Number of times to iterate */
+        n = cpuid_eax(2) & 0xFF;
+
+        for ( i = 0 ; i < n ; i++ ) {
+            cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+                       
+            /* If bit 31 of a register is set, its contents are invalid */
+            for ( j = 0 ; j < 4 ; j++ ) {
+                if ( regs[j] < 0 ) regs[j] = 0;
+            }
+
+            /* Byte 0 is level count, not a descriptor */
+            for ( j = 1 ; j < 16 ; j++ ) {
+                unsigned char des = dp[j];
+                unsigned char dl, dh;
+                unsigned int cs;
+
+                dh = des >> 4;
+                dl = des & 0x0F;
+
+                               /* Black magic... */
+
+                switch ( dh )
+                {
+                case 0:
+                    switch ( dl ) {
+                    case 6:
+                        /* L1 I cache */
+                        l1i += 8;
+                        break;
+                    case 8:
+                        /* L1 I cache */
+                        l1i += 16;
+                        break;
+                    case 10:
+                        /* L1 D cache */
+                        l1d += 8;
+                        break;
+                    case 12:
+                        /* L1 D cache */
+                        l1d += 16;
+                        break;
+                    default:;
+                        /* TLB, or unknown */
+                    }
+                    break;
+                case 2:
+                    if ( dl ) {
+                        /* L3 cache */
+                        cs = (dl-1) << 9;
+                        l3 += cs;
+                    }
+                    break;
+                case 4:
+                    if ( c->x86 > 6 && dl ) {
+                        /* P4 family */
+                        /* L3 cache */
+                        cs = 128 << (dl-1);
+                        l3 += cs;
+                        break;
+                    }
+                    /* else same as 8 - fall through */
+                case 8:
+                    if ( dl ) {
+                        /* L2 cache */
+                        cs = 128 << (dl-1);
+                        l2 += cs;
+                    }
+                    break;
+                case 6:
+                    if (dl > 5) {
+                        /* L1 D cache */
+                        cs = 8<<(dl-6);
+                        l1d += cs;
+                    }
+                    break;
+                case 7:
+                    if ( dl >= 8 ) 
+                    {
+                        /* L2 cache */
+                        cs = 64<<(dl-8);
+                        l2 += cs;
+                    } else {
+                        /* L0 I cache, count as L1 */
+                        cs = dl ? (16 << (dl-1)) : 12;
+                        l1i += cs;
+                    }
+                    break;
+                default:
+                    /* TLB, or something else we don't know about */
+                    break;
+                }
+            }
+        }
+        if ( l1i || l1d )
+            printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n",
+                   l1i, l1d);
+        if ( l2 )
+            printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
+        if ( l3 )
+            printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
+
+        /*
+         * This assumes the L3 cache is shared; it typically lives in
+         * the northbridge.  The L1 caches are included by the L2
+         * cache, and so should not be included for the purpose of
+         * SMP switching weights.
+         */
+        c->x86_cache_size = l2 ? l2 : (l1i+l1d);
+    }
+
+    /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */
+    if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
+        clear_bit(X86_FEATURE_SEP, &c->x86_capability);
+       
+    /* Names for the Pentium II/Celeron processors 
+       detectable only by also checking the cache size.
+       Dixon is NOT a Celeron. */
+    if (c->x86 == 6) {
+        switch (c->x86_model) {
+        case 5:
+            if (l2 == 0)
+                p = "Celeron (Covington)";
+            if (l2 == 256)
+                p = "Mobile Pentium II (Dixon)";
+            break;
+                       
+        case 6:
+            if (l2 == 128)
+                p = "Celeron (Mendocino)";
+            break;
+                       
+        case 8:
+            if (l2 == 128)
+                p = "Celeron (Coppermine)";
+            break;
+        }
+    }
+
+    if ( p )
+        strcpy(c->x86_model_id, p);
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+    char *v = c->x86_vendor_id;
+
+    if (!strcmp(v, "GenuineIntel"))
+        c->x86_vendor = X86_VENDOR_INTEL;
+    else if (!strcmp(v, "AuthenticAMD"))
+        c->x86_vendor = X86_VENDOR_AMD;
+    else
+        c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+    int vendor;
+    int family;
+    char *model_names[16];
+};
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table is only used if init_<vendor>() below doesn't set the model name; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, it isn't used. */
+static struct cpu_model_info cpu_models[] __initdata = {
+    { X86_VENDOR_INTEL,        6,
+      { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)", 
+        NULL, "Pentium II (Deschutes)", "Mobile Pentium II",
+        "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL,
+        "Pentium III (Cascades)", NULL, NULL, NULL, NULL }},
+    { X86_VENDOR_AMD,  6, /* Is this really necessary? */
+      { "Athlon", "Athlon",
+        "Athlon", NULL, "Athlon", NULL,
+        NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL }}
+};
+
+/* Look up CPU names by table lookup. */
+static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+{
+    struct cpu_model_info *info = cpu_models;
+    int i;
+
+    if ( c->x86_model >= 16 )
+        return NULL;   /* Range check */
+
+    for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) {
+        if ( info->vendor == c->x86_vendor &&
+             info->family == c->x86 ) {
+            return info->model_names[c->x86_model];
+        }
+        info++;
+    }
+    return NULL;               /* Not found */
+}
+
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+    u32 f1, f2;
+
+    asm("pushfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "movl %0,%1\n\t"
+        "xorl %2,%0\n\t"
+        "pushl %0\n\t"
+        "popfl\n\t"
+        "pushfl\n\t"
+        "popl %0\n\t"
+        "popfl\n\t"
+        : "=&r" (f1), "=&r" (f2)
+        : "ir" (flag));
+
+    return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+    return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+    int junk, i;
+    u32 xlvl, tfms;
+
+    c->loops_per_jiffy = loops_per_jiffy;
+    c->x86_cache_size = -1;
+    c->x86_vendor = X86_VENDOR_UNKNOWN;
+    c->cpuid_level = -1;       /* CPUID not detected */
+    c->x86_model = c->x86_mask = 0;    /* So far unknown... */
+    c->x86_vendor_id[0] = '\0'; /* Unset */
+    c->x86_model_id[0] = '\0';  /* Unset */
+    memset(&c->x86_capability, 0, sizeof c->x86_capability);
+    c->hard_math = 1;
+
+    if ( !have_cpuid_p() ) {
+        panic("Processor must support CPUID\n");
+    } else {
+        /* CPU does have CPUID */
+
+        /* Get vendor name */
+        cpuid(0x00000000, &c->cpuid_level,
+              (int *)&c->x86_vendor_id[0],
+              (int *)&c->x86_vendor_id[8],
+              (int *)&c->x86_vendor_id[4]);
+               
+        get_cpu_vendor(c);
+        /* Initialize the standard set of capabilities */
+        /* Note that the vendor-specific code below might override */
+
+        /* Intel-defined flags: level 0x00000001 */
+        if ( c->cpuid_level >= 0x00000001 ) {
+            cpuid(0x00000001, &tfms, &junk, &junk,
+                  &c->x86_capability[0]);
+            c->x86 = (tfms >> 8) & 15;
+            c->x86_model = (tfms >> 4) & 15;
+            c->x86_mask = tfms & 15;
+        } else {
+            /* Have CPUID level 0 only - unheard of */
+            c->x86 = 4;
+        }
+
+        /* AMD-defined flags: level 0x80000001 */
+        xlvl = cpuid_eax(0x80000000);
+        if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+            if ( xlvl >= 0x80000001 )
+                c->x86_capability[1] = cpuid_edx(0x80000001);
+            if ( xlvl >= 0x80000004 )
+                get_model_name(c); /* Default name */
+        }
+
+        /* Transmeta-defined flags: level 0x80860001 */
+        xlvl = cpuid_eax(0x80860000);
+        if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+            if (  xlvl >= 0x80860001 )
+                c->x86_capability[2] = cpuid_edx(0x80860001);
+        }
+    }
+
+    printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_vendor);
+
+    /*
+     * Vendor-specific initialization.  In this section we
+     * canonicalize the feature flags, meaning if there are
+     * features a certain CPU supports which CPUID doesn't
+     * tell us, CPUID claiming incorrect flags, or other bugs,
+     * we handle them here.
+     *
+     * At the end of this section, c->x86_capability better
+     * indicate the features this CPU genuinely supports!
+     */
+    switch ( c->x86_vendor ) {
+    case X86_VENDOR_AMD:
+        init_amd(c);
+        break;
+
+    case X86_VENDOR_INTEL:
+        init_intel(c);
+        break;
+
+    default:
+        panic("Unsupported CPU vendor\n");
+    }
+       
+    printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_capability[3]);
+
+
+    /* If the model name is still unset, do table lookup. */
+    if ( !c->x86_model_id[0] ) {
+        char *p;
+        p = table_lookup_model(c);
+        if ( p )
+            strcpy(c->x86_model_id, p);
+        else
+            /* Last resort... */
+            sprintf(c->x86_model_id, "%02x/%02x",
+                    c->x86_vendor, c->x86_model);
+    }
+
+    /* Now the feature flags better reflect actual CPU features! */
+
+    printk(KERN_DEBUG "CPU:     After generic, caps: %08x %08x %08x %08x\n",
+           c->x86_capability[0],
+           c->x86_capability[1],
+           c->x86_capability[2],
+           c->x86_capability[3]);
+
+    /*
+     * On SMP, boot_cpu_data holds the common feature set between
+     * all CPUs; so make sure that we indicate which features are
+     * common between the CPUs.  The first time this routine gets
+     * executed, c == &boot_cpu_data.
+     */
+    if ( c != &boot_cpu_data ) {
+        /* AND the already accumulated flags with these */
+        for ( i = 0 ; i < NCAPINTS ; i++ )
+            boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+    }
+
+    printk(KERN_DEBUG "CPU:             Common caps: %08x %08x %08x %08x\n",
+           boot_cpu_data.x86_capability[0],
+           boot_cpu_data.x86_capability[1],
+           boot_cpu_data.x86_capability[2],
+           boot_cpu_data.x86_capability[3]);
+}
+
+
+/* These need to match <asm/processor.h> */
+static char *cpu_vendor_names[] __initdata = {
+    "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" };
+
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+    char *vendor = NULL;
+
+    if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *))
+        vendor = cpu_vendor_names[c->x86_vendor];
+    else if (c->cpuid_level >= 0)
+        vendor = c->x86_vendor_id;
+
+    if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
+        printk("%s ", vendor);
+
+    if (!c->x86_model_id[0])
+        printk("%d86", c->x86);
+    else
+        printk("%s", c->x86_model_id);
+
+    if (c->x86_mask || c->cpuid_level >= 0) 
+        printk(" stepping %02x\n", c->x86_mask);
+    else
+        printk("\n");
+}
+
+/*
+ *     Get CPU information for use by the procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+    /* 
+     * These flag bits must match the definitions in <asm/cpufeature.h>.
+     * NULL means this bit is undefined or reserved; either way it doesn't
+     * have meaning as far as Linux is concerned.  Note that it's important
+     * to realize there is a difference between this table and CPUID -- if
+     * applications want to get the raw CPUID data, they should access
+     * /dev/cpu/<cpu_nr>/cpuid instead.
+     */
+    static char *x86_cap_flags[] = {
+        /* Intel-defined */
+        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+        "fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL,
+
+        /* AMD-defined */
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL,
+        NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+        /* Transmeta-defined */
+        "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+        /* Other (Linux-defined) */
+        "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    };
+    struct cpuinfo_x86 *c = v;
+    int i, n = c - cpu_data;
+    int fpu_exception;
+
+#ifdef CONFIG_SMP
+    if (!(cpu_online_map & (1<<n)))
+        return 0;
+#endif
+    seq_printf(m, "processor\t: %d\n"
+               "vendor_id\t: %s\n"
+               "cpu family\t: %d\n"
+               "model\t\t: %d\n"
+               "model name\t: %s\n",
+               n,
+               c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+               c->x86,
+               c->x86_model,
+               c->x86_model_id[0] ? c->x86_model_id : "unknown");
+
+    if (c->x86_mask || c->cpuid_level >= 0)
+        seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+    else
+        seq_printf(m, "stepping\t: unknown\n");
+
+    if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
+        seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
+                   cpu_khz / 1000, (cpu_khz % 1000));
+    }
+
+    /* Cache size */
+    if (c->x86_cache_size >= 0)
+        seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+
+    /* We use exception 16 if we have hardware math and we've either
+       seen it or the CPU claims it is internal. */
+    fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu);
+    seq_printf(m, "fdiv_bug\t: %s\n"
+               "hlt_bug\t\t: %s\n"
+               "f00f_bug\t: %s\n"
+               "coma_bug\t: %s\n"
+               "fpu\t\t: %s\n"
+               "fpu_exception\t: %s\n"
+               "cpuid level\t: %d\n"
+               "wp\t\t: %s\n"
+               "flags\t\t:",
+               c->fdiv_bug ? "yes" : "no",
+               c->hlt_works_ok ? "no" : "yes",
+               c->f00f_bug ? "yes" : "no",
+               c->coma_bug ? "yes" : "no",
+               c->hard_math ? "yes" : "no",
+               fpu_exception ? "yes" : "no",
+               c->cpuid_level,
+               c->wp_works_ok ? "yes" : "no");
+
+    for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+        if ( test_bit(i, &c->x86_capability) &&
+             x86_cap_flags[i] != NULL )
+            seq_printf(m, " %s", x86_cap_flags[i]);
+
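+    /* BogoMIPS ~= loops_per_jiffy * HZ / 500000, printed to two
+       decimal places; the two divisions below recover the integer
+       and fractional parts without 64-bit arithmetic. */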
+    seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+               c->loops_per_jiffy/(500000/HZ),
+               (c->loops_per_jiffy/(5000/HZ)) % 100);
+    return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+    return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+    ++*pos;
+    return c_start(m, pos);
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+struct seq_operations cpuinfo_op = {
+    start:     c_start,
+    next:      c_next,
+    stop:      c_stop,
+    show:      show_cpuinfo,
+};
+
+unsigned long cpu_initialized __initdata = 0;
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __init cpu_init (void)
+{
+    int nr = smp_processor_id();
+
+    if (test_and_set_bit(nr, &cpu_initialized)) {
+        printk(KERN_WARNING "CPU#%d already initialized!\n", nr);
+        for (;;) __sti();
+    }
+    printk(KERN_INFO "Initializing CPU#%d\n", nr);
+
+    /*
+     * set up and load the per-CPU TSS and LDT
+     */
+    atomic_inc(&init_mm.mm_count);
+    current->active_mm = &init_mm;
+    if(current->mm)
+        BUG();
+    enter_lazy_tlb(&init_mm, current, nr);
+
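+    /* This appears to be the paravirtual analogue of loading esp0
+       into the TSS: the hypervisor is told which kernel stack to
+       switch to on entry to the guest kernel. */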
+    HYPERVISOR_set_guest_stack(__KERNEL_DS, current->thread.esp0);
+
+    /* Force FPU initialization. */
+    current->flags &= ~PF_USEDFPU;
+    current->used_math = 0;
+    stts();
+}
+
+
+/******************************************************************************
+ * Time-to-die callback handling.
+ */
+
+static void time_to_die(int irq, void *unused, struct pt_regs *regs)
+{
+    extern void ctrl_alt_del(void);
+    ctrl_alt_del();
+}
+
+static int __init setup_death_event(void)
+{
+    (void)request_irq(_EVENT_DIE, time_to_die, 0, "die", NULL);
+    return 0;
+}
+
+__initcall(setup_death_event);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c
new file mode 100644 (file)
index 0000000..c0f43e6
--- /dev/null
@@ -0,0 +1,712 @@
+/*
+ *  linux/arch/i386/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
+
+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from)
+{
+       if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+               return -EFAULT;
+       if (from->si_code < 0)
+               return __copy_to_user(to, from, sizeof(siginfo_t));
+       else {
+               int err;
+
+               /* If you change siginfo_t structure, please be sure
+                  this code is fixed accordingly.
+                  It should never copy any pad contained in the structure
+                  to avoid security leaks, but must copy the generic
+                  3 ints plus the relevant union member.  */
+               err = __put_user(from->si_signo, &to->si_signo);
+               err |= __put_user(from->si_errno, &to->si_errno);
+               err |= __put_user((short)from->si_code, &to->si_code);
+               /* First 32bits of unions are always present.  */
+               err |= __put_user(from->si_pid, &to->si_pid);
+               switch (from->si_code >> 16) {
+               case __SI_FAULT >> 16:
+                       break;
+               case __SI_CHLD >> 16:
+                       err |= __put_user(from->si_utime, &to->si_utime);
+                       err |= __put_user(from->si_stime, &to->si_stime);
+                       err |= __put_user(from->si_status, &to->si_status);
+               default:
+                       err |= __put_user(from->si_uid, &to->si_uid);
+                       break;
+               /* case __SI_RT: This is not generated by the kernel as of now.  */
+               }
+               return err;
+       }
+}
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int history0, int history1, old_sigset_t mask)
+{
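+       /* On i386 the syscall arguments sit at the base of the saved
+          register frame, so the address of the first argument doubles
+          as the pt_regs pointer. */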
+       struct pt_regs * regs = (struct pt_regs *) &history0;
+       sigset_t saveset;
+
+       mask &= _BLOCKABLE;
+       spin_lock_irq(&current->sigmask_lock);
+       saveset = current->blocked;
+       siginitset(&current->blocked, mask);
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+
+       regs->eax = -EINTR;
+       while (1) {
+               current->state = TASK_INTERRUPTIBLE;
+               schedule();
+               if (do_signal(regs, &saveset))
+                       return -EINTR;
+       }
+}
+
+asmlinkage int
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
+{
+       struct pt_regs * regs = (struct pt_regs *) &unewset;
+       sigset_t saveset, newset;
+
+       /* XXX: Don't preclude handling different sized sigset_t's.  */
+       if (sigsetsize != sizeof(sigset_t))
+               return -EINVAL;
+
+       if (copy_from_user(&newset, unewset, sizeof(newset)))
+               return -EFAULT;
+       sigdelsetmask(&newset, ~_BLOCKABLE);
+
+       spin_lock_irq(&current->sigmask_lock);
+       saveset = current->blocked;
+       current->blocked = newset;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+
+       regs->eax = -EINTR;
+       while (1) {
+               current->state = TASK_INTERRUPTIBLE;
+               schedule();
+               if (do_signal(regs, &saveset))
+                       return -EINTR;
+       }
+}
+
+asmlinkage int 
+sys_sigaction(int sig, const struct old_sigaction *act,
+             struct old_sigaction *oact)
+{
+       struct k_sigaction new_ka, old_ka;
+       int ret;
+
+       if (act) {
+               old_sigset_t mask;
+               if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+                   __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+                   __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+                       return -EFAULT;
+               __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+               __get_user(mask, &act->sa_mask);
+               siginitset(&new_ka.sa.sa_mask, mask);
+       }
+
+       ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+       if (!ret && oact) {
+               if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+                   __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+                   __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+                       return -EFAULT;
+               __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+               __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+       }
+
+       return ret;
+}
+
+asmlinkage int
+sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+{
+       struct pt_regs *regs = (struct pt_regs *) &uss;
+       return do_sigaltstack(uss, uoss, regs->esp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct sigframe
+{
+       char *pretcode;
+       int sig;
+       struct sigcontext sc;
+       struct _fpstate fpstate;
+       unsigned long extramask[_NSIG_WORDS-1];
+       char retcode[8];
+};
+
+struct rt_sigframe
+{
+       char *pretcode;
+       int sig;
+       struct siginfo *pinfo;
+       void *puc;
+       struct siginfo info;
+       struct ucontext uc;
+       struct _fpstate fpstate;
+       char retcode[8];
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax)
+{
+       unsigned int err = 0;
+
+#define COPY(x)                err |= __get_user(regs->x, &sc->x)
+
+#define COPY_SEG(seg)                                                  \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         regs->x##seg = tmp; }
+
+#define COPY_SEG_STRICT(seg)                                           \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         regs->x##seg = tmp|3; }
+
+#define GET_SEG(seg)                                                   \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         loadsegment(seg,tmp); }
+
+       GET_SEG(gs);
+       GET_SEG(fs);
+       COPY_SEG(es);
+       COPY_SEG(ds);
+       COPY(edi);
+       COPY(esi);
+       COPY(ebp);
+       COPY(esp);
+       COPY(ebx);
+       COPY(edx);
+       COPY(ecx);
+       COPY(eip);
+       COPY_SEG_STRICT(cs);
+       COPY_SEG_STRICT(ss);
+       
+       {
+               unsigned int tmpflags;
+               err |= __get_user(tmpflags, &sc->eflags);
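+               /* 0x40DD5 masks in only the user-restorable flags:
+                  CF, PF, AF, ZF, SF, TF, DF, OF and AC. */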
+               regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+               regs->orig_eax = -1;            /* disable syscall checks */
+       }
+
+       {
+               struct _fpstate * buf;
+               err |= __get_user(buf, &sc->fpstate);
+               if (buf) {
+                       if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+                               goto badframe;
+                       err |= restore_i387(buf);
+               }
+       }
+
+       err |= __get_user(*peax, &sc->eax);
+       return err;
+
+badframe:
+       return 1;
+}
+
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+       struct pt_regs *regs = (struct pt_regs *) &__unused;
+       struct sigframe *frame = (struct sigframe *)(regs->esp - 8);
+       sigset_t set;
+       int eax;
+
+       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+       if (__get_user(set.sig[0], &frame->sc.oldmask)
+           || (_NSIG_WORDS > 1
+               && __copy_from_user(&set.sig[1], &frame->extramask,
+                                   sizeof(frame->extramask))))
+               goto badframe;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       spin_lock_irq(&current->sigmask_lock);
+       current->blocked = set;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+       
+       if (restore_sigcontext(regs, &frame->sc, &eax))
+               goto badframe;
+       return eax;
+
+badframe:
+       force_sig(SIGSEGV, current);
+       return 0;
+}      
+
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+       struct pt_regs *regs = (struct pt_regs *) &__unused;
+       struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4);
+       sigset_t set;
+       stack_t st;
+       int eax;
+
+       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+       if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+               goto badframe;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       spin_lock_irq(&current->sigmask_lock);
+       current->blocked = set;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+       
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
+               goto badframe;
+
+       if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+               goto badframe;
+       /* It is more difficult to avoid calling this function than to
+          call it and ignore errors.  */
+       do_sigaltstack(&st, NULL, regs->esp);
+
+       return eax;
+
+badframe:
+       force_sig(SIGSEGV, current);
+       return 0;
+}      
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
+                struct pt_regs *regs, unsigned long mask)
+{
+       int tmp, err = 0;
+
+       tmp = 0;
+       __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->gs);
+       __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->fs);
+
+       err |= __put_user(regs->xes, (unsigned int *)&sc->es);
+       err |= __put_user(regs->xds, (unsigned int *)&sc->ds);
+       err |= __put_user(regs->edi, &sc->edi);
+       err |= __put_user(regs->esi, &sc->esi);
+       err |= __put_user(regs->ebp, &sc->ebp);
+       err |= __put_user(regs->esp, &sc->esp);
+       err |= __put_user(regs->ebx, &sc->ebx);
+       err |= __put_user(regs->edx, &sc->edx);
+       err |= __put_user(regs->ecx, &sc->ecx);
+       err |= __put_user(regs->eax, &sc->eax);
+       err |= __put_user(current->thread.trap_no, &sc->trapno);
+       err |= __put_user(current->thread.error_code, &sc->err);
+       err |= __put_user(regs->eip, &sc->eip);
+       err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
+       err |= __put_user(regs->eflags, &sc->eflags);
+       err |= __put_user(regs->esp, &sc->esp_at_signal);
+       err |= __put_user(regs->xss, (unsigned int *)&sc->ss);
+
+       tmp = save_i387(fpstate);
+       if (tmp < 0)
+         err = 1;
+       else
+         err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+       /* non-iBCS2 extensions.. */
+       err |= __put_user(mask, &sc->oldmask);
+       err |= __put_user(current->thread.cr2, &sc->cr2);
+
+       return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+       unsigned long esp;
+
+       /* Default to using normal stack */
+       esp = regs->esp;
+
+       /* This is the X/Open sanctioned signal stack switching.  */
+       if (ka->sa.sa_flags & SA_ONSTACK) {
+               if (sas_ss_flags(esp) == 0)
+                       esp = current->sas_ss_sp + current->sas_ss_size;
+       }
+
+       /* This is the legacy signal stack switching. */
+       else if ((regs->xss & 0xffff) != __USER_DS &&
+                !(ka->sa.sa_flags & SA_RESTORER) &&
+                ka->sa.sa_restorer) {
+               esp = (unsigned long) ka->sa.sa_restorer;
+       }
+
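+       /* Round down to a multiple of 8 so the frame (and the FP
+          state embedded in it) is 8-byte aligned. */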
+       return (void *)((esp - frame_size) & -8ul);
+}
+
+static void setup_frame(int sig, struct k_sigaction *ka,
+                       sigset_t *set, struct pt_regs * regs)
+{
+       struct sigframe *frame;
+       int err = 0;
+
+       frame = get_sigframe(ka, regs, sizeof(*frame));
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto give_sigsegv;
+
+       err |= __put_user((current->exec_domain
+                          && current->exec_domain->signal_invmap
+                          && sig < 32
+                          ? current->exec_domain->signal_invmap[sig]
+                          : sig),
+                         &frame->sig);
+       if (err)
+               goto give_sigsegv;
+
+       err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+       if (err)
+               goto give_sigsegv;
+
+       if (_NSIG_WORDS > 1) {
+               err |= __copy_to_user(frame->extramask, &set->sig[1],
+                                     sizeof(frame->extramask));
+       }
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up to return from userspace.  If provided, use a stub
+          already in userspace.  */
+       if (ka->sa.sa_flags & SA_RESTORER) {
+               err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+       } else {
+               err |= __put_user(frame->retcode, &frame->pretcode);
+               /* This is popl %eax ; movl $,%eax ; int $0x80 */
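+               /* Byte layout (little-endian shorts): 0x58 = popl %eax,
+                  0xb8 + imm32 = movl $__NR_sigreturn,%eax, and
+                  0xcd 0x80 = int $0x80. */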
+               err |= __put_user(0xb858, (short *)(frame->retcode+0));
+               err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2));
+               err |= __put_user(0x80cd, (short *)(frame->retcode+6));
+       }
+
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up registers for signal handler */
+       regs->esp = (unsigned long) frame;
+       regs->eip = (unsigned long) ka->sa.sa_handler;
+
+       set_fs(USER_DS);
+       regs->xds = __USER_DS;
+       regs->xes = __USER_DS;
+       regs->xss = __USER_DS;
+       regs->xcs = __USER_CS;
+       regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+       printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+               current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+       return;
+
+give_sigsegv:
+       if (sig == SIGSEGV)
+               ka->sa.sa_handler = SIG_DFL;
+       force_sig(SIGSEGV, current);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+                          sigset_t *set, struct pt_regs * regs)
+{
+       struct rt_sigframe *frame;
+       int err = 0;
+
+       frame = get_sigframe(ka, regs, sizeof(*frame));
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto give_sigsegv;
+
+       err |= __put_user((current->exec_domain
+                          && current->exec_domain->signal_invmap
+                          && sig < 32
+                          ? current->exec_domain->signal_invmap[sig]
+                          : sig),
+                         &frame->sig);
+       err |= __put_user(&frame->info, &frame->pinfo);
+       err |= __put_user(&frame->uc, &frame->puc);
+       err |= copy_siginfo_to_user(&frame->info, info);
+       if (err)
+               goto give_sigsegv;
+
+       /* Create the ucontext.  */
+       err |= __put_user(0, &frame->uc.uc_flags);
+       err |= __put_user(0, &frame->uc.uc_link);
+       err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+       err |= __put_user(sas_ss_flags(regs->esp),
+                         &frame->uc.uc_stack.ss_flags);
+       err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+       err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+                               regs, set->sig[0]);
+       err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up to return from userspace.  If provided, use a stub
+          already in userspace.  */
+       if (ka->sa.sa_flags & SA_RESTORER) {
+               err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+       } else {
+               err |= __put_user(frame->retcode, &frame->pretcode);
+               /* This is movl $,%eax ; int $0x80 */
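+               /* Here byte 0 is 0xb8 (movl, immediate at offset 1) and
+                  bytes 5-6 are 0xcd 0x80 (int $0x80); unlike the non-rt
+                  stub there is no leading popl %eax. */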
+               err |= __put_user(0xb8, (char *)(frame->retcode+0));
+               err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1));
+               err |= __put_user(0x80cd, (short *)(frame->retcode+5));
+       }
+
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up registers for signal handler */
+       regs->esp = (unsigned long) frame;
+       regs->eip = (unsigned long) ka->sa.sa_handler;
+
+       set_fs(USER_DS);
+       regs->xds = __USER_DS;
+       regs->xes = __USER_DS;
+       regs->xss = __USER_DS;
+       regs->xcs = __USER_CS;
+       regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+       printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+               current->comm, current->pid, frame, regs->eip, frame->pretcode);
+#endif
+
+       return;
+
+give_sigsegv:
+       if (sig == SIGSEGV)
+               ka->sa.sa_handler = SIG_DFL;
+       force_sig(SIGSEGV, current);
+}
+
+/*
+ * OK, we're invoking a handler
+ */    
+
+static void
+handle_signal(unsigned long sig, struct k_sigaction *ka,
+             siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+{
+       /* Are we from a system call? */
+       if (regs->orig_eax >= 0) {
+               /* If so, check system call restarting.. */
+               switch (regs->eax) {
+                       case -ERESTARTNOHAND:
+                               regs->eax = -EINTR;
+                               break;
+
+                       case -ERESTARTSYS:
+                               if (!(ka->sa.sa_flags & SA_RESTART)) {
+                                       regs->eax = -EINTR;
+                                       break;
+                               }
+                       /* fallthrough */
+                       case -ERESTARTNOINTR:
+                               regs->eax = regs->orig_eax;
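+                               /* back up over the 2-byte "int $0x80"
+                                  so the syscall is re-executed */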
+                               regs->eip -= 2;
+               }
+       }
+
+       /* Set up the stack frame */
+       if (ka->sa.sa_flags & SA_SIGINFO)
+               setup_rt_frame(sig, ka, info, oldset, regs);
+       else
+               setup_frame(sig, ka, oldset, regs);
+
+       if (ka->sa.sa_flags & SA_ONESHOT)
+               ka->sa.sa_handler = SIG_DFL;
+
+       if (!(ka->sa.sa_flags & SA_NODEFER)) {
+               spin_lock_irq(&current->sigmask_lock);
+               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+               sigaddset(&current->blocked,sig);
+               recalc_sigpending(current);
+               spin_unlock_irq(&current->sigmask_lock);
+       }
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+       siginfo_t info;
+       struct k_sigaction *ka;
+
+       /*
+        * We want the common case to go fast, which
+        * is why we may in certain cases get here from
+        * kernel mode. Just return without doing anything
+        * if so.
+        */
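+       /* Under the hypervisor the kernel runs in ring 1, so bit 1 of
+          the saved CS (RPL 3 vs RPL 1) is what distinguishes user
+          mode from kernel mode here. */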
+       if ((regs->xcs & 2) != 2)
+               return 1;
+
+       if (!oldset)
+               oldset = &current->blocked;
+
+       for (;;) {
+               unsigned long signr;
+
+               spin_lock_irq(&current->sigmask_lock);
+               signr = dequeue_signal(&current->blocked, &info);
+               spin_unlock_irq(&current->sigmask_lock);
+
+               if (!signr)
+                       break;
+
+               if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
+                       /* Let the debugger run.  */
+                       current->exit_code = signr;
+                       current->state = TASK_STOPPED;
+                       notify_parent(current, SIGCHLD);
+                       schedule();
+
+                       /* We're back.  Did the debugger cancel the sig?  */
+                       if (!(signr = current->exit_code))
+                               continue;
+                       current->exit_code = 0;
+
+                       /* The debugger continued.  Ignore SIGSTOP.  */
+                       if (signr == SIGSTOP)
+                               continue;
+
+                       /* Update the siginfo structure.  Is this good?  */
+                       if (signr != info.si_signo) {
+                               info.si_signo = signr;
+                               info.si_errno = 0;
+                               info.si_code = SI_USER;
+                               info.si_pid = current->p_pptr->pid;
+                               info.si_uid = current->p_pptr->uid;
+                       }
+
+                       /* If the (new) signal is now blocked, requeue it.  */
+                       if (sigismember(&current->blocked, signr)) {
+                               send_sig_info(signr, &info, current);
+                               continue;
+                       }
+               }
+
+               ka = &current->sig->action[signr-1];
+               if (ka->sa.sa_handler == SIG_IGN) {
+                       if (signr != SIGCHLD)
+                               continue;
+                       /* Check for SIGCHLD: it's special.  */
+                       while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+                               /* nothing */;
+                       continue;
+               }
+
+               if (ka->sa.sa_handler == SIG_DFL) {
+                       int exit_code = signr;
+
+                       /* Init gets no signals it doesn't want.  */
+                       if (current->pid == 1)
+                               continue;
+
+                       switch (signr) {
+                       case SIGCONT: case SIGCHLD: case SIGWINCH:
+                               continue;
+
+                       case SIGTSTP: case SIGTTIN: case SIGTTOU:
+                               if (is_orphaned_pgrp(current->pgrp))
+                                       continue;
+                               /* FALLTHRU */
+
+                       case SIGSTOP: {
+                               struct signal_struct *sig;
+                               current->state = TASK_STOPPED;
+                               current->exit_code = signr;
+                               sig = current->p_pptr->sig;
+                               if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
+                                       notify_parent(current, SIGCHLD);
+                               schedule();
+                               continue;
+                       }
+
+                       case SIGQUIT: case SIGILL: case SIGTRAP:
+                       case SIGABRT: case SIGFPE: case SIGSEGV:
+                       case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+                               if (do_coredump(signr, regs))
+                                       exit_code |= 0x80;
+                               /* FALLTHRU */
+
+                       default:
+                               sigaddset(&current->pending.signal, signr);
+                               recalc_sigpending(current);
+                               current->flags |= PF_SIGNALED;
+                               do_exit(exit_code);
+                               /* NOTREACHED */
+                       }
+               }
+
+               /* Whee!  Actually deliver the signal.  */
+               handle_signal(signr, ka, &info, oldset, regs);
+               return 1;
+       }
+
+       /* Did we come from a system call? */
+       if (regs->orig_eax >= 0) {
+               /* Restart the system call - no handlers present */
+               if (regs->eax == -ERESTARTNOHAND ||
+                   regs->eax == -ERESTARTSYS ||
+                   regs->eax == -ERESTARTNOINTR) {
+                       regs->eax = regs->orig_eax;
+                       regs->eip -= 2;
+               }
+       }
+       return 0;
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c
new file mode 100644 (file)
index 0000000..5fd6910
--- /dev/null
@@ -0,0 +1,256 @@
+/*
+ * linux/arch/i386/kernel/sys_i386.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/i386
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+       int fd[2];
+       int error;
+
+       error = do_pipe(fd);
+       if (!error) {
+               if (copy_to_user(fildes, fd, 2*sizeof(int)))
+                       error = -EFAULT;
+       }
+       return error;
+}
+
+/* common code for old and new mmaps */
+static inline long do_mmap2(
+       unsigned long addr, unsigned long len,
+       unsigned long prot, unsigned long flags,
+       unsigned long fd, unsigned long pgoff)
+{
+       int error = -EBADF;
+       struct file * file = NULL;
+
+       flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+       if (!(flags & MAP_ANONYMOUS)) {
+               file = fget(fd);
+               if (!file)
+                       goto out;
+       }
+
+       down_write(&current->mm->mmap_sem);
+       error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+       up_write(&current->mm->mmap_sem);
+
+       if (file)
+               fput(file);
+out:
+       return error;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+       unsigned long prot, unsigned long flags,
+       unsigned long fd, unsigned long pgoff)
+{
+       return do_mmap2(addr, len, prot, flags, fd, pgoff);
+}
+
+/*
+ * Perform the select(nd, in, out, ex, tv) and mmap() system
+ * calls. Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+       unsigned long addr;
+       unsigned long len;
+       unsigned long prot;
+       unsigned long flags;
+       unsigned long fd;
+       unsigned long offset;
+};
+
+asmlinkage int old_mmap(struct mmap_arg_struct *arg)
+{
+       struct mmap_arg_struct a;
+       int err = -EFAULT;
+
+       if (copy_from_user(&a, arg, sizeof(a)))
+               goto out;
+
+       err = -EINVAL;
+       if (a.offset & ~PAGE_MASK)
+               goto out;
+
+       err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+out:
+       return err;
+}
+
+
+extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
+
+struct sel_arg_struct {
+       unsigned long n;
+       fd_set *inp, *outp, *exp;
+       struct timeval *tvp;
+};
+
+asmlinkage int old_select(struct sel_arg_struct *arg)
+{
+       struct sel_arg_struct a;
+
+       if (copy_from_user(&a, arg, sizeof(a)))
+               return -EFAULT;
+       /* sys_select() does the appropriate kernel locking */
+       return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+asmlinkage int sys_ipc (uint call, int first, int second,
+                       int third, void *ptr, long fifth)
+{
+       int version, ret;
+
+       version = call >> 16; /* hack for backward compatibility */
+       call &= 0xffff;
+
+       switch (call) {
+       case SEMOP:
+               return sys_semop (first, (struct sembuf *)ptr, second);
+       case SEMGET:
+               return sys_semget (first, second, third);
+       case SEMCTL: {
+               union semun fourth;
+               if (!ptr)
+                       return -EINVAL;
+               if (get_user(fourth.__pad, (void **) ptr))
+                       return -EFAULT;
+               return sys_semctl (first, second, third, fourth);
+       }
+
+       case MSGSND:
+               return sys_msgsnd (first, (struct msgbuf *) ptr, 
+                                  second, third);
+       case MSGRCV:
+               switch (version) {
+               case 0: {
+                       struct ipc_kludge tmp;
+                       if (!ptr)
+                               return -EINVAL;
+                       
+                       if (copy_from_user(&tmp,
+                                          (struct ipc_kludge *) ptr, 
+                                          sizeof (tmp)))
+                               return -EFAULT;
+                       return sys_msgrcv (first, tmp.msgp, second,
+                                          tmp.msgtyp, third);
+               }
+               default:
+                       return sys_msgrcv (first,
+                                          (struct msgbuf *) ptr,
+                                          second, fifth, third);
+               }
+       case MSGGET:
+               return sys_msgget ((key_t) first, second);
+       case MSGCTL:
+               return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+       case SHMAT:
+               switch (version) {
+               default: {
+                       ulong raddr;
+                       ret = sys_shmat (first, (char *) ptr, second, &raddr);
+                       if (ret)
+                               return ret;
+                       return put_user (raddr, (ulong *) third);
+               }
+               case 1: /* iBCS2 emulator entry point */
+                       if (!segment_eq(get_fs(), get_ds()))
+                               return -EINVAL;
+                       return sys_shmat (first, (char *) ptr, second, (ulong *) third);
+               }
+       case SHMDT: 
+               return sys_shmdt ((char *)ptr);
+       case SHMGET:
+               return sys_shmget (first, second, third);
+       case SHMCTL:
+               return sys_shmctl (first, second,
+                                  (struct shmid_ds *) ptr);
+       default:
+               return -EINVAL;
+       }
+}
+
+/*
+ * Old cruft
+ */
+asmlinkage int sys_uname(struct old_utsname * name)
+{
+       int err;
+       if (!name)
+               return -EFAULT;
+       down_read(&uts_sem);
+       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       up_read(&uts_sem);
+       return err?-EFAULT:0;
+}
+
+asmlinkage int sys_olduname(struct oldold_utsname * name)
+{
+       int error;
+
+       if (!name)
+               return -EFAULT;
+       if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+               return -EFAULT;
+  
+       down_read(&uts_sem);
+       
+       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error |= __put_user(0,name->release+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error |= __put_user(0,name->version+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+       error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+       
+       up_read(&uts_sem);
+       
+       error = error ? -EFAULT : 0;
+
+       return error;
+}
+
+asmlinkage int sys_pause(void)
+{
+       current->state = TASK_INTERRUPTIBLE;
+       schedule();
+       return -ERESTARTNOHAND;
+}
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c
new file mode 100644 (file)
index 0000000..1e331e5
--- /dev/null
@@ -0,0 +1,305 @@
+/*
+ *  linux/arch/i386/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the PC-specific time handling details:
+ * reading the RTC at bootup, etc..
+ * 1994-07-02    Alan Modra
+ *     fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ * 1995-03-26    Markus Kuhn
+ *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
+ *      precision CMOS clock update
+ * 1996-05-03    Ingo Molnar
+ *      fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *             "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-09-05    (Various)
+ *     More robust do_fast_gettimeoffset() algorithm implemented
+ *     (works with APM, Cyrix 6x86MX and Centaur C6),
+ *     monotonic gettimeofday() with fast_get_timeoffset(),
+ *     drift-proof precision TSC calibration on boot
+ *     (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
+ *     Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
+ *     ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
+ * 1998-12-16    Andrea Arcangeli
+ *     Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ *     because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
+ *     Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *     serialize accesses to xtime/lost_ticks).
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+#include <linux/mc146818rtc.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+
+#include <asm/fixmap.h>
+#include <asm/hypervisor.h>
+
+#include <linux/irq.h>
+
+
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+
+/* Cached *multiplier* to convert TSC counts to microseconds.
+ * (see the equation below).
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+unsigned long fast_gettimeoffset_quotient;
+
+extern rwlock_t xtime_lock;
+extern unsigned long wall_jiffies;
+
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
+static inline unsigned long ticks_to_secs(unsigned long long ticks)
+{
+    unsigned long lo, hi;
+    unsigned long little_ticks;
+
+    little_ticks = ticks /* XXX URK! XXX / 1000000ULL */;
+
+    __asm__ __volatile__ (
+        "mull %2"
+        : "=a" (lo), "=d" (hi)
+        : "rm" (fast_gettimeoffset_quotient), "0" (little_ticks) );
+
+    return(hi);
+}
+
+/* NB. Only 32 bits of ticks are considered here. */
+static inline unsigned long ticks_to_us(unsigned long ticks)
+{
+    unsigned long lo, hi;
+
+    __asm__ __volatile__ (
+        "mull %2"
+        : "=a" (lo), "=d" (hi)
+        : "rm" (fast_gettimeoffset_quotient), "0" (ticks) );
+
+    return(hi);
+}
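+
+/* A portable sketch of the trick above (illustrative only, assuming a
+ * 64-bit type is available): the mull leaves the full 64-bit product
+ * in edx:eax, so returning the high half is a free ">> 32":
+ *
+ *     us = (unsigned long)(((unsigned long long)ticks *
+ *                           fast_gettimeoffset_quotient) >> 32);
+ */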
+
+static inline unsigned long do_gettimeoffset(void)
+{
+#if 0
+    register unsigned long eax, edx;
+
+    /* Read the Time Stamp Counter */
+
+    rdtsc(eax,edx);
+
+    /* .. relative to previous jiffy (32 bits is enough) */
+    eax -= last_tsc_low;       /* tsc_low delta */
+
+    /*
+     * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+     *             = (tsc_low delta) * (usecs_per_clock)
+     *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+     *
+     * Using a mull instead of a divl saves up to 31 clock cycles
+     * in the critical path.
+     */
+    
+    edx = ticks_to_us(eax);
+
+    /* our adjusted time offset in microseconds */
+    return delay_at_last_interrupt + edx;
+#else
+    /*
+     * We should keep a 'last_tsc_low' thing which incorporates 
+     * delay_at_last_interrupt, adjusted in timer_interrupt after
+     * do_timer_interrupt. It would look at change in xtime, and
+     * make appropriate adjustment to a last_tsc variable.
+     * 
+     * We'd be affected by rounding error in ticks_per_usec, and by
+     * processor clock drift (which should be no more than in an
+     * external interrupt source anyhow). 
+     * 
+     * Perhaps a bit rough and ready, but never mind!
+     */
+    return 0;
+#endif
+}
+
+/*
+ * This version of gettimeofday has microsecond resolution
+ * and better than microsecond precision on fast x86 machines with TSC.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+    unsigned long flags;
+    unsigned long usec, sec, lost;
+
+    read_lock_irqsave(&xtime_lock, flags);
+    usec = do_gettimeoffset();
+    lost = jiffies - wall_jiffies;
+    if ( lost != 0 ) usec += lost * (1000000 / HZ);
+    sec = xtime.tv_sec;
+    usec += xtime.tv_usec;
+    read_unlock_irqrestore(&xtime_lock, flags);
+
+    while ( usec >= 1000000 ) 
+    {
+        usec -= 1000000;
+        sec++;
+    }
+
+    tv->tv_sec = sec;
+    tv->tv_usec = usec;
+}
+
+void do_settimeofday(struct timeval *tv)
+{
+    write_lock_irq(&xtime_lock);
+    /*
+     * This is revolting. We need to set "xtime" correctly. However, the
+     * value in this location is the value at the most recent update of
+     * wall time.  Discover what correction gettimeofday() would have
+     * made, and then undo it!
+     */
+    tv->tv_usec -= do_gettimeoffset();
+    tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
+
+    while ( tv->tv_usec < 0 )
+    {
+        tv->tv_usec += 1000000;
+        tv->tv_sec--;
+    }
+
+    xtime = *tv;
+    time_adjust = 0;           /* stop active adjtime() */
+    time_status |= STA_UNSYNC;
+    time_maxerror = NTP_PHASE_LIMIT;
+    time_esterror = NTP_PHASE_LIMIT;
+    write_unlock_irq(&xtime_lock);
+}
+
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+static inline void do_timer_interrupt(
+    int irq, void *dev_id, struct pt_regs *regs)
+{
+    do_timer(regs);
+#if 0
+    if (!user_mode(regs))
+        x86_do_profile(regs->eip);
+#endif
+}
+
+
+/*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+    write_lock(&xtime_lock);
+    do_timer_interrupt(irq, NULL, regs);
+    write_unlock(&xtime_lock);
+}
+
+static struct irqaction irq_timer = {
+    timer_interrupt, 
+    SA_INTERRUPT, 
+    0, 
+    "timer", 
+    NULL, 
+    NULL
+};
+
+
+unsigned long get_cmos_time(void)
+{
+    unsigned long secs = HYPERVISOR_shared_info->rtc_time;
+    unsigned long diff;
+
+    rdtscl(diff);
+    diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp;
+
+    secs += ticks_to_us(diff);
+
+    return(secs + ticks_to_secs(diff));
+}
+
+
+/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */
+static unsigned long __init calibrate_tsc(void)
+{
+    unsigned long quo, rem;
+
+    /* quotient == (1000 * 2^32) / ticks_per ms */
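+    /* e.g. a 500MHz TSC gives ticks_per_ms = 500000, so
+       quo = 1000 * 2^32 / 500000 = 2^32 / 500
+           = 2^32 * (1 / (TSC clocks per usec)), as required. */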
+    __asm__ __volatile__ (
+        "divl %2"
+        : "=a" (quo), "=d" (rem)
+        : "r" (HYPERVISOR_shared_info->ticks_per_ms), "0" (0), "1" (1000) );
+
+    return(quo);
+}
+
+void __init time_init(void)
+{
+    unsigned long long alarm;
+       
+    fast_gettimeoffset_quotient = calibrate_tsc();
+    do_get_fast_time = do_gettimeofday;
+
+    /* report CPU clock rate in Hz.
+     * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+     * clock/second. Our precision is about 100 ppm.
+     */
+    {  
+        unsigned long eax=0, edx=1000;
+        __asm__ __volatile__
+            ("divl %2"
+             :"=a" (cpu_khz), "=d" (edx)
+             :"r" (fast_gettimeoffset_quotient),
+             "0" (eax), "1" (edx));
+        printk("Detected %lu.%03lu MHz processor.\n", 
+               cpu_khz / 1000, cpu_khz % 1000);
+    }
+
+    setup_irq(TIMER_IRQ, &irq_timer);
+
+    /*
+     * Start ticker. Note that timing runs off wall clock, not virtual
+     * 'domain' time. This means that the clock should run at the correct
+     * rate. For things like scheduling, it's not clear whether it
+     * matters which sort of time we use.
+     */
+    rdtscll(alarm);
+    alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms;
+    HYPERVISOR_shared_info->wall_timeout   = alarm;
+    HYPERVISOR_shared_info->domain_timeout = ~0ULL;
+    clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events);
+
+    xtime.tv_sec = get_cmos_time();
+    xtime.tv_usec = 0;
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c
new file mode 100644 (file)
index 0000000..6e8f347
--- /dev/null
@@ -0,0 +1,517 @@
+/*
+ *  linux/arch/i386/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+
+#include <linux/kdb.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+
+#include <asm/hypervisor.h>
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+asmlinkage int system_call(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+asmlinkage void machine_check(void);
+
+int kstack_depth_to_print = 24;
+
+
+/*
+ * If the address is either in the .text section of the
+ * kernel, or in the vmalloc'ed module regions, it *may* 
+ * be the address of a calling routine
+ */
+
+#ifdef CONFIG_MODULES
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+       int retval = 0;
+       struct module *mod;
+
+       if (addr >= (unsigned long) &_stext &&
+           addr <= (unsigned long) &_etext)
+               return 1;
+
+       for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+               /* mod_bound tests for addr being inside the vmalloc'ed
+                * module area. Of course it'd be better to test only
+                * for the .text subset... */
+               if (mod_bound(addr, 0, mod)) {
+                       retval = 1;
+                       break;
+               }
+       }
+
+       return retval;
+}
+
+#else
+
+static inline int kernel_text_address(unsigned long addr)
+{
+       return (addr >= (unsigned long) &_stext &&
+               addr <= (unsigned long) &_etext);
+}
+
+#endif
+
+void show_trace(unsigned long * stack)
+{
+       int i;
+       unsigned long addr;
+
+       if (!stack)
+               stack = (unsigned long*)&stack;
+
+       printk("Call Trace: ");
+       i = 1;
+       while (((long) stack & (THREAD_SIZE-1)) != 0) {
+               addr = *stack++;
+               if (kernel_text_address(addr)) {
+                       if (i && ((i % 6) == 0))
+                               printk("\n   ");
+                       printk("[<%08lx>] ", addr);
+                       i++;
+               }
+       }
+       printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+       unsigned long esp = tsk->thread.esp;
+
+       /* User space on another CPU? */
+       if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1))
+               return;
+       show_trace((unsigned long *)esp);
+}
+
+void show_stack(unsigned long * esp)
+{
+       unsigned long *stack;
+       int i;
+
+       // debugging aid: "show_stack(NULL);" prints the
+       // back trace for this cpu.
+
+       if(esp==NULL)
+               esp=(unsigned long*)&esp;
+
+       stack = esp;
+       for(i=0; i < kstack_depth_to_print; i++) {
+               if (((long) stack & (THREAD_SIZE-1)) == 0)
+                       break;
+               if (i && ((i % 8) == 0))
+                       printk("\n       ");
+               printk("%08lx ", *stack++);
+       }
+       printk("\n");
+       show_trace(esp);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+       int i;
+       int in_kernel = 1;
+       unsigned long esp;
+       unsigned short ss;
+
+       esp = (unsigned long) (&regs->esp);
+       ss = __KERNEL_DS;
+       if (regs->xcs & 2) {
+               in_kernel = 0;
+               esp = regs->esp;
+               ss = regs->xss & 0xffff;
+       }
+       printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s\nEFLAGS: %08lx\n",
+               smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags);
+       printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+               regs->eax, regs->ebx, regs->ecx, regs->edx);
+       printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+               regs->esi, regs->edi, regs->ebp, esp);
+       printk("ds: %04x   es: %04x   ss: %04x\n",
+               regs->xds & 0xffff, regs->xes & 0xffff, ss);
+       printk("Process %s (pid: %d, stackpage=%08lx)",
+               current->comm, current->pid, 4096+(unsigned long)current);
+       /*
+        * When in-kernel, we also print out the stack and code at the
+        * time of the fault..
+        */
+       if (in_kernel) {
+
+               printk("\nStack: ");
+               show_stack((unsigned long*)esp);
+
+#if 0
+               printk("\nCode: ");
+               if(regs->eip < PAGE_OFFSET)
+                       goto bad;
+
+               for(i=0;i<20;i++)
+               {
+                       unsigned char c;
+                       if(__get_user(c, &((unsigned char*)regs->eip)[i])) {
+bad:
+                               printk(" Bad EIP value.");
+                               break;
+                       }
+                       printk("%02x ", c);
+               }
+#endif
+       }
+       printk("\n");
+}      
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+       console_verbose();
+       spin_lock_irq(&die_lock);
+       bust_spinlocks(1);
+       printk("%s: %04lx\n", str, err & 0xffff);
+       show_registers(regs);
+       bust_spinlocks(0);
+       spin_unlock_irq(&die_lock);
+       do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+       if (!(2 & regs->xcs))
+               die(str, regs, err);
+}
+
+
+static void inline do_trap(int trapnr, int signr, char *str,
+                          struct pt_regs * regs, long error_code,
+                           siginfo_t *info)
+{
+       if (!(regs->xcs & 2))
+               goto kernel_trap;
+
+       /*trap_signal:*/ {
+               struct task_struct *tsk = current;
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_no = trapnr;
+               if (info)
+                       force_sig_info(signr, info, tsk);
+               else
+                       force_sig(signr, tsk);
+               return;
+       }
+
+       kernel_trap: {
+               unsigned long fixup = search_exception_table(regs->eip);
+               if (fixup)
+                       regs->eip = fixup;
+               else    
+                       die(str, regs, error_code);
+               return;
+       }
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       do_trap(trapnr, signr, str, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       siginfo_t info; \
+       info.si_signo = signr; \
+       info.si_errno = 0; \
+       info.si_code = sicode; \
+       info.si_addr = (void *)siaddr; \
+       do_trap(trapnr, signr, str, regs, error_code, &info); \
+}
+
+DO_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->eip)
+DO_ERROR( 3, SIGTRAP, "int3", int3)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+DO_ERROR(18, SIGBUS, "machine check", machine_check)
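
For readers tracing the expansion: each DO_ERROR line above stamps out a complete handler. DO_ERROR(3, SIGTRAP, "int3", int3), for instance, expands to the equivalent of:

    /* Hand-expanded from DO_ERROR(3, SIGTRAP, "int3", int3): */
    asmlinkage void do_int3(struct pt_regs * regs, long error_code)
    {
            do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
    }

The DO_ERROR_INFO variant differs only in filling a siginfo_t first, so the signalled process also learns the faulting address (regs->eip) and a machine-independent si_code.
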
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+       if (!(regs->xcs & 2))
+               goto gp_in_kernel;
+
+       current->thread.error_code = error_code;
+       current->thread.trap_no = 13;
+       force_sig(SIGSEGV, current);
+       return;
+
+gp_in_kernel:
+       {
+               unsigned long fixup;
+               fixup = search_exception_table(regs->eip);
+               if (fixup) {
+                       regs->eip = fixup;
+                       return;
+               }
+               die("general protection fault", regs, error_code);
+       }
+}
+
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+    /*
+     * We don't mess with breakpoints, so the only way this exception
+     * type can occur is through single-step mode.
+     */
+    regs->eflags &= ~TF_MASK;
+}
+
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour.
+ */
+void math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short cwd, swd;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 16;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * (~cwd & swd) will mask out exceptions that are not set to unmasked
+        * status.  0x3f is the exception bits in these regs, 0x200 is the
+        * C1 reg you need in case of a stack fault, 0x040 is the stack
+        * fault bit.  We should only be taking one exception at a time,
+        * so if this combination doesn't produce any single exception,
+        * then we have a bad program that isn't synchronizing its FPU usage
+        * and it will suffer the consequences since we won't be able to
+        * fully reproduce the context of the exception.
+        */
+       cwd = get_fpu_cwd(task);
+       swd = get_fpu_swd(task);
+       switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+               case 0x040: /* Stack Fault */
+               case 0x240: /* Stack Fault | Direction */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
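
A worked example of the decode above, with hypothetical register values: suppose only zero-divide is unmasked (ZM, bit 2 of the control word, clear) and the FPU has raised it (ZE, bit 2 of the status word, set):

    /* Hypothetical x87 state: cwd = default 0x037f with ZM cleared,
     * swd reporting a zero-divide.  Mirrors the switch in math_error(). */
    unsigned short cwd = 0x037b, swd = 0x0004;
    unsigned short key = ((~cwd) & swd & 0x3f) | (swd & 0x240);
    /* (~0x037b & 0x0004 & 0x3f) == 0x004 and (0x0004 & 0x240) == 0,
     * so key == 0x004 and the switch selects FPE_FLTDIV.              */
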
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+       ignore_irq13 = 1;
+       math_error((void *)regs->eip);
+}
+
+void simd_math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short mxcsr;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 19;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * The SIMD FPU exceptions are handled a little differently, as there
+        * is only a single status/control register.  Thus, to determine which
+        * unmasked exception was caught we must mask the exception mask bits
+        * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+        */
+       mxcsr = get_fpu_mxcsr(task);
+       switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
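
The SIMD path can be checked the same way. With only zero-divide unmasked (ZM, bit 9, clear) and its flag raised (ZE, bit 2, set), MXCSR would read 0x1d84, a hypothetical value:

    /* Hypothetical MXCSR decode, mirroring simd_math_error() above. */
    static unsigned short simd_fault_key(unsigned short mxcsr)
    {
            /* align the mask bits (7-12) over the flag bits (0-5),
             * then keep only flags whose mask is clear              */
            return ~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f);
    }
    /* simd_fault_key(0x1d84) == 0x004  =>  FPE_FLTDIV               */
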
+
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+                                         long error_code)
+{
+       if (cpu_has_xmm) {
+               /* Handle SIMD FPU exceptions on PIII+ processors. */
+               ignore_irq13 = 1;
+               simd_math_error((void *)regs->eip);
+       } else {
+               die_if_kernel("cache flush denied", regs, error_code);
+               current->thread.trap_no = 19;
+               current->thread.error_code = error_code;
+               force_sig(SIGSEGV, current);
+       }
+}
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+                                         long error_code)
+{
+}
+
+/*
+ *  'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(struct pt_regs regs)
+{
+       if (current->used_math) {
+               restore_fpu(current);
+       } else {
+               init_fpu();
+       }
+       current->flags |= PF_USEDFPU;   /* So we fnsave on switch_to() */
+}
+
+static trap_info_t trap_table[] = {
+    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
+    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
+    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
+    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
+    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
+    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
+    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
+    {  8, 0, __KERNEL_CS, (unsigned long)double_fault                },
+    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
+    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
+    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
+    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
+    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
+    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug      },
+    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
+    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
+    { 18, 0, __KERNEL_CS, (unsigned long)machine_check               },
+    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
+    { SYSCALL_VECTOR, 
+          3, __KERNEL_CS, (unsigned long)system_call                 },
+    {  0, 0,           0, 0                           }
+};
+    
+
+
+void __init trap_init(void)
+{
+    HYPERVISOR_set_trap_table(trap_table);    
+    cpu_init();
+}
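
This is the paravirtual twist: rather than writing gate descriptors into an IDT with set_*_gate(), the guest hands Xen the whole table in one hypercall and the hypervisor bounces exceptions back through these entries. Judging by the initializers above, each entry carries a vector, the most-privileged ring allowed to raise it with int $n (3 for int3, overflow, bounds and the syscall vector; 0 for everything else), a code selector, and the handler address. A sketch of the assumed layout (the authoritative definition lives in the hypervisor interface headers):

    /* Assumed field layout of trap_info_t, inferred from the table
     * initializers above -- treat as illustrative, not definitive.  */
    typedef struct {
            unsigned char  vector;    /* exception vector number      */
            unsigned char  dpl;       /* ring allowed to int $vector  */
            unsigned short cs;        /* handler code selector        */
            unsigned long  address;   /* handler entry point          */
    } trap_info_t;
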
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile
new file mode 100644 (file)
index 0000000..2224f03
--- /dev/null
@@ -0,0 +1,15 @@
+
+.S.o:
+       $(CC) $(AFLAGS) -c $< -o $*.o
+
+L_TARGET = lib.a
+
+obj-y = checksum.o old-checksum.o delay.o \
+       usercopy.o getuser.o \
+       memcpy.o strstr.o
+
+obj-$(CONFIG_X86_USE_3DNOW) += mmx.o
+obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+obj-$(CONFIG_DEBUG_IOVIRT)  += iodebug.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S b/xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S
new file mode 100644 (file)
index 0000000..bfc6389
--- /dev/null
@@ -0,0 +1,460 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             IP/TCP/UDP checksumming routines
+ *
+ * Authors:    Jorge Cwik, <jorge@laser.satlink.net>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Tom May, <ftom@netcom.com>
+ *              Pentium Pro/II routines:
+ *              Alexander Kjeldaas <astor@guardian.no>
+ *              Finn Arne Gangstad <finnag@guardian.no>
+ *             Lots of code moved from tcp.c and ip.c; see those files
+ *             for more names.
+ *
+ * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ *                          handling.
+ *             Andi Kleen,  add zeroing on error
+ *                   converted to pure assembler
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/errno.h>
+                               
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*     
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+               
+.text
+.align 4
+.globl csum_partial                                                            
+               
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+         /*            
+          * Experiments with Ethernet and SLIP connections show that buff
+          * is aligned on either a 2-byte or 4-byte boundary.  We get at
+          * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+          * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+          * alignment for the unrolled loop.
+          */           
+csum_partial:  
+       pushl %esi
+       pushl %ebx
+       movl 20(%esp),%eax      # Function arg: unsigned int sum
+       movl 16(%esp),%ecx      # Function arg: int len
+       movl 12(%esp),%esi      # Function arg: unsigned char *buff
+       testl $2, %esi          # Check alignment.
+       jz 2f                   # Jump if alignment is ok.
+       subl $2, %ecx           # Alignment uses up two bytes.
+       jae 1f                  # Jump if we had at least two bytes.
+       addl $2, %ecx           # ecx was < 2.  Deal with it.
+       jmp 4f
+1:     movw (%esi), %bx
+       addl $2, %esi
+       addw %bx, %ax
+       adcl $0, %eax
+2:
+       movl %ecx, %edx
+       shrl $5, %ecx
+       jz 2f
+       testl %esi, %esi        # clear CF before the adcl loop
+1:     movl (%esi), %ebx
+       adcl %ebx, %eax
+       movl 4(%esi), %ebx
+       adcl %ebx, %eax
+       movl 8(%esi), %ebx
+       adcl %ebx, %eax
+       movl 12(%esi), %ebx
+       adcl %ebx, %eax
+       movl 16(%esi), %ebx
+       adcl %ebx, %eax
+       movl 20(%esi), %ebx
+       adcl %ebx, %eax
+       movl 24(%esi), %ebx
+       adcl %ebx, %eax
+       movl 28(%esi), %ebx
+       adcl %ebx, %eax
+       lea 32(%esi), %esi
+       dec %ecx
+       jne 1b
+       adcl $0, %eax
+2:     movl %edx, %ecx
+       andl $0x1c, %edx
+       je 4f
+       shrl $2, %edx           # This clears CF
+3:     adcl (%esi), %eax
+       lea 4(%esi), %esi
+       dec %edx
+       jne 3b
+       adcl $0, %eax
+4:     andl $3, %ecx
+       jz 7f
+       cmpl $2, %ecx
+       jb 5f
+       movw (%esi),%cx
+       leal 2(%esi),%esi
+       je 6f
+       shll $16,%ecx
+5:     movb (%esi),%cl
+6:     addl %ecx,%eax
+       adcl $0, %eax 
+7:     
+       popl %ebx
+       popl %esi
+       ret
+
+#else
+
+/* Version for PentiumII/PPro */
+
+csum_partial:
+       pushl %esi
+       pushl %ebx
+       movl 20(%esp),%eax      # Function arg: unsigned int sum
+       movl 16(%esp),%ecx      # Function arg: int len
+       movl 12(%esp),%esi      # Function arg: const unsigned char *buf
+
+       testl $2, %esi         
+       jnz 30f                 
+10:
+       movl %ecx, %edx
+       movl %ecx, %ebx
+       andl $0x7c, %ebx
+       shrl $7, %ecx
+       addl %ebx,%esi
+       shrl $2, %ebx  
+       negl %ebx
+       lea 45f(%ebx,%ebx,2), %ebx
+       testl %esi, %esi
+       jmp *%ebx
+
+       # Handle 2-byte-aligned regions
+20:    addw (%esi), %ax
+       lea 2(%esi), %esi
+       adcl $0, %eax
+       jmp 10b
+
+30:    subl $2, %ecx          
+       ja 20b                 
+       je 32f
+       movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
+       addl %ebx, %eax
+       adcl $0, %eax
+       jmp 80f
+32:
+       addw (%esi), %ax        # csumming 2 bytes, 2-aligned
+       adcl $0, %eax
+       jmp 80f
+
+40: 
+       addl -128(%esi), %eax
+       adcl -124(%esi), %eax
+       adcl -120(%esi), %eax
+       adcl -116(%esi), %eax   
+       adcl -112(%esi), %eax   
+       adcl -108(%esi), %eax
+       adcl -104(%esi), %eax
+       adcl -100(%esi), %eax
+       adcl -96(%esi), %eax
+       adcl -92(%esi), %eax
+       adcl -88(%esi), %eax
+       adcl -84(%esi), %eax
+       adcl -80(%esi), %eax
+       adcl -76(%esi), %eax
+       adcl -72(%esi), %eax
+       adcl -68(%esi), %eax
+       adcl -64(%esi), %eax     
+       adcl -60(%esi), %eax     
+       adcl -56(%esi), %eax     
+       adcl -52(%esi), %eax   
+       adcl -48(%esi), %eax   
+       adcl -44(%esi), %eax
+       adcl -40(%esi), %eax
+       adcl -36(%esi), %eax
+       adcl -32(%esi), %eax
+       adcl -28(%esi), %eax
+       adcl -24(%esi), %eax
+       adcl -20(%esi), %eax
+       adcl -16(%esi), %eax
+       adcl -12(%esi), %eax
+       adcl -8(%esi), %eax
+       adcl -4(%esi), %eax
+45:
+       lea 128(%esi), %esi
+       adcl $0, %eax
+       dec %ecx
+       jge 40b
+       movl %edx, %ecx
+50:    andl $3, %ecx
+       jz 80f
+
+       # Handle the last 1-3 bytes without jumping
+       notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
+       movl $0xffffff,%ebx     # by the shll and shrl instructions
+       shll $3,%ecx
+       shrl %cl,%ebx
+       andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
+       addl %ebx,%eax
+       adcl $0,%eax
+80: 
+       popl %ebx
+       popl %esi
+       ret
+                               
+#endif
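
Both variants above compute the same function; a plain-C reference helps pin down the contract (a sketch only, equivalent to the assembly up to the final 16-bit fold that callers perform):

    /* Reference semantics of csum_partial(): accumulate the buffer
     * into 'sum' as little-endian 16-bit words with end-around carry. */
    unsigned int csum_partial_ref(const unsigned char *buff, int len,
                                  unsigned int sum)
    {
            unsigned long long acc = sum;
            while (len > 1) {
                    acc  += buff[0] | (buff[1] << 8);
                    buff += 2;
                    len  -= 2;
            }
            if (len)                        /* trailing odd byte      */
                    acc += buff[0];
            while (acc >> 32)               /* fold carries back in   */
                    acc = (acc & 0xffffffffULL) + (acc >> 32);
            return (unsigned int)acc;
    }
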
+
+/*
+unsigned int csum_partial_copy_generic (const char *src, char *dst,
+                                 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ */ 
+
+/*
+ * Copy from ds while checksumming, otherwise like csum_partial
+ *
+ * The macros SRC and DST specify the type of access for the instruction.
+ * thus we can call a custom exception handler for all access types.
+ *
+ * FIXME: could someone double-check whether I haven't mixed up some SRC and
+ *       DST definitions? It's damn hard to trigger all cases.  I hope I got
+ *       them all but there's no guarantee.
+ */
+
+#define SRC(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .long 9999b, 6001f      ;       \
+       .previous
+
+#define DST(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .long 9999b, 6002f      ;       \
+       .previous
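
Every SRC()/DST() use therefore plants one entry in the __ex_table section: the address of the guarded instruction (local label 9999) paired with its recovery point, 6001f for faults on the source and 6002f for faults on the destination. Substituting by hand, SRC(movl (%esi), %ebx) emits:

    9999:   movl (%esi), %ebx           # the guarded access
            .section __ex_table, "a"
            .long 9999b, 6001f          # fault address -> fixup address
            .previous

The page-fault path looks the faulting EIP up in this table (see search_exception_table() in mm/extable.c below) and, on a hit, restarts at the fixup instead of oopsing.
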
+
+.align 4
+.globl csum_partial_copy_generic
+                               
+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
+
+#define ARGBASE 16             
+#define FP             12
+               
+csum_partial_copy_generic:
+       subl  $4,%esp   
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+       movl ARGBASE+16(%esp),%eax      # sum
+       movl ARGBASE+12(%esp),%ecx      # len
+       movl ARGBASE+4(%esp),%esi       # src
+       movl ARGBASE+8(%esp),%edi       # dst
+
+       testl $2, %edi                  # Check alignment. 
+       jz 2f                           # Jump if alignment is ok.
+       subl $2, %ecx                   # Alignment uses up two bytes.
+       jae 1f                          # Jump if we had at least two bytes.
+       addl $2, %ecx                   # ecx was < 2.  Deal with it.
+       jmp 4f
+SRC(1: movw (%esi), %bx        )
+       addl $2, %esi
+DST(   movw %bx, (%edi)        )
+       addl $2, %edi
+       addw %bx, %ax   
+       adcl $0, %eax
+2:
+       movl %ecx, FP(%esp)
+       shrl $5, %ecx
+       jz 2f
+       testl %esi, %esi        # clear CF before the adcl loop
+SRC(1: movl (%esi), %ebx       )
+SRC(   movl 4(%esi), %edx      )
+       adcl %ebx, %eax
+DST(   movl %ebx, (%edi)       )
+       adcl %edx, %eax
+DST(   movl %edx, 4(%edi)      )
+
+SRC(   movl 8(%esi), %ebx      )
+SRC(   movl 12(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 8(%edi)      )
+       adcl %edx, %eax
+DST(   movl %edx, 12(%edi)     )
+
+SRC(   movl 16(%esi), %ebx     )
+SRC(   movl 20(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 16(%edi)     )
+       adcl %edx, %eax
+DST(   movl %edx, 20(%edi)     )
+
+SRC(   movl 24(%esi), %ebx     )
+SRC(   movl 28(%esi), %edx     )
+       adcl %ebx, %eax
+DST(   movl %ebx, 24(%edi)     )
+       adcl %edx, %eax
+DST(   movl %edx, 28(%edi)     )
+
+       lea 32(%esi), %esi
+       lea 32(%edi), %edi
+       dec %ecx
+       jne 1b
+       adcl $0, %eax
+2:     movl FP(%esp), %edx
+       movl %edx, %ecx
+       andl $0x1c, %edx
+       je 4f
+       shrl $2, %edx                   # This clears CF
+SRC(3: movl (%esi), %ebx       )
+       adcl %ebx, %eax
+DST(   movl %ebx, (%edi)       )
+       lea 4(%esi), %esi
+       lea 4(%edi), %edi
+       dec %edx
+       jne 3b
+       adcl $0, %eax
+4:     andl $3, %ecx
+       jz 7f
+       cmpl $2, %ecx
+       jb 5f
+SRC(   movw (%esi), %cx        )
+       leal 2(%esi), %esi
+DST(   movw %cx, (%edi)        )
+       leal 2(%edi), %edi
+       je 6f
+       shll $16,%ecx
+SRC(5: movb (%esi), %cl        )
+DST(   movb %cl, (%edi)        )
+6:     addl %ecx, %eax
+       adcl $0, %eax
+7:
+5000:
+
+# Exception handler:
+.section .fixup, "ax"                                                  
+
+6001:
+       movl ARGBASE+20(%esp), %ebx     # src_err_ptr
+       movl $-EFAULT, (%ebx)
+
+       # zero the complete destination - computing the rest
+       # is too much work 
+       movl ARGBASE+8(%esp), %edi      # dst
+       movl ARGBASE+12(%esp), %ecx     # len
+       xorl %eax,%eax
+       rep ; stosb
+
+       jmp 5000b
+
+6002:
+       movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
+       movl $-EFAULT,(%ebx)
+       jmp 5000b
+
+.previous
+
+       popl %ebx
+       popl %esi
+       popl %edi
+       popl %ecx                       # equivalent to addl $4,%esp
+       ret     
+
+#else
+
+/* Version for PentiumII/PPro */
+
+#define ROUND1(x) \
+       SRC(movl x(%esi), %ebx  )       ;       \
+       addl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%edi)  )       ; 
+
+#define ROUND(x) \
+       SRC(movl x(%esi), %ebx  )       ;       \
+       adcl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%edi)  )       ;
+
+#define ARGBASE 12
+               
+csum_partial_copy_generic:
+       pushl %ebx
+       pushl %edi
+       pushl %esi
+       movl ARGBASE+4(%esp),%esi       #src
+       movl ARGBASE+8(%esp),%edi       #dst    
+       movl ARGBASE+12(%esp),%ecx      #len
+       movl ARGBASE+16(%esp),%eax      #sum
+#      movl %ecx, %edx  
+       movl %ecx, %ebx  
+       movl %esi, %edx
+       shrl $6, %ecx     
+       andl $0x3c, %ebx  
+       negl %ebx
+       subl %ebx, %esi  
+       subl %ebx, %edi  
+       lea  -1(%esi),%edx
+       andl $-32,%edx
+       lea 3f(%ebx,%ebx), %ebx
+       testl %esi, %esi 
+       jmp *%ebx
+1:     addl $64,%esi
+       addl $64,%edi 
+       SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
+       ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
+       ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
+       ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
+       ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
+3:     adcl $0,%eax
+       addl $64, %edx
+       dec %ecx
+       jge 1b
+4:     movl ARGBASE+12(%esp),%edx      #len
+       andl $3, %edx
+       jz 7f
+       cmpl $2, %edx
+       jb 5f
+SRC(   movw (%esi), %dx         )
+       leal 2(%esi), %esi
+DST(   movw %dx, (%edi)         )
+       leal 2(%edi), %edi
+       je 6f
+       shll $16,%edx
+5:
+SRC(   movb (%esi), %dl         )
+DST(   movb %dl, (%edi)         )
+6:     addl %edx, %eax
+       adcl $0, %eax
+7:
+.section .fixup, "ax"
+6001:  movl    ARGBASE+20(%esp), %ebx  # src_err_ptr   
+       movl $-EFAULT, (%ebx)
+       # zero the complete destination (computing the rest is too much work)
+       movl ARGBASE+8(%esp),%edi       # dst
+       movl ARGBASE+12(%esp),%ecx      # len
+       xorl %eax,%eax
+       rep; stosb
+       jmp 7b
+6002:  movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
+       movl $-EFAULT, (%ebx)
+       jmp  7b                 
+.previous                              
+
+       popl %esi
+       popl %edi
+       popl %ebx
+       ret
+                               
+#undef ROUND
+#undef ROUND1          
+               
+#endif
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c
new file mode 100644 (file)
index 0000000..ffd4869
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * x86 version of "atomic_dec_and_lock()" using
+ * the atomic "cmpxchg" instruction.
+ *
+ * (For CPU's lacking cmpxchg, we use the slow
+ * generic version, and this one never even gets
+ * compiled).
+ */
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+       int counter;
+       int newcount;
+
+repeat:
+       counter = atomic_read(atomic);
+       newcount = counter-1;
+
+       if (!newcount)
+               goto slow_path;
+
+       asm volatile("lock; cmpxchgl %1,%2"
+               :"=a" (newcount)
+               :"r" (newcount), "m" (atomic->counter), "0" (counter));
+
+       /* If the above failed, "eax" will have changed */
+       if (newcount != counter)
+               goto repeat;
+       return 0;
+
+slow_path:
+       spin_lock(lock);
+       if (atomic_dec_and_test(atomic))
+               return 1;
+       spin_unlock(lock);
+       return 0;
+}
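
The cmpxchg fast path exists so the common case (count stays nonzero) never touches the spinlock; only a decrement that would reach zero falls through to the locked slow path. Callers depend on the contract that a nonzero return means the lock is now held. A hypothetical caller, for illustration only:

    /* Hypothetical refcounted object: free under the list lock only
     * when the last reference is dropped.                            */
    void put_object(struct my_obj *obj)
    {
            if (atomic_dec_and_lock(&obj->refcnt, &obj_list_lock)) {
                    list_del(&obj->list);     /* count hit zero       */
                    spin_unlock(&obj_list_lock);
                    kfree(obj);
            }
            /* otherwise: count decremented, lock never taken */
    }
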
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c
new file mode 100644 (file)
index 0000000..c918026
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ *     Precise Delay Loops for i386
+ *
+ *     Copyright (C) 1993 Linus Torvalds
+ *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *     The __delay function must _NOT_ be inlined as its execution time
+ *     depends wildly on alignment on many x86 processors. The additional
+ *     jump magic is needed to get the timing stable on all the CPU's
+ *     we have to worry about.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_SMP
+#include <asm/smp.h>
+#endif
+       
+void __delay(unsigned long loops)
+{
+       unsigned long bclock, now;
+       
+       rdtscl(bclock);
+       do
+       {
+               rep_nop();
+               rdtscl(now);
+       } while ((now-bclock) < loops);
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+       int d0;
+       __asm__("mull %0"
+               :"=d" (xloops), "=&a" (d0)
+               :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy));
+        __delay(xloops * HZ);
+}
+
+void __udelay(unsigned long usecs)
+{
+       __const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+}
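
The magic constant is 2**32/10**6: 4294967296 / 1000000 is roughly 4294.97, and 0x10c6 = 4294. So xloops becomes roughly usecs/10**6 in 2**-32 units, the mull keeps the high 32 bits of the product with loops_per_jiffy, and the HZ factor converts jiffies to seconds. A hypothetical numeric check:

    /* Hypothetical: loops_per_jiffy = 500000, HZ = 100
     * (i.e. ~50,000,000 delay loops per second).          */
    unsigned long long usecs  = 1000;                     /* want 1 ms */
    unsigned long long xloops = usecs * 0x10c6;           /* 4,294,000 */
    unsigned long long loops  = (xloops * 500000) >> 32;  /* ~= 499    */
    /* loops * HZ ~= 49,900 iterations ~= 1 ms at 5e7 loops/sec */
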
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S b/xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S
new file mode 100644 (file)
index 0000000..c244721
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * __get_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+
+/*
+ * __get_user_X
+ *
+ * Inputs:     %eax contains the address
+ *
+ * Outputs:    %eax is error code (0 or -EFAULT)
+ *             %edx contains zero-extended value
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+addr_limit = 12
+
+.text
+.align 4
+.globl __get_user_1
+__get_user_1:
+       movl %esp,%edx
+       andl $0xffffe000,%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+1:     movzbl (%eax),%edx
+       xorl %eax,%eax
+       ret
+
+.align 4
+.globl __get_user_2
+__get_user_2:
+       addl $1,%eax
+       movl %esp,%edx
+       jc bad_get_user
+       andl $0xffffe000,%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+2:     movzwl -1(%eax),%edx
+       xorl %eax,%eax
+       ret
+
+.align 4
+.globl __get_user_4
+__get_user_4:
+       addl $3,%eax
+       movl %esp,%edx
+       jc bad_get_user
+       andl $0xffffe000,%edx
+       cmpl addr_limit(%edx),%eax
+       jae bad_get_user
+3:     movl -3(%eax),%edx
+       xorl %eax,%eax
+       ret
+
+bad_get_user:
+       xorl %edx,%edx
+       movl $-14,%eax
+       ret
+
+.section __ex_table,"a"
+       .long 1b,bad_get_user
+       .long 2b,bad_get_user
+       .long 3b,bad_get_user
+.previous
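
The andl $0xffffe000 trick relies on the 2.4 layout where the task_struct shares an 8 KB (0x2000-byte) block with the kernel stack: masking %esp yields the current task, and the addr_limit field (offset 12, per the symbol at the top of the file) is compared inline instead of going through access_ok(). A rough C equivalent of __get_user_1, as a sketch under those layout assumptions:

    /* Sketch only: assumes task_struct at the base of the 8 KB
     * stack block, with addr_limit at byte offset 12.           */
    long get_user_1_sketch(unsigned long addr, unsigned char *val,
                           unsigned long esp)
    {
            char *tsk = (char *)(esp & 0xffffe000);
            if (addr >= *(unsigned long *)(tsk + 12))
                    return -14;             /* -EFAULT, as in bad_get_user */
            *val = *(unsigned char *)addr;  /* a fault here is caught by   */
            return 0;                       /* the __ex_table fixup        */
    }

The 2- and 4-byte variants additionally add len-1 to the address first and take the carry as an overflow check, which the one-byte case does not need.
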
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c
new file mode 100644 (file)
index 0000000..701a07f
--- /dev/null
@@ -0,0 +1,19 @@
+#include <asm/io.h>
+
+void * __io_virt_debug(unsigned long x, const char *file, int line)
+{
+       if (x < PAGE_OFFSET) {
+               printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line);
+               return __va(x);
+       }
+       return (void *)x;
+}
+
+unsigned long __io_phys_debug(unsigned long x, const char *file, int line)
+{
+       if (x < PAGE_OFFSET) {
+               printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line);
+               return x;
+       }
+       return __pa(x);
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c
new file mode 100644 (file)
index 0000000..4cb37b6
--- /dev/null
@@ -0,0 +1,19 @@
+#include <linux/config.h>
+#include <linux/string.h>
+
+#undef memcpy
+#undef memset
+
+void * memcpy(void * to, const void * from, size_t n)
+{
+#ifdef CONFIG_X86_USE_3DNOW
+       return __memcpy3d(to, from, n);
+#else
+       return __memcpy(to, from, n);
+#endif
+}
+
+void * memset(void * s, int c, size_t count)
+{
+       return __memset(s, c, count);
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c
new file mode 100644 (file)
index 0000000..0433cfa
--- /dev/null
@@ -0,0 +1,375 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+#include <asm/i387.h>
+#include <asm/hardirq.h> 
+
+
+/*
+ *     MMX 3DNow! library helper functions
+ *
+ *     To do:
+ *     We can use MMX just for prefetch in IRQ's. This may be a win. 
+ *             (reported so on K6-III)
+ *     We should use a better code neutral filler for the short jump
+ *             leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
+ *     We also want to clobber the filler register so we don't get any
+ *             register forwarding stalls on the filler. 
+ *
+ *     Add *user handling. Checksums are not a win with MMX on any CPU
+ *     tested so far for any MMX solution figured.
+ *
+ *     22/09/2000 - Arjan van de Ven 
+ *             Improved for non-engineering-sample Athlons
+ *
+ */
+void *_mmx_memcpy(void *to, const void *from, size_t len)
+{
+       void *p;
+       int i;
+
+       if (in_interrupt())
+               return __memcpy(to, from, len);
+
+       p = to;
+       i = len >> 6; /* len/64 */
+
+       kernel_fpu_begin();
+
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"            /* This set is 28 bytes */
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+               
+       
+       for(; i>0; i--)
+       {
+               __asm__ __volatile__ (
+               "1:  prefetch 320(%0)\n"
+               "2:  movq (%0), %%mm0\n"
+               "  movq 8(%0), %%mm1\n"
+               "  movq 16(%0), %%mm2\n"
+               "  movq 24(%0), %%mm3\n"
+               "  movq %%mm0, (%1)\n"
+               "  movq %%mm1, 8(%1)\n"
+               "  movq %%mm2, 16(%1)\n"
+               "  movq %%mm3, 24(%1)\n"
+               "  movq 32(%0), %%mm0\n"
+               "  movq 40(%0), %%mm1\n"
+               "  movq 48(%0), %%mm2\n"
+               "  movq 56(%0), %%mm3\n"
+               "  movq %%mm0, 32(%1)\n"
+               "  movq %%mm1, 40(%1)\n"
+               "  movq %%mm2, 48(%1)\n"
+               "  movq %%mm3, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       /*
+        *      Now do the tail of the block
+        */
+       __memcpy(to, from, len&63);
+       kernel_fpu_end();
+       return p;
+}
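
The fixup blocks in this function are unusual: on CPUs whose prefetch raises a fault, the handler patches the code itself. movw $0x1AEB, 1b stores the little-endian bytes EB 1A over the first prefetch, a short jmp that skips the rest of the 28-byte prefetch block on every later pass; 0x05EB does the same for the single 7-byte prefetch inside the loop. Decoded, for illustration:

    /* Why 0x1AEB: x86 is little-endian, so the word store lays down
     * EB 1A = "jmp rel8" with displacement 26 (2-byte jmp + 26 bytes
     * skipped = the 28-byte prefetch block noted in the comment).    */
    unsigned short patch = 0x1AEB;
    unsigned char *b = (unsigned char *)&patch;
    /* b[0] == 0xEB (jmp rel8 opcode), b[1] == 0x1A (offset 26) */
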
+
+#ifdef CONFIG_MK7
+
+/*
+ *     The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
+ *     other MMX using processors do not.
+ */
+
+static void fast_clear_page(void *page)
+{
+       int i;
+
+       kernel_fpu_begin();
+       
+       __asm__ __volatile__ (
+               "  pxor %%mm0, %%mm0\n" : :
+       );
+
+       for(i=0;i<4096/64;i++)
+       {
+               __asm__ __volatile__ (
+               "  movntq %%mm0, (%0)\n"
+               "  movntq %%mm0, 8(%0)\n"
+               "  movntq %%mm0, 16(%0)\n"
+               "  movntq %%mm0, 24(%0)\n"
+               "  movntq %%mm0, 32(%0)\n"
+               "  movntq %%mm0, 40(%0)\n"
+               "  movntq %%mm0, 48(%0)\n"
+               "  movntq %%mm0, 56(%0)\n"
+               : : "r" (page) : "memory");
+               page+=64;
+       }
+       /* since movntq is weakly-ordered, a "sfence" is needed to become
+        * ordered again.
+        */
+       __asm__ __volatile__ (
+               "  sfence \n" : :
+       );
+       kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+       int i;
+
+       kernel_fpu_begin();
+
+       /* maybe the prefetch stuff can go before the expensive fnsave...
+        * but that is for later. -AV
+        */
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+
+       for(i=0; i<(4096-320)/64; i++)
+       {
+               __asm__ __volatile__ (
+               "1: prefetch 320(%0)\n"
+               "2: movq (%0), %%mm0\n"
+               "   movntq %%mm0, (%1)\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movntq %%mm1, 8(%1)\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movntq %%mm2, 16(%1)\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movntq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm4\n"
+               "   movntq %%mm4, 32(%1)\n"
+               "   movq 40(%0), %%mm5\n"
+               "   movntq %%mm5, 40(%1)\n"
+               "   movq 48(%0), %%mm6\n"
+               "   movntq %%mm6, 48(%1)\n"
+               "   movq 56(%0), %%mm7\n"
+               "   movntq %%mm7, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       for(i=(4096-320)/64; i<4096/64; i++)
+       {
+               __asm__ __volatile__ (
+               "2: movq (%0), %%mm0\n"
+               "   movntq %%mm0, (%1)\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movntq %%mm1, 8(%1)\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movntq %%mm2, 16(%1)\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movntq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm4\n"
+               "   movntq %%mm4, 32(%1)\n"
+               "   movq 40(%0), %%mm5\n"
+               "   movntq %%mm5, 40(%1)\n"
+               "   movq 48(%0), %%mm6\n"
+               "   movntq %%mm6, 48(%1)\n"
+               "   movq 56(%0), %%mm7\n"
+               "   movntq %%mm7, 56(%1)\n"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       /* since movntq is weakly-ordered, a "sfence" is needed to become
+        * ordered again.
+        */
+       __asm__ __volatile__ (
+               "  sfence \n" : :
+       );
+       kernel_fpu_end();
+}
+
+#else
+
+/*
+ *     Generic MMX implementation without K7 specific streaming
+ */
+static void fast_clear_page(void *page)
+{
+       int i;
+       
+       kernel_fpu_begin();
+       
+       __asm__ __volatile__ (
+               "  pxor %%mm0, %%mm0\n" : :
+       );
+
+       for(i=0;i<4096/128;i++)
+       {
+               __asm__ __volatile__ (
+               "  movq %%mm0, (%0)\n"
+               "  movq %%mm0, 8(%0)\n"
+               "  movq %%mm0, 16(%0)\n"
+               "  movq %%mm0, 24(%0)\n"
+               "  movq %%mm0, 32(%0)\n"
+               "  movq %%mm0, 40(%0)\n"
+               "  movq %%mm0, 48(%0)\n"
+               "  movq %%mm0, 56(%0)\n"
+               "  movq %%mm0, 64(%0)\n"
+               "  movq %%mm0, 72(%0)\n"
+               "  movq %%mm0, 80(%0)\n"
+               "  movq %%mm0, 88(%0)\n"
+               "  movq %%mm0, 96(%0)\n"
+               "  movq %%mm0, 104(%0)\n"
+               "  movq %%mm0, 112(%0)\n"
+               "  movq %%mm0, 120(%0)\n"
+               : : "r" (page) : "memory");
+               page+=128;
+       }
+
+       kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+       int i;
+       
+       
+       kernel_fpu_begin();
+
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+
+       for(i=0; i<4096/64; i++)
+       {
+               __asm__ __volatile__ (
+               "1: prefetch 320(%0)\n"
+               "2: movq (%0), %%mm0\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movq %%mm0, (%1)\n"
+               "   movq %%mm1, 8(%1)\n"
+               "   movq %%mm2, 16(%1)\n"
+               "   movq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm0\n"
+               "   movq 40(%0), %%mm1\n"
+               "   movq 48(%0), %%mm2\n"
+               "   movq 56(%0), %%mm3\n"
+               "   movq %%mm0, 32(%1)\n"
+               "   movq %%mm1, 40(%1)\n"
+               "   movq %%mm2, 48(%1)\n"
+               "   movq %%mm3, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       kernel_fpu_end();
+}
+
+
+#endif
+
+/*
+ *     Favour MMX for page clear and copy. 
+ */
+
+static void slow_zero_page(void * page)
+{
+       int d0, d1;
+       __asm__ __volatile__( \
+               "cld\n\t" \
+               "rep ; stosl" \
+               : "=&c" (d0), "=&D" (d1)
+               :"a" (0),"1" (page),"0" (1024)
+               :"memory");
+}
+void mmx_clear_page(void * page)
+{
+       if(in_interrupt())
+               slow_zero_page(page);
+       else
+               fast_clear_page(page);
+}
+
+static void slow_copy_page(void *to, void *from)
+{
+       int d0, d1, d2;
+       __asm__ __volatile__( \
+               "cld\n\t" \
+               "rep ; movsl" \
+               : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+               : "0" (1024),"1" ((long) to),"2" ((long) from) \
+               : "memory");
+}
+  
+
+void mmx_copy_page(void *to, void *from)
+{
+       if(in_interrupt())
+               slow_copy_page(to, from);
+       else
+               fast_copy_page(to, from);
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c
new file mode 100644 (file)
index 0000000..ae3a380
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * FIXME: old compatibility stuff, will be removed soon.
+ */
+
+#include <net/checksum.h>
+
+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum)
+{
+       int src_err=0, dst_err=0;
+
+       sum = csum_partial_copy_generic ( src, dst, len, sum, &src_err, &dst_err);
+
+       if (src_err || dst_err)
+               printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n");
+
+       return sum;
+}
+
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c
new file mode 100644 (file)
index 0000000..a3dafbf
--- /dev/null
@@ -0,0 +1,31 @@
+#include <linux/string.h>
+
+char * strstr(const char * cs,const char * ct)
+{
+int    d0, d1;
+register char * __res;
+__asm__ __volatile__(
+       "movl %6,%%edi\n\t"
+       "repne\n\t"
+       "scasb\n\t"
+       "notl %%ecx\n\t"
+       "decl %%ecx\n\t"        /* NOTE! This also sets Z if searchstring='' */
+       "movl %%ecx,%%edx\n"
+       "1:\tmovl %6,%%edi\n\t"
+       "movl %%esi,%%eax\n\t"
+       "movl %%edx,%%ecx\n\t"
+       "repe\n\t"
+       "cmpsb\n\t"
+       "je 2f\n\t"             /* also works for empty string, see above */
+       "xchgl %%eax,%%esi\n\t"
+       "incl %%esi\n\t"
+       "cmpb $0,-1(%%eax)\n\t"
+       "jne 1b\n\t"
+       "xorl %%eax,%%eax\n\t"
+       "2:"
+       :"=a" (__res), "=&c" (d0), "=&S" (d1)
+       :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+       :"dx", "di");
+return __res;
+}
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c
new file mode 100644 (file)
index 0000000..d81fa81
--- /dev/null
@@ -0,0 +1,190 @@
+/* 
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+       {
+               if(n<512)
+                       __copy_user(to,from,n);
+               else
+                       mmx_copy_user(to,from,n);
+       }
+       return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+       {
+               if(n<512)
+                       __copy_user_zeroing(to,from,n);
+               else
+                       mmx_copy_user_zeroing(to, from, n);
+       }
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#else
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __copy_user(to,from,n);
+       return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       prefetchw(to);
+       if (access_ok(VERIFY_READ, from, n))
+               __copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res)                         \
+do {                                                                      \
+       int __d0, __d1, __d2;                                              \
+       __asm__ __volatile__(                                              \
+               "       testl %1,%1\n"                                     \
+               "       jz 2f\n"                                           \
+               "0:     lodsb\n"                                           \
+               "       stosb\n"                                           \
+               "       testb %%al,%%al\n"                                 \
+               "       jz 1f\n"                                           \
+               "       decl %1\n"                                         \
+               "       jnz 0b\n"                                          \
+               "1:     subl %1,%0\n"                                      \
+               "2:\n"                                                     \
+               ".section .fixup,\"ax\"\n"                                 \
+               "3:     movl %5,%0\n"                                      \
+               "       jmp 2b\n"                                          \
+               ".previous\n"                                              \
+               ".section __ex_table,\"a\"\n"                              \
+               "       .align 4\n"                                        \
+               "       .long 0b,3b\n"                                     \
+               ".previous"                                                \
+               : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
+                 "=&D" (__d2)                                             \
+               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+               : "memory");                                               \
+} while (0)
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res;
+       __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res = -EFAULT;
+       if (access_ok(VERIFY_READ, src, 1))
+               __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size)                                     \
+do {                                                                   \
+       int __d0;                                                       \
+       __asm__ __volatile__(                                           \
+               "0:     rep; stosl\n"                                   \
+               "       movl %2,%0\n"                                   \
+               "1:     rep; stosb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%2,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0)                             \
+               : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
+} while (0)
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               __do_clear_user(to, n);
+       return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+       __do_clear_user(to, n);
+       return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+       unsigned long mask = -__addr_ok(s);
+       unsigned long res, tmp;
+
+       __asm__ __volatile__(
+               "       testl %0, %0\n"
+               "       jz 3f\n"
+               "       andl %0,%%ecx\n"
+               "0:     repne; scasb\n"
+               "       setne %%al\n"
+               "       subl %%ecx,%0\n"
+               "       addl %0,%%eax\n"
+               "1:\n"
+               ".section .fixup,\"ax\"\n"
+               "2:     xorl %%eax,%%eax\n"
+               "       jmp 1b\n"
+               "3:     movb $1,%%al\n"
+               "       jmp 1b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 0b,2b\n"
+               ".previous"
+               :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+               :"0" (n), "1" (s), "2" (0), "3" (mask)
+               :"cc");
+       return res & mask;
+}
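
One convention worth calling out: every copy routine in this file returns the number of bytes left uncopied, so zero means success, and an access_ok() failure simply returns the full length. A hypothetical caller, for illustration:

    /* Hypothetical use of the copy_from_user() wrapper around
     * __generic_copy_from_user(): nonzero return = partial copy. */
    char kbuf[64];
    if (copy_from_user(kbuf, ubuf, sizeof(kbuf)))
            return -EFAULT;
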
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile
new file mode 100644 (file)
index 0000000..4628394
--- /dev/null
@@ -0,0 +1,6 @@
+
+O_TARGET := mm.o
+
+obj-y   := init.o fault.o extable.o hypervisor.o
+
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c
new file mode 100644 (file)
index 0000000..4cd9f06
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * linux/arch/i386/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+                const struct exception_table_entry *last,
+                unsigned long value)
+{
+        while (first <= last) {
+               const struct exception_table_entry *mid;
+               long diff;
+
+               mid = (last - first) / 2 + first;
+               diff = mid->insn - value;
+                if (diff == 0)
+                        return mid->fixup;
+                else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+        return 0;
+}
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+       unsigned long ret = 0;
+       
+#ifndef CONFIG_MODULES
+       /* There is only the kernel to search.  */
+       ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+       return ret;
+#else
+       unsigned long flags;
+       /* The kernel is the last "module" -- no need to treat it special.  */
+       struct module *mp;
+
+       spin_lock_irqsave(&modlist_lock, flags);
+       for (mp = module_list; mp != NULL; mp = mp->next) {
+               if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+                       continue;
+               ret = search_one_table(mp->ex_table_start,
+                                      mp->ex_table_end - 1, addr);
+               if (ret)
+                       break;
+       }
+       spin_unlock_irqrestore(&modlist_lock, flags);
+       return ret;
+#endif
+}
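
search_one_table() is an ordinary binary search, which only works because the linker emits __ex_table sorted by instruction address. Each entry pairs a possibly-faulting instruction with its recovery point, matching the .long 9999b, 6001f records emitted by the SRC/DST macros in checksum.S earlier in this changeset. A sketch of the entry type being walked (the real definition lives in asm/uaccess.h):

    /* Sketch of the searched entry: insn is the address that may
     * fault, fixup is where EIP is redirected when it does.       */
    struct exception_table_entry {
            unsigned long insn;
            unsigned long fixup;
    };
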
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c
new file mode 100644 (file)
index 0000000..18e2167
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ *  linux/arch/i386/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>             /* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+extern int console_loglevel;
+
+pgd_t *cur_pgd;
+
+/*
+ * Ugly, ugly, but the goto's result in better assembly..
+ */
+int __verify_write(const void * addr, unsigned long size)
+{
+       struct vm_area_struct * vma;
+       unsigned long start = (unsigned long) addr;
+
+       if (!size)
+               return 1;
+
+       vma = find_vma(current->mm, start);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start > start)
+               goto check_stack;
+
+good_area:
+       if (!(vma->vm_flags & VM_WRITE))
+               goto bad_area;
+       size--;
+       size += start & ~PAGE_MASK;
+       size >>= PAGE_SHIFT;
+       start &= PAGE_MASK;
+
+       for (;;) {
+       survive:
+               {
+                       int fault = handle_mm_fault(current->mm, vma, start, 1);
+                       if (!fault)
+                               goto bad_area;
+                       if (fault < 0)
+                               goto out_of_memory;
+               }
+               if (!size)
+                       break;
+               size--;
+               start += PAGE_SIZE;
+               if (start < vma->vm_end)
+                       continue;
+               vma = vma->vm_next;
+               if (!vma || vma->vm_start != start)
+                       goto bad_area;
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto bad_area;
+       }
+       return 1;
+
+check_stack:
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (expand_stack(vma, start) == 0)
+               goto good_area;
+
+bad_area:
+       return 0;
+
+out_of_memory:
+       if (current->pid == 1) {
+               current->policy |= SCHED_YIELD;
+               schedule();
+               goto survive;
+       }
+       goto bad_area;
+}
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+       spin_lock_init(&timerlist_lock);
+       if (yes) {
+               oops_in_progress = 1;
+       } else {
+               int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+               unblank_screen();
+#endif
+               oops_in_progress = 0;
+               /*
+                * OK, the message is on the console.  Now we call printk()
+                * without oops_in_progress set so that printk will give klogd
+                * a poke.  Hold onto your hats...
+                */
+               console_loglevel = 15;          /* NMI oopser may have shut the console up */
+               printk(" ");
+               console_loglevel = loglevel_save;
+       }
+}
+
+void do_BUG(const char *file, int line)
+{
+       bust_spinlocks(1);
+       printk("kernel BUG at %s:%d!\n", file, line);
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ *     bit 0 == 0 means no page found, 1 means protection fault
+ *     bit 1 == 0 means read, 1 means write
+ *     bit 2 == 0 means kernel, 1 means user-mode
+ */
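
A hypothetical decode of those bits: error_code == 6 (binary 110) would mean a user-mode write to a not-present page.

    /* Illustrative decode of the error_code bits listed above: */
    int prot  = error_code & 1;   /* 0: page not present, 1: protection */
    int write = error_code & 2;   /* set: write access                  */
    int user  = error_code & 4;   /* set: fault taken in user mode      */

Note that the function below immediately recomputes bit 2 from the saved code-segment ring bits (regs->xcs & 2), since the hardware-reported privilege bit is not meaningful when the guest kernel itself runs outside ring 0 under the hypervisor.
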
+asmlinkage void do_page_fault(struct pt_regs *regs, 
+                              unsigned long error_code,
+                              unsigned long address)
+{
+       struct task_struct *tsk = current;
+       struct mm_struct *mm;
+       struct vm_area_struct * vma;
+        unsigned long page;
+       unsigned long fixup;
+       int write;
+       siginfo_t info;
+
+        /* Set the "privileged fault" bit to something sane. */
+        error_code &= ~4;
+        error_code |= (regs->xcs & 2) << 1;
+
+       /*
+        * We fault-in kernel-space virtual memory on-demand. The
+        * 'reference' page table is init_mm.pgd.
+        *
+        * NOTE! We MUST NOT take any locks for this case. We may
+        * be in an interrupt or a critical region, and should
+        * only copy the information from the master page table,
+        * nothing more.
+        *
+        * This verifies that the fault happens in kernel space
+        * (error_code & 4) == 0, and that the fault was not a
+        * protection error (error_code & 1) == 0.
+        */
+       if (address >= TASK_SIZE && !(error_code & 5))
+               goto vmalloc_fault;
+
+       mm = tsk->mm;
+       info.si_code = SEGV_MAPERR;
+
+       /*
+        * If we're in an interrupt or have no user
+        * context, we must not take the fault..
+        */
+       if (in_interrupt() || !mm)
+               goto no_context;
+
+       down_read(&mm->mmap_sem);
+
+       vma = find_vma(mm, address);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= address)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (error_code & 4) {
+               /*
+                * accessing the stack below %esp is always a bug.
+                * The "+ 32" is there due to some instructions (like
+                * pusha) doing post-decrement on the stack and that
+                * doesn't show up until later..
+                */
+               if (address + 32 < regs->esp)
+                       goto bad_area;
+       }
+       if (expand_stack(vma, address))
+               goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+       info.si_code = SEGV_ACCERR;
+       write = 0;
+       switch (error_code & 3) {
+               default:        /* 3: write, present */
+                       /* fall through */
+               case 2:         /* write, not present */
+                       if (!(vma->vm_flags & VM_WRITE))
+                               goto bad_area;
+                       write++;
+                       break;
+               case 1:         /* read, present */
+                       goto bad_area;
+               case 0:         /* read, not present */
+                       if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                               goto bad_area;
+       }
+
+ survive:
+       /*
+        * If for any reason at all we couldn't handle the fault,
+        * make sure we exit gracefully rather than endlessly redo
+        * the fault.
+        */
+       switch (handle_mm_fault(mm, vma, address, write)) {
+       case 1:
+               tsk->min_flt++;
+               break;
+       case 2:
+               tsk->maj_flt++;
+               break;
+       case 0:
+               goto do_sigbus;
+       default:
+               goto out_of_memory;
+       }
+
+       up_read(&mm->mmap_sem);
+       return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+       up_read(&mm->mmap_sem);
+
+       /* User mode accesses just cause a SIGSEGV */
+       if (error_code & 4) {
+               tsk->thread.cr2 = address;
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_no = 14;
+               info.si_signo = SIGSEGV;
+               info.si_errno = 0;
+               /* info.si_code has been set above */
+               info.si_addr = (void *)address;
+               force_sig_info(SIGSEGV, &info, tsk);
+               return;
+       }
+
+no_context:
+       /* Are we prepared to handle this kernel fault?  */
+       if ((fixup = search_exception_table(regs->eip)) != 0) {
+               regs->eip = fixup;
+               return;
+       }
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+       bust_spinlocks(1);
+
+       if (address < PAGE_SIZE)
+               printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+       else
+               printk(KERN_ALERT "Unable to handle kernel paging request");
+       printk(" at virtual address %08lx\n",address);
+       printk(" printing eip:\n");
+       printk("%08lx\n", regs->eip);
+        page = ((unsigned long *) cur_pgd)[address >> 22];
+        printk(KERN_ALERT "*pde = %08lx\n", page);
+        if (page & 1) {
+                page &= PAGE_MASK;
+                address &= 0x003ff000;
+                page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
+                printk(KERN_ALERT "*pte = %08lx\n", page);
+        }
+       die("Oops", regs, error_code);
+       bust_spinlocks(0);
+       do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+       up_read(&mm->mmap_sem);
+       if (tsk->pid == 1) {
+               tsk->policy |= SCHED_YIELD;
+               schedule();
+               down_read(&mm->mmap_sem);
+               goto survive;
+       }
+       printk("VM: killing process %s\n", tsk->comm);
+       if (error_code & 4)
+               do_exit(SIGKILL);
+       goto no_context;
+
+do_sigbus:
+       up_read(&mm->mmap_sem);
+
+       /*
+        * Send a sigbus, regardless of whether we were in kernel
+        * or user mode.
+        */
+       tsk->thread.cr2 = address;
+       tsk->thread.error_code = error_code;
+       tsk->thread.trap_no = 14;
+       info.si_signo = SIGBUS;
+       info.si_errno = 0;
+       info.si_code = BUS_ADRERR;
+       info.si_addr = (void *)address;
+       force_sig_info(SIGBUS, &info, tsk);
+
+       /* Kernel mode? Handle exceptions or die */
+       if (!(error_code & 4))
+               goto no_context;
+       return;
+
+vmalloc_fault:
+       {
+               /*
+                * Synchronize this task's top level page-table
+                * with the 'reference' page table.
+                *
+                * Do _not_ use "tsk" here. We might be inside
+                * an interrupt in the middle of a task switch..
+                */
+               int offset = __pgd_offset(address);
+               pgd_t *pgd, *pgd_k;
+               pmd_t *pmd, *pmd_k;
+               pte_t *pte_k;
+
+               pgd = offset + cur_pgd;
+               pgd_k = init_mm.pgd + offset;
+
+               if (!pgd_present(*pgd_k))
+                       goto no_context;
+               set_pgd(pgd, *pgd_k);
+               
+               pmd = pmd_offset(pgd, address);
+               pmd_k = pmd_offset(pgd_k, address);
+               if (!pmd_present(*pmd_k))
+                       goto no_context;
+               set_pmd(pmd, *pmd_k);
+
+               pte_k = pte_offset(pmd_k, address);
+               if (!pte_present(*pte_k))
+                       goto no_context;
+               return;
+       }
+}
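+
+/*
+ * Editorial sketch (not part of the original import): the error_code
+ * tests scattered through do_page_fault() above all decode the bit
+ * layout documented at the top of the handler.  A hypothetical helper
+ * making that decoding explicit:
+ */
+#if 0
+static void decode_fault(unsigned long error_code)
+{
+       int present = error_code & 1;   /* 0: no page found, 1: protection */
+       int write   = error_code & 2;   /* 0: read,          2: write     */
+       int user    = error_code & 4;   /* 0: kernel,        4: user mode */
+
+       printk("fault: %s %s access in %s mode\n",
+              present ? "protection" : "not-present",
+              write ? "write" : "read",
+              user ? "user" : "kernel");
+}
+#endif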
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c
new file mode 100644 (file)
index 0000000..8454c9b
--- /dev/null
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * xeno/mm/hypervisor.c
+ * 
+ * Update page tables via the hypervisor.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <asm/hypervisor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#define QUEUE_SIZE 1
+static page_update_request_t update_queue[QUEUE_SIZE];
+
+void queue_l1_entry_update(unsigned long ptr, unsigned long val)
+{
+    update_queue[0].ptr = ptr + start_info.phys_base;
+    update_queue[0].val = val;
+    flush_page_update_queue();
+}
+
+void queue_l2_entry_update(unsigned long ptr, unsigned long val)
+{
+    update_queue[0].ptr = ptr + start_info.phys_base;
+    update_queue[0].val = val;
+    flush_page_update_queue();
+}
+
+void queue_baseptr_create(unsigned long ptr)
+{
+    update_queue[0].ptr = PGREQ_ADD_BASEPTR;
+    update_queue[0].val = ptr + start_info.phys_base;
+    flush_page_update_queue();
+}
+
+void queue_baseptr_remove(unsigned long ptr)
+{
+    update_queue[0].ptr = PGREQ_REMOVE_BASEPTR;
+    update_queue[0].val = ptr + start_info.phys_base;
+    flush_page_update_queue();
+}
+
+void queue_tlb_flush(void)
+{
+    /* nothing */
+}
+
+void queue_tlb_flush_one(unsigned long ptr)
+{
+    /* nothing */
+}
+
+void flush_page_update_queue(void)
+{
+    HYPERVISOR_pt_update(update_queue, 1);
+}
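+
+/*
+ * Editorial sketch (not part of the original import): with QUEUE_SIZE
+ * fixed at 1, each queue_*() call above degenerates into an immediate
+ * single-entry HYPERVISOR_pt_update().  A batched variant, keeping the
+ * same page_update_request_t interface, might look like this
+ * (hypothetical BATCH_SIZE and helper name):
+ */
+#if 0
+#define BATCH_SIZE 128
+static page_update_request_t batch_queue[BATCH_SIZE];
+static int batch_idx;
+
+static void queue_update(unsigned long ptr, unsigned long val)
+{
+    batch_queue[batch_idx].ptr = ptr;
+    batch_queue[batch_idx].val = val;
+    if ( ++batch_idx == BATCH_SIZE )
+    {
+        HYPERVISOR_pt_update(batch_queue, batch_idx);
+        batch_idx = 0;
+    }
+}
+#endif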
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/init.c
new file mode 100644 (file)
index 0000000..022a511
--- /dev/null
@@ -0,0 +1,368 @@
+/*
+ *  linux/arch/i386/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages;
+static unsigned long totalhigh_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+    int freed = 0;
+    if(pgtable_cache_size > high) {
+        do {
+            if (pgd_quicklist) {
+                free_pgd_slow(get_pgd_fast());
+                freed++;
+            }
+            if (pmd_quicklist) {
+                pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
+                freed++;
+            }
+            if (pte_quicklist) {
+                pte_free_slow(pte_alloc_one_fast(NULL, 0));
+                freed++;
+            }
+        } while(pgtable_cache_size > low);
+    }
+    return freed;
+}
+
+/*
+ * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
+ * physical space, so we can cache the place of the first one and move
+ * around without checking the pgd every time.
+ */
+
+#if CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+#define kmap_get_fixmap_pte(vaddr)                                     \
+       pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
+{
+    unsigned long kmap_vstart;
+
+    /* cache the first kmap pte */
+    kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+    kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+    kmap_prot = PAGE_KERNEL;
+}
+#endif /* CONFIG_HIGHMEM */
+
+void show_mem(void)
+{
+    int i, total = 0, reserved = 0;
+    int shared = 0, cached = 0;
+    int highmem = 0;
+
+    printk("Mem-info:\n");
+    show_free_areas();
+    printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+    i = max_mapnr;
+    while (i-- > 0) {
+        total++;
+        if (PageHighMem(mem_map+i))
+            highmem++;
+        if (PageReserved(mem_map+i))
+            reserved++;
+        else if (PageSwapCache(mem_map+i))
+            cached++;
+        else if (page_count(mem_map+i))
+            shared += page_count(mem_map+i) - 1;
+    }
+    printk("%d pages of RAM\n", total);
+    printk("%d pages of HIGHMEM\n",highmem);
+    printk("%d reserved pages\n",reserved);
+    printk("%d pages shared\n",shared);
+    printk("%d pages swap cached\n",cached);
+    printk("%ld pages in page table cache\n",pgtable_cache_size);
+    show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+static inline void set_pte_phys (unsigned long vaddr,
+                                 unsigned long phys, pgprot_t flags)
+{
+    pgprot_t prot;
+    pgd_t *pgd;
+    pmd_t *pmd;
+    pte_t *pte;
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    if (pgd_none(*pgd)) {
+        printk("PAE BUG #00!\n");
+        return;
+    }
+    pmd = pmd_offset(pgd, vaddr);
+    if (pmd_none(*pmd)) {
+        printk("PAE BUG #01!\n");
+        return;
+    }
+    pte = pte_offset(pmd, vaddr);
+    if (pte_val(*pte))
+        pte_ERROR(*pte);
+    pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags);
+    set_pte(pte, mk_pte_phys(phys, prot));
+
+    /*
+     * It's enough to flush this one mapping.
+     * (PGE mappings get flushed as well)
+     */
+    __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+    unsigned long address = __fix_to_virt(idx);
+
+    if (idx >= __end_of_fixed_addresses) {
+        printk("Invalid __set_fixmap\n");
+        return;
+    }
+    set_pte_phys(address, phys, flags);
+}
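+
+/*
+ * Editorial note (not part of the original import): a typical caller
+ * binds a fixed virtual slot to a physical page, e.g. (hypothetical
+ * index and address)
+ *
+ *     __set_fixmap(FIX_APIC_BASE, 0xfee00000, PAGE_KERNEL);
+ *
+ * after which __fix_to_virt(FIX_APIC_BASE) is a valid kernel-virtual
+ * mapping of that page.
+ */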
+
+#if 0
+static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+{
+    pgd_t *pgd;
+    pmd_t *pmd;
+    pte_t *pte;
+    int i, j;
+    unsigned long vaddr;
+
+    vaddr = start;
+    i = __pgd_offset(vaddr);
+    j = __pmd_offset(vaddr);
+    pgd = pgd_base + i;
+
+    for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+#if CONFIG_X86_PAE
+        if (pgd_none(*pgd)) {
+            pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            set_pgd(pgd, __pgd(__pa(pmd) + 0x1));
+            if (pmd != pmd_offset(pgd, 0))
+                printk("PAE BUG #02!\n");
+        }
+        pmd = pmd_offset(pgd, vaddr);
+#else
+        pmd = (pmd_t *)pgd;
+#endif
+        for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+            if (pmd_none(*pmd)) {
+                pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+                set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)));
+                if (pte != pte_offset(pmd, 0))
+                    BUG();
+            }
+            vaddr += PMD_SIZE;
+        }
+        j = 0;
+    }
+}
+#endif
+
+static void __init pagetable_init (void)
+{
+#if 0
+    vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+    fixrange_init(vaddr, 0, pgd_base);
+#endif
+
+#if CONFIG_HIGHMEM
+    /*
+     * Permanent kmaps.  (Note: this path also depends on the
+     * fixrange_init() helper that is compiled out above, and no
+     * pgd_base is in scope here; init_mm.pgd is used instead.)
+     */
+    unsigned long vaddr = PKMAP_BASE;
+    pgd_t *pgd;
+    pmd_t *pmd;
+    pte_t *pte;
+
+    fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, init_mm.pgd);
+
+    pgd = init_mm.pgd + __pgd_offset(vaddr);
+    pmd = pmd_offset(pgd, vaddr);
+    pte = pte_offset(pmd, vaddr);
+    pkmap_page_table = pte;
+#endif
+}
+
+/*
+ * paging_init() sets up the page tables - note that the first 8MB are
+ * already mapped by head.S.
+ *
+ * This routines also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+    pagetable_init();
+
+#ifdef CONFIG_HIGHMEM
+    kmap_init();
+#endif
+    {
+        unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+        unsigned int max_dma, high, low;
+
+        max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+        low = max_low_pfn;
+        high = highend_pfn;
+
+        if (low < max_dma)
+            zones_size[ZONE_DMA] = low;
+        else {
+            zones_size[ZONE_DMA] = max_dma;
+            zones_size[ZONE_NORMAL] = low - max_dma;
+#ifdef CONFIG_HIGHMEM
+            zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+        }
+        free_area_init(zones_size);
+    }
+    return;
+}
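+
+/*
+ * Editorial worked example (not part of the original import): on a
+ * hypothetical 128MB machine with 4kB pages, MAX_DMA_ADDRESS covers
+ * the first 16MB, so max_dma = 4096 pages and low = max_low_pfn =
+ * 32768 pages.  paging_init() above therefore sets
+ * zones_size[ZONE_DMA] = 4096 and zones_size[ZONE_NORMAL] =
+ * 32768 - 4096 = 28672, with ZONE_HIGHMEM used only when
+ * CONFIG_HIGHMEM is enabled and highend_pfn exceeds low.
+ */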
+
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+    return 1;
+}
+
+void __init mem_init(void)
+{
+    int codesize, reservedpages, datasize, initsize;
+    int tmp;
+
+#ifdef CONFIG_HIGHMEM
+    highmem_start_page = mem_map + highstart_pfn;
+    max_mapnr = num_physpages = highend_pfn;
+#else
+    max_mapnr = num_physpages = max_low_pfn;
+#endif
+    high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+    /* clear the zero-page */
+    memset(empty_zero_page, 0, PAGE_SIZE);
+
+    /* this will put all low memory onto the freelists */
+    totalram_pages += free_all_bootmem();
+
+    reservedpages = 0;
+    for (tmp = 0; tmp < max_low_pfn; tmp++)
+        /*
+         * Only count reserved RAM pages
+         */
+        if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
+            reservedpages++;
+#ifdef CONFIG_HIGHMEM
+    for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+        struct page *page = mem_map + tmp;
+
+        if (!page_is_ram(tmp)) {
+            SetPageReserved(page);
+            continue;
+        }
+        ClearPageReserved(page);
+        set_bit(PG_highmem, &page->flags);
+        atomic_set(&page->count, 1);
+        __free_page(page);
+        totalhigh_pages++;
+    }
+    totalram_pages += totalhigh_pages;
+#endif
+    codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+    datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+    initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+    printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+           (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+           max_mapnr << (PAGE_SHIFT-10),
+           codesize >> 10,
+           reservedpages << (PAGE_SHIFT-10),
+           datasize >> 10,
+           initsize >> 10,
+           (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+        );
+
+    boot_cpu_data.wp_works_ok = 1;
+}
+
+void free_initmem(void)
+{
+    unsigned long addr;
+
+    addr = (unsigned long)(&__init_begin);
+    for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(addr));
+        set_page_count(virt_to_page(addr), 1);
+        free_page(addr);
+        totalram_pages++;
+    }
+    printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+    if (start < end)
+        printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+    for (; start < end; start += PAGE_SIZE) {
+        ClearPageReserved(virt_to_page(start));
+        set_page_count(virt_to_page(start), 1);
+        free_page(start);
+        totalram_pages++;
+    }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+    val->totalram = totalram_pages;
+    val->sharedram = 0;
+    val->freeram = nr_free_pages();
+    val->bufferram = atomic_read(&buffermem_pages);
+    val->totalhigh = totalhigh_pages;
+    val->freehigh = nr_free_highpages();
+    val->mem_unit = PAGE_SIZE;
+    return;
+}
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds b/xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds
new file mode 100644 (file)
index 0000000..26a59aa
--- /dev/null
@@ -0,0 +1,87 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+  . = 0xC0000000 + 0x000000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } = 0x9090
+  .text.lock : { *(.text.lock) }       /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;       /* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  __start___kallsyms = .;      /* All kernel symbols */
+  __kallsyms : { *(__kallsyms) }
+  __stop___kallsyms = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       }
+
+  _edata = .;                  /* End of data section */
+
+  . = ALIGN(8192);             /* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss)
+       }
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c
new file mode 100644 (file)
index 0000000..a9e973d
--- /dev/null
@@ -0,0 +1,1243 @@
+/*
+ *  linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/config.h>
+#include <linux/locks.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <linux/blk.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+/*
+ * MAC Floppy IWM hooks
+ */
+
+#ifdef CONFIG_MAC_FLOPPY_IWM
+extern int mac_floppy_init(void);
+#endif
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * The "disk" task queue is used to start the actual requests
+ * after a plug
+ */
+DECLARE_TASK_QUEUE(tq_disk);
+
+/*
+ * Protect the request list against multiple users..
+ *
+ * With this spinlock the Linux block IO subsystem is 100% SMP threaded
+ * from the IRQ event side, and almost 100% SMP threaded from the syscall
+ * side (we still have to protect against block device array operations, and
+ * the do_request() side is still casually unsafe; the kernel lock protects
+ * this part currently).
+ *
+ * There is a fair chance that things will work just OK if these functions
+ * are called with no global kernel lock held ...
+ */
+spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
+
+/* This specifies how many sectors to read ahead on the disk. */
+
+int read_ahead[MAX_BLKDEV];
+
+/* blk_dev_struct is:
+ *     *request_fn
+ *     *current_request
+ */
+struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
+
+/*
+ * blk_size contains the size of all block-devices in units of 1024 byte
+ * sectors:
+ *
+ * blk_size[MAJOR][MINOR]
+ *
+ * if (!blk_size[MAJOR]) then no minor size checking is done.
+ */
+int * blk_size[MAX_BLKDEV];
+
+/*
+ * blksize_size contains the size of all block-devices:
+ *
+ * blksize_size[MAJOR][MINOR]
+ *
+ * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
+ */
+int * blksize_size[MAX_BLKDEV];
+
+/*
+ * hardsect_size contains the size of the hardware sector of a device.
+ *
+ * hardsect_size[MAJOR][MINOR]
+ *
+ * if (!hardsect_size[MAJOR])
+ *             then 512 bytes is assumed.
+ * else
+ *             sector_size is hardsect_size[MAJOR][MINOR]
+ * This is currently set by some scsi devices and read by the msdos fs driver.
+ * Other uses may appear later.
+ */
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+/*
+ * How many requests do we allocate per queue,
+ * and how many do we "batch" on freeing them?
+ */
+static int queue_nr_requests, batch_requests;
+
+static inline int get_max_sectors(kdev_t dev)
+{
+       if (!max_sectors[MAJOR(dev)])
+               return MAX_SECTORS;
+       return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+       struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+       if (bdev->queue)
+               return bdev->queue(dev);
+       else
+               return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+       struct list_head *head = &list->free;
+       struct request *rq;
+       int i = 0;
+
+       while (!list_empty(head)) {
+               rq = list_entry(head->next, struct request, queue);
+               list_del(&rq->queue);
+               kmem_cache_free(request_cachep, rq);
+               i++;
+       };
+
+       if (i != list->count)
+               printk("request list leak!\n");
+
+       list->count = 0;
+       return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q:    the request queue to be released
+ *
+ * Description:
+ *     blk_cleanup_queue is the pair to blk_init_queue().  It should
+ *     be called when a request queue is being released; typically
+ *     when a block device is being de-registered.  Currently, its
+ *     primary task is to free all the &struct request structures that
+ *     were allocated to the queue.
+ * Caveat: 
+ *     Hopefully the low level driver will have finished any
+ *     outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+       int count = queue_nr_requests;
+
+       count -= __blk_cleanup_queue(&q->rq[READ]);
+       count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+       if (count)
+               printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+       memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q:       The queue which this applies to.
+ * @active:  A flag indicating whether the head of the queue is active.
+ *
+ * Description:
+ *    The driver for a block device may choose to leave the currently active
+ *    request on the request queue, removing it only when it has completed.
+ *    The queue handling routines assume this by default for safety reasons
+ *    and will not involve the head of the request queue in any merging or
+ *    reordering of requests when the queue is unplugged (and thus may be
+ *    working on this particular request).
+ *
+ *    If a driver removes requests from the queue before processing them, then
+ *    it may indicate that it does so, thereby allowing the head of the queue
+ *    to be involved in merging and reordering.  This is done by calling
+ *    blk_queue_headactive() with an @active flag of %0.
+ *
+ *    If a driver processes several requests at once, it must remove them (or
+ *    at least all but one of them) from the request queue.
+ *
+ *    When a queue is plugged the head will be assumed to be inactive.
+ **/
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+       q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q:  the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ *    The normal way for &struct buffer_heads to be passed to a device
+ *    driver is for them to be collected into requests on a request
+ *    queue, and then to allow the device driver to select requests
+ *    off that queue when it is ready.  This works well for many block
+ *    devices. However some block devices (typically virtual devices
+ *    such as md or lvm) do not benefit from the processing on the
+ *    request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function
+ *    to blk_queue_make_request().
+ *
+ * Caveat:
+ *    The driver that does this *must* be able to deal appropriately
+ *    with buffers in "highmemory", either by calling bh_kmap() to get
+ *    a kernel mapping, or by calling create_bounce() to create a
+ *    buffer in normal memory.
+ **/
+
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+       q->make_request_fn = mfn;
+}
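+
+/*
+ * Editorial sketch (not part of the original import): a stacking
+ * driver (in the md/lvm mould) registered via blk_queue_make_request()
+ * typically just remaps the buffer head and returns nonzero, so that
+ * generic_make_request() resolves it again against the new device.
+ * Hypothetical example:
+ */
+#if 0
+static int remap_make_request(request_queue_t *q, int rw,
+                              struct buffer_head *bh)
+{
+       bh->b_rdev = my_backing_dev;            /* hypothetical target dev */
+       bh->b_rsector += my_start_sector;       /* hypothetical offset */
+       return 1;       /* ask generic_make_request() to resubmit */
+}
+
+/* registered at initialisation time with:
+ *     blk_queue_make_request(q, remap_make_request);
+ */
+#endif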
+
+static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
+{
+       if (req->nr_segments < max_segments) {
+               req->nr_segments++;
+               return 1;
+       }
+       return 0;
+}
+
+static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
+                           struct buffer_head *bh, int max_segments)
+{
+       if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
+               return 1;
+       return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
+                            struct buffer_head *bh, int max_segments)
+{
+       if (bh->b_data + bh->b_size == req->bh->b_data)
+               return 1;
+       return ll_new_segment(q, req, max_segments);
+}
+
+static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
+                               struct request *next, int max_segments)
+{
+       int total_segments = req->nr_segments + next->nr_segments;
+
+       if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
+               total_segments--;
+    
+       if (total_segments > max_segments)
+               return 0;
+
+       req->nr_segments = total_segments;
+       return 1;
+}
+
+/*
+ * "plug" the device if there are no outstanding requests: this will
+ * force the transfer to start only after we have put all the requests
+ * on the list.
+ *
+ * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired)
+ */
+static void generic_plug_device(request_queue_t *q, kdev_t dev)
+{
+       /*
+        * no need to replug device
+        */
+       if (!list_empty(&q->queue_head) || q->plugged)
+               return;
+
+       q->plugged = 1;
+       queue_task(&q->plug_tq, &tq_disk);
+}
+
+/*
+ * remove the plug and let it rip..
+ */
+static inline void __generic_unplug_device(request_queue_t *q)
+{
+       if (q->plugged) {
+               q->plugged = 0;
+               if (!list_empty(&q->queue_head))
+                       q->request_fn(q);
+       }
+}
+
+void generic_unplug_device(void *data)
+{
+       request_queue_t *q = (request_queue_t *) data;
+       unsigned long flags;
+
+       spin_lock_irqsave(&io_request_lock, flags);
+       __generic_unplug_device(q);
+       spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+static void blk_init_free_list(request_queue_t *q)
+{
+       struct request *rq;
+       int i;
+
+       INIT_LIST_HEAD(&q->rq[READ].free);
+       INIT_LIST_HEAD(&q->rq[WRITE].free);
+       q->rq[READ].count = 0;
+       q->rq[WRITE].count = 0;
+
+       /*
+        * Divide requests in half between read and write
+        */
+       for (i = 0; i < queue_nr_requests; i++) {
+               rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
+               if (rq == NULL) {
+                       /* We'll get a `leaked requests' message from blk_cleanup_queue */
+                       printk(KERN_EMERG "blk_init_free_list: error allocating requests\n");
+                       break;
+               }
+               memset(rq, 0, sizeof(struct request));
+               rq->rq_status = RQ_INACTIVE;
+               list_add(&rq->queue, &q->rq[i&1].free);
+               q->rq[i&1].count++;
+       }
+
+       init_waitqueue_head(&q->wait_for_request);
+       spin_lock_init(&q->queue_lock);
+}
+
+static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
+
+/**
+ * blk_init_queue  - prepare a request queue for use with a block device
+ * @q:    The &request_queue_t to be initialised
+ * @rfn:  The function to be called to process requests that have been
+ *        placed on the queue.
+ *
+ * Description:
+ *    If a block device wishes to use the standard request handling procedures,
+ *    which sorts requests and coalesces adjacent requests, then it must
+ *    call blk_init_queue().  The function @rfn will be called when there
+ *    are requests on the queue that need to be processed.  If the device
+ *    supports plugging, then @rfn may not be called immediately when requests
+ *    are available on the queue, but may be called at some time later instead.
+ *    Plugged queues are generally unplugged when a buffer belonging to one
+ *    of the requests on the queue is needed, or due to memory pressure.
+ *
+ *    @rfn is not required, or even expected, to remove all requests off the
+ *    queue, but only as many as it can handle at a time.  If it does leave
+ *    requests on the queue, it is responsible for arranging that the requests
+ *    get dealt with eventually.
+ *
+ *    A global spin lock &io_request_lock must be held while manipulating the
+ *    requests on the request queue.
+ *
+ *    The request on the head of the queue is by default assumed to be
+ *    potentially active, and it is not considered for re-ordering or merging
+ *    whenever the given queue is unplugged. This behaviour can be changed with
+ *    blk_queue_headactive().
+ *
+ * Note:
+ *    blk_init_queue() must be paired with a blk_cleanup_queue() call
+ *    when the block device is deactivated (such as at module unload).
+ **/
+void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
+{
+       INIT_LIST_HEAD(&q->queue_head);
+       elevator_init(&q->elevator, ELEVATOR_LINUS);
+       blk_init_free_list(q);
+       q->request_fn           = rfn;
+       q->back_merge_fn        = ll_back_merge_fn;
+       q->front_merge_fn       = ll_front_merge_fn;
+       q->merge_requests_fn    = ll_merge_requests_fn;
+       q->make_request_fn      = __make_request;
+       q->plug_tq.sync         = 0;
+       q->plug_tq.routine      = &generic_unplug_device;
+       q->plug_tq.data         = q;
+       q->plugged              = 0;
+       /*
+        * These booleans describe the queue properties.  We set the
+        * default (and most common) values here.  Other drivers can
+        * use the appropriate functions to alter the queue properties.
+        * as appropriate.
+        */
+       q->plug_device_fn       = generic_plug_device;
+       q->head_active          = 1;
+}
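+
+/*
+ * Editorial sketch (not part of the original import): a conventional
+ * driver pairs blk_init_queue() at load time with blk_cleanup_queue()
+ * at unload time, e.g. (hypothetical names):
+ */
+#if 0
+static void do_mydev_request(request_queue_t *q)
+{
+       /* pull requests off q and service them; io_request_lock is held */
+}
+
+static int __init mydev_init(void)
+{
+       blk_init_queue(BLK_DEFAULT_QUEUE(MYDEV_MAJOR), do_mydev_request);
+       return 0;
+}
+#endif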
+
+#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue)
+/*
+ * Get a free request. io_request_lock must be held and interrupts
+ * disabled on the way in.
+ */
+static inline struct request *get_request(request_queue_t *q, int rw)
+{
+       struct request *rq = NULL;
+       struct request_list *rl = q->rq + rw;
+
+       if (!list_empty(&rl->free)) {
+               rq = blkdev_free_rq(&rl->free);
+               list_del(&rq->queue);
+               rl->count--;
+               rq->rq_status = RQ_ACTIVE;
+               rq->special = NULL;
+               rq->q = q;
+       }
+
+       return rq;
+}
+
+/*
+ * No available requests for this queue, unplug the device.
+ */
+static struct request *__get_request_wait(request_queue_t *q, int rw)
+{
+       register struct request *rq;
+       DECLARE_WAITQUEUE(wait, current);
+
+       generic_unplug_device(q);
+       add_wait_queue(&q->wait_for_request, &wait);
+       do {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (q->rq[rw].count < batch_requests)
+                       schedule();
+               spin_lock_irq(&io_request_lock);
+               rq = get_request(q,rw);
+               spin_unlock_irq(&io_request_lock);
+       } while (rq == NULL);
+       remove_wait_queue(&q->wait_for_request, &wait);
+       current->state = TASK_RUNNING;
+       return rq;
+}
+
+static inline struct request *get_request_wait(request_queue_t *q, int rw)
+{
+       register struct request *rq;
+
+       spin_lock_irq(&io_request_lock);
+       rq = get_request(q, rw);
+       spin_unlock_irq(&io_request_lock);
+       if (rq)
+               return rq;
+       return __get_request_wait(q, rw);
+}
+
+/* RO fail safe mechanism */
+
+static long ro_bits[MAX_BLKDEV][8];
+
+int is_read_only(kdev_t dev)
+{
+       int minor,major;
+
+       major = MAJOR(dev);
+       minor = MINOR(dev);
+       if (major < 0 || major >= MAX_BLKDEV) return 0;
+       return ro_bits[major][minor >> 5] & (1 << (minor & 31));
+}
+
+void set_device_ro(kdev_t dev,int flag)
+{
+       int minor,major;
+
+       major = MAJOR(dev);
+       minor = MINOR(dev);
+       if (major < 0 || major >= MAX_BLKDEV) return;
+       if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
+       else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
+}
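+
+/*
+ * Editorial note (not part of the original import): ro_bits packs one
+ * bit per minor, 32 minors per long.  Marking minor 33 of some major
+ * read-only, for example, sets bit (33 & 31) = 1 of
+ * ro_bits[major][33 >> 5] = ro_bits[major][1]:
+ *
+ *     set_device_ro(MKDEV(major, 33), 1);
+ */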
+
+inline void drive_stat_acct (kdev_t dev, int rw,
+                               unsigned long nr_sectors, int new_io)
+{
+       unsigned int major = MAJOR(dev);
+       unsigned int index;
+
+       index = disk_index(dev);
+       if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+               return;
+
+       kstat.dk_drive[major][index] += new_io;
+       if (rw == READ) {
+               kstat.dk_drive_rio[major][index] += new_io;
+               kstat.dk_drive_rblk[major][index] += nr_sectors;
+       } else if (rw == WRITE) {
+               kstat.dk_drive_wio[major][index] += new_io;
+               kstat.dk_drive_wblk[major][index] += nr_sectors;
+       } else
+               printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
+}
+
+/*
+ * add-request adds a request to the linked list.
+ * io_request_lock is held and interrupts disabled, as we muck with the
+ * request queue list.
+ *
+ * By this point, req->cmd is always either READ/WRITE, never READA,
+ * which is important for drive_stat_acct() above.
+ */
+static inline void add_request(request_queue_t * q, struct request * req,
+                              struct list_head *insert_here)
+{
+       drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
+
+       if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
+               spin_unlock_irq(&io_request_lock);
+               BUG();
+       }
+
+       /*
+        * elevator indicated where it wants this request to be
+        * inserted at elevator_merge time
+        */
+       list_add(&req->queue, insert_here);
+}
+
+/*
+ * Must be called with io_request_lock held and interrupts disabled
+ */
+inline void blkdev_release_request(struct request *req)
+{
+       request_queue_t *q = req->q;
+       int rw = req->cmd;
+
+       req->rq_status = RQ_INACTIVE;
+       req->q = NULL;
+
+       /*
+        * Request may not have originated from ll_rw_blk. if not,
+        * assume it has free buffers and check waiters
+        */
+       if (q) {
+               list_add(&req->queue, &q->rq[rw].free);
+               if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request))
+                       wake_up(&q->wait_for_request);
+       }
+}
+
+/*
+ * Has to be called with the request spinlock acquired
+ */
+static void attempt_merge(request_queue_t * q,
+                         struct request *req,
+                         int max_sectors,
+                         int max_segments)
+{
+       struct request *next;
+  
+       next = blkdev_next_request(req);
+       if (req->sector + req->nr_sectors != next->sector)
+               return;
+       if (req->cmd != next->cmd
+           || req->rq_dev != next->rq_dev
+           || req->nr_sectors + next->nr_sectors > max_sectors
+           || next->waiting)
+               return;
+       /*
+        * If we are not allowed to merge these requests, then
+        * return.  If we are allowed to merge, then the count
+        * will have been updated to the appropriate number,
+        * and we shouldn't do it here too.
+        */
+       if (!q->merge_requests_fn(q, req, next, max_segments))
+               return;
+
+       q->elevator.elevator_merge_req_fn(req, next);
+       req->bhtail->b_reqnext = next->bh;
+       req->bhtail = next->bhtail;
+       req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
+       list_del(&next->queue);
+       blkdev_release_request(next);
+}
+
+static inline void attempt_back_merge(request_queue_t * q,
+                                     struct request *req,
+                                     int max_sectors,
+                                     int max_segments)
+{
+       if (&req->queue == q->queue_head.prev)
+               return;
+       attempt_merge(q, req, max_sectors, max_segments);
+}
+
+static inline void attempt_front_merge(request_queue_t * q,
+                                      struct list_head * head,
+                                      struct request *req,
+                                      int max_sectors,
+                                      int max_segments)
+{
+       struct list_head * prev;
+
+       prev = req->queue.prev;
+       if (head == prev)
+               return;
+       attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
+}
+
+static int __make_request(request_queue_t * q, int rw,
+                                 struct buffer_head * bh)
+{
+       unsigned int sector, count;
+       int max_segments = MAX_SEGMENTS;
+       struct request * req, *freereq = NULL;
+       int rw_ahead, max_sectors, el_ret;
+       struct list_head *head, *insert_here;
+       int latency;
+       elevator_t *elevator = &q->elevator;
+
+       count = bh->b_size >> 9;
+       sector = bh->b_rsector;
+
+       rw_ahead = 0;   /* normal case; gets changed below for READA */
+       switch (rw) {
+               case READA:
+                       rw_ahead = 1;
+                       rw = READ;      /* drop into READ */
+               case READ:
+               case WRITE:
+                       latency = elevator_request_latency(elevator, rw);
+                       break;
+               default:
+                       BUG();
+                       goto end_io;
+       }
+
+       /* We'd better have a real physical mapping!
+          Check this bit only if the buffer was dirty and just locked
+          down by us so at this point flushpage will block and
+          won't clear the mapped bit under us. */
+       if (!buffer_mapped(bh))
+               BUG();
+
+       /*
+        * Temporary solution - in 2.5 this will be done by the lowlevel
+        * driver. Create a bounce buffer if the buffer data points into
+        * high memory - keep the original buffer otherwise.
+        */
+#if CONFIG_HIGHMEM
+       bh = create_bounce(rw, bh);
+#endif
+
+/* look for a free request. */
+       /*
+        * Try to coalesce the new request with old requests
+        */
+       max_sectors = get_max_sectors(bh->b_rdev);
+
+again:
+       req = NULL;
+       head = &q->queue_head;
+       /*
+        * Now we acquire the request spinlock, we have to be mega careful
+        * not to schedule or do something nonatomic
+        */
+       spin_lock_irq(&io_request_lock);
+
+       insert_here = head->prev;
+       if (list_empty(head)) {
+               q->plug_device_fn(q, bh->b_rdev); /* is atomic */
+               goto get_rq;
+       } else if (q->head_active && !q->plugged)
+               head = head->next;
+
+       el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
+       switch (el_ret) {
+
+               case ELEVATOR_BACK_MERGE:
+                       if (!q->back_merge_fn(q, req, bh, max_segments))
+                               break;
+                       elevator->elevator_merge_cleanup_fn(q, req, count);
+                       req->bhtail->b_reqnext = bh;
+                       req->bhtail = bh;
+                       req->nr_sectors = req->hard_nr_sectors += count;
+                       blk_started_io(count);
+                       drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+                       attempt_back_merge(q, req, max_sectors, max_segments);
+                       goto out;
+
+               case ELEVATOR_FRONT_MERGE:
+                       if (!q->front_merge_fn(q, req, bh, max_segments))
+                               break;
+                       elevator->elevator_merge_cleanup_fn(q, req, count);
+                       bh->b_reqnext = req->bh;
+                       req->bh = bh;
+                       req->buffer = bh->b_data;
+                       req->current_nr_sectors = count;
+                       req->sector = req->hard_sector = sector;
+                       req->nr_sectors = req->hard_nr_sectors += count;
+                       blk_started_io(count);
+                       drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+                       attempt_front_merge(q, head, req, max_sectors, max_segments);
+                       goto out;
+
+               /*
+                * elevator says don't/can't merge. get new request
+                */
+               case ELEVATOR_NO_MERGE:
+                       /*
+                        * use elevator hints as to where to insert the
+                        * request. if no hints, just add it to the back
+                        * of the queue
+                        */
+                       if (req)
+                               insert_here = &req->queue;
+                       break;
+
+               default:
+                       printk("elevator returned crap (%d)\n", el_ret);
+                       BUG();
+       }
+               
+       /*
+        * Grab a free request from the freelist - if that is empty, check
+        * if we are doing read ahead and abort instead of blocking for
+        * a free slot.
+        */
+get_rq:
+       if (freereq) {
+               req = freereq;
+               freereq = NULL;
+       } else if ((req = get_request(q, rw)) == NULL) {
+               spin_unlock_irq(&io_request_lock);
+               if (rw_ahead)
+                       goto end_io;
+
+               freereq = __get_request_wait(q, rw);
+               goto again;
+       }
+
+/* fill up the request-info, and add it to the queue */
+       req->elevator_sequence = latency;
+       req->cmd = rw;
+       req->errors = 0;
+       req->hard_sector = req->sector = sector;
+       req->hard_nr_sectors = req->nr_sectors = count;
+       req->current_nr_sectors = count;
+       req->nr_segments = 1; /* Always 1 for a new request. */
+       req->nr_hw_segments = 1; /* Always 1 for a new request. */
+       req->buffer = bh->b_data;
+       req->waiting = NULL;
+       req->bh = bh;
+       req->bhtail = bh;
+       req->rq_dev = bh->b_rdev;
+       blk_started_io(count);
+       add_request(q, req, insert_here);
+out:
+       if (freereq)
+               blkdev_release_request(freereq);
+       spin_unlock_irq(&io_request_lock);
+       return 0;
+end_io:
+       bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+       return 0;
+}
+
+/**
+ * generic_make_request: hand a buffer head to its device driver for I/O
+ * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
+ * @bh:  The buffer head describing the location in memory and on the device.
+ *
+ * generic_make_request() is used to make I/O requests of block
+ * devices. It is passed a &struct buffer_head and a &rw value.  The
+ * %READ and %WRITE options are (hopefully) obvious in meaning.  The
+ * %READA value means that a read is required, but that the driver is
+ * free to fail the request if, for example, it cannot get needed
+ * resources immediately.
+ *
+ * generic_make_request() does not return any status.  The
+ * success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the bh->b_end_io
+ * function described (one day) elsewhere.
+ *
+ * The caller of generic_make_request must make sure that b_page,
+ * b_addr, b_size are set to describe the memory buffer, that b_rdev
+ * and b_rsector are set to describe the device address, and the
+ * b_end_io and optionally b_private are set to describe how
+ * completion notification should be signaled.  BH_Mapped should also
+ * be set (to confirm that b_dev and b_blocknr are valid).
+ *
+ * generic_make_request and the drivers it calls may use b_reqnext,
+ * and may change b_rdev and b_rsector.  So the values of these fields
+ * should NOT be depended on after the call to generic_make_request.
+ * Because of this, the caller should record the device address
+ * information in b_dev and b_blocknr.
+ *
+ * Apart from those fields mentioned above, no other fields, and in
+ * particular, no other flags, are changed by generic_make_request or
+ * any lower level drivers.
+ * */
+void generic_make_request (int rw, struct buffer_head * bh)
+{
+       int major = MAJOR(bh->b_rdev);
+       int minorsize = 0;
+       request_queue_t *q;
+
+       if (!bh->b_end_io)
+               BUG();
+
+       /* Test device size, when known. */
+       if (blk_size[major])
+               minorsize = blk_size[major][MINOR(bh->b_rdev)];
+       if (minorsize) {
+               unsigned long maxsector = (minorsize << 1) + 1;
+               unsigned long sector = bh->b_rsector;
+               unsigned int count = bh->b_size >> 9;
+
+               if (maxsector < count || maxsector - count < sector) {
+                       /* Yecch */
+                       bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
+
+                       /* This may well happen - the kernel calls bread()
+                          without checking the size of the device, e.g.,
+                          when mounting a device. */
+                       printk(KERN_INFO
+                              "attempt to access beyond end of device\n");
+                       printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
+                              kdevname(bh->b_rdev), rw,
+                              (sector + count)>>1, minorsize);
+
+                       /* Yecch again */
+                       bh->b_end_io(bh, 0);
+                       return;
+               }
+       }
+
+       /*
+        * Resolve the mapping until finished. (drivers are
+        * still free to implement/resolve their own stacking
+        * by explicitly returning 0)
+        */
+       /* NOTE: we don't repeat the blk_size check for each new device.
+        * Stacking drivers are expected to know what they are doing.
+        */
+       do {
+               q = blk_get_queue(bh->b_rdev);
+               if (!q) {
+                       printk(KERN_ERR
+                              "generic_make_request: Trying to access "
+                              "nonexistent block-device %s (%ld)\n",
+                              kdevname(bh->b_rdev), bh->b_rsector);
+                       buffer_IO_error(bh);
+                       break;
+               }
+       } while (q->make_request_fn(q, rw, bh));
+}
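+
+/*
+ * Editorial sketch (not part of the original import): per the
+ * description above, a caller of generic_make_request() must fill in
+ * the device address and completion hook itself.  Hypothetical
+ * example:
+ */
+#if 0
+static void my_end_io(struct buffer_head *bh, int uptodate)
+{
+       unlock_buffer(bh);      /* plus whatever bookkeeping the caller needs */
+}
+
+static void submit_raw(kdev_t dev, unsigned long sector,
+                       struct buffer_head *bh)
+{
+       /* bh->b_data and bh->b_size are assumed already set by the caller */
+       bh->b_rdev    = dev;
+       bh->b_rsector = sector;
+       bh->b_end_io  = my_end_io;
+       set_bit(BH_Mapped, &bh->b_state);
+       generic_make_request(READ, bh);
+}
+#endif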
+
+
+/**
+ * submit_bh: submit a buffer_head to the block device later for I/O
+ * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
+ * @bh: The &struct buffer_head which describes the I/O
+ *
+ * submit_bh() is very similar in purpose to generic_make_request(), and
+ * uses that function to do most of the work.
+ *
+ * The extra functionality provided by submit_bh is to determine
+ * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
+ * This is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+       int count = bh->b_size >> 9;
+
+       if (!test_bit(BH_Lock, &bh->b_state))
+               BUG();
+
+       set_bit(BH_Req, &bh->b_state);
+
+       /*
+        * First step, 'identity mapping' - RAID or LVM might
+        * further remap this.
+        */
+       bh->b_rdev = bh->b_dev;
+       bh->b_rsector = bh->b_blocknr * count;
+
+       generic_make_request(rw, bh);
+
+       switch (rw) {
+               case WRITE:
+                       kstat.pgpgout += count;
+                       break;
+               default:
+                       kstat.pgpgin += count;
+                       break;
+       }
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality, not present in
+ * generic_make_request(), that is relevant to buffers in the buffer
+ * cache or page cache.  In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing a read request.  Further, it
+ * marks as clean buffers that are processed for writing (the buffer
+ * cache won't assume that they are actually clean until the buffer
+ * gets unlocked).
+ *
+ * ll_rw_block sets b_end_io to a simple completion handler that marks
+ * the buffer up-to-date (if appropriate), unlocks the buffer, and
+ * wakes any waiters.  A client that needs a more interesting
+ * completion routine should call submit_bh() (or
+ * generic_make_request()) directly.
+ *
+ * Caveat:
+ *  All of the buffers must be for the same device, and must also be
+ *  of the current approved size for the device.  */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+       unsigned int major;
+       int correct_size;
+       int i;
+
+       if (!nr)
+               return;
+
+       major = MAJOR(bhs[0]->b_dev);
+
+       /* Determine correct block size for this device. */
+       correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+       /* Verify requested block sizes. */
+       for (i = 0; i < nr; i++) {
+               struct buffer_head *bh = bhs[i];
+               if (bh->b_size % correct_size) {
+                       printk(KERN_NOTICE "ll_rw_block: device %s: "
+                              "only %d-char blocks implemented (%u)\n",
+                              kdevname(bhs[0]->b_dev),
+                              correct_size, bh->b_size);
+                       goto sorry;
+               }
+       }
+
+       if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+               printk(KERN_NOTICE "Can't write to read-only device %s\n",
+                      kdevname(bhs[0]->b_dev));
+               goto sorry;
+       }
+
+       for (i = 0; i < nr; i++) {
+               struct buffer_head *bh = bhs[i];
+
+               /* Only one thread can actually submit the I/O. */
+               if (test_and_set_bit(BH_Lock, &bh->b_state))
+                       continue;
+
+               /* We have the buffer lock */
+               atomic_inc(&bh->b_count);
+               bh->b_end_io = end_buffer_io_sync;
+
+               switch(rw) {
+               case WRITE:
+                       if (!atomic_set_buffer_clean(bh))
+                               /* Hmmph! Nothing to write */
+                               goto end_io;
+                       __mark_buffer_clean(bh);
+                       break;
+
+               case READA:
+               case READ:
+                       if (buffer_uptodate(bh))
+                               /* Hmmph! Already have it */
+                               goto end_io;
+                       break;
+               default:
+                       BUG();
+       end_io:
+                       bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
+                       continue;
+               }
+
+               submit_bh(rw, bh);
+       }
+       return;
+
+sorry:
+       /* Make sure we don't get infinite dirty retries.. */
+       for (i = 0; i < nr; i++)
+               mark_buffer_clean(bhs[i]);
+}
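+
+/*
+ * Editorial sketch (not part of the original import): the classic
+ * synchronous-read idiom built on ll_rw_block(); this is essentially
+ * what bread() does:
+ */
+#if 0
+static struct buffer_head *read_block(kdev_t dev, int block, int size)
+{
+       struct buffer_head *bh = getblk(dev, block, size);
+
+       if (!buffer_uptodate(bh)) {
+               ll_rw_block(READ, 1, &bh);
+               wait_on_buffer(bh);
+               if (!buffer_uptodate(bh)) {
+                       brelse(bh);
+                       return NULL;    /* I/O error */
+               }
+       }
+       return bh;
+}
+#endif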
+
+#ifdef CONFIG_STRAM_SWAP
+extern int stram_device_init (void);
+#endif
+
+
+/**
+ * end_that_request_first - end I/O on one buffer.
+ * @req:      the request being processed
+ * @uptodate: 0 for I/O error
+ * @name:     the name printed for an I/O error
+ *
+ * Description:
+ *     Ends I/O on the first buffer attached to @req, and sets it up
+ *     for the next buffer_head (if any) in the cluster.
+ *     
+ * Return:
+ *     0 - we are done with this request, call end_that_request_last()
+ *     1 - still buffers pending for this request
+ *
+ * Caveat: 
+ *     Drivers implementing their own end_request handling must call
+ *     blk_finished_io() appropriately.
+ **/
+
+int end_that_request_first (struct request *req, int uptodate, char *name)
+{
+       struct buffer_head * bh;
+       int nsect;
+
+       req->errors = 0;
+       if (!uptodate)
+               printk("end_request: I/O error, dev %s (%s), sector %lu\n",
+                       kdevname(req->rq_dev), name, req->sector);
+
+       if ((bh = req->bh) != NULL) {
+               nsect = bh->b_size >> 9;
+               blk_finished_io(nsect);
+               req->bh = bh->b_reqnext;
+               bh->b_reqnext = NULL;
+               bh->b_end_io(bh, uptodate);
+               if ((bh = req->bh) != NULL) {
+                       req->hard_sector += nsect;
+                       req->hard_nr_sectors -= nsect;
+                       req->sector = req->hard_sector;
+                       req->nr_sectors = req->hard_nr_sectors;
+
+                       req->current_nr_sectors = bh->b_size >> 9;
+                       if (req->nr_sectors < req->current_nr_sectors) {
+                               req->nr_sectors = req->current_nr_sectors;
+                               printk("end_request: buffer-list destroyed\n");
+                       }
+                       req->buffer = bh->b_data;
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+void end_that_request_last(struct request *req)
+{
+       if (req->waiting != NULL)
+               complete(req->waiting);
+
+       blkdev_release_request(req);
+}
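+
+/*
+ * Example (hypothetical driver code): the completion pattern a block
+ * driver typically builds from the two helpers above -- retire buffers
+ * one at a time, and only dequeue and release the request once
+ * end_that_request_first() reports that no buffers remain:
+ *
+ *     static void mydev_end_request(struct request *req, int uptodate)
+ *     {
+ *             if (!end_that_request_first(req, uptodate, "mydev")) {
+ *                     blkdev_dequeue_request(req);
+ *                     end_that_request_last(req);
+ *             }
+ *     }
+ */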
+
+#define MB(kb) ((kb) << 10)
+
+int __init blk_dev_init(void)
+{
+       struct blk_dev_struct *dev;
+       int total_ram;
+
+       request_cachep = kmem_cache_create("blkdev_requests",
+                                          sizeof(struct request),
+                                          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+       if (!request_cachep)
+               panic("Can't create request pool slab cache\n");
+
+       for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
+               dev->queue = NULL;
+
+       memset(ro_bits,0,sizeof(ro_bits));
+       memset(max_readahead, 0, sizeof(max_readahead));
+       memset(max_sectors, 0, sizeof(max_sectors));
+
+       total_ram = nr_free_pages() << (PAGE_SHIFT - 10);
+
+       /*
+        * Free request slots per queue.
+        * (Half for reads, half for writes)
+        */
+       queue_nr_requests = 64;
+       if (total_ram > MB(32))
+               queue_nr_requests = 128;
+
+       /*
+        * Batch frees according to queue length
+        */
+       batch_requests = queue_nr_requests/4;
+       printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests);
+
+#ifdef CONFIG_AMIGA_Z2RAM
+       z2_init();
+#endif
+#ifdef CONFIG_STRAM_SWAP
+       stram_device_init();
+#endif
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_init();
+#endif
+#ifdef CONFIG_ISP16_CDI
+       isp16_init();
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
+       ide_init();             /* this MUST precede hd_init */
+#endif
+#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
+       hd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_PS2
+       ps2esdi_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+       xd_init();
+#endif
+#ifdef CONFIG_BLK_DEV_MFM
+       mfm_init();
+#endif
+#ifdef CONFIG_PARIDE
+       { extern void paride_init(void); paride_init(); };
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+       swim3_init();
+#endif
+#ifdef CONFIG_BLK_DEV_SWIM_IOP
+       swimiop_init();
+#endif
+#ifdef CONFIG_AMIGA_FLOPPY
+       amiga_floppy_init();
+#endif
+#ifdef CONFIG_ATARI_FLOPPY
+       atari_floppy_init();
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+       floppy_init();
+#else
+#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */
+       outb_p(0xc, 0x3f2);
+#endif
+#endif
+#ifdef CONFIG_CDU31A
+       cdu31a_init();
+#endif
+#ifdef CONFIG_ATARI_ACSI
+       acsi_init();
+#endif
+#ifdef CONFIG_MCD
+       mcd_init();
+#endif
+#ifdef CONFIG_MCDX
+       mcdx_init();
+#endif
+#ifdef CONFIG_SBPCD
+       sbpcd_init();
+#endif
+#ifdef CONFIG_AZTCD
+       aztcd_init();
+#endif
+#ifdef CONFIG_CDU535
+       sony535_init();
+#endif
+#ifdef CONFIG_GSCD
+       gscd_init();
+#endif
+#ifdef CONFIG_CM206
+       cm206_init();
+#endif
+#ifdef CONFIG_OPTCD
+       optcd_init();
+#endif
+#ifdef CONFIG_SJCD
+       sjcd_init();
+#endif
+#ifdef CONFIG_APBLOCK
+       ap_init();
+#endif
+#ifdef CONFIG_DDV
+       ddv_init();
+#endif
+#ifdef CONFIG_MDISK
+       mdisk_init();
+#endif
+#ifdef CONFIG_DASD
+       dasd_init();
+#endif
+#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
+       tapeblock_init();
+#endif
+#ifdef CONFIG_BLK_DEV_XPRAM
+        xpram_init();
+#endif
+
+#ifdef CONFIG_SUN_JSFLASH
+       jsfd_init();
+#endif
+       return 0;
+}
+
+EXPORT_SYMBOL(io_request_lock);
+EXPORT_SYMBOL(end_that_request_first);
+EXPORT_SYMBOL(end_that_request_last);
+EXPORT_SYMBOL(blk_init_queue);
+EXPORT_SYMBOL(blk_get_queue);
+EXPORT_SYMBOL(blk_cleanup_queue);
+EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_make_request);
+EXPORT_SYMBOL(generic_make_request);
+EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(generic_unplug_device);
diff --git a/xenolinux-2.4.16-sparse/drivers/block/rd.c b/xenolinux-2.4.16-sparse/drivers/block/rd.c
new file mode 100644 (file)
index 0000000..ffcc9d3
--- /dev/null
@@ -0,0 +1,1009 @@
+/*
+ * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta.
+ * 
+ * (C) Chad Page, Theodore Ts'o, et al., 1995. 
+ *
+ * This RAM disk is designed to have filesystems created on it and mounted
+ * just like a regular floppy disk.  
+ *  
+ * It also does something suggested by Linus: use the buffer cache as the
+ * RAM disk data.  This makes it possible to dynamically allocate the RAM disk
+ * buffer - with some consequences I have to deal with as I write this. 
+ * 
+ * This code is based on the original ramdisk.c, written mostly by
+ * Theodore Ts'o (TYT) in 1991.  The code was largely rewritten by
+ * Chad Page to use the buffer cache to store the RAM disk data in
+ * 1995; Theodore then took over the driver again, and cleaned it up
+ * for inclusion in the mainline kernel.
+ *
+ * The original CRAMDISK code was written by Richard Lyons, and
+ * adapted by Chad Page to use the new RAM disk interface.  Theodore
+ * Ts'o rewrote it so that both the compressed RAM disk loader and the
+ * kernel decompressor use the same inflate.c codebase.  The RAM disk
+ * loader now also loads into a dynamic (buffer cache based) RAM disk,
+ * not the old static RAM disk.  Support for the old static RAM disk has
+ * been completely removed.
+ *
+ * Loadable module support added by Tom Dyas.
+ *
+ * Further cleanups by Chad Page (page0588@sundance.sjsu.edu):
+ *     Cosmetic changes in #ifdef MODULE, code movement, etc.
+ *     When the RAM disk module is removed, free the protected buffers
+ *     Default RAM disk size changed to 2.88 MB
+ *
+ *  Added initrd: Werner Almesberger & Hans Lermen, Feb '96
+ *
+ * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) 
+ *             - Chad Page
+ *
+ * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98
+ *
+ * Make block size and block size shift for RAM disks a global macro
+ * and set blk_size for -ENOSPC,     Werner Fink <werner@suse.de>, Apr '99
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/minix_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/romfs_fs.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/hdreg.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/ioctl.h>
+#include <linux/fd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/smp_lock.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+
+extern void wait_for_keypress(void);
+
+/*
+ * Major number 35 has been officially registered for the RAM disk, but
+ * so has the original major number of 1.  We're still using 1 (from
+ * include/linux/major.h) for now.
+ */
+#define MAJOR_NR RAMDISK_MAJOR
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+
+/* The RAM disk size is now a parameter */
+#define NUM_RAMDISKS 16                /* This cannot be overridden (yet) */ 
+
+#ifndef MODULE
+/* We don't have to load RAM disks or gunzip them in a module. */
+#define RD_LOADER
+#define BUILD_CRAMDISK
+
+void rd_load(void);
+static int crd_load(struct file *fp, struct file *outfp);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+static int initrd_users;
+#endif
+#endif
+
+/* Various static variables go here.  Most are used only in the RAM disk code.
+ */
+
+static unsigned long rd_length[NUM_RAMDISKS];  /* Size of RAM disks in bytes   */
+static int rd_hardsec[NUM_RAMDISKS];           /* Size of real blocks in bytes */
+static int rd_blocksizes[NUM_RAMDISKS];                /* Size of 1024 byte blocks :)  */
+static int rd_kbsize[NUM_RAMDISKS];            /* Size in blocks of 1024 bytes */
+static devfs_handle_t devfs_handle;
+static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */
+
+/*
+ * Parameters for the boot-loading of the RAM disk.  These are set by
+ * init/main.c (from arguments to the kernel command line) or from the
+ * architecture-specific setup routine (from the stored boot sector
+ * information). 
+ */
+int rd_size = CONFIG_BLK_DEV_RAM_SIZE;         /* Size of the RAM disks */
+/*
+ * It would be very desirable to have a soft blocksize (which for the
+ * ramdisk driver is also the hard blocksize ;) of PAGE_SIZE, because that
+ * gives a far better MM footprint.  With a rd_blocksize of BLOCK_SIZE we
+ * can, in the worst case, make PAGE_SIZE/BLOCK_SIZE buffer-pages
+ * unfreeable, whereas with a rd_blocksize of PAGE_SIZE at most one page
+ * is ever protected.  Depending on the size of the ramdisk you may want
+ * to change the ramdisk blocksize to achieve better or worse MM
+ * behaviour.  The default is still BLOCK_SIZE (needed by rd_load_image,
+ * which assumes the filesystem in the image uses a BLOCK_SIZE blocksize).
+ */
+int rd_blocksize = BLOCK_SIZE;                 /* blocksize of the RAM disks */
+
+#ifndef MODULE
+
+int rd_doload;                 /* 1 = load RAM disk, 0 = don't load */
+int rd_prompt = 1;             /* 1 = prompt for RAM disk, 0 = don't prompt */
+int rd_image_start;            /* starting block # of image */
+#ifdef CONFIG_BLK_DEV_INITRD
+unsigned long initrd_start, initrd_end;
+int mount_initrd = 1;          /* zero if initrd should not be mounted */
+int initrd_below_start_ok;
+
+static int __init no_initrd(char *str)
+{
+       mount_initrd = 0;
+       return 1;
+}
+
+__setup("noinitrd", no_initrd);
+
+#endif
+
+static int __init ramdisk_start_setup(char *str)
+{
+       rd_image_start = simple_strtol(str,NULL,0);
+       return 1;
+}
+
+static int __init load_ramdisk(char *str)
+{
+       rd_doload = simple_strtol(str,NULL,0) & 3;
+       return 1;
+}
+
+static int __init prompt_ramdisk(char *str)
+{
+       rd_prompt = simple_strtol(str,NULL,0) & 1;
+       return 1;
+}
+
+static int __init ramdisk_size(char *str)
+{
+       rd_size = simple_strtol(str,NULL,0);
+       return 1;
+}
+
+static int __init ramdisk_size2(char *str)
+{
+       return ramdisk_size(str);
+}
+
+static int __init ramdisk_blocksize(char *str)
+{
+       rd_blocksize = simple_strtol(str,NULL,0);
+       return 1;
+}
+
+__setup("ramdisk_start=", ramdisk_start_setup);
+__setup("load_ramdisk=", load_ramdisk);
+__setup("prompt_ramdisk=", prompt_ramdisk);
+__setup("ramdisk=", ramdisk_size);
+__setup("ramdisk_size=", ramdisk_size2);
+__setup("ramdisk_blocksize=", ramdisk_blocksize);
+
+#endif
+
+/*
+ * Copyright (C) 2000 Linus Torvalds.
+ *               2000 Transmeta Corp.
+ * aops copied from ramfs.
+ */
+static int ramdisk_readpage(struct file *file, struct page * page)
+{
+       if (!Page_Uptodate(page)) {
+               memset(kmap(page), 0, PAGE_CACHE_SIZE);
+               kunmap(page);
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+       }
+       UnlockPage(page);
+       return 0;
+}
+
+static int ramdisk_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+       if (!Page_Uptodate(page)) {
+               void *addr = page_address(page);
+               memset(addr, 0, PAGE_CACHE_SIZE);
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+       }
+       SetPageDirty(page);
+       return 0;
+}
+
+static int ramdisk_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+{
+       return 0;
+}
+
+static struct address_space_operations ramdisk_aops = {
+       readpage: ramdisk_readpage,
+       writepage: fail_writepage,
+       prepare_write: ramdisk_prepare_write,
+       commit_write: ramdisk_commit_write,
+};
+
+static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor)
+{
+       struct address_space * mapping;
+       unsigned long index;
+       int offset, size, err;
+
+       err = 0;
+       mapping = rd_bdev[minor]->bd_inode->i_mapping;
+
+       index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9);
+       offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK;
+       size = sbh->b_size;
+
+       do {
+               int count;
+               struct page ** hash;
+               struct page * page;
+               char * src, * dst;
+               int unlock = 0;
+
+               count = PAGE_CACHE_SIZE - offset;
+               if (count > size)
+                       count = size;
+               size -= count;
+
+               hash = page_hash(mapping, index);
+               page = __find_get_page(mapping, index, hash);
+               if (!page) {
+                       page = grab_cache_page(mapping, index);
+                       err = -ENOMEM;
+                       if (!page)
+                               goto out;
+                       err = 0;
+
+                       if (!Page_Uptodate(page)) {
+                               memset(kmap(page), 0, PAGE_CACHE_SIZE);
+                               kunmap(page);
+                               SetPageUptodate(page);
+                       }
+
+                       unlock = 1;
+               }
+
+               index++;
+
+               if (rw == READ) {
+                       src = kmap(page);
+                       src += offset;
+                       dst = bh_kmap(sbh);
+               } else {
+                       dst = kmap(page);
+                       dst += offset;
+                       src = bh_kmap(sbh);
+               }
+               offset = 0;
+
+               memcpy(dst, src, count);
+
+               kunmap(page);
+               bh_kunmap(sbh);
+
+               if (rw == READ) {
+                       flush_dcache_page(page);
+               } else {
+                       SetPageDirty(page);
+               }
+               if (unlock)
+                       UnlockPage(page);
+               __free_page(page);
+       } while (size);
+
+ out:
+       return err;
+}
+
+/*
+ *  Basically, my strategy here is to set up a buffer-head which can't be
+ *  deleted, and make that my Ramdisk.  If the request is outside of the
+ *  allocated size, we must get rid of it...
+ *
+ * 19-JAN-1998  Richard Gooch <rgooch@atnf.csiro.au>  Added devfs support
+ *
+ */
+static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh)
+{
+       unsigned int minor;
+       unsigned long offset, len;
+
+       minor = MINOR(sbh->b_rdev);
+
+       if (minor >= NUM_RAMDISKS)
+               goto fail;
+
+       offset = sbh->b_rsector << 9;
+       len = sbh->b_size;
+
+       if ((offset + len) > rd_length[minor])
+               goto fail;
+
+       if (rw==READA)
+               rw=READ;
+       if ((rw != READ) && (rw != WRITE)) {
+               printk(KERN_INFO "RAMDISK: bad command: %d\n", rw);
+               goto fail;
+       }
+
+       if (rd_blkdev_pagecache_IO(rw, sbh, minor))
+               goto fail;
+
+       sbh->b_end_io(sbh,1);
+       return 0;
+ fail:
+       sbh->b_end_io(sbh,0);
+       return 0;
+} 
+
+static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+       int error = -EINVAL;
+       unsigned int minor;
+
+       if (!inode || !inode->i_rdev)   
+               goto out;
+
+       minor = MINOR(inode->i_rdev);
+
+       switch (cmd) {
+               case BLKFLSBUF:
+                       if (!capable(CAP_SYS_ADMIN))
+                               return -EACCES;
+                       /* special: we want to release the ramdisk memory,
+                          it's not like with the other blockdevices where
+                          this ioctl only flushes away the buffer cache. */
+                       error = -EBUSY;
+                       down(&inode->i_bdev->bd_sem);
+                       if (inode->i_bdev->bd_openers <= 2) {
+                               truncate_inode_pages(inode->i_mapping, 0);
+                               error = 0;
+                       }
+                       up(&inode->i_bdev->bd_sem);
+                       break;
+               case BLKGETSIZE:   /* Return device size */
+                       if (!arg)
+                               break;
+                       error = put_user(rd_kbsize[minor] << 1, (unsigned long *) arg);
+                       break;
+               case BLKGETSIZE64:
+                       error = put_user((u64)rd_kbsize[minor]<<10, (u64*)arg);
+                       break;
+               case BLKROSET:
+               case BLKROGET:
+               case BLKSSZGET:
+                       error = blk_ioctl(inode->i_rdev, cmd, arg);
+       };
+out:
+       return error;
+}
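+
+/*
+ * Example (hypothetical user-space caller): querying a RAM disk's size
+ * through the BLKGETSIZE ioctl handled above, which reports the size in
+ * 512-byte sectors (/dev/rd/0 with devfs, /dev/ram0 without):
+ *
+ *     unsigned long sectors;
+ *     int fd = open("/dev/rd/0", O_RDONLY);
+ *     if (fd >= 0 && ioctl(fd, BLKGETSIZE, &sectors) == 0)
+ *             printf("%lu kB\n", sectors / 2);
+ */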
+
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static ssize_t initrd_read(struct file *file, char *buf,
+                          size_t count, loff_t *ppos)
+{
+       int left;
+
+       left = initrd_end - initrd_start - *ppos;
+       if (count > left) count = left;
+       if (count == 0) return 0;
+       copy_to_user(buf, (char *)initrd_start + *ppos, count);
+       *ppos += count;
+       return count;
+}
+
+
+static int initrd_release(struct inode *inode,struct file *file)
+{
+       extern void free_initrd_mem(unsigned long, unsigned long);
+
+       lock_kernel();
+       if (!--initrd_users) {
+               free_initrd_mem(initrd_start, initrd_end);
+               initrd_start = 0;
+       }
+       unlock_kernel();
+       blkdev_put(inode->i_bdev, BDEV_FILE);
+       return 0;
+}
+
+
+static struct file_operations initrd_fops = {
+       read:           initrd_read,
+       release:        initrd_release,
+};
+
+#endif
+
+
+static int rd_open(struct inode * inode, struct file * filp)
+{
+       int unit = DEVICE_NR(inode->i_rdev);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (unit == INITRD_MINOR) {
+               if (!initrd_start) return -ENODEV;
+               initrd_users++;
+               filp->f_op = &initrd_fops;
+               return 0;
+       }
+#endif
+
+       if (unit >= NUM_RAMDISKS)
+               return -ENXIO;
+
+       /*
+        * Immunize device against invalidate_buffers() and prune_icache().
+        */
+       if (rd_bdev[unit] == NULL) {
+               rd_bdev[unit] = bdget(kdev_t_to_nr(inode->i_rdev));
+               rd_bdev[unit]->bd_openers++;
+               rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops;
+       }
+
+       return 0;
+}
+
+static struct block_device_operations rd_bd_op = {
+       owner:          THIS_MODULE,
+       open:           rd_open,
+       ioctl:          rd_ioctl,
+};
+
+#ifdef MODULE
+/* Before freeing the module, invalidate all of the protected buffers! */
+static void __exit rd_cleanup (void)
+{
+       int i;
+
+       for (i = 0 ; i < NUM_RAMDISKS; i++) {
+               struct block_device *bdev = rd_bdev[i];
+               rd_bdev[i] = NULL;
+               if (bdev)
+                       blkdev_put(bdev, BDEV_FILE);
+               destroy_buffers(MKDEV(MAJOR_NR, i));
+       }
+
+       devfs_unregister (devfs_handle);
+       unregister_blkdev( MAJOR_NR, "ramdisk" );
+       hardsect_size[MAJOR_NR] = NULL;
+       blksize_size[MAJOR_NR] = NULL;
+       blk_size[MAJOR_NR] = NULL;
+}
+#endif
+
+/* This is the registration and initialization section of the RAM disk driver */
+int __init rd_init (void)
+{
+       int             i;
+
+       if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 ||
+           (rd_blocksize & (rd_blocksize-1)))
+       {
+               printk("RAMDISK: wrong blocksize %d, reverting to defaults\n",
+                      rd_blocksize);
+               rd_blocksize = BLOCK_SIZE;
+       }
+
+       if (register_blkdev(MAJOR_NR, "ramdisk", &rd_bd_op)) {
+               printk("RAMDISK: Could not get major %d", MAJOR_NR);
+               return -EIO;
+       }
+
+       blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), &rd_make_request);
+
+       for (i = 0; i < NUM_RAMDISKS; i++) {
+               /* rd_size is given in kB */
+               rd_length[i] = rd_size << 10;
+               rd_hardsec[i] = rd_blocksize;
+               rd_blocksizes[i] = rd_blocksize;
+               rd_kbsize[i] = rd_size;
+       }
+       devfs_handle = devfs_mk_dir (NULL, "rd", NULL);
+       devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS,
+                              DEVFS_FL_DEFAULT, MAJOR_NR, 0,
+                              S_IFBLK | S_IRUSR | S_IWUSR,
+                              &rd_bd_op, NULL);
+
+       for (i = 0; i < NUM_RAMDISKS; i++)
+               register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &rd_bd_op, rd_size<<1);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       /* We ought to separate initrd operations here */
+       register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1);
+#endif
+
+       hardsect_size[MAJOR_NR] = rd_hardsec;           /* Size of the RAM disk blocks */
+       blksize_size[MAJOR_NR] = rd_blocksizes;         /* Avoid set_blocksize() check */
+       blk_size[MAJOR_NR] = rd_kbsize;                 /* Size of the RAM disk in kB  */
+
+       /* rd_size is given in kB */
+       printk("RAMDISK driver initialized: "
+              "%d RAM disks of %dK size %d blocksize\n",
+              NUM_RAMDISKS, rd_size, rd_blocksize);
+
+       return 0;
+}
+
+#ifdef MODULE
+module_init(rd_init);
+module_exit(rd_cleanup);
+#endif
+
+/* loadable module support */
+MODULE_PARM     (rd_size, "1i");
+MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
+MODULE_PARM     (rd_blocksize, "i");
+MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes.");
+
+MODULE_LICENSE("GPL");
+
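+/*
+ * Example: when built as a module, the parameters above are passed at
+ * load time with 2.4-era modutils, e.g.
+ *
+ *     insmod rd.o rd_size=4096
+ *
+ * which gives sixteen 4 MB RAM disks (rd_size is in kbytes).
+ */
+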
+/* End of non-loading portions of the RAM disk driver */
+
+#ifdef RD_LOADER 
+/*
+ * This routine tries to find a RAM disk image to load, and returns the
+ * number of blocks to read for a non-compressed image, 0 if the image
+ * is a compressed image, and -1 if an image with the right magic
+ * numbers could not be found.
+ *
+ * We currently check for the following magic numbers:
+ *     minix
+ *     ext2
+ *     romfs
+ *     gzip
+ */
+static int __init 
+identify_ramdisk_image(kdev_t device, struct file *fp, int start_block)
+{
+       const int size = 512;
+       struct minix_super_block *minixsb;
+       struct ext2_super_block *ext2sb;
+       struct romfs_super_block *romfsb;
+       int nblocks = -1;
+       unsigned char *buf;
+
+       buf = kmalloc(size, GFP_KERNEL);
+       if (buf == 0)
+               return -1;
+
+       minixsb = (struct minix_super_block *) buf;
+       ext2sb = (struct ext2_super_block *) buf;
+       romfsb = (struct romfs_super_block *) buf;
+       memset(buf, 0xe5, size);
+
+       /*
+        * Read block 0 to test for gzipped kernel
+        */
+       if (fp->f_op->llseek)
+               fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0);
+       fp->f_pos = start_block * BLOCK_SIZE;
+       
+       fp->f_op->read(fp, buf, size, &fp->f_pos);
+
+       /*
+        * If it matches the gzip magic numbers, return -1
+        */
+       if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) {
+               printk(KERN_NOTICE
+                      "RAMDISK: Compressed image found at block %d\n",
+                      start_block);
+               nblocks = 0;
+               goto done;
+       }
+
+       /* romfs is at block zero too */
+       if (romfsb->word0 == ROMSB_WORD0 &&
+           romfsb->word1 == ROMSB_WORD1) {
+               printk(KERN_NOTICE
+                      "RAMDISK: romfs filesystem found at block %d\n",
+                      start_block);
+               nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
+               goto done;
+       }
+
+       /*
+        * Read block 1 to test for minix and ext2 superblock
+        */
+       if (fp->f_op->llseek)
+               fp->f_op->llseek(fp, (start_block+1) * BLOCK_SIZE, 0);
+       fp->f_pos = (start_block+1) * BLOCK_SIZE;
+
+       fp->f_op->read(fp, buf, size, &fp->f_pos);
+               
+       /* Try minix */
+       if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
+           minixsb->s_magic == MINIX_SUPER_MAGIC2) {
+               printk(KERN_NOTICE
+                      "RAMDISK: Minix filesystem found at block %d\n",
+                      start_block);
+               nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
+               goto done;
+       }
+
+       /* Try ext2 */
+       if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) {
+               printk(KERN_NOTICE
+                      "RAMDISK: ext2 filesystem found at block %d\n",
+                      start_block);
+               nblocks = le32_to_cpu(ext2sb->s_blocks_count);
+               goto done;
+       }
+
+       printk(KERN_NOTICE
+              "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
+              start_block);
+       
+done:
+       if (fp->f_op->llseek)
+               fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0);
+       fp->f_pos = start_block * BLOCK_SIZE;   
+
+       kfree(buf);
+       return nblocks;
+}
+
+/*
+ * This routine loads in the RAM disk image.
+ */
+static void __init rd_load_image(kdev_t device, int offset, int unit)
+{
+       struct inode *inode, *out_inode;
+       struct file infile, outfile;
+       struct dentry in_dentry, out_dentry;
+       mm_segment_t fs;
+       kdev_t ram_device;
+       int nblocks, i;
+       char *buf;
+       unsigned short rotate = 0;
+       unsigned short devblocks = 0;
+#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO)
+       char rotator[4] = { '|' , '/' , '-' , '\\' };
+#endif
+       ram_device = MKDEV(MAJOR_NR, unit);
+
+       if ((inode = get_empty_inode()) == NULL)
+               return;
+       memset(&infile, 0, sizeof(infile));
+       memset(&in_dentry, 0, sizeof(in_dentry));
+       infile.f_mode = 1; /* read only */
+       infile.f_dentry = &in_dentry;
+       in_dentry.d_inode = inode;
+       infile.f_op = &def_blk_fops;
+       init_special_inode(inode, S_IFBLK | S_IRUSR, kdev_t_to_nr(device));
+
+       if ((out_inode = get_empty_inode()) == NULL)
+               goto free_inode;
+       memset(&outfile, 0, sizeof(outfile));
+       memset(&out_dentry, 0, sizeof(out_dentry));
+       outfile.f_mode = 3; /* read/write */
+       outfile.f_dentry = &out_dentry;
+       out_dentry.d_inode = out_inode;
+       outfile.f_op = &def_blk_fops;
+       init_special_inode(out_inode, S_IFBLK | S_IRUSR | S_IWUSR, kdev_t_to_nr(ram_device));
+
+       if (blkdev_open(inode, &infile) != 0) {
+               iput(out_inode);
+               goto free_inode;
+       }
+       if (blkdev_open(out_inode, &outfile) != 0)
+               goto free_inodes;
+
+       fs = get_fs();
+       set_fs(KERNEL_DS);
+       
+       nblocks = identify_ramdisk_image(device, &infile, offset);
+       if (nblocks < 0)
+               goto done;
+
+       if (nblocks == 0) {
+#ifdef BUILD_CRAMDISK
+               if (crd_load(&infile, &outfile) == 0)
+                       goto successful_load;
+#else
+               printk(KERN_NOTICE
+                      "RAMDISK: Kernel does not support compressed "
+                      "RAM disk images\n");
+#endif
+               goto done;
+       }
+
+       /*
+        * NOTE NOTE: nblocks suppose that the blocksize is BLOCK_SIZE, so
+        * rd_load_image will work only with filesystem BLOCK_SIZE wide!
+        * So make sure to use 1k blocksize while generating ext2fs
+        * ramdisk-images.
+        */
+       if (nblocks > (rd_length[unit] >> BLOCK_SIZE_BITS)) {
+               printk("RAMDISK: image too big! (%d/%ld blocks)\n",
+                      nblocks, rd_length[unit] >> BLOCK_SIZE_BITS);
+               goto done;
+       }
+               
+       /*
+        * OK, time to copy in the data
+        */
+       buf = kmalloc(BLOCK_SIZE, GFP_KERNEL);
+       if (buf == 0) {
+               printk(KERN_ERR "RAMDISK: could not allocate buffer\n");
+               goto done;
+       }
+
+       if (blk_size[MAJOR(device)])
+               devblocks = blk_size[MAJOR(device)][MINOR(device)];
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (MAJOR(device) == MAJOR_NR && MINOR(device) == INITRD_MINOR)
+               devblocks = nblocks;
+#endif
+
+       if (devblocks == 0) {
+               printk(KERN_ERR "RAMDISK: could not determine device size\n");
+               goto done;
+       }
+
+       printk(KERN_NOTICE "RAMDISK: Loading %d blocks [%d disk%s] into ram disk... ", 
+               nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
+       for (i=0; i < nblocks; i++) {
+               if (i && (i % devblocks == 0)) {
+                       printk("done disk #%d.\n", i/devblocks);
+                       rotate = 0;
+                       if (infile.f_op->release(inode, &infile) != 0) {
+                               printk("Error closing the disk.\n");
+                               goto noclose_input;
+                       }
+                       printk("Please insert disk #%d and press ENTER\n", i/devblocks+1);
+                       wait_for_keypress();
+                       if (blkdev_open(inode, &infile) != 0)  {
+                               printk("Error opening disk.\n");
+                               goto noclose_input;
+                       }
+                       infile.f_pos = 0;
+                       printk("Loading disk #%d... ", i/devblocks+1);
+               }
+               infile.f_op->read(&infile, buf, BLOCK_SIZE, &infile.f_pos);
+               outfile.f_op->write(&outfile, buf, BLOCK_SIZE, &outfile.f_pos);
+#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO)
+               if (!(i % 16)) {
+                       printk("%c\b", rotator[rotate & 0x3]);
+                       rotate++;
+               }
+#endif
+       }
+       printk("done.\n");
+       kfree(buf);
+
+successful_load:
+       ROOT_DEV = MKDEV(MAJOR_NR, unit);
+       if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0");
+
+done:
+       infile.f_op->release(inode, &infile);
+noclose_input:
+       blkdev_close(out_inode, &outfile);
+       iput(inode);
+       iput(out_inode);
+       set_fs(fs);
+       return;
+free_inodes: /* free inodes on error */ 
+       iput(out_inode);
+       infile.f_op->release(inode, &infile);
+free_inode:
+       iput(inode);
+}
+
+#ifdef CONFIG_MAC_FLOPPY
+int swim3_fd_eject(int devnum);
+#endif
+
+static void __init rd_load_disk(int n)
+{
+
+       if (rd_doload == 0)
+               return;
+
+       if (MAJOR(ROOT_DEV) != FLOPPY_MAJOR
+#ifdef CONFIG_BLK_DEV_INITRD
+               && MAJOR(real_root_dev) != FLOPPY_MAJOR
+#endif
+       )
+               return;
+
+       if (rd_prompt) {
+#ifdef CONFIG_BLK_DEV_FD
+               floppy_eject();
+#endif
+#ifdef CONFIG_MAC_FLOPPY
+               if(MAJOR(ROOT_DEV) == FLOPPY_MAJOR)
+                       swim3_fd_eject(MINOR(ROOT_DEV));
+               else if(MAJOR(real_root_dev) == FLOPPY_MAJOR)
+                       swim3_fd_eject(MINOR(real_root_dev));
+#endif
+               printk(KERN_NOTICE
+                      "VFS: Insert root floppy disk to be loaded into RAM disk and press ENTER\n");
+               wait_for_keypress();
+       }
+
+       rd_load_image(ROOT_DEV,rd_image_start, n);
+
+}
+
+void __init rd_load(void)
+{
+       rd_load_disk(0);
+}
+
+void __init rd_load_secondary(void)
+{
+       rd_load_disk(1);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init initrd_load(void)
+{
+       rd_load_image(MKDEV(MAJOR_NR, INITRD_MINOR),rd_image_start,0);
+}
+#endif
+
+#endif /* RD_LOADER */
+
+#ifdef BUILD_CRAMDISK
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+
+#ifndef memzero
+#define memzero(s, n)     memset ((s), 0, (n))
+#endif
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define INBUFSIZ 4096
+#define WSIZE 0x8000    /* window size--must be a power of two, and */
+                       /*  at least 32K for zip's deflate method */
+
+static uch *inbuf;
+static uch *window;
+
+static unsigned insize;  /* valid bytes in inbuf */
+static unsigned inptr;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt;  /* bytes in output buffer */
+static int exit_code;
+static long bytes_out;
+static struct file *crd_infp, *crd_outfp;
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+               
+/* Diagnostic functions (stubbed out) */
+#define Assert(cond,msg)
+#define Trace(x)
+#define Tracev(x)
+#define Tracevv(x)
+#define Tracec(c,x)
+#define Tracecv(c,x)
+
+#define STATIC static
+
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void *malloc(int size);
+static void free(void *where);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+
+#include "../../lib/inflate.c"
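+
+/*
+ * Note: inflate.c is written to be #included by a host file that first
+ * defines get_byte(), flush_window(), error() and the malloc()/free()
+ * hooks declared above; the boot-time kernel decompressor includes it
+ * the same way.
+ */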
+
+static void __init *malloc(int size)
+{
+       return kmalloc(size, GFP_KERNEL);
+}
+
+static void __init free(void *where)
+{
+       kfree(where);
+}
+
+static void __init gzip_mark(void **ptr)
+{
+}
+
+static void __init gzip_release(void **ptr)
+{
+}
+
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int __init fill_inbuf(void)
+{
+       if (exit_code) return -1;
+       
+       insize = crd_infp->f_op->read(crd_infp, inbuf, INBUFSIZ,
+                                     &crd_infp->f_pos);
+       if (insize == 0) return -1;
+
+       inptr = 1;
+
+       return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+static void __init flush_window(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in, ch;
+    
+    crd_outfp->f_op->write(crd_outfp, window, outcnt, &crd_outfp->f_pos);
+    in = window;
+    for (n = 0; n < outcnt; n++) {
+           ch = *in++;
+           c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void __init error(char *x)
+{
+       printk(KERN_ERR "%s", x);
+       exit_code = 1;
+}
+
+static int __init 
+crd_load(struct file * fp, struct file *outfp)
+{
+       int result;
+
+       insize = 0;             /* valid bytes in inbuf */
+       inptr = 0;              /* index of next byte to be processed in inbuf */
+       outcnt = 0;             /* bytes in output buffer */
+       exit_code = 0;
+       bytes_out = 0;
+       crc = (ulg)0xffffffffL; /* shift register contents */
+
+       crd_infp = fp;
+       crd_outfp = outfp;
+       inbuf = kmalloc(INBUFSIZ, GFP_KERNEL);
+       if (inbuf == 0) {
+               printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n");
+               return -1;
+       }
+       window = kmalloc(WSIZE, GFP_KERNEL);
+       if (window == 0) {
+               printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n");
+               kfree(inbuf);
+               return -1;
+       }
+       makecrc();
+       result = gunzip();
+       kfree(inbuf);
+       kfree(window);
+       return result;
+}
+
+#endif  /* BUILD_CRAMDISK */
+
diff --git a/xenolinux-2.4.16-sparse/drivers/char/tty_io.c b/xenolinux-2.4.16-sparse/drivers/char/tty_io.c
new file mode 100644 (file)
index 0000000..1466b03
--- /dev/null
@@ -0,0 +1,2388 @@
+/*
+ *  linux/drivers/char/tty_io.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles
+ * or rs-channels. It also implements echoing, cooked mode etc.
+ *
+ * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0.
+ *
+ * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the
+ * tty_struct and tty_queue structures.  Previously there was an array
+ * of 256 tty_struct's which was statically allocated, and the
+ * tty_queue structures were allocated at boot time.  Both are now
+ * dynamically allocated only when the tty is open.
+ *
+ * Also restructured routines so that there is more of a separation
+ * between the high-level tty routines (tty_io.c and tty_ioctl.c) and
+ * the low-level tty routines (serial.c, pty.c, console.c).  This
+ * makes for cleaner and more compact code.  -TYT, 9/17/92 
+ *
+ * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines
+ * which can be dynamically activated and de-activated by the line
+ * discipline handling modules (like SLIP).
+ *
+ * NOTE: pay no attention to the line discipline code (yet); its
+ * interface is still subject to change in this version...
+ * -- TYT, 1/31/92
+ *
+ * Added functionality to the OPOST tty handling.  No delays, but all
+ * other bits should be there.
+ *     -- Nick Holloway <alfie@dcs.warwick.ac.uk>, 27th May 1993.
+ *
+ * Rewrote canonical mode and added more termios flags.
+ *     -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94
+ *
+ * Reorganized FASYNC support so mouse code can share it.
+ *     -- ctm@ardi.com, 9Sep95
+ *
+ * New TIOCLINUX variants added.
+ *     -- mj@k332.feld.cvut.cz, 19-Nov-95
+ * 
+ * Restrict vt switching via ioctl()
+ *      -- grif@cs.ucr.edu, 5-Dec-95
+ *
+ * Move console and virtual terminal code to more appropriate files,
+ * implement CONFIG_VT and generalize console device interface.
+ *     -- Marko Kohtala <Marko.Kohtala@hut.fi>, March 97
+ *
+ * Rewrote init_dev and release_dev to eliminate races.
+ *     -- Bill Hawes <whawes@star.net>, June 97
+ *
+ * Added devfs support.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 13-Jan-1998
+ *
+ * Added support for a Unix98-style ptmx device.
+ *      -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998
+ *
+ * Reduced memory usage for older ARM systems
+ *      -- Russell King <rmk@arm.linux.org.uk>
+ *
+ * Move do_SAK() into process context.  Less stack use in devfs functions.
+ * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <linux/devpts_fs.h>
+#include <linux/file.h>
+#include <linux/console.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/kd.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+
+#include <linux/kbd_kern.h>
+#include <linux/vt_kern.h>
+#include <linux/selection.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/kmod.h>
+
+#ifdef CONFIG_XENO
+extern void xeno_console_init(void);
+#endif
+
+#ifdef CONFIG_VT
+extern void con_init_devfs (void);
+#endif
+
+#define CONSOLE_DEV MKDEV(TTY_MAJOR,0)
+#define TTY_DEV MKDEV(TTYAUX_MAJOR,0)
+#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1)
+#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2)
+
+#undef TTY_DEBUG_HANGUP
+
+#define TTY_PARANOIA_CHECK 1
+#define CHECK_TTY_COUNT 1
+
+struct termios tty_std_termios;                /* for the benefit of tty drivers  */
+struct tty_driver *tty_drivers;                /* linked list of tty drivers */
+struct tty_ldisc ldiscs[NR_LDISCS];    /* line disc dispatch table     */
+
+#ifdef CONFIG_UNIX98_PTYS
+extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */
+extern struct tty_driver pts_driver[]; /* Unix98 pty slaves;  for /dev/ptmx */
+#endif
+
+/*
+ * redirect is the pseudo-tty that console output
+ * is redirected to if asked by TIOCCONS.
+ */
+struct tty_struct * redirect;
+
+static void initialize_tty_struct(struct tty_struct *tty);
+
+static ssize_t tty_read(struct file *, char *, size_t, loff_t *);
+static ssize_t tty_write(struct file *, const char *, size_t, loff_t *);
+static unsigned int tty_poll(struct file *, poll_table *);
+static int tty_open(struct inode *, struct file *);
+static int tty_release(struct inode *, struct file *);
+int tty_ioctl(struct inode * inode, struct file * file,
+             unsigned int cmd, unsigned long arg);
+static int tty_fasync(int fd, struct file * filp, int on);
+extern int vme_scc_init (void);
+extern long vme_scc_console_init(void);
+extern int serial167_init(void);
+extern long serial167_console_init(void);
+extern void console_8xx_init(void);
+extern int rs_8xx_init(void);
+extern void mac_scc_console_init(void);
+extern void hwc_console_init(void);
+extern void hwc_tty_init(void);
+extern void con3215_init(void);
+extern void tty3215_init(void);
+extern void tub3270_con_init(void);
+extern void tub3270_init(void);
+extern void rs285_console_init(void);
+extern void sa1100_rs_console_init(void);
+extern void sgi_serial_console_init(void);
+extern void sci_console_init(void);
+extern void tx3912_console_init(void);
+extern void tx3912_rs_init(void);
+
+#ifndef MIN
+#define MIN(a,b)       ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b)       ((a) < (b) ? (b) : (a))
+#endif
+
+static struct tty_struct *alloc_tty_struct(void)
+{
+       struct tty_struct *tty;
+
+       tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL);
+       if (tty)
+               memset(tty, 0, sizeof(struct tty_struct));
+       return tty;
+}
+
+static inline void free_tty_struct(struct tty_struct *tty)
+{
+       kfree(tty);
+}
+
+/*
+ * This routine returns the name of tty.
+ */
+static char *
+_tty_make_name(struct tty_struct *tty, const char *name, char *buf)
+{
+       int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0;
+
+       if (!tty) /* Hmm.  NULL pointer.  That's fun. */
+               strcpy(buf, "NULL tty");
+       else
+               sprintf(buf, name,
+                       idx + tty->driver.name_base);
+               
+       return buf;
+}
+
+#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \
+                        (tty)->driver.name_base)
+
+char *tty_name(struct tty_struct *tty, char *buf)
+{
+       return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf);
+}
+
+inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
+                             const char *routine)
+{
+#ifdef TTY_PARANOIA_CHECK
+       static const char badmagic[] = KERN_WARNING
+               "Warning: bad magic number for tty struct (%s) in %s\n";
+       static const char badtty[] = KERN_WARNING
+               "Warning: null TTY for (%s) in %s\n";
+
+       if (!tty) {
+               printk(badtty, kdevname(device), routine);
+               return 1;
+       }
+       if (tty->magic != TTY_MAGIC) {
+               printk(badmagic, kdevname(device), routine);
+               return 1;
+       }
+#endif
+       return 0;
+}
+
+static int check_tty_count(struct tty_struct *tty, const char *routine)
+{
+#ifdef CHECK_TTY_COUNT
+       struct list_head *p;
+       int count = 0;
+       
+       file_list_lock();
+       for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+               if(list_entry(p, struct file, f_list)->private_data == tty)
+                       count++;
+       }
+       file_list_unlock();
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_SLAVE &&
+           tty->link && tty->link->count)
+               count++;
+       if (tty->count != count) {
+               printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) "
+                                   "!= #fd's(%d) in %s\n",
+                      kdevname(tty->device), tty->count, count, routine);
+               return count;
+       }       
+#endif
+       return 0;
+}
+
+int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc)
+{
+       if (disc < N_TTY || disc >= NR_LDISCS)
+               return -EINVAL;
+       
+       if (new_ldisc) {
+               ldiscs[disc] = *new_ldisc;
+               ldiscs[disc].flags |= LDISC_FLAG_DEFINED;
+               ldiscs[disc].num = disc;
+       } else
+               memset(&ldiscs[disc], 0, sizeof(struct tty_ldisc));
+       
+       return 0;
+}
+
+EXPORT_SYMBOL(tty_register_ldisc);
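+
+/*
+ * Example (hypothetical line discipline): a module registers its
+ * discipline by filling in a struct tty_ldisc and claiming a free N_*
+ * slot; passing NULL later unregisters it:
+ *
+ *     static struct tty_ldisc my_ldisc = {
+ *             magic:  TTY_LDISC_MAGIC,
+ *             name:   "my_ldisc",
+ *             open:   my_open,
+ *             close:  my_close,
+ *             read:   my_read,
+ *     };
+ *
+ *     tty_register_ldisc(N_MOUSE, &my_ldisc);
+ *     ...
+ *     tty_register_ldisc(N_MOUSE, NULL);
+ */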
+
+/* Set the discipline of a tty line. */
+static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
+{
+       int     retval = 0;
+       struct  tty_ldisc o_ldisc;
+       char buf[64];
+
+       if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS))
+               return -EINVAL;
+       /* Eduardo Blanco <ejbs@cs.cs.com.uy> */
+       /* Cyrus Durgin <cider@speakeasy.org> */
+       if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) {
+               char modname [20];
+               sprintf(modname, "tty-ldisc-%d", ldisc);
+               request_module (modname);
+       }
+       if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED))
+               return -EINVAL;
+
+       if (tty->ldisc.num == ldisc)
+               return 0;       /* We are already in the desired discipline */
+       o_ldisc = tty->ldisc;
+
+       tty_wait_until_sent(tty, 0);
+       
+       /* Shutdown the current discipline. */
+       if (tty->ldisc.close)
+               (tty->ldisc.close)(tty);
+
+       /* Now set up the new line discipline. */
+       tty->ldisc = ldiscs[ldisc];
+       tty->termios->c_line = ldisc;
+       if (tty->ldisc.open)
+               retval = (tty->ldisc.open)(tty);
+       if (retval < 0) {
+               tty->ldisc = o_ldisc;
+               tty->termios->c_line = tty->ldisc.num;
+               if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) {
+                       tty->ldisc = ldiscs[N_TTY];
+                       tty->termios->c_line = N_TTY;
+                       if (tty->ldisc.open) {
+                               int r = tty->ldisc.open(tty);
+
+                               if (r < 0)
+                                       panic("Couldn't open N_TTY ldisc for "
+                                             "%s --- error %d.",
+                                             tty_name(tty, buf), r);
+                       }
+               }
+       }
+       if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc)
+               tty->driver.set_ldisc(tty);
+       return retval;
+}
+
+/*
+ * This routine returns a tty driver structure, given a device number
+ */
+struct tty_driver *get_tty_driver(kdev_t device)
+{
+       int     major, minor;
+       struct tty_driver *p;
+       
+       minor = MINOR(device);
+       major = MAJOR(device);
+
+       for (p = tty_drivers; p; p = p->next) {
+               if (p->major != major)
+                       continue;
+               if (minor < p->minor_start)
+                       continue;
+               if (minor >= p->minor_start + p->num)
+                       continue;
+               return p;
+       }
+       return NULL;
+}
+
+/*
+ * If we try to write to, or set the state of, a terminal and we're
+ * not in the foreground, send a SIGTTOU.  If the signal is blocked or
+ * ignored, go ahead and perform the operation.  (POSIX 7.2)
+ */
+int tty_check_change(struct tty_struct * tty)
+{
+       if (current->tty != tty)
+               return 0;
+       if (tty->pgrp <= 0) {
+               printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n");
+               return 0;
+       }
+       if (current->pgrp == tty->pgrp)
+               return 0;
+       if (is_ignored(SIGTTOU))
+               return 0;
+       if (is_orphaned_pgrp(current->pgrp))
+               return -EIO;
+       (void) kill_pg(current->pgrp,SIGTTOU,1);
+       return -ERESTARTSYS;
+}
+
+static ssize_t hung_up_tty_read(struct file * file, char * buf,
+                               size_t count, loff_t *ppos)
+{
+       /* Can't seek (pread) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+       return 0;
+}
+
+static ssize_t hung_up_tty_write(struct file * file, const char * buf,
+                                size_t count, loff_t *ppos)
+{
+       /* Can't seek (pwrite) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+       return -EIO;
+}
+
+/* No kernel lock held - none needed ;) */
+static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait)
+{
+       return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM;
+}
+
+static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
+                            unsigned int cmd, unsigned long arg)
+{
+       return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
+}
+
+static struct file_operations tty_fops = {
+       llseek:         no_llseek,
+       read:           tty_read,
+       write:          tty_write,
+       poll:           tty_poll,
+       ioctl:          tty_ioctl,
+       open:           tty_open,
+       release:        tty_release,
+       fasync:         tty_fasync,
+};
+
+static struct file_operations hung_up_tty_fops = {
+       llseek:         no_llseek,
+       read:           hung_up_tty_read,
+       write:          hung_up_tty_write,
+       poll:           hung_up_tty_poll,
+       ioctl:          hung_up_tty_ioctl,
+       release:        tty_release,
+};
+
+/*
+ * This can be called by the "eventd" kernel thread.  That is process synchronous,
+ * but doesn't hold any locks, so we need to make sure we have the appropriate
+ * locks for what we're doing..
+ */
+void do_tty_hangup(void *data)
+{
+       struct tty_struct *tty = (struct tty_struct *) data;
+       struct file * cons_filp = NULL;
+       struct task_struct *p;
+       struct list_head *l;
+       int    closecount = 0, n;
+
+       if (!tty)
+               return;
+
+       /* inuse_filps is protected by the single kernel lock */
+       lock_kernel();
+       
+       check_tty_count(tty, "do_tty_hangup");
+       file_list_lock();
+       for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+               struct file * filp = list_entry(l, struct file, f_list);
+               if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
+                   filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
+                       cons_filp = filp;
+                       continue;
+               }
+               if (filp->f_op != &tty_fops)
+                       continue;
+               closecount++;
+               tty_fasync(-1, filp, 0);        /* can't block */
+               filp->f_op = &hung_up_tty_fops;
+       }
+       file_list_unlock();
+       
+       /* FIXME! What are the locking issues here? This may be overdoing things.. */
+       {
+               unsigned long flags;
+
+               save_flags(flags); cli();
+               if (tty->ldisc.flush_buffer)
+                       tty->ldisc.flush_buffer(tty);
+               if (tty->driver.flush_buffer)
+                       tty->driver.flush_buffer(tty);
+               if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+                   tty->ldisc.write_wakeup)
+                       (tty->ldisc.write_wakeup)(tty);
+               restore_flags(flags);
+       }
+
+       wake_up_interruptible(&tty->write_wait);
+       wake_up_interruptible(&tty->read_wait);
+
+       /*
+        * Shutdown the current line discipline, and reset it to
+        * N_TTY.
+        */
+       if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS)
+               *tty->termios = tty->driver.init_termios;
+       if (tty->ldisc.num != ldiscs[N_TTY].num) {
+               if (tty->ldisc.close)
+                       (tty->ldisc.close)(tty);
+               tty->ldisc = ldiscs[N_TTY];
+               tty->termios->c_line = N_TTY;
+               if (tty->ldisc.open) {
+                       int i = (tty->ldisc.open)(tty);
+                       if (i < 0)
+                               printk(KERN_ERR "do_tty_hangup: N_TTY open: "
+                                               "error %d\n", -i);
+               }
+       }
+       
+       read_lock(&tasklist_lock);
+       for_each_task(p) {
+               if ((tty->session > 0) && (p->session == tty->session) &&
+                   p->leader) {
+                       send_sig(SIGHUP,p,1);
+                       send_sig(SIGCONT,p,1);
+                       if (tty->pgrp > 0)
+                               p->tty_old_pgrp = tty->pgrp;
+               }
+               if (p->tty == tty)
+                       p->tty = NULL;
+       }
+       read_unlock(&tasklist_lock);
+
+       tty->flags = 0;
+       tty->session = 0;
+       tty->pgrp = -1;
+       tty->ctrl_status = 0;
+       /*
+        *      If one of the devices matches a console pointer, we
+        *      cannot just call hangup() because that will cause
+        *      tty->count and state->count to go out of sync.
+        *      So we just call close() the right number of times.
+        */
+       if (cons_filp) {
+               if (tty->driver.close)
+                       for (n = 0; n < closecount; n++)
+                               tty->driver.close(tty, cons_filp);
+       } else if (tty->driver.hangup)
+               (tty->driver.hangup)(tty);
+       unlock_kernel();
+}
+
+void tty_hangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+       
+       printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf));
+#endif
+       schedule_task(&tty->tq_hangup);
+}
+
+void tty_vhangup(struct tty_struct * tty)
+{
+#ifdef TTY_DEBUG_HANGUP
+       char    buf[64];
+
+       printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf));
+#endif
+       do_tty_hangup((void *) tty);
+}
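+
+/*
+ * Note the difference between the two entry points above: tty_hangup()
+ * only schedules do_tty_hangup() via the tq_hangup task queue, while
+ * tty_vhangup() runs it synchronously in the caller's context (as
+ * disassociate_ctty() below requires on process exit).
+ */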
+
+int tty_hung_up_p(struct file * filp)
+{
+       return (filp->f_op == &hung_up_tty_fops);
+}
+
+/*
+ * This function is typically called only by the session leader, when
+ * it wants to disassociate itself from its controlling tty.
+ *
+ * It performs the following functions:
+ *     (1)  Sends a SIGHUP and SIGCONT to the foreground process group
+ *     (2)  Clears the tty from being controlling the session
+ *     (3)  Clears the controlling tty for all processes in the
+ *             session group.
+ *
+ * The argument on_exit is set to 1 if called when a process is
+ * exiting; it is 0 if called by the ioctl TIOCNOTTY.
+ */
+void disassociate_ctty(int on_exit)
+{
+       struct tty_struct *tty = current->tty;
+       struct task_struct *p;
+       int tty_pgrp = -1;
+
+       if (tty) {
+               tty_pgrp = tty->pgrp;
+               if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY)
+                       tty_vhangup(tty);
+       } else {
+               if (current->tty_old_pgrp) {
+                       kill_pg(current->tty_old_pgrp, SIGHUP, on_exit);
+                       kill_pg(current->tty_old_pgrp, SIGCONT, on_exit);
+               }
+               return;
+       }
+       if (tty_pgrp > 0) {
+               kill_pg(tty_pgrp, SIGHUP, on_exit);
+               if (!on_exit)
+                       kill_pg(tty_pgrp, SIGCONT, on_exit);
+       }
+
+       current->tty_old_pgrp = 0;
+       tty->session = 0;
+       tty->pgrp = -1;
+
+       read_lock(&tasklist_lock);
+       for_each_task(p)
+               if (p->session == current->session)
+                       p->tty = NULL;
+       read_unlock(&tasklist_lock);
+}
+
+void wait_for_keypress(void)
+{
+        struct console *c = console_drivers;
+        if (c) c->wait_key(c);
+}
+
+void stop_tty(struct tty_struct *tty)
+{
+       if (tty->stopped)
+               return;
+       tty->stopped = 1;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_START;
+               tty->ctrl_status |= TIOCPKT_STOP;
+               wake_up_interruptible(&tty->link->read_wait);
+       }
+       if (tty->driver.stop)
+               (tty->driver.stop)(tty);
+}
+
+void start_tty(struct tty_struct *tty)
+{
+       if (!tty->stopped || tty->flow_stopped)
+               return;
+       tty->stopped = 0;
+       if (tty->link && tty->link->packet) {
+               tty->ctrl_status &= ~TIOCPKT_STOP;
+               tty->ctrl_status |= TIOCPKT_START;
+               wake_up_interruptible(&tty->link->read_wait);
+       }
+       if (tty->driver.start)
+               (tty->driver.start)(tty);
+       if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+           tty->ldisc.write_wakeup)
+               (tty->ldisc.write_wakeup)(tty);
+       wake_up_interruptible(&tty->write_wait);
+}
+
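+/*
+ * read() on a tty is delegated to the line discipline; this routine
+ * only performs the sanity checks and updates the access time.
+ */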
+static ssize_t tty_read(struct file * file, char * buf, size_t count, 
+                       loff_t *ppos)
+{
+       int i;
+       struct tty_struct * tty;
+       struct inode *inode;
+
+       /* Can't seek (pread) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+
+       tty = (struct tty_struct *)file->private_data;
+       inode = file->f_dentry->d_inode;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_read"))
+               return -EIO;
+       if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
+               return -EIO;
+
+       /* This check not only needs to be done before reading, but also
+          whenever read_chan() gets woken up after sleeping, so I've
+          moved it there.  This should only be done for the N_TTY
+          line discipline, anyway.  Same goes for write_chan(). -- jlc. */
+#if 0
+       if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */
+           (tty->pgrp > 0) &&
+           (current->tty == tty) &&
+           (tty->pgrp != current->pgrp))
+               if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp))
+                       return -EIO;
+               else {
+                       (void) kill_pg(current->pgrp, SIGTTIN, 1);
+                       return -ERESTARTSYS;
+               }
+#endif
+       lock_kernel();
+       if (tty->ldisc.read)
+               i = (tty->ldisc.read)(tty,file,buf,count);
+       else
+               i = -EIO;
+       unlock_kernel();
+       if (i > 0)
+               inode->i_atime = CURRENT_TIME;
+       return i;
+}
+
+/*
+ * Split writes up in sane blocksizes to avoid
+ * denial-of-service type attacks
+ */
+static inline ssize_t do_tty_write(
+       ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t),
+       struct tty_struct *tty,
+       struct file *file,
+       const unsigned char *buf,
+       size_t count)
+{
+       ssize_t ret = 0, written = 0;
+       
+       if (down_interruptible(&tty->atomic_write)) {
+               return -ERESTARTSYS;
+       }
+       if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) {
+               lock_kernel();
+               written = write(tty, file, buf, count);
+               unlock_kernel();
+       } else {
+               for (;;) {
+                       unsigned long size = MAX(PAGE_SIZE*2,16384);
+                       if (size > count)
+                               size = count;
+                       lock_kernel();
+                       ret = write(tty, file, buf, size);
+                       unlock_kernel();
+                       if (ret <= 0)
+                               break;
+                       written += ret;
+                       buf += ret;
+                       count -= ret;
+                       if (!count)
+                               break;
+                       ret = -ERESTARTSYS;
+                       if (signal_pending(current))
+                               break;
+                       if (current->need_resched)
+                               schedule();
+               }
+       }
+       if (written) {
+               file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
+               ret = written;
+       }
+       up(&tty->atomic_write);
+       return ret;
+}
+
+
+static ssize_t tty_write(struct file * file, const char * buf, size_t count,
+                        loff_t *ppos)
+{
+       int is_console;
+       struct tty_struct * tty;
+       struct inode *inode;
+
+       /* Can't seek (pwrite) on ttys.  */
+       if (ppos != &file->f_pos)
+               return -ESPIPE;
+
+       /*
+        *      For now, we redirect writes from /dev/console as
+        *      well as /dev/tty0.
+        */
+       inode = file->f_dentry->d_inode;
+       is_console = (inode->i_rdev == SYSCONS_DEV ||
+                     inode->i_rdev == CONSOLE_DEV);
+
+       if (is_console && redirect)
+               tty = redirect;
+       else
+               tty = (struct tty_struct *)file->private_data;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_write"))
+               return -EIO;
+       if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags)))
+               return -EIO;
+#if 0
+       if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) &&
+           (current->tty == tty) && (tty->pgrp != current->pgrp)) {
+               if (is_orphaned_pgrp(current->pgrp))
+                       return -EIO;
+               if (!is_ignored(SIGTTOU)) {
+                       (void) kill_pg(current->pgrp, SIGTTOU, 1);
+                       return -ERESTARTSYS;
+               }
+       }
+#endif
+       if (!tty->ldisc.write)
+               return -EIO;
+       return do_tty_write(tty->ldisc.write, tty, file,
+                           (const unsigned char *)buf, count);
+}
+
+/* Semaphore to protect creating and releasing a tty */
+static DECLARE_MUTEX(tty_sem);
+
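+/*
+ * These helpers currently ignore the index and take a single global
+ * semaphore; keeping the argument means the locking could later be
+ * made per-device without touching the callers.
+ */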
+static void down_tty_sem(int index)
+{
+       down(&tty_sem);
+}
+
+static void up_tty_sem(int index)
+{
+       up(&tty_sem);
+}
+
+static void release_mem(struct tty_struct *tty, int idx);
+
+/*
+ * WSH 06/09/97: Rewritten to remove races and properly clean up after a
+ * failed open.  The new code protects the open with a semaphore, so it's
+ * really quite straightforward.  The semaphore locking can probably be
+ * relaxed for the (most common) case of reopening a tty.
+ */
+static int init_dev(kdev_t device, struct tty_struct **ret_tty)
+{
+       struct tty_struct *tty, *o_tty;
+       struct termios *tp, **tp_loc, *o_tp, **o_tp_loc;
+       struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc;
+       struct tty_driver *driver;      
+       int retval=0;
+       int idx;
+
+       driver = get_tty_driver(device);
+       if (!driver)
+               return -ENODEV;
+
+       idx = MINOR(device) - driver->minor_start;
+
+       /* 
+        * Check whether we need to acquire the tty semaphore to avoid
+        * race conditions.  For now, play it safe.
+        */
+       down_tty_sem(idx);
+
+       /* check whether we're reopening an existing tty */
+       tty = driver->table[idx];
+       if (tty) goto fast_track;
+
+       /*
+        * First time open is complex, especially for PTY devices.
+        * This code guarantees that either everything succeeds and the
+        * TTY is ready for operation, or else the table slots are vacated
+        * and the allocated memory released.  (Except that the termios 
+        * and locked termios may be retained.)
+        */
+
+       o_tty = NULL;
+       tp = o_tp = NULL;
+       ltp = o_ltp = NULL;
+
+       tty = alloc_tty_struct();
+       if (!tty)
+               goto fail_no_mem;
+       initialize_tty_struct(tty);
+       tty->device = device;
+       tty->driver = *driver;
+
+       tp_loc = &driver->termios[idx];
+       if (!*tp_loc) {
+               tp = (struct termios *) kmalloc(sizeof(struct termios),
+                                               GFP_KERNEL);
+               if (!tp)
+                       goto free_mem_out;
+               *tp = driver->init_termios;
+       }
+
+       ltp_loc = &driver->termios_locked[idx];
+       if (!*ltp_loc) {
+               ltp = (struct termios *) kmalloc(sizeof(struct termios),
+                                                GFP_KERNEL);
+               if (!ltp)
+                       goto free_mem_out;
+               memset(ltp, 0, sizeof(struct termios));
+       }
+
+       if (driver->type == TTY_DRIVER_TYPE_PTY) {
+               o_tty = alloc_tty_struct();
+               if (!o_tty)
+                       goto free_mem_out;
+               initialize_tty_struct(o_tty);
+               o_tty->device = (kdev_t) MKDEV(driver->other->major,
+                                       driver->other->minor_start + idx);
+               o_tty->driver = *driver->other;
+
+               o_tp_loc  = &driver->other->termios[idx];
+               if (!*o_tp_loc) {
+                       o_tp = (struct termios *)
+                               kmalloc(sizeof(struct termios), GFP_KERNEL);
+                       if (!o_tp)
+                               goto free_mem_out;
+                       *o_tp = driver->other->init_termios;
+               }
+
+               o_ltp_loc = &driver->other->termios_locked[idx];
+               if (!*o_ltp_loc) {
+                       o_ltp = (struct termios *)
+                               kmalloc(sizeof(struct termios), GFP_KERNEL);
+                       if (!o_ltp)
+                               goto free_mem_out;
+                       memset(o_ltp, 0, sizeof(struct termios));
+               }
+
+               /*
+                * Everything allocated ... set up the o_tty structure.
+                */
+               driver->other->table[idx] = o_tty;
+               if (!*o_tp_loc)
+                       *o_tp_loc = o_tp;
+               if (!*o_ltp_loc)
+                       *o_ltp_loc = o_ltp;
+               o_tty->termios = *o_tp_loc;
+               o_tty->termios_locked = *o_ltp_loc;
+               (*driver->other->refcount)++;
+               if (driver->subtype == PTY_TYPE_MASTER)
+                       o_tty->count++;
+
+               /* Establish the links in both directions */
+               tty->link   = o_tty;
+               o_tty->link = tty;
+       }
+
+       /* 
+        * All structures have been allocated, so now we install them.
+        * Failures after this point use release_mem to clean up, so 
+        * there's no need to null out the local pointers.
+        */
+       driver->table[idx] = tty;
+       
+       if (!*tp_loc)
+               *tp_loc = tp;
+       if (!*ltp_loc)
+               *ltp_loc = ltp;
+       tty->termios = *tp_loc;
+       tty->termios_locked = *ltp_loc;
+       (*driver->refcount)++;
+       tty->count++;
+
+       /* 
+        * Structures all installed ... call the ldisc open routines.
+        * If we fail here just call release_mem to clean up.  No need
+        * to decrement the use counts, as release_mem doesn't care.
+        */
+       if (tty->ldisc.open) {
+               retval = (tty->ldisc.open)(tty);
+               if (retval)
+                       goto release_mem_out;
+       }
+       if (o_tty && o_tty->ldisc.open) {
+               retval = (o_tty->ldisc.open)(o_tty);
+               if (retval) {
+                       if (tty->ldisc.close)
+                               (tty->ldisc.close)(tty);
+                       goto release_mem_out;
+               }
+       }
+       goto success;
+
+       /*
+        * This fast open can be used if the tty is already open.
+        * No memory is allocated, and the only failures are from
+        * attempting to open a closing tty or attempting multiple
+        * opens on a pty master.
+        */
+fast_track:
+       if (test_bit(TTY_CLOSING, &tty->flags)) {
+               retval = -EIO;
+               goto end_init;
+       }
+       if (driver->type == TTY_DRIVER_TYPE_PTY &&
+           driver->subtype == PTY_TYPE_MASTER) {
+               /*
+                * special case for PTY masters: only one open permitted, 
+                * and the slave side open count is incremented as well.
+                */
+               if (tty->count) {
+                       retval = -EIO;
+                       goto end_init;
+               }
+               tty->link->count++;
+       }
+       tty->count++;
+       tty->driver = *driver; /* N.B. why do this every time?? */
+
+success:
+       *ret_tty = tty;
+       
+       /* All paths come through here to release the semaphore */
+end_init:
+       up_tty_sem(idx);
+       return retval;
+
+       /* Release locally allocated memory ... nothing placed in slots */
+free_mem_out:
+       if (o_tp)
+               kfree(o_tp);
+       if (o_tty)
+               free_tty_struct(o_tty);
+       if (ltp)
+               kfree(ltp);
+       if (tp)
+               kfree(tp);
+       free_tty_struct(tty);
+
+fail_no_mem:
+       retval = -ENOMEM;
+       goto end_init;
+
+       /* call the tty release_mem routine to clean out this slot */
+release_mem_out:
+       printk(KERN_INFO "init_dev: ldisc open failed, "
+                        "clearing slot %d\n", idx);
+       release_mem(tty, idx);
+       goto end_init;
+}
+
+/*
+ * Releases memory associated with a tty structure, and clears out the
+ * driver table slots.
+ */
+static void release_mem(struct tty_struct *tty, int idx)
+{
+       struct tty_struct *o_tty;
+       struct termios *tp;
+
+       if ((o_tty = tty->link) != NULL) {
+               o_tty->driver.table[idx] = NULL;
+               if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+                       tp = o_tty->driver.termios[idx];
+                       o_tty->driver.termios[idx] = NULL;
+                       kfree(tp);
+               }
+               o_tty->magic = 0;
+               (*o_tty->driver.refcount)--;
+               list_del(&o_tty->tty_files);
+               free_tty_struct(o_tty);
+       }
+
+       tty->driver.table[idx] = NULL;
+       if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) {
+               tp = tty->driver.termios[idx];
+               tty->driver.termios[idx] = NULL;
+               kfree(tp);
+       }
+       tty->magic = 0;
+       (*tty->driver.refcount)--;
+       list_del(&tty->tty_files);
+       free_tty_struct(tty);
+}
+
+/*
+ * Even releasing the tty structures is a tricky business.. We have
+ * to be very careful that the structures are all released at the
+ * same time, as interrupts might otherwise get the wrong pointers.
+ *
+ * WSH 09/09/97: rewritten to avoid some nasty race conditions that could
+ * lead to double frees or releasing memory still in use.
+ */
+static void release_dev(struct file * filp)
+{
+       struct tty_struct *tty, *o_tty;
+       int     pty_master, tty_closing, o_tty_closing, do_sleep;
+       int     idx;
+       char    buf[64];
+       
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev"))
+               return;
+
+       check_tty_count(tty, "release_dev");
+
+       tty_fasync(-1, filp, 0);
+
+       idx = MINOR(tty->device) - tty->driver.minor_start;
+       pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+                     tty->driver.subtype == PTY_TYPE_MASTER);
+       o_tty = tty->link;
+
+#ifdef TTY_PARANOIA_CHECK
+       if (idx < 0 || idx >= tty->driver.num) {
+               printk(KERN_DEBUG "release_dev: bad idx when trying to "
+                                 "free (%s)\n", kdevname(tty->device));
+               return;
+       }
+       if (tty != tty->driver.table[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.table[%d] not tty "
+                                 "for (%s)\n", idx, kdevname(tty->device));
+               return;
+       }
+       if (tty->termios != tty->driver.termios[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios "
+                      "for (%s)\n",
+                      idx, kdevname(tty->device));
+               return;
+       }
+       if (tty->termios_locked != tty->driver.termios_locked[idx]) {
+               printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not "
+                      "termios_locked for (%s)\n",
+                      idx, kdevname(tty->device));
+               return;
+       }
+#endif
+
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "release_dev of %s (tty count=%d)...",
+              tty_name(tty, buf), tty->count);
+#endif
+
+#ifdef TTY_PARANOIA_CHECK
+       if (tty->driver.other) {
+               if (o_tty != tty->driver.other->table[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->table[%d] "
+                                         "not o_tty for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->termios != tty->driver.other->termios[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->termios[%d] "
+                                         "not o_termios for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->termios_locked != 
+                     tty->driver.other->termios_locked[idx]) {
+                       printk(KERN_DEBUG "release_dev: other->termios_locked["
+                                         "%d] not o_termios_locked for (%s)\n",
+                              idx, kdevname(tty->device));
+                       return;
+               }
+               if (o_tty->link != tty) {
+                       printk(KERN_DEBUG "release_dev: bad pty pointers\n");
+                       return;
+               }
+       }
+#endif
+
+       if (tty->driver.close)
+               tty->driver.close(tty, filp);
+
+       /*
+        * Sanity check: if tty->count is going to zero, there shouldn't be
+        * any waiters on tty->read_wait or tty->write_wait.  We test the
+        * wait queues and kick everyone out _before_ actually starting to
+        * close.  This ensures that we won't block while releasing the tty
+        * structure.
+        *
+        * The test for the o_tty closing is necessary, since the master and
+        * slave sides may close in any order.  If the slave side closes out
+        * first, its count will be one, since the master side holds an open.
+        * Thus this test wouldn't be triggered at the time the slave closes,
+        * so we do it now.
+        *
+        * Note that it's possible for the tty to be opened again while we're
+        * flushing out waiters.  By recalculating the closing flags before
+        * each iteration we avoid any problems.
+        */
+       while (1) {
+               tty_closing = tty->count <= 1;
+               o_tty_closing = o_tty &&
+                       (o_tty->count <= (pty_master ? 1 : 0));
+               do_sleep = 0;
+
+               if (tty_closing) {
+                       if (waitqueue_active(&tty->read_wait)) {
+                               wake_up(&tty->read_wait);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&tty->write_wait)) {
+                               wake_up(&tty->write_wait);
+                               do_sleep++;
+                       }
+               }
+               if (o_tty_closing) {
+                       if (waitqueue_active(&o_tty->read_wait)) {
+                               wake_up(&o_tty->read_wait);
+                               do_sleep++;
+                       }
+                       if (waitqueue_active(&o_tty->write_wait)) {
+                               wake_up(&o_tty->write_wait);
+                               do_sleep++;
+                       }
+               }
+               if (!do_sleep)
+                       break;
+
+               printk(KERN_WARNING "release_dev: %s: read/write wait queue "
+                                   "active!\n", tty_name(tty, buf));
+               schedule();
+       }       
+
+       /*
+        * The closing flags are now consistent with the open counts on 
+        * both sides, and we've completed the last operation that could 
+        * block, so it's safe to proceed with closing.
+        */
+       if (pty_master) {
+               if (--o_tty->count < 0) {
+                       printk(KERN_WARNING "release_dev: bad pty slave count "
+                                           "(%d) for %s\n",
+                              o_tty->count, tty_name(o_tty, buf));
+                       o_tty->count = 0;
+               }
+       }
+       if (--tty->count < 0) {
+               printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n",
+                      tty->count, tty_name(tty, buf));
+               tty->count = 0;
+       }
+
+       /*
+        * We've decremented tty->count, so we should zero out
+        * filp->private_data, to break the link between the tty and
+        * the file descriptor.  Otherwise if filp_close() blocks before
+        * the file descriptor is removed from the inuse_filp
+        * list, check_tty_count() could observe a discrepancy and
+        * printk a warning message to the user.
+        */
+       filp->private_data = NULL;
+
+       /*
+        * Perform some housekeeping before deciding whether to return.
+        *
+        * Set the TTY_CLOSING flag if this was the last open.  In the
+        * case of a pty we may have to wait around for the other side
+        * to close, and TTY_CLOSING makes sure we can't be reopened.
+        */
+       if (tty_closing)
+               set_bit(TTY_CLOSING, &tty->flags);
+       if (o_tty_closing)
+               set_bit(TTY_CLOSING, &o_tty->flags);
+
+       /*
+        * If _either_ side is closing, make sure there aren't any
+        * processes that still think tty or o_tty is their controlling
+        * tty.  Also, clear redirect if it points to either tty.
+        */
+       if (tty_closing || o_tty_closing) {
+               struct task_struct *p;
+
+               read_lock(&tasklist_lock);
+               for_each_task(p) {
+                       if (p->tty == tty || (o_tty && p->tty == o_tty))
+                               p->tty = NULL;
+               }
+               read_unlock(&tasklist_lock);
+
+               if (redirect == tty || (o_tty && redirect == o_tty))
+                       redirect = NULL;
+       }
+
+       /* check whether both sides are closing ... */
+       if (!tty_closing || (o_tty && !o_tty_closing))
+               return;
+       
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "freeing tty structure...");
+#endif
+
+       /*
+        * Shutdown the current line discipline, and reset it to N_TTY.
+        * N.B. why reset ldisc when we're releasing the memory??
+        */
+       if (tty->ldisc.close)
+               (tty->ldisc.close)(tty);
+       tty->ldisc = ldiscs[N_TTY];
+       tty->termios->c_line = N_TTY;
+       if (o_tty) {
+               if (o_tty->ldisc.close)
+                       (o_tty->ldisc.close)(o_tty);
+               o_tty->ldisc = ldiscs[N_TTY];
+       }
+       
+       /*
+        * Make sure that the tty's task queue isn't activated. 
+        */
+       run_task_queue(&tq_timer);
+       flush_scheduled_tasks();
+
+       /* 
+        * The release_mem function takes care of the details of clearing
+        * the slots and preserving the termios structure.
+        */
+       release_mem(tty, idx);
+}
+
+/*
+ * tty_open and tty_release keep up the tty count that contains the
+ * number of opens done on a tty. We cannot use the inode-count, as
+ * different inodes might point to the same tty.
+ *
+ * Open-counting is needed for pty masters, as well as for keeping
+ * track of serial lines: DTR is dropped when the last close happens.
+ * (This is not done solely through tty->count, now.  - Ted 1/27/92)
+ *
+ * The termios state of a pty is reset on first open so that
+ * settings don't persist across reuse.
+ */
+static int tty_open(struct inode * inode, struct file * filp)
+{
+       struct tty_struct *tty;
+       int noctty, retval;
+       kdev_t device;
+       unsigned short saved_flags;
+       char    buf[64];
+
+       saved_flags = filp->f_flags;
+retry_open:
+       noctty = filp->f_flags & O_NOCTTY;
+       device = inode->i_rdev;
+       if (device == TTY_DEV) {
+               if (!current->tty)
+                       return -ENXIO;
+               device = current->tty->device;
+               filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
+               /* noctty = 1; */
+       }
+#ifdef CONFIG_VT
+       if (device == CONSOLE_DEV) {
+               extern int fg_console;
+               device = MKDEV(TTY_MAJOR, fg_console + 1);
+               noctty = 1;
+       }
+#endif
+       if (device == SYSCONS_DEV) {
+               struct console *c = console_drivers;
+               while (c && !c->device)
+                       c = c->next;
+               if (!c)
+                       return -ENODEV;
+               device = c->device(c);
+               filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */
+               noctty = 1;
+       }
+
+       if (device == PTMX_DEV) {
+#ifdef CONFIG_UNIX98_PTYS
+
+               /* find a free pty. */
+               int major, minor;
+               struct tty_driver *driver;
+
+               /* find a device that is not in use. */
+               retval = -1;
+               for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) {
+                       driver = &ptm_driver[major];
+                       for (minor = driver->minor_start ;
+                            minor < driver->minor_start + driver->num ;
+                            minor++) {
+                               device = MKDEV(driver->major, minor);
+                               if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */
+                       }
+               }
+               return -EIO; /* no free ptys */
+       ptmx_found:
+               set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+               minor -= driver->minor_start;
+               devpts_pty_new(driver->other->name_base + minor,
+                              MKDEV(driver->other->major,
+                                    minor + driver->other->minor_start));
+               tty_register_devfs(&pts_driver[major], DEVFS_FL_NO_PERSISTENCE,
+                                  pts_driver[major].minor_start + minor);
+               noctty = 1;
+               goto init_dev_done;
+
+#else   /* CONFIG_UNIX98_PTYS */
+
+               return -ENODEV;
+
+#endif  /* CONFIG_UNIX98_PTYS */
+       }
+
+       retval = init_dev(device, &tty);
+       if (retval)
+               return retval;
+
+#ifdef CONFIG_UNIX98_PTYS
+init_dev_done:
+#endif
+       filp->private_data = tty;
+       file_move(filp, &tty->tty_files);
+       check_tty_count(tty, "tty_open");
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_MASTER)
+               noctty = 1;
+#ifdef TTY_DEBUG_HANGUP
+       printk(KERN_DEBUG "opening %s...", tty_name(tty, buf));
+#endif
+       if (tty->driver.open)
+               retval = tty->driver.open(tty, filp);
+       else
+               retval = -ENODEV;
+       filp->f_flags = saved_flags;
+
+       if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser())
+               retval = -EBUSY;
+
+       if (retval) {
+#ifdef TTY_DEBUG_HANGUP
+               printk(KERN_DEBUG "error %d in opening %s...", retval,
+                      tty_name(tty, buf));
+#endif
+
+               release_dev(filp);
+               if (retval != -ERESTARTSYS)
+                       return retval;
+               if (signal_pending(current))
+                       return retval;
+               schedule();
+               /*
+                * Need to reset f_op in case a hangup happened.
+                */
+               filp->f_op = &tty_fops;
+               goto retry_open;
+       }
+       if (!noctty &&
+           current->leader &&
+           !current->tty &&
+           tty->session == 0) {
+               task_lock(current);
+               current->tty = tty;
+               task_unlock(current);
+               current->tty_old_pgrp = 0;
+               tty->session = current->session;
+               tty->pgrp = current->pgrp;
+       }
+       if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) &&
+           (tty->driver.subtype == SERIAL_TYPE_CALLOUT) &&
+           (tty->count == 1)) {
+               static int nr_warns;
+               if (nr_warns < 5) {
+                       printk(KERN_WARNING "tty_io.c: "
+                               "process %d (%s) used obsolete /dev/%s - "
+                               "update software to use /dev/ttyS%d\n",
+                               current->pid, current->comm,
+                               tty_name(tty, buf), TTY_NUMBER(tty));
+                       nr_warns++;
+               }
+       }
+       return 0;
+}
+
+static int tty_release(struct inode * inode, struct file * filp)
+{
+       lock_kernel();
+       release_dev(filp);
+       unlock_kernel();
+       return 0;
+}
+
+/* No kernel lock held - fine */
+static unsigned int tty_poll(struct file * filp, poll_table * wait)
+{
+       struct tty_struct * tty;
+
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll"))
+               return 0;
+
+       if (tty->ldisc.poll)
+               return (tty->ldisc.poll)(tty, filp, wait);
+       return 0;
+}
+
+static int tty_fasync(int fd, struct file * filp, int on)
+{
+       struct tty_struct * tty;
+       int retval;
+
+       tty = (struct tty_struct *)filp->private_data;
+       if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync"))
+               return 0;
+       
+       retval = fasync_helper(fd, filp, on, &tty->fasync);
+       if (retval <= 0)
+               return retval;
+
+       if (on) {
+               if (!waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = 1;
+               if (filp->f_owner.pid == 0) {
+                       filp->f_owner.pid = (-tty->pgrp) ? : current->pid;
+                       filp->f_owner.uid = current->uid;
+                       filp->f_owner.euid = current->euid;
+               }
+       } else {
+               if (!tty->fasync && !waitqueue_active(&tty->read_wait))
+                       tty->minimum_to_wake = N_TTY_BUF_SIZE;
+       }
+       return 0;
+}
+
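+/*
+ * TIOCSTI: fake terminal input by feeding one character to the line
+ * discipline as if it had been received from the hardware.  Only the
+ * owner of the tty (or the superuser) may do this.
+ */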
+static int tiocsti(struct tty_struct *tty, char * arg)
+{
+       char ch, mbz = 0;
+
+       if ((current->tty != tty) && !suser())
+               return -EPERM;
+       if (get_user(ch, arg))
+               return -EFAULT;
+       tty->ldisc.receive_buf(tty, &ch, &mbz, 1);
+       return 0;
+}
+
+static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg)
+{
+       if (copy_to_user(arg, &tty->winsize, sizeof(*arg)))
+               return -EFAULT;
+       return 0;
+}
+
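+/*
+ * TIOCSWINSZ: update the window size and notify the foreground process
+ * group(s) with SIGWINCH.  For a pty pair both sides share the same
+ * winsize, and the master's and slave's process groups are signalled
+ * separately if they differ.
+ */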
+static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
+       struct winsize * arg)
+{
+       struct winsize tmp_ws;
+
+       if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
+               return -EFAULT;
+       if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
+               return 0;
+       if (tty->pgrp > 0)
+               kill_pg(tty->pgrp, SIGWINCH, 1);
+       if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0))
+               kill_pg(real_tty->pgrp, SIGWINCH, 1);
+       tty->winsize = tmp_ws;
+       real_tty->winsize = tmp_ws;
+       return 0;
+}
+
+static int tioccons(struct inode *inode,
+       struct tty_struct *tty, struct tty_struct *real_tty)
+{
+       if (inode->i_rdev == SYSCONS_DEV ||
+           inode->i_rdev == CONSOLE_DEV) {
+               if (!suser())
+                       return -EPERM;
+               redirect = NULL;
+               return 0;
+       }
+       if (redirect)
+               return -EBUSY;
+       redirect = real_tty;
+       return 0;
+}
+
+
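+/* FIONBIO: switch the file between blocking and non-blocking I/O. */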
+static int fionbio(struct file *file, int *arg)
+{
+       int nonblock;
+
+       if (get_user(nonblock, arg))
+               return -EFAULT;
+
+       if (nonblock)
+               file->f_flags |= O_NONBLOCK;
+       else
+               file->f_flags &= ~O_NONBLOCK;
+       return 0;
+}
+
+static int tiocsctty(struct tty_struct *tty, int arg)
+{
+       if (current->leader &&
+           (current->session == tty->session))
+               return 0;
+       /*
+        * The process must be a session leader and
+        * not have a controlling tty already.
+        */
+       if (!current->leader || current->tty)
+               return -EPERM;
+       if (tty->session > 0) {
+               /*
+                * This tty is already the controlling
+                * tty for another session group!
+                */
+               if ((arg == 1) && suser()) {
+                       /*
+                        * Steal it away
+                        */
+                       struct task_struct *p;
+
+                       read_lock(&tasklist_lock);
+                       for_each_task(p)
+                               if (p->tty == tty)
+                                       p->tty = NULL;
+                       read_unlock(&tasklist_lock);
+               } else
+                       return -EPERM;
+       }
+       task_lock(current);
+       current->tty = tty;
+       task_unlock(current);
+       current->tty_old_pgrp = 0;
+       tty->session = current->session;
+       tty->pgrp = current->pgrp;
+       return 0;
+}
+
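+/* TIOCGPGRP: report the foreground process group of the tty. */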
+static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+        */
+       if (tty == real_tty && current->tty != real_tty)
+               return -ENOTTY;
+       return put_user(real_tty->pgrp, arg);
+}
+
+static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       pid_t pgrp;
+       int retval = tty_check_change(real_tty);
+
+       if (retval == -EIO)
+               return -ENOTTY;
+       if (retval)
+               return retval;
+       if (!current->tty ||
+           (current->tty != real_tty) ||
+           (real_tty->session != current->session))
+               return -ENOTTY;
+       if (get_user(pgrp, (pid_t *) arg))
+               return -EFAULT;
+       if (pgrp < 0)
+               return -EINVAL;
+       if (session_of_pgrp(pgrp) != current->session)
+               return -EPERM;
+       real_tty->pgrp = pgrp;
+       return 0;
+}
+
+static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg)
+{
+       /*
+        * (tty == real_tty) is a cheap way of
+        * testing if the tty is NOT a master pty.
+       */
+       if (tty == real_tty && current->tty != real_tty)
+               return -ENOTTY;
+       if (real_tty->session <= 0)
+               return -ENOTTY;
+       return put_user(real_tty->session, arg);
+}
+
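+/* TIOCTTYGSTRUCT: debugging aid - copy the raw tty_struct to user space. */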
+static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg)
+{
+       if (copy_to_user(arg, tty, sizeof(*arg)))
+               return -EFAULT;
+       return 0;
+}
+
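+/* TIOCSETD: switch the tty to a different line discipline. */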
+static int tiocsetd(struct tty_struct *tty, int *arg)
+{
+       int ldisc;
+
+       if (get_user(ldisc, arg))
+               return -EFAULT;
+       return tty_set_ldisc(tty, ldisc);
+}
+
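+/*
+ * Assert the break condition for `duration' jiffies.  The driver's
+ * break_ctl() hook raises and drops the break signal; the sleep in
+ * between is interruptible.
+ */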
+static int send_break(struct tty_struct *tty, int duration)
+{
+       set_current_state(TASK_INTERRUPTIBLE);
+
+       tty->driver.break_ctl(tty, -1);
+       if (!signal_pending(current))
+               schedule_timeout(duration);
+       tty->driver.break_ctl(tty, 0);
+       if (signal_pending(current))
+               return -EINTR;
+       return 0;
+}
+
+/*
+ * Split this up, as gcc can choke on it otherwise..
+ */
+int tty_ioctl(struct inode * inode, struct file * file,
+             unsigned int cmd, unsigned long arg)
+{
+       struct tty_struct *tty, *real_tty;
+       int retval;
+       
+       tty = (struct tty_struct *)file->private_data;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+               return -EINVAL;
+
+       real_tty = tty;
+       if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver.subtype == PTY_TYPE_MASTER)
+               real_tty = tty->link;
+
+       /*
+        * Break handling by driver
+        */
+       if (!tty->driver.break_ctl) {
+               switch(cmd) {
+               case TIOCSBRK:
+               case TIOCCBRK:
+                       if (tty->driver.ioctl)
+                               return tty->driver.ioctl(tty, file, cmd, arg);
+                       return -EINVAL;
+                       
+               /* These two ioctls always return success, even if
+                * the driver doesn't support them. */
+               case TCSBRK:
+               case TCSBRKP:
+                       if (!tty->driver.ioctl)
+                               return 0;
+                       retval = tty->driver.ioctl(tty, file, cmd, arg);
+                       if (retval == -ENOIOCTLCMD)
+                               retval = 0;
+                       return retval;
+               }
+       }
+
+       /*
+        * Factor out some common prep work
+        */
+       switch (cmd) {
+       case TIOCSETD:
+       case TIOCSBRK:
+       case TIOCCBRK:
+       case TCSBRK:
+       case TCSBRKP:                   
+               retval = tty_check_change(tty);
+               if (retval)
+                       return retval;
+               if (cmd != TIOCCBRK) {
+                       tty_wait_until_sent(tty, 0);
+                       if (signal_pending(current))
+                               return -EINTR;
+               }
+               break;
+       }
+
+       switch (cmd) {
+               case TIOCSTI:
+                       return tiocsti(tty, (char *)arg);
+               case TIOCGWINSZ:
+                       return tiocgwinsz(tty, (struct winsize *) arg);
+               case TIOCSWINSZ:
+                       return tiocswinsz(tty, real_tty, (struct winsize *) arg);
+               case TIOCCONS:
+                       return tioccons(inode, tty, real_tty);
+               case FIONBIO:
+                       return fionbio(file, (int *) arg);
+               case TIOCEXCL:
+                       set_bit(TTY_EXCLUSIVE, &tty->flags);
+                       return 0;
+               case TIOCNXCL:
+                       clear_bit(TTY_EXCLUSIVE, &tty->flags);
+                       return 0;
+               case TIOCNOTTY:
+                       if (current->tty != tty)
+                               return -ENOTTY;
+                       if (current->leader)
+                               disassociate_ctty(0);
+                       task_lock(current);
+                       current->tty = NULL;
+                       task_unlock(current);
+                       return 0;
+               case TIOCSCTTY:
+                       return tiocsctty(tty, arg);
+               case TIOCGPGRP:
+                       return tiocgpgrp(tty, real_tty, (pid_t *) arg);
+               case TIOCSPGRP:
+                       return tiocspgrp(tty, real_tty, (pid_t *) arg);
+               case TIOCGSID:
+                       return tiocgsid(tty, real_tty, (pid_t *) arg);
+               case TIOCGETD:
+                       return put_user(tty->ldisc.num, (int *) arg);
+               case TIOCSETD:
+                       return tiocsetd(tty, (int *) arg);
+#ifdef CONFIG_VT
+               case TIOCLINUX:
+                       return tioclinux(tty, arg);
+#endif
+               case TIOCTTYGSTRUCT:
+                       return tiocttygstruct(tty, (struct tty_struct *) arg);
+
+               /*
+                * Break handling
+                */
+               case TIOCSBRK:  /* Turn break on, unconditionally */
+                       tty->driver.break_ctl(tty, -1);
+                       return 0;
+                       
+               case TIOCCBRK:  /* Turn break off, unconditionally */
+                       tty->driver.break_ctl(tty, 0);
+                       return 0;
+               case TCSBRK:   /* SVID version: non-zero arg --> no break */
+                       /*
+                        * XXX is the above comment correct, or the
+                        * code below correct?  Is this ioctl used at
+                        * all by anyone?
+                        */
+                       if (!arg)
+                               return send_break(tty, HZ/4);
+                       return 0;
+               case TCSBRKP:   /* support for POSIX tcsendbreak() */   
+                       return send_break(tty, arg ? arg*(HZ/10) : HZ/4);
+       }
+       if (tty->driver.ioctl) {
+               int retval = (tty->driver.ioctl)(tty, file, cmd, arg);
+               if (retval != -ENOIOCTLCMD)
+                       return retval;
+       }
+       if (tty->ldisc.ioctl) {
+               int retval = (tty->ldisc.ioctl)(tty, file, cmd, arg);
+               if (retval != -ENOIOCTLCMD)
+                       return retval;
+       }
+       return -EINVAL;
+}
+
+
+/*
+ * This implements the "Secure Attention Key" ---  the idea is to
+ * prevent trojan horses by killing all processes associated with this
+ * tty when the user hits the "Secure Attention Key".  Required for
+ * super-paranoid applications --- see the Orange Book for more details.
+ * 
+ * This code could be nicer; ideally it should send a HUP, wait a few
+ * seconds, then send an INT, and then a KILL signal.  But you then
+ * have to coordinate with the init process, since all processes associated
+ * with the current tty must be dead before the new getty is allowed
+ * to spawn.
+ *
+ * That is, if it were correct ;-/  The current code has a nasty hole:
+ * it doesn't catch files in flight.  A process may send the descriptor
+ * to itself via an AF_UNIX socket, close it, and fetch it back from
+ * the socket later.  FIXME.
+ *
+ * Nasty bug: do_SAK is being called in interrupt context.  This can
+ * deadlock.  We punt it up to process context.  AKPM - 16Mar2001
+ */
+static void __do_SAK(void *arg)
+{
+       struct tty_struct *tty = arg;
+#ifdef TTY_SOFT_SAK
+       tty_hangup(tty);
+#else
+       struct task_struct *p;
+       int session;
+       int             i;
+       struct file     *filp;
+       
+       if (!tty)
+               return;
+       session  = tty->session;
+       if (tty->ldisc.flush_buffer)
+               tty->ldisc.flush_buffer(tty);
+       if (tty->driver.flush_buffer)
+               tty->driver.flush_buffer(tty);
+       read_lock(&tasklist_lock);
+       for_each_task(p) {
+               if ((p->tty == tty) ||
+                   ((session > 0) && (p->session == session))) {
+                       send_sig(SIGKILL, p, 1);
+                       continue;
+               }
+               task_lock(p);
+               if (p->files) {
+                       read_lock(&p->files->file_lock);
+                       for (i=0; i < p->files->max_fds; i++) {
+                               filp = fcheck_files(p->files, i);
+                               if (filp && (filp->f_op == &tty_fops) &&
+                                   (filp->private_data == tty)) {
+                                       send_sig(SIGKILL, p, 1);
+                                       break;
+                               }
+                       }
+                       read_unlock(&p->files->file_lock);
+               }
+               task_unlock(p);
+       }
+       read_unlock(&tasklist_lock);
+#endif
+}
+
+/*
+ * The tq handling here is a little racy - tty->SAK_tq may already be queued.
+ * But there's no mechanism to fix that without futzing with tqueue_lock.
+ * Fortunately we don't need to worry, because if ->SAK_tq is already queued,
+ * the values which we write to it will be identical to the values which it
+ * already has. --akpm
+ */
+void do_SAK(struct tty_struct *tty)
+{
+       if (!tty)
+               return;
+       PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty);
+       schedule_task(&tty->SAK_tq);
+}
+
+/*
+ * This routine is called out of the software interrupt to flush data
+ * from the flip buffer to the line discipline.
+ */
+static void flush_to_ldisc(void *private_)
+{
+       struct tty_struct *tty = (struct tty_struct *) private_;
+       unsigned char   *cp;
+       char            *fp;
+       int             count;
+       unsigned long flags;
+
+       if (test_bit(TTY_DONT_FLIP, &tty->flags)) {
+               queue_task(&tty->flip.tqueue, &tq_timer);
+               return;
+       }
+       if (tty->flip.buf_num) {
+               cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+               fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+               tty->flip.buf_num = 0;
+
+               save_flags(flags); cli();
+               tty->flip.char_buf_ptr = tty->flip.char_buf;
+               tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+       } else {
+               cp = tty->flip.char_buf;
+               fp = tty->flip.flag_buf;
+               tty->flip.buf_num = 1;
+
+               save_flags(flags); cli();
+               tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE;
+               tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE;
+       }
+       count = tty->flip.count;
+       tty->flip.count = 0;
+       restore_flags(flags);
+       
+       tty->ldisc.receive_buf(tty, cp, fp, count);
+}
+
+/*
+ * Routine which returns the baud rate of the tty
+ *
+ * Note that the baud_table needs to be kept in sync with the
+ * include/asm/termbits.h file.
+ */
+static int baud_table[] = {
+       0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+       9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+       76800, 153600, 307200, 614400, 921600
+#else
+       500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+       2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+       unsigned int cflag, i;
+
+       cflag = tty->termios->c_cflag;
+
+       i = cflag & CBAUD;
+       if (i & CBAUDEX) {
+               i &= ~CBAUDEX;
+               if (i < 1 || i+15 >= n_baud_table) 
+                       tty->termios->c_cflag &= ~CBAUDEX;
+               else
+                       i += 15;
+       }
+       if (i == 15 && tty->alt_speed) {
+               if (!tty->warned) {
+                       printk(KERN_WARNING "Use of setserial/setrocket to "
+                                           "set SPD_* flags is deprecated\n");
+                       tty->warned = 1;
+               }
+               return tty->alt_speed;
+       }
+       
+       return baud_table[i];
+}
+
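+/*
+ * Push the flip buffer to the line discipline.  Low-latency drivers
+ * are flushed synchronously from here; everyone else has the work
+ * deferred to the timer task queue.
+ */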
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+       if (tty->low_latency)
+               flush_to_ldisc((void *) tty);
+       else
+               queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+       memset(tty, 0, sizeof(struct tty_struct));
+       tty->magic = TTY_MAGIC;
+       tty->ldisc = ldiscs[N_TTY];
+       tty->pgrp = -1;
+       tty->flip.char_buf_ptr = tty->flip.char_buf;
+       tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+       tty->flip.tqueue.routine = flush_to_ldisc;
+       tty->flip.tqueue.data = tty;
+       init_MUTEX(&tty->flip.pty_sem);
+       init_waitqueue_head(&tty->write_wait);
+       init_waitqueue_head(&tty->read_wait);
+       tty->tq_hangup.routine = do_tty_hangup;
+       tty->tq_hangup.data = tty;
+       sema_init(&tty->atomic_read, 1);
+       sema_init(&tty->atomic_write, 1);
+       spin_lock_init(&tty->read_lock);
+       INIT_LIST_HEAD(&tty->tty_files);
+       INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+       tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
+ */
+void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+       umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR;
+       kdev_t device = MKDEV (driver->major, minor);
+       int idx = minor - driver->minor_start;
+       char buf[32];
+
+       switch (device) {
+               case TTY_DEV:
+               case PTMX_DEV:
+                       mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+                       break;
+               default:
+                       if (driver->major == PTY_MASTER_MAJOR)
+                               mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+                       break;
+       }
+       if ( (minor <  driver->minor_start) || 
+            (minor >= driver->minor_start + driver->num) ) {
+               printk(KERN_ERR "Attempt to register invalid minor number "
+                      "with devfs (%d:%d).\n", (int)driver->major,(int)minor);
+               return;
+       }
+#  ifdef CONFIG_UNIX98_PTYS
+       if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) &&
+            (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) )
+               flags |= DEVFS_FL_CURRENT_OWNER;
+#  endif
+       sprintf(buf, driver->name, idx + driver->name_base);
+       devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT,
+                       driver->major, minor, mode, &tty_fops, NULL);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+void tty_unregister_devfs (struct tty_driver *driver, unsigned minor)
+{
+#ifdef CONFIG_DEVFS_FS
+       void * handle;
+       int idx = minor - driver->minor_start;
+       char buf[32];
+
+       sprintf(buf, driver->name, idx + driver->name_base);
+       handle = devfs_find_handle (NULL, buf, driver->major, minor,
+                                   DEVFS_SPECIAL_CHR, 0);
+       devfs_unregister (handle);
+#endif /* CONFIG_DEVFS_FS */
+}
+
+EXPORT_SYMBOL(tty_register_devfs);
+EXPORT_SYMBOL(tty_unregister_devfs);
+
+/*
+ * Called by a tty driver to register itself.
+ */
+int tty_register_driver(struct tty_driver *driver)
+{
+       int error;
+       int i;
+
+       if (driver->flags & TTY_DRIVER_INSTALLED)
+               return 0;
+
+       error = devfs_register_chrdev(driver->major, driver->name, &tty_fops);
+       if (error < 0)
+               return error;
+       else if (driver->major == 0)
+               driver->major = error;
+
+       if (!driver->put_char)
+               driver->put_char = tty_default_put_char;
+       
+       driver->prev = NULL;
+       driver->next = tty_drivers;
+       if (tty_drivers)
+               tty_drivers->prev = driver;
+       tty_drivers = driver;
+       
+       if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
+               for (i = 0; i < driver->num; i++)
+                       tty_register_devfs(driver, 0, driver->minor_start + i);
+       }
+       proc_tty_register_driver(driver);
+       return error;
+}
+
+/*
+ * Called by a tty driver to unregister itself.
+ */
+int tty_unregister_driver(struct tty_driver *driver)
+{
+       int     retval;
+       struct tty_driver *p;
+       int     i, found = 0;
+       struct termios *tp;
+       const char *othername = NULL;
+       
+       if (*driver->refcount)
+               return -EBUSY;
+
+       for (p = tty_drivers; p; p = p->next) {
+               if (p == driver)
+                       found++;
+               else if (p->major == driver->major)
+                       othername = p->name;
+       }
+       
+       if (!found)
+               return -ENOENT;
+
+       if (othername == NULL) {
+               retval = devfs_unregister_chrdev(driver->major, driver->name);
+               if (retval)
+                       return retval;
+       } else
+               devfs_register_chrdev(driver->major, othername, &tty_fops);
+
+       if (driver->prev)
+               driver->prev->next = driver->next;
+       else
+               tty_drivers = driver->next;
+       
+       if (driver->next)
+               driver->next->prev = driver->prev;
+
+       /*
+        * Free the termios and termios_locked structures because
+        * we don't want to get memory leaks when modular tty
+        * drivers are removed from the kernel.
+        */
+       for (i = 0; i < driver->num; i++) {
+               tp = driver->termios[i];
+               if (tp) {
+                       driver->termios[i] = NULL;
+                       kfree(tp);
+               }
+               tp = driver->termios_locked[i];
+               if (tp) {
+                       driver->termios_locked[i] = NULL;
+                       kfree(tp);
+               }
+               tty_unregister_devfs(driver, driver->minor_start + i);
+       }
+       proc_tty_unregister_driver(driver);
+       return 0;
+}
+
+
+/*
+ * Initialize the console device. This is called *early*, so
+ * we can't necessarily depend on lots of kernel help here.
+ * Just do some early initializations, and do the complex setup
+ * later.
+ */
+void __init console_init(void)
+{
+       /* Setup the default TTY line discipline. */
+       memset(ldiscs, 0, sizeof(ldiscs));
+       (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY);
+
+       /*
+        * Set up the standard termios.  Individual tty drivers may 
+        * deviate from this; this is used as a template.
+        */
+       memset(&tty_std_termios, 0, sizeof(struct termios));
+       memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS);
+       tty_std_termios.c_iflag = ICRNL | IXON;
+       tty_std_termios.c_oflag = OPOST | ONLCR;
+       tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL;
+       tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
+               ECHOCTL | ECHOKE | IEXTEN;
+
+       /*
+        * Set up the console device so that later boot sequences can
+        * report problems etc.
+        */
+#ifdef CONFIG_XENO
+        xeno_console_init();
+#endif
+#ifdef CONFIG_VT
+       con_init();
+#endif
+#ifdef CONFIG_AU1000_SERIAL_CONSOLE
+       au1000_serial_console_init();
+#endif
+#ifdef CONFIG_SERIAL_CONSOLE
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+       console_8xx_init();
+#elif defined(CONFIG_MAC_SERIAL)
+       mac_scc_console_init();
+#elif defined(CONFIG_PARISC)
+       pdc_console_init();
+#elif defined(CONFIG_SERIAL)
+       serial_console_init();
+#endif /* CONFIG_8xx */
+#ifdef CONFIG_SGI_SERIAL
+       sgi_serial_console_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+       vme_scc_console_init();
+#endif
+#if defined(CONFIG_SERIAL167)
+       serial167_console_init();
+#endif
+#if defined(CONFIG_SH_SCI)
+       sci_console_init();
+#endif
+#endif
+#ifdef CONFIG_TN3270_CONSOLE
+       tub3270_con_init();
+#endif
+#ifdef CONFIG_TN3215
+       con3215_init();
+#endif
+#ifdef CONFIG_HWC
+        hwc_console_init();
+#endif
+#ifdef CONFIG_STDIO_CONSOLE
+       stdio_console_init();
+#endif
+#ifdef CONFIG_SERIAL_21285_CONSOLE
+       rs285_console_init();
+#endif
+#ifdef CONFIG_SERIAL_SA1100_CONSOLE
+       sa1100_rs_console_init();
+#endif
+#ifdef CONFIG_ARC_CONSOLE
+       arc_console_init();
+#endif
+#ifdef CONFIG_SERIAL_AMBA_CONSOLE
+       ambauart_console_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912_CONSOLE
+       tx3912_console_init();
+#endif
+}
+
+static struct tty_driver dev_tty_driver, dev_syscons_driver;
+#ifdef CONFIG_UNIX98_PTYS
+static struct tty_driver dev_ptmx_driver;
+#endif
+#ifdef CONFIG_VT
+static struct tty_driver dev_console_driver;
+#endif
+
+/*
+ * Ok, now we can initialize the rest of the tty devices and can count
+ * on memory allocations, interrupts etc.
+ */
+void __init tty_init(void)
+{
+       /*
+        * dev_tty_driver and dev_console_driver are actually magic
+        * devices which get redirected at open time.  Nevertheless,
+        * we register them so that register_chrdev is called
+        * appropriately.
+        */
+       memset(&dev_tty_driver, 0, sizeof(struct tty_driver));
+       dev_tty_driver.magic = TTY_DRIVER_MAGIC;
+       dev_tty_driver.driver_name = "/dev/tty";
+       dev_tty_driver.name = dev_tty_driver.driver_name + 5;
+       dev_tty_driver.name_base = 0;
+       dev_tty_driver.major = TTYAUX_MAJOR;
+       dev_tty_driver.minor_start = 0;
+       dev_tty_driver.num = 1;
+       dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_tty_driver.subtype = SYSTEM_TYPE_TTY;
+       
+       if (tty_register_driver(&dev_tty_driver))
+               panic("Couldn't register /dev/tty driver\n");
+
+       dev_syscons_driver = dev_tty_driver;
+       dev_syscons_driver.driver_name = "/dev/console";
+       dev_syscons_driver.name = dev_syscons_driver.driver_name + 5;
+       dev_syscons_driver.major = TTYAUX_MAJOR;
+       dev_syscons_driver.minor_start = 1;
+       dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS;
+
+       if (tty_register_driver(&dev_syscons_driver))
+               panic("Couldn't register /dev/console driver\n");
+
+       /* console calls tty_register_driver() before kmalloc() works.
+        * Thus, we can't devfs_register() then.  Do so now, instead. 
+        */
+#ifdef CONFIG_VT
+       con_init_devfs();
+#endif
+
+#ifdef CONFIG_UNIX98_PTYS
+       dev_ptmx_driver = dev_tty_driver;
+       dev_ptmx_driver.driver_name = "/dev/ptmx";
+       dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5;
+       dev_ptmx_driver.major = MAJOR(PTMX_DEV);
+       dev_ptmx_driver.minor_start = MINOR(PTMX_DEV);
+       dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX;
+
+       if (tty_register_driver(&dev_ptmx_driver))
+               panic("Couldn't register /dev/ptmx driver\n");
+#endif
+       
+#ifdef CONFIG_VT
+       dev_console_driver = dev_tty_driver;
+       dev_console_driver.driver_name = "/dev/vc/0";
+       dev_console_driver.name = dev_console_driver.driver_name + 5;
+       dev_console_driver.major = TTY_MAJOR;
+       dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM;
+       dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE;
+
+       if (tty_register_driver(&dev_console_driver))
+               panic("Couldn't register /dev/tty0 driver\n");
+
+       kbd_init();
+#endif
+
+#ifdef CONFIG_ESPSERIAL  /* init ESP before rs, so rs doesn't see the port */
+       espserial_init();
+#endif
+#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC)
+       vme_scc_init();
+#endif
+#ifdef CONFIG_SERIAL_TX3912
+       tx3912_rs_init();
+#endif
+#ifdef CONFIG_ROCKETPORT
+       rp_init();
+#endif
+#ifdef CONFIG_SERIAL167
+       serial167_init();
+#endif
+#ifdef CONFIG_CYCLADES
+       cy_init();
+#endif
+#ifdef CONFIG_STALLION
+       stl_init();
+#endif
+#ifdef CONFIG_ISTALLION
+       stli_init();
+#endif
+#ifdef CONFIG_DIGI
+       pcxe_init();
+#endif
+#ifdef CONFIG_DIGIEPCA
+       pc_init();
+#endif
+#ifdef CONFIG_SPECIALIX
+       specialix_init();
+#endif
+#if (defined(CONFIG_8xx) || defined(CONFIG_8260))
+       rs_8xx_init();
+#endif /* CONFIG_8xx */
+       pty_init();
+#ifdef CONFIG_MOXA_SMARTIO
+       mxser_init();
+#endif 
+#ifdef CONFIG_MOXA_INTELLIO
+       moxa_init();
+#endif 
+#ifdef CONFIG_VT
+       vcs_init();
+#endif
+#ifdef CONFIG_TN3270
+       tub3270_init();
+#endif
+#ifdef CONFIG_TN3215
+       tty3215_init();
+#endif
+#ifdef CONFIG_HWC
+       hwc_tty_init();
+#endif
+#ifdef CONFIG_A2232
+       a2232board_init();
+#endif
+}
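+
+/*
+ * Illustrative sketch (not part of the original import; the "example_"
+ * names are hypothetical): the minimal registration dance a character
+ * driver performs against the core initialized above.  A real driver
+ * also supplies its table/termios arrays and operation hooks.
+ */
+#if 0
+static struct tty_driver example_driver;
+
+static int __init example_tty_init(void)
+{
+       memset(&example_driver, 0, sizeof(struct tty_driver));
+       example_driver.magic = TTY_DRIVER_MAGIC;
+       example_driver.driver_name = "example";
+       example_driver.name = "ttyX";
+       example_driver.major = 0;               /* 0 => ask for a dynamic major */
+       example_driver.num = 1;
+       example_driver.type = TTY_DRIVER_TYPE_SERIAL;
+       example_driver.init_termios = tty_std_termios;
+       return tty_register_driver(&example_driver);
+}
+#endif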
diff --git a/xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c b/xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c
new file mode 100644 (file)
index 0000000..a237346
--- /dev/null
@@ -0,0 +1,466 @@
+/*
+ *  $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $
+ *
+ *  Copyright (C) 1995, 1996  Gero Kuhlmann <gero@gkminix.han.de>
+ *
+ *  Allow an NFS filesystem to be mounted as root. The way this works is:
+ *     (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
+ *     (2) Handle RPC negotiation with the system which replied to RARP or
+ *         was reported as a boot server by BOOTP or manually.
+ *     (3) The actual mounting is done later, when init() is running.
+ *
+ *
+ *     Changes:
+ *
+ *     Alan Cox        :       Removed get_address name clash with FPU.
+ *     Alan Cox        :       Reformatted a bit.
+ *     Gero Kuhlmann   :       Code cleanup
+ *     Michael Rausch  :       Fixed recognition of an incoming RARP answer.
+ *     Martin Mares    : (2.0) Auto-configuration via BOOTP supported.
+ *     Martin Mares    :       Manual selection of interface & BOOTP/RARP.
+ *     Martin Mares    :       Using network routes instead of host routes,
+ *                             allowing the default configuration to be used
+ *                             for normal operation of the host.
+ *     Martin Mares    :       Randomized timer with exponential backoff
+ *                             installed to minimize network congestion.
+ *     Martin Mares    :       Code cleanup.
+ *     Martin Mares    : (2.1) BOOTP and RARP made configuration options.
+ *     Martin Mares    :       Server hostname generation fixed.
+ *     Gerd Knorr      :       Fixed wired inode handling
+ *     Martin Mares    : (2.2) "0.0.0.0" addresses from command line ignored.
+ *     Martin Mares    :       RARP replies not tested for server address.
+ *     Gero Kuhlmann   : (2.3) Some bug fixes and code cleanup again (please
+ *                             send me your new patches _before_ bothering
+ *                             Linus so that I don't always have to clean up
+ *                             _afterwards_ - thanks)
+ *     Gero Kuhlmann   :       Last changes of Martin Mares undone.
+ *     Gero Kuhlmann   :       RARP replies are tested for specified server
+ *                             again. However, it's now possible to have
+ *                             different RARP and NFS servers.
+ *     Gero Kuhlmann   :       "0.0.0.0" addresses from command line are
+ *                             now mapped to INADDR_NONE.
+ *     Gero Kuhlmann   :       Fixed a bug which prevented BOOTP path name
+ *                             from being used (thanks to Leo Spiekman)
+ *     Andy Walker     :       Allow to specify the NFS server in nfs_root
+ *                             without giving a path name
+ *     Swen Thümmler   :       Allow to specify the NFS options in nfs_root
+ *                             without giving a path name. Fix BOOTP request
+ *                             for domainname (domainname is NIS domain, not
+ *                             DNS domain!). Skip dummy devices for BOOTP.
+ *     Jacek Zapala    :       Fixed a bug which prevented server-ip address
+ *                             from nfsroot parameter from being used.
+ *     Olaf Kirch      :       Adapted to new NFS code.
+ *     Jakub Jelinek   :       Free used code segment.
+ *     Marko Kohtala   :       Fixed some bugs.
+ *     Martin Mares    :       Debug message cleanup
+ *     Martin Mares    :       Changed to use the new generic IP layer autoconfig
+ *                             code. BOOTP and RARP moved there.
+ *     Martin Mares    :       Default path now contains host name instead of
+ *                             host IP address (but host name defaults to IP
+ *                             address anyway).
+ *     Martin Mares    :       Use root_server_addr appropriately during setup.
+ *     Martin Mares    :       Rewrote parameter parsing, now hopefully giving
+ *                             correct overriding.
+ *     Trond Myklebust :       Add in preliminary support for NFSv3 and TCP.
+ *                             Fix bug in root_nfs_addr(). nfs_data.namlen
+ *                             is NOT for the length of the hostname.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/major.h>
+#include <linux/utsname.h>
+#include <net/ipconfig.h>
+
+/* Define this to allow debugging output */
+#undef NFSROOT_DEBUG
+#define NFSDBG_FACILITY NFSDBG_ROOT
+
+/* Default path we try to mount. "%s" gets replaced by our IP address */
+#define NFS_ROOT               "/tftpboot/%s"
+
+/* Parameters passed from the kernel command line */
+static char nfs_root_name[256] __initdata = "";
+
+/* Address of NFS server */
+static __u32 servaddr __initdata = 0;
+
+/* Name of directory to mount */
+static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, };
+
+/* NFS-related data */
+static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
+static int nfs_port __initdata = 0;            /* Port to connect to for NFS */
+static int mount_port __initdata = 0;          /* Mount daemon port number */
+
+
+/***************************************************************************
+
+                            Parsing of options
+
+ ***************************************************************************/
+
+/*
+ *  The following integer options are recognized
+ */
+static struct nfs_int_opts {
+       char *name;
+       int  *val;
+} root_int_opts[] __initdata = {
+       { "port",       &nfs_port },
+       { "rsize",      &nfs_data.rsize },
+       { "wsize",      &nfs_data.wsize },
+       { "timeo",      &nfs_data.timeo },
+       { "retrans",    &nfs_data.retrans },
+       { "acregmin",   &nfs_data.acregmin },
+       { "acregmax",   &nfs_data.acregmax },
+       { "acdirmin",   &nfs_data.acdirmin },
+       { "acdirmax",   &nfs_data.acdirmax },
+       { NULL,         NULL }
+};
+
+
+/*
+ *  And now the flag options
+ */
+static struct nfs_bool_opts {
+       char *name;
+       int  and_mask;
+       int  or_mask;
+} root_bool_opts[] __initdata = {
+       { "soft",       ~NFS_MOUNT_SOFT,        NFS_MOUNT_SOFT },
+       { "hard",       ~NFS_MOUNT_SOFT,        0 },
+       { "intr",       ~NFS_MOUNT_INTR,        NFS_MOUNT_INTR },
+       { "nointr",     ~NFS_MOUNT_INTR,        0 },
+       { "posix",      ~NFS_MOUNT_POSIX,       NFS_MOUNT_POSIX },
+       { "noposix",    ~NFS_MOUNT_POSIX,       0 },
+       { "cto",        ~NFS_MOUNT_NOCTO,       0 },
+       { "nocto",      ~NFS_MOUNT_NOCTO,       NFS_MOUNT_NOCTO },
+       { "ac",         ~NFS_MOUNT_NOAC,        0 },
+       { "noac",       ~NFS_MOUNT_NOAC,        NFS_MOUNT_NOAC },
+       { "lock",       ~NFS_MOUNT_NONLM,       0 },
+       { "nolock",     ~NFS_MOUNT_NONLM,       NFS_MOUNT_NONLM },
+#ifdef CONFIG_NFS_V3
+       { "v2",         ~NFS_MOUNT_VER3,        0 },
+       { "v3",         ~NFS_MOUNT_VER3,        NFS_MOUNT_VER3 },
+#endif
+       { "udp",        ~NFS_MOUNT_TCP,         0 },
+       { "tcp",        ~NFS_MOUNT_TCP,         NFS_MOUNT_TCP },
+       { "broken_suid",~NFS_MOUNT_BROKEN_SUID, NFS_MOUNT_BROKEN_SUID },
+       { NULL,         0,                      0 }
+};
+
+
+/*
+ *  Extract IP address from the parameter string if needed. Note that we
+ *  need to have root_server_addr set _before_ IPConfig gets called as it
+ *  can override it.
+ */
+static void __init root_nfs_parse_addr(char *name)
+{
+       int octets = 0;
+       char *cp, *cq;
+
+       cp = cq = name;
+       while (octets < 4) {
+               while (*cp >= '0' && *cp <= '9')
+                       cp++;
+               if (cp == cq || cp - cq > 3)
+                       break;
+               if (*cp == '.' || octets == 3)
+                       octets++;
+               if (octets < 4)
+                       cp++;
+               cq = cp;
+       }
+       if (octets == 4 && (*cp == ':' || *cp == '\0')) {
+               if (*cp == ':')
+                       *cp++ = '\0';
+               root_server_addr = in_aton(name);
+               strcpy(name, cp);
+       }
+}
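+
+/*
+ * Example (illustrative): given name = "10.0.0.1:/export/client", the
+ * routine above sets root_server_addr to 10.0.0.1 and rewrites name to
+ * "/export/client"; a name without a leading dotted-quad is left alone.
+ */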
+
+
+/*
+ *  Parse option string.
+ */
+static void __init root_nfs_parse(char *name, char *buf)
+{
+       char *options, *val, *cp;
+
+       if ((options = strchr(name, ','))) {
+               *options++ = 0;
+               cp = strtok(options, ",");
+               while (cp) {
+                       if ((val = strchr(cp, '='))) {
+                               struct nfs_int_opts *opts = root_int_opts;
+                               *val++ = '\0';
+                               while (opts->name && strcmp(opts->name, cp))
+                                       opts++;
+                               if (opts->name)
+                                       *(opts->val) = (int) simple_strtoul(val, NULL, 10);
+                       } else {
+                               struct nfs_bool_opts *opts = root_bool_opts;
+                               while (opts->name && strcmp(opts->name, cp))
+                                       opts++;
+                               if (opts->name) {
+                                       nfs_data.flags &= opts->and_mask;
+                                       nfs_data.flags |= opts->or_mask;
+                               }
+                       }
+                       cp = strtok(NULL, ",");
+               }
+       }
+       if (name[0] && strcmp(name, "default")) {
+               strncpy(buf, name, NFS_MAXPATHLEN-1);
+               buf[NFS_MAXPATHLEN-1] = 0;
+       }
+}
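+
+/*
+ * Example (illustrative): parsing "/export/client,rsize=4096,tcp,v3"
+ * leaves "/export/client" in buf, sets nfs_data.rsize to 4096 and ORs
+ * NFS_MOUNT_TCP (and, with CONFIG_NFS_V3, NFS_MOUNT_VER3) into
+ * nfs_data.flags.
+ */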
+
+
+/*
+ *  Prepare the NFS data structure and parse all options.
+ */
+static int __init root_nfs_name(char *name)
+{
+       char buf[NFS_MAXPATHLEN];
+       char *cp;
+
+       /* Set some default values */
+       memset(&nfs_data, 0, sizeof(nfs_data));
+       nfs_port          = -1;
+       nfs_data.version  = NFS_MOUNT_VERSION;
+       nfs_data.flags    = NFS_MOUNT_NONLM;    /* No lockd in nfs root yet */
+       nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+       nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+       nfs_data.bsize    = 0;
+       nfs_data.timeo    = 7;
+       nfs_data.retrans  = 3;
+       nfs_data.acregmin = 3;
+       nfs_data.acregmax = 60;
+       nfs_data.acdirmin = 30;
+       nfs_data.acdirmax = 60;
+       strcpy(buf, NFS_ROOT);
+
+       /* Process options received from the remote server */
+       root_nfs_parse(root_server_path, buf);
+
+       /* Override them by options set on kernel command-line */
+       root_nfs_parse(name, buf);
+
+       cp = system_utsname.nodename;
+       if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
+               printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
+               return -1;
+       }
+       sprintf(nfs_path, buf, cp);
+
+       return 1;
+}
+
+
+/*
+ *  Get NFS server address.
+ */
+static int __init root_nfs_addr(void)
+{
+       if ((servaddr = root_server_addr) == INADDR_NONE) {
+               printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
+               return -1;
+       }
+
+       strncpy(nfs_data.hostname, in_ntoa(servaddr), sizeof(nfs_data.hostname)-1);
+       return 0;
+}
+
+/*
+ *  Tell the user what's going on.
+ */
+#ifdef NFSROOT_DEBUG
+static void __init root_nfs_print(void)
+{
+       printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
+               nfs_path, nfs_data.hostname);
+       printk(KERN_NOTICE "Root-NFS:     rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
+               nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
+       printk(KERN_NOTICE "Root-NFS:     acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
+               nfs_data.acregmin, nfs_data.acregmax,
+               nfs_data.acdirmin, nfs_data.acdirmax);
+       printk(KERN_NOTICE "Root-NFS:     nfsd port = %d, mountd port = %d, flags = %08x\n",
+               nfs_port, mount_port, nfs_data.flags);
+}
+#endif
+
+
+int __init root_nfs_init(void)
+{
+#ifdef NFSROOT_DEBUG
+       nfs_debug |= NFSDBG_ROOT;
+#endif
+
+       /*
+        * Decode the root directory path name and NFS options from
+        * the kernel command line. This has to go here in order to
+        * be able to use the client IP address for the remote root
+        * directory (necessary for pure RARP booting).
+        */
+       if (root_nfs_name(nfs_root_name) < 0 ||
+           root_nfs_addr() < 0)
+               return -1;
+
+#ifdef NFSROOT_DEBUG
+       root_nfs_print();
+#endif
+
+       return 0;
+}
+
+
+/*
+ *  Parse NFS server and directory information passed on the kernel
+ *  command line.
+ */
+int __init nfs_root_setup(char *line)
+{
+       ROOT_DEV = MKDEV(UNNAMED_MAJOR, 255);
+       if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
+               strncpy(nfs_root_name, line, sizeof(nfs_root_name));
+               nfs_root_name[sizeof(nfs_root_name)-1] = '\0';
+       } else {
+               int n = strlen(line) + strlen(NFS_ROOT);
+               if (n >= sizeof(nfs_root_name))
+                       line[sizeof(nfs_root_name) - strlen(NFS_ROOT) - 1] = '\0';
+               sprintf(nfs_root_name, NFS_ROOT, line);
+       }
+       root_nfs_parse_addr(nfs_root_name);
+       return 1;
+}
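+
+/*
+ * Example (illustrative) command lines accepted above:
+ *
+ *     nfsroot=/export/client          used verbatim as the root path
+ *     nfsroot=10.0.0.1:/export,v3     server address plus mount options
+ *     nfsroot=client1                 expanded via NFS_ROOT to
+ *                                     "/tftpboot/client1"
+ */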
+
+__setup("nfsroot=", nfs_root_setup);
+
+/***************************************************************************
+
+              Routines to actually mount the root directory
+
+ ***************************************************************************/
+
+/*
+ *  Construct sockaddr_in from address and port number.
+ */
+static inline void
+set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port)
+{
+       sin->sin_family = AF_INET;
+       sin->sin_addr.s_addr = addr;
+       sin->sin_port = port;
+}
+
+/*
+ *  Query server portmapper for the port of a daemon program.
+ */
+static int __init root_nfs_getport(int program, int version, int proto)
+{
+       struct sockaddr_in sin;
+
+       printk(KERN_NOTICE "Looking up port of RPC %d/%d on %s\n",
+               program, version, in_ntoa(servaddr));
+       set_sockaddr(&sin, servaddr, 0);
+       return rpc_getport_external(&sin, program, version, proto);
+}
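+
+/*
+ * Example (illustrative): root_nfs_getport(NFS_PROGRAM, 2, IPPROTO_UDP)
+ * asks the server's portmapper where NFS v2 over UDP is bound; a
+ * conventionally configured server answers 2049.
+ */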
+
+
+/*
+ *  Use portmapper to find mountd and nfsd port numbers if not overridden
+ *  by the user. Use defaults if portmapper is not available.
+ *  XXX: Is there any nfs server with no portmapper?
+ */
+static int __init root_nfs_ports(void)
+{
+       int port;
+       int nfsd_ver, mountd_ver;
+       int nfsd_port, mountd_port;
+       int proto;
+
+       if (nfs_data.flags & NFS_MOUNT_VER3) {
+               nfsd_ver = NFS3_VERSION;
+               mountd_ver = NFS_MNT3_VERSION;
+               nfsd_port = NFS_PORT;
+               mountd_port = NFS_MNT_PORT;
+       } else {
+               nfsd_ver = NFS2_VERSION;
+               mountd_ver = NFS_MNT_VERSION;
+               nfsd_port = NFS_PORT;
+               mountd_port = NFS_MNT_PORT;
+       }
+
+       proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+
+       if (nfs_port < 0) {
+               if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
+                       printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
+                                       "number from server, using default\n");
+                       port = nfsd_port;
+               }
+               nfs_port = htons(port);
+               dprintk("Root-NFS: Portmapper on server returned %d "
+                       "as nfsd port\n", port);
+       }
+
+       if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
+               printk(KERN_ERR "Root-NFS: Unable to get mountd port "
+                               "number from server, using default\n");
+               port = mountd_port;
+       }
+       mount_port = htons(port);
+       dprintk("Root-NFS: mountd port is %d\n", port);
+
+       return 0;
+}
+
+
+/*
+ *  Get a file handle from the server for the directory which is to be
+ *  mounted.
+ */
+static int __init root_nfs_get_handle(void)
+{
+       struct sockaddr_in sin;
+       int status;
+
+       set_sockaddr(&sin, servaddr, mount_port);
+       if (nfs_data.flags & NFS_MOUNT_VER3)
+               status = nfs3_mount(&sin, nfs_path, &nfs_data.root);
+       else
+               status = nfs_mount(&sin, nfs_path, &nfs_data.root);
+       if (status < 0)
+               printk(KERN_ERR "Root-NFS: Server returned error %d "
+                               "while mounting %s\n", status, nfs_path);
+
+       return status;
+}
+
+/*
+ *  Get the NFS port numbers and file handle, and return the prepared 'data'
+ *  argument for ->read_super() if everything went OK. Return NULL otherwise.
+ */
+void * __init nfs_root_data(void)
+{
+       if (root_nfs_init() < 0
+        || root_nfs_ports() < 0
+        || root_nfs_get_handle() < 0)
+               return NULL;
+       set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port);
+       return (void*)&nfs_data;
+}
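+
+/*
+ * Illustrative sketch (not part of the original import;
+ * example_mount_nfs_root is hypothetical): how a caller in the mount
+ * path might consume the result above before handing it to the NFS
+ * ->read_super().
+ */
+#if 0
+static int example_mount_nfs_root(void)
+{
+       void *data = nfs_root_data();
+
+       if (data == NULL)               /* ports or root file handle missing */
+               return -1;
+       /* hand 'data' to the NFS superblock code, e.g. via mount_root() */
+       return 0;
+}
+#endif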
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h b/xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h
new file mode 100644 (file)
index 0000000..ab17bb8
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef __I386_A_OUT_H__
+#define __I386_A_OUT_H__
+
+struct exec
+{
+  unsigned long a_info;                /* Use macros N_MAGIC, etc for access */
+  unsigned a_text;             /* length of text, in bytes */
+  unsigned a_data;             /* length of data, in bytes */
+  unsigned a_bss;              /* length of uninitialized data area for file, in bytes */
+  unsigned a_syms;             /* length of symbol table data in file, in bytes */
+  unsigned a_entry;            /* start address */
+  unsigned a_trsize;           /* length of relocation info for text, in bytes */
+  unsigned a_drsize;           /* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a)    ((a).a_trsize)
+#define N_DRSIZE(a)    ((a).a_drsize)
+#define N_SYMSIZE(a)   ((a).a_syms)
+
+#ifdef __KERNEL__
+
+#define STACK_TOP      TASK_SIZE
+
+#endif
+
+#endif /* __I386_A_OUT_H__ */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/apic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/apic.h
new file mode 100644 (file)
index 0000000..5f682e9
--- /dev/null
@@ -0,0 +1,97 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <linux/config.h>
+#include <linux/pm.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+       *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+       xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+       return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+       do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
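+
+/*
+ * Illustrative sketch (not part of the original import; the function
+ * name is hypothetical): reading this CPU's physical APIC ID with the
+ * accessors above.  GET_APIC_ID() comes from <asm/apicdef.h>.
+ */
+static __inline unsigned long example_apic_phys_id(void)
+{
+       return GET_APIC_ID(apic_read(APIC_ID));
+}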
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+       /*
+        * ack_APIC_irq() actually gets compiled as a single instruction:
+        * - a single rmw on Pentium/82489DX
+        * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+        * ... yummie.
+        */
+
+       /* Docs say use 0 for future compatibility */
+       apic_write_around(APIC_EOI, 0);
+}
+
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+
+extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
+extern void apic_pm_unregister(struct pm_dev*);
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE       0
+#define NMI_IO_APIC    1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID    3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#endif /* __ASM_APIC_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h b/xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h
new file mode 100644 (file)
index 0000000..f855a7d
--- /dev/null
@@ -0,0 +1,363 @@
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define                APIC_DEFAULT_PHYS_BASE  0xfee00000
+#define                APIC_ID         0x20
+#define                        APIC_ID_MASK            (0x0F<<24)
+#define                        GET_APIC_ID(x)          (((x)>>24)&0x0F)
+#define                APIC_LVR        0x30
+#define                        APIC_LVR_MASK           0xFF00FF
+#define                        GET_APIC_VERSION(x)     ((x)&0xFF)
+#define                        GET_APIC_MAXLVT(x)      (((x)>>16)&0xFF)
+#define                        APIC_INTEGRATED(x)      ((x)&0xF0)
+#define                APIC_TASKPRI    0x80
+#define                        APIC_TPRI_MASK          0xFF
+#define                APIC_ARBPRI     0x90
+#define                        APIC_ARBPRI_MASK        0xFF
+#define                APIC_PROCPRI    0xA0
+#define                APIC_EOI        0xB0
+#define                        APIC_EIO_ACK            0x0             /* Write this to the EOI register */
+#define                APIC_RRR        0xC0
+#define                APIC_LDR        0xD0
+#define                        APIC_LDR_MASK           (0xFF<<24)
+#define                        GET_APIC_LOGICAL_ID(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_LOGICAL_ID(x)  (((x)<<24))
+#define                        APIC_ALL_CPUS           0xFF
+#define                APIC_DFR        0xE0
+#define                APIC_SPIV       0xF0
+#define                        APIC_SPIV_FOCUS_DISABLED        (1<<9)
+#define                        APIC_SPIV_APIC_ENABLED          (1<<8)
+#define                APIC_ISR        0x100
+#define                APIC_TMR        0x180
+#define        APIC_IRR        0x200
+#define        APIC_ESR        0x280
+#define                        APIC_ESR_SEND_CS        0x00001
+#define                        APIC_ESR_RECV_CS        0x00002
+#define                        APIC_ESR_SEND_ACC       0x00004
+#define                        APIC_ESR_RECV_ACC       0x00008
+#define                        APIC_ESR_SENDILL        0x00020
+#define                        APIC_ESR_RECVILL        0x00040
+#define                        APIC_ESR_ILLREGA        0x00080
+#define                APIC_ICR        0x300
+#define                        APIC_DEST_SELF          0x40000
+#define                        APIC_DEST_ALLINC        0x80000
+#define                        APIC_DEST_ALLBUT        0xC0000
+#define                        APIC_ICR_RR_MASK        0x30000
+#define                        APIC_ICR_RR_INVALID     0x00000
+#define                        APIC_ICR_RR_INPROG      0x10000
+#define                        APIC_ICR_RR_VALID       0x20000
+#define                        APIC_INT_LEVELTRIG      0x08000
+#define                        APIC_INT_ASSERT         0x04000
+#define                        APIC_ICR_BUSY           0x01000
+#define                        APIC_DEST_LOGICAL       0x00800
+#define                        APIC_DM_FIXED           0x00000
+#define                        APIC_DM_LOWEST          0x00100
+#define                        APIC_DM_SMI             0x00200
+#define                        APIC_DM_REMRD           0x00300
+#define                        APIC_DM_NMI             0x00400
+#define                        APIC_DM_INIT            0x00500
+#define                        APIC_DM_STARTUP         0x00600
+#define                        APIC_DM_EXTINT          0x00700
+#define                        APIC_VECTOR_MASK        0x000FF
+#define                APIC_ICR2       0x310
+#define                        GET_APIC_DEST_FIELD(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_DEST_FIELD(x)  ((x)<<24)
+#define                APIC_LVTT       0x320
+#define                APIC_LVTPC      0x340
+#define                APIC_LVT0       0x350
+#define                        APIC_LVT_TIMER_BASE_MASK        (0x3<<18)
+#define                        GET_APIC_TIMER_BASE(x)          (((x)>>18)&0x3)
+#define                        SET_APIC_TIMER_BASE(x)          (((x)<<18))
+#define                        APIC_TIMER_BASE_CLKIN           0x0
+#define                        APIC_TIMER_BASE_TMBASE          0x1
+#define                        APIC_TIMER_BASE_DIV             0x2
+#define                        APIC_LVT_TIMER_PERIODIC         (1<<17)
+#define                        APIC_LVT_MASKED                 (1<<16)
+#define                        APIC_LVT_LEVEL_TRIGGER          (1<<15)
+#define                        APIC_LVT_REMOTE_IRR             (1<<14)
+#define                        APIC_INPUT_POLARITY             (1<<13)
+#define                        APIC_SEND_PENDING               (1<<12)
+#define                        GET_APIC_DELIVERY_MODE(x)       (((x)>>8)&0x7)
+#define                        SET_APIC_DELIVERY_MODE(x,y)     (((x)&~0x700)|((y)<<8))
+#define                                APIC_MODE_FIXED         0x0
+#define                                APIC_MODE_NMI           0x4
+#define                                APIC_MODE_EXINT         0x7
+#define        APIC_LVT1       0x360
+#define                APIC_LVTERR     0x370
+#define                APIC_TMICT      0x380
+#define                APIC_TMCCT      0x390
+#define                APIC_TDCR       0x3E0
+#define                        APIC_TDR_DIV_TMBASE     (1<<2)
+#define                        APIC_TDR_DIV_1          0xB
+#define                        APIC_TDR_DIV_2          0x0
+#define                        APIC_TDR_DIV_4          0x1
+#define                        APIC_TDR_DIV_8          0x2
+#define                        APIC_TDR_DIV_16         0x3
+#define                        APIC_TDR_DIV_32         0x8
+#define                        APIC_TDR_DIV_64         0x9
+#define                        APIC_TDR_DIV_128        0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#define MAX_IO_APICS 8
+
+/*
+ * The local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * reason we cannot use it right now is the P5 APIC: it has an erratum
+ * and cannot take 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/        struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/        struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/        struct { /* APIC ID Register */
+               u32   __reserved_1      : 24,
+                       phys_apic_id    :  4,
+                       __reserved_2    :  4;
+               u32 __reserved[3];
+       } id;
+
+/*030*/        const
+       struct { /* APIC Version Register */
+               u32   version           :  8,
+                       __reserved_1    :  8,
+                       max_lvt         :  8,
+                       __reserved_2    :  8;
+               u32 __reserved[3];
+       } version;
+
+/*040*/        struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/        struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/        struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/        struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/        struct { /* Task Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } tpr;
+
+/*090*/        const
+       struct { /* Arbitration Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } apr;
+
+/*0A0*/        const
+       struct { /* Processor Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } ppr;
+
+/*0B0*/        struct { /* End Of Interrupt Register */
+               u32   eoi;
+               u32 __reserved[3];
+       } eoi;
+
+/*0C0*/        struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/        struct { /* Logical Destination Register */
+               u32   __reserved_1      : 24,
+                       logical_dest    :  8;
+               u32 __reserved_2[3];
+       } ldr;
+
+/*0E0*/        struct { /* Destination Format Register */
+               u32   __reserved_1      : 28,
+                       model           :  4;
+               u32 __reserved_2[3];
+       } dfr;
+
+/*0F0*/        struct { /* Spurious Interrupt Vector Register */
+               u32     spurious_vector :  8,
+                       apic_enabled    :  1,
+                       focus_cpu       :  1,
+                       __reserved_2    : 22;
+               u32 __reserved_3[3];
+       } svr;
+
+/*100*/        struct { /* In Service Register */
+/*170*/                u32 bitfield;
+               u32 __reserved[3];
+       } isr [8];
+
+/*180*/        struct { /* Trigger Mode Register */
+/*1F0*/                u32 bitfield;
+               u32 __reserved[3];
+       } tmr [8];
+
+/*200*/        struct { /* Interrupt Request Register */
+/*270*/                u32 bitfield;
+               u32 __reserved[3];
+       } irr [8];
+
+/*280*/        union { /* Error Status Register */
+               struct {
+                       u32   send_cs_error                     :  1,
+                               receive_cs_error                :  1,
+                               send_accept_error               :  1,
+                               receive_accept_error            :  1,
+                               __reserved_1                    :  1,
+                               send_illegal_vector             :  1,
+                               receive_illegal_vector          :  1,
+                               illegal_register_address        :  1,
+                               __reserved_2                    : 24;
+                       u32 __reserved_3[3];
+               } error_bits;
+               struct {
+                       u32 errors;
+                       u32 __reserved_3[3];
+               } all_errors;
+       } esr;
+
+/*290*/        struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/        struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/        struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/        struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/        struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/        struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/        struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/        struct { /* Interrupt Command Register 1 */
+               u32   vector                    :  8,
+                       delivery_mode           :  3,
+                       destination_mode        :  1,
+                       delivery_status         :  1,
+                       __reserved_1            :  1,
+                       level                   :  1,
+                       trigger                 :  1,
+                       __reserved_2            :  2,
+                       shorthand               :  2,
+                       __reserved_3            :  12;
+               u32 __reserved_4[3];
+       } icr1;
+
+/*310*/        struct { /* Interrupt Command Register 2 */
+               union {
+                       u32   __reserved_1      : 24,
+                               phys_dest       :  4,
+                               __reserved_2    :  4;
+                       u32   __reserved_3      : 24,
+                               logical_dest    :  8;
+               } dest;
+               u32 __reserved_4[3];
+       } icr2;
+
+/*320*/        struct { /* LVT - Timer */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       timer_mode      :  1,
+                       __reserved_3    : 14;
+               u32 __reserved_4[3];
+       } lvt_timer;
+
+/*330*/        struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/        struct { /* LVT - Performance Counter */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_pc;
+
+/*350*/        struct { /* LVT - LINT0 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint0;
+
+/*360*/        struct { /* LVT - LINT1 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint1;
+
+/*370*/        struct { /* LVT - Error */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_error;
+
+/*380*/        struct { /* Timer Initial Count Register */
+               u32   initial_count;
+               u32 __reserved_2[3];
+       } timer_icr;
+
+/*390*/        const
+       struct { /* Timer Current Count Register */
+               u32   curr_count;
+               u32 __reserved_2[3];
+       } timer_ccr;
+
+/*3A0*/        struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/        struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/        struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/        struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/        struct { /* Timer Divide Configuration Register */
+               u32   divisor           :  4,
+                       __reserved_1    : 28;
+               u32 __reserved_2[3];
+       } timer_dcr;
+
+/*3F0*/        struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
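+
+/*
+ * Illustrative (hypothetical) use of the mapping above, e.g. reading
+ * this CPU's physical APIC ID through the memory-mapped window:
+ *
+ *     int id = lapic->id.phys_apic_id;
+ */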
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h
new file mode 100644 (file)
index 0000000..b356b37
--- /dev/null
@@ -0,0 +1,204 @@
+#ifndef __ARCH_I386_ATOMIC__
+#define __ARCH_I386_ATOMIC__
+
+#include <linux/config.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting, etc.
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_read(v)         ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ * 
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_set(v,i)                (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically adds @i to @v.  Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "addl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "subl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "subl %2,%0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "incl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_dec(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "decl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "decl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
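+
+/*
+ * Illustrative sketch (not part of the original import; 'struct foo'
+ * and foo_put() are hypothetical, and kfree() comes from
+ * <linux/slab.h>): the canonical refcounting idiom built on
+ * atomic_inc()/atomic_dec_and_test().
+ */
+#if 0
+struct foo {
+       atomic_t refcnt;
+};
+
+static inline void foo_put(struct foo *f)
+{
+       if (atomic_dec_and_test(&f->refcnt))    /* last reference dropped? */
+               kfree(f);
+}
+#endif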
+
+/**
+ * atomic_inc_and_test - increment and test 
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "incl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ * 
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "addl %2,%0; sets %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK "andl %0,%1" \
+: : "r" (~(mask)),"m" (*addr) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK "orl %0,%1" \
+: : "r" (mask),"m" (*addr) : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()    barrier()
+#define smp_mb__after_atomic_dec()     barrier()
+#define smp_mb__before_atomic_inc()    barrier()
+#define smp_mb__after_atomic_inc()     barrier()
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h b/xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h
new file mode 100644 (file)
index 0000000..a3063ca
--- /dev/null
@@ -0,0 +1,384 @@
+#ifndef _I386_BITOPS_H
+#define _I386_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <linux/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile void * addr)
+{
+       __asm__(
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+#define smp_mb__before_clear_bit()     barrier()
+#define smp_mb__after_clear_bit()      barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to toggle
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__(
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to toggle
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr));
+       return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr));
+       return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"Ir" (nr) : "memory");
+       return oldbit;
+}
+
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static int test_bit(int nr, const volatile void * addr);
+#endif
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+       return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit)
+               :"m" (ADDR),"Ir" (nr));
+       return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
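+
+/*
+ * Illustrative sketch (not part of the original import; 'map' and
+ * example_bitmap_use are hypothetical): the operations above act on an
+ * arbitrarily long array of words, so bit 35 lands in the second 32-bit
+ * word of 'map'.
+ */
+#if 0
+static unsigned long map[2];                   /* a 64-bit bitmap */
+
+static void example_bitmap_use(void)
+{
+       set_bit(35, map);                       /* sets bit 3 of map[1] */
+       if (test_and_clear_bit(35, map))
+               /* the bit was set; it has now been cleared again */;
+}
+#endif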
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+       int d0, d1, d2;
+       int res;
+
+       if (!size)
+               return 0;
+       /* This looks at memory. Mark it volatile to tell gcc not to move it around */
+       __asm__ __volatile__(
+               "movl $-1,%%eax\n\t"
+               "xorl %%edx,%%edx\n\t"
+               "repe; scasl\n\t"
+               "je 1f\n\t"
+               "xorl -4(%%edi),%%eax\n\t"
+               "subl $4,%%edi\n\t"
+               "bsfl %%eax,%%edx\n"
+               "1:\tsubl %%ebx,%%edi\n\t"
+               "shll $3,%%edi\n\t"
+               "addl %%edi,%%edx"
+               :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+       return res;
+}
+
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+       unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+       int set = 0, bit = offset & 31, res;
+       
+       if (bit) {
+               /*
+                * Look for a zero in the first word
+                */
+               __asm__("bsfl %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movl $32, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (~(*p >> bit)));
+               if (set < (32 - bit))
+                       return set + offset;
+               set = 32 - bit;
+               p++;
+       }
+       /*
+        * No zero yet, search the remaining full words for a zero
+        */
+       res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+       return (offset + set + res);
+}
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+       __asm__("bsfl %1,%0"
+               :"=r" (word)
+               :"r" (~word));
+       return word;
+}
+
+#ifdef __KERNEL__
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+       int r;
+
+       __asm__("bsfl %1,%0\n\t"
+               "jnz 1f\n\t"
+               "movl $-1,%0\n"
+               "1:" : "=r" (r) : "g" (x));
+       return r+1;
+}
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#endif /* __KERNEL__ */
+
+#ifdef __KERNEL__
+
+#define ext2_set_bit                 __test_and_set_bit
+#define ext2_clear_bit               __test_and_clear_bit
+#define ext2_test_bit                test_bit
+#define ext2_find_first_zero_bit     find_first_zero_bit
+#define ext2_find_next_zero_bit      find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem.  */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* __KERNEL__ */
+
+#endif /* _I386_BITOPS_H */
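
The ffz()/find_first_zero_bit() contract above is easy to misread: the return value is a bit index, not a byte offset, and ffz() is undefined for an all-ones word. A minimal user-space sketch of the same semantics in portable C, illustrative only and not the inline-asm version:

    /* Portable sketch of the ffz() semantics documented above. */
    #include <assert.h>
    #include <stdio.h>

    /* Index of the first zero bit; undefined for word == ~0UL,
     * so callers should check that case first. */
    static unsigned long ffz_ref(unsigned long word)
    {
        unsigned long bit = 0;
        while (word & 1) {          /* skip set bits from the LSB up */
            word >>= 1;
            bit++;
        }
        return bit;
    }

    int main(void)
    {
        assert(ffz_ref(0x00) == 0);     /* bit 0 already clear */
        assert(ffz_ref(0x01) == 1);     /* first zero is bit 1 */
        assert(ffz_ref(0xFF) == 8);     /* low byte all ones   */
        printf("ffz semantics hold\n");
        return 0;
    }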
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/boot.h b/xenolinux-2.4.16-sparse/include/asm-xeno/boot.h
new file mode 100644 (file)
index 0000000..96b228e
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _LINUX_BOOT_H
+#define _LINUX_BOOT_H
+
+/* Don't touch these, unless you really know what you're doing. */
+#define DEF_INITSEG    0x9000
+#define DEF_SYSSEG     0x1000
+#define DEF_SETUPSEG   0x9020
+#define DEF_SYSSIZE    0x7F00
+
+/* Internal svga startup constants */
+#define NORMAL_VGA     0xffff          /* 80x25 mode */
+#define EXTENDED_VGA   0xfffe          /* 80x50 mode */
+#define ASK_VGA                0xfffd          /* ask for it at bootup */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h b/xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h
new file mode 100644 (file)
index 0000000..07fee92
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ *  include/asm-i386/bugs.h
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *
+ *  Cyrix stuff, June 1998 by:
+ *     - Rafael R. Reilova (moved everything from head.S),
+ *        <rreilova@ececs.uc.edu>
+ *     - Channing Corn (tests & fixes),
+ *     - Andrew D. Balsa (code cleanup).
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *     void check_bugs(void);
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+
+
+static void __init check_fpu(void)
+{
+    boot_cpu_data.fdiv_bug = 0;
+}
+
+static void __init check_hlt(void)
+{
+    boot_cpu_data.hlt_works_ok = 1;
+}
+
+static void __init check_bugs(void)
+{
+    identify_cpu(&boot_cpu_data);
+#ifndef CONFIG_SMP
+    printk("CPU: ");
+    print_cpu_info(&boot_cpu_data);
+#endif
+    check_fpu();
+    check_hlt();
+    system_utsname.machine[1] = '0' + 
+        (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+}
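
The last statement of check_bugs() rewrites the second character of the utsname machine string, so a family-6 CPU reports "i686" and any newer family is clamped to 6. A stand-alone sketch of that clamping, with a made-up family value:

    /* Sketch of the machine-string clamping done in check_bugs() above. */
    #include <stdio.h>

    int main(void)
    {
        char machine[] = "i386";    /* stands in for system_utsname.machine */
        int x86 = 15;               /* hypothetical CPU family */

        machine[1] = '0' + (x86 > 6 ? 6 : x86);
        printf("%s\n", machine);    /* prints "i686" */
        return 0;
    }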
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h b/xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h
new file mode 100644 (file)
index 0000000..bbfb629
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _I386_BYTEORDER_H
+#define _I386_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+/* For avoiding bswap on i386 */
+#ifdef __KERNEL__
+#include <linux/config.h>
+#endif
+
+static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
+{
+#ifdef CONFIG_X86_BSWAP
+       __asm__("bswap %0" : "=r" (x) : "0" (x));
+#else
+       __asm__("xchgb %b0,%h0\n\t"     /* swap lower bytes     */
+               "rorl $16,%0\n\t"       /* swap words           */
+               "xchgb %b0,%h0"         /* swap higher bytes    */
+               :"=q" (x)
+               : "0" (x));
+#endif
+       return x;
+}
+
+static __inline__ __const__ __u16 ___arch__swab16(__u16 x)
+{
+       __asm__("xchgb %b0,%h0"         /* swap bytes           */
+               : "=q" (x)
+               : "0" (x));
+       return x;
+}
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab16(x) ___arch__swab16(x)
+
+#if !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#  define __BYTEORDER_HAS_U64__
+#  define __SWAB_64_THRU_32__
+#endif
+
+#endif /* __GNUC__ */
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _I386_BYTEORDER_H */
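
The fallback path in ___arch__swab32() reverses the four bytes with two byte exchanges around a 16-bit rotate. For comparison, a portable shift-and-mask sketch of the same transformation (illustrative, not the kernel's code path):

    /* Portable equivalent of the 32-bit byte swap implemented above. */
    #include <assert.h>
    #include <stdint.h>

    static uint32_t swab32_ref(uint32_t x)
    {
        return ((x & 0x000000FFu) << 24) |
               ((x & 0x0000FF00u) <<  8) |
               ((x & 0x00FF0000u) >>  8) |
               ((x & 0xFF000000u) >> 24);
    }

    int main(void)
    {
        assert(swab32_ref(0x12345678u) == 0x78563412u);
        return 0;
    }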
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/cache.h b/xenolinux-2.4.16-sparse/include/asm-xeno/cache.h
new file mode 100644 (file)
index 0000000..615911e
--- /dev/null
@@ -0,0 +1,13 @@
+/*
+ * include/asm-i386/cache.h
+ */
+#ifndef __ARCH_I386_CACHE_H
+#define __ARCH_I386_CACHE_H
+
+#include <linux/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h b/xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h
new file mode 100644 (file)
index 0000000..0de58ab
--- /dev/null
@@ -0,0 +1,197 @@
+#ifndef _I386_CHECKSUM_H
+#define _I386_CHECKSUM_H
+
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+asmlinkage unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here it is even more important to align src and dst on a 32-bit (or,
+ * even better, a 64-bit) boundary
+ */
+
+asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
+                                                  int *src_err_ptr, int *dst_err_ptr);
+
+/*
+ *     Note: when you get a NULL pointer exception here this means someone
+ *     passed in an incorrect kernel address to one of these functions. 
+ *     
+ *     If you use these functions directly please don't forget the 
+ *     verify_area().
+ */
+static __inline__
+unsigned int csum_partial_copy_nocheck ( const char *src, char *dst,
+                                       int len, int sum)
+{
+       return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL);
+}
+
+static __inline__
+unsigned int csum_partial_copy_from_user ( const char *src, char *dst,
+                                               int len, int sum, int *err_ptr)
+{
+       return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, NULL);
+}
+
+/*
+ * These are the old (and unsafe) way of doing checksums; a warning message will be
+ * printed if they are used and an exception occurs.
+ *
+ * These functions should go away after some time.
+ */
+
+#define csum_partial_copy_fromuser csum_partial_copy
+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum);
+
+/*
+ *     This is a version of ip_compute_csum() optimized for IP headers,
+ *     which always checksum on 4 octet boundaries.
+ *
+ *     By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
+ *     Arnt Gulbrandsen.
+ */
+static inline unsigned short ip_fast_csum(unsigned char * iph,
+                                         unsigned int ihl) {
+       unsigned int sum;
+
+       __asm__ __volatile__("
+           movl (%1), %0
+           subl $4, %2
+           jbe 2f
+           addl 4(%1), %0
+           adcl 8(%1), %0
+           adcl 12(%1), %0
+1:         adcl 16(%1), %0
+           lea 4(%1), %1
+           decl %2
+           jne 1b
+           adcl $0, %0
+           movl %0, %2
+           shrl $16, %0
+           addw %w2, %w0
+           adcl $0, %0
+           notl %0
+2:
+           "
+       /* Since the input registers which are loaded with iph and ihl
+          are modified, we must also specify them as outputs, or gcc
+          will assume they contain their original values. */
+       : "=r" (sum), "=r" (iph), "=r" (ihl)
+       : "1" (iph), "2" (ihl));
+       return(sum);
+}
+
+/*
+ *     Fold a partial checksum
+ */
+
+static inline unsigned int csum_fold(unsigned int sum)
+{
+       __asm__("
+               addl %1, %0
+               adcl $0xffff, %0
+               "
+               : "=r" (sum)
+               : "r" (sum << 16), "0" (sum & 0xffff0000)
+       );
+       return (~sum) >> 16;
+}
+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
+                                                  unsigned long daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  unsigned int sum) 
+{
+    __asm__("
+       addl %1, %0
+       adcl %2, %0
+       adcl %3, %0
+       adcl $0, %0
+       "
+       : "=r" (sum)
+       : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum));
+    return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
+                                                  unsigned long daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  unsigned int sum) 
+{
+       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+static inline unsigned short ip_compute_csum(unsigned char * buff, int len) {
+    return csum_fold (csum_partial(buff, len, 0));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u32 len,
+                                                    unsigned short proto,
+                                                    unsigned int sum) 
+{
+       __asm__("
+               addl 0(%1), %0
+               adcl 4(%1), %0
+               adcl 8(%1), %0
+               adcl 12(%1), %0
+               adcl 0(%2), %0
+               adcl 4(%2), %0
+               adcl 8(%2), %0
+               adcl 12(%2), %0
+               adcl %3, %0
+               adcl %4, %0
+               adcl $0, %0
+               "
+               : "=&r" (sum)
+               : "r" (saddr), "r" (daddr), 
+                 "r"(htonl(len)), "r"(htonl(proto)), "0"(sum));
+
+       return csum_fold(sum);
+}
+
+/* 
+ *     Copy and checksum to user
+ */
+#define HAVE_CSUM_COPY_USER
+static __inline__ unsigned int csum_and_copy_to_user (const char *src, char *dst,
+                                   int len, int sum, int *err_ptr)
+{
+       if (access_ok(VERIFY_WRITE, dst, len))
+               return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr);
+
+       if (len)
+               *err_ptr = -EFAULT;
+
+       return -1; /* invalid checksum */
+}
+
+#endif
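
Everything in this header computes the standard ones-complement Internet checksum; csum_fold() is the step that collapses the 32-bit running sum into 16 bits and complements it. A portable sketch of the full computation in the style of RFC 1071, illustrative rather than a drop-in replacement for the asm above:

    /* Reference ones-complement checksum (network byte order assumed). */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    static uint16_t csum_ref(const uint8_t *buf, size_t len)
    {
        uint32_t sum = 0;
        while (len > 1) {                   /* sum 16-bit words */
            sum += (uint32_t)(buf[0] << 8 | buf[1]);
            buf += 2;
            len -= 2;
        }
        if (len)                            /* odd trailing byte */
            sum += (uint32_t)buf[0] << 8;
        while (sum >> 16)                   /* fold carries, like csum_fold() */
            sum = (sum & 0xFFFF) + (sum >> 16);
        return (uint16_t)~sum;              /* complemented, like csum_fold() */
    }

    int main(void)
    {
        /* Worked example from RFC 1071: the uncomplemented sum is 0xddf2. */
        const uint8_t data[] = { 0x00, 0x01, 0xf2, 0x03, 0xf4, 0xf5, 0xf6, 0xf7 };
        assert((uint16_t)~csum_ref(data, sizeof data) == 0xddf2);
        return 0;
    }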
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h b/xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h
new file mode 100644 (file)
index 0000000..598edbd
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_I386_CPUFEATURE_H
+#define __ASM_I386_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
+
+#define NCAPINTS       4       /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU                (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME                (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE         (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE        (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC                (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR                (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE                (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8                (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC       (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP                (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR       (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE                (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA                (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV       (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT                (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36      (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN         (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH     (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES       (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI       (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX                (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR       (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+                                         /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM                (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2       (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP  (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_ACC                (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64       (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL    (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT     (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM         (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT   (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW      (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY   (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN    (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI       (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX      (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR    (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR  (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR        (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#endif /* __ASM_I386_CPUFEATURE_H */
+
+/* 
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
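
Each X86_FEATURE_* constant packs a word index and a bit position as (word*32 + bit), indexing the NCAPINTS-word x86_capability array. A small sketch of the decoding, with a made-up capability vector:

    /* Sketch of the (word*32 + bit) feature encoding used above. */
    #include <assert.h>

    #define NCAPINTS        4
    #define X86_FEATURE_TSC (0*32 + 4)      /* word 0, bit 4, as defined above */

    static int has_feature(const unsigned long caps[NCAPINTS], int feature)
    {
        return (caps[feature / 32] >> (feature % 32)) & 1;
    }

    int main(void)
    {
        /* Pretend CPUID reported only the TSC bit in word 0. */
        unsigned long caps[NCAPINTS] = { 1ul << 4, 0, 0, 0 };
        assert(has_feature(caps, X86_FEATURE_TSC));
        return 0;
    }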
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/current.h b/xenolinux-2.4.16-sparse/include/asm-xeno/current.h
new file mode 100644 (file)
index 0000000..bc1496a
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _I386_CURRENT_H
+#define _I386_CURRENT_H
+
+struct task_struct;
+
+static inline struct task_struct * get_current(void)
+{
+       struct task_struct *current;
+       __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
+       return current;
+}
+#define current get_current()
+
+#endif /* !(_I386_CURRENT_H) */
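
get_current() depends on two invariants: each kernel stack is an 8 KB, 8 KB-aligned region, and the task_struct sits at its base, so clearing the low 13 bits of %esp recovers the task pointer. A sketch of just that arithmetic, using a made-up stack-pointer value:

    /* The mask arithmetic behind get_current() above. */
    #include <assert.h>

    int main(void)
    {
        unsigned long esp  = 0xC7FF9F40UL;      /* hypothetical %esp  */
        unsigned long task = esp & ~8191UL;     /* round down to 8 KB */

        assert(task == 0xC7FF8000UL);
        assert((task & 8191UL) == 0);           /* 8 KB aligned base  */
        return 0;
    }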
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h b/xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h
new file mode 100644 (file)
index 0000000..f0b2b06
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _I386_DEBUGREG_H
+#define _I386_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+   debug registers.  Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0        /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3         /* u_debugreg[DR_LASTADDR]  */
+
+#define DR_STATUS 6           /* u_debugreg[DR_STATUS]     */
+#define DR_CONTROL 7          /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register.  We can use this to determine
+   which debugging register was responsible for the trap.  The other bits
+   are either reserved or not of interest to us. */
+
+#define DR_TRAP0       (0x1)           /* db0 */
+#define DR_TRAP1       (0x2)           /* db1 */
+#define DR_TRAP2       (0x4)           /* db2 */
+#define DR_TRAP3       (0x8)           /* db3 */
+
+#define DR_STEP                (0x4000)        /* single-step */
+#define DR_SWITCH      (0x8000)        /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+   The top two bytes of the control register consist of 4 fields of 4
+   bits - each field corresponds to one of the four debug registers,
+   and indicates what types of access we trap on, and how large the data
+   field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4   /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+
+/* The low byte of the control register determines which registers are
+   enabled.  There are 4 fields of two bits.  One bit is "local", meaning
+   that the processor will reset the bit after a task switch and the other
+   is global meaning that we have to explicitly reset the bit.  With linux,
+   you can use either one, since we explicitly zero the register when we enter
+   kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+   We can slow the instruction pipeline for instructions coming via the
+   gdt or the ldt if we want to.  I am not sure why this is an advantage */
+
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
+
+#endif
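
Putting the masks together: a 4-byte write watchpoint in debug register 0 needs DR_RW_WRITE|DR_LEN_4 in that register's 4-bit control field plus its local-enable bit. A sketch of composing the DR7 value (only the bit arithmetic is shown; the actual register write is omitted):

    /* Composing a DR7 value from the masks defined above. */
    #include <stdio.h>

    #define DR_CONTROL_SHIFT      16
    #define DR_CONTROL_SIZE       4
    #define DR_RW_WRITE           0x1
    #define DR_LEN_4              0xC
    #define DR_ENABLE_SIZE        2
    #define DR_LOCAL_ENABLE_SHIFT 0

    int main(void)
    {
        int regnum = 0;                 /* debug register 0 */
        unsigned long dr7 = 0;

        /* type/length field for register 0 starts at bit 16 */
        dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
                   << (DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE);
        /* local-enable bit for register 0 is bit 0 */
        dr7 |= 1UL << (DR_LOCAL_ENABLE_SHIFT + regnum * DR_ENABLE_SIZE);

        printf("DR7 = %#lx\n", dr7);    /* prints 0xd0001 */
        return 0;
    }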
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/delay.h b/xenolinux-2.4.16-sparse/include/asm-xeno/delay.h
new file mode 100644 (file)
index 0000000..c7d2184
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _I386_DELAY_H
+#define _I386_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/i386/lib/delay.c
+ */
+extern void __bad_udelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __const_udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) (__builtin_constant_p(n) ? \
+       ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
+       __udelay(n))
+
+#endif /* defined(_I386_DELAY_H) */
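
The 0x10c6 multiplier in udelay() is floor(2^32/10^6): it converts microseconds into a 32.32 fixed-point fraction of a second, which __const_udelay() can then scale by the calibrated delay-loop rate. A quick arithmetic check of that reading:

    /* Checking the fixed-point constant used by udelay() above. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* 0x10c6 == floor(2^32 / 10^6) */
        assert(((uint64_t)1 << 32) / 1000000u == 0x10c6);

        /* so one million microseconds is within rounding error of 1.0
         * (i.e. 2^32) in 32.32 fixed point */
        uint64_t one_second = 1000000ull * 0x10c6;
        assert(((uint64_t)1 << 32) - one_second < 1000000u);
        return 0;
    }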
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/desc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/desc.h
new file mode 100644 (file)
index 0000000..1920de0
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#include <asm/ldt.h>
+
+#define __LDT(_X)     (0)
+
+#define clear_LDT()   ((void)0)
+#define load_LDT(_mm) ((void)0)
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/div64.h b/xenolinux-2.4.16-sparse/include/asm-xeno/div64.h
new file mode 100644 (file)
index 0000000..ef915df
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef __I386_DIV64
+#define __I386_DIV64
+
+#define do_div(n,base) ({ \
+       unsigned long __upper, __low, __high, __mod; \
+       asm("":"=a" (__low), "=d" (__high):"A" (n)); \
+       __upper = __high; \
+       if (__high) { \
+               __upper = __high % (base); \
+               __high = __high / (base); \
+       } \
+       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
+       asm("":"=A" (n):"a" (__low),"d" (__high)); \
+       __mod; \
+})
+
+#endif
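
do_div(n, base) divides the 64-bit n in place and evaluates to the 32-bit remainder; the asm splits n across eax:edx because divl wants the dividend in that register pair. A portable sketch of the same contract, written as a GCC statement expression like the original:

    /* Portable sketch of the do_div() contract above. */
    #include <assert.h>
    #include <stdint.h>

    #define do_div_ref(n, base) ({                          \
            uint32_t __rem = (uint32_t)((n) % (base));      \
            (n) /= (base);                                  \
            __rem;                                          \
    })

    int main(void)
    {
        uint64_t n = 10000000001ULL;
        uint32_t rem = do_div_ref(n, 10);

        assert(n == 1000000000ULL);     /* quotient left in n */
        assert(rem == 1);               /* remainder returned */
        return 0;
    }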
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/dma.h b/xenolinux-2.4.16-sparse/include/asm-xeno/dma.h
new file mode 100644 (file)
index 0000000..1bc9899
--- /dev/null
@@ -0,0 +1,298 @@
+/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h>    /* And spinlocks */
+#include <asm/io.h>            /* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb       outb_p
+#else
+#define dma_outb       outb
+#endif
+
+#define dma_inb                inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ *  controller 1: channels 0-3, byte operations, ports 00-1F
+ *  controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ *  - ALL registers are 8 bits only, regardless of transfer size
+ *  - channel 4 is not used - cascades 1 into 2.
+ *  - channels 0-3 are byte - addresses/counts are for physical bytes
+ *  - channels 5-7 are word - addresses/counts are for physical words
+ *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ *  - transfer count loaded to registers is 1 less than actual count
+ *  - controller 2 offsets are all even (2x offsets for controller 1)
+ *  - page registers for 5-7 don't use data bit 0, represent 128K pages
+ *  - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.  
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ *  Address mapping for channels 0-3:
+ *
+ *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *   P7  ...  P0  A7 ... A0  A7 ... A0   
+ * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
+ *
+ *  Address mapping for channels 5-7:
+ *
+ *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
+ *    |  ...  |   \   \   ... \  \  \  ... \  \
+ *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
+ *    |  ...  |     \   \   ... \  \  \  ... \
+ *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0   
+ * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
+ *
+ */
+
+#define MAX_DMA_CHANNELS       8
+
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE   0x00    /* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE   0xC0    /* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG           0x08    /* command register (w) */
+#define DMA1_STAT_REG          0x08    /* status register (r) */
+#define DMA1_REQ_REG            0x09    /* request register (w) */
+#define DMA1_MASK_REG          0x0A    /* single-channel mask (w) */
+#define DMA1_MODE_REG          0x0B    /* mode register (w) */
+#define DMA1_CLEAR_FF_REG      0x0C    /* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
+#define DMA1_RESET_REG         0x0D    /* Master Clear (w) */
+#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
+#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
+
+#define DMA2_CMD_REG           0xD0    /* command register (w) */
+#define DMA2_STAT_REG          0xD0    /* status register (r) */
+#define DMA2_REQ_REG            0xD2    /* request register (w) */
+#define DMA2_MASK_REG          0xD4    /* single-channel mask (w) */
+#define DMA2_MODE_REG          0xD6    /* mode register (w) */
+#define DMA2_CLEAR_FF_REG      0xD8    /* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
+#define DMA2_RESET_REG         0xDA    /* Master Clear (w) */
+#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
+#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
+
+#define DMA_ADDR_0              0x00    /* DMA address registers */
+#define DMA_ADDR_1              0x02
+#define DMA_ADDR_2              0x04
+#define DMA_ADDR_3              0x06
+#define DMA_ADDR_4              0xC0
+#define DMA_ADDR_5              0xC4
+#define DMA_ADDR_6              0xC8
+#define DMA_ADDR_7              0xCC
+
+#define DMA_CNT_0               0x01    /* DMA count registers */
+#define DMA_CNT_1               0x03
+#define DMA_CNT_2               0x05
+#define DMA_CNT_3               0x07
+#define DMA_CNT_4               0xC2
+#define DMA_CNT_5               0xC6
+#define DMA_CNT_6               0xCA
+#define DMA_CNT_7               0xCE
+
+#define DMA_PAGE_0              0x87    /* DMA page registers */
+#define DMA_PAGE_1              0x83
+#define DMA_PAGE_2              0x81
+#define DMA_PAGE_3              0x82
+#define DMA_PAGE_5              0x8B
+#define DMA_PAGE_6              0x89
+#define DMA_PAGE_7              0x8A
+
+#define DMA_MODE_READ  0x44    /* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE 0x48    /* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_CASCADE 0xC0   /* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT   0x10
+
+
+extern spinlock_t  dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&dma_spin_lock, flags);
+       return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+       spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(dmanr,  DMA1_MASK_REG);
+       else
+               dma_outb(dmanr & 3,  DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(dmanr | 4,  DMA1_MASK_REG);
+       else
+               dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(0,  DMA1_CLEAR_FF_REG);
+       else
+               dma_outb(0,  DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+       if (dmanr<=3)
+               dma_outb(mode | dmanr,  DMA1_MODE_REG);
+       else
+               dma_outb(mode | (dmanr&3),  DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+       switch(dmanr) {
+               case 0:
+                       dma_outb(pagenr, DMA_PAGE_0);
+                       break;
+               case 1:
+                       dma_outb(pagenr, DMA_PAGE_1);
+                       break;
+               case 2:
+                       dma_outb(pagenr, DMA_PAGE_2);
+                       break;
+               case 3:
+                       dma_outb(pagenr, DMA_PAGE_3);
+                       break;
+               case 5:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+                       break;
+               case 6:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+                       break;
+               case 7:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+                       break;
+       }
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+       set_dma_page(dmanr, a>>16);
+       if (dmanr <= 3)  {
+           dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+            dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+       }  else  {
+           dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+           dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+       }
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+        count--;
+       if (dmanr <= 3)  {
+           dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+           dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+        } else {
+           dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+           dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+        }
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+       unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
+                                        : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
+
+       /* using short to get 16-bit wrap around */
+       unsigned short count;
+
+       count = 1 + dma_inb(io_port);
+       count += dma_inb(io_port) << 8;
+       
+       return (dmanr<=3)? count : (count<<1);
+}
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char * device_id);    /* reserve a DMA channel */
+extern void free_dma(unsigned int dmanr);      /* release it again */
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy   (0)
+#endif
+
+#endif /* _ASM_DMA_H */
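
For the byte channels (0-3), set_dma_addr() splits a physical address into a page register holding A23-A16 and a 16-bit offset written LSB-first through the flip-flop. A sketch of that split for one made-up address:

    /* The address split performed by set_dma_page()/set_dma_addr() above. */
    #include <assert.h>

    int main(void)
    {
        unsigned int a = 0x123456;              /* hypothetical physical address */

        unsigned char page = (a >> 16) & 0xFF;  /* -> DMA_PAGE_n register    */
        unsigned char lsb  = a & 0xFF;          /* first write to addr port  */
        unsigned char msb  = (a >> 8) & 0xFF;   /* second write to addr port */

        assert(page == 0x12 && lsb == 0x56 && msb == 0x34);
        return 0;
    }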
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/elf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/elf.h
new file mode 100644 (file)
index 0000000..9b14bcf
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef __ASMi386_ELF_H
+#define __ASMi386_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+#include <linux/utsname.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_i387_struct elf_fpregset_t;
+typedef struct user_fxsr_struct elf_fpxregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+       (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS      ELFCLASS32
+#define ELF_DATA       ELFDATA2LSB
+#define ELF_ARCH       EM_386
+
+/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
+   contains a pointer to a function which might be registered using `atexit'.
+   This provides a means for the dynamic linker to call DT_FINI functions for
+   shared libraries that have been loaded before the code runs.
+
+   A value of 0 tells us we have no such handler.
+
+   We might as well make sure everything else is cleared too (except for %esp),
+   just to make things more deterministic.
+ */
+#define ELF_PLAT_INIT(_r)      do { \
+       _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \
+       _r->esi = 0; _r->edi = 0; _r->ebp = 0; \
+       _r->eax = 0; \
+} while (0)
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE      4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
+
+/* Wow, the "main" arch needs arch dependent functions too.. :) */
+
+/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+   now struct_user_regs, they are different) */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs)               \
+       pr_reg[0] = regs->ebx;                          \
+       pr_reg[1] = regs->ecx;                          \
+       pr_reg[2] = regs->edx;                          \
+       pr_reg[3] = regs->esi;                          \
+       pr_reg[4] = regs->edi;                          \
+       pr_reg[5] = regs->ebp;                          \
+       pr_reg[6] = regs->eax;                          \
+       pr_reg[7] = regs->xds;                          \
+       pr_reg[8] = regs->xes;                          \
+       /* fake once used fs and gs selectors? */       \
+       pr_reg[9] = regs->xds;  /* was fs and __fs */   \
+       pr_reg[10] = regs->xds; /* was gs and __gs */   \
+       pr_reg[11] = regs->orig_eax;                    \
+       pr_reg[12] = regs->eip;                         \
+       pr_reg[13] = regs->xcs;                         \
+       pr_reg[14] = regs->eflags;                      \
+       pr_reg[15] = regs->esp;                         \
+       pr_reg[16] = regs->xss;
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports.  This could be done in user space,
+   but it's not easy, and we've already done it here.  */
+
+#define ELF_HWCAP      (boot_cpu_data.x86_capability[0])
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   For the moment, we have only optimizations for the Intel generations,
+   but that could change... */
+
+#define ELF_PLATFORM  (system_utsname.machine)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
+#endif
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/errno.h b/xenolinux-2.4.16-sparse/include/asm-xeno/errno.h
new file mode 100644 (file)
index 0000000..7cf599f
--- /dev/null
@@ -0,0 +1,132 @@
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define        EPERM            1      /* Operation not permitted */
+#define        ENOENT           2      /* No such file or directory */
+#define        ESRCH            3      /* No such process */
+#define        EINTR            4      /* Interrupted system call */
+#define        EIO              5      /* I/O error */
+#define        ENXIO            6      /* No such device or address */
+#define        E2BIG            7      /* Arg list too long */
+#define        ENOEXEC          8      /* Exec format error */
+#define        EBADF            9      /* Bad file number */
+#define        ECHILD          10      /* No child processes */
+#define        EAGAIN          11      /* Try again */
+#define        ENOMEM          12      /* Out of memory */
+#define        EACCES          13      /* Permission denied */
+#define        EFAULT          14      /* Bad address */
+#define        ENOTBLK         15      /* Block device required */
+#define        EBUSY           16      /* Device or resource busy */
+#define        EEXIST          17      /* File exists */
+#define        EXDEV           18      /* Cross-device link */
+#define        ENODEV          19      /* No such device */
+#define        ENOTDIR         20      /* Not a directory */
+#define        EISDIR          21      /* Is a directory */
+#define        EINVAL          22      /* Invalid argument */
+#define        ENFILE          23      /* File table overflow */
+#define        EMFILE          24      /* Too many open files */
+#define        ENOTTY          25      /* Not a typewriter */
+#define        ETXTBSY         26      /* Text file busy */
+#define        EFBIG           27      /* File too large */
+#define        ENOSPC          28      /* No space left on device */
+#define        ESPIPE          29      /* Illegal seek */
+#define        EROFS           30      /* Read-only file system */
+#define        EMLINK          31      /* Too many links */
+#define        EPIPE           32      /* Broken pipe */
+#define        EDOM            33      /* Math argument out of domain of func */
+#define        ERANGE          34      /* Math result not representable */
+#define        EDEADLK         35      /* Resource deadlock would occur */
+#define        ENAMETOOLONG    36      /* File name too long */
+#define        ENOLCK          37      /* No record locks available */
+#define        ENOSYS          38      /* Function not implemented */
+#define        ENOTEMPTY       39      /* Directory not empty */
+#define        ELOOP           40      /* Too many symbolic links encountered */
+#define        EWOULDBLOCK     EAGAIN  /* Operation would block */
+#define        ENOMSG          42      /* No message of desired type */
+#define        EIDRM           43      /* Identifier removed */
+#define        ECHRNG          44      /* Channel number out of range */
+#define        EL2NSYNC        45      /* Level 2 not synchronized */
+#define        EL3HLT          46      /* Level 3 halted */
+#define        EL3RST          47      /* Level 3 reset */
+#define        ELNRNG          48      /* Link number out of range */
+#define        EUNATCH         49      /* Protocol driver not attached */
+#define        ENOCSI          50      /* No CSI structure available */
+#define        EL2HLT          51      /* Level 2 halted */
+#define        EBADE           52      /* Invalid exchange */
+#define        EBADR           53      /* Invalid request descriptor */
+#define        EXFULL          54      /* Exchange full */
+#define        ENOANO          55      /* No anode */
+#define        EBADRQC         56      /* Invalid request code */
+#define        EBADSLT         57      /* Invalid slot */
+
+#define        EDEADLOCK       EDEADLK
+
+#define        EBFONT          59      /* Bad font file format */
+#define        ENOSTR          60      /* Device not a stream */
+#define        ENODATA         61      /* No data available */
+#define        ETIME           62      /* Timer expired */
+#define        ENOSR           63      /* Out of streams resources */
+#define        ENONET          64      /* Machine is not on the network */
+#define        ENOPKG          65      /* Package not installed */
+#define        EREMOTE         66      /* Object is remote */
+#define        ENOLINK         67      /* Link has been severed */
+#define        EADV            68      /* Advertise error */
+#define        ESRMNT          69      /* Srmount error */
+#define        ECOMM           70      /* Communication error on send */
+#define        EPROTO          71      /* Protocol error */
+#define        EMULTIHOP       72      /* Multihop attempted */
+#define        EDOTDOT         73      /* RFS specific error */
+#define        EBADMSG         74      /* Not a data message */
+#define        EOVERFLOW       75      /* Value too large for defined data type */
+#define        ENOTUNIQ        76      /* Name not unique on network */
+#define        EBADFD          77      /* File descriptor in bad state */
+#define        EREMCHG         78      /* Remote address changed */
+#define        ELIBACC         79      /* Can not access a needed shared library */
+#define        ELIBBAD         80      /* Accessing a corrupted shared library */
+#define        ELIBSCN         81      /* .lib section in a.out corrupted */
+#define        ELIBMAX         82      /* Attempting to link in too many shared libraries */
+#define        ELIBEXEC        83      /* Cannot exec a shared library directly */
+#define        EILSEQ          84      /* Illegal byte sequence */
+#define        ERESTART        85      /* Interrupted system call should be restarted */
+#define        ESTRPIPE        86      /* Streams pipe error */
+#define        EUSERS          87      /* Too many users */
+#define        ENOTSOCK        88      /* Socket operation on non-socket */
+#define        EDESTADDRREQ    89      /* Destination address required */
+#define        EMSGSIZE        90      /* Message too long */
+#define        EPROTOTYPE      91      /* Protocol wrong type for socket */
+#define        ENOPROTOOPT     92      /* Protocol not available */
+#define        EPROTONOSUPPORT 93      /* Protocol not supported */
+#define        ESOCKTNOSUPPORT 94      /* Socket type not supported */
+#define        EOPNOTSUPP      95      /* Operation not supported on transport endpoint */
+#define        EPFNOSUPPORT    96      /* Protocol family not supported */
+#define        EAFNOSUPPORT    97      /* Address family not supported by protocol */
+#define        EADDRINUSE      98      /* Address already in use */
+#define        EADDRNOTAVAIL   99      /* Cannot assign requested address */
+#define        ENETDOWN        100     /* Network is down */
+#define        ENETUNREACH     101     /* Network is unreachable */
+#define        ENETRESET       102     /* Network dropped connection because of reset */
+#define        ECONNABORTED    103     /* Software caused connection abort */
+#define        ECONNRESET      104     /* Connection reset by peer */
+#define        ENOBUFS         105     /* No buffer space available */
+#define        EISCONN         106     /* Transport endpoint is already connected */
+#define        ENOTCONN        107     /* Transport endpoint is not connected */
+#define        ESHUTDOWN       108     /* Cannot send after transport endpoint shutdown */
+#define        ETOOMANYREFS    109     /* Too many references: cannot splice */
+#define        ETIMEDOUT       110     /* Connection timed out */
+#define        ECONNREFUSED    111     /* Connection refused */
+#define        EHOSTDOWN       112     /* Host is down */
+#define        EHOSTUNREACH    113     /* No route to host */
+#define        EALREADY        114     /* Operation already in progress */
+#define        EINPROGRESS     115     /* Operation now in progress */
+#define        ESTALE          116     /* Stale NFS file handle */
+#define        EUCLEAN         117     /* Structure needs cleaning */
+#define        ENOTNAM         118     /* Not a XENIX named type file */
+#define        ENAVAIL         119     /* No XENIX semaphores available */
+#define        EISNAM          120     /* Is a named type file */
+#define        EREMOTEIO       121     /* Remote I/O error */
+#define        EDQUOT          122     /* Quota exceeded */
+
+#define        ENOMEDIUM       123     /* No medium found */
+#define        EMEDIUMTYPE     124     /* Wrong medium type */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h b/xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h
new file mode 100644 (file)
index 0000000..41e3c4d
--- /dev/null
@@ -0,0 +1,87 @@
+#ifndef _I386_FCNTL_H
+#define _I386_FCNTL_H
+
+/* open/fcntl - O_SYNC is only implemented on block devices and on files
+   located on an ext2 file system */
+#define O_ACCMODE         0003
+#define O_RDONLY            00
+#define O_WRONLY            01
+#define O_RDWR              02
+#define O_CREAT                   0100 /* not fcntl */
+#define O_EXCL            0200 /* not fcntl */
+#define O_NOCTTY          0400 /* not fcntl */
+#define O_TRUNC                  01000 /* not fcntl */
+#define O_APPEND         02000
+#define O_NONBLOCK       04000
+#define O_NDELAY       O_NONBLOCK
+#define O_SYNC          010000
+#define FASYNC          020000 /* fcntl, for BSD compatibility */
+#define O_DIRECT        040000 /* direct disk access hint */
+#define O_LARGEFILE    0100000
+#define O_DIRECTORY    0200000 /* must be a directory */
+#define O_NOFOLLOW     0400000 /* don't follow links */
+
+#define F_DUPFD                0       /* dup */
+#define F_GETFD                1       /* get close_on_exec */
+#define F_SETFD                2       /* set/clear close_on_exec */
+#define F_GETFL                3       /* get file->f_flags */
+#define F_SETFL                4       /* set file->f_flags */
+#define F_GETLK                5
+#define F_SETLK                6
+#define F_SETLKW       7
+
+#define F_SETOWN       8       /*  for sockets. */
+#define F_GETOWN       9       /*  for sockets. */
+#define F_SETSIG       10      /*  for sockets. */
+#define F_GETSIG       11      /*  for sockets. */
+
+#define F_GETLK64      12      /*  using 'struct flock64' */
+#define F_SETLK64      13
+#define F_SETLKW64     14
+
+/* for F_[GET|SET]FL */
+#define FD_CLOEXEC     1       /* actually anything with low bit set goes */
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK                0
+#define F_WRLCK                1
+#define F_UNLCK                2
+
+/* for old implementation of bsd flock () */
+#define F_EXLCK                4       /* or 3 */
+#define F_SHLCK                8       /* or 4 */
+
+/* for leases */
+#define F_INPROGRESS   16
+
+/* operations for bsd flock(), also used by the kernel implementation */
+#define LOCK_SH                1       /* shared lock */
+#define LOCK_EX                2       /* exclusive lock */
+#define LOCK_NB                4       /* or'd with one of the above to prevent
+                                  blocking */
+#define LOCK_UN                8       /* remove lock */
+
+#define LOCK_MAND      32      /* This is a mandatory flock */
+#define LOCK_READ      64      /* ... Which allows concurrent read operations */
+#define LOCK_WRITE     128     /* ... Which allows concurrent write operations */
+#define LOCK_RW                192     /* ... Which allows concurrent read & write ops */
+
+struct flock {
+       short l_type;
+       short l_whence;
+       off_t l_start;
+       off_t l_len;
+       pid_t l_pid;
+};
+
+struct flock64 {
+       short  l_type;
+       short  l_whence;
+       loff_t l_start;
+       loff_t l_len;
+       pid_t  l_pid;
+};
+
+#define F_LINUX_SPECIFIC_BASE  1024
+
+#endif
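
The struct flock declared here is the record passed with F_SETLK/F_SETLKW/F_GETLK. A minimal user-space usage sketch, standard POSIX calls with error handling trimmed and a made-up path:

    /* Taking a whole-file write lock via fcntl(F_SETLK). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0644);
        struct flock fl = { 0 };

        fl.l_type   = F_WRLCK;      /* exclusive write lock       */
        fl.l_whence = SEEK_SET;
        fl.l_start  = 0;            /* from the start of the file */
        fl.l_len    = 0;            /* length 0 means to EOF      */

        if (fcntl(fd, F_SETLK, &fl) == -1)
            perror("fcntl(F_SETLK)");
        else
            puts("lock acquired");

        close(fd);
        return 0;
    }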
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h b/xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h
new file mode 100644 (file)
index 0000000..381e9e9
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special  addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * On UP, currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_X86_LOCAL_APIC
+       FIX_APIC_BASE,  /* local (CPU) APIC -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_X86_VISWS_APIC
+       FIX_CO_CPU,     /* Cobalt timer */
+       FIX_CO_APIC,    /* Cobalt APIC Redirection Table */ 
+       FIX_LI_PCIA,    /* Lithium PCI Bridge A */
+       FIX_LI_PCIB,    /* Lithium PCI Bridge B */
+#endif
+#ifdef CONFIG_HIGHMEM
+       FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+       __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+                                       unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP    (0xffffe000UL)
+#define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+       /*
+        * this branch gets completely eliminated after inlining,
+        * except when someone tries to use fixaddr indices in an
+        * illegal way. (such as mixing up address types or using
+        * out-of-range indices).
+        *
+        * If it doesn't get removed, the linker will complain
+        * loudly with a reasonably clear error message.
+        */
+       if (idx >= __end_of_fixed_addresses)
+               __this_fixmap_does_not_exist();
+
+        return __fix_to_virt(idx);
+}
+
+#endif
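
Because fixmap slots are handed out downwards from FIXADDR_TOP, index 0 maps to the top address itself and each further index sits one page lower. A sketch of the __fix_to_virt() arithmetic using the constants above:

    /* The index-to-address arithmetic of __fix_to_virt() above. */
    #include <assert.h>

    #define PAGE_SHIFT       12
    #define FIXADDR_TOP      (0xffffe000UL)
    #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))

    int main(void)
    {
        assert(__fix_to_virt(0) == 0xffffe000UL);   /* first slot at the top */
        assert(__fix_to_virt(1) == 0xffffd000UL);   /* next slot a page down */
        return 0;
    }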
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h b/xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h
new file mode 100644 (file)
index 0000000..62f9fd8
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ */
+#ifndef __ASM_I386_FLOPPY_H
+#define __ASM_I386_FLOPPY_H
+
+#include <linux/vmalloc.h>
+
+
+/*
+ * The DMA channel used by the floppy controller cannot access data at
+ * addresses >= 16MB
+ *
+ * Went back to the 1MB limit, as some people had problems with the floppy
+ * driver otherwise. It doesn't matter much for performance anyway, as most
+ * floppy accesses go through the track buffer.
+ */
+#define _CROSS_64KB(a,s,vdma) \
+(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+
+#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
+
+
+#define SW fd_routine[use_virtual_dma&1]
+#define CSW fd_routine[can_use_virtual_dma & 1]
+
+
+#define fd_inb(port)                   inb_p(port)
+#define fd_outb(value,port)            outb_p(value,port)
+
+#define fd_request_dma()        CSW._request_dma(FLOPPY_DMA,"floppy")
+#define fd_free_dma()           CSW._free_dma(FLOPPY_DMA)
+#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
+#define fd_free_irq()          free_irq(FLOPPY_IRQ, NULL)
+#define fd_get_dma_residue()    SW._get_dma_residue(FLOPPY_DMA)
+#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size)
+#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io)
+
+#define FLOPPY_CAN_FALLBACK_ON_NODMA
+
+static int virtual_dma_count;
+static int virtual_dma_residue;
+static char *virtual_dma_addr;
+static int virtual_dma_mode;
+static int doing_pdma;
+
+static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
+{
+       register unsigned char st;
+
+#undef TRACE_FLPY_INT
+#define NO_FLOPPY_ASSEMBLER
+
+#ifdef TRACE_FLPY_INT
+       static int calls=0;
+       static int bytes=0;
+       static int dma_wait=0;
+#endif
+       if(!doing_pdma) {
+               floppy_interrupt(irq, dev_id, regs);
+               return;
+       }
+
+#ifdef TRACE_FLPY_INT
+       if(!calls)
+               bytes = virtual_dma_count;
+#endif
+
+#ifndef NO_FLOPPY_ASSEMBLER
+       __asm__ (
+       "testl %1,%1
+       je 3f
+1:     inb %w4,%b0
+       andb $160,%b0
+       cmpb $160,%b0
+       jne 2f
+       incw %w4
+       testl %3,%3
+       jne 4f
+       inb %w4,%b0
+       movb %0,(%2)
+       jmp 5f
+4:             movb (%2),%0
+       outb %b0,%w4
+5:     decw %w4
+       outb %0,$0x80
+       decl %1
+       incl %2
+       testl %1,%1
+       jne 1b
+3:     inb %w4,%b0
+2:     "
+       : "=a" ((char) st), 
+       "=c" ((long) virtual_dma_count), 
+       "=S" ((long) virtual_dma_addr)
+       : "b" ((long) virtual_dma_mode),
+       "d" ((short) virtual_dma_port+4), 
+       "1" ((long) virtual_dma_count),
+       "2" ((long) virtual_dma_addr));
+#else  
+       {
+               register int lcount;
+               register char *lptr;
+
+               st = 1;
+               for(lcount=virtual_dma_count, lptr=virtual_dma_addr; 
+                   lcount; lcount--, lptr++) {
+                       st=inb(virtual_dma_port+4) & 0xa0 ;
+                       if(st != 0xa0) 
+                               break;
+                       if(virtual_dma_mode)
+                               outb_p(*lptr, virtual_dma_port+5);
+                       else
+                               *lptr = inb_p(virtual_dma_port+5);
+               }
+               virtual_dma_count = lcount;
+               virtual_dma_addr = lptr;
+               st = inb(virtual_dma_port+4);
+       }
+#endif
+
+#ifdef TRACE_FLPY_INT
+       calls++;
+#endif
+       if(st == 0x20)
+               return;
+       if(!(st & 0x20)) {
+               virtual_dma_residue += virtual_dma_count;
+               virtual_dma_count=0;
+#ifdef TRACE_FLPY_INT
+               printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", 
+                      virtual_dma_count, virtual_dma_residue, calls, bytes,
+                      dma_wait);
+               calls = 0;
+               dma_wait=0;
+#endif
+               doing_pdma = 0;
+               floppy_interrupt(irq, dev_id, regs);
+               return;
+       }
+#ifdef TRACE_FLPY_INT
+       if(!virtual_dma_count)
+               dma_wait++;
+#endif
+}
+
+static void fd_disable_dma(void)
+{
+       if(! (can_use_virtual_dma & 1))
+               disable_dma(FLOPPY_DMA);
+       doing_pdma = 0;
+       virtual_dma_residue += virtual_dma_count;
+       virtual_dma_count=0;
+}
+
+static int vdma_request_dma(unsigned int dmanr, const char * device_id)
+{
+       return 0;
+}
+
+static void vdma_nop(unsigned int dummy)
+{
+}
+
+
+static int vdma_get_dma_residue(unsigned int dummy)
+{
+       return virtual_dma_count + virtual_dma_residue;
+}
+
+
+static int fd_request_irq(void)
+{
+       if(can_use_virtual_dma)
+               return request_irq(FLOPPY_IRQ, floppy_hardint,SA_INTERRUPT,
+                                                  "floppy", NULL);
+       else
+               return request_irq(FLOPPY_IRQ, floppy_interrupt,
+                                                  SA_INTERRUPT|SA_SAMPLE_RANDOM,
+                                                  "floppy", NULL);     
+
+}
+
+static unsigned long dma_mem_alloc(unsigned long size)
+{
+       return __get_dma_pages(GFP_KERNEL,get_order(size));
+}
+
+
+static unsigned long vdma_mem_alloc(unsigned long size)
+{
+       return (unsigned long) vmalloc(size);
+
+}
+
+#define nodma_mem_alloc(size) vdma_mem_alloc(size)
+
+static void _fd_dma_mem_free(unsigned long addr, unsigned long size)
+{
+       if((unsigned int) addr >= (unsigned int) high_memory)
+               return vfree((void *)addr);
+       else
+               free_pages(addr, get_order(size));              
+}
+
+#define fd_dma_mem_free(addr, size)  _fd_dma_mem_free(addr, size) 
+
+static void _fd_chose_dma_mode(char *addr, unsigned long size)
+{
+       if(can_use_virtual_dma == 2) {
+               if((unsigned int) addr >= (unsigned int) high_memory ||
+                  virt_to_bus(addr) >= 0x1000000 ||
+                  _CROSS_64KB(addr, size, 0))
+                       use_virtual_dma = 1;
+               else
+                       use_virtual_dma = 0;
+       } else {
+               use_virtual_dma = can_use_virtual_dma & 1;
+       }
+}
+
+#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size)
+
+
+static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+       doing_pdma = 1;
+       virtual_dma_port = io;
+       virtual_dma_mode = (mode  == DMA_MODE_WRITE);
+       virtual_dma_addr = addr;
+       virtual_dma_count = size;
+       virtual_dma_residue = 0;
+       return 0;
+}
+
+static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+#ifdef FLOPPY_SANITY_CHECK
+       if (CROSS_64KB(addr, size)) {
+               printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
+               return -1;
+       }
+#endif
+       /* actual, physical DMA */
+       doing_pdma = 0;
+       clear_dma_ff(FLOPPY_DMA);
+       set_dma_mode(FLOPPY_DMA,mode);
+       set_dma_addr(FLOPPY_DMA,virt_to_bus(addr));
+       set_dma_count(FLOPPY_DMA,size);
+       enable_dma(FLOPPY_DMA);
+       return 0;
+}
+
+struct fd_routine_l {
+       int (*_request_dma)(unsigned int dmanr, const char * device_id);
+       void (*_free_dma)(unsigned int dmanr);
+       int (*_get_dma_residue)(unsigned int dummy);
+       unsigned long (*_dma_mem_alloc) (unsigned long size);
+       int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
+} fd_routine[] = {
+       {
+               request_dma,
+               free_dma,
+               get_dma_residue,
+               dma_mem_alloc,
+               hard_dma_setup
+       },
+       {
+               vdma_request_dma,
+               vdma_nop,
+               vdma_get_dma_residue,
+               vdma_mem_alloc,
+               vdma_dma_setup
+       }
+};
+
+
+static int FDC1 = 0x3f0;
+static int FDC2 = -1;
+
+/*
+ * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock
+ * is needed to prevent corrupted CMOS RAM in case "insmod floppy"
+ * coincides with another rtc CMOS user.               Paul G.
+ */
+#define FLOPPY0_TYPE   ({                              \
+       unsigned long flags;                            \
+       unsigned char val;                              \
+       spin_lock_irqsave(&rtc_lock, flags);            \
+       val = (CMOS_READ(0x10) >> 4) & 15;              \
+       spin_unlock_irqrestore(&rtc_lock, flags);       \
+       val;                                            \
+})
+
+#define FLOPPY1_TYPE   ({                              \
+       unsigned long flags;                            \
+       unsigned char val;                              \
+       spin_lock_irqsave(&rtc_lock, flags);            \
+       val = CMOS_READ(0x10) & 15;                     \
+       spin_unlock_irqrestore(&rtc_lock, flags);       \
+       val;                                            \
+})
+
+#define N_FDC 2
+#define N_DRIVE 8
+
+#define FLOPPY_MOTOR_MASK 0xf0
+
+#define AUTO_DMA
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __ASM_I386_FLOPPY_H */
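
The fd_routine[] table above is the heart of the driver's DMA strategy: slot 0 drives the real ISA DMA controller, slot 1 the software "virtual DMA" path in which floppy_hardint() polls the FDC byte by byte. A minimal standalone C sketch of the same function-pointer dispatch (names and output here are illustrative, not from the driver):

    #include <stdio.h>

    /* Sketch of the fd_routine[] dispatch: one slot per DMA strategy,
     * indexed by the flag chosen in fd_chose_dma_mode(). */
    struct dma_ops {
        int (*setup)(char *addr, unsigned long size);
    };

    static int hard_setup(char *addr, unsigned long size)
    {
        printf("hard DMA: program the ISA controller for %lu bytes at %p\n",
               size, (void *)addr);
        return 0;
    }

    static int vdma_setup(char *addr, unsigned long size)
    {
        printf("virtual DMA: interrupt handler will poll %lu bytes at %p\n",
               size, (void *)addr);
        return 0;
    }

    static struct dma_ops dma_table[] = {
        { hard_setup },   /* [0]: real ISA DMA          */
        { vdma_setup },   /* [1]: PIO fallback (no DMA) */
    };

    int main(void)
    {
        int use_virtual_dma = 1;   /* pretend fd_chose_dma_mode() picked PIO */
        char buf[512];
        dma_table[use_virtual_dma & 1].setup(buf, sizeof buf);
        return 0;
    }
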
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h
new file mode 100644 (file)
index 0000000..4acb4b0
--- /dev/null
@@ -0,0 +1,91 @@
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+/* assembly code in softirq.h is sensitive to the offsets of these fields */
+typedef struct {
+       unsigned int __softirq_pending;
+       unsigned int __local_irq_count;
+       unsigned int __local_bh_count;
+       unsigned int __syscall_count;
+       struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
+       unsigned int __nmi_count;       /* arch dependent */
+} ____cacheline_aligned irq_cpustat_t;
+
+#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() ({ int __cpu = smp_processor_id(); \
+       (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); })
+
+#define in_irq() (local_irq_count(smp_processor_id()) != 0)
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu)   (local_irq_count(cpu) == 0)
+#define hardirq_endlock(cpu)   do { } while (0)
+
+#define irq_enter(cpu, irq)    (local_irq_count(cpu)++)
+#define irq_exit(cpu, irq)     (local_irq_count(cpu)--)
+
+#define synchronize_irq()      barrier()
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+       int i;
+
+       for (i = 0; i < smp_num_cpus; i++)
+               if (local_irq_count(i))
+                       return 1;
+       return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+       /* if we didn't own the irq lock, just ignore.. */
+       if (global_irq_holder == (unsigned char) cpu) {
+               global_irq_holder = NO_PROC_ID;
+               clear_bit(0,&global_irq_lock);
+       }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+       ++local_irq_count(cpu);
+
+       while (test_bit(0,&global_irq_lock)) {
+               cpu_relax();
+       }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+       --local_irq_count(cpu);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+       return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu)   do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
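
On the !CONFIG_SMP path above, irq_enter()/irq_exit() are bare counter operations and in_interrupt() just tests the counters. A single-CPU userspace model of that bookkeeping, with a fake handler standing in for a real interrupt:

    #include <stdio.h>

    /* Single-CPU model of the counters behind in_interrupt()/in_irq(). */
    static unsigned int local_irq_count;
    static unsigned int local_bh_count;

    #define irq_enter()    (local_irq_count++)
    #define irq_exit()     (local_irq_count--)
    #define in_interrupt() (local_irq_count + local_bh_count != 0)

    static void fake_handler(void)
    {
        irq_enter();
        printf("in handler:  in_interrupt() = %d\n", in_interrupt()); /* 1 */
        irq_exit();
    }

    int main(void)
    {
        printf("before:      in_interrupt() = %d\n", in_interrupt()); /* 0 */
        fake_handler();
        printf("after:       in_interrupt() = %d\n", in_interrupt()); /* 0 */
        return 0;
    }
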
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h
new file mode 100644 (file)
index 0000000..1ad5c07
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ *  linux/include/asm-i386/hdreg.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+#ifndef __ASMi386_HDREG_H
+#define __ASMi386_HDREG_H
+
+typedef unsigned short ide_ioreg_t;
+
+#endif /* __ASMi386_HDREG_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h b/xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h
new file mode 100644 (file)
index 0000000..42f3242
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                   Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with 
+ * up to 16 Terabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/kmap_types.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+#define HIGHMEM_DEBUG 1
+#else
+#define HIGHMEM_DEBUG 0
+#endif
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
+extern void kmap_init(void) __init;
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+#define PKMAP_BASE (0xfe000000UL)
+#ifdef CONFIG_X86_PAE
+#define LAST_PKMAP 512
+#else
+#define LAST_PKMAP 1024
+#endif
+#define LAST_PKMAP_MASK (LAST_PKMAP-1)
+#define PKMAP_NR(virt)  ((virt-PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+extern void * FASTCALL(kmap_high(struct page *page));
+extern void FASTCALL(kunmap_high(struct page *page));
+
+static inline void *kmap(struct page *page)
+{
+       if (in_interrupt())
+               BUG();
+       if (page < highmem_start_page)
+               return page_address(page);
+       return kmap_high(page);
+}
+
+static inline void kunmap(struct page *page)
+{
+       if (in_interrupt())
+               BUG();
+       if (page < highmem_start_page)
+               return;
+       kunmap_high(page);
+}
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need
+ * it.
+ */
+static inline void *kmap_atomic(struct page *page, enum km_type type)
+{
+       enum fixed_addresses idx;
+       unsigned long vaddr;
+
+       if (page < highmem_start_page)
+               return page_address(page);
+
+       idx = type + KM_TYPE_NR*smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#if HIGHMEM_DEBUG
+       if (!pte_none(*(kmap_pte-idx)))
+               BUG();
+#endif
+       set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+       __flush_tlb_one(vaddr);
+
+       return (void*) vaddr;
+}
+
+static inline void kunmap_atomic(void *kvaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+       unsigned long vaddr = (unsigned long) kvaddr;
+       enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+       if (vaddr < FIXADDR_START) // FIXME
+               return;
+
+       if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+               BUG();
+
+       /*
+        * force other mappings to Oops if they try to access
+        * this pte without first remapping it
+        */
+       pte_clear(kmap_pte-idx);
+       __flush_tlb_one(vaddr);
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
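
PKMAP_NR() and PKMAP_ADDR() above are inverse mappings between a pkmap slot number and its virtual address. A quick standalone check of the arithmetic, assuming 4KB pages (PAGE_SHIFT = 12) and the non-PAE LAST_PKMAP:

    #include <stdio.h>

    #define PAGE_SHIFT 12                 /* assumption: 4KB pages */
    #define PKMAP_BASE (0xfe000000UL)
    #define LAST_PKMAP 1024               /* non-PAE value from the header */
    #define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT)
    #define PKMAP_ADDR(nr) (PKMAP_BASE + ((unsigned long)(nr) << PAGE_SHIFT))

    int main(void)
    {
        unsigned long nr;
        for (nr = 0; nr < LAST_PKMAP; nr += 511)
            printf("slot %4lu -> 0x%08lx -> slot %lu\n",
                   nr, PKMAP_ADDR(nr), PKMAP_NR(PKMAP_ADDR(nr)));
        return 0;
    }
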
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h
new file mode 100644 (file)
index 0000000..a7303f0
--- /dev/null
@@ -0,0 +1,62 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *     linux/include/asm/hw_irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ */
+
+#include <linux/config.h>
+#include <linux/smp.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+
+#define SYSCALL_VECTOR         0x80
+#define KDBENTER_VECTOR                0x81
+
+extern int irq_vector[NR_IRQS];
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+/*
+ * x86 profiling function, SMP safe. We might want to do this in
+ * assembly totally?
+ */
+static inline void x86_do_profile (unsigned long eip)
+{
+        if (!prof_buffer)
+                return;
+
+        /*
+         * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+         * (default is all CPUs.)
+         */
+        if (!((1<<smp_processor_id()) & prof_cpu_mask))
+                return;
+
+        eip -= (unsigned long) &_stext;
+        eip >>= prof_shift;
+        /*
+         * Don't ignore out-of-bounds EIP values silently,
+         * put them into the last histogram slot, so if
+         * present, they will show up as a sharp peak.
+         */
+        if (eip > prof_len-1)
+                eip = prof_len-1;
+        atomic_inc((atomic_t *)&prof_buffer[eip]);
+}
+
+static inline void hw_resend_irq(struct hw_interrupt_type *h,
+                                 unsigned int i)
+{}
+
+#endif /* _ASM_HW_IRQ_H */
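
x86_do_profile() reduces a sampled EIP to a histogram bucket: subtract the kernel text base, shift right by prof_shift, and clamp out-of-range values into the last slot so stray samples appear as a sharp peak rather than vanishing. The bucketing arithmetic in isolation (all values illustrative):

    #include <stdio.h>

    int main(void)
    {
        unsigned long stext      = 0xc0100000UL;  /* illustrative text base      */
        unsigned long prof_shift = 4;             /* 16 bytes of text per bucket */
        unsigned long prof_len   = 8192;          /* number of buckets           */
        unsigned long eips[] = { 0xc0100010UL, 0xc0123456UL, 0xffffffffUL };
        unsigned int i;

        for (i = 0; i < 3; i++) {
            unsigned long eip = eips[i] - stext;
            eip >>= prof_shift;
            if (eip > prof_len - 1)
                eip = prof_len - 1;        /* clamp: sharp peak in last slot */
            printf("eip 0x%08lx -> bucket %lu\n", eips[i], eip);
        }
        return 0;
    }
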
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h
new file mode 100644 (file)
index 0000000..55f7a33
--- /dev/null
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * block.h
+ *
+ * Block IO communication rings.
+ *
+ * These are the ring data structures for buffering messages between 
+ * the hypervisor and guest OSes.
+ *
+ * For now we'll start with our own rings for the block IO code instead
+ * of using the network rings.  Hopefully, this will give us additional
+ * flexibility in the future should we choose to move away from a 
+ * ring producer consumer communication model.
+ */
+
+#ifndef __BLOCK_H__
+#define __BLOCK_H__
+
+typedef struct blk_tx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} blk_tx_entry_t;
+
+typedef struct blk_rx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} blk_rx_entry_t;
+
+typedef struct blk_ring_st {
+       blk_tx_entry_t  *tx_ring;
+       unsigned int    tx_prod, tx_cons, tx_event;
+       unsigned int    tx_ring_size;
+
+       blk_rx_entry_t  *rx_ring;
+       unsigned int    rx_prod, rx_cons, rx_event;
+       unsigned int    rx_ring_size;
+} blk_ring_t;
+
+int blk_create_ring(int domain, unsigned long ptr);
+
+#endif
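
Every ring in this interface uses the same prod/cons/event triple: the producer writes at prod and advances it, the consumer drains toward prod, the ring is empty when the two indices are equal, and a notification is raised when the consumer index passes the event mark. A toy single-ring model of that discipline (sizes and messages illustrative):

    #include <stdio.h>

    #define RING_SIZE 8   /* toy size; real rings are much larger */

    static unsigned long ring[RING_SIZE];
    static unsigned int prod, cons, event = 2;

    static void produce(unsigned long v)
    {
        ring[prod % RING_SIZE] = v;
        prod++;                           /* producer owns prod */
    }

    static void consume(void)
    {
        while (cons != prod) {            /* ring empty when cons == prod */
            printf("consumed %lu\n", ring[cons % RING_SIZE]);
            cons++;
            if (cons == event)
                printf("  -> raise EVENT_BLK_* notification\n");
        }
    }

    int main(void)
    {
        produce(10); produce(20); produce(30);
        consume();
        return 0;
    }
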
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h
new file mode 100644 (file)
index 0000000..6a362d3
--- /dev/null
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * hypervisor-if.h
+ * 
+ * Interface to Xeno hypervisor.
+ */
+
+#include <asm/hypervisor-ifs/network.h>
+#include <asm/hypervisor-ifs/block.h>
+
+#ifndef __HYPERVISOR_IF_H__
+#define __HYPERVISOR_IF_H__
+
+typedef struct trap_info_st
+{
+    unsigned char  vector;  /* exception/interrupt vector */
+    unsigned char  dpl;     /* privilege level            */
+    unsigned short cs;      /* code selector              */
+    unsigned long  address; /* code address               */
+} trap_info_t;
+
+
+typedef struct
+{
+#define PGREQ_ADD_BASEPTR    0
+#define PGREQ_REMOVE_BASEPTR 1
+    unsigned long ptr, val; /* *ptr = val */
+} page_update_request_t;
+
+
+/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */
+
+#define __HYPERVISOR_set_trap_table  0
+#define __HYPERVISOR_pt_update       1
+#define __HYPERVISOR_console_write   2
+#define __HYPERVISOR_set_pagetable   3
+#define __HYPERVISOR_set_guest_stack 4
+#define __HYPERVISOR_net_update      5
+#define __HYPERVISOR_fpu_taskswitch  6
+#define __HYPERVISOR_yield           7
+#define __HYPERVISOR_exit            8
+#define __HYPERVISOR_dom0_op         9
+#define __HYPERVISOR_network_op     10
+
+#define TRAP_INSTR "int $0x82"
+
+
+static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
+        "b" (table) );
+
+    return ret;
+}
+
+
+static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_pt_update), 
+        "b" (req), "c" (count) );
+
+    return ret;
+}
+
+
+static inline int HYPERVISOR_console_write(const char *str, int count)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_console_write), 
+        "b" (str), "c" (count) );
+
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_pagetable(unsigned long ptr)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_pagetable),
+        "b" (ptr) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_set_guest_stack(
+    unsigned long ss, unsigned long esp)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_set_guest_stack),
+        "b" (ss), "c" (esp) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_net_update(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_net_update) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_fpu_taskswitch(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_yield(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_yield) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_exit(void)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_exit) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_dom0_op(void *dom0_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
+        "b" (dom0_op) );
+
+    return ret;
+}
+
+static inline int HYPERVISOR_network_op(void *network_op)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_network_op),
+        "b" (network_op) );
+
+    return ret;
+}
+
+/* Events that a guest OS may receive from the hypervisor. */
+#define EVENT_NET_TX  0x01 /* packets for transmission. */
+#define EVENT_NET_RX  0x02 /* empty buffers for receive. */
+#define EVENT_TIMER   0x04 /* a timeout has been updated. */
+#define EVENT_DIE     0x08 /* OS is about to be killed. Clean up please! */
+#define EVENT_BLK_TX  0x10 /* packets for transmission. */
+#define EVENT_BLK_RX  0x20 /* empty buffers for receive. */
+
+/* Bit offsets, as opposed to the above masks. */
+#define _EVENT_NET_TX 0
+#define _EVENT_NET_RX 1
+#define _EVENT_TIMER  2
+#define _EVENT_DIE    3
+#define _EVENT_BLK_TX 4
+#define _EVENT_BLK_RX 5
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct shared_info_st {
+
+    /* Bitmask of outstanding event notifications hypervisor -> guest OS. */
+    unsigned long events;
+    /*
+     * Hypervisor will only signal event delivery via the "callback
+     * exception" when this value is non-zero. Hypervisor clears this when
+     * notifying the guest OS -- this prevents unbounded reentrancy and
+     * stack overflow (in this way, acts as an interrupt-enable flag).
+     */
+    unsigned long events_enable;
+
+    /*
+     * Address for callbacks hypervisor -> guest OS.
+     * Stack frame looks like that of an interrupt.
+     * Code segment is the default flat selector.
+     * This handler will only be called when events_enable is non-zero.
+     */
+    unsigned long event_address;
+
+    /*
+     * Hypervisor uses this callback when it takes a fault on behalf of
+     * an application. This can happen when returning from interrupts for
+     * example: various faults can occur when reloading the segment
+     * registers, and executing 'iret'.
+     * This callback is provided with an extended stack frame, augmented
+     * with saved values for segment registers %ds and %es:
+     *  %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+     * Code segment is the default flat selector.
+     * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!!
+     */
+    unsigned long failsafe_address;
+
+    /*
+     * CPU ticks since start of day.
+     * `wall_time' counts CPU ticks in real time.
+     * `domain_time' counts CPU ticks during which this domain has run.
+     */
+    unsigned long ticks_per_ms; /* CPU ticks per millisecond */
+    /*
+     * Current wall_time can be found by rdtsc. Only possible use of
+     * variable below is that it provides a timestamp for last update
+     * of domain_time.
+     */
+    unsigned long long wall_time;
+    unsigned long long domain_time;
+
+    /*
+     * Timeouts for points at which guest OS would like a callback.
+     * This will probably be backed up by a timer heap in the guest OS.
+     * In Linux we use timeouts to update 'jiffies'.
+     */
+    unsigned long long wall_timeout;
+    unsigned long long domain_timeout;
+
+    /*
+     * Real-Time Clock. This shows time, in seconds, since 1.1.1980.
+     * The timestamp shows the CPU 'wall time' when RTC was last read.
+     * Thus it allows a mapping between 'real time' and 'wall time'.
+     */
+    unsigned long      rtc_time;
+    unsigned long long rtc_timestamp;
+
+} shared_info_t;
+
+/*
+ * NB. We expect that this struct is smaller than a page.
+ */
+typedef struct start_info_st {
+    unsigned long nr_pages;       /* total pages allocated to this domain */
+    shared_info_t *shared_info;   /* start address of shared info struct */
+    unsigned long  pt_base;       /* address of page directory */
+    unsigned long phys_base;
+    unsigned long mod_start;      /* start address of pre-loaded module */
+    unsigned long mod_len;        /* size (bytes) of pre-loaded module */
+    net_ring_t *net_rings;
+    int num_net_rings;
+    blk_ring_t *blk_ring;         /* block io communication rings */
+    unsigned char cmd_line[1];    /* variable-length */
+} start_info_t;
+
+/* For use in guest OSes. */
+extern shared_info_t *HYPERVISOR_shared_info;
+
+#endif /* __HYPERVISOR_IF_H__ */
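
All the hypercall wrappers above share one calling convention: hypercall number in EAX, arguments in EBX, ECX, and so on, then `int $0x82`, with the result returned in EAX. A generic one-argument wrapper in the same style is sketched below; hypercall_1 is a hypothetical name, and the trap is only serviced inside a Xeno guest (elsewhere it will fault), so treat this as a compile-only illustration:

    /* Sketch only: a hypothetical generic wrapper in the style of the
     * inline functions above. Compiles anywhere, but must only be
     * executed inside a Xeno guest OS. */
    #define TRAP_INSTR "int $0x82"

    static inline int hypercall_1(int nr, unsigned long arg)
    {
        int ret;
        __asm__ __volatile__ (
            TRAP_INSTR
            : "=a" (ret)      /* result comes back in EAX      */
            : "0" (nr),       /* hypercall number goes in EAX  */
              "b" (arg) );    /* first argument goes in EBX    */
        return ret;
    }

    /* Usage, e.g. relinquishing the CPU (yield takes no argument):
     *     hypercall_1(__HYPERVISOR_yield, 0);
     */
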
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h
new file mode 100644 (file)
index 0000000..c10bef0
--- /dev/null
@@ -0,0 +1,129 @@
+/******************************************************************************
+ * network.h
+ *
+ * ring data structures for buffering messages between hypervisor and
+ * guest OSes.  As it stands this is only used for network buffer exchange.
+ *
+ * This file also contains structures and interfaces for the per-domain
+ * routing/filtering tables in the hypervisor.
+ *
+ */
+
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/types.h>
+
+typedef struct tx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} tx_entry_t;
+
+typedef struct rx_entry_st {
+       unsigned long addr; /* virtual address */
+       unsigned long size; /* in bytes */
+} rx_entry_t;
+
+#define TX_RING_SIZE 1024
+#define RX_RING_SIZE 1024
+typedef struct net_ring_st {
+    /*
+     * Guest OS places packets into ring at tx_prod.
+     * Hypervisor removes at tx_cons.
+     * Ring is empty when tx_prod == tx_cons.
+     * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
+     * Hypervisor may be prodded whenever tx_prod is updated, but this is
+     * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+     */
+    tx_entry_t *tx_ring;
+    unsigned int tx_prod, tx_cons, tx_event;
+    unsigned int tx_ring_size;
+    /*
+     * Guest OS places empty buffers into ring at rx_prod.
+     * Hypervisor fills buffers as rx_cons.
+     * Ring is empty when rx_prod == rx_cons.
+     * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
+     * Hypervisor may be prodded whenever rx_prod is updated, but this is
+     * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+     */
+    rx_entry_t *rx_ring;
+    unsigned int rx_prod, rx_cons, rx_event;
+    unsigned int rx_ring_size;
+} net_ring_t;
+
+/* net_vif_st is the larger struct that describes a virtual network interface.
+ * It contains a pointer to the net_ring_t structure that needs to be on a
+ * shared page between the hypervisor and guest.  The vif struct is private 
+ * to the hypervisor and is used primarily as a container to allow routing 
+ * and interface administration.  This define should eventually be moved to 
+ * a non-shared interface file, as it is of no relevance to the guest.
+ */
+
+typedef struct net_vif_st {
+    net_ring_t  *net_ring;
+    int          id;
+    // rules table goes here in next revision.
+} net_vif_t;
+
+/* VIF-related defines. */
+#define MAX_GUEST_VIFS    2 // each VIF is a small overhead in task_struct
+#define MAX_SYSTEM_VIFS 256 // trying to avoid dynamic allocation 
+
+/* vif globals */
+extern int sys_vif_count;
+
+/* This is here for consideration:  Having a global lookup for vifs
+ * may make the guest /proc stuff more straightforward, and could
+ * be used in the routing code.  I don't know if it warrants the 
+ * overhead yet.
+ */
+
+/* net_vif_t sys_vif_list[MAX_SYSTEM_VIFS]; */
+
+/* Specify base of per-domain array. Get returned free slot in the array. */
+net_ring_t *create_net_vif(int domain);
+
+/* Packet routing/filtering code follows:
+ */
+
+#define NETWORK_ACTION_DROP 0
+#define NETWORK_ACTION_PASS 1
+
+typedef struct net_rule_st 
+{
+    u32  src_addr;
+    u32  dst_addr;
+    u16  src_port;
+    u16  dst_port;
+    u32  src_addr_mask;
+    u32  dst_addr_mask;
+    u16  src_port_mask;
+    u16  dst_port_mask;
+
+    int  src_interface;
+    int  dst_interface;
+    int  action;
+} net_rule_t;
+
+/* Network trap operations and associated structure. 
+ * This presently just handles rule insertion and deletion, but will
+ * evenually have code to add and remove interfaces.
+ */
+
+#define NETWORK_OP_ADDRULE      0
+#define NETWORK_OP_DELETERULE   1
+
+typedef struct network_op_st 
+{
+    unsigned long cmd;
+    union
+    {
+        net_rule_t net_rule;
+    }
+    u;
+} network_op_t;
+    
+/* Drop a new rule down to the network tables. */
+int add_net_rule(net_rule_t *rule);
+
+#endif
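
A net_rule_t is a masked match: a packet field matches when it equals the rule's value under the corresponding mask. A standalone model of the address comparison (addr_match is an illustrative helper, not part of the interface):

    #include <stdio.h>
    typedef unsigned int u32;

    /* Match one address field the way a masked rule table would. */
    static int addr_match(u32 pkt, u32 rule, u32 mask)
    {
        return (pkt & mask) == (rule & mask);
    }

    int main(void)
    {
        u32 rule_src  = 0x0a000000;   /* 10.0.0.0 */
        u32 rule_mask = 0xff000000;   /* /8       */
        u32 pkts[] = { 0x0a010203,    /* 10.1.2.3    -> matches      */
                       0xc0a80101 };  /* 192.168.1.1 -> no match     */
        unsigned int i;

        for (i = 0; i < 2; i++)
            printf("0x%08x -> %s\n", pkts[i],
                   addr_match(pkts[i], rule_src, rule_mask)
                   ? "NETWORK_ACTION_DROP" : "NETWORK_ACTION_PASS");
        return 0;
    }
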
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h
new file mode 100644 (file)
index 0000000..62f5b6d
--- /dev/null
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * hypervisor.h
+ * 
+ * Linux-specific hypervisor handling.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#ifndef __HYPERVISOR_H__
+#define __HYPERVISOR_H__
+
+#include <asm/hypervisor-ifs/hypervisor-if.h>
+#include <asm/ptrace.h>
+
+/* arch/xeno/kernel/setup.c */
+union start_info_union
+{
+    start_info_t start_info;
+    char padding[512];
+};
+extern union start_info_union start_info_union;
+#define start_info (start_info_union.start_info)
+
+/* arch/xeno/kernel/hypervisor.c */
+void do_hypervisor_callback(struct pt_regs *regs);
+
+/* arch/xeno/mm/hypervisor.c */
+/*
+ * NB. ptr values should be fake-physical. 'vals' should already be
+ * fully adjusted (ie. for start_info.phys_base).
+ */
+void queue_l1_entry_update(unsigned long ptr, unsigned long val);
+void queue_l2_entry_update(unsigned long ptr, unsigned long val);
+void queue_baseptr_create(unsigned long ptr);
+void queue_baseptr_remove(unsigned long ptr);
+void queue_tlb_flush(void);
+void queue_tlb_flush_one(unsigned long ptr);
+void flush_page_update_queue(void);
+
+#endif /* __HYPERVISOR_H__ */
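
The queue_* functions above exist so a guest can batch page-table writes and pay one __HYPERVISOR_pt_update hypercall per flush instead of one trap per update. A userspace model of that batching idea (queue depth and names here are illustrative):

    #include <stdio.h>

    #define QUEUE_LEN 4

    /* Model of a (ptr, val) update queue that batches until flushed. */
    static struct { unsigned long ptr, val; } queue[QUEUE_LEN];
    static int qidx;

    static void flush_page_update_queue(void)
    {
        printf("flush: issuing 1 hypercall for %d updates\n", qidx);
        qidx = 0;
    }

    static void queue_update(unsigned long ptr, unsigned long val)
    {
        queue[qidx].ptr = ptr;
        queue[qidx].val = val;
        if (++qidx == QUEUE_LEN)          /* auto-flush when full */
            flush_page_update_queue();
    }

    int main(void)
    {
        unsigned long p;
        for (p = 0; p < 6; p++)
            queue_update(0x1000 + p * 4, p);
        flush_page_update_queue();        /* flush the remainder */
        return 0;
    }
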
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/i387.h b/xenolinux-2.4.16-sparse/include/asm-xeno/i387.h
new file mode 100644 (file)
index 0000000..1cf8dc2
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * include/asm-i386/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_I386_I387_H
+#define __ASM_I386_I387_H
+
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+
+extern void init_fpu(void);
+/*
+ * FPU lazy state save handling...
+ */
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+extern void kernel_fpu_begin(void);
+#define kernel_fpu_end() stts()
+
+
+#define unlazy_fpu( tsk ) do { \
+       if ( tsk->flags & PF_USEDFPU ) \
+               save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk ) do { \
+       if ( tsk->flags & PF_USEDFPU ) { \
+               asm volatile("fwait"); \
+               tsk->flags &= ~PF_USEDFPU; \
+               stts(); \
+       } \
+} while (0)
+
+/*
+ * FPU state interaction...
+ */
+extern unsigned short get_fpu_cwd( struct task_struct *tsk );
+extern unsigned short get_fpu_swd( struct task_struct *tsk );
+extern unsigned short get_fpu_twd( struct task_struct *tsk );
+extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+
+extern void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd );
+extern void set_fpu_swd( struct task_struct *tsk, unsigned short swd );
+extern void set_fpu_twd( struct task_struct *tsk, unsigned short twd );
+extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr );
+
+#define load_mxcsr( val ) do { \
+       unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+       asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387( struct _fpstate *buf );
+extern int restore_i387( struct _fpstate *buf );
+
+/*
+ * ptrace request handlers...
+ */
+extern int get_fpregs( struct user_i387_struct *buf,
+                      struct task_struct *tsk );
+extern int set_fpregs( struct task_struct *tsk,
+                      struct user_i387_struct *buf );
+
+extern int get_fpxregs( struct user_fxsr_struct *buf,
+                       struct task_struct *tsk );
+extern int set_fpxregs( struct task_struct *tsk,
+                       struct user_fxsr_struct *buf );
+
+/*
+ * FPU state for core dumps...
+ */
+extern int dump_fpu( struct pt_regs *regs,
+                    struct user_i387_struct *fpu );
+extern int dump_extended_fpu( struct pt_regs *regs,
+                             struct user_fxsr_struct *fpu );
+
+#endif /* __ASM_I386_I387_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ide.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ide.h
new file mode 100644 (file)
index 0000000..6ac7876
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ *  linux/include/asm-i386/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the i386 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifndef MAX_HWIFS
+# ifdef CONFIG_BLK_DEV_IDEPCI
+#define MAX_HWIFS      10
+# else
+#define MAX_HWIFS      6
+# endif
+#endif
+
+#define ide__sti()     __sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+       switch (base) {
+               case 0x1f0: return 14;
+               case 0x170: return 15;
+               case 0x1e8: return 11;
+               case 0x168: return 10;
+               case 0x1e0: return 8;
+               case 0x160: return 12;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+       switch (index) {
+               case 0: return 0x1f0;
+               case 1: return 0x170;
+               case 2: return 0x1e8;
+               case 3: return 0x168;
+               case 4: return 0x1e0;
+               case 5: return 0x160;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
+{
+       ide_ioreg_t reg = data_port;
+       int i;
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       if (ctrl_port) {
+               hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       } else {
+               hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
+       }
+       if (irq != NULL)
+               *irq = 0;
+       hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+static __inline__ void ide_init_default_hwifs(void)
+{
+#ifndef CONFIG_BLK_DEV_IDEPCI
+       hw_regs_t hw;
+       int index;
+
+       for(index = 0; index < MAX_HWIFS; index++) {
+               ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
+               hw.irq = ide_default_irq(ide_default_io_base(index));
+               ide_register_hw(&hw, NULL);
+       }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned head           : 4;    /* always zeros here */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit7           : 1;    /* always 1 */
+       } b;
+       } select_t;
+
+#define ide_request_irq(irq,hand,flg,dev,id)   request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id)               free_irq((irq), (dev_id))
+#define ide_check_region(from,extent)          check_region((from), (extent))
+#define ide_request_region(from,extent,name)   request_region((from), (extent), (name))
+#define ide_release_region(from,extent)                release_region((from), (extent))
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+#define ide_ack_intr(hwif)             (1)
+#define ide_fix_driveid(id)            do {} while (0)
+#define ide_release_lock(lock)         do {} while (0)
+#define ide_get_lock(lock, hdlr, data) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/init.h b/xenolinux-2.4.16-sparse/include/asm-xeno/init.h
new file mode 100644 (file)
index 0000000..17d2155
--- /dev/null
@@ -0,0 +1 @@
+#error "<asm/init.h> should never be used - use <linux/init.h> instead"
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h
new file mode 100644 (file)
index 0000000..250b64f
--- /dev/null
@@ -0,0 +1,311 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <linux/config.h>
+
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ *             Linus
+ */
+
+ /*
+  *  Bit simplified and optimized by Jan Hubicka
+  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+  *
+  *  isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+  *  isa_read[wl] and isa_write[wl] fixed
+  *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+  */
+
+#define IO_SPACE_LIMIT 0xffff
+
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_LEN  0x40000   /* 256k per quad. Only remapping 1st */
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#if CONFIG_DEBUG_IOVIRT
+  extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+  extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line);
+  #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+  #define __io_virt(x) ((void *)(x))
+//#define __io_phys(x) __pa(x)
+#endif
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+       return __pa(address);
+}
+
+static inline void * phys_to_virt(unsigned long address)
+{
+       return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page)     ((page - mem_map) << PAGE_SHIFT)
+
+/*
+ * IO bus memory addresses are also 1:1 with the physical address
+ */
+#define virt_to_bus virt_to_phys
+#define bus_to_virt phys_to_virt
+#define page_to_bus page_to_phys
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c)       memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c)   memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c)     memcpy(__io_virt(a),(b),(c))
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c)           memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c)       memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c)         memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require mem IO specific function.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d)           eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d)       eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+static inline int check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
+
+static inline int isa_check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (isa_readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
+
+/*
+ *     Cache management
+ *
+ *     This needed for two cases
+ *     1. Out of order aware processors
+ *     This is needed for two cases:
+ */
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+       __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size)            flush_write_buffers()
+#define dma_cache_wback(_start,_size)          flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size)      flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size)            do { } while (0)
+#define dma_cache_wback(_start,_size)          do { } while (0)
+#define dma_cache_wback_inv(_start,_size)      do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#endif /* __KERNEL__ */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+#ifdef CONFIG_MULTIQUAD
+extern void *xquad_portio;    /* Where the IO area was mapped */
+#endif /* CONFIG_MULTIQUAD */
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+static inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#ifdef CONFIG_MULTIQUAD
+/* Make the default portio routines operate on quad 0 for now */
+#define __OUT(s,s1,x) \
+__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+__OUTQ0(s,s,x) \
+__OUTQ0(s,s##_p,x) 
+#else
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} 
+#endif /* CONFIG_MULTIQUAD */
+
+#ifdef CONFIG_MULTIQUAD
+#define __OUTQ0(s,ss,x)    /* Do the equivalent of the portio op on quad 0 */ \
+static inline void out##ss(unsigned x value, unsigned short port) { \
+       if (xquad_portio) \
+               write##s(value, (unsigned long) xquad_portio + port); \
+       else               /* We're still in early boot, running on quad 0 */ \
+               out##ss##_local(value, port); \
+} 
+
+#define __INQ0(s,ss)       /* Do the equivalent of the portio op on quad 0 */ \
+static inline RETURN_TYPE in##ss(unsigned short port) { \
+       if (xquad_portio) \
+               return read##s((unsigned long) xquad_portio + port); \
+       else               /* We're still in early boot, running on quad 0 */ \
+               return in##ss##_local(port); \
+}
+#endif /* CONFIG_MULTIQUAD */
+
+#define __IN1(s) \
+static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#ifdef CONFIG_MULTIQUAD
+#define __IN(s,s1,i...) \
+__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__INQ0(s,s) \
+__INQ0(s,s##_p) 
+#else
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } 
+#endif /* CONFIG_MULTIQUAD */
+
+#define __INS(s) \
+static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+#endif
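
check_signature() above is a plain byte-compare loop; on x86, readb() is a direct dereference, so the logic reduces to the following standalone model over an ordinary buffer:

    #include <stdio.h>

    /* check_signature() with readb() replaced by a plain dereference,
     * which is exactly what readb() is on x86. */
    static int check_signature_model(const unsigned char *io,
                                     const unsigned char *sig, int length)
    {
        while (length--) {
            if (*io++ != *sig++)
                return 0;              /* mismatch */
        }
        return 1;                      /* all bytes matched */
    }

    int main(void)
    {
        unsigned char rom[] = { 0x55, 0xaa, 'B', 'I', 'O', 'S' };
        unsigned char sig[] = { 0x55, 0xaa };
        printf("match: %d\n", check_signature_model(rom, sig, 2));
        return 0;
    }
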
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h
new file mode 100644 (file)
index 0000000..b132819
--- /dev/null
@@ -0,0 +1,147 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <linux/config.h>
+#include <asm/types.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+               ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+       __u32   __reserved_2    : 24,
+               ID              :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+       __u32   version         :  8,
+               __reserved_2    :  7,
+               PRQ             :  1,
+               entries         :  8,
+               __reserved_1    :  8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+       __u32   __reserved_2    : 24,
+               arbitration     :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+       dest_Fixed = 0,
+       dest_LowestPrio = 1,
+       dest_SMI = 2,
+       dest__reserved_1 = 3,
+       dest_NMI = 4,
+       dest_INIT = 5,
+       dest__reserved_2 = 6,
+       dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+       __u32   vector          :  8,
+               delivery_mode   :  3,   /* 000: FIXED
+                                        * 001: lowest prio
+                                        * 111: ExtINT
+                                        */
+               dest_mode       :  1,   /* 0: physical, 1: logical */
+               delivery_status :  1,
+               polarity        :  1,
+               irr             :  1,
+               trigger         :  1,   /* 0: edge, 1: level */
+               mask            :  1,   /* 0: enabled, 1: disabled */
+               __reserved_2    : 15;
+
+       union {         struct { __u32
+                                       __reserved_1    : 24,
+                                       physical_dest   :  4,
+                                       __reserved_2    :  4;
+                       } physical;
+
+                       struct { __u32
+                                       __reserved_1    : 24,
+                                       logical_dest    :  8;
+                       } logical;
+       } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       *IO_APIC_BASE(apic) = reg;
+       return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       *IO_APIC_BASE(apic) = reg;
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+       (void) *(IO_APIC_BASE(apic)+4);
+}
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#else  /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
+#endif
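
io_apic_read() and io_apic_write() above use the classic index/data register pair: write the register number through the base pointer, then access the data word at base+4 (int-sized slots, so 16 bytes into the window). A memory-backed model of that access pattern, with a hypothetical fake_regs[] standing in for the APIC's register file:

    #include <stdio.h>

    /* Model: slot 0 of the window is the index (register select) and
     * slot 4 is the data word, matching the int-pointer arithmetic in
     * io_apic_read()/io_apic_write() above. */
    static volatile int window[8];
    static int fake_regs[4] = { 0x11000000, 0x00170011, 0x22000000, 0 };

    static unsigned int io_apic_read_model(unsigned int reg)
    {
        window[0] = reg;               /* select the register      */
        window[4] = fake_regs[reg];    /* "device" answers         */
        return window[4];              /* read via the data window */
    }

    int main(void)
    {
        printf("reg 0x01 = 0x%08x\n", io_apic_read_model(1));
        return 0;
    }
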
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h
new file mode 100644 (file)
index 0000000..c75f20a
--- /dev/null
@@ -0,0 +1,75 @@
+/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $
+ *
+ * linux/ioctl.h for Linux by H.H. Bergman.
+ */
+
+#ifndef _ASMI386_IOCTL_H
+#define _ASMI386_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms.  The i386 ioctl numbering scheme doesn't really enforce
+ * a type field.  De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here.  Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS    8
+#define _IOC_TYPEBITS  8
+#define _IOC_SIZEBITS  14
+#define _IOC_DIRBITS   2
+
+#define _IOC_NRMASK    ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK  ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK  ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK   ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT   0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT  (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE      0U
+#define _IOC_WRITE     1U
+#define _IOC_READ      2U
+
+#define _IOC(dir,type,nr,size) \
+       (((dir)  << _IOC_DIRSHIFT) | \
+        ((type) << _IOC_TYPESHIFT) | \
+        ((nr)   << _IOC_NRSHIFT) | \
+        ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr)           _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)     _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)     _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size)    _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr)           (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)          (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)            (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)          (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN         (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT                (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT      ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK   (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT  (_IOC_SIZESHIFT)
+
+#endif /* _ASMI386_IOCTL_H */
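
A quick round-trip of the command encoding above, re-deriving TIOCGPTN from the ioctls header that follows; the demo redefines only the pieces it needs, mirroring the shift layout (nr at 0, type at 8, size at 16, dir at 30):

    #include <stdio.h>

    /* Mirrors the encoding above: dir(2) | size(14) | type(8) | nr(8). */
    #define _IOC(dir,type,nr,size) \
            (((dir) << 30) | ((size) << 16) | ((type) << 8) | (nr))
    #define _IOC_READ 2U
    #define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))

    int main(void)
    {
        unsigned int cmd = _IOR('T', 0x30, unsigned int);   /* TIOCGPTN */
        printf("cmd  = 0x%08x\n", cmd);
        printf("dir  = %u\n",   (cmd >> 30) & 3);           /* _IOC_DIR  */
        printf("type = '%c'\n", (cmd >> 8) & 0xff);         /* _IOC_TYPE */
        printf("nr   = 0x%x\n", cmd & 0xff);                /* _IOC_NR   */
        printf("size = %u\n",   (cmd >> 16) & 0x3fff);      /* _IOC_SIZE */
        return 0;
    }
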
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h
new file mode 100644 (file)
index 0000000..9fc340a
--- /dev/null
@@ -0,0 +1,82 @@
+#ifndef __ARCH_I386_IOCTLS_H__
+#define __ARCH_I386_IOCTLS_H__
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS         0x5401
+#define TCSETS         0x5402
+#define TCSETSW                0x5403
+#define TCSETSF                0x5404
+#define TCGETA         0x5405
+#define TCSETA         0x5406
+#define TCSETAW                0x5407
+#define TCSETAF                0x5408
+#define TCSBRK         0x5409
+#define TCXONC         0x540A
+#define TCFLSH         0x540B
+#define TIOCEXCL       0x540C
+#define TIOCNXCL       0x540D
+#define TIOCSCTTY      0x540E
+#define TIOCGPGRP      0x540F
+#define TIOCSPGRP      0x5410
+#define TIOCOUTQ       0x5411
+#define TIOCSTI                0x5412
+#define TIOCGWINSZ     0x5413
+#define TIOCSWINSZ     0x5414
+#define TIOCMGET       0x5415
+#define TIOCMBIS       0x5416
+#define TIOCMBIC       0x5417
+#define TIOCMSET       0x5418
+#define TIOCGSOFTCAR   0x5419
+#define TIOCSSOFTCAR   0x541A
+#define FIONREAD       0x541B
+#define TIOCINQ                FIONREAD
+#define TIOCLINUX      0x541C
+#define TIOCCONS       0x541D
+#define TIOCGSERIAL    0x541E
+#define TIOCSSERIAL    0x541F
+#define TIOCPKT                0x5420
+#define FIONBIO                0x5421
+#define TIOCNOTTY      0x5422
+#define TIOCSETD       0x5423
+#define TIOCGETD       0x5424
+#define TCSBRKP                0x5425  /* Needed for POSIX tcsendbreak() */
+#define TIOCTTYGSTRUCT 0x5426  /* For debugging only */
+#define TIOCSBRK       0x5427  /* BSD compatibility */
+#define TIOCCBRK       0x5428  /* BSD compatibility */
+#define TIOCGSID       0x5429  /* Return the session ID of FD */
+#define TIOCGPTN       _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK     _IOW('T',0x31, int)  /* Lock/unlock Pty */
+
+#define FIONCLEX       0x5450  /* these numbers need to be adjusted. */
+#define FIOCLEX                0x5451
+#define FIOASYNC       0x5452
+#define TIOCSERCONFIG  0x5453
+#define TIOCSERGWILD   0x5454
+#define TIOCSERSWILD   0x5455
+#define TIOCGLCKTRMIOS 0x5456
+#define TIOCSLCKTRMIOS 0x5457
+#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT     0x545C  /* wait for a change on serial input line(s) */
+#define TIOCGICOUNT    0x545D  /* read serial port inline interrupt counts */
+#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+
+/* Used for packet mode */
+#define TIOCPKT_DATA            0
+#define TIOCPKT_FLUSHREAD       1
+#define TIOCPKT_FLUSHWRITE      2
+#define TIOCPKT_STOP            4
+#define TIOCPKT_START           8
+#define TIOCPKT_NOSTOP         16
+#define TIOCPKT_DOSTOP         32
+
+#define TIOCSER_TEMT    0x01   /* Transmitter physically empty */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h
new file mode 100644 (file)
index 0000000..36f4306
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef __i386_IPC_H__
+#define __i386_IPC_H__
+
+/* 
+ * These are used to wrap system calls on x86.
+ *
+ * See arch/i386/kernel/sys_i386.c for ugly details..
+ */
+struct ipc_kludge {
+       struct msgbuf *msgp;
+       long msgtyp;
+};
+
+#define SEMOP           1
+#define SEMGET          2
+#define SEMCTL          3
+#define MSGSND         11
+#define MSGRCV         12
+#define MSGGET         13
+#define MSGCTL         14
+#define SHMAT          21
+#define SHMDT          22
+#define SHMGET         23
+#define SHMCTL         24
+
+/* Used by the DIPC package, try and avoid reusing it */
+#define DIPC            25
+
+#define IPCCALL(version,op)    ((version)<<16 | (op))
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h
new file mode 100644 (file)
index 0000000..0dcad4f
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef __i386_IPCBUF_H__
+#define __i386_IPCBUF_H__
+
+/*
+ * The ipc64_perm structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+       __kernel_key_t          key;
+       __kernel_uid32_t        uid;
+       __kernel_gid32_t        gid;
+       __kernel_uid32_t        cuid;
+       __kernel_gid32_t        cgid;
+       __kernel_mode_t         mode;
+       unsigned short          __pad1;
+       unsigned short          seq;
+       unsigned short          __pad2;
+       unsigned long           __unused1;
+       unsigned long           __unused2;
+};
+
+#endif /* __i386_IPCBUF_H__ */
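
Because ipc64_perm crosses the user/kernel boundary, its exact size and layout are ABI. A build-time assertion like the following, an editorial sketch assuming a 16-bit __kernel_mode_t (36 bytes total on i386), would catch accidental repacking:

/* fails to compile if the padded layout ever changes size */
typedef char ipc64_perm_abi_check[sizeof(struct ipc64_perm) == 36 ? 1 : -1];
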
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/irq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/irq.h
new file mode 100644 (file)
index 0000000..f5f65b8
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ *     linux/include/asm/irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#include <linux/config.h>
+#include <asm/hypervisor.h>
+#include <asm/ptrace.h>
+
+#define NET_TX_IRQ  _EVENT_NET_TX
+#define NET_RX_IRQ  _EVENT_NET_RX
+#define TIMER_IRQ   _EVENT_TIMER
+
+#define NR_IRQS (sizeof(HYPERVISOR_shared_info->events) * 8)
+
+#define irq_cannonicalize(_irq) (_irq)
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+extern unsigned int do_IRQ(int, struct pt_regs *);
+
+#endif /* _ASM_IRQ_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h
new file mode 100644 (file)
index 0000000..315bde8
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Minimalist Kernel Debugger
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com)
+ * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com)
+ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+ *
+ * See the file LIA-COPYRIGHT for additional information.
+ *
+ * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+ *
+ * Modifications from:
+ *      Richard Bass                    1999/07/20
+ *              Many bug fixes and enhancements.
+ *      Scott Foehner
+ *              Port to ia64
+ *     Scott Lurndal                   1999/12/12
+ *             v1.0 restructuring.
+ */
+#if !defined(_ASM_KDB_H)
+#define _ASM_KDB_H
+
+       /*
+        * KDB_ENTER() is a macro which causes entry into the kernel
+        * debugger from any point in the kernel code stream.  If it 
+        * is intended to be used from interrupt level, it must  use
+        * a non-maskable entry method.
+        */
+#define KDB_ENTER()    asm("\tint $129\n")
+
+       /*
+        * Define the exception frame for this architecture
+        */
+struct pt_regs;
+typedef struct pt_regs *kdb_eframe_t;
+
+       /*
+        * Needed for exported symbols.
+        */
+typedef unsigned long kdb_machreg_t;
+
+#define kdb_machreg_fmt                "0x%lx"
+#define kdb_machreg_fmt0       "0x%08lx"
+#define kdb_bfd_vma_fmt                "0x%lx"
+#define kdb_bfd_vma_fmt0       "0x%08lx"
+#define kdb_elfw_addr_fmt      "0x%x"
+#define kdb_elfw_addr_fmt0     "0x%08x"
+
+       /*
+        * Per-cpu arch-specific kdb state.  These flag bits must lie within the 0xff000000 mask.
+        */
+#define KDB_STATE_A_IF         0x01000000      /* Saved IF flag */
+
+        /*
+         * Interface from kernel trap handling code to kernel debugger.
+         */
+extern int     kdba_callback_die(struct pt_regs *, int, long, void*);
+extern int     kdba_callback_bp(struct pt_regs *, int, long, void*);
+extern int     kdba_callback_debug(struct pt_regs *, int, long, void *);
+
+#endif /* ASM_KDB_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h
new file mode 100644 (file)
index 0000000..7bbd86b
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Minimalist Kernel Debugger
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com)
+ * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com)
+ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+ *
+ * See the file LIA-COPYRIGHT for additional information.
+ *
+ * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+ *
+ * Modifications from:
+ *      Richard Bass                    1999/07/20
+ *              Many bug fixes and enhancements.
+ *      Scott Foehner
+ *              Port to ia64
+ *     Scott Lurndal                   1999/12/12
+ *             v1.0 restructuring.
+ *     Keith Owens                     2000/05/23
+ *             KDB v1.2
+ */
+#if !defined(_ASM_KDBPRIVATE_H)
+#define _ASM_KDBPRIVATE_H
+
+typedef unsigned char kdb_machinst_t;
+
+       /*
+        * KDB_MAXBPT describes the total number of breakpoints
+        * supported by this architecture.  
+        */
+#define KDB_MAXBPT     16
+       /*
+        * KDB_MAXHARDBPT describes the total number of hardware
+        * breakpoint registers that exist.
+        */
+#define KDB_MAXHARDBPT  4
+        /*
+         * Provide space for KDB_MAX_COMMANDS commands.
+         */
+#define KDB_MAX_COMMANDS        125
+
+       /*
+        * Platform specific environment entries
+        */
+#define KDB_PLATFORM_ENV       "IDMODE=x86", "BYTESPERWORD=4", "IDCOUNT=16"
+
+       /*
+        * Define the direction that the stack grows
+        */
+#define KDB_STACK_DIRECTION    (-1)    /* Stack grows down */
+
+       /*
+        * Support for ia32 debug registers 
+        */
+typedef struct _kdbhard_bp {
+       kdb_machreg_t   bph_reg;        /* Register this breakpoint uses */
+
+       unsigned int    bph_free:1;     /* Register available for use */
+       unsigned int    bph_data:1;     /* Data Access breakpoint */
+
+       unsigned int    bph_write:1;    /* Write Data breakpoint */
+       unsigned int    bph_mode:2;     /* 0=inst, 1=write, 2=io, 3=read */
+       unsigned int    bph_length:2;   /* 0=1, 1=2, 2=BAD, 3=4 (bytes) */
+} kdbhard_bp_t;
+
+extern kdbhard_bp_t    kdb_hardbreaks[/* KDB_MAXHARDBPT */];
+
+#define IA32_BREAKPOINT_INSTRUCTION    0xcc
+
+#define DR6_BT  0x00008000
+#define DR6_BS  0x00004000
+#define DR6_BD  0x00002000
+
+#define DR6_B3  0x00000008
+#define DR6_B2  0x00000004
+#define DR6_B1  0x00000002
+#define DR6_B0  0x00000001
+
+#define DR7_RW_VAL(dr, drnum) \
+       (((dr) >> (16 + (4 * (drnum)))) & 0x3)
+
+#define DR7_RW_SET(dr, drnum, rw)                              \
+       do {                                                    \
+               (dr) &= ~(0x3 << (16 + (4 * (drnum))));         \
+               (dr) |= (((rw) & 0x3) << (16 + (4 * (drnum)))); \
+       } while (0)
+
+#define DR7_RW0(dr)       DR7_RW_VAL(dr, 0)
+#define DR7_RW0SET(dr,rw)  DR7_RW_SET(dr, 0, rw)
+#define DR7_RW1(dr)       DR7_RW_VAL(dr, 1)
+#define DR7_RW1SET(dr,rw)  DR7_RW_SET(dr, 1, rw)
+#define DR7_RW2(dr)       DR7_RW_VAL(dr, 2)
+#define DR7_RW2SET(dr,rw)  DR7_RW_SET(dr, 2, rw)
+#define DR7_RW3(dr)       DR7_RW_VAL(dr, 3)
+#define DR7_RW3SET(dr,rw)  DR7_RW_SET(dr, 3, rw)
+
+
+#define DR7_LEN_VAL(dr, drnum) \
+       (((dr) >> (18 + (4 * (drnum)))) & 0x3)
+
+#define DR7_LEN_SET(dr, drnum, rw)                             \
+       do {                                                    \
+               (dr) &= ~(0x3 << (18 + (4 * (drnum))));         \
+               (dr) |= (((rw) & 0x3) << (18 + (4 * (drnum)))); \
+       } while (0)
+#define DR7_LEN0(dr)        DR7_LEN_VAL(dr, 0)
+#define DR7_LEN0SET(dr,len)  DR7_LEN_SET(dr, 0, len)
+#define DR7_LEN1(dr)        DR7_LEN_VAL(dr, 1)
+#define DR7_LEN1SET(dr,len)  DR7_LEN_SET(dr, 1, len)
+#define DR7_LEN2(dr)        DR7_LEN_VAL(dr, 2)
+#define DR7_LEN2SET(dr,len)  DR7_LEN_SET(dr, 2, len)
+#define DR7_LEN3(dr)        DR7_LEN_VAL(dr, 3)
+#define DR7_LEN3SET(dr,len)  DR7_LEN_SET(dr, 3, len)
+
+#define DR7_G0(dr)    (((dr)>>1)&0x1)
+#define DR7_G0SET(dr) ((dr) |= 0x2)
+#define DR7_G0CLR(dr) ((dr) &= ~0x2)
+#define DR7_G1(dr)    (((dr)>>3)&0x1)
+#define DR7_G1SET(dr) ((dr) |= 0x8)
+#define DR7_G1CLR(dr) ((dr) &= ~0x8)
+#define DR7_G2(dr)    (((dr)>>5)&0x1)
+#define DR7_G2SET(dr) ((dr) |= 0x20)
+#define DR7_G2CLR(dr) ((dr) &= ~0x20)
+#define DR7_G3(dr)    (((dr)>>7)&0x1)
+#define DR7_G3SET(dr) ((dr) |= 0x80)
+#define DR7_G3CLR(dr) ((dr) &= ~0x80)
+
+#define DR7_L0(dr)    (((dr))&0x1)
+#define DR7_L0SET(dr) ((dr) |= 0x1)
+#define DR7_L0CLR(dr) ((dr) &= ~0x1)
+#define DR7_L1(dr)    (((dr)>>2)&0x1)
+#define DR7_L1SET(dr) ((dr) |= 0x4)
+#define DR7_L1CLR(dr) ((dr) &= ~0x4)
+#define DR7_L2(dr)    (((dr)>>4)&0x1)
+#define DR7_L2SET(dr) ((dr) |= 0x10)
+#define DR7_L2CLR(dr) ((dr) &= ~0x10)
+#define DR7_L3(dr)    (((dr)>>6)&0x1)
+#define DR7_L3SET(dr) ((dr) |= 0x40)
+#define DR7_L3CLR(dr) ((dr) &= ~0x40)
+
+#define DR7_GD          0x00002000              /* General Detect Enable */
+#define DR7_GE          0x00000200              /* Global exact */
+#define DR7_LE          0x00000100              /* Local exact */
+
+extern kdb_machreg_t kdba_getdr6(void);
+extern void kdba_putdr6(kdb_machreg_t);
+
+extern kdb_machreg_t kdba_getdr7(void);
+
+extern kdb_machreg_t kdba_getdr(int);
+extern void kdba_putdr(int, kdb_machreg_t);
+
+extern kdb_machreg_t kdb_getcr(int);
+
+#define KDB_HAVE_LONGJMP
+#ifdef KDB_HAVE_LONGJMP
+/*
+ * Support for setjmp/longjmp
+ */
+#define JB_BX   0
+#define JB_SI   1
+#define JB_DI   2
+#define JB_BP   3
+#define JB_SP   4
+#define JB_PC   5
+
+typedef struct __kdb_jmp_buf {
+        unsigned long   regs[6];       /* kdba_setjmp assumes fixed offsets here */
+} kdb_jmp_buf;
+
+extern int kdba_setjmp(kdb_jmp_buf *);
+extern void kdba_longjmp(kdb_jmp_buf *, int);
+
+extern kdb_jmp_buf  kdbjmpbuf[];
+#endif /* KDB_HAVE_LONGJMP */
+
+#endif /* !_ASM_KDBPRIVATE_H */
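
A sketch (not part of the patch) of how the DR7 accessor macros above combine to arm debug register 0 as a 4-byte write watchpoint. That kdba_putdr() accepts register 7 directly is an assumption here; only the DR7 bookkeeping is shown.

static void kdb_arm_write_watchpoint(kdb_machreg_t addr)
{
    kdb_machreg_t dr7 = kdba_getdr7();

    kdba_putdr(0, addr);        /* linear address to watch -> DR0 */
    DR7_RW0SET(dr7, 1);         /* RW = 01: break on data writes */
    DR7_LEN0SET(dr7, 3);        /* LEN = 11: 4-byte wide */
    DR7_G0SET(dr7);             /* globally enable breakpoint 0 */
    kdba_putdr(7, dr7);         /* write the control word back */
}
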
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h b/xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h
new file mode 100644 (file)
index 0000000..3b41967
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ *  linux/include/asm-i386/keyboard.h
+ *
+ *  Created 3 Nov 1996 by Geert Uytterhoeven
+ */
+
+/*
+ *  This file contains the i386 architecture specific keyboard definitions
+ */
+
+#ifndef _I386_KEYBOARD_H
+#define _I386_KEYBOARD_H
+
+#ifdef __KERNEL__
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/kd.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define KEYBOARD_IRQ                   1
+#define DISABLE_KBD_DURING_INTERRUPTS  0
+
+extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode);
+extern int pckbd_getkeycode(unsigned int scancode);
+extern int pckbd_translate(unsigned char scancode, unsigned char *keycode,
+                          char raw_mode);
+extern char pckbd_unexpected_up(unsigned char keycode);
+extern void pckbd_leds(unsigned char leds);
+extern void pckbd_init_hw(void);
+extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *);
+extern pm_callback pm_kbd_request_override;
+extern unsigned char pckbd_sysrq_xlate[128];
+
+#define kbd_setkeycode         pckbd_setkeycode
+#define kbd_getkeycode         pckbd_getkeycode
+#define kbd_translate          pckbd_translate
+#define kbd_unexpected_up      pckbd_unexpected_up
+#define kbd_leds               pckbd_leds
+#define kbd_init_hw            pckbd_init_hw
+#define kbd_sysrq_xlate                pckbd_sysrq_xlate
+
+#define SYSRQ_KEY 0x54
+#define E1_PAUSE   119         /* PAUSE key */
+
+/* resource allocation */
+#define kbd_request_region()
+#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \
+                                             "keyboard", NULL)
+
+/* How to access the keyboard macros on this platform.  */
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+#define kbd_write_output(val) outb(val, KBD_DATA_REG)
+#define kbd_write_command(val) outb(val, KBD_CNTL_REG)
+
+/* Some stoneage hardware needs delays after some operations.  */
+#define kbd_pause() do { } while(0)
+
+/*
+ * Machine specific bits for the PS/2 driver
+ */
+
+#define AUX_IRQ 12
+
+#define aux_request_irq(hand, dev_id)                                  \
+       request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id)
+
+#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id)
+
+#endif /* __KERNEL__ */
+#endif /* _I386_KEYBOARD_H */
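
A sketch (not part of the patch) of the polling idiom these macros support. The 0x01 "output buffer full" status bit and the KBD_*_REG port constants come from the PC keyboard driver (pc_keyb.h) and are assumed here.

static int kbd_poll_scancode(void)
{
    if (kbd_read_status() & 0x01)       /* KBD_STAT_OBF: byte waiting */
        return kbd_read_input();        /* fetch the scancode */
    return -1;                          /* nothing pending */
}
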
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h
new file mode 100644 (file)
index 0000000..5107c3d
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+enum km_type {
+       KM_BOUNCE_READ,
+       KM_SKB_DATA,
+       KM_SKB_DATA_SOFTIRQ,
+       KM_USER0,
+       KM_USER1,
+       KM_TYPE_NR
+};
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h
new file mode 100644 (file)
index 0000000..9d1110f
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * ldt.h
+ *
+ * Definitions of structures used with the modify_ldt system call.
+ */
+#ifndef _LINUX_LDT_H
+#define _LINUX_LDT_H
+
+/* Maximum number of LDT entries supported. */
+#define LDT_ENTRIES    8192
+/* The size of each LDT entry. */
+#define LDT_ENTRY_SIZE 8
+
+#ifndef __ASSEMBLY__
+struct modify_ldt_ldt_s {
+       unsigned int  entry_number;
+       unsigned long base_addr;
+       unsigned int  limit;
+       unsigned int  seg_32bit:1;
+       unsigned int  contents:2;
+       unsigned int  read_exec_only:1;
+       unsigned int  limit_in_pages:1;
+       unsigned int  seg_not_present:1;
+       unsigned int  useable:1;
+};
+
+#define MODIFY_LDT_CONTENTS_DATA       0
+#define MODIFY_LDT_CONTENTS_STACK      1
+#define MODIFY_LDT_CONTENTS_CODE       2
+
+#endif /* !__ASSEMBLY__ */
+#endif
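
A user-space sketch (not part of the patch) of filling in modify_ldt_ldt_s and installing it with modify_ldt(2); SYS_modify_ldt and func 1 ("write LDT entry") follow the i386 syscall convention.

#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int install_data_segment(unsigned long base)
{
    struct modify_ldt_ldt_s desc;

    memset(&desc, 0, sizeof(desc));
    desc.entry_number = 0;
    desc.base_addr    = base;
    desc.limit        = 4095;               /* one 4K page, byte granular */
    desc.seg_32bit    = 1;
    desc.contents     = MODIFY_LDT_CONTENTS_DATA;

    /* func 1 == write an LDT entry */
    return syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));
}
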
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h b/xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h
new file mode 100644 (file)
index 0000000..1e2fe6c
--- /dev/null
@@ -0,0 +1,27 @@
+/* $Id: linux_logo.h,v 1.8 1998/07/30 16:30:24 jj Exp $
+ * include/asm-i386/linux_logo.h: This is a linux logo
+ *                                to be displayed on boot.
+ *
+ * Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu)
+ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * You can put anything here, but:
+ * LINUX_LOGO_COLORS has to be less than 224
+ * image size has to be 80x80
+ * values have to start from 0x20
+ * (i.e. RGB(linux_logo_red[0],
+ *          linux_logo_green[0],
+ *          linux_logo_blue[0]) is color 0x20)
+ * BW image has to be 80x80 as well, with MS bit
+ * on the left
+ * Serial_console ascii image can be any size,
+ * but should contain %s to display the version
+ */
+#include <linux/init.h>
+#include <linux/version.h>
+
+#define linux_logo_banner "Linux/ia32 version " UTS_RELEASE
+
+#include <linux/linux_logo.h>
+
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/locks.h b/xenolinux-2.4.16-sparse/include/asm-xeno/locks.h
new file mode 100644 (file)
index 0000000..ffcab0a
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ *     SMP locks primitives for building ix86 locks
+ *     (not yet used).
+ *
+ *             Alan Cox, alan@redhat.com, 1995
+ */
+/*
+ *     This would be much easier but far less clear and easy
+ *     to borrow for other processors if it was just assembler.
+ */
+
+static __inline__ int prim_spin_lock(struct spinlock *sp)
+{
+       int processor=smp_processor_id();
+       
+       /*
+        *      Grab the lock bit
+        */
+        
+       while(lock_set_bit(0,&sp->lock))
+       {
+               /*
+                *      Failed, but that may be because we already own it
+                */
+                
+               if(sp->cpu==processor)
+               {
+                       sp->users++;
+                       return 0;
+               }
+               /*
+                *      Spin in the cache S state if possible
+                */
+               while(sp->lock)
+               {
+                       /*
+                        *      Wait for any invalidates to go off
+                        */
+                        
+                       if(smp_invalidate_needed&(1<<processor))
+                               while(lock_clear_bit(processor,&smp_invalidate_needed))
+                                       local_flush_tlb();
+                       sp->spins++;
+               }
+               /*
+                *      Someone wrote the line, we go 'I' and get
+                *      the cache entry. Now try to regrab
+                */
+       }
+       sp->users++;sp->cpu=processor;
+       return 1;
+}
+
+/*
+ *     Release a spin lock
+ */
+static __inline__ int prim_spin_unlock(struct spinlock *sp)
+{
+       /* This is safe. The decrement is still guarded by the lock. A multilock would
+          not be safe this way */
+       if(!--sp->users)
+       {
+               sp->cpu= NO_PROC_ID;lock_clear_bit(0,&sp->lock);
+               return 1;
+       }
+       return 0;
+}
+
+
+/*
+ *     Non blocking lock grab
+ */
+static __inline__ int prim_spin_lock_nb(struct spinlock *sp)
+{
+       if(lock_set_bit(0,&sp->lock))
+               return 0;               /* Locked already */
+       sp->users++;
+       return 1;                       /* We got the lock */
+}
+
+
+/*
+ *     These wrap the locking primitives up for usage
+ */
+static __inline__ void spinlock(struct spinlock *sp)
+{
+       if(sp->priority<current->lock_order)
+               panic("lock order violation: %s (%d)\n", sp->name, current->lock_order);
+       if(prim_spin_lock(sp))
+       {
+               /*
+                *      We got a new lock. Update the priority chain
+                */
+               sp->oldpri=current->lock_order;
+               current->lock_order=sp->priority;
+       }
+}
+
+static __inline__ void spinunlock(struct spinlock *sp)
+{
+       int pri;
+       if(current->lock_order!=sp->priority)
+               panic("lock release order violation %s (%d)\n", sp->name, current->lock_order);
+       pri=sp->oldpri;
+       if(prim_spin_unlock(sp))
+       {
+               /*
+                *      Update the debugging lock priority chain. We dumped
+                *      our last right to the lock.
+                */
+               current->lock_order=pri;
+       }       
+}
+
+static __inline__ void spintestlock(struct spinlock *sp)
+{
+       /*
+        *      We do no sanity checks, it's legal to optimistically
+        *      get a lower lock.
+        */
+       prim_spin_lock_nb(sp);
+}
+
+static __inline__ void spintestunlock(struct spinlock *sp)
+{
+       /*
+        *      A testlock doesn't update the lock chain so we
+        *      must not update it on free
+        */
+       prim_spin_unlock(sp);
+}
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h b/xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h
new file mode 100644 (file)
index 0000000..bc8421d
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef _I386_MATH_EMU_H
+#define _I386_MATH_EMU_H
+
+#include <asm/sigcontext.h>
+
+int restore_i387_soft(void *s387, struct _fpstate *buf);
+int save_i387_soft(void *s387, struct _fpstate * buf);
+
+/* This structure matches the layout of the data saved to the stack
+   following a device-not-present interrupt, part of it saved
+   automatically by the 80386/80486.
+   */
+struct info {
+       long ___orig_eip;
+       long ___ebx;
+       long ___ecx;
+       long ___edx;
+       long ___esi;
+       long ___edi;
+       long ___ebp;
+       long ___eax;
+       long ___ds;
+       long ___es;
+       long ___orig_eax;
+       long ___eip;
+       long ___cs;
+       long ___eflags;
+       long ___esp;
+       long ___ss;
+       long ___vm86_es; /* This and the following only in vm86 mode */
+       long ___vm86_ds;
+       long ___vm86_fs;
+       long ___vm86_gs;
+};
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h
new file mode 100644 (file)
index 0000000..d6e3009
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)    (0x70 + (x))
+#define RTC_ALWAYS_BCD 1       /* RTC stores time/date values in BCD */
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way to access the data register differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
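
A sketch (not part of the patch) of the usual read sequence built on CMOS_READ(): fetch the seconds counter (CMOS register 0) and convert from BCD. Real callers also serialise against concurrent CMOS access.

static unsigned int rtc_read_seconds(void)
{
    unsigned int sec = CMOS_READ(0);    /* register 0 = RTC seconds */

    if (RTC_ALWAYS_BCD)                 /* convert BCD -> binary */
        sec = (sec & 0x0f) + (sec >> 4) * 10;
    return sec;
}
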
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h
new file mode 100644 (file)
index 0000000..4b3b526
--- /dev/null
@@ -0,0 +1,202 @@
+#ifndef MCA_DMA_H
+#define MCA_DMA_H
+
+#include <asm/io.h>
+#include <linux/ioport.h>
+
+/*
+ * Microchannel specific DMA stuff.  DMA on an MCA machine is fairly similar to
+ *   standard PC dma, but it certainly has its quirks.  DMA register addresses
+ *   are in a different place and there are some added functions.  Most of this
+ *   should be pretty obvious on inspection.  Note that the user must divide
+ *   count by 2 when using 16-bit dma; that is not handled by these functions.
+ *
+ * Ramen Noodles are yummy.
+ * 
+ *  1998 Tymm Twillman <tymm@computer.org>  
+ */
+
+/*
+ * Registers that are used by the DMA controller; FN is the function register 
+ *   (tell the controller what to do) and EXE is the execution register (how
+ *   to do it)
+ */
+
+#define MCA_DMA_REG_FN  0x18
+#define MCA_DMA_REG_EXE 0x1A 
+
+/*
+ * Functions that the DMA controller can do
+ */
+
+#define MCA_DMA_FN_SET_IO       0x00
+#define MCA_DMA_FN_SET_ADDR     0x20
+#define MCA_DMA_FN_GET_ADDR     0x30
+#define MCA_DMA_FN_SET_COUNT    0x40
+#define MCA_DMA_FN_GET_COUNT    0x50
+#define MCA_DMA_FN_GET_STATUS   0x60
+#define MCA_DMA_FN_SET_MODE     0x70
+#define MCA_DMA_FN_SET_ARBUS    0x80
+#define MCA_DMA_FN_MASK         0x90
+#define MCA_DMA_FN_RESET_MASK   0xA0
+#define MCA_DMA_FN_MASTER_CLEAR 0xD0
+
+/*
+ * Modes (used by setting MCA_DMA_FN_MODE in the function register)
+ * 
+ * Note that the MODE_READ is read from memory (write to device), and
+ *   MODE_WRITE is vice-versa.  
+ */
+
+#define MCA_DMA_MODE_XFER  0x04  /* read by default */
+#define MCA_DMA_MODE_READ  0x04  /* same as XFER */
+#define MCA_DMA_MODE_WRITE 0x08  /* OR with MODE_XFER to use */
+#define MCA_DMA_MODE_IO    0x01  /* DMA from IO register */
+#define MCA_DMA_MODE_16    0x40  /* 16 bit xfers */
+
+
+/**
+ *     mca_enable_dma  -       enable DMA on a channel
+ *     @dmanr: DMA channel
+ *
+ *     Enable the MCA bus DMA on a channel. This can be called from
+ *     IRQ context.
+ */
+
+static __inline__ void mca_enable_dma(unsigned int dmanr)
+{
+       outb(MCA_DMA_FN_RESET_MASK | dmanr, MCA_DMA_REG_FN);
+}
+
+/**
+ *     mca_disable_dma -       disable DMA on a channel
+ *     @dmanr: DMA channel
+ *
+ *     Disable the MCA bus DMA on a channel. This can be called from
+ *     IRQ context.
+ */
+
+static __inline__ void mca_disable_dma(unsigned int dmanr)
+{
+       outb(MCA_DMA_FN_MASK | dmanr, MCA_DMA_REG_FN);
+}
+
+/**
+ *     mca_set_dma_addr -      load a 24bit DMA address
+ *     @dmanr: DMA channel
+ *     @a: 24bit bus address
+ *
+ *     Load the address register in the DMA controller. This has a 24bit
+ *     limitation (16Mb). 
+ */
+
+static __inline__ void mca_set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+       outb(MCA_DMA_FN_SET_ADDR | dmanr, MCA_DMA_REG_FN);
+       outb(a & 0xff, MCA_DMA_REG_EXE);
+       outb((a >> 8) & 0xff, MCA_DMA_REG_EXE);
+       outb((a >> 16) & 0xff, MCA_DMA_REG_EXE);
+}
+
+/**
+ *     mca_get_dma_addr -      read the current 24bit DMA address
+ *     @dmanr: DMA channel
+ *
+ *     Read the address register in the DMA controller. This has a 24bit
+ *     limitation (16Mb). The return is a bus address.
+ */
+
+static __inline__ unsigned int mca_get_dma_addr(unsigned int dmanr)
+{
+       unsigned int addr;
+
+       outb(MCA_DMA_FN_GET_ADDR | dmanr, MCA_DMA_REG_FN);
+       addr = inb(MCA_DMA_REG_EXE);
+       addr |= inb(MCA_DMA_REG_EXE) << 8;
+       addr |= inb(MCA_DMA_REG_EXE) << 16;  
+
+       return addr;
+}
+
+/**
+ *     mca_set_dma_count -     load a 16bit transfer count
+ *     @dmanr: DMA channel
+ *     @count: count
+ *
+ *     Set the DMA count for this channel. This can be up to 64Kbytes.
+ *     Setting a count of zero will not do what you expect.
+ */
+
+static __inline__ void mca_set_dma_count(unsigned int dmanr, unsigned int count)
+{
+       count--;  /* transfers one more than count -- correct for this */
+
+       outb(MCA_DMA_FN_SET_COUNT | dmanr, MCA_DMA_REG_FN);
+       outb(count & 0xff, MCA_DMA_REG_EXE);
+       outb((count >> 8) & 0xff, MCA_DMA_REG_EXE);
+}
+
+/**
+ *     mca_get_dma_residue -   get the remaining bytes to transfer
+ *     @dmanr: DMA channel
+ *
+ *     This function returns the number of bytes left to transfer
+ *     on this DMA channel.
+ */
+
+static __inline__ unsigned int mca_get_dma_residue(unsigned int dmanr)
+{
+       unsigned short count;
+
+       outb(MCA_DMA_FN_GET_COUNT | dmanr, MCA_DMA_REG_FN);
+       count = 1 + inb(MCA_DMA_REG_EXE);
+       count += inb(MCA_DMA_REG_EXE) << 8;
+
+       return count;
+}
+
+/**
+ *     mca_set_dma_io -        set the port for an I/O transfer
+ *     @dmanr: DMA channel
+ *     @io_addr: an I/O port number
+ *
+ *     Unlike the ISA bus DMA controllers the DMA on MCA bus can transfer
+ *     with an I/O port target.
+ */
+
+static __inline__ void mca_set_dma_io(unsigned int dmanr, unsigned int io_addr)
+{
+       /*
+        * DMA from a port address -- set the io address
+        */
+       
+       outb(MCA_DMA_FN_SET_IO | dmanr, MCA_DMA_REG_FN);
+       outb(io_addr & 0xff, MCA_DMA_REG_EXE);
+       outb((io_addr >>  8) & 0xff, MCA_DMA_REG_EXE);
+}
+
+/**
+ *     mca_set_dma_mode -      set the DMA mode
+ *     @dmanr: DMA channel
+ *     @mode: mode to set
+ *
+ *     The DMA controller supports several modes. The mode values you can
+ *     set are :
+ *
+ *     %MCA_DMA_MODE_READ when reading from the DMA device.
+ *
+ *     %MCA_DMA_MODE_WRITE when writing to the DMA device.
+ *
+ *     %MCA_DMA_MODE_IO to do DMA to or from an I/O port.
+ *
+ *     %MCA_DMA_MODE_16 to do 16bit transfers.
+ *
+ */
+
+static __inline__ void mca_set_dma_mode(unsigned int dmanr, unsigned int mode)
+{
+       outb(MCA_DMA_FN_SET_MODE | dmanr, MCA_DMA_REG_FN);
+       outb(mode, MCA_DMA_REG_EXE);
+}
+
+#endif /* MCA_DMA_H */
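
A sketch (not part of the patch) of the canonical programming sequence for one of these channels, here a 16-bit device-to-memory transfer ("write" in this header's naming), using only the helpers defined above. As the top-of-file comment notes, the count is in transfers, so a byte count is halved for 16-bit mode.

static void mca_start_device_to_memory(unsigned int chan,
                                       unsigned int bus_addr,
                                       unsigned int bytes)
{
    mca_disable_dma(chan);                  /* mask while reprogramming */
    mca_set_dma_addr(chan, bus_addr);       /* 24-bit bus address */
    mca_set_dma_count(chan, bytes / 2);     /* counts are per transfer */
    mca_set_dma_mode(chan, MCA_DMA_MODE_XFER | MCA_DMA_MODE_WRITE |
                           MCA_DMA_MODE_16);
    mca_enable_dma(chan);                   /* unmask: transfer may begin */
}
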
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mman.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mman.h
new file mode 100644 (file)
index 0000000..f953c43
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef __I386_MMAN_H__
+#define __I386_MMAN_H__
+
+#define PROT_READ      0x1             /* page can be read */
+#define PROT_WRITE     0x2             /* page can be written */
+#define PROT_EXEC      0x4             /* page can be executed */
+#define PROT_NONE      0x0             /* page can not be accessed */
+
+#define MAP_SHARED     0x01            /* Share changes */
+#define MAP_PRIVATE    0x02            /* Changes are private */
+#define MAP_TYPE       0x0f            /* Mask for type of mapping */
+#define MAP_FIXED      0x10            /* Interpret addr exactly */
+#define MAP_ANONYMOUS  0x20            /* don't use a file */
+
+#define MAP_GROWSDOWN  0x0100          /* stack-like segment */
+#define MAP_DENYWRITE  0x0800          /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000          /* mark it as an executable */
+#define MAP_LOCKED     0x2000          /* pages are locked */
+#define MAP_NORESERVE  0x4000          /* don't check for reservations */
+
+#define MS_ASYNC       1               /* sync memory asynchronously */
+#define MS_INVALIDATE  2               /* invalidate the caches */
+#define MS_SYNC                4               /* synchronous memory sync */
+
+#define MCL_CURRENT    1               /* lock all current mappings */
+#define MCL_FUTURE     2               /* lock all future mappings */
+
+#define MADV_NORMAL    0x0             /* default page-in behavior */
+#define MADV_RANDOM    0x1             /* page-in minimum required */
+#define MADV_SEQUENTIAL        0x2             /* read-ahead aggressively */
+#define MADV_WILLNEED  0x3             /* pre-fault pages */
+#define MADV_DONTNEED  0x4             /* discard these pages */
+
+/* compatibility flags */
+#define MAP_ANON       MAP_ANONYMOUS
+#define MAP_FILE       0
+
+#endif /* __I386_MMAN_H__ */
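
A minimal user-space example (not part of the patch) of the flag combination these constants most often form: an anonymous, private, read-write mapping.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
    /* One page, readable and writable, backed by anonymous memory. */
    void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    ((char *)p)[0] = 42;        /* fault the page in */
    munmap(p, 4096);
    return 0;
}
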
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h
new file mode 100644 (file)
index 0000000..de01403
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef __i386_MMU_H
+#define __i386_MMU_H
+
+/*
+ * The i386 doesn't have a mmu context, but
+ * we put the segment information here.
+ */
+typedef struct { 
+       void *segments;
+       unsigned long cpuvalid;
+} mm_context_t;
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h
new file mode 100644 (file)
index 0000000..aecc685
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef __I386_MMU_CONTEXT_H
+#define __I386_MMU_CONTEXT_H
+
+#include <linux/config.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+
+/*
+ * possibly do the LDT unload here?
+ */
+#define destroy_context(mm)            do { } while(0)
+#define init_new_context(tsk,mm)       0
+
+#ifdef CONFIG_SMP
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+       if(cpu_tlbstate[cpu].state == TLBSTATE_OK)
+               cpu_tlbstate[cpu].state = TLBSTATE_LAZY;        
+}
+#else
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+}
+#endif
+
+extern pgd_t *cur_pgd;
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
+{
+       if (prev != next) {
+               /* stop flush ipis for the previous mm */
+               clear_bit(cpu, &prev->cpu_vm_mask);
+               /*
+                * Re-load LDT if necessary
+                */
+               if (prev->context.segments != next->context.segments)
+                       load_LDT(next);
+#ifdef CONFIG_SMP
+               cpu_tlbstate[cpu].state = TLBSTATE_OK;
+               cpu_tlbstate[cpu].active_mm = next;
+#endif
+               set_bit(cpu, &next->cpu_vm_mask);
+               set_bit(cpu, &next->context.cpuvalid);
+               /* Re-load page tables */
+                cur_pgd = next->pgd;
+                HYPERVISOR_set_pagetable(__pa(cur_pgd) + start_info.phys_base);
+       }
+#ifdef CONFIG_SMP
+       else {
+               cpu_tlbstate[cpu].state = TLBSTATE_OK;
+               if(cpu_tlbstate[cpu].active_mm != next)
+                       BUG();
+               if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+                       /* We were in lazy tlb mode and leave_mm disabled 
+                        * tlb flush IPI delivery. We must flush our tlb.
+                        */
+                       local_flush_tlb();
+               }
+               if (!test_and_set_bit(cpu, &next->context.cpuvalid))
+                       load_LDT(next);
+       }
+#endif
+}
+
+#define activate_mm(prev, next) \
+       switch_mm((prev),(next),NULL,smp_processor_id())
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h
new file mode 100644 (file)
index 0000000..46b71da
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _ASM_MMX_H
+#define _ASM_MMX_H
+
+/*
+ *     MMX 3Dnow! helper operations
+ */
+
+#include <linux/types.h>
+extern void *_mmx_memcpy(void *to, const void *from, size_t size);
+extern void mmx_clear_page(void *page);
+extern void mmx_copy_page(void *to, void *from);
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/module.h b/xenolinux-2.4.16-sparse/include/asm-xeno/module.h
new file mode 100644 (file)
index 0000000..a552173
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ASM_I386_MODULE_H
+#define _ASM_I386_MODULE_H
+/*
+ * This file contains the i386 architecture specific module code.
+ */
+
+#define module_map(x)          vmalloc(x)
+#define module_unmap(x)                vfree(x)
+#define module_arch_init(x)    (0)
+#define arch_init_modules(x)   do { } while (0)
+
+#endif /* _ASM_I386_MODULE_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h
new file mode 100644 (file)
index 0000000..b5719c0
--- /dev/null
@@ -0,0 +1,221 @@
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is. 
+ */
+#define SMP_MAGIC_IDENT        (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/*
+ * A maximum of 16 APICs is supported with the current APIC ID architecture.
+ */
+#ifdef CONFIG_MULTIQUAD
+#define MAX_APICS 256
+#else /* !CONFIG_MULTIQUAD */
+#define MAX_APICS 16
+#endif /* CONFIG_MULTIQUAD */
+
+#define MAX_MPC_ENTRY 1024
+
+struct intel_mp_floating
+{
+       char mpf_signature[4];          /* "_MP_"                       */
+       unsigned long mpf_physptr;      /* Configuration table address  */
+       unsigned char mpf_length;       /* Our length (paragraphs)      */
+       unsigned char mpf_specification;/* Specification version        */
+       unsigned char mpf_checksum;     /* Checksum (makes sum 0)       */
+       unsigned char mpf_feature1;     /* Standard or configuration ?  */
+       unsigned char mpf_feature2;     /* Bit7 set for IMCR|PIC        */
+       unsigned char mpf_feature3;     /* Unused (0)                   */
+       unsigned char mpf_feature4;     /* Unused (0)                   */
+       unsigned char mpf_feature5;     /* Unused (0)                   */
+};
+
+struct mp_config_table
+{
+       char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+       unsigned short mpc_length;      /* Size of table */
+       char  mpc_spec;                 /* 0x01 */
+       char  mpc_checksum;
+       char  mpc_oem[8];
+       char  mpc_productid[12];
+       unsigned long mpc_oemptr;       /* 0 if not present */
+       unsigned short mpc_oemsize;     /* 0 if not present */
+       unsigned short mpc_oemcount;
+       unsigned long mpc_lapic;        /* APIC address */
+       unsigned long reserved;
+};
+
+/* Followed by entries */
+
+#define        MP_PROCESSOR    0
+#define        MP_BUS          1
+#define        MP_IOAPIC       2
+#define        MP_INTSRC       3
+#define        MP_LINTSRC      4
+#define        MP_TRANSLATION  192  /* Used by IBM NUMA-Q to describe node locality */
+
+struct mpc_config_processor
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;       /* Local APIC number */
+       unsigned char mpc_apicver;      /* Its versions */
+       unsigned char mpc_cpuflag;
+#define CPU_ENABLED            1       /* Processor is available */
+#define CPU_BOOTPROCESSOR      2       /* Processor is the BP */
+       unsigned long mpc_cpufeature;           
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK        0xF00
+       unsigned long mpc_featureflag;  /* CPUID feature value */
+       unsigned long mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+       unsigned char mpc_type;
+       unsigned char mpc_busid;
+       unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA   "EISA"
+#define BUSTYPE_ISA    "ISA"
+#define BUSTYPE_INTERN "INTERN"        /* Internal BUS */
+#define BUSTYPE_MCA    "MCA"
+#define BUSTYPE_VL     "VL"            /* Local bus */
+#define BUSTYPE_PCI    "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS   "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI    "MBI"
+#define BUSTYPE_MBII   "MBII"
+#define BUSTYPE_MPI    "MPI"
+#define BUSTYPE_MPSA   "MPSA"
+#define BUSTYPE_NUBUS  "NUBUS"
+#define BUSTYPE_TC     "TC"
+#define BUSTYPE_VME    "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;
+       unsigned char mpc_apicver;
+       unsigned char mpc_flags;
+#define MPC_APIC_USABLE                0x01
+       unsigned long mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbus;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_dstapic;
+       unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+       mp_INT = 0,
+       mp_NMI = 1,
+       mp_SMI = 2,
+       mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT      0
+#define MP_IRQDIR_HIGH         1
+#define MP_IRQDIR_LOW          3
+
+
+struct mpc_config_lintsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbusid;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_destapic;     
+#define MP_APIC_ALL    0xFF
+       unsigned char mpc_destapiclint;
+};
+
+struct mp_config_oemtable
+{
+       char oem_signature[4];
+#define MPC_OEM_SIGNATURE "_OEM"
+       unsigned short oem_length;      /* Size of table */
+       char  oem_rev;                  /* 0x01 */
+       char  oem_checksum;
+       char  mpc_oem[8];
+};
+
+struct mpc_config_translation
+{
+        unsigned char mpc_type;
+        unsigned char trans_len;
+        unsigned char trans_type;
+        unsigned char trans_quad;
+        unsigned char trans_global;
+        unsigned char trans_local;
+        unsigned short trans_reserved;
+};
+
+/*
+ *     Default configurations
+ *
+ *     1       2 CPU ISA 82489DX
+ *     2       2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ *     3       2 CPU EISA 82489DX
+ *     4       2 CPU MCA 82489DX
+ *     5       2 CPU ISA+PCI
+ *     6       2 CPU EISA+PCI
+ *     7       2 CPU MCA+PCI
+ */
+
+#ifdef CONFIG_MULTIQUAD
+#define MAX_IRQ_SOURCES 512
+#else /* !CONFIG_MULTIQUAD */
+#define MAX_IRQ_SOURCES 256
+#endif /* CONFIG_MULTIQUAD */
+
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+       MP_BUS_ISA = 1,
+       MP_BUS_EISA,
+       MP_BUS_PCI,
+       MP_BUS_MCA
+};
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern unsigned long phys_cpu_present_map;
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+extern int nr_ioapics;
+extern int apic_version [MAX_APICS];
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+extern int pic_mode;
+extern int using_apic_timer;
+
+#endif
+
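A sketch (not part of the patch) of how SMP_MAGIC_IDENT is used: scanning an already-mapped candidate window (such as the BIOS area) on 16-byte boundaries for the "_MP_" floating structure. Checksum verification is omitted here.

static struct intel_mp_floating *mpf_scan(void *base, unsigned long length)
{
    unsigned int *bp = (unsigned int *)base;

    while (length > 0) {
        struct intel_mp_floating *mpf = (struct intel_mp_floating *)bp;

        if (*bp == SMP_MAGIC_IDENT && mpf->mpf_length == 1)
            return mpf;         /* "_MP_" found, length = 1 paragraph */
        bp += 4;                /* the structure is 16-byte aligned */
        length -= 16;
    }
    return NULL;
}
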
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h
new file mode 100644 (file)
index 0000000..b8d659c
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef _I386_MSGBUF_H
+#define _I386_MSGBUF_H
+
+/* 
+ * The msqid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+       struct ipc64_perm msg_perm;
+       __kernel_time_t msg_stime;      /* last msgsnd time */
+       unsigned long   __unused1;
+       __kernel_time_t msg_rtime;      /* last msgrcv time */
+       unsigned long   __unused2;
+       __kernel_time_t msg_ctime;      /* last change time */
+       unsigned long   __unused3;
+       unsigned long  msg_cbytes;      /* current number of bytes on queue */
+       unsigned long  msg_qnum;        /* number of messages in queue */
+       unsigned long  msg_qbytes;      /* max number of bytes on queue */
+       __kernel_pid_t msg_lspid;       /* pid of last msgsnd */
+       __kernel_pid_t msg_lrpid;       /* last receive pid */
+       unsigned long  __unused4;
+       unsigned long  __unused5;
+};
+
+#endif /* _I386_MSGBUF_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/msr.h b/xenolinux-2.4.16-sparse/include/asm-xeno/msr.h
new file mode 100644 (file)
index 0000000..11bcb7f
--- /dev/null
@@ -0,0 +1,104 @@
+#ifndef __ASM_MSR_H
+#define __ASM_MSR_H
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ */
+
+#define rdmsr(msr,val1,val2) \
+     __asm__ __volatile__("rdmsr" \
+                         : "=a" (val1), "=d" (val2) \
+                         : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+     __asm__ __volatile__("wrmsr" \
+                         : /* no outputs */ \
+                         : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+     __asm__ __volatile__("rdpmc" \
+                         : "=a" (low), "=d" (high) \
+                         : "c" (counter))
+
+/* symbolic names for some interesting MSRs */
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR            0
+#define MSR_IA32_P5_MC_TYPE            1
+#define MSR_IA32_PLATFORM_ID           0x17
+#define MSR_IA32_EBL_CR_POWERON                0x2a
+
+#define MSR_IA32_APICBASE              0x1b
+#define MSR_IA32_APICBASE_BSP          (1<<8)
+#define MSR_IA32_APICBASE_ENABLE       (1<<11)
+#define MSR_IA32_APICBASE_BASE         (0xfffff<<12)
+
+#define MSR_IA32_UCODE_WRITE           0x79
+#define MSR_IA32_UCODE_REV             0x8b
+
+#define MSR_IA32_PERFCTR0              0xc1
+#define MSR_IA32_PERFCTR1              0xc2
+
+#define MSR_IA32_BBL_CR_CTL            0x119
+
+#define MSR_IA32_MCG_CAP               0x179
+#define MSR_IA32_MCG_STATUS            0x17a
+#define MSR_IA32_MCG_CTL               0x17b
+
+#define MSR_IA32_EVNTSEL0              0x186
+#define MSR_IA32_EVNTSEL1              0x187
+
+#define MSR_IA32_DEBUGCTLMSR           0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP      0x1db
+#define MSR_IA32_LASTBRANCHTOIP                0x1dc
+#define MSR_IA32_LASTINTFROMIP         0x1dd
+#define MSR_IA32_LASTINTTOIP           0x1de
+
+#define MSR_IA32_MC0_CTL               0x400
+#define MSR_IA32_MC0_STATUS            0x401
+#define MSR_IA32_MC0_ADDR              0x402
+#define MSR_IA32_MC0_MISC              0x403
+
+/* AMD Defined MSRs */
+#define MSR_K6_EFER                    0xC0000080
+#define MSR_K6_STAR                    0xC0000081
+#define MSR_K6_WHCR                    0xC0000082
+#define MSR_K6_UWCCR                   0xC0000085
+#define MSR_K6_PSOR                    0xC0000087
+#define MSR_K6_PFIR                    0xC0000088
+
+#define MSR_K7_EVNTSEL0                        0xC0010000
+#define MSR_K7_PERFCTR0                        0xC0010004
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1                   0x107
+#define MSR_IDT_FCR2                   0x108
+#define MSR_IDT_FCR3                   0x109
+#define MSR_IDT_FCR4                   0x10a
+
+#define MSR_IDT_MCR0                   0x110
+#define MSR_IDT_MCR1                   0x111
+#define MSR_IDT_MCR2                   0x112
+#define MSR_IDT_MCR3                   0x113
+#define MSR_IDT_MCR4                   0x114
+#define MSR_IDT_MCR5                   0x115
+#define MSR_IDT_MCR6                   0x116
+#define MSR_IDT_MCR7                   0x117
+#define MSR_IDT_MCR_CTRL               0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR                    0x1107
+
+#endif /* __ASM_MSR_H */
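
A sketch (not part of the patch) of typical uses of the accessors above: timing a region with rdtscll() and reading the APIC base MSR. rdmsr faults outside ring 0, so this only makes sense in kernel context.

static unsigned long long cycles_for(void (*fn)(void))
{
    unsigned long long before, after;

    rdtscll(before);
    fn();
    rdtscll(after);
    return after - before;      /* elapsed cycles (unserialised) */
}

static unsigned long apic_base_phys(void)
{
    unsigned long lo, hi;

    rdmsr(MSR_IA32_APICBASE, lo, hi);
    return lo & MSR_IA32_APICBASE_BASE;     /* physical base bits */
}
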
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h
new file mode 100644 (file)
index 0000000..ff3ea87
--- /dev/null
@@ -0,0 +1,127 @@
+/*  Generic MTRR (Memory Type Range Register) ioctls.
+
+    Copyright (C) 1997-1999  Richard Gooch
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+*/
+#ifndef _LINUX_MTRR_H
+#define _LINUX_MTRR_H
+
+#include <linux/config.h>
+#include <linux/ioctl.h>
+
+#define        MTRR_IOCTL_BASE 'M'
+
+struct mtrr_sentry
+{
+    unsigned long base;    /*  Base address     */
+    unsigned long size;    /*  Size of region   */
+    unsigned int type;     /*  Type of region   */
+};
+
+struct mtrr_gentry
+{
+    unsigned int regnum;   /*  Register number  */
+    unsigned long base;    /*  Base address     */
+    unsigned long size;    /*  Size of region   */
+    unsigned int type;     /*  Type of region   */
+};
+
+/*  These are the various ioctls  */
+#define MTRRIOC_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry)
+#define MTRRIOC_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry)
+#define MTRRIOC_DEL_ENTRY        _IOW(MTRR_IOCTL_BASE,  2, struct mtrr_sentry)
+#define MTRRIOC_GET_ENTRY        _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
+#define MTRRIOC_KILL_ENTRY       _IOW(MTRR_IOCTL_BASE,  4, struct mtrr_sentry)
+#define MTRRIOC_ADD_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  5, struct mtrr_sentry)
+#define MTRRIOC_SET_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  6, struct mtrr_sentry)
+#define MTRRIOC_DEL_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  7, struct mtrr_sentry)
+#define MTRRIOC_GET_PAGE_ENTRY   _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
+#define MTRRIOC_KILL_PAGE_ENTRY  _IOW(MTRR_IOCTL_BASE,  9, struct mtrr_sentry)
+
+/*  These are the region types  */
+#define MTRR_TYPE_UNCACHABLE 0
+#define MTRR_TYPE_WRCOMB     1
+/*#define MTRR_TYPE_         2*/
+/*#define MTRR_TYPE_         3*/
+#define MTRR_TYPE_WRTHROUGH  4
+#define MTRR_TYPE_WRPROT     5
+#define MTRR_TYPE_WRBACK     6
+#define MTRR_NUM_TYPES       7
+
+#ifdef MTRR_NEED_STRINGS
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+    "uncachable",               /* 0 */
+    "write-combining",          /* 1 */
+    "?",                        /* 2 */
+    "?",                        /* 3 */
+    "write-through",            /* 4 */
+    "write-protect",            /* 5 */
+    "write-back",               /* 6 */
+};
+#endif
+
+#ifdef __KERNEL__
+
+/*  The following functions are for use by other drivers  */
+# ifdef CONFIG_MTRR
+extern int mtrr_add (unsigned long base, unsigned long size,
+                    unsigned int type, char increment);
+extern int mtrr_add_page (unsigned long base, unsigned long size,
+                    unsigned int type, char increment);
+extern int mtrr_del (int reg, unsigned long base, unsigned long size);
+extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
+#  else
+static __inline__ int mtrr_add (unsigned long base, unsigned long size,
+                               unsigned int type, char increment)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_add_page (unsigned long base, unsigned long size,
+                               unsigned int type, char increment)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_del (int reg, unsigned long base,
+                               unsigned long size)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_del_page (int reg, unsigned long base,
+                               unsigned long size)
+{
+    return -ENODEV;
+}
+
+static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;}
+
+#  endif
+
+/*  The following functions are for initialisation: don't use them!  */
+extern int mtrr_init (void);
+#  if defined(CONFIG_SMP) && defined(CONFIG_MTRR)
+extern void mtrr_init_boot_cpu (void);
+extern void mtrr_init_secondary_cpu (void);
+#  endif
+
+#endif
+
+#endif  /*  _LINUX_MTRR_H  */
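
A user-space sketch (not part of the patch) of adding a write-combining region through the ioctl interface; in 2.4 the control node is /proc/mtrr, and base/size must satisfy the CPU's MTRR alignment rules.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int add_wc_region(unsigned long base, unsigned long size)
{
    struct mtrr_sentry s = { base, size, MTRR_TYPE_WRCOMB };
    int fd = open("/proc/mtrr", O_WRONLY);

    if (fd < 0)
        return -1;
    if (ioctl(fd, MTRRIOC_ADD_ENTRY, &s) < 0) {
        perror("MTRRIOC_ADD_ENTRY");
        close(fd);
        return -1;
    }
    close(fd);
    return 0;
}
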
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/namei.h b/xenolinux-2.4.16-sparse/include/asm-xeno/namei.h
new file mode 100644 (file)
index 0000000..8148650
--- /dev/null
@@ -0,0 +1,17 @@
+/* $Id: namei.h,v 1.1 1996/12/13 14:48:21 jj Exp $
+ * linux/include/asm-i386/namei.h
+ *
+ * Included from linux/fs/namei.c
+ */
+
+#ifndef __I386_NAMEI_H
+#define __I386_NAMEI_H
+
+/* This dummy routine maybe changed to something useful
+ * for /usr/gnemul/ emulation stuff.
+ * Look at asm-sparc/namei.h for details.
+ */
+
+#define __emul_prefix() NULL
+
+#endif /* __I386_NAMEI_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/page.h b/xenolinux-2.4.16-sparse/include/asm-xeno/page.h
new file mode 100644 (file)
index 0000000..74c8824
--- /dev/null
@@ -0,0 +1,147 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT     12
+#define PAGE_SIZE      (1UL << PAGE_SHIFT)
+#define PAGE_MASK      (~(PAGE_SIZE-1))
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/config.h>
+#include <asm/hypervisor.h>
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+#define clear_page(page)       mmx_clear_page((void *)(page))
+#define copy_page(to,from)     mmx_copy_page(to,from)
+
+#else
+
+/*
+ *     On older x86 processors it's not a win to use MMX here, it seems.
+ *     Maybe on the K6-III?
+ */
+#define clear_page(page)       memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to,from)     memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+
+#endif
+
+#define clear_user_page(page, vaddr)   clear_page(page)
+#define copy_user_page(to, from, vaddr)        copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+#if CONFIG_X86_PAE
+typedef struct { unsigned long pte_low, pte_high; } pte_t;
+typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long long pgd; } pgd_t;
+#define pte_val(x)     ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+#else
+typedef struct { unsigned long pte_low; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+static inline unsigned long pte_val(pte_t x)
+{
+    unsigned long ret = x.pte_low;
+    if ( (ret & 1) ) ret -= start_info.phys_base;
+    return ret;
+}
+#endif
+#define PTE_MASK       PAGE_MASK
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+static inline unsigned long pmd_val(pmd_t x)
+{
+    unsigned long ret = x.pmd;
+    if ( (ret & 1) ) ret -= start_info.phys_base;
+    return ret;
+}
+#define pgd_val(x)     ({ BUG(); (unsigned long)0; })
+#define pgprot_val(x)  ((x).pgprot)
+
+static inline pte_t __pte(unsigned long x)
+{
+    if ( (x & 1) ) x += start_info.phys_base;
+    return ((pte_t) { (x) });
+}
+static inline pmd_t __pmd(unsigned long x)
+{
+    if ( (x & 1) ) x += start_info.phys_base;
+    return ((pmd_t) { (x) });
+}
+#define __pgd(x) ({ BUG(); (pgd_t) { 0 }; })
+#define __pgprot(x)    ((pgprot_t) { (x) } )
+
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+/*
+ * This handles the memory map.. We could make this a config
+ * option, but too many people screw it up, and too few need
+ * it.
+ *
+ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has
+ * a virtual address space of one gigabyte, which limits the
+ * amount of physical memory you can use to about 950MB. 
+ *
+ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+
+#define __PAGE_OFFSET          (0xC0000000)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Tell the user there is some problem. Beep too, so we can
+ * see^H^H^Hhear bugs in early bootup as well!
+ */
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+extern void do_BUG(const char *file, int line);
+#define BUG() do {                                     \
+       do_BUG(__FILE__, __LINE__);                     \
+       __asm__ __volatile__("ud2");                    \
+} while (0)
+#else
+#define BUG() __asm__ __volatile__(".byte 0x0f,0x0b")
+#endif
+
+#define PAGE_BUG(page) do { \
+       BUG(); \
+} while (0)
+
+/* Pure 2^n version of get_order */
+static __inline__ int get_order(unsigned long size)
+{
+       int order;
+
+       size = (size-1) >> (PAGE_SHIFT-1);
+       order = -1;
+       do {
+               size >>= 1;
+               order++;
+       } while (size);
+       return order;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
+#define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
+#define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define virt_to_page(kaddr)    (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page)       ((page - mem_map) < max_mapnr)
+
+#endif /* __KERNEL__ */
+
+#endif /* _I386_PAGE_H */
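
A user-space replica (not part of the patch) of get_order() with a few spot checks: it returns the smallest n such that PAGE_SIZE << n covers the requested size, which is the order __get_free_pages() expects.

#include <assert.h>

#define PAGE_SHIFT 12

static int get_order_demo(unsigned long size)
{
    int order = -1;

    size = (size - 1) >> (PAGE_SHIFT - 1);
    do {
        size >>= 1;
        order++;
    } while (size);
    return order;
}

int main(void)
{
    assert(get_order_demo(1)     == 0);     /* one page: 4 KB  */
    assert(get_order_demo(4096)  == 0);
    assert(get_order_demo(4097)  == 1);     /* rounds up: 8 KB */
    assert(get_order_demo(16384) == 2);     /* 16 KB           */
    return 0;
}
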
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/param.h b/xenolinux-2.4.16-sparse/include/asm-xeno/param.h
new file mode 100644 (file)
index 0000000..1b10bf4
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _ASMi386_PARAM_H
+#define _ASMi386_PARAM_H
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE  4096
+
+#ifndef NGROUPS
+#define NGROUPS                32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP                (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64      /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC        100     /* frequency at which times() counts */
+#endif
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/parport.h b/xenolinux-2.4.16-sparse/include/asm-xeno/parport.h
new file mode 100644 (file)
index 0000000..fa0e321
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * parport.h: ia32-specific parport initialisation
+ *
+ * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+
+#ifndef _ASM_I386_PARPORT_H
+#define _ASM_I386_PARPORT_H 1
+
+static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma);
+static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma)
+{
+       return parport_pc_find_isa_ports (autoirq, autodma);
+}
+
+#endif /* !(_ASM_I386_PARPORT_H) */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h
new file mode 100644 (file)
index 0000000..da6bf84
--- /dev/null
@@ -0,0 +1,185 @@
+#ifndef _I386_PGALLOC_H
+#define _I386_PGALLOC_H
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <asm/fixmap.h>
+#include <linux/threads.h>
+
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+#define pmd_populate(mm, pmd, pte) \
+ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
+
+static __inline__ pgd_t *get_pgd_slow(void)
+{
+    pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+    pgd_t *kpgd;
+    pmd_t *kpmd;
+    pte_t *kpte;
+
+    if (pgd) {
+        memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+        memcpy(pgd + USER_PTRS_PER_PGD, 
+               init_mm.pgd + USER_PTRS_PER_PGD, 
+               (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+        kpgd = pgd_offset_k((unsigned long)pgd);
+        kpmd = pmd_offset(kpgd, (unsigned long)pgd);
+        kpte = pte_offset(kpmd, (unsigned long)pgd);
+        queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW);
+        queue_baseptr_create(__pa(pgd));
+    }
+
+    return pgd;
+}
+
+#if 0
+static __inline__ pgd_t *get_pgd_fast(void)
+{
+    unsigned long *ret;
+
+    if ((ret = pgd_quicklist) != NULL) {
+        pgd_quicklist = (unsigned long *)(*ret);
+        ret[0] = 0;
+        pgtable_cache_size--;
+    } else
+        ret = (unsigned long *)get_pgd_slow();
+    return (pgd_t *)ret;
+}
+
+static __inline__ void free_pgd_fast(pgd_t *pgd)
+{
+    *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
+    pgd_quicklist = (unsigned long *) pgd;
+    pgtable_cache_size++;
+}
+#else
+#define get_pgd_fast get_pgd_slow
+#define free_pgd_fast free_pgd_slow
+#endif
+
+static __inline__ void free_pgd_slow(pgd_t *pgd)
+{
+    pgd_t *kpgd;
+    pmd_t *kpmd;
+    pte_t *kpte;
+    queue_baseptr_remove(__pa(pgd));
+    kpgd = pgd_offset_k((unsigned long)pgd);
+    kpmd = pmd_offset(kpgd, (unsigned long)pgd);
+    kpte = pte_offset(kpmd, (unsigned long)pgd);
+    queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW);
+    free_page((unsigned long)pgd);
+}
+
+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+    pte_t *pte;
+    pgd_t *kpgd;
+    pmd_t *kpmd;
+    pte_t *kpte;
+
+    pte = (pte_t *) __get_free_page(GFP_KERNEL);
+    if (pte)
+    {
+        clear_page(pte);
+        kpgd = pgd_offset_k((unsigned long)pte);
+        kpmd = pmd_offset(kpgd, (unsigned long)pte);
+        kpte = pte_offset(kpmd, (unsigned long)pte);
+        queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW);
+    }
+    return pte;
+}
+
+#if 0
+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+{
+    unsigned long *ret;
+
+    if ((ret = (unsigned long *)pte_quicklist) != NULL) {
+        pte_quicklist = (unsigned long *)(*ret);
+        ret[0] = ret[1];
+        pgtable_cache_size--;
+    }
+    return (pte_t *)ret;
+}
+
+static __inline__ void pte_free_fast(pte_t *pte)
+{
+    *(unsigned long *)pte = (unsigned long) pte_quicklist;
+    pte_quicklist = (unsigned long *) pte;
+    pgtable_cache_size++;
+}
+#else
+#define pte_alloc_one_fast pte_alloc_one
+#define pte_free_fast pte_free_slow
+#endif
+
+static __inline__ void pte_free_slow(pte_t *pte)
+{
+    pgd_t *kpgd;
+    pmd_t *kpmd;
+    pte_t *kpte;
+    kpgd = pgd_offset_k((unsigned long)pte);
+    kpmd = pmd_offset(kpgd, (unsigned long)pte);
+    kpte = pte_offset(kpmd, (unsigned long)pte);
+    queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW);
+    free_page((unsigned long)pte);
+}
+
+#define pte_free(pte)          pte_free_fast(pte)
+#define pgd_alloc(mm)          get_pgd_fast()
+#define pgd_free(pgd)          free_pgd_fast(pgd)
+
+#define pmd_alloc_one_fast(mm, addr)   ({ BUG(); ((pmd_t *)1); })
+#define pmd_alloc_one(mm, addr)                ({ BUG(); ((pmd_t *)2); })
+#define pmd_free_slow(x)               do { } while (0)
+#define pmd_free_fast(x)               do { } while (0)
+#define pmd_free(x)                    do { } while (0)
+#define pgd_populate(mm, pmd, pte)     BUG()
+
+extern int do_check_pgt_cache(int, int);
+
+/*
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes' TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLBs
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ */
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+    if (mm == current->active_mm)
+        __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+                                  unsigned long addr)
+{
+    if (vma->vm_mm == current->active_mm)
+        __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct mm_struct *mm,
+                                   unsigned long start, unsigned long end)
+{
+    if (mm == current->active_mm)
+        __flush_tlb();
+}
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+    /* i386 does not keep any page table caches in TLB */
+}
+
+#endif /* _I386_PGALLOC_H */
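
The allocation helpers above capture the core XenoLinux page-table discipline: the guest never writes a page-table page directly, it queues updates for the hypervisor to validate and apply. A condensed sketch of the lifecycle that get_pgd_slow()/free_pgd_slow() implement (same names as in the file; error handling elided; the write-protect step evidently keeps the guest from touching a live page table, which is what lets the hypervisor accept it as a base pointer):

    /* Allocate and pin a new page directory (sketch). */
    pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);

    /* 1. User half zeroed, kernel half copied from init_mm. */
    memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
    memcpy(pgd + USER_PTRS_PER_PGD,
           init_mm.pgd + USER_PTRS_PER_PGD,
           (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));

    /* 2. Find the kernel mapping of the pgd page and queue an L1 update
     *    clearing _PAGE_RW, so the guest can no longer write it. */
    pgd_t *kpgd = pgd_offset_k((unsigned long)pgd);
    pmd_t *kpmd = pmd_offset(kpgd, (unsigned long)pgd);
    pte_t *kpte = pte_offset(kpmd, (unsigned long)pgd);
    queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte) & ~_PAGE_RW);

    /* 3. Register the physical page as a page-table base. */
    queue_baseptr_create(__pa(pgd));

    /* Teardown is the mirror image: queue_baseptr_remove(__pa(pgd)),
     * queue an L1 update restoring _PAGE_RW, then free_page(). */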
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h
new file mode 100644 (file)
index 0000000..735ac99
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef _I386_PGTABLE_2LEVEL_H
+#define _I386_PGTABLE_2LEVEL_H
+
+/*
+ * traditional i386 two-level paging structure:
+ */
+
+#define PGDIR_SHIFT    22
+#define PTRS_PER_PGD   1024
+
+/*
+ * the i386 is two-level, so we don't really have any
+ * PMD directory physically.
+ */
+#define PMD_SHIFT      22
+#define PTRS_PER_PMD   1
+
+#define PTRS_PER_PTE   1024
+
+#define pte_ERROR(e) \
+       printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+#define pmd_ERROR(e) \
+       printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+       printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+static inline int pgd_none(pgd_t pgd)          { return 0; }
+static inline int pgd_bad(pgd_t pgd)           { return 0; }
+static inline int pgd_present(pgd_t pgd)       { return 1; }
+#define pgd_clear(xp)                          do { } while (0)
+
+#define set_pte(pteptr, pteval) queue_l1_entry_update(__pa(pteptr), (pteval).pte_low)
+#define set_pmd(pmdptr, pmdval) queue_l2_entry_update(__pa(pmdptr), (pmdval).pmd)
+#define set_pgd(pgdptr, pgdval) ((void)0)
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+       return (pmd_t *) dir;
+}
+
+/*
+ * A note on implementation of this atomic 'get-and-clear' operation.
+ * This is actually very simple because XenoLinux can only run on a single
+ * processor. Therefore, we cannot race other processors setting the 'accessed'
+ * or 'dirty' bits on a page-table entry.
+ * Even if pages are shared between domains, that is not a problem because
+ * each domain will have separate page tables, with their own versions of
+ * accessed & dirty state.
+ */
+static inline pte_t ptep_get_and_clear(pte_t *xp)
+{
+    pte_t pte = *xp;
+    queue_l1_entry_update(__pa(xp), 0);
+    return pte;
+}
+
+#define pte_same(a, b)         ((a).pte_low == (b).pte_low)
+#define pte_page(x)            (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
+#define pte_none(x)            (!(x).pte_low)
+#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+
+#endif /* _I386_PGTABLE_2LEVEL_H */
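
Since every PTE store in this header goes through queue_l1_entry_update(), and the comment above notes the guest is uniprocessor, a plain read followed by a queued write is race-free. A hypothetical helper combining the two primitives (example_ptep_exchange is not part of this tree, just an illustration):

    /* Return the old PTE and queue a replacement, without needing xchg. */
    static inline pte_t example_ptep_exchange(pte_t *ptep, pte_t newval)
    {
        pte_t old = *ptep;              /* plain read is safe on UP         */
        queue_l1_entry_update(__pa(ptep), newval.pte_low);
        return old;                     /* hypervisor validates and applies */
    }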
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h
new file mode 100644 (file)
index 0000000..22c69d2
--- /dev/null
@@ -0,0 +1,103 @@
+#ifndef _I386_PGTABLE_3LEVEL_H
+#define _I386_PGTABLE_3LEVEL_H
+
+#error "PAE unsupported"
+
+/*
+ * Intel Physical Address Extension (PAE) Mode - three-level page
+ * tables on PPro+ CPUs.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT    30
+#define PTRS_PER_PGD   4
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT      21
+#define PTRS_PER_PMD   512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE   512
+
+#define pte_ERROR(e) \
+       printk("%s:%d: bad pte %p(%08lx%08lx).\n", __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
+#define pmd_ERROR(e) \
+       printk("%s:%d: bad pmd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pmd_val(e))
+#define pgd_ERROR(e) \
+       printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
+
+static inline int pgd_none(pgd_t pgd)          { return 0; }
+static inline int pgd_bad(pgd_t pgd)           { return 0; }
+static inline int pgd_present(pgd_t pgd)       { return 1; }
+
+/* Rules for using set_pte: the pte being assigned *must* be
+ * either not present or in a state where the hardware will
+ * not attempt to update the pte.  In places where this is
+ * not possible, use pte_get_and_clear to obtain the old pte
+ * value and then use set_pte to update it.  -ben
+ */
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+       ptep->pte_high = pte.pte_high;
+       smp_wmb();
+       ptep->pte_low = pte.pte_low;
+}
+#define set_pmd(pmdptr,pmdval) \
+               set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
+#define set_pgd(pgdptr,pgdval) \
+               set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval))
+
+/*
+ * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+ * the TLB via cr3 if the top-level pgd is changed...
+ * We do not let the generic code free and clear pgd entries due to
+ * this erratum.
+ */
+static inline void pgd_clear (pgd_t * pgd) { }
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
+                       __pmd_offset(address))
+
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
+{
+       pte_t res;
+
+       /* xchg acts as a barrier before the setting of the high bits */
+       res.pte_low = xchg(&ptep->pte_low, 0);
+       res.pte_high = ptep->pte_high;
+       ptep->pte_high = 0;
+
+       return res;
+}
+
+static inline int pte_same(pte_t a, pte_t b)
+{
+       return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
+}
+
+#define pte_page(x)    (mem_map+(((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT))))
+#define pte_none(x)    (!(x).pte_low && !(x).pte_high)
+
+static inline pte_t __mk_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+       pte_t pte;
+
+       pte.pte_high = page_nr >> (32 - PAGE_SHIFT);
+       pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot);
+       return pte;
+}
+
+#endif /* _I386_PGTABLE_3LEVEL_H */
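
Although this file is compiled out (note the #error above), its set_pte() illustrates the ordering rule the in-file comment describes: _PAGE_PRESENT lives in pte_low, so writing pte_high first and pte_low last means the MMU can never walk a half-updated 64-bit entry. Annotated restatement:

    ptep->pte_high = pte.pte_high;  /* entry still looks not-present */
    smp_wmb();                      /* order the two word stores     */
    ptep->pte_low  = pte.pte_low;   /* publish: the P bit goes live  */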
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h
new file mode 100644 (file)
index 0000000..2eaa287
--- /dev/null
@@ -0,0 +1,309 @@
+#ifndef _I386_PGTABLE_H
+#define _I386_PGTABLE_H
+
+#include <linux/config.h>
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <asm/fixmap.h>
+#include <linux/threads.h>
+
+#ifndef _I386_BITOPS_H
+#include <asm/bitops.h>
+#endif
+
+#define swapper_pg_dir 0
+extern void paging_init(void);
+
+/* Caches aren't brain-dead on the intel. */
+#define flush_cache_all()                      do { } while (0)
+#define flush_cache_mm(mm)                     do { } while (0)
+#define flush_cache_range(mm, start, end)      do { } while (0)
+#define flush_cache_page(vma, vmaddr)          do { } while (0)
+#define flush_page_to_ram(page)                        do { } while (0)
+#define flush_dcache_page(page)                        do { } while (0)
+#define flush_icache_range(start, end)         do { } while (0)
+#define flush_icache_page(vma,pg)              do { } while (0)
+
+extern unsigned long pgkern_mask;
+
+#define __flush_tlb() queue_tlb_flush()
+#define __flush_tlb_global() __flush_tlb()
+#define __flush_tlb_all() __flush_tlb_global()
+#define __flush_tlb_one(addr) queue_tlb_flush_one(addr)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifndef __ASSEMBLY__
+#if CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+
+/*
+ * Need to initialise the X86 PAE caches
+ */
+extern void pgtable_cache_init(void);
+
+#else
+# include <asm/pgtable-2level.h>
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()   do { } while (0)
+
+#endif
+#endif
+
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR      0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT   22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
+
+#ifndef __ASSEMBLY__
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be an 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
+#define VMALLOC_START  (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
+                                               ~(VMALLOC_OFFSET-1))
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#if CONFIG_HIGHMEM
+# define VMALLOC_END   (PKMAP_BASE-2*PAGE_SIZE)
+#else
+# define VMALLOC_END   (FIXADDR_START-2*PAGE_SIZE)
+#endif
+
+#define _PAGE_BIT_PRESENT      0
+#define _PAGE_BIT_RW           1
+#define _PAGE_BIT_USER         2
+#define _PAGE_BIT_PWT          3
+#define _PAGE_BIT_PCD          4
+#define _PAGE_BIT_ACCESSED     5
+#define _PAGE_BIT_DIRTY                6
+#define _PAGE_BIT_PSE          7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL       8       /* Global TLB entry PPro+ */
+
+#define _PAGE_PRESENT  0x001
+#define _PAGE_RW       0x002
+#define _PAGE_USER     0x004
+#define _PAGE_PWT      0x008
+#define _PAGE_PCD      0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY    0x040
+#define _PAGE_PSE      0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_GLOBAL   0x100   /* Global TLB entry PPro+ */
+
+#define _PAGE_PROTNONE 0x080   /* If not present */
+
+#define _PAGE_TABLE    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE      __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_NOCACHE \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO \
+       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+#if 0
+#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+#else
+#define MAKE_GLOBAL(x) __pgprot(x)
+#endif
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+
+/*
+ * The i386 can't do page protection for execute, and considers execute
+ * the same as read. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY
+#define __P111 PAGE_COPY
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED
+#define __S111 PAGE_SHARED
+
+#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp)  queue_l1_entry_update(__pa(xp), 0)
+
+#define pmd_none(x)    (!(x).pmd)
+#define pmd_present(x) ((x).pmd & _PAGE_PRESENT)
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
+#define        pmd_bad(x)      (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+
+/*
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
+ */
+#define page_address(page) ((page)->virtual)
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_exec(pte_t pte)          { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_dirty(pte_t pte)         { return (pte).pte_low & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)         { return (pte).pte_low & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte)         { return (pte).pte_low & _PAGE_RW; }
+
+static inline pte_t pte_rdprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_mkclean(pte_t pte)     { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)       { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)   { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkread(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte)      { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)     { (pte).pte_low |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)     { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)     { (pte).pte_low |= _PAGE_RW; return pte; }
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+    unsigned long pteval = *(unsigned long *)ptep;
+    int ret = pteval & _PAGE_DIRTY;
+    if ( ret ) queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_DIRTY);
+    return ret;
+}
+static inline  int ptep_test_and_clear_young(pte_t *ptep)
+{
+    unsigned long pteval = *(unsigned long *)ptep;
+    int ret = pteval & _PAGE_ACCESSED;
+    if ( ret ) queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_ACCESSED);
+    return ret;
+}
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+    unsigned long pteval = *(unsigned long *)ptep;
+    if ( (pteval & _PAGE_RW) )
+        queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_RW);
+}
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+    unsigned long pteval = *(unsigned long *)ptep;
+    if ( !(pteval & _PAGE_DIRTY) )
+        queue_l1_entry_update(__pa(ptep), pteval | _PAGE_DIRTY);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)   __mk_pte((page) - mem_map, (pgprot))
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot)  __mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+       pte.pte_low &= _PAGE_CHG_MASK;
+       pte.pte_low |= pgprot_val(newprot);
+       return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page(pmd) \
+((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define __pgd_offset(address) pgd_index(address)
+
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define __pmd_offset(address) \
+               (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+               ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+                       __pte_offset(address))
+
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+
+/* Encode and de-code a swap entry */
+#define SWP_TYPE(x)                    (((x).val >> 1) & 0x3f)
+#define SWP_OFFSET(x)                  ((x).val >> 8)
+#define SWP_ENTRY(type, offset)                ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#define pte_to_swp_entry(pte)          ((swp_entry_t) { (pte).pte_low })
+#define swp_entry_to_pte(x)            ((pte_t) { (x).val })
+
+#endif /* !__ASSEMBLY__ */
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define PageSkip(page)         (0)
+#define kern_addr_valid(addr)  (1)
+
+#define io_remap_page_range remap_page_range
+
+#endif /* _I386_PGTABLE_H */
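
The swap-entry macros at the end pack a (type, offset) pair into a non-present PTE: bit 0 stays clear so the entry never looks present, and the offset starts at bit 8, skipping bit 7 (_PAGE_PROTNONE). A worked example with illustrative values:

    /* bit 0     : 0 (never _PAGE_PRESENT)
     * bits 1..6 : swap type, 0..63
     * bits 8..31: page offset within the swap area */
    swp_entry_t e = SWP_ENTRY(3, 0x1234);   /* e.val == 0x00123406      */
    unsigned long t = SWP_TYPE(e);          /* -> 3                     */
    unsigned long o = SWP_OFFSET(e);        /* -> 0x1234                */
    pte_t pte = swp_entry_to_pte(e);        /* store into a cleared PTE */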
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/poll.h b/xenolinux-2.4.16-sparse/include/asm-xeno/poll.h
new file mode 100644 (file)
index 0000000..e5feda7
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef __i386_POLL_H
+#define __i386_POLL_H
+
+/* These are specified by iBCS2 */
+#define POLLIN         0x0001
+#define POLLPRI                0x0002
+#define POLLOUT                0x0004
+#define POLLERR                0x0008
+#define POLLHUP                0x0010
+#define POLLNVAL       0x0020
+
+/* The rest seem to be more-or-less nonstandard. Check them! */
+#define POLLRDNORM     0x0040
+#define POLLRDBAND     0x0080
+#define POLLWRNORM     0x0100
+#define POLLWRBAND     0x0200
+#define POLLMSG                0x0400
+
+struct pollfd {
+       int fd;
+       short events;
+       short revents;
+};
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h
new file mode 100644 (file)
index 0000000..5529f32
--- /dev/null
@@ -0,0 +1,80 @@
+#ifndef __ARCH_I386_POSIX_TYPES_H
+#define __ARCH_I386_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned short __kernel_dev_t;
+typedef unsigned long  __kernel_ino_t;
+typedef unsigned short __kernel_mode_t;
+typedef unsigned short __kernel_nlink_t;
+typedef long           __kernel_off_t;
+typedef int            __kernel_pid_t;
+typedef unsigned short __kernel_ipc_pid_t;
+typedef unsigned short __kernel_uid_t;
+typedef unsigned short __kernel_gid_t;
+typedef unsigned int   __kernel_size_t;
+typedef int            __kernel_ssize_t;
+typedef int            __kernel_ptrdiff_t;
+typedef long           __kernel_time_t;
+typedef long           __kernel_suseconds_t;
+typedef long           __kernel_clock_t;
+typedef int            __kernel_daddr_t;
+typedef char *         __kernel_caddr_t;
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+typedef unsigned int   __kernel_uid32_t;
+typedef unsigned int   __kernel_gid32_t;
+
+typedef unsigned short __kernel_old_uid_t;
+typedef unsigned short __kernel_old_gid_t;
+
+#ifdef __GNUC__
+typedef long long      __kernel_loff_t;
+#endif
+
+typedef struct {
+#if defined(__KERNEL__) || defined(__USE_ALL)
+       int     val[2];
+#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+       int     __val[2];
+#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */
+} __kernel_fsid_t;
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#undef __FD_SET
+#define __FD_SET(fd,fdsetp) \
+               __asm__ __volatile__("btsl %1,%0": \
+                       "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
+
+#undef __FD_CLR
+#define __FD_CLR(fd,fdsetp) \
+               __asm__ __volatile__("btrl %1,%0": \
+                       "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd)))
+
+#undef __FD_ISSET
+#define __FD_ISSET(fd,fdsetp) (__extension__ ({ \
+               unsigned char __result; \
+               __asm__ __volatile__("btl %1,%2 ; setb %0" \
+                       :"=q" (__result) :"r" ((int) (fd)), \
+                       "m" (*(__kernel_fd_set *) (fdsetp))); \
+               __result; }))
+
+#undef __FD_ZERO
+#define __FD_ZERO(fdsetp) \
+do { \
+       int __d0, __d1; \
+       __asm__ __volatile__("cld ; rep ; stosl" \
+                       :"=m" (*(__kernel_fd_set *) (fdsetp)), \
+                         "=&c" (__d0), "=&D" (__d1) \
+                       :"a" (0), "1" (__FDSET_LONGS), \
+                       "2" ((__kernel_fd_set *) (fdsetp)) : "memory"); \
+} while (0)
+
+#endif /* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */
+
+#endif
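
The __FD_* macros above are the i386 bit-twiddling behind the classic select() fd_set interface; each descriptor maps to one bit. Usage sketch (the __kernel_fd_set type and __FDSET_LONGS come from linux/posix_types.h):

    __kernel_fd_set fds;

    __FD_ZERO(&fds);            /* rep;stosl clears __FDSET_LONGS words */
    __FD_SET(5, &fds);          /* btsl: set bit 5                      */
    if (__FD_ISSET(5, &fds))    /* btl + setb: test bit 5               */
        __FD_CLR(5, &fds);      /* btrl: clear bit 5                    */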
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/processor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/processor.h
new file mode 100644 (file)
index 0000000..3e65e19
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ * include/asm-i386/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_I386_PROCESSOR_H
+#define __ASM_I386_PROCESSOR_H
+
+#include <asm/math_emu.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
+#include <linux/cache.h>
+#include <linux/config.h>
+#include <linux/threads.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; })
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+       __u8    x86;            /* CPU family */
+       __u8    x86_vendor;     /* CPU vendor */
+       __u8    x86_model;
+       __u8    x86_mask;
+       char    wp_works_ok;    /* It doesn't on 386's */
+       char    hlt_works_ok;   /* Problems on some 486Dx4's and old 386's */
+       char    hard_math;
+       char    rfu;
+       int     cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
+       __u32   x86_capability[NCAPINTS];
+       char    x86_vendor_id[16];
+       char    x86_model_id[64];
+       int     x86_cache_size;  /* in KB - valid for CPUs which support this
+                                   call  */
+       int     fdiv_bug;
+       int     f00f_bug;
+       int     coma_bug;
+       unsigned long loops_per_jiffy;
+       unsigned long *pgd_quick;
+       unsigned long *pmd_quick;
+       unsigned long *pte_quick;
+       unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge    (test_bit(X86_FEATURE_PGE,  boot_cpu_data.x86_capability))
+#define cpu_has_pse    (test_bit(X86_FEATURE_PSE,  boot_cpu_data.x86_capability))
+#define cpu_has_pae    (test_bit(X86_FEATURE_PAE,  boot_cpu_data.x86_capability))
+#define cpu_has_tsc    (test_bit(X86_FEATURE_TSC,  boot_cpu_data.x86_capability))
+#define cpu_has_de     (test_bit(X86_FEATURE_DE,   boot_cpu_data.x86_capability))
+#define cpu_has_vme    (test_bit(X86_FEATURE_VME,  boot_cpu_data.x86_capability))
+#define cpu_has_fxsr   (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm    (test_bit(X86_FEATURE_XMM,  boot_cpu_data.x86_capability))
+#define cpu_has_fpu    (test_bit(X86_FEATURE_FPU,  boot_cpu_data.x86_capability))
+#define cpu_has_apic   (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF  0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF  0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF  0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF  0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF  0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL        0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT  0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF  0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM  0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC  0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID  0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+       __asm__("cpuid"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+       unsigned int eax;
+
+       __asm__("cpuid"
+               : "=a" (eax)
+               : "0" (op)
+               : "bx", "cx", "dx");
+       return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+       unsigned int eax, ebx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=b" (ebx)
+               : "0" (op)
+               : "cx", "dx" );
+       return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+       unsigned int eax, ecx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=c" (ecx)
+               : "0" (op)
+               : "bx", "dx" );
+       return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+       unsigned int eax, edx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=d" (edx)
+               : "0" (op)
+               : "bx", "cx");
+       return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME            0x0001  /* enable vm86 extensions */
+#define X86_CR4_PVI            0x0002  /* virtual interrupts flag enable */
+#define X86_CR4_TSD            0x0004  /* disable time stamp at ipl 3 */
+#define X86_CR4_DE             0x0008  /* enable debugging extensions */
+#define X86_CR4_PSE            0x0010  /* enable page size extensions */
+#define X86_CR4_PAE            0x0020  /* enable physical address extensions */
+#define X86_CR4_MCE            0x0040  /* Machine check enable */
+#define X86_CR4_PGE            0x0080  /* enable global pages */
+#define X86_CR4_PCE            0x0100  /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR         0x0200  /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT     0x0400  /* enable unmasked SSE exceptions */
+
+extern unsigned long mmu_cr4_features;
+
+#include <asm/hypervisor.h>
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+    HYPERVISOR_console_write("No set_in_cr4", 13);
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+    HYPERVISOR_console_write("No clear_in_cr4", 15);
+}
+
+/*
+ *      Cyrix CPU configuration register indexes
+ */
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ *      Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+       outb((reg), 0x22); \
+       outb((data), 0x23); \
+} while (0)
+
+/*
+ * Bus types (default is ISA, but people can check others with these..)
+ */
+#ifdef CONFIG_EISA
+extern int EISA_bus;
+#else
+#define EISA_bus (0)
+#endif
+extern int MCA_bus;
+
+/* from system description table in BIOS.  Mostly for MCA use, but
+others may find it useful. */
+extern unsigned int machine_id;
+extern unsigned int machine_submodel_id;
+extern unsigned int BIOS_revision;
+extern unsigned int mca_pentium_flag;
+
+/*
+ * User space process size: 3GB (default).
+ */
+#define TASK_SIZE      (PAGE_OFFSET)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE     (TASK_SIZE / 3)
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+       long    cwd;
+       long    swd;
+       long    twd;
+       long    fip;
+       long    fcs;
+       long    foo;
+       long    fos;
+       long    st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+       long    status;         /* software status information */
+};
+
+struct i387_fxsave_struct {
+       unsigned short  cwd;
+       unsigned short  swd;
+       unsigned short  twd;
+       unsigned short  fop;
+       long    fip;
+       long    fcs;
+       long    foo;
+       long    fos;
+       long    mxcsr;
+       long    reserved;
+       long    st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+       long    xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+       long    padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+       long    cwd;
+       long    swd;
+       long    twd;
+       long    fip;
+       long    fcs;
+       long    foo;
+       long    fos;
+       long    st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+       unsigned char   ftop, changed, lookahead, no_update, rm, alimit;
+       struct info     *info;
+       unsigned long   entry_eip;
+};
+
+union i387_union {
+       struct i387_fsave_struct        fsave;
+       struct i387_fxsave_struct       fxsave;
+       struct i387_soft_struct soft;
+};
+
+typedef struct {
+       unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+       unsigned short  back_link,__blh;
+       unsigned long   esp0;
+       unsigned short  ss0,__ss0h;
+       unsigned long   esp1;
+       unsigned short  ss1,__ss1h;
+       unsigned long   esp2;
+       unsigned short  ss2,__ss2h;
+       unsigned long   __cr3;
+       unsigned long   eip;
+       unsigned long   eflags;
+       unsigned long   eax,ecx,edx,ebx;
+       unsigned long   esp;
+       unsigned long   ebp;
+       unsigned long   esi;
+       unsigned long   edi;
+       unsigned short  es, __esh;
+       unsigned short  cs, __csh;
+       unsigned short  ss, __ssh;
+       unsigned short  ds, __dsh;
+       unsigned short  fs, __fsh;
+       unsigned short  gs, __gsh;
+       unsigned short  ldt, __ldth;
+       unsigned short  trace, bitmap;
+       unsigned long   io_bitmap[IO_BITMAP_SIZE+1];
+       /*
+        * pads the TSS to be cacheline-aligned (size is 0x100)
+        */
+       unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+       unsigned long   esp0;
+       unsigned long   eip;
+       unsigned long   esp;
+       unsigned long   fs;
+       unsigned long   gs;
+/* Hardware debugging registers */
+       unsigned long   debugreg[8];  /* %%db0-7 debug registers */
+/* fault info */
+       unsigned long   cr2, trap_no, error_code;
+/* floating point info */
+       union i387_union        i387;
+/* virtual 86 mode info */
+       struct vm86_struct      * vm86_info;
+       unsigned long           screen_bitmap;
+       unsigned long           v86flags, v86mask, v86mode, saved_esp0;
+};
+
+#define INIT_THREAD  { 0 }
+
+#define INIT_TSS  {                                            \
+       0,0, /* back_link, __blh */                             \
+       sizeof(init_stack) + (long) &init_stack, /* esp0 */     \
+       __KERNEL_DS, 0, /* ss0 */                               \
+       0,0,0,0,0,0, /* stack1, stack2 */                       \
+       0, /* cr3 */                                            \
+       0,0, /* eip,eflags */                                   \
+       0,0,0,0, /* eax,ecx,edx,ebx */                          \
+       0,0,0,0, /* esp,ebp,esi,edi */                          \
+       0,0,0,0,0,0, /* es,cs,ss */                             \
+       0,0,0,0,0,0, /* ds,fs,gs */                             \
+       __LDT(0),0, /* ldt */                                   \
+       0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */        \
+       {~0, } /* ioperm */                                     \
+}
+
+#define start_thread(regs, new_eip, new_esp) do {              \
+       __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));       \
+       set_fs(USER_DS);                                        \
+       regs->xds = __USER_DS;                                  \
+       regs->xes = __USER_DS;                                  \
+       regs->xss = __USER_DS;                                  \
+       regs->xcs = __USER_CS;                                  \
+       regs->eip = new_eip;                                    \
+       regs->esp = new_esp;                                    \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+       return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk)  (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk)  (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+
+#define init_task      (init_task_union.task)
+#define init_stack     (init_task_union.stack)
+
+struct microcode {
+       unsigned int hdrver;
+       unsigned int rev;
+       unsigned int date;
+       unsigned int sig;
+       unsigned int cksum;
+       unsigned int ldrver;
+       unsigned int pf;
+       unsigned int reserved[5];
+       unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE      _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+       __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_relax()    rep_nop()
+
+/* Prefetch instructions for Pentium III and AMD Athlon */
+#ifdef         CONFIG_MPENTIUMIII
+
+#define ARCH_HAS_PREFETCH
+extern inline void prefetch(const void *x)
+{
+       __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x));
+}
+
+#elif CONFIG_X86_USE_3DNOW
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+        __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+        __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x)  prefetchw(x)
+
+#endif
+
+#define TF_MASK 0x100
+
+#endif /* __ASM_I386_PROCESSOR_H */
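
The cpuid() helpers above combine in the usual way; for instance, leaf 0 returns the maximum supported leaf in EAX and the 12-byte vendor string spread across EBX, EDX, ECX (in that order). Sketch:

    int eax, ebx, ecx, edx;
    char vendor[13];

    cpuid(0, &eax, &ebx, &ecx, &edx);
    memcpy(vendor + 0, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);
    vendor[12] = '\0';          /* e.g. "GenuineIntel"; max leaf in eax */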
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h
new file mode 100644 (file)
index 0000000..f6d95dc
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef _I386_PTRACE_H
+#define _I386_PTRACE_H
+
+#define EBX 0
+#define ECX 1
+#define EDX 2
+#define ESI 3
+#define EDI 4
+#define EBP 5
+#define EAX 6
+#define DS 7
+#define ES 8
+#define FS 9
+#define GS 10
+#define ORIG_EAX 11
+#define EIP 12
+#define CS  13
+#define EFL 14
+#define UESP 15
+#define SS   16
+#define FRAME_SIZE 17
+
+/* this struct defines the way the registers are stored on the 
+   stack during a system call. */
+
+struct pt_regs {
+       long ebx;
+       long ecx;
+       long edx;
+       long esi;
+       long edi;
+       long ebp;
+       long eax;
+       int  xds;
+       int  xes;
+       long orig_eax;
+       long eip;
+       int  xcs;
+       long eflags;
+       long esp;
+       int  xss;
+};
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+#define PTRACE_GETFPXREGS         18
+#define PTRACE_SETFPXREGS         19
+
+#define PTRACE_SETOPTIONS         21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD     0x00000001
+
+enum EFLAGS {
+        EF_CF   = 0x00000001,
+        EF_PF   = 0x00000004,
+        EF_AF   = 0x00000010,
+        EF_ZF   = 0x00000040,
+        EF_SF   = 0x00000080,
+        EF_TF   = 0x00000100,
+        EF_IE   = 0x00000200,
+        EF_DF   = 0x00000400,
+        EF_OF   = 0x00000800,
+        EF_IOPL = 0x00003000,
+        EF_IOPL_RING0 = 0x00000000,
+        EF_IOPL_RING1 = 0x00001000,
+        EF_IOPL_RING2 = 0x00002000,
+        EF_NT   = 0x00004000,   /* nested task */
+        EF_RF   = 0x00010000,   /* resume */
+        EF_VM   = 0x00020000,   /* virtual mode */
+        EF_AC   = 0x00040000,   /* alignment */
+        EF_VIF  = 0x00080000,   /* virtual interrupt */
+        EF_VIP  = 0x00100000,   /* virtual interrupt pending */
+        EF_ID   = 0x00200000,   /* id */
+};
+
+#ifdef __KERNEL__
+#define user_mode(regs) ((regs) && (3 & (regs)->xcs))
+#define instruction_pointer(regs) ((regs) ? (regs)->eip : 0)
+extern void show_regs(struct pt_regs *);
+#endif
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/resource.h b/xenolinux-2.4.16-sparse/include/asm-xeno/resource.h
new file mode 100644 (file)
index 0000000..e0da3ad
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _I386_RESOURCE_H
+#define _I386_RESOURCE_H
+
+/*
+ * Resource limits
+ */
+
+#define RLIMIT_CPU     0               /* CPU time in ms */
+#define RLIMIT_FSIZE   1               /* Maximum filesize */
+#define RLIMIT_DATA    2               /* max data size */
+#define RLIMIT_STACK   3               /* max stack size */
+#define RLIMIT_CORE    4               /* max core file size */
+#define RLIMIT_RSS     5               /* max resident set size */
+#define RLIMIT_NPROC   6               /* max number of processes */
+#define RLIMIT_NOFILE  7               /* max number of open files */
+#define RLIMIT_MEMLOCK 8               /* max locked-in-memory address space */
+#define RLIMIT_AS      9               /* address space limit */
+#define RLIMIT_LOCKS   10              /* maximum file locks held */
+
+#define RLIM_NLIMITS   11
+
+/*
+ * SuS says limits have to be unsigned.
+ * Which makes a ton more sense anyway.
+ */
+#define RLIM_INFINITY  (~0UL)
+
+#ifdef __KERNEL__
+
+#define INIT_RLIMITS                                   \
+{                                                      \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       {      _STK_LIM, RLIM_INFINITY },               \
+       {             0, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       {             0,             0 },               \
+       {      INR_OPEN,     INR_OPEN  },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+        { RLIM_INFINITY, RLIM_INFINITY },              \
+}
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h
new file mode 100644 (file)
index 0000000..9475419
--- /dev/null
@@ -0,0 +1,83 @@
+/* include/asm-i386/rwlock.h
+ *
+ *     Helpers used by both rw spinlocks and rw semaphores.
+ *
+ *     Based in part on code from semaphore.h and
+ *     spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ *     Copyright 1999 Red Hat, Inc.
+ *
+ *     Written by Benjamin LaHaise.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_I386_RWLOCK_H
+#define _ASM_I386_RWLOCK_H
+
+#define RW_LOCK_BIAS            0x01000000
+#define RW_LOCK_BIAS_STR       "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper)   \
+       asm volatile(LOCK "subl $1,(%0)\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper)   \
+       asm volatile(LOCK "subl $1,%0\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushl %%eax\n\t" \
+                    "leal %0,%%eax\n\t" \
+                    "call " helper "\n\t" \
+                    "popl %%eax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper)  do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_read_lock_const(rw, helper); \
+                                               else \
+                                                       __build_read_lock_ptr(rw, helper); \
+                                       } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushl %%eax\n\t" \
+                    "leal %0,%%eax\n\t" \
+                    "call " helper "\n\t" \
+                    "popl %%eax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_write_lock_const(rw, helper); \
+                                               else \
+                                                       __build_write_lock_ptr(rw, helper); \
+                                       } while (0)
+
+#endif
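
The bias scheme behind these helpers: the lock word starts at RW_LOCK_BIAS, readers subtract 1 and writers subtract the whole bias, so a single flag test after the subtraction detects contention. Worked values:

    /* state                      lock word
     * unlocked                   0x01000000
     * N readers                  0x01000000 - N
     * writer held                0x00000000
     *
     * Reader path:  subl $1           -> sign set (js) only if a writer
     *                                    already removed the bias.
     * Writer path:  subl $0x01000000  -> non-zero (jnz) if any readers
     *                                    or another writer are present.
     * Either branch jumps to the out-of-line helper in .text.lock. */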
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h b/xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h
new file mode 100644 (file)
index 0000000..0d416f7
--- /dev/null
@@ -0,0 +1,226 @@
+/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ *
+ * Derived from asm-i386/semaphore.h
+ *
+ *
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+ * uncontended lock. This can be determined because XADD returns the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe readers) waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
+ */
+
+#ifndef _I386_RWSEM_H
+#define _I386_RWSEM_H
+
+#ifndef _LINUX_RWSEM_H
+#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct rwsem_waiter;
+
+extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
+extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
+extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
+
+/*
+ * the semaphore definition
+ */
+struct rw_semaphore {
+       signed long             count;
+#define RWSEM_UNLOCKED_VALUE           0x00000000
+#define RWSEM_ACTIVE_BIAS              0x00000001
+#define RWSEM_ACTIVE_MASK              0x0000ffff
+#define RWSEM_WAITING_BIAS             (-0x00010000)
+#define RWSEM_ACTIVE_READ_BIAS         RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS                (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+       spinlock_t              wait_lock;
+       struct list_head        wait_list;
+#if RWSEM_DEBUG
+       int                     debug;
+#endif
+};
+
+/*
+ * initialisation
+ */
+#if RWSEM_DEBUG
+#define __RWSEM_DEBUG_INIT      , 0
+#else
+#define __RWSEM_DEBUG_INIT     /* */
+#endif
+
+#define __RWSEM_INITIALIZER(name) \
+{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
+       __RWSEM_DEBUG_INIT }
+
+#define DECLARE_RWSEM(name) \
+       struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+       sem->count = RWSEM_UNLOCKED_VALUE;
+       spin_lock_init(&sem->wait_lock);
+       INIT_LIST_HEAD(&sem->wait_list);
+#if RWSEM_DEBUG
+       sem->debug = 0;
+#endif
+}
+
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+               "# beginning down_read\n\t"
+LOCK_PREFIX    "  incl      (%%eax)\n\t" /* adds 0x00000001, returns the old value */
+               "  js        2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  pushl     %%ecx\n\t"
+               "  pushl     %%edx\n\t"
+               "  call      rwsem_down_read_failed\n\t"
+               "  popl      %%edx\n\t"
+               "  popl      %%ecx\n\t"
+               "  jmp       1b\n"
+               ".previous"
+               "# ending down_read\n\t"
+               : "+m"(sem->count)
+               : "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+       int tmp;
+
+       tmp = RWSEM_ACTIVE_WRITE_BIAS;
+       __asm__ __volatile__(
+               "# beginning down_write\n\t"
+LOCK_PREFIX    "  xadd      %0,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
+               "  testl     %0,%0\n\t" /* was the count 0 before? */
+               "  jnz       2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  pushl     %%ecx\n\t"
+               "  call      rwsem_down_write_failed\n\t"
+               "  popl      %%ecx\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending down_write"
+               : "+d"(tmp), "+m"(sem->count)
+               : "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+       __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+       __asm__ __volatile__(
+               "# beginning __up_read\n\t"
+LOCK_PREFIX    "  xadd      %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
+               "  js        2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  decw      %%dx\n\t" /* do nothing if still outstanding active readers */
+               "  jnz       1b\n\t"
+               "  pushl     %%ecx\n\t"
+               "  call      rwsem_wake\n\t"
+               "  popl      %%ecx\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending __up_read\n"
+               : "+m"(sem->count), "+d"(tmp)
+               : "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+               "# beginning __up_write\n\t"
+               "  movl      %2,%%edx\n\t"
+LOCK_PREFIX    "  xaddl     %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+               "  jnz       2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  decw      %%dx\n\t" /* did the active count reduce to 0? */
+               "  jnz       1b\n\t" /* jump back if not */
+               "  pushl     %%ecx\n\t"
+               "  call      rwsem_wake\n\t"
+               "  popl      %%ecx\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending __up_write\n"
+               : "+m"(sem->count)
+               : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS)
+               : "memory", "cc", "edx");
+}
+
+/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+LOCK_PREFIX    "addl %1,%0"
+               :"=m"(sem->count)
+               :"ir"(delta), "m"(sem->count));
+}
+
+/*
+ * implement exchange and add functionality
+ */
+static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+{
+       int tmp = delta;
+
+       __asm__ __volatile__(
+LOCK_PREFIX    "xadd %0,(%2)"
+               : "+r"(tmp), "=m"(sem->count)
+               : "r"(sem), "m"(sem->count)
+               : "memory");
+
+       return tmp+delta;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _I386_RWSEM_H */
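
The fast paths above hinge on a single 32-bit count whose layout makes both grants testable from one xadd: the low 16 bits count active lockers and the high bits go negative while anyone waits, so a reader is granted when the incremented count stays non-negative and a writer only when the xadd returned exactly zero. The sketch below models just that arithmetic in user space; it assumes the standard 2.4 bias constants (RWSEM_ACTIVE_BIAS 0x00000001, RWSEM_WAITING_BIAS -0x00010000, defined earlier in this header) and leaves out atomicity and the wait queue entirely.

    /* rwsem count model: the arithmetic of the i386 fast paths above.
     * Assumed 2.4 bias constants; no atomicity, no blocking. */
    #include <stdio.h>
    #include <stdint.h>

    #define RWSEM_ACTIVE_BIAS        0x00000001
    #define RWSEM_WAITING_BIAS       (-0x00010000)
    #define RWSEM_ACTIVE_READ_BIAS   RWSEM_ACTIVE_BIAS
    #define RWSEM_ACTIVE_WRITE_BIAS  (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

    int main(void)
    {
        int32_t count = 0;                    /* RWSEM_UNLOCKED_VALUE */

        count += RWSEM_ACTIVE_READ_BIAS;      /* __down_read: incl */
        printf("one reader:  %08x granted=%d\n", (unsigned)count, count >= 0);

        count -= RWSEM_ACTIVE_READ_BIAS;      /* __up_read */
        count += RWSEM_ACTIVE_WRITE_BIAS;     /* __down_write: xadd */
        /* the writer was granted iff the xadd returned 0, i.e. the new
         * count is exactly RWSEM_ACTIVE_WRITE_BIAS */
        printf("one writer:  %08x granted=%d\n", (unsigned)count,
               count == RWSEM_ACTIVE_WRITE_BIAS);

        count += RWSEM_ACTIVE_READ_BIAS;      /* a contending reader */
        printf("contended:   %08x reader-granted=%d\n", (unsigned)count,
               count >= 0);
        return 0;
    }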
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h b/xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h
new file mode 100644 (file)
index 0000000..9e2614f
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef _I386_SCATTERLIST_H
+#define _I386_SCATTERLIST_H
+
+struct scatterlist {
+    char *  address;    /* Location data is to be transferred to, NULL for
+                        * highmem page */
+    struct page * page; /* Location for highmem page, if any */
+    unsigned int offset;/* for highmem, page offset */
+
+    dma_addr_t dma_address;
+    unsigned int length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif /* !(_I386_SCATTERLIST_H) */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/segment.h b/xenolinux-2.4.16-sparse/include/asm-xeno/segment.h
new file mode 100644 (file)
index 0000000..5623211
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#define __KERNEL_CS    0x11
+#define __KERNEL_DS    0x19
+
+#define __USER_CS      0x23
+#define __USER_DS      0x2B
+
+#endif
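
These four values are where the Xen port shows through: an x86 selector packs a descriptor-table index in bits 15:3, a table indicator in bit 2 and a requested privilege level in bits 1:0, and since the hypervisor keeps ring 0 for itself the XenoLinux kernel selectors carry RPL 1 instead of the usual 0. A quick decode, as a standalone sketch using nothing beyond the constants above:

    /* Decode the XenoLinux segment selectors defined above. */
    #include <stdio.h>

    static void decode(const char *name, unsigned int sel)
    {
        printf("%-12s %#04x: GDT index %u, TI %u, RPL %u\n",
               name, sel, sel >> 3, (sel >> 2) & 1, sel & 3);
    }

    int main(void)
    {
        decode("__KERNEL_CS", 0x11);   /* index 2, RPL 1: ring-1 kernel */
        decode("__KERNEL_DS", 0x19);   /* index 3, RPL 1               */
        decode("__USER_CS",   0x23);   /* index 4, RPL 3: ring-3 user  */
        decode("__USER_DS",   0x2B);   /* index 5, RPL 3               */
        return 0;
    }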
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h b/xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h
new file mode 100644 (file)
index 0000000..76c738e
--- /dev/null
@@ -0,0 +1,216 @@
+#ifndef _I386_SEMAPHORE_H
+#define _I386_SEMAPHORE_H
+
+#include <linux/linkage.h>
+
+#ifdef __KERNEL__
+
+/*
+ * SMP- and interrupt-safe semaphores..
+ *
+ * (C) Copyright 1996 Linus Torvalds
+ *
+ * Modified 1996-12-23 by Dave Grothe <dave@gcom.com> to fix bugs in
+ *                     the original code and to make semaphore waits
+ *                     interruptible so that processes waiting on
+ *                     semaphores can be killed.
+ * Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
+ *                    functions in asm/semaphore-helper.h while fixing a
+ *                    potential and subtle race discovered by Ulrich Schmid
+ *                    in down_interruptible(). Since I started to play here I
+ *                    also implemented the `trylock' semaphore operation.
+ *          1999-07-02 Artur Skawina <skawina@geocities.com>
+ *                     Optimized "0(ecx)" -> "(ecx)" (the assembler does not
+ *                     do this). Changed calling sequences from push/jmp to
+ *                     traditional call/ret.
+ * Modified 2001-01-01 Andreas Franck <afranck@gmx.de>
+ *                    Some hacks to ensure compatibility with recent
+ *                    GCC snapshots, to avoid stack corruption when compiling
+ *                    with -fomit-frame-pointer. It is not clear whether this
+ *                    will be fixed in GCC, as our previous implementation was a
+ *                    bit dubious.
+ *
+ * If you would like to see an analysis of this implementation, please
+ * ftp to gcom.com and download the file
+ * /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
+ *
+ */
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
+
+struct semaphore {
+       atomic_t count;
+       int sleepers;
+       wait_queue_head_t wait;
+#if WAITQUEUE_DEBUG
+       long __magic;
+#endif
+};
+
+#if WAITQUEUE_DEBUG
+# define __SEM_DEBUG_INIT(name) \
+               , (int)&(name).__magic
+#else
+# define __SEM_DEBUG_INIT(name)
+#endif
+
+#define __SEMAPHORE_INITIALIZER(name,count) \
+{ ATOMIC_INIT(count), 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
+       __SEM_DEBUG_INIT(name) }
+
+#define __MUTEX_INITIALIZER(name) \
+       __SEMAPHORE_INITIALIZER(name,1)
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+       struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
+
+static inline void sema_init (struct semaphore *sem, int val)
+{
+/*
+ *     *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
+ *
+ * I'd rather use the more flexible initialization above, but sadly
+ * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
+ */
+       atomic_set(&sem->count, val);
+       sem->sleepers = 0;
+       init_waitqueue_head(&sem->wait);
+#if WAITQUEUE_DEBUG
+       sem->__magic = (int)&sem->__magic;
+#endif
+}
+
+static inline void init_MUTEX (struct semaphore *sem)
+{
+       sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
+{
+       sema_init(sem, 0);
+}
+
+asmlinkage void __down_failed(void /* special register calling convention */);
+asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
+asmlinkage int  __down_failed_trylock(void  /* params in registers */);
+asmlinkage void __up_wakeup(void /* special register calling convention */);
+
+asmlinkage void __down(struct semaphore * sem);
+asmlinkage int  __down_interruptible(struct semaphore * sem);
+asmlinkage int  __down_trylock(struct semaphore * sem);
+asmlinkage void __up(struct semaphore * sem);
+
+/*
+ * This is ugly, but we want the default case to fall through.
+ * "__down_failed" is a special asm handler that calls the C
+ * routine that actually waits. See arch/i386/kernel/semaphore.c
+ */
+static inline void down(struct semaphore * sem)
+{
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic down operation\n\t"
+               LOCK "decl %0\n\t"     /* --sem->count */
+               "js 2f\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=m" (sem->count)
+               :"c" (sem)
+               :"memory");
+}
+
+/*
+ * Interruptible try to acquire a semaphore.  If we obtained
+ * it, return zero.  If we were interrupted, returns -EINTR
+ */
+static inline int down_interruptible(struct semaphore * sem)
+{
+       int result;
+
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic interruptible down operation\n\t"
+               LOCK "decl %1\n\t"     /* --sem->count */
+               "js 2f\n\t"
+               "xorl %0,%0\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed_interruptible\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=a" (result), "=m" (sem->count)
+               :"c" (sem)
+               :"memory");
+       return result;
+}
+
+/*
+ * Non-blockingly attempt to down() a semaphore.
+ * Returns zero if we acquired it
+ */
+static inline int down_trylock(struct semaphore * sem)
+{
+       int result;
+
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic interruptible down operation\n\t"
+               LOCK "decl %1\n\t"     /* --sem->count */
+               "js 2f\n\t"
+               "xorl %0,%0\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed_trylock\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=a" (result), "=m" (sem->count)
+               :"c" (sem)
+               :"memory");
+       return result;
+}
+
+/*
+ * Note! This is subtle. We jump to wake people up only if
+ * the semaphore was negative (== somebody was waiting on it).
+ * The default case (no contention) will result in NO
+ * jumps for both down() and up().
+ */
+static inline void up(struct semaphore * sem)
+{
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+       __asm__ __volatile__(
+               "# atomic up operation\n\t"
+               LOCK "incl %0\n\t"     /* ++sem->count */
+               "jle 2f\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __up_wakeup\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=m" (sem->count)
+               :"c" (sem)
+               :"memory");
+}
+
+#endif
+#endif
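
All four operations drive a single convention: count > 0 means free slots, count <= 0 means the semaphore is held with waiters recorded, so down() needs the slow path only when its locked decrement goes negative and up() needs __up_wakeup only when its increment leaves the count at or below zero. A plain-C model of that protocol follows; a single-threaded sketch with no atomics and the slow-path calls replaced by printfs:

    /* Counting-semaphore fast-path model of down()/up() above. */
    #include <stdio.h>

    static int count = 1;              /* DECLARE_MUTEX: initial count 1 */

    static void down(void)
    {
        if (--count < 0)               /* "lock; decl; js" */
            printf("down: slow path, would call __down_failed and sleep\n");
        else
            printf("down: acquired, count=%d\n", count);
    }

    static void up(void)
    {
        if (++count <= 0)              /* "lock; incl; jle" */
            printf("up: waiters present, would call __up_wakeup\n");
        else
            printf("up: released, count=%d\n", count);
    }

    int main(void)
    {
        down();   /* acquired, count=0        */
        down();   /* count=-1: contended path */
        up();     /* count=0: wakes a waiter  */
        up();     /* count=1: fully released  */
        return 0;
    }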
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h
new file mode 100644 (file)
index 0000000..3238351
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _I386_SEMBUF_H
+#define _I386_SEMBUF_H
+
+/* 
+ * The semid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+       struct ipc64_perm sem_perm;             /* permissions .. see ipc.h */
+       __kernel_time_t sem_otime;              /* last semop time */
+       unsigned long   __unused1;
+       __kernel_time_t sem_ctime;              /* last change time */
+       unsigned long   __unused2;
+       unsigned long   sem_nsems;              /* no. of semaphores in array */
+       unsigned long   __unused3;
+       unsigned long   __unused4;
+};
+
+#endif /* _I386_SEMBUF_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/serial.h b/xenolinux-2.4.16-sparse/include/asm-xeno/serial.h
new file mode 100644 (file)
index 0000000..ecc8278
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * include/asm-i386/serial.h
+ */
+
+#include <linux/config.h>
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ *
+ * It'd be nice if someone built a serial card with a 24.576 MHz
+ * clock, since the 16550A is capable of handling a top speed of 1.5
+ * megabits/second; but this requires the faster clock.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+/* Standard COM flags (except for COM4, because of the 8514 problem) */
+#ifdef CONFIG_SERIAL_DETECT_IRQ
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
+#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#else
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
+#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
+#endif
+
+#ifdef CONFIG_SERIAL_MANY_PORTS
+#define FOURPORT_FLAGS ASYNC_FOURPORT
+#define ACCENT_FLAGS 0
+#define BOCA_FLAGS 0
+#define HUB6_FLAGS 0
+#define RS_TABLE_SIZE  64
+#else
+#define RS_TABLE_SIZE
+#endif
+
+#define MCA_COM_FLAGS  (STD_COM_FLAGS|ASYNC_BOOT_ONLYMCA)
+
+/*
+ * The following define the access methods for the HUB6 card. All
+ * access is through two ports for all 24 possible chips. The card is
+ * selected through the high 2 bits, the port on that card with the
+ * "middle" 3 bits, and the register on that port with the bottom
+ * 3 bits.
+ *
+ * While the access port and interrupt is configurable, the default
+ * port locations are 0x302 for the port control register, and 0x303
+ * for the data read/write register. Normally, the interrupt is at irq3
+ * but can be anything from 3 to 7 inclusive. Note that using 3 will
+ * require disabling com2.
+ */
+
+#define C_P(card,port) (((card)<<6|(port)<<3) + 1)
+
+#define STD_SERIAL_PORT_DEFNS                  \
+       /* UART CLK   PORT IRQ     FLAGS        */                      \
+       { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS },      /* ttyS0 */     \
+       { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS },      /* ttyS1 */     \
+       { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS },      /* ttyS2 */     \
+       { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS },     /* ttyS3 */
+
+
+#ifdef CONFIG_SERIAL_MANY_PORTS
+#define EXTRA_SERIAL_PORT_DEFNS                        \
+       { 0, BASE_BAUD, 0x1A0, 9, FOURPORT_FLAGS },     /* ttyS4 */     \
+       { 0, BASE_BAUD, 0x1A8, 9, FOURPORT_FLAGS },     /* ttyS5 */     \
+       { 0, BASE_BAUD, 0x1B0, 9, FOURPORT_FLAGS },     /* ttyS6 */     \
+       { 0, BASE_BAUD, 0x1B8, 9, FOURPORT_FLAGS },     /* ttyS7 */     \
+       { 0, BASE_BAUD, 0x2A0, 5, FOURPORT_FLAGS },     /* ttyS8 */     \
+       { 0, BASE_BAUD, 0x2A8, 5, FOURPORT_FLAGS },     /* ttyS9 */     \
+       { 0, BASE_BAUD, 0x2B0, 5, FOURPORT_FLAGS },     /* ttyS10 */    \
+       { 0, BASE_BAUD, 0x2B8, 5, FOURPORT_FLAGS },     /* ttyS11 */    \
+       { 0, BASE_BAUD, 0x330, 4, ACCENT_FLAGS },       /* ttyS12 */    \
+       { 0, BASE_BAUD, 0x338, 4, ACCENT_FLAGS },       /* ttyS13 */    \
+       { 0, BASE_BAUD, 0x000, 0, 0 },  /* ttyS14 (spare) */            \
+       { 0, BASE_BAUD, 0x000, 0, 0 },  /* ttyS15 (spare) */            \
+       { 0, BASE_BAUD, 0x100, 12, BOCA_FLAGS },        /* ttyS16 */    \
+       { 0, BASE_BAUD, 0x108, 12, BOCA_FLAGS },        /* ttyS17 */    \
+       { 0, BASE_BAUD, 0x110, 12, BOCA_FLAGS },        /* ttyS18 */    \
+       { 0, BASE_BAUD, 0x118, 12, BOCA_FLAGS },        /* ttyS19 */    \
+       { 0, BASE_BAUD, 0x120, 12, BOCA_FLAGS },        /* ttyS20 */    \
+       { 0, BASE_BAUD, 0x128, 12, BOCA_FLAGS },        /* ttyS21 */    \
+       { 0, BASE_BAUD, 0x130, 12, BOCA_FLAGS },        /* ttyS22 */    \
+       { 0, BASE_BAUD, 0x138, 12, BOCA_FLAGS },        /* ttyS23 */    \
+       { 0, BASE_BAUD, 0x140, 12, BOCA_FLAGS },        /* ttyS24 */    \
+       { 0, BASE_BAUD, 0x148, 12, BOCA_FLAGS },        /* ttyS25 */    \
+       { 0, BASE_BAUD, 0x150, 12, BOCA_FLAGS },        /* ttyS26 */    \
+       { 0, BASE_BAUD, 0x158, 12, BOCA_FLAGS },        /* ttyS27 */    \
+       { 0, BASE_BAUD, 0x160, 12, BOCA_FLAGS },        /* ttyS28 */    \
+       { 0, BASE_BAUD, 0x168, 12, BOCA_FLAGS },        /* ttyS29 */    \
+       { 0, BASE_BAUD, 0x170, 12, BOCA_FLAGS },        /* ttyS30 */    \
+       { 0, BASE_BAUD, 0x178, 12, BOCA_FLAGS },        /* ttyS31 */
+#else
+#define EXTRA_SERIAL_PORT_DEFNS
+#endif
+
+/* You can have up to four HUB6's in the system, but I've only
+ * included two cards here for a total of twelve ports.
+ */
+#if (defined(CONFIG_HUB6) && defined(CONFIG_SERIAL_MANY_PORTS))
+#define HUB6_SERIAL_PORT_DFNS          \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,0) },  /* ttyS32 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,1) },  /* ttyS33 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,2) },  /* ttyS34 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,3) },  /* ttyS35 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,4) },  /* ttyS36 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,5) },  /* ttyS37 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,0) },  /* ttyS38 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,1) },  /* ttyS39 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,2) },  /* ttyS40 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,3) },  /* ttyS41 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,4) },  /* ttyS42 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,5) },  /* ttyS43 */
+#else
+#define HUB6_SERIAL_PORT_DFNS
+#endif
+
+#ifdef CONFIG_MCA
+#define MCA_SERIAL_PORT_DFNS                   \
+       { 0, BASE_BAUD, 0x3220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x3228, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x4220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x4228, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x5220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x5228, 3, MCA_COM_FLAGS },
+#else
+#define MCA_SERIAL_PORT_DFNS
+#endif
+
+#define SERIAL_PORT_DFNS               \
+       STD_SERIAL_PORT_DEFNS           \
+       EXTRA_SERIAL_PORT_DEFNS         \
+       HUB6_SERIAL_PORT_DFNS           \
+       MCA_SERIAL_PORT_DFNS
+
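
Tracing the C_P(card,port) macro against the HUB6 comment above: the card number lands in bits 7:6 of the control-register value, the port on that card in bits 5:3, and a constant +1 bias is folded in (the header does not spell out the purpose of the bias, so this sketch only reproduces the arithmetic). For example, C_P(1,5) = ((1<<6)|(5<<3))+1 = 0x69.

    /* HUB6 selector encoding used by the C_P() macro above:
     * bits 7:6 = card, bits 5:3 = port on that card, plus a +1 bias. */
    #include <stdio.h>

    #define C_P(card, port) ((((card) << 6) | ((port) << 3)) + 1)

    int main(void)
    {
        int card, port;
        for (card = 0; card < 2; card++)
            for (port = 0; port < 6; port++)
                printf("C_P(%d,%d) = %#04x\n", card, port, C_P(card, port));
        return 0;   /* e.g. C_P(1,5) == 0x69 */
    }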
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/setup.h b/xenolinux-2.4.16-sparse/include/asm-xeno/setup.h
new file mode 100644 (file)
index 0000000..ae25cc4
--- /dev/null
@@ -0,0 +1,10 @@
+/*
+ *     Just a placeholder. We don't want to have to test for x86 before
+ *     we include stuff.
+ */
+
+#ifndef _i386_SETUP_H
+#define _i386_SETUP_H
+
+
+#endif /* _i386_SETUP_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h
new file mode 100644 (file)
index 0000000..d1cdc3c
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef _I386_SHMBUF_H
+#define _I386_SHMBUF_H
+
+/* 
+ * The shmid64_ds structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+       struct ipc64_perm       shm_perm;       /* operation perms */
+       size_t                  shm_segsz;      /* size of segment (bytes) */
+       __kernel_time_t         shm_atime;      /* last attach time */
+       unsigned long           __unused1;
+       __kernel_time_t         shm_dtime;      /* last detach time */
+       unsigned long           __unused2;
+       __kernel_time_t         shm_ctime;      /* last change time */
+       unsigned long           __unused3;
+       __kernel_pid_t          shm_cpid;       /* pid of creator */
+       __kernel_pid_t          shm_lpid;       /* pid of last operator */
+       unsigned long           shm_nattch;     /* no. of current attaches */
+       unsigned long           __unused4;
+       unsigned long           __unused5;
+};
+
+struct shminfo64 {
+       unsigned long   shmmax;
+       unsigned long   shmmin;
+       unsigned long   shmmni;
+       unsigned long   shmseg;
+       unsigned long   shmall;
+       unsigned long   __unused1;
+       unsigned long   __unused2;
+       unsigned long   __unused3;
+       unsigned long   __unused4;
+};
+
+#endif /* _I386_SHMBUF_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h b/xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h
new file mode 100644 (file)
index 0000000..786243a
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ASMI386_SHMPARAM_H
+#define _ASMI386_SHMPARAM_H
+
+#define        SHMLBA PAGE_SIZE                 /* attach addr a multiple of this */
+
+#endif /* _ASMI386_SHMPARAM_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h
new file mode 100644 (file)
index 0000000..b511459
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef _ASMi386_SIGCONTEXT_H
+#define _ASMi386_SIGCONTEXT_H
+
+/*
+ * As documented in the iBCS2 standard..
+ *
+ * The first part of "struct _fpstate" is just the normal i387
+ * hardware setup, the extra "status" word is used to save the
+ * coprocessor status word before entering the handler.
+ *
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * The FPU state data structure has had to grow to accommodate the
+ * extended FPU state required by the Streaming SIMD Extensions.
+ * There is no documented standard to accomplish this at the moment.
+ */
+struct _fpreg {
+       unsigned short significand[4];
+       unsigned short exponent;
+};
+
+struct _fpxreg {
+       unsigned short significand[4];
+       unsigned short exponent;
+       unsigned short padding[3];
+};
+
+struct _xmmreg {
+       unsigned long element[4];
+};
+
+struct _fpstate {
+       /* Regular FPU environment */
+       unsigned long   cw;
+       unsigned long   sw;
+       unsigned long   tag;
+       unsigned long   ipoff;
+       unsigned long   cssel;
+       unsigned long   dataoff;
+       unsigned long   datasel;
+       struct _fpreg   _st[8];
+       unsigned short  status;
+       unsigned short  magic;          /* 0xffff = regular FPU data only */
+
+       /* FXSR FPU environment */
+       unsigned long   _fxsr_env[6];   /* FXSR FPU env is ignored */
+       unsigned long   mxcsr;
+       unsigned long   reserved;
+       struct _fpxreg  _fxsr_st[8];    /* FXSR FPU reg data is ignored */
+       struct _xmmreg  _xmm[8];
+       unsigned long   padding[56];
+};
+
+#define X86_FXSR_MAGIC         0x0000
+
+struct sigcontext {
+       unsigned short gs, __gsh;
+       unsigned short fs, __fsh;
+       unsigned short es, __esh;
+       unsigned short ds, __dsh;
+       unsigned long edi;
+       unsigned long esi;
+       unsigned long ebp;
+       unsigned long esp;
+       unsigned long ebx;
+       unsigned long edx;
+       unsigned long ecx;
+       unsigned long eax;
+       unsigned long trapno;
+       unsigned long err;
+       unsigned long eip;
+       unsigned short cs, __csh;
+       unsigned long eflags;
+       unsigned long esp_at_signal;
+       unsigned short ss, __ssh;
+       struct _fpstate * fpstate;
+       unsigned long oldmask;
+       unsigned long cr2;
+};
+
+
+#endif
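
The magic field is what lets user code tell the two frame layouts apart: 0xffff means only the classic i387 block is valid, while X86_FXSR_MAGIC (0x0000) marks a frame whose FXSR/SSE area (mxcsr, _fxsr_st[], _xmm[]) is populated. A hedged sketch of the check follows; how a handler obtains the _fpstate pointer in the first place (old-style sigcontext argument or ucontext) is outside this header and assumed here.

    /* Distinguish classic i387 signal frames from extended FXSR/SSE
     * frames via _fpstate.magic, per the definitions above. */
    #include <stdio.h>

    #define X86_FXSR_MAGIC 0x0000

    static void inspect(unsigned short magic)  /* value of fpstate->magic */
    {
        if (magic == 0xffff)
            printf("regular FPU data only; FXSR area is not valid\n");
        else if (magic == X86_FXSR_MAGIC)
            printf("FXSR frame: mxcsr and _xmm[] are valid\n");
        else
            printf("unrecognised frame magic %#x\n", magic);
    }

    int main(void)
    {
        inspect(0xffff);           /* classic i387 frame */
        inspect(X86_FXSR_MAGIC);   /* SSE-capable frame  */
        return 0;
    }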
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h b/xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h
new file mode 100644 (file)
index 0000000..9abf542
--- /dev/null
@@ -0,0 +1,232 @@
+#ifndef _I386_SIGINFO_H
+#define _I386_SIGINFO_H
+
+#include <linux/types.h>
+
+/* XXX: This structure was copied from the Alpha; is there an iBCS version?  */
+
+typedef union sigval {
+       int sival_int;
+       void *sival_ptr;
+} sigval_t;
+
+#define SI_MAX_SIZE    128
+#define SI_PAD_SIZE    ((SI_MAX_SIZE/sizeof(int)) - 3)
+
+typedef struct siginfo {
+       int si_signo;
+       int si_errno;
+       int si_code;
+
+       union {
+               int _pad[SI_PAD_SIZE];
+
+               /* kill() */
+               struct {
+                       pid_t _pid;             /* sender's pid */
+                       uid_t _uid;             /* sender's uid */
+               } _kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       unsigned int _timer1;
+                       unsigned int _timer2;
+               } _timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       pid_t _pid;             /* sender's pid */
+                       uid_t _uid;             /* sender's uid */
+                       sigval_t _sigval;
+               } _rt;
+
+               /* SIGCHLD */
+               struct {
+                       pid_t _pid;             /* which child */
+                       uid_t _uid;             /* sender's uid */
+                       int _status;            /* exit code */
+                       clock_t _utime;
+                       clock_t _stime;
+               } _sigchld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       void *_addr; /* faulting insn/memory ref. */
+               } _sigfault;
+
+               /* SIGPOLL */
+               struct {
+                       int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int _fd;
+               } _sigpoll;
+       } _sifields;
+} siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid         _sifields._kill._pid
+#define si_uid         _sifields._kill._uid
+#define si_status      _sifields._sigchld._status
+#define si_utime       _sifields._sigchld._utime
+#define si_stime       _sifields._sigchld._stime
+#define si_value       _sifields._rt._sigval
+#define si_int         _sifields._rt._sigval.sival_int
+#define si_ptr         _sifields._rt._sigval.sival_ptr
+#define si_addr                _sifields._sigfault._addr
+#define si_band                _sifields._sigpoll._band
+#define si_fd          _sifields._sigpoll._fd
+
+#ifdef __KERNEL__
+#define __SI_MASK      0xffff0000
+#define __SI_KILL      (0 << 16)
+#define __SI_TIMER     (1 << 16)
+#define __SI_POLL      (2 << 16)
+#define __SI_FAULT     (3 << 16)
+#define __SI_CHLD      (4 << 16)
+#define __SI_RT                (5 << 16)
+#define __SI_CODE(T,N) ((T) | ((N) & 0xffff))
+#else
+#define __SI_KILL      0
+#define __SI_TIMER     0
+#define __SI_POLL      0
+#define __SI_FAULT     0
+#define __SI_CHLD      0
+#define __SI_RT                0
+#define __SI_CODE(T,N) (N)
+#endif
+
+/*
+ * si_code values
+ * Digital reserves positive values for kernel-generated signals.
+ */
+#define SI_USER                0               /* sent by kill, sigsend, raise */
+#define SI_KERNEL      0x80            /* sent by the kernel from somewhere */
+#define SI_QUEUE       -1              /* sent by sigqueue */
+#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
+#define SI_MESGQ       -3              /* sent by real time mesq state change */
+#define SI_ASYNCIO     -4              /* sent by AIO completion */
+#define SI_SIGIO       -5              /* sent by queued SIGIO */
+
+#define SI_FROMUSER(siptr)     ((siptr)->si_code <= 0)
+#define SI_FROMKERNEL(siptr)   ((siptr)->si_code > 0)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_ILLOPC     (__SI_FAULT|1)  /* illegal opcode */
+#define ILL_ILLOPN     (__SI_FAULT|2)  /* illegal operand */
+#define ILL_ILLADR     (__SI_FAULT|3)  /* illegal addressing mode */
+#define ILL_ILLTRP     (__SI_FAULT|4)  /* illegal trap */
+#define ILL_PRVOPC     (__SI_FAULT|5)  /* privileged opcode */
+#define ILL_PRVREG     (__SI_FAULT|6)  /* privileged register */
+#define ILL_COPROC     (__SI_FAULT|7)  /* coprocessor error */
+#define ILL_BADSTK     (__SI_FAULT|8)  /* internal stack error */
+#define NSIGILL                8
+
+/*
+ * SIGFPE si_codes
+ */
+#define FPE_INTDIV     (__SI_FAULT|1)  /* integer divide by zero */
+#define FPE_INTOVF     (__SI_FAULT|2)  /* integer overflow */
+#define FPE_FLTDIV     (__SI_FAULT|3)  /* floating point divide by zero */
+#define FPE_FLTOVF     (__SI_FAULT|4)  /* floating point overflow */
+#define FPE_FLTUND     (__SI_FAULT|5)  /* floating point underflow */
+#define FPE_FLTRES     (__SI_FAULT|6)  /* floating point inexact result */
+#define FPE_FLTINV     (__SI_FAULT|7)  /* floating point invalid operation */
+#define FPE_FLTSUB     (__SI_FAULT|8)  /* subscript out of range */
+#define NSIGFPE                8
+
+/*
+ * SIGSEGV si_codes
+ */
+#define SEGV_MAPERR    (__SI_FAULT|1)  /* address not mapped to object */
+#define SEGV_ACCERR    (__SI_FAULT|2)  /* invalid permissions for mapped object */
+#define NSIGSEGV       2
+
+/*
+ * SIGBUS si_codes
+ */
+#define BUS_ADRALN     (__SI_FAULT|1)  /* invalid address alignment */
+#define BUS_ADRERR     (__SI_FAULT|2)  /* non-existent physical address */
+#define BUS_OBJERR     (__SI_FAULT|3)  /* object specific hardware error */
+#define NSIGBUS                3
+
+/*
+ * SIGTRAP si_codes
+ */
+#define TRAP_BRKPT     (__SI_FAULT|1)  /* process breakpoint */
+#define TRAP_TRACE     (__SI_FAULT|2)  /* process trace trap */
+#define NSIGTRAP       2
+
+/*
+ * SIGCHLD si_codes
+ */
+#define CLD_EXITED     (__SI_CHLD|1)   /* child has exited */
+#define CLD_KILLED     (__SI_CHLD|2)   /* child was killed */
+#define CLD_DUMPED     (__SI_CHLD|3)   /* child terminated abnormally */
+#define CLD_TRAPPED    (__SI_CHLD|4)   /* traced child has trapped */
+#define CLD_STOPPED    (__SI_CHLD|5)   /* child has stopped */
+#define CLD_CONTINUED  (__SI_CHLD|6)   /* stopped child has continued */
+#define NSIGCHLD       6
+
+/*
+ * SIGPOLL si_codes
+ */
+#define POLL_IN                (__SI_POLL|1)   /* data input available */
+#define POLL_OUT       (__SI_POLL|2)   /* output buffers available */
+#define POLL_MSG       (__SI_POLL|3)   /* input message available */
+#define POLL_ERR       (__SI_POLL|4)   /* i/o error */
+#define POLL_PRI       (__SI_POLL|5)   /* high priority input available */
+#define POLL_HUP       (__SI_POLL|6)   /* device disconnected */
+#define NSIGPOLL       6
+
+/*
+ * sigevent definitions
+ * 
+ * It seems likely that SIGEV_THREAD will have to be handled from 
+ * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
+ * thread manager then catches and does the appropriate nonsense.
+ * However, everything is written out here so as to not get lost.
+ */
+#define SIGEV_SIGNAL   0       /* notify via signal */
+#define SIGEV_NONE     1       /* other notification: meaningless */
+#define SIGEV_THREAD   2       /* deliver via thread creation */
+
+#define SIGEV_MAX_SIZE 64
+#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3)
+
+typedef struct sigevent {
+       sigval_t sigev_value;
+       int sigev_signo;
+       int sigev_notify;
+       union {
+               int _pad[SIGEV_PAD_SIZE];
+
+               struct {
+                       void (*_function)(sigval_t);
+                       void *_attribute;       /* really pthread_attr_t */
+               } _sigev_thread;
+       } _sigev_un;
+} sigevent_t;
+
+#define sigev_notify_function  _sigev_un._sigev_thread._function
+#define sigev_notify_attributes        _sigev_un._sigev_thread._attribute
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+
+static inline void copy_siginfo(siginfo_t *to, siginfo_t *from)
+{
+       if (from->si_code < 0)
+               memcpy(to, from, sizeof(siginfo_t));
+       else
+               /* _sigchld is currently the largest known union member */
+               memcpy(to, from, 3*sizeof(int) + sizeof(from->_sifields._sigchld));
+}
+
+extern int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from);
+
+#endif /* __KERNEL__ */
+
+#endif
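
The kernel-only __SI_* constants pack a fault class into the top 16 bits of si_code with the user-visible code in the bottom 16, which is exactly why SI_FROMKERNEL() can be a plain sign test: every kernel-generated class value is positive, while the user-originated SI_QUEUE family is zero or negative. A small sketch of the packing; SI_FROMUSER is adapted here to take the code directly rather than a siginfo_t pointer:

    /* si_code packing: fault class in the top 16 bits, code in the
     * bottom 16, mirroring the kernel-side definitions above. */
    #include <stdio.h>

    #define __SI_FAULT          (3 << 16)
    #define __SI_CODE(T, N)     ((T) | ((N) & 0xffff))
    #define SEGV_MAPERR         __SI_CODE(__SI_FAULT, 1)

    #define SI_USER             0
    #define SI_FROMUSER(code)   ((code) <= 0)   /* adapted: takes the code */

    int main(void)
    {
        printf("SEGV_MAPERR = %#x (class %d, code %d)\n",
               SEGV_MAPERR, SEGV_MAPERR >> 16, SEGV_MAPERR & 0xffff);
        printf("SI_USER from user space?     %d\n", SI_FROMUSER(SI_USER));
        printf("SEGV_MAPERR from user space? %d\n", SI_FROMUSER(SEGV_MAPERR));
        return 0;
    }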
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/signal.h b/xenolinux-2.4.16-sparse/include/asm-xeno/signal.h
new file mode 100644 (file)
index 0000000..8740d4e
--- /dev/null
@@ -0,0 +1,221 @@
+#ifndef _ASMi386_SIGNAL_H
+#define _ASMi386_SIGNAL_H
+
+#include <linux/types.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifdef __KERNEL__
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define _NSIG          64
+#define _NSIG_BPW      32
+#define _NSIG_WORDS    (_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t;            /* at least 32 bits */
+
+typedef struct {
+       unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG           32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP          1
+#define SIGINT          2
+#define SIGQUIT                 3
+#define SIGILL          4
+#define SIGTRAP                 5
+#define SIGABRT                 6
+#define SIGIOT          6
+#define SIGBUS          7
+#define SIGFPE          8
+#define SIGKILL                 9
+#define SIGUSR1                10
+#define SIGSEGV                11
+#define SIGUSR2                12
+#define SIGPIPE                13
+#define SIGALRM                14
+#define SIGTERM                15
+#define SIGSTKFLT      16
+#define SIGCHLD                17
+#define SIGCONT                18
+#define SIGSTOP                19
+#define SIGTSTP                20
+#define SIGTTIN                21
+#define SIGTTOU                22
+#define SIGURG         23
+#define SIGXCPU                24
+#define SIGXFSZ                25
+#define SIGVTALRM      26
+#define SIGPROF                27
+#define SIGWINCH       28
+#define SIGIO          29
+#define SIGPOLL                SIGIO
+/*
+#define SIGLOST                29
+*/
+#define SIGPWR         30
+#define SIGSYS         31
+#define        SIGUNUSED       31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN       32
+#define SIGRTMAX       (_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
+ * SA_RESTART flag to get restarting signals (which were the default long ago).
+ * SA_NOCLDSTOP is a flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP   0x00000001
+#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
+#define SA_SIGINFO     0x00000004
+#define SA_ONSTACK     0x08000000
+#define SA_RESTART     0x10000000
+#define SA_NODEFER     0x40000000
+#define SA_RESETHAND   0x80000000
+
+#define SA_NOMASK      SA_NODEFER
+#define SA_ONESHOT     SA_RESETHAND
+#define SA_INTERRUPT   0x20000000 /* dummy -- ignored */
+
+#define SA_RESTORER    0x04000000
+
+/* 
+ * sigaltstack controls
+ */
+#define SS_ONSTACK     1
+#define SS_DISABLE     2
+
+#define MINSIGSTKSZ    2048
+#define SIGSTKSZ       8192
+
+#ifdef __KERNEL__
+
+/*
+ * These values of sa_flags are used only by the kernel as part of the
+ * irq handling routines.
+ *
+ * SA_INTERRUPT is also used by the irq handling routines.
+ * SA_SHIRQ is for shared interrupt support on PCI and EISA.
+ */
+#define SA_PROBE               SA_ONESHOT
+#define SA_SAMPLE_RANDOM       SA_RESTART
+#define SA_SHIRQ               0x04000000
+#endif
+
+#define SIG_BLOCK          0   /* for blocking signals */
+#define SIG_UNBLOCK        1   /* for unblocking signals */
+#define SIG_SETMASK        2   /* for setting the signal mask */
+
+/* Type of a signal handler.  */
+typedef void (*__sighandler_t)(int);
+
+#define SIG_DFL        ((__sighandler_t)0)     /* default signal handling */
+#define SIG_IGN        ((__sighandler_t)1)     /* ignore signal */
+#define SIG_ERR        ((__sighandler_t)-1)    /* error return from signal */
+
+#ifdef __KERNEL__
+struct old_sigaction {
+       __sighandler_t sa_handler;
+       old_sigset_t sa_mask;
+       unsigned long sa_flags;
+       void (*sa_restorer)(void);
+};
+
+struct sigaction {
+       __sighandler_t sa_handler;
+       unsigned long sa_flags;
+       void (*sa_restorer)(void);
+       sigset_t sa_mask;               /* mask last for extensibility */
+};
+
+struct k_sigaction {
+       struct sigaction sa;
+};
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+       union {
+         __sighandler_t _sa_handler;
+         void (*_sa_sigaction)(int, struct siginfo *, void *);
+       } _u;
+       sigset_t sa_mask;
+       unsigned long sa_flags;
+       void (*sa_restorer)(void);
+};
+
+#define sa_handler     _u._sa_handler
+#define sa_sigaction   _u._sa_sigaction
+
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+       void *ss_sp;
+       int ss_flags;
+       size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+#include <asm/sigcontext.h>
+
+#define __HAVE_ARCH_SIG_BITOPS
+
+static __inline__ void sigaddset(sigset_t *set, int _sig)
+{
+       __asm__("btsl %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
+}
+
+static __inline__ void sigdelset(sigset_t *set, int _sig)
+{
+       __asm__("btrl %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
+}
+
+static __inline__ int __const_sigismember(sigset_t *set, int _sig)
+{
+       unsigned long sig = _sig - 1;
+       return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW));
+}
+
+static __inline__ int __gen_sigismember(sigset_t *set, int _sig)
+{
+       int ret;
+       __asm__("btl %2,%1\n\tsbbl %0,%0"
+               : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+       return ret;
+}
+
+#define sigismember(set,sig)                   \
+       (__builtin_constant_p(sig) ?            \
+        __const_sigismember((set),(sig)) :     \
+        __gen_sigismember((set),(sig)))
+
+#define sigmask(sig)   (1UL << ((sig) - 1))
+
+static __inline__ int sigfindinword(unsigned long word)
+{
+       __asm__("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc");
+       return word;
+}
+
+#endif /* __KERNEL__ */
+
+#endif
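
The __HAVE_ARCH_SIG_BITOPS block treats a sigset_t as a flat bit array, signal n at bit n-1 across _NSIG_WORDS words, with btsl/btrl/btl doing the word-and-bit indexing in one instruction; sigismember() additionally lets gcc collapse to the pure-C __const_sigismember() whenever the signal number is a compile-time constant. The same arithmetic in portable C, as a sketch:

    /* Plain-C equivalent of the sigset bit operations above
     * (the header uses btsl/btrl/btl to the same effect). */
    #include <stdio.h>

    #define _NSIG        64
    #define _NSIG_BPW    32
    #define _NSIG_WORDS  (_NSIG / _NSIG_BPW)

    typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_model_t;

    static void sigaddset_m(sigset_model_t *set, int sig)
    {
        unsigned long bit = sig - 1;           /* signal 1 -> bit 0 */
        set->sig[bit / _NSIG_BPW] |= 1UL << (bit % _NSIG_BPW);
    }

    static int sigismember_m(const sigset_model_t *set, int sig)
    {
        unsigned long bit = sig - 1;
        return 1 & (set->sig[bit / _NSIG_BPW] >> (bit % _NSIG_BPW));
    }

    int main(void)
    {
        sigset_model_t set = { { 0, 0 } };
        sigaddset_m(&set, 2);    /* SIGINT                          */
        sigaddset_m(&set, 33);   /* first signal to land in sig[1]  */
        printf("SIGINT set: %d\n", sigismember_m(&set, 2));
        printf("sig 33 set: %d (word1=%#lx)\n",
               sigismember_m(&set, 33), set.sig[1]);
        return 0;
    }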
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/smp.h b/xenolinux-2.4.16-sparse/include/asm-xeno/smp.h
new file mode 100644 (file)
index 0000000..e485ca0
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/ptrace.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+#  define TARGET_CPUS cpu_online_map
+#  define INT_DELIVERY_MODE 1     /* logical delivery broadcast to all procs */
+#else
+#  define INT_DELIVERY_MODE 1     /* logical delivery */
+#  define TARGET_CPUS 0x01
+#endif
+
+#ifndef clustered_apic_mode
+  #define clustered_apic_mode (0)
+  #define esr_disable (0)
+#endif 
+
+#ifdef CONFIG_SMP
+#ifndef __ASSEMBLY__
+
+/*
+ * Private routines/data
+ */
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_send_reschedule(int cpu);
+extern void smp_invalidate_rcv(void);          /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+static inline int cpu_logical_map(int cpu)
+{
+       return cpu;
+}
+static inline int cpu_number_map(int cpu)
+{
+       return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+#define MAX_APICID 256
+extern volatile int cpu_to_physical_apicid[NR_CPUS];
+extern volatile int physical_apicid_to_cpu[MAX_APICID];
+extern volatile int cpu_to_logical_apicid[NR_CPUS];
+extern volatile int logical_apicid_to_cpu[MAX_APICID];
+
+/*
+ * General functions that each host system must provide.
+ */
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id);                /* Store per CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() (current->processor)
+
+#endif /* !__ASSEMBLY__ */
+
+#define NO_PROC_ID             0xFF            /* No processor magic marker */
+
+/*
+ *     This magic constant controls our willingness to transfer
+ *     a process across CPUs. Such a transfer incurs misses in the L1
+ *     cache and, on a P6 or P5 board with multiple L2 caches, in the
+ *     L2 as well. My gut feeling is that the right value will vary
+ *     from board to board. For a board
+ *     with separate L2 cache it probably depends also on the RSS, and
+ *     for a board with shared L2 cache it ought to decay fast as other
+ *     processes are run.
+ */
+#define PROC_CHANGE_PENALTY    15              /* Schedule penalty */
+
+#endif
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h
new file mode 100644 (file)
index 0000000..864351c
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * <asm/smplock.h>
+ *
+ * i386 SMP lock implementation
+ */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <asm/current.h>
+
+extern spinlock_t kernel_flag;
+
+#define kernel_locked()                spin_is_locked(&kernel_flag)
+
+/*
+ * Release global kernel lock and global interrupt lock
+ */
+#define release_kernel_lock(task, cpu) \
+do { \
+       if (task->lock_depth >= 0) \
+               spin_unlock(&kernel_flag); \
+       release_irqlock(cpu); \
+       __sti(); \
+} while (0)
+
+/*
+ * Re-acquire the kernel lock
+ */
+#define reacquire_kernel_lock(task) \
+do { \
+       if (task->lock_depth >= 0) \
+               spin_lock(&kernel_flag); \
+} while (0)
+
+
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously,
+ * so we only need to worry about other
+ * CPUs.
+ */
+static __inline__ void lock_kernel(void)
+{
+#if 1
+       if (!++current->lock_depth)
+               spin_lock(&kernel_flag);
+#else
+       __asm__ __volatile__(
+               "incl %1\n\t"
+               "jne 9f"
+               spin_lock_string
+               "\n9:"
+               :"=m" (__dummy_lock(&kernel_flag)),
+                "=m" (current->lock_depth));
+#endif
+}
+
+static __inline__ void unlock_kernel(void)
+{
+       if (current->lock_depth < 0)
+               BUG();
+#if 1
+       if (--current->lock_depth < 0)
+               spin_unlock(&kernel_flag);
+#else
+       __asm__ __volatile__(
+               "decl %1\n\t"
+               "jns 9f\n\t"
+               spin_unlock_string
+               "\n9:"
+               :"=m" (__dummy_lock(&kernel_flag)),
+                "=m" (current->lock_depth));
+#endif
+}
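
The big kernel lock is recursive per task: lock_depth starts at -1, lock_kernel() takes the spinlock only on the -1 to 0 transition, and unlock_kernel() drops it only when the depth falls back below zero, so nested acquisitions inside one task are just counter bumps. A single-task sketch of that depth discipline, with the spinlock itself replaced by printfs:

    /* BKL recursion model: only the outermost lock/unlock in a task
     * touches the actual spinlock. */
    #include <stdio.h>

    static int lock_depth = -1;    /* per-task; -1 == BKL not held */

    static void lock_kernel(void)
    {
        if (!++lock_depth)                 /* -1 -> 0: first acquisition */
            printf("spin_lock(&kernel_flag)\n");
        else
            printf("recursive, depth=%d\n", lock_depth);
    }

    static void unlock_kernel(void)
    {
        if (lock_depth < 0)
            printf("BUG(): unbalanced unlock_kernel\n");
        else if (--lock_depth < 0)         /* 0 -> -1: last release */
            printf("spin_unlock(&kernel_flag)\n");
        else
            printf("still held, depth=%d\n", lock_depth);
    }

    int main(void)
    {
        lock_kernel();     /* takes the spinlock    */
        lock_kernel();     /* nested: counter only  */
        unlock_kernel();   /* still held            */
        unlock_kernel();   /* releases the spinlock */
        return 0;
    }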
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/socket.h b/xenolinux-2.4.16-sparse/include/asm-xeno/socket.h
new file mode 100644 (file)
index 0000000..fbcc44d
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockoptions(2) */
+#define SOL_SOCKET     1
+
+#define SO_DEBUG       1
+#define SO_REUSEADDR   2
+#define SO_TYPE                3
+#define SO_ERROR       4
+#define SO_DONTROUTE   5
+#define SO_BROADCAST   6
+#define SO_SNDBUF      7
+#define SO_RCVBUF      8
+#define SO_KEEPALIVE   9
+#define SO_OOBINLINE   10
+#define SO_NO_CHECK    11
+#define SO_PRIORITY    12
+#define SO_LINGER      13
+#define SO_BSDCOMPAT   14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED    16
+#define SO_PEERCRED    17
+#define SO_RCVLOWAT    18
+#define SO_SNDLOWAT    19
+#define SO_RCVTIMEO    20
+#define SO_SNDTIMEO    21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION             22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT       23
+#define SO_SECURITY_ENCRYPTION_NETWORK         24
+
+#define SO_BINDTODEVICE        25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+
+#define SO_PEERNAME            28
+#define SO_TIMESTAMP           29
+#define SCM_TIMESTAMP          SO_TIMESTAMP
+
+#define SO_ACCEPTCONN          30
+
+/* Nasty libc5 fixup - bletch */
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+/* Socket types. */
+#define SOCK_STREAM    1               /* stream (connection) socket   */
+#define SOCK_DGRAM     2               /* datagram (conn.less) socket  */
+#define SOCK_RAW       3               /* raw socket                   */
+#define SOCK_RDM       4               /* reliably-delivered message   */
+#define SOCK_SEQPACKET 5               /* sequential packet socket     */
+#define SOCK_PACKET    10              /* linux specific way of        */
+                                       /* getting packets at the dev   */
+                                       /* level.  For writing rarp and */
+                                       /* other similar things on the  */
+                                       /* user level.                  */
+#define        SOCK_MAX        (SOCK_PACKET+1)
+#endif
+
+#endif /* _ASM_SOCKET_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h
new file mode 100644 (file)
index 0000000..6b747f8
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __ARCH_I386_SOCKIOS__
+#define __ARCH_I386_SOCKIOS__
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN      0x8901
+#define SIOCSPGRP      0x8902
+#define FIOGETOWN      0x8903
+#define SIOCGPGRP      0x8904
+#define SIOCATMARK     0x8905
+#define SIOCGSTAMP     0x8906          /* Get stamp */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h
new file mode 100644 (file)
index 0000000..2542244
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+
+#define __cpu_bh_enable(cpu) \
+               do { barrier(); local_bh_count(cpu)--; } while (0)
+#define cpu_bh_disable(cpu) \
+               do { local_bh_count(cpu)++; barrier(); } while (0)
+
+#define local_bh_disable()     cpu_bh_disable(smp_processor_id())
+#define __local_bh_enable()    __cpu_bh_enable(smp_processor_id())
+
+#define in_softirq() (local_bh_count(smp_processor_id()) != 0)
+
+/*
+ * NOTE: this assembly code assumes:
+ *
+ *    (char *)&local_bh_count - 8 == (char *)&softirq_pending
+ *
+ * If you change the offsets in irq_stat then you have to
+ * update this code as well.
+ */
+#define local_bh_enable()                                              \
+do {                                                                   \
+       unsigned int *ptr = &local_bh_count(smp_processor_id());        \
+                                                                       \
+       barrier();                                                      \
+       if (!--*ptr)                                                    \
+               __asm__ __volatile__ (                                  \
+                       "cmpl $0, -8(%0);"                              \
+                       "jnz 2f;"                                       \
+                       "1:;"                                           \
+                                                                       \
+                       ".section .text.lock,\"ax\";"                   \
+                       "2: pushl %%eax; pushl %%ecx; pushl %%edx;"     \
+                       "call %c1;"                                     \
+                       "popl %%edx; popl %%ecx; popl %%eax;"           \
+                       "jmp 1b;"                                       \
+                       ".previous;"                                    \
+                                                                       \
+               : /* no output */                                       \
+               : "r" (ptr), "i" (do_softirq)                           \
+               /* no registers clobbered */ );                         \
+} while (0)
+
+#endif /* __ASM_SOFTIRQ_H */
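
The fragile part of local_bh_enable() is the "cmpl $0, -8(%0)": having decremented the per-CPU bh count to zero, it checks softirq_pending through a hard-wired byte offset from the local_bh_count pointer, which is exactly what the NOTE above warns about. In plain C the macro amounts to the following sketch; the three-field struct is an assumed stand-in for the head of 2.4's irq_cpustat_t layout (softirq_pending, local_irq_count, local_bh_count):

    /* What local_bh_enable() does, minus the asm: drop the bh count and,
     * if it hit zero with softirqs pending, run do_softirq(). */
    #include <stdio.h>

    struct irq_cpustat_model {
        unsigned int softirq_pending;   /* offset 0: what -8(%0) reads */
        unsigned int local_irq_count;   /* offset 4                    */
        unsigned int local_bh_count;    /* offset 8: what %0 points at */
    };

    static struct irq_cpustat_model irq_stat = { 0, 0, 1 };

    static void do_softirq(void) { printf("do_softirq()\n"); }

    static void local_bh_enable_model(void)
    {
        unsigned int *ptr = &irq_stat.local_bh_count;
        if (!--*ptr && *(ptr - 2))      /* two u32s back == -8 bytes   */
            do_softirq();
    }

    int main(void)
    {
        irq_stat.softirq_pending = 1;   /* a softirq was raised        */
        local_bh_enable_model();        /* count 1 -> 0: runs softirqs */
        return 0;
    }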
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h
new file mode 100644 (file)
index 0000000..dbdd68b
--- /dev/null
@@ -0,0 +1,212 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+
+extern int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+
+/* It seems that people are forgetting to
+ * initialize their spinlocks properly, tsk tsk.
+ * Remember to turn this off in 2.4. -ben
+ */
+#if defined(CONFIG_DEBUG_SPINLOCK)
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "js 2f\n" \
+       ".section .text.lock,\"ax\"\n" \
+       "2:\t" \
+       "cmpb $0,%0\n\t" \
+       "rep;nop\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       ".previous"
+
+/*
+ * This works. Despite all the confusion.
+ * (except on PPro SMP or if we are using OOSTORE)
+ * (PPro errata 66, 92)
+ */
+#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+#define spin_unlock_string \
+       "movb $1,%0" \
+               :"=m" (lock->lock) : : "memory"
+
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       if (lock->magic != SPINLOCK_MAGIC)
+               BUG();
+       if (!spin_is_locked(lock))
+               BUG();
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#else
+
+#define spin_unlock_string \
+       "xchgb %b0, %1" \
+               :"=q" (oldval), "=m" (lock->lock) \
+               :"0" (oldval) : "memory"
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+       char oldval = 1;
+#if SPINLOCK_DEBUG
+       if (lock->magic != SPINLOCK_MAGIC)
+               BUG();
+       if (!spin_is_locked(lock))
+               BUG();
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+       );
+}
+
+#endif
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"
+               :"=q" (oldval), "=m" (lock->lock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       __label__ here;
+here:
+       if (lock->magic != SPINLOCK_MAGIC) {
+printk("eip: %p\n", &&here);
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->lock) : : "memory");
+}
+
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
+
+static inline void read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define read_unlock(rw)                asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define write_unlock(rw)       asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */
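
spin_lock_string above is the classic byte lock: the field starts at 1, "lock; decb" claims it, and a negative result diverts to the out-of-line loop that spins on plain reads ("cmpb $0" plus "rep;nop", i.e. PAUSE) until the byte goes positive before retrying the locked decrement; unlock is a single store of 1. The same state machine in portable C, with atomicity and the cross-CPU spin hand-waved:

    /* Byte-lock model of spin_lock/spin_unlock above.  Sketch only:
     * the real code makes the decrement atomic and spins across CPUs. */
    #include <stdio.h>

    static volatile signed char lock = 1;    /* SPIN_LOCK_UNLOCKED */

    static int try_acquire(void)
    {
        /* "lock; decb %0; js 2f": a negative result means contended */
        return --lock >= 0;
    }

    static void release(void)
    {
        lock = 1;                             /* "movb $1,%0" */
    }

    int main(void)
    {
        printf("first acquire:  %s\n", try_acquire() ? "got it" : "spin");
        printf("second acquire: %s\n", try_acquire() ? "got it" : "spin");
        release();                            /* lock back to 1 */
        printf("after release:  %s\n", try_acquire() ? "got it" : "spin");
        return 0;
    }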
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/stat.h b/xenolinux-2.4.16-sparse/include/asm-xeno/stat.h
new file mode 100644 (file)
index 0000000..ef16311
--- /dev/null
@@ -0,0 +1,78 @@
+#ifndef _I386_STAT_H
+#define _I386_STAT_H
+
+struct __old_kernel_stat {
+       unsigned short st_dev;
+       unsigned short st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+       unsigned short st_rdev;
+       unsigned long  st_size;
+       unsigned long  st_atime;
+       unsigned long  st_mtime;
+       unsigned long  st_ctime;
+};
+
+struct stat {
+       unsigned short st_dev;
+       unsigned short __pad1;
+       unsigned long st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+       unsigned short st_rdev;
+       unsigned short __pad2;
+       unsigned long  st_size;
+       unsigned long  st_blksize;
+       unsigned long  st_blocks;
+       unsigned long  st_atime;
+       unsigned long  __unused1;
+       unsigned long  st_mtime;
+       unsigned long  __unused2;
+       unsigned long  st_ctime;
+       unsigned long  __unused3;
+       unsigned long  __unused4;
+       unsigned long  __unused5;
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+       unsigned short  st_dev;
+       unsigned char   __pad0[10];
+
+#define STAT64_HAS_BROKEN_ST_INO       1
+       unsigned long   __st_ino;
+
+       unsigned int    st_mode;
+       unsigned int    st_nlink;
+
+       unsigned long   st_uid;
+       unsigned long   st_gid;
+
+       unsigned short  st_rdev;
+       unsigned char   __pad3[10];
+
+       long long       st_size;
+       unsigned long   st_blksize;
+
+       unsigned long   st_blocks;      /* Number 512-byte blocks allocated. */
+       unsigned long   __pad4;         /* future possible st_blocks high bits */
+
+       unsigned long   st_atime;
+       unsigned long   __pad5;
+
+       unsigned long   st_mtime;
+       unsigned long   __pad6;
+
+       unsigned long   st_ctime;
+       unsigned long   __pad7;         /* will be high 32 bits of ctime someday */
+
+       unsigned long long      st_ino;
+};
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h b/xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h
new file mode 100644 (file)
index 0000000..113d5d4
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _I386_STATFS_H
+#define _I386_STATFS_H
+
+#ifndef __KERNEL_STRICT_NAMES
+
+#include <linux/types.h>
+
+typedef __kernel_fsid_t        fsid_t;
+
+#endif
+
+struct statfs {
+       long f_type;
+       long f_bsize;
+       long f_blocks;
+       long f_bfree;
+       long f_bavail;
+       long f_files;
+       long f_ffree;
+       __kernel_fsid_t f_fsid;
+       long f_namelen;
+       long f_spare[6];
+};
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h b/xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h
new file mode 100644 (file)
index 0000000..51bfd05
--- /dev/null
@@ -0,0 +1,617 @@
+#ifndef _I386_STRING_I486_H_
+#define _I386_STRING_I486_H_
+
+/*
+ * This string-include defines all string functions as inline
+ * functions. Use gcc. It also assumes ds=es=data space; this should be
+ * normal. Most of the string functions are rather heavily hand-optimized;
+ * see especially strtok, strstr and str[c]spn. They should work, but are not
+ * very easy to understand. Everything is done entirely within the register
+ * set, making the functions fast and clean. 
+ *
+ *             Copyright (C) 1991, 1992 Linus Torvalds
+ *             Revised and optimized for i486/pentium
+ *             1994/03/15 by Alberto Vignani/Davide Parodi @crf.it
+ *
+ *     Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
+ *
+ *     1999/10/5       Proper register args for newer GCCs and minor bugs
+ *                     fixed - Petko Manolov (petkan@spct.net)
+ *     1999/10/14      3DNow memscpy() added - Petkan
+ *     2000/05/09      extern changed to static in function definitions
+ *                     and a few cleanups - Petkan
+ */
+
+#define __HAVE_ARCH_STRCPY
+static inline char * strcpy(char * dest,const char *src)
+{
+register char *tmp= (char *)dest;
+register char dummy;
+__asm__ __volatile__(
+       "\n1:\t"
+       "movb (%0),%2\n\t"
+       "incl %0\n\t"
+       "movb %2,(%1)\n\t"
+       "incl %1\n\t"
+       "testb %2,%2\n\t"
+       "jne 1b"
+       :"=r" (src), "=r" (tmp), "=q" (dummy)
+       :"0" (src), "1" (tmp)
+       :"memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCPY
+static inline char * strncpy(char * dest,const char *src,size_t count)
+{
+register char *tmp= (char *)dest;
+register char dummy;
+if (count) {
+__asm__ __volatile__(
+       "\n1:\t"
+       "movb (%0),%2\n\t"
+       "incl %0\n\t"
+       "movb %2,(%1)\n\t"
+       "incl %1\n\t"
+       "decl %3\n\t"
+       "je 3f\n\t"
+       "testb %2,%2\n\t"
+       "jne 1b\n\t"
+       "2:\tmovb %2,(%1)\n\t"
+       "incl %1\n\t"
+       "decl %3\n\t"
+       "jne 2b\n\t"
+       "3:"
+       :"=r" (src), "=r" (tmp), "=q" (dummy), "=r" (count)
+       :"0" (src), "1" (tmp), "3" (count)
+       :"memory");
+    } /* if (count) */
+return dest;
+}
+
+#define __HAVE_ARCH_STRCAT
+static inline char * strcat(char * dest,const char * src)
+{
+register char *tmp = (char *)(dest-1);
+register char dummy;
+__asm__ __volatile__(
+       "\n1:\tincl %1\n\t"
+       "cmpb $0,(%1)\n\t"
+       "jne 1b\n"
+       "2:\tmovb (%2),%b0\n\t"
+       "incl %2\n\t"
+       "movb %b0,(%1)\n\t"
+       "incl %1\n\t"
+       "testb %b0,%b0\n\t"
+       "jne 2b\n"
+       :"=q" (dummy), "=r" (tmp), "=r" (src)
+       :"1"  (tmp), "2"  (src)
+       :"memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCAT
+static inline char * strncat(char * dest,const char * src,size_t count)
+{
+register char *tmp = (char *)(dest-1);
+register char dummy;
+__asm__ __volatile__(
+       "\n1:\tincl %1\n\t"
+       "cmpb $0,(%1)\n\t"
+       "jne 1b\n"
+       "2:\tdecl %3\n\t"
+       "js 3f\n\t"
+       "movb (%2),%b0\n\t"
+       "incl %2\n\t"
+       "movb %b0,(%1)\n\t"
+       "incl %1\n\t"
+       "testb %b0,%b0\n\t"
+       "jne 2b\n"
+       "3:\txorb %0,%0\n\t"
+       "movb %b0,(%1)\n\t"
+       :"=q" (dummy), "=r" (tmp), "=r" (src), "=r" (count)
+       :"1"  (tmp), "2"  (src), "3"  (count)
+       :"memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRCMP
+static inline int strcmp(const char * cs,const char * ct)
+{
+register int __res;
+__asm__ __volatile__(
+       "\n1:\tmovb (%1),%b0\n\t"
+       "incl %1\n\t"
+       "cmpb %b0,(%2)\n\t"
+       "jne 2f\n\t"
+       "incl %2\n\t"
+       "testb %b0,%b0\n\t"
+       "jne 1b\n\t"
+       "xorl %0,%0\n\t"
+       "jmp 3f\n"
+       "2:\tmovl $1,%0\n\t"
+       "jb 3f\n\t"
+       "negl %0\n"
+       "3:"
+       :"=q" (__res), "=r" (cs), "=r" (ct)
+       :"1" (cs), "2" (ct)
+       : "memory" );
+return __res;
+}
+
+#define __HAVE_ARCH_STRNCMP
+static inline int strncmp(const char * cs,const char * ct,size_t count)
+{
+register int __res;
+__asm__ __volatile__(
+       "\n1:\tdecl %3\n\t"
+       "js 2f\n\t"
+       "movb (%1),%b0\n\t"
+       "incl %1\n\t"
+       "cmpb %b0,(%2)\n\t"
+       "jne 3f\n\t"
+       "incl %2\n\t"
+       "testb %b0,%b0\n\t"
+       "jne 1b\n"
+       "2:\txorl %0,%0\n\t"
+       "jmp 4f\n"
+       "3:\tmovl $1,%0\n\t"
+       "jb 4f\n\t"
+       "negl %0\n"
+       "4:"
+       :"=q" (__res), "=r" (cs), "=r" (ct), "=r" (count)
+       :"1"  (cs), "2"  (ct),  "3" (count));
+return __res;
+}
+
+#define __HAVE_ARCH_STRCHR
+static inline char * strchr(const char * s, int c)
+{
+register char * __res;
+__asm__ __volatile__(
+       "movb %%al,%%ah\n"
+       "1:\tmovb (%1),%%al\n\t"
+       "cmpb %%ah,%%al\n\t"
+       "je 2f\n\t"
+       "incl %1\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n\t"
+       "xorl %1,%1\n"
+       "2:\tmovl %1,%0\n\t"
+       :"=a" (__res), "=r" (s)
+       :"0" (c),      "1"  (s));
+return __res;
+}
+
+#define __HAVE_ARCH_STRRCHR
+static inline char * strrchr(const char * s, int c)
+{
+int    d0, d1;
+register char * __res;
+__asm__ __volatile__(
+       "movb %%al,%%ah\n"
+       "1:\tlodsb\n\t"
+       "cmpb %%ah,%%al\n\t"
+       "jne 2f\n\t"
+       "leal -1(%%esi),%0\n"
+       "2:\ttestb %%al,%%al\n\t"
+       "jne 1b"
+       :"=d" (__res), "=&S" (d0), "=&a" (d1)
+       :"0" (0), "1" (s), "2" (c));
+return __res;
+}
+
+
+#define __HAVE_ARCH_STRCSPN
+static inline size_t strcspn(const char * cs, const char * ct)
+{
+int    d0, d1;
+register char * __res;
+__asm__ __volatile__(
+       "movl %6,%%edi\n\t"
+       "repne\n\t"
+       "scasb\n\t"
+       "notl %%ecx\n\t"
+       "decl %%ecx\n\t"
+       "movl %%ecx,%%edx\n"
+       "1:\tlodsb\n\t"
+       "testb %%al,%%al\n\t"
+       "je 2f\n\t"
+       "movl %6,%%edi\n\t"
+       "movl %%edx,%%ecx\n\t"
+       "repne\n\t"
+       "scasb\n\t"
+       "jne 1b\n"
+       "2:\tdecl %0"
+       :"=S" (__res), "=&a" (d0), "=&c" (d1)
+       :"0" (cs), "1" (0), "2" (0xffffffff), "g" (ct)
+       :"dx", "di");
+return __res-cs;
+}
+
+
+#define __HAVE_ARCH_STRLEN
+static inline size_t strlen(const char * s)
+{
+/*
+ * slightly slower on a 486, but with better chances of
+ * register allocation
+ */
+register char dummy, *tmp= (char *)s;
+__asm__ __volatile__(
+       "\n1:\t"
+       "movb\t(%0),%1\n\t"
+       "incl\t%0\n\t"
+       "testb\t%1,%1\n\t"
+       "jne\t1b"
+       :"=r" (tmp),"=q" (dummy)
+       :"0" (s)
+       : "memory" );
+return (tmp-s-1);
+}
+
+/* Added by Gertjan van Wingerde to make minix and sysv module work */
+#define __HAVE_ARCH_STRNLEN
+static inline size_t strnlen(const char * s, size_t count)
+{
+int    d0;
+register int __res;
+__asm__ __volatile__(
+       "movl %3,%0\n\t"
+       "jmp 2f\n"
+       "1:\tcmpb $0,(%0)\n\t"
+       "je 3f\n\t"
+       "incl %0\n"
+       "2:\tdecl %2\n\t"
+       "cmpl $-1,%2\n\t"
+       "jne 1b\n"
+       "3:\tsubl %3,%0"
+       :"=a" (__res), "=&d" (d0)
+       :"1" (count), "c" (s));
+return __res;
+}
+/* end of additional stuff */
+
+
+/*
+ *     These ought to get tweaked to do some cache priming.
+ */
+static inline void * __memcpy_by4(void * to, const void * from, size_t n)
+{
+register void *tmp = (void *)to;
+register int dummy1,dummy2;
+__asm__ __volatile__ (
+       "\n1:\tmovl (%2),%0\n\t"
+       "addl $4,%2\n\t"
+       "movl %0,(%1)\n\t"
+       "addl $4,%1\n\t"
+       "decl %3\n\t"
+       "jnz 1b"
+       :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) 
+       :"1" (tmp), "2" (from), "3" (n/4)
+       :"memory");
+return (to);
+}
+
+static inline void * __memcpy_by2(void * to, const void * from, size_t n)
+{
+register void *tmp = (void *)to;
+register int dummy1,dummy2;
+__asm__ __volatile__ (
+       "shrl $1,%3\n\t"
+       "jz 2f\n"                 /* only a word */
+       "1:\tmovl (%2),%0\n\t"
+       "addl $4,%2\n\t"
+       "movl %0,(%1)\n\t"
+       "addl $4,%1\n\t"
+       "decl %3\n\t"
+       "jnz 1b\n"
+       "2:\tmovw (%2),%w0\n\t"
+       "movw %w0,(%1)"
+       :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) 
+       :"1" (tmp), "2" (from), "3" (n/2)
+       :"memory");
+return (to);
+}
+
+static inline void * __memcpy_g(void * to, const void * from, size_t n)
+{
+int    d0, d1, d2;
+register void *tmp = (void *)to;
+__asm__ __volatile__ (
+       "shrl $1,%%ecx\n\t"
+       "jnc 1f\n\t"
+       "movsb\n"
+       "1:\tshrl $1,%%ecx\n\t"
+       "jnc 2f\n\t"
+       "movsw\n"
+       "2:\trep\n\t"
+       "movsl"
+       :"=&c" (d0), "=&D" (d1), "=&S" (d2)
+       :"0" (n), "1" ((long) tmp), "2" ((long) from)
+       :"memory");
+return (to);
+}
+
+#define __memcpy_c(d,s,count) \
+((count%4==0) ? \
+ __memcpy_by4((d),(s),(count)) : \
+ ((count%2==0) ? \
+  __memcpy_by2((d),(s),(count)) : \
+  __memcpy_g((d),(s),(count))))
+  
+#define __memcpy(d,s,count) \
+(__builtin_constant_p(count) ? \
+ __memcpy_c((d),(s),(count)) : \
+ __memcpy_g((d),(s),(count)))
+#define __HAVE_ARCH_MEMCPY
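The __builtin_constant_p dispatch above is the pattern to note: when the byte count is a compile-time constant, the whole conditional folds away and only the specialised routine survives. A minimal sketch of the same shape, with made-up routine names:

    #include <stdio.h>

    static int copy_fast(int n)    { return n; }  /* constant-count path */
    static int copy_generic(int n) { return -n; } /* runtime-count path  */

    /* Same dispatch shape as __memcpy above. */
    #define copy(n) \
            (__builtin_constant_p(n) ? copy_fast(n) : copy_generic(n))

    int main(int argc, char **argv)
    {
            (void)argv;
            printf("%d\n", copy(16));   /* literal count: fast path    */
            printf("%d\n", copy(argc)); /* runtime count: generic path */
            return 0;
    }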
+
+#include <linux/config.h>
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+/*
+**      This CPU favours 3DNow strongly (eg AMD K6-II, K6-III, Athlon)
+*/
+
+static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
+{
+       if (len < 512)
+               return __memcpy_c(to, from, len);
+       return _mmx_memcpy(to, from, len);
+}
+
+static inline void *__memcpy3d(void *to, const void *from, size_t len)
+{
+       if(len < 512)
+               return __memcpy_g(to, from, len);
+       return _mmx_memcpy(to, from, len);
+}
+
+#define memcpy(d, s, count) \
+(__builtin_constant_p(count) ? \
+ __constant_memcpy3d((d),(s),(count)) : \
+ __memcpy3d((d),(s),(count)))
+#else /* CONFIG_X86_USE_3DNOW */
+
+/*
+**     Generic routines
+*/
+
+
+#define memcpy(d, s, count) __memcpy(d, s, count)
+
+#endif /* CONFIG_X86_USE_3DNOW */ 
+
+
+extern void __struct_cpy_bug( void );
+
+#define struct_cpy(x,y)                                \
+({                                             \
+       if (sizeof(*(x)) != sizeof(*(y)))       \
+               __struct_cpy_bug();             \
+       memcpy(x, y, sizeof(*(x)));             \
+})
+
+
+#define __HAVE_ARCH_MEMMOVE
+static inline void * memmove(void * dest,const void * src, size_t n)
+{
+int    d0, d1, d2;
+register void *tmp = (void *)dest;
+if (dest<src)
+__asm__ __volatile__ (
+       "rep\n\t"
+       "movsb"
+       :"=&c" (d0), "=&S" (d1), "=&D" (d2)
+       :"0" (n), "1" (src), "2" (tmp)
+       :"memory");
+else
+__asm__ __volatile__ (
+       "std\n\t"
+       "rep\n\t"
+       "movsb\n\t"
+       "cld"
+       :"=&c" (d0), "=&S" (d1), "=&D" (d2)
+       :"0" (n), "1" (n-1+(const char *)src), "2" (n-1+(char *)tmp)
+       :"memory");
+return dest;
+}
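The two asm branches implement the standard overlap rule: copy forwards when the destination starts below the source, backwards otherwise (std sets the direction flag), so no byte is read after it has been overwritten. A portable C sketch of the same logic:

    #include <stddef.h>

    static void *memmove_sketch(void *dest, const void *src, size_t n)
    {
            unsigned char *d = dest;
            const unsigned char *s = src;

            if (d < s) {
                    while (n--)
                            *d++ = *s++;     /* like "rep movsb"      */
            } else {
                    d += n;
                    s += n;
                    while (n--)
                            *--d = *--s;     /* like "std; rep movsb" */
            }
            return dest;
    }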
+
+
+#define        __HAVE_ARCH_MEMCMP
+static inline int memcmp(const void * cs,const void * ct,size_t count)
+{
+int    d0, d1, d2;
+register int __res;
+__asm__ __volatile__(
+       "repe\n\t"
+       "cmpsb\n\t"
+       "je 1f\n\t"
+       "sbbl %0,%0\n\t"
+       "orb $1,%b0\n"
+       "1:"
+       :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+       :"0" (0), "1" (cs), "2" (ct), "3" (count));
+return __res;
+}
+
+
+#define __HAVE_ARCH_MEMCHR
+static inline void * memchr(const void * cs,int c,size_t count)
+{
+int    d0;
+register void * __res;
+if (!count)
+       return NULL;
+__asm__ __volatile__(
+       "repne\n\t"
+       "scasb\n\t"
+       "je 1f\n\t"
+       "movl $1,%0\n"
+       "1:\tdecl %0"
+       :"=D" (__res), "=&c" (d0)
+       :"a" (c), "0" (cs), "1" (count));
+return __res;
+}
+
+#define __memset_cc(s,c,count) \
+((count%4==0) ? \
+ __memset_cc_by4((s),(c),(count)) : \
+ ((count%2==0) ? \
+  __memset_cc_by2((s),(c),(count)) : \
+  __memset_cg((s),(c),(count))))
+
+#define __memset_gc(s,c,count) \
+((count%4==0) ? \
+ __memset_gc_by4((s),(c),(count)) : \
+ ((count%2==0) ? \
+  __memset_gc_by2((s),(c),(count)) : \
+  __memset_gg((s),(c),(count))))
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s,c,count) \
+(__builtin_constant_p(c) ? \
+ (__builtin_constant_p(count) ? \
+  __memset_cc((s),(c),(count)) : \
+  __memset_cg((s),(c),(count))) : \
+ (__builtin_constant_p(count) ? \
+  __memset_gc((s),(c),(count)) : \
+  __memset_gg((s),(c),(count))))
+
+static inline void * __memset_cc_by4(void * s, char c, size_t count)
+{
+/*
+ * register char *tmp = s;
+ */
+register char *tmp = (char *)s;
+register int  dummy;
+__asm__ __volatile__ (
+       "\n1:\tmovl %2,(%0)\n\t"
+       "addl $4,%0\n\t"
+       "decl %1\n\t"
+       "jnz 1b"
+       :"=r" (tmp), "=r" (dummy)
+       :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/4)
+       :"memory");
+return s;
+}
+
+static inline void * __memset_cc_by2(void * s, char c, size_t count)
+{
+register void *tmp = (void *)s;
+register int  dummy;
+__asm__ __volatile__ (
+       "shrl $1,%1\n\t"          /* may be divisible also by 4 */
+       "jz 2f\n"
+       "\n1:\tmovl %2,(%0)\n\t"
+       "addl $4,%0\n\t"
+       "decl %1\n\t"
+       "jnz 1b\n"
+       "2:\tmovw %w2,(%0)"
+       :"=r" (tmp), "=r" (dummy)
+       :"q" (0x01010101UL * (unsigned char) c), "0" (tmp), "1" (count/2)
+       :"memory");
+return s;
+}
+
+static inline void * __memset_gc_by4(void * s, char c, size_t count)
+{
+register void *tmp = (void *)s;
+register int dummy;
+__asm__ __volatile__ (
+       "movb %b0,%h0\n"
+       "pushw %w0\n\t"
+       "shll $16,%0\n\t"
+       "popw %w0\n"
+       "1:\tmovl %0,(%1)\n\t"
+       "addl $4,%1\n\t"
+       "decl %2\n\t"
+       "jnz 1b\n"
+       :"=q" (c), "=r" (tmp), "=r" (dummy)
+       :"0" ((unsigned) c),  "1"  (tmp), "2" (count/4)
+       :"memory");
+return s;
+}
+
+static inline void * __memset_gc_by2(void * s, char c, size_t count)
+{
+register void *tmp = (void *)s;
+register int dummy1,dummy2;
+__asm__ __volatile__ (
+       "movb %b0,%h0\n\t"
+       "shrl $1,%2\n\t"          /* may be divisible also by 4 */
+       "jz 2f\n\t"
+       "pushw %w0\n\t"
+       "shll $16,%0\n\t"
+       "popw %w0\n"
+       "1:\tmovl %0,(%1)\n\t"
+       "addl $4,%1\n\t"
+       "decl %2\n\t"
+       "jnz 1b\n"
+       "2:\tmovw %w0,(%1)"
+       :"=q" (dummy1), "=r" (tmp), "=r" (dummy2)
+       :"0" ((unsigned) c),  "1"  (tmp), "2" (count/2)
+       :"memory");
+return s;
+}
+
+static inline void * __memset_cg(void * s, char c, size_t count)
+{
+int    d0, d1;
+register void *tmp = (void *)s;
+__asm__ __volatile__ (
+       "shrl $1,%%ecx\n\t"
+       "rep\n\t"
+       "stosw\n\t"
+       "jnc 1f\n\t"
+       "movb %%al,(%%edi)\n"
+       "1:"
+       :"=&c" (d0), "=&D" (d1) 
+       :"a" (0x0101U * (unsigned char) c), "0" (count), "1" (tmp)
+       :"memory");
+return s;
+}
+
+static inline void * __memset_gg(void * s,char c,size_t count)
+{
+int    d0, d1, d2;
+register void *tmp = (void *)s;
+__asm__ __volatile__ (
+       "movb %%al,%%ah\n\t"
+       "shrl $1,%%ecx\n\t"
+       "rep\n\t"
+       "stosw\n\t"
+       "jnc 1f\n\t"
+       "movb %%al,(%%edi)\n"
+       "1:"
+       :"=&c" (d0), "=&D" (d1), "=&D" (d2)
+       :"0" (count), "1" (tmp), "2" (c)
+       :"memory");
+return s;
+}
+
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ */
+#define __HAVE_ARCH_MEMSCAN
+static inline void * memscan(void * addr, int c, size_t size)
+{
+       if (!size)
+               return addr;
+       __asm__("repnz; scasb
+               jnz 1f
+               dec %%edi
+1:             "
+               : "=D" (addr), "=c" (size)
+               : "0" (addr), "1" (size), "a" (c));
+       return addr;
+}
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/string.h b/xenolinux-2.4.16-sparse/include/asm-xeno/string.h
new file mode 100644 (file)
index 0000000..d4d7899
--- /dev/null
@@ -0,0 +1,531 @@
+#ifndef _I386_STRING_H_
+#define _I386_STRING_H_
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+/*
+ * On a 486 or Pentium, we are better off not using the
+ * byte string operations. But on a 386 or a PPro the
+ * byte string ops are faster than doing it by hand
+ * (MUCH faster on a Pentium).
+ *
+ * Also, the byte strings actually work correctly. Forget
+ * the i486 routines for now as they may be broken..
+ */
+#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486)
+#include <asm/string-486.h>
+#else
+
+/*
+ * This string-include defines all string functions as inline
+ * functions. Use gcc. It also assumes ds=es=data space, this should be
+ * normal. Most of the string-functions are rather heavily hand-optimized,
+ * see especially strtok,strstr,str[c]spn. They should work, but are not
+ * very easy to understand. Everything is done entirely within the register
+ * set, making the functions fast and clean. String instructions have been
+ * used through-out, making for "slightly" unclear code :-)
+ *
+ *             NO Copyright (C) 1991, 1992 Linus Torvalds,
+ *             consider these trivial functions to be PD.
+ */
+
+#define __HAVE_ARCH_STRCPY
+static inline char * strcpy(char * dest,const char *src)
+{
+int d0, d1, d2;
+__asm__ __volatile__(
+       "1:\tlodsb\n\t"
+       "stosb\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b"
+       : "=&S" (d0), "=&D" (d1), "=&a" (d2)
+       :"0" (src),"1" (dest) : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCPY
+static inline char * strncpy(char * dest,const char *src,size_t count)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+       "1:\tdecl %2\n\t"
+       "js 2f\n\t"
+       "lodsb\n\t"
+       "stosb\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n\t"
+       "rep\n\t"
+       "stosb\n"
+       "2:"
+       : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
+       :"0" (src),"1" (dest),"2" (count) : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRCAT
+static inline char * strcat(char * dest,const char * src)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+       "repne\n\t"
+       "scasb\n\t"
+       "decl %1\n"
+       "1:\tlodsb\n\t"
+       "stosb\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b"
+       : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+       : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRNCAT
+static inline char * strncat(char * dest,const char * src,size_t count)
+{
+int d0, d1, d2, d3;
+__asm__ __volatile__(
+       "repne\n\t"
+       "scasb\n\t"
+       "decl %1\n\t"
+       "movl %8,%3\n"
+       "1:\tdecl %3\n\t"
+       "js 2f\n\t"
+       "lodsb\n\t"
+       "stosb\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n"
+       "2:\txorl %2,%2\n\t"
+       "stosb"
+       : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+       : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
+       : "memory");
+return dest;
+}
+
+#define __HAVE_ARCH_STRCMP
+static inline int strcmp(const char * cs,const char * ct)
+{
+int d0, d1;
+register int __res;
+__asm__ __volatile__(
+       "1:\tlodsb\n\t"
+       "scasb\n\t"
+       "jne 2f\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n\t"
+       "xorl %%eax,%%eax\n\t"
+       "jmp 3f\n"
+       "2:\tsbbl %%eax,%%eax\n\t"
+       "orb $1,%%al\n"
+       "3:"
+       :"=a" (__res), "=&S" (d0), "=&D" (d1)
+                    :"1" (cs),"2" (ct));
+return __res;
+}
+
+#define __HAVE_ARCH_STRNCMP
+static inline int strncmp(const char * cs,const char * ct,size_t count)
+{
+register int __res;
+int d0, d1, d2;
+__asm__ __volatile__(
+       "1:\tdecl %3\n\t"
+       "js 2f\n\t"
+       "lodsb\n\t"
+       "scasb\n\t"
+       "jne 3f\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n"
+       "2:\txorl %%eax,%%eax\n\t"
+       "jmp 4f\n"
+       "3:\tsbbl %%eax,%%eax\n\t"
+       "orb $1,%%al\n"
+       "4:"
+                    :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+                    :"1" (cs),"2" (ct),"3" (count));
+return __res;
+}
+
+#define __HAVE_ARCH_STRCHR
+static inline char * strchr(const char * s, int c)
+{
+int d0;
+register char * __res;
+__asm__ __volatile__(
+       "movb %%al,%%ah\n"
+       "1:\tlodsb\n\t"
+       "cmpb %%ah,%%al\n\t"
+       "je 2f\n\t"
+       "testb %%al,%%al\n\t"
+       "jne 1b\n\t"
+       "movl $1,%1\n"
+       "2:\tmovl %1,%0\n\t"
+       "decl %0"
+       :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
+return __res;
+}
+
+#define __HAVE_ARCH_STRRCHR
+static inline char * strrchr(const char * s, int c)
+{
+int d0, d1;
+register char * __res;
+__asm__ __volatile__(
+       "movb %%al,%%ah\n"
+       "1:\tlodsb\n\t"
+       "cmpb %%ah,%%al\n\t"
+       "jne 2f\n\t"
+       "leal -1(%%esi),%0\n"
+       "2:\ttestb %%al,%%al\n\t"
+       "jne 1b"
+       :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
+return __res;
+}
+
+#define __HAVE_ARCH_STRLEN
+static inline size_t strlen(const char * s)
+{
+int d0;
+register int __res;
+__asm__ __volatile__(
+       "repne\n\t"
+       "scasb\n\t"
+       "notl %0\n\t"
+       "decl %0"
+       :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
+return __res;
+}
+
+static inline void * __memcpy(void * to, const void * from, size_t n)
+{
+int d0, d1, d2;
+__asm__ __volatile__(
+       "rep ; movsl\n\t"
+       "testb $2,%b4\n\t"
+       "je 1f\n\t"
+       "movsw\n"
+       "1:\ttestb $1,%b4\n\t"
+       "je 2f\n\t"
+       "movsb\n"
+       "2:"
+       : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+       :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+       : "memory");
+return (to);
+}
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as the count is constant.
+ */
+static inline void * __constant_memcpy(void * to, const void * from, size_t n)
+{
+       switch (n) {
+               case 0:
+                       return to;
+               case 1:
+                       *(unsigned char *)to = *(const unsigned char *)from;
+                       return to;
+               case 2:
+                       *(unsigned short *)to = *(const unsigned short *)from;
+                       return to;
+               case 3:
+                       *(unsigned short *)to = *(const unsigned short *)from;
+                       *(2+(unsigned char *)to) = *(2+(const unsigned char *)from);
+                       return to;
+               case 4:
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       return to;
+               case 6: /* for Ethernet addresses */
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       *(2+(unsigned short *)to) = *(2+(const unsigned short *)from);
+                       return to;
+               case 8:
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+                       return to;
+               case 12:
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+                       *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+                       return to;
+               case 16:
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+                       *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+                       *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
+                       return to;
+               case 20:
+                       *(unsigned long *)to = *(const unsigned long *)from;
+                       *(1+(unsigned long *)to) = *(1+(const unsigned long *)from);
+                       *(2+(unsigned long *)to) = *(2+(const unsigned long *)from);
+                       *(3+(unsigned long *)to) = *(3+(const unsigned long *)from);
+                       *(4+(unsigned long *)to) = *(4+(const unsigned long *)from);
+                       return to;
+       }
+#define COMMON(x) \
+__asm__ __volatile__( \
+       "rep ; movsl" \
+       x \
+       : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+       : "0" (n/4),"1" ((long) to),"2" ((long) from) \
+       : "memory");
+{
+       int d0, d1, d2;
+       switch (n % 4) {
+               case 0: COMMON(""); return to;
+               case 1: COMMON("\n\tmovsb"); return to;
+               case 2: COMMON("\n\tmovsw"); return to;
+               default: COMMON("\n\tmovsw\n\tmovsb"); return to;
+       }
+}
+  
+#undef COMMON
+}
+
+#define __HAVE_ARCH_MEMCPY
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+/*
+ *     This CPU favours 3DNow strongly (eg AMD Athlon)
+ */
+
+static inline void * __constant_memcpy3d(void * to, const void * from, size_t len)
+{
+       if (len < 512)
+               return __constant_memcpy(to, from, len);
+       return _mmx_memcpy(to, from, len);
+}
+
+static __inline__ void *__memcpy3d(void *to, const void *from, size_t len)
+{
+       if (len < 512)
+               return __memcpy(to, from, len);
+       return _mmx_memcpy(to, from, len);
+}
+
+#define memcpy(t, f, n) \
+(__builtin_constant_p(n) ? \
+ __constant_memcpy3d((t),(f),(n)) : \
+ __memcpy3d((t),(f),(n)))
+
+#else
+
+/*
+ *     No 3D Now!
+ */
+#define memcpy(t, f, n) \
+(__builtin_constant_p(n) ? \
+ __constant_memcpy((t),(f),(n)) : \
+ __memcpy((t),(f),(n)))
+
+#endif
+
+/*
+ * struct_cpy(x,y), copy structure *x into (matching structure) *y.
+ *
+ * We get link-time errors if the structure sizes do not match.
+ * There is no runtime overhead, it's all optimized away at
+ * compile time.
+ */
+extern void __struct_cpy_bug (void);
+
+#define struct_cpy(x,y)                        \
+({                                             \
+       if (sizeof(*(x)) != sizeof(*(y)))       \
+               __struct_cpy_bug();             \
+       memcpy(x, y, sizeof(*(x)));             \
+})
+
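The link-time check works because the size comparison is a compile-time constant: with matching sizes the branch is folded away and the reference to the undefined __struct_cpy_bug never reaches the object file. A self-contained sketch of the trick (hypothetical names; build with -O1 or higher so the dead branch is eliminated):

    extern void size_mismatch_detected(void);   /* deliberately undefined */

    #define checked_copy(dst, src)                          \
    ({                                                      \
            if (sizeof(*(dst)) != sizeof(*(src)))           \
                    size_mismatch_detected();               \
            __builtin_memcpy((dst), (src), sizeof(*(dst))); \
    })

    struct a { int x; };
    struct b { int x; };            /* same size: links fine */

    int main(void)
    {
            struct a src = { 1 };
            struct b dst;
            checked_copy(&dst, &src);  /* sizes match, call folded away  */
            return dst.x - 1;          /* make b larger and the build
                                          fails at link time instead     */
    }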
+#define __HAVE_ARCH_MEMMOVE
+static inline void * memmove(void * dest,const void * src, size_t n)
+{
+int d0, d1, d2;
+if (dest<src)
+__asm__ __volatile__(
+       "rep\n\t"
+       "movsb"
+       : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+       :"0" (n),"1" (src),"2" (dest)
+       : "memory");
+else
+__asm__ __volatile__(
+       "std\n\t"
+       "rep\n\t"
+       "movsb\n\t"
+       "cld"
+       : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+       :"0" (n),
+        "1" (n-1+(const char *)src),
+        "2" (n-1+(char *)dest)
+       :"memory");
+return dest;
+}
+
+#define memcmp __builtin_memcmp
+
+#define __HAVE_ARCH_MEMCHR
+static inline void * memchr(const void * cs,int c,size_t count)
+{
+int d0;
+register void * __res;
+if (!count)
+       return NULL;
+__asm__ __volatile__(
+       "repne\n\t"
+       "scasb\n\t"
+       "je 1f\n\t"
+       "movl $1,%0\n"
+       "1:\tdecl %0"
+       :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
+return __res;
+}
+
+static inline void * __memset_generic(void * s, char c,size_t count)
+{
+int d0, d1;
+__asm__ __volatile__(
+       "rep\n\t"
+       "stosb"
+       : "=&c" (d0), "=&D" (d1)
+       :"a" (c),"1" (s),"0" (count)
+       :"memory");
+return s;
+}
+
+/* we might want to write optimized versions of these later */
+#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
+
+/*
+ * memset(x,0,y) is a reasonably common thing to do, so we want to fill
+ * things 32 bits at a time even when we don't know the size of the
+ * area at compile-time..
+ */
+static inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
+{
+int d0, d1;
+__asm__ __volatile__(
+       "rep ; stosl\n\t"
+       "testb $2,%b3\n\t"
+       "je 1f\n\t"
+       "stosw\n"
+       "1:\ttestb $1,%b3\n\t"
+       "je 2f\n\t"
+       "stosb\n"
+       "2:"
+       : "=&c" (d0), "=&D" (d1)
+       :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
+       :"memory");
+return (s);    
+}
+
+/* Added by Gertjan van Wingerde to make minix and sysv module work */
+#define __HAVE_ARCH_STRNLEN
+static inline size_t strnlen(const char * s, size_t count)
+{
+int d0;
+register int __res;
+__asm__ __volatile__(
+       "movl %2,%0\n\t"
+       "jmp 2f\n"
+       "1:\tcmpb $0,(%0)\n\t"
+       "je 3f\n\t"
+       "incl %0\n"
+       "2:\tdecl %1\n\t"
+       "cmpl $-1,%1\n\t"
+       "jne 1b\n"
+       "3:\tsubl %2,%0"
+       :"=a" (__res), "=&d" (d0)
+       :"c" (s),"1" (count));
+return __res;
+}
+/* end of additional stuff */
+
+#define __HAVE_ARCH_STRSTR
+
+extern char *strstr(const char *cs, const char *ct);
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as we by now know that both pattern and count is constant..
+ */
+static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
+{
+       switch (count) {
+               case 0:
+                       return s;
+               case 1:
+                       *(unsigned char *)s = pattern;
+                       return s;
+               case 2:
+                       *(unsigned short *)s = pattern;
+                       return s;
+               case 3:
+                       *(unsigned short *)s = pattern;
+                       *(2+(unsigned char *)s) = pattern;
+                       return s;
+               case 4:
+                       *(unsigned long *)s = pattern;
+                       return s;
+       }
+#define COMMON(x) \
+__asm__  __volatile__( \
+       "rep ; stosl" \
+       x \
+       : "=&c" (d0), "=&D" (d1) \
+       : "a" (pattern),"0" (count/4),"1" ((long) s) \
+       : "memory")
+{
+       int d0, d1;
+       switch (count % 4) {
+               case 0: COMMON(""); return s;
+               case 1: COMMON("\n\tstosb"); return s;
+               case 2: COMMON("\n\tstosw"); return s;
+               default: COMMON("\n\tstosw\n\tstosb"); return s;
+       }
+}
+  
+#undef COMMON
+}
+
+#define __constant_c_x_memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_c_and_count_memset((s),(c),(count)) : \
+ __constant_c_memset((s),(c),(count)))
+
+#define __memset(s, c, count) \
+(__builtin_constant_p(count) ? \
+ __constant_count_memset((s),(c),(count)) : \
+ __memset_generic((s),(c),(count)))
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count) \
+(__builtin_constant_p(c) ? \
+ __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
+ __memset((s),(c),(count)))
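The 0x01010101UL*(unsigned char)(c) factor is the byte-replication trick: multiplying by 0x01010101 copies one byte into all four lanes of a 32-bit word, so "rep stosl" can fill four bytes per store. A two-line demonstration:

    #include <stdio.h>

    int main(void)
    {
            unsigned char c = 0xAB;
            unsigned long pattern = 0x01010101UL * c;
            printf("%#lx\n", pattern);   /* prints 0xabababab */
            return 0;
    }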
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ */
+#define __HAVE_ARCH_MEMSCAN
+static inline void * memscan(void * addr, int c, size_t size)
+{
+       if (!size)
+               return addr;
+       __asm__("repnz; scasb\n\t"
+               "jnz 1f\n\t"
+               "dec %%edi\n"
+               "1:"
+               : "=D" (addr), "=c" (size)
+               : "0" (addr), "1" (size), "a" (c));
+       return addr;
+}
+
+#endif /* CONFIG_X86_USE_STRING_486 */
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/system.h b/xenolinux-2.4.16-sparse/include/asm-xeno/system.h
new file mode 100644 (file)
index 0000000..6091708
--- /dev/null
@@ -0,0 +1,367 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/segment.h>
+#include <asm/hypervisor.h>
+#include <linux/bitops.h> /* for LOCK_PREFIX */
+
+#ifdef __KERNEL__
+
+struct task_struct;    /* one of the stranger aspects of C forward declarations.. */
+extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
+
+#define prepare_to_switch()    do { } while(0)
+#define switch_to(prev,next,last) do {                                 \
+       asm volatile("pushl %%esi\n\t"                                  \
+                    "pushl %%edi\n\t"                                  \
+                    "pushl %%ebp\n\t"                                  \
+                    "movl %%esp,%0\n\t"        /* save ESP */          \
+                    "movl %3,%%esp\n\t"        /* restore ESP */       \
+                    "movl $1f,%1\n\t"          /* save EIP */          \
+                    "pushl %4\n\t"             /* restore EIP */       \
+                    "jmp __switch_to\n"                                \
+                    "1:\t"                                             \
+                    "popl %%ebp\n\t"                                   \
+                    "popl %%edi\n\t"                                   \
+                    "popl %%esi\n\t"                                   \
+                    :"=m" (prev->thread.esp),"=m" (prev->thread.eip),  \
+                     "=b" (last)                                       \
+                    :"m" (next->thread.esp),"m" (next->thread.eip),    \
+                     "a" (prev), "d" (next),                           \
+                     "b" (prev));                                      \
+} while (0)
+
+#define _set_base(addr,base) do { unsigned long __pr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+       "rorl $16,%%edx\n\t" \
+       "movb %%dl,%2\n\t" \
+       "movb %%dh,%3" \
+       :"=&d" (__pr) \
+       :"m" (*((addr)+2)), \
+        "m" (*((addr)+4)), \
+        "m" (*((addr)+7)), \
+         "0" (base) \
+        ); } while(0)
+
+#define _set_limit(addr,limit) do { unsigned long __lr; \
+__asm__ __volatile__ ("movw %%dx,%1\n\t" \
+       "rorl $16,%%edx\n\t" \
+       "movb %2,%%dh\n\t" \
+       "andb $0xf0,%%dh\n\t" \
+       "orb %%dh,%%dl\n\t" \
+       "movb %%dl,%2" \
+       :"=&d" (__lr) \
+       :"m" (*(addr)), \
+        "m" (*((addr)+6)), \
+        "0" (limit) \
+        ); } while(0)
+
+#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) )
+#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 )
+
+static inline unsigned long _get_base(char * addr)
+{
+       unsigned long __base;
+       __asm__("movb %3,%%dh\n\t"
+               "movb %2,%%dl\n\t"
+               "shll $16,%%edx\n\t"
+               "movw %1,%%dx"
+               :"=&d" (__base)
+               :"m" (*((addr)+2)),
+                "m" (*((addr)+4)),
+                "m" (*((addr)+7)));
+       return __base;
+}
+
+#define get_base(ldt) _get_base( ((char *)&(ldt)) )
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value)                 \
+       asm volatile("\n"                       \
+               "1:\t"                          \
+               "movl %0,%%" #seg "\n"          \
+               "2:\n"                          \
+               ".section .fixup,\"ax\"\n"      \
+               "3:\t"                          \
+               "pushl $0\n\t"                  \
+               "popl %%" #seg "\n\t"           \
+               "jmp 2b\n"                      \
+               ".previous\n"                   \
+               ".section __ex_table,\"a\"\n\t" \
+               ".align 4\n\t"                  \
+               ".long 1b,3b\n"                 \
+               ".previous"                     \
+               : :"m" (*(unsigned int *)&(value)))
+
+#define clts() ((void)0)
+#define read_cr0() ({ \
+       unsigned int __dummy; \
+       __asm__( \
+               "movl %%cr0,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr0(x) \
+       __asm__("movl %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+       unsigned int __dummy; \
+       __asm__( \
+               "movl %%cr4,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr4(x) \
+       __asm__("movl %0,%%cr4": :"r" (x));
+#define stts() (HYPERVISOR_fpu_taskswitch())
+
+#endif /* __KERNEL__ */
+
+#define wbinvd() \
+       __asm__ __volatile__ ("wbinvd": : :"memory");
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+       unsigned long __limit;
+       __asm__("lsll %1,%0"
+               :"=r" (__limit):"r" (segment));
+       return __limit+1;
+}
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((struct __xchg_dummy *)(x))
+
+
+/*
+ * The semantics of XCHGCMP8B are a bit strange, this is why
+ * there is a loop and the loading of %%eax and %%edx has to
+ * be inside. This inlines well in most cases, the cached
+ * cost is around ~38 cycles. (in the future we might want
+ * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+ * might have an implicit FPU-save as a cost, so it's not
+ * clear which path to go.)
+ */
+static inline void __set_64bit (unsigned long long * ptr,
+               unsigned int low, unsigned int high)
+{
+       __asm__ __volatile__ (
+               "\n1:\t"
+               "movl (%0), %%eax\n\t"
+               "movl 4(%0), %%edx\n\t"
+               "cmpxchg8b (%0)\n\t"
+               "jnz 1b"
+               : /* no outputs */
+               :       "D"(ptr),
+                       "b"(low),
+                       "c"(high)
+               :       "ax","dx","memory");
+}
+
+static inline void __set_64bit_constant (unsigned long long *ptr,
+                                                unsigned long long value)
+{
+       __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
+}
+#define ll_low(x)      *(((unsigned int*)&(x))+0)
+#define ll_high(x)     *(((unsigned int*)&(x))+1)
+
+static inline void __set_64bit_var (unsigned long long *ptr,
+                        unsigned long long value)
+{
+       __set_64bit(ptr,ll_low(value), ll_high(value));
+}
+
+#define set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit_constant(ptr, value) : \
+ __set_64bit_var(ptr, value) )
+
+#define _set_64bit(ptr,value) \
+(__builtin_constant_p(value) ? \
+ __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+ __set_64bit(ptr, ll_low(value), ll_high(value)) )
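The point of the cmpxchg8b loop is that a 32-bit CPU cannot store 8 bytes with a single plain mov, so a concurrent reader could otherwise observe a torn half-old/half-new value. A sketch of the same guarantee in portable terms (an assumption: the GCC __atomic builtin is a stand-in that compiles to a comparable cmpxchg8b loop on i586+):

    #include <stdint.h>

    static uint64_t shared_stamp;

    /* Same semantics as set_64bit() above: the full 8 bytes become
     * visible in a single indivisible step. */
    static void set_64bit_sketch(uint64_t *ptr, uint64_t value)
    {
            __atomic_store_n(ptr, value, __ATOMIC_SEQ_CST);
    }

    void update_stamp(uint64_t now)
    {
            set_64bit_sketch(&shared_stamp, now);   /* never torn */
    }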
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has a side effect, so the volatile attribute is necessary,
+ *       though strictly the constraint is imprecise: *ptr is really an
+ *       output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+       switch (size) {
+               case 1:
+                       __asm__ __volatile__("xchgb %b0,%1"
+                               :"=q" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 2:
+                       __asm__ __volatile__("xchgw %w0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 4:
+                       __asm__ __volatile__("xchgl %0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+       }
+       return x;
+}
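The classic consumer of xchg/tas() is a test-and-set spinlock: the atomic swap either acquires the lock (old value 0) or tells the caller to spin (old value 1). A standalone sketch using the GCC builtin, which emits the same single xchg instruction as the macro above:

    typedef struct { volatile int locked; } spinlock_sketch_t;

    static void spin_lock_sketch(spinlock_sketch_t *l)
    {
            /* Swap in 1 atomically; a nonzero old value means the lock
             * was already held, so wait for it to drop and retry. */
            while (__sync_lock_test_and_set(&l->locked, 1))
                    while (l->locked)
                            ;   /* read-only spin avoids bus traffic */
    }

    static void spin_unlock_sketch(spinlock_sketch_t *l)
    {
            __sync_lock_release(&l->locked);
    }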
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#ifdef CONFIG_X86_CMPXCHG
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       unsigned long prev;
+       switch (size) {
+       case 1:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 4:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
+    
+#else
+/* Compiling for a 386 proper. Is it worth implementing via cli/sti?  */
+#endif
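The canonical cmpxchg usage is the read/compute/retry loop: recompute from a fresh snapshot until the swap succeeds with no interference in between. Sketched with the GCC builtin as a stand-in for the cmpxchg() macro above:

    static void atomic_add_sketch(unsigned long *p, unsigned long delta)
    {
            unsigned long old, new_val;

            do {
                    old = *p;               /* snapshot              */
                    new_val = old + delta;
                    /* publish only if *p still equals the snapshot  */
            } while (__sync_val_compare_and_swap(p, old, new_val) != old);
    }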
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPU's follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPU's to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ *
+ * Some non-Intel clones support out-of-order stores; wmb() ceases to be a
+ * no-op for these.
+ */
+#define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb()  mb()
+
+#ifdef CONFIG_X86_OOSTORE
+#define wmb()  __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#else
+#define wmb()  __asm__ __volatile__ ("": : :"memory")
+#endif
+
+#ifdef CONFIG_SMP
+#define smp_mb()       mb()
+#define smp_rmb()      rmb()
+#define smp_wmb()      wmb()
+#else
+#define smp_mb()       barrier()
+#define smp_rmb()      barrier()
+#define smp_wmb()      barrier()
+#endif
+
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
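A standard illustration of where these barriers go: a producer must make its data globally visible before raising the ready flag, and the consumer must order its reads the same way. The sketch below marks the slots; on x86 without CONFIG_X86_OOSTORE the wmb() really is just the compiler barrier used here, while rmb() above is the heavier locked add.

    static int payload;
    static volatile int ready;

    void producer(void)
    {
            payload = 42;
            __asm__ __volatile__("" : : : "memory");  /* wmb() slot */
            ready = 1;
    }

    int consumer(void)
    {
            while (!ready)
                    ;                                 /* wait for flag */
            __asm__ __volatile__("" : : : "memory");  /* rmb() slot */
            return payload;                           /* sees 42 */
    }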
+
+#define __save_flags(x)                ((x) = HYPERVISOR_shared_info->events_enable); barrier()
+#define __restore_flags(x)                           \
+do {                                                  \
+    shared_info_t *_shared = HYPERVISOR_shared_info;  \
+    _shared->events_enable = (x);                     \
+    barrier();                                        \
+    if ( _shared->events && (x) ) do_hypervisor_callback(NULL);  \
+} while (0)
+#define __cli()                (HYPERVISOR_shared_info->events_enable = 0); barrier()
+#define __sti()                                      \
+do {                                                  \
+    shared_info_t *_shared = HYPERVISOR_shared_info;  \
+    _shared->events_enable = 1;                       \
+    barrier();                                        \
+    if ( _shared->events ) do_hypervisor_callback(NULL);  \
+} while (0)
+#define safe_halt()            ((void)0)
+
+/* For spinlocks etc */
+#define local_irq_save(x)      ((x) = HYPERVISOR_shared_info->events_enable); (HYPERVISOR_shared_info->events_enable = 0); barrier()
+#define local_irq_restore(x)   __restore_flags(x)
+#define local_irq_disable()    __cli()
+#define local_irq_enable()     __sti()
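Note what replaced cli/sti: no privileged instruction at all, only a flag in the shared-info page, plus a replay of any events that arrived while delivery was masked. Usage is unchanged from native Linux; a hypothetical call site looks like:

    /* Sketch of a driver-style critical section under these macros. */
    void example_critical_section(void)
    {
            unsigned long flags;

            local_irq_save(flags);    /* events_enable = 0: mask events */
            /* ... touch state shared with the event callback ... */
            local_irq_restore(flags); /* unmask, then replay pending
                                         events via the callback */
    }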
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+extern unsigned long dmi_broken;
+extern int is_sony_vaio_laptop;
+
+#define BROKEN_ACPI_Sx         0x0001
+#define BROKEN_INIT_AFTER_S1   0x0002
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h b/xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h
new file mode 100644 (file)
index 0000000..5ccd7d8
--- /dev/null
@@ -0,0 +1,172 @@
+#ifndef __ARCH_I386_TERMBITS_H__
+#define __ARCH_I386_TERMBITS_H__
+
+#include <linux/posix_types.h>
+
+typedef unsigned char  cc_t;
+typedef unsigned int   speed_t;
+typedef unsigned int   tcflag_t;
+
+#define NCCS 19
+struct termios {
+       tcflag_t c_iflag;               /* input mode flags */
+       tcflag_t c_oflag;               /* output mode flags */
+       tcflag_t c_cflag;               /* control mode flags */
+       tcflag_t c_lflag;               /* local mode flags */
+       cc_t c_line;                    /* line discipline */
+       cc_t c_cc[NCCS];                /* control characters */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK 0000001
+#define BRKINT 0000002
+#define IGNPAR 0000004
+#define PARMRK 0000010
+#define INPCK  0000020
+#define ISTRIP 0000040
+#define INLCR  0000100
+#define IGNCR  0000200
+#define ICRNL  0000400
+#define IUCLC  0001000
+#define IXON   0002000
+#define IXANY  0004000
+#define IXOFF  0010000
+#define IMAXBEL        0020000
+
+/* c_oflag bits */
+#define OPOST  0000001
+#define OLCUC  0000002
+#define ONLCR  0000004
+#define OCRNL  0000010
+#define ONOCR  0000020
+#define ONLRET 0000040
+#define OFILL  0000100
+#define OFDEL  0000200
+#define NLDLY  0000400
+#define   NL0  0000000
+#define   NL1  0000400
+#define CRDLY  0003000
+#define   CR0  0000000
+#define   CR1  0001000
+#define   CR2  0002000
+#define   CR3  0003000
+#define TABDLY 0014000
+#define   TAB0 0000000
+#define   TAB1 0004000
+#define   TAB2 0010000
+#define   TAB3 0014000
+#define   XTABS        0014000
+#define BSDLY  0020000
+#define   BS0  0000000
+#define   BS1  0020000
+#define VTDLY  0040000
+#define   VT0  0000000
+#define   VT1  0040000
+#define FFDLY  0100000
+#define   FF0  0000000
+#define   FF1  0100000
+
+/* c_cflag bit meaning */
+#define CBAUD  0010017
+#define  B0    0000000         /* hang up */
+#define  B50   0000001
+#define  B75   0000002
+#define  B110  0000003
+#define  B134  0000004
+#define  B150  0000005
+#define  B200  0000006
+#define  B300  0000007
+#define  B600  0000010
+#define  B1200 0000011
+#define  B1800 0000012
+#define  B2400 0000013
+#define  B4800 0000014
+#define  B9600 0000015
+#define  B19200        0000016
+#define  B38400        0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE  0000060
+#define   CS5  0000000
+#define   CS6  0000020
+#define   CS7  0000040
+#define   CS8  0000060
+#define CSTOPB 0000100
+#define CREAD  0000200
+#define PARENB 0000400
+#define PARODD 0001000
+#define HUPCL  0002000
+#define CLOCAL 0004000
+#define CBAUDEX 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD   002003600000  /* input baud rate (not used) */
+#define CMSPAR   010000000000          /* mark or space (stick) parity */
+#define CRTSCTS          020000000000          /* flow control */
+
+/* c_lflag bits */
+#define ISIG   0000001
+#define ICANON 0000002
+#define XCASE  0000004
+#define ECHO   0000010
+#define ECHOE  0000020
+#define ECHOK  0000040
+#define ECHONL 0000100
+#define NOFLSH 0000200
+#define TOSTOP 0000400
+#define ECHOCTL        0001000
+#define ECHOPRT        0002000
+#define ECHOKE 0004000
+#define FLUSHO 0010000
+#define PENDIN 0040000
+#define IEXTEN 0100000
+
+/* tcflow() and TCXONC use these */
+#define        TCOOFF          0
+#define        TCOON           1
+#define        TCIOFF          2
+#define        TCION           3
+
+/* tcflush() and TCFLSH use these */
+#define        TCIFLUSH        0
+#define        TCOFLUSH        1
+#define        TCIOFLUSH       2
+
+/* tcsetattr uses these */
+#define        TCSANOW         0
+#define        TCSADRAIN       1
+#define        TCSAFLUSH       2
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/termios.h b/xenolinux-2.4.16-sparse/include/asm-xeno/termios.h
new file mode 100644 (file)
index 0000000..c4cc5c8
--- /dev/null
@@ -0,0 +1,106 @@
+#ifndef _I386_TERMIOS_H
+#define _I386_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+       unsigned short ws_row;
+       unsigned short ws_col;
+       unsigned short ws_xpixel;
+       unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+       unsigned short c_iflag;         /* input mode flags */
+       unsigned short c_oflag;         /* output mode flags */
+       unsigned short c_cflag;         /* control mode flags */
+       unsigned short c_lflag;         /* local mode flags */
+       unsigned char c_line;           /* line discipline */
+       unsigned char c_cc[NCC];        /* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE       0x001
+#define TIOCM_DTR      0x002
+#define TIOCM_RTS      0x004
+#define TIOCM_ST       0x008
+#define TIOCM_SR       0x010
+#define TIOCM_CTS      0x020
+#define TIOCM_CAR      0x040
+#define TIOCM_RNG      0x080
+#define TIOCM_DSR      0x100
+#define TIOCM_CD       TIOCM_CAR
+#define TIOCM_RI       TIOCM_RNG
+#define TIOCM_OUT1     0x2000
+#define TIOCM_OUT2     0x4000
+#define TIOCM_LOOP     0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+/* line disciplines */
+#define N_TTY          0
+#define N_SLIP         1
+#define N_MOUSE                2
+#define N_PPP          3
+#define N_STRIP                4
+#define N_AX25         5
+#define N_X25          6       /* X.25 async */
+#define N_6PACK                7
+#define N_MASC         8       /* Reserved for Mobitex module <kaz@cafe.net> */
+#define N_R3964                9       /* Reserved for Simatic R3964 module */
+#define N_PROFIBUS_FDL 10      /* Reserved for Profibus <Dave@mvhi.com> */
+#define N_IRDA         11      /* Linux IR - http://irda.sourceforge.net/ */
+#define N_SMSBLOCK     12      /* SMS block mode - for talking to GSM data cards about SMS messages */
+#define N_HDLC         13      /* synchronous HDLC */
+#define N_SYNC_PPP     14      /* synchronous PPP */
+#define N_HCI          15  /* Bluetooth HCI UART */
+
+#ifdef __KERNEL__
+
+/*     intr=^C         quit=^\         erase=del       kill=^U
+       eof=^D          vtime=\0        vmin=\1         sxtc=\0
+       start=^Q        stop=^S         susp=^Z         eol=\0
+       reprint=^R      discard=^U      werase=^W       lnext=^V
+       eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
+       unsigned short __tmp; \
+       get_user(__tmp,&(termio)->x); \
+       *(unsigned short *) &(termios)->x = __tmp; \
+}
+
+#define user_termio_to_kernel_termios(termios, termio) \
+({ \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
+       copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios) \
+({ \
+       put_user((termios)->c_iflag, &(termio)->c_iflag); \
+       put_user((termios)->c_oflag, &(termio)->c_oflag); \
+       put_user((termios)->c_cflag, &(termio)->c_cflag); \
+       put_user((termios)->c_lflag, &(termio)->c_lflag); \
+       put_user((termios)->c_line,  &(termio)->c_line); \
+       copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+})
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif /* __KERNEL__ */
+
+#endif /* _I386_TERMIOS_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/timex.h b/xenolinux-2.4.16-sparse/include/asm-xeno/timex.h
new file mode 100644 (file)
index 0000000..97099dd
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * linux/include/asm-i386/timex.h
+ *
+ * i386 architecture timex specifications
+ */
+#ifndef _ASMi386_TIMEX_H
+#define _ASMi386_TIMEX_H
+
+#include <linux/config.h>
+#include <asm/msr.h>
+
+#define CLOCK_TICK_RATE        1193180 /* Underlying HZ */
+#define CLOCK_TICK_FACTOR      20      /* Factor of both 1000000 and CLOCK_TICK_RATE */
+#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
+       (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
+               << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have a SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is. 
+ */
+typedef unsigned long long cycles_t;
+
+extern cycles_t cacheflush_time;
+
+static inline cycles_t get_cycles (void)
+{
+#ifndef CONFIG_X86_TSC
+       return 0;
+#else
+       unsigned long long ret;
+
+       rdtscll(ret);
+       return ret;
+#endif
+}
+
+extern unsigned long cpu_khz;
+
+#endif
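Typical use of this interface pairs get_cycles() with cpu_khz to convert cycle deltas into wall time. A sketch of that pattern (assuming a TSC-capable CPU, i.e. CONFIG_X86_TSC, and ignoring counter wrap):

    static unsigned long cycles_to_usecs(cycles_t delta)
    {
            return (unsigned long)(delta / (cpu_khz / 1000));
    }

    void time_region(void)
    {
            cycles_t t0 = get_cycles();
            /* ... work being measured ... */
            cycles_t t1 = get_cycles();

            (void)cycles_to_usecs(t1 - t0);  /* elapsed microseconds */
    }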
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h b/xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h
new file mode 100644 (file)
index 0000000..69c0faa
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/tlb.h>
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/types.h
new file mode 100644 (file)
index 0000000..238635a
--- /dev/null
@@ -0,0 +1,57 @@
+#ifndef _I386_TYPES_H
+#define _I386_TYPES_H
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+#define BITS_PER_LONG 32
+
+/* DMA addresses come in generic and 64-bit flavours.  */
+
+#ifdef CONFIG_HIGHMEM
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+typedef u64 dma64_addr_t;
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h b/xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h
new file mode 100644 (file)
index 0000000..ecc6693
--- /dev/null
@@ -0,0 +1,606 @@
+#ifndef __i386_UACCESS_H
+#define __i386_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed; with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+
+#define KERNEL_DS      MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS                MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()       (KERNEL_DS)
+#define get_fs()       (current->addr_limit)
+#define set_fs(x)      (current->addr_limit = (x))
+
+#define segment_eq(a,b)        ((a).seg == (b).seg)
+
+extern int __verify_write(const void *, unsigned long);
+
+#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg))
+
+/*
+ * Uhhuh, this needs 33-bit arithmetic. We have a carry..
+ */
+#define __range_ok(addr,size) ({ \
+       unsigned long flag,sum; \
+       asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
+               :"=&r" (flag), "=r" (sum) \
+               :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \
+       flag; })
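The "33-bit arithmetic" is the overflow case: addr + size can wrap past 2^32, and a wrapped sum would wrongly compare as small. What the addl/sbbl sequence computes, spelled out in C (note the sense is inverted relative to the macro, which yields 0 on success):

    /* Returns nonzero when [addr, addr+size) fits below the limit. */
    static int range_ok_sketch(unsigned long addr, unsigned long size,
                               unsigned long limit)
    {
            unsigned long sum = addr + size;

            if (sum < addr)         /* carry: the addition wrapped */
                    return 0;
            return sum <= limit;
    }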
+
+#ifdef CONFIG_X86_WP_WORKS_OK
+
+#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
+
+#else
+
+#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \
+                        ((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \
+                        segment_eq(get_fs(),KERNEL_DS) || \
+                         __verify_write((void *)(addr),(size))))
+
+#endif
+
+static inline int verify_area(int type, const void * addr, unsigned long size)
+{
+       return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+       unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern unsigned long search_exception_table(unsigned long);
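For context, the consumer of this table is the page-fault path: on a fault in kernel mode the handler looks up the faulting EIP and, if a fixup stub exists, resumes there instead of oopsing. A sketch of that lookup (the regs structure here is illustrative, not the kernel's pt_regs):

    struct fault_regs { unsigned long eip; };

    static int fixup_exception_sketch(struct fault_regs *regs)
    {
            unsigned long fixup = search_exception_table(regs->eip);

            if (fixup) {
                    regs->eip = fixup;  /* continue at the fixup stub */
                    return 1;           /* fault handled */
            }
            return 0;                   /* genuine kernel fault */
    }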
+
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to pass pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+       __asm__ __volatile__("call __get_user_" #size \
+               :"=a" (ret),"=d" (x) \
+               :"0" (ptr))
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr)                                                        \
+({     int __ret_gu,__val_gu;                                          \
+       switch(sizeof (*(ptr))) {                                       \
+       case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;          \
+       case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;          \
+       case 4:  __get_user_x(4,__ret_gu,__val_gu,ptr); break;          \
+       default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;          \
+       }                                                               \
+       (x) = (__typeof__(*(ptr)))__val_gu;                             \
+       __ret_gu;                                                       \
+})
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define put_user(x,ptr)                                                        \
+  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+  __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size)                 \
+({                                                     \
+       long __pu_err;                                  \
+       __put_user_size((x),(ptr),(size),__pu_err);     \
+       __pu_err;                                       \
+})
+
+
+#define __put_user_check(x,ptr,size)                   \
+({                                                     \
+       long __pu_err = -EFAULT;                                        \
+       __typeof__(*(ptr)) *__pu_addr = (ptr);          \
+       if (access_ok(VERIFY_WRITE,__pu_addr,size))     \
+               __put_user_size((x),__pu_addr,(size),__pu_err); \
+       __pu_err;                                       \
+})                                                     
+
+#define __put_user_u64(x, addr, err)                           \
+       __asm__ __volatile__(                                   \
+               "1:     movl %%eax,0(%2)\n"                     \
+               "2:     movl %%edx,4(%2)\n"                     \
+               "3:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "4:     movl %3,%0\n"                           \
+               "       jmp 3b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,4b\n"                          \
+               "       .long 2b,4b\n"                          \
+               ".previous"                                     \
+               : "=r"(err)                                     \
+               : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+
+#define __put_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break;     \
+         case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break;     \
+         case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break;      \
+         case 8: __put_user_u64(x,ptr,retval); break;                  \
+         default: __put_user_bad();                                    \
+       }                                                               \
+} while (0)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %"rtype"1,%2\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     movl %3,%0\n"                           \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err)                                     \
+               : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
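+
+/*
+ * A minimal usage sketch (hypothetical helper; `uptr` is an untrusted
+ * user pointer).  Both accessors validate the address and return
+ * -EFAULT on a fault.
+ */
+static inline int double_in_place_sketch(int *uptr)
+{
+        int val;
+
+        if (get_user(val, uptr))
+                return -EFAULT;
+        return put_user(val * 2, uptr);
+}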
+
+
+#define __get_user_nocheck(x,ptr,size)                         \
+({                                                             \
+       long __gu_err, __gu_val;                                \
+       __get_user_size(__gu_val,(ptr),(size),__gu_err);        \
+       (x) = (__typeof__(*(ptr)))__gu_val;                     \
+       __gu_err;                                               \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break;     \
+         case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break;     \
+         case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break;      \
+         default: (x) = __get_user_bad();                              \
+       }                                                               \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %2,%"rtype"1\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     movl %3,%0\n"                           \
+               "       xor"itype" %"rtype"1,%"rtype"1\n"       \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 4\n"                             \
+               "       .long 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err), ltype (x)                          \
+               : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic, arbitrarily sized copy.  */
+#define __copy_user(to,from,size)                                      \
+do {                                                                   \
+       int __d0, __d1;                                                 \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movl %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
+
+#define __copy_user_zeroing(to,from,size)                              \
+do {                                                                   \
+       int __d0, __d1;                                                 \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movl %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "4:     pushl %0\n"                                     \
+               "       pushl %%eax\n"                                  \
+               "       xorl %%eax,%%eax\n"                             \
+               "       rep; stosb\n"                                   \
+               "       popl %%eax\n"                                   \
+               "       popl %0\n"                                      \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,4b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
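+
+/*
+ * What the _zeroing variant guarantees, as a plain-C sketch: bytes up
+ * to a fault are copied, everything after it is zero-filled so the
+ * caller never sees stale kernel memory, and the return value is the
+ * number of bytes NOT copied.  raw_copy_attempt() is hypothetical.
+ */
+extern unsigned long raw_copy_attempt(void *, const void *, unsigned long);
+
+static inline unsigned long
+copy_zeroing_sketch(void *to, const void *from, unsigned long n)
+{
+        unsigned long left = raw_copy_attempt(to, from, n);
+        unsigned long i;
+
+        for (i = 0; i < left; i++)              /* zero the tail ... */
+                ((char *)to)[n - left + i] = 0; /* ... past the fault */
+        return left;
+}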
+
+/* We make the __ versions of copy_from/to_user inline, because they're
+ * often used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user_zeroing(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user(to,from,n);
+       return n;
+}
+
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user(to, from, size)                   \
+do {                                                           \
+       int __d0, __d1;                                         \
+       switch (size & 3) {                                     \
+       default:                                                \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "2:     shl $2,%0\n"                    \
+                       "       jmp 1b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,2b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 1:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsb\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     shl $2,%0\n"                    \
+                       "4:     incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 2:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     shl $2,%0\n"                    \
+                       "4:     addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 3:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:     movsb\n"                        \
+                       "3:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "4:     shl $2,%0\n"                    \
+                       "5:     addl $2,%0\n"                   \
+                       "6:     incl %0\n"                      \
+                       "       jmp 3b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,4b\n"                  \
+                       "       .long 1b,5b\n"                  \
+                       "       .long 2b,6b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       }                                                       \
+} while (0)
+
+/* Optimize just a little bit when we know the size of the move. */
+#define __constant_copy_user_zeroing(to, from, size)           \
+do {                                                           \
+       int __d0, __d1;                                         \
+       switch (size & 3) {                                     \
+       default:                                                \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "2:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       jmp 1b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,2b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 1:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsb\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       "4:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       incl %0\n"                      \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 2:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "3:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosw\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "4:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosw\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       addl $2,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,3b\n"                  \
+                       "       .long 1b,4b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       case 3:                                                 \
+               __asm__ __volatile__(                           \
+                       "0:     rep; movsl\n"                   \
+                       "1:     movsw\n"                        \
+                       "2:     movsb\n"                        \
+                       "3:\n"                                  \
+                       ".section .fixup,\"ax\"\n"              \
+                       "4:     pushl %0\n"                     \
+                       "       pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       rep; stosl\n"                   \
+                       "       stosw\n"                        \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       popl %0\n"                      \
+                       "       shl $2,%0\n"                    \
+                       "       addl $3,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "5:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosw\n"                        \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       addl $3,%0\n"                   \
+                       "       jmp 2b\n"                       \
+                       "6:     pushl %%eax\n"                  \
+                       "       xorl %%eax,%%eax\n"             \
+                       "       stosb\n"                        \
+                       "       popl %%eax\n"                   \
+                       "       incl %0\n"                      \
+                       "       jmp 3b\n"                       \
+                       ".previous\n"                           \
+                       ".section __ex_table,\"a\"\n"           \
+                       "       .align 4\n"                     \
+                       "       .long 0b,4b\n"                  \
+                       "       .long 1b,5b\n"                  \
+                       "       .long 2b,6b\n"                  \
+                       ".previous"                             \
+                       : "=c"(size), "=&S" (__d0), "=&D" (__d1)\
+                       : "1"(from), "2"(to), "0"(size/4)       \
+                       : "memory");                            \
+               break;                                          \
+       }                                                       \
+} while (0)
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __constant_copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               __constant_copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __constant_copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __constant_copy_user_zeroing(to,from,n);
+       return n;
+}
+
+#define copy_to_user(to,from,n)                                \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user((to),(from),(n)) :     \
+        __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user((to),(from),(n)) :   \
+        __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user_nocheck((to),(from),(n)) :     \
+        __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n)                    \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user_nocheck((to),(from),(n)) :   \
+        __generic_copy_from_user_nocheck((to),(from),(n)))
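+
+/*
+ * Return convention: all of the copy routines return the number of
+ * bytes that could NOT be copied, so zero means success.  A usage
+ * sketch (hypothetical record type; the constant size lets
+ * __builtin_constant_p select the __constant_* variants above):
+ */
+struct record_sketch { int id; char name[28]; };
+
+static inline int fetch_record_sketch(struct record_sketch *dst,
+                                      const struct record_sketch *usrc)
+{
+        if (copy_from_user(dst, usrc, sizeof(*dst)))
+                return -EFAULT;         /* some bytes were left uncopied */
+        return 0;
+}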
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __i386_UACCESS_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h
new file mode 100644 (file)
index 0000000..b0db369
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ASMi386_UCONTEXT_H
+#define _ASMi386_UCONTEXT_H
+
+struct ucontext {
+       unsigned long     uc_flags;
+       struct ucontext  *uc_link;
+       stack_t           uc_stack;
+       struct sigcontext uc_mcontext;
+       sigset_t          uc_sigmask;   /* mask last for extensibility */
+};
+
+#endif /* !_ASMi386_UCONTEXT_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h b/xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h
new file mode 100644 (file)
index 0000000..7acd795
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef __I386_UNALIGNED_H
+#define __I386_UNALIGNED_H
+
+/*
+ * The i386 can do unaligned accesses itself. 
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
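+
+/*
+ * A small usage sketch (hypothetical helper; the offset is deliberately
+ * odd).  On i386 these expand to plain loads and stores; on stricter
+ * architectures the same code would use safe byte-wise accesses.
+ */
+static inline void bump_odd_u16_sketch(unsigned char *buf)
+{
+        unsigned short v = get_unaligned((unsigned short *)(buf + 1));
+        put_unaligned(v + 1, (unsigned short *)(buf + 1));
+}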
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h b/xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h
new file mode 100644 (file)
index 0000000..36de103
--- /dev/null
@@ -0,0 +1,359 @@
+#ifndef _ASM_I386_UNISTD_H_
+#define _ASM_I386_UNISTD_H_
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_exit                1
+#define __NR_fork                2
+#define __NR_read                3
+#define __NR_write               4
+#define __NR_open                5
+#define __NR_close               6
+#define __NR_waitpid             7
+#define __NR_creat               8
+#define __NR_link                9
+#define __NR_unlink             10
+#define __NR_execve             11
+#define __NR_chdir              12
+#define __NR_time               13
+#define __NR_mknod              14
+#define __NR_chmod              15
+#define __NR_lchown             16
+#define __NR_break              17
+#define __NR_oldstat            18
+#define __NR_lseek              19
+#define __NR_getpid             20
+#define __NR_mount              21
+#define __NR_umount             22
+#define __NR_setuid             23
+#define __NR_getuid             24
+#define __NR_stime              25
+#define __NR_ptrace             26
+#define __NR_alarm              27
+#define __NR_oldfstat           28
+#define __NR_pause              29
+#define __NR_utime              30
+#define __NR_stty               31
+#define __NR_gtty               32
+#define __NR_access             33
+#define __NR_nice               34
+#define __NR_ftime              35
+#define __NR_sync               36
+#define __NR_kill               37
+#define __NR_rename             38
+#define __NR_mkdir              39
+#define __NR_rmdir              40
+#define __NR_dup                41
+#define __NR_pipe               42
+#define __NR_times              43
+#define __NR_prof               44
+#define __NR_brk                45
+#define __NR_setgid             46
+#define __NR_getgid             47
+#define __NR_signal             48
+#define __NR_geteuid            49
+#define __NR_getegid            50
+#define __NR_acct               51
+#define __NR_umount2            52
+#define __NR_lock               53
+#define __NR_ioctl              54
+#define __NR_fcntl              55
+#define __NR_mpx                56
+#define __NR_setpgid            57
+#define __NR_ulimit             58
+#define __NR_oldolduname        59
+#define __NR_umask              60
+#define __NR_chroot             61
+#define __NR_ustat              62
+#define __NR_dup2               63
+#define __NR_getppid            64
+#define __NR_getpgrp            65
+#define __NR_setsid             66
+#define __NR_sigaction          67
+#define __NR_sgetmask           68
+#define __NR_ssetmask           69
+#define __NR_setreuid           70
+#define __NR_setregid           71
+#define __NR_sigsuspend                 72
+#define __NR_sigpending                 73
+#define __NR_sethostname        74
+#define __NR_setrlimit          75
+#define __NR_getrlimit          76     /* Back compatible 2Gig limited rlimit */
+#define __NR_getrusage          77
+#define __NR_gettimeofday       78
+#define __NR_settimeofday       79
+#define __NR_getgroups          80
+#define __NR_setgroups          81
+#define __NR_select             82
+#define __NR_symlink            83
+#define __NR_oldlstat           84
+#define __NR_readlink           85
+#define __NR_uselib             86
+#define __NR_swapon             87
+#define __NR_reboot             88
+#define __NR_readdir            89
+#define __NR_mmap               90
+#define __NR_munmap             91
+#define __NR_truncate           92
+#define __NR_ftruncate          93
+#define __NR_fchmod             94
+#define __NR_fchown             95
+#define __NR_getpriority        96
+#define __NR_setpriority        97
+#define __NR_profil             98
+#define __NR_statfs             99
+#define __NR_fstatfs           100
+#define __NR_ioperm            101
+#define __NR_socketcall                102
+#define __NR_syslog            103
+#define __NR_setitimer         104
+#define __NR_getitimer         105
+#define __NR_stat              106
+#define __NR_lstat             107
+#define __NR_fstat             108
+#define __NR_olduname          109
+#define __NR_iopl              110
+#define __NR_vhangup           111
+#define __NR_idle              112
+#define __NR_vm86old           113
+#define __NR_wait4             114
+#define __NR_swapoff           115
+#define __NR_sysinfo           116
+#define __NR_ipc               117
+#define __NR_fsync             118
+#define __NR_sigreturn         119
+#define __NR_clone             120
+#define __NR_setdomainname     121
+#define __NR_uname             122
+#define __NR_modify_ldt                123
+#define __NR_adjtimex          124
+#define __NR_mprotect          125
+#define __NR_sigprocmask       126
+#define __NR_create_module     127
+#define __NR_init_module       128
+#define __NR_delete_module     129
+#define __NR_get_kernel_syms   130
+#define __NR_quotactl          131
+#define __NR_getpgid           132
+#define __NR_fchdir            133
+#define __NR_bdflush           134
+#define __NR_sysfs             135
+#define __NR_personality       136
+#define __NR_afs_syscall       137 /* Syscall for Andrew File System */
+#define __NR_setfsuid          138
+#define __NR_setfsgid          139
+#define __NR__llseek           140
+#define __NR_getdents          141
+#define __NR__newselect                142
+#define __NR_flock             143
+#define __NR_msync             144
+#define __NR_readv             145
+#define __NR_writev            146
+#define __NR_getsid            147
+#define __NR_fdatasync         148
+#define __NR__sysctl           149
+#define __NR_mlock             150
+#define __NR_munlock           151
+#define __NR_mlockall          152
+#define __NR_munlockall                153
+#define __NR_sched_setparam            154
+#define __NR_sched_getparam            155
+#define __NR_sched_setscheduler                156
+#define __NR_sched_getscheduler                157
+#define __NR_sched_yield               158
+#define __NR_sched_get_priority_max    159
+#define __NR_sched_get_priority_min    160
+#define __NR_sched_rr_get_interval     161
+#define __NR_nanosleep         162
+#define __NR_mremap            163
+#define __NR_setresuid         164
+#define __NR_getresuid         165
+#define __NR_vm86              166
+#define __NR_query_module      167
+#define __NR_poll              168
+#define __NR_nfsservctl                169
+#define __NR_setresgid         170
+#define __NR_getresgid         171
+#define __NR_prctl              172
+#define __NR_rt_sigreturn      173
+#define __NR_rt_sigaction      174
+#define __NR_rt_sigprocmask    175
+#define __NR_rt_sigpending     176
+#define __NR_rt_sigtimedwait   177
+#define __NR_rt_sigqueueinfo   178
+#define __NR_rt_sigsuspend     179
+#define __NR_pread             180
+#define __NR_pwrite            181
+#define __NR_chown             182
+#define __NR_getcwd            183
+#define __NR_capget            184
+#define __NR_capset            185
+#define __NR_sigaltstack       186
+#define __NR_sendfile          187
+#define __NR_getpmsg           188     /* some people actually want streams */
+#define __NR_putpmsg           189     /* some people actually want streams */
+#define __NR_vfork             190
+#define __NR_ugetrlimit                191     /* SuS compliant getrlimit */
+#define __NR_mmap2             192
+#define __NR_truncate64                193
+#define __NR_ftruncate64       194
+#define __NR_stat64            195
+#define __NR_lstat64           196
+#define __NR_fstat64           197
+#define __NR_lchown32          198
+#define __NR_getuid32          199
+#define __NR_getgid32          200
+#define __NR_geteuid32         201
+#define __NR_getegid32         202
+#define __NR_setreuid32                203
+#define __NR_setregid32                204
+#define __NR_getgroups32       205
+#define __NR_setgroups32       206
+#define __NR_fchown32          207
+#define __NR_setresuid32       208
+#define __NR_getresuid32       209
+#define __NR_setresgid32       210
+#define __NR_getresgid32       211
+#define __NR_chown32           212
+#define __NR_setuid32          213
+#define __NR_setgid32          214
+#define __NR_setfsuid32                215
+#define __NR_setfsgid32                216
+#define __NR_pivot_root                217
+#define __NR_mincore           218
+#define __NR_madvise           219
+#define __NR_madvise1          219     /* delete when C lib stub is removed */
+#define __NR_getdents64                220
+#define __NR_fcntl64           221
+#define __NR_security          223     /* syscall for security modules */
+#define __NR_gettid            224
+#define __NR_readahead         225
+
+/* User-visible error numbers are in the range -1 to -124: see <asm-i386/errno.h>. */
+
+#define __syscall_return(type, res) \
+do { \
+       if ((unsigned long)(res) >= (unsigned long)(-125)) { \
+               errno = -(res); \
+               res = -1; \
+       } \
+       return (type) (res); \
+} while (0)
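+
+/*
+ * Worked example: if a syscall returns -2 (-ENOENT), then
+ * (unsigned long)(-2) >= (unsigned long)(-125) holds, so the stub sets
+ * errno = 2 and returns -1.  An ordinary result such as 42 falls below
+ * the threshold and is returned unchanged.
+ */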
+
+/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */
+#define _syscall0(type,name) \
+type name(void) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name)); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall1(type,name,type1,arg1) \
+type name(type1 arg1) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"b" ((long)(arg1))); \
+__syscall_return(type,__res); \
+}
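+
+/*
+ * For illustration, _syscall1(int,close,int,fd) expands to roughly:
+ *
+ *     int close(int fd)
+ *     {
+ *             long __res;
+ *             __asm__ volatile ("int $0x80"
+ *                     : "=a" (__res)
+ *                     : "0" (__NR_close), "b" ((long)(fd)));
+ *             __syscall_return(int, __res);
+ *     }
+ */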
+
+#define _syscall2(type,name,type1,arg1,type2,arg2) \
+type name(type1 arg1,type2 arg2) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+type name(type1 arg1,type2 arg2,type3 arg3) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+                 "d" ((long)(arg3))); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+         "d" ((long)(arg3)),"S" ((long)(arg4))); \
+__syscall_return(type,__res); \
+} 
+
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+         type5,arg5) \
+type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
+{ \
+long __res; \
+__asm__ volatile ("int $0x80" \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+         "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+         type5,arg5,type6,arg6) \
+type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \
+{ \
+long __res; \
+__asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; pop %%ebp" \
+       : "=a" (__res) \
+       : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \
+         "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \
+         "0" ((long)(arg6))); \
+__syscall_return(type,__res); \
+}
+
+#ifdef __KERNEL_SYSCALLS__
+
+/*
+ * We need these inline - forking from kernel space will result
+ * in NO COPY ON WRITE (!!!) until an execve is executed. This
+ * is no problem except for the stack. This is handled by not letting
+ * main() use the stack at all after fork(). Thus, no function
+ * calls - which means inline code for fork too, as otherwise we
+ * would use the stack upon exit from 'fork()'.
+ *
+ * Actually only pause and fork are needed inline, so that there
+ * won't be any messing with the stack from main(), but we define
+ * some others too.
+ */
+#define __NR__exit __NR_exit
+static inline _syscall0(int,pause)
+static inline _syscall0(int,sync)
+static inline _syscall0(pid_t,setsid)
+static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
+static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
+static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,whence)
+static inline _syscall1(int,dup,int,fd)
+static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
+static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
+static inline _syscall1(int,close,int,fd)
+static inline _syscall1(int,_exit,int,exitcode)
+static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
+static inline _syscall1(int,delete_module,const char *,name)
+
+static inline pid_t wait(int * wait_stat)
+{
+       return waitpid(-1,wait_stat,0);
+}
+
+#endif
+
+#endif /* _ASM_I386_UNISTD_H_ */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/user.h b/xenolinux-2.4.16-sparse/include/asm-xeno/user.h
new file mode 100644 (file)
index 0000000..ddc06ea
--- /dev/null
@@ -0,0 +1,122 @@
+#ifndef _I386_USER_H
+#define _I386_USER_H
+
+#include <asm/page.h>
+#include <linux/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+   can understand it and provide useful information to the user (under
+   linux we use the 'trad-core' bfd).  There are quite a number of
+   obstacles to being able to view the contents of the floating point
+   registers, and until these are solved you will not be able to view the
+   contents of them.  Actually, you can read in the core file and look at
+   the contents of the user struct to find out what the floating point
+   registers contain.
+
+   The actual file contents are as follows:
+
+   UPAGE: 1 page consisting of a user struct that tells gdb what is present
+   in the file.  Directly after this is a copy of the task_struct, which
+   is currently not used by gdb, but it may come in useful at some point.
+   All of the registers are stored as part of the upage.  The upage should
+   always be only one page.
+
+   DATA: The data area is stored.  We use current->end_text to
+   current->brk to pick up all of the user variables, plus any memory
+   that may have been malloced.  No attempt is made to determine if a page
+   is demand-zero or if a page is totally unused; we just cover the entire
+   range.  All of the addresses are rounded in such a way that an integral
+   number of pages is written.
+
+   STACK: We need the stack information in order to get a meaningful
+   backtrace.  We need to write the data from (esp) to
+   current->start_stack, so we round each of these off in order to be able
+   to write an integer number of pages.
+
+   The minimum core file size is 3 pages, or 12288 bytes.  */
+
+/*
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
+ * interacting with the FXSR-format floating point environment.  Floating
+ * point data can be accessed in the regular format in the usual manner,
+ * and both the standard and SIMD floating point data can be accessed via
+ * the new ptrace requests.  In either case, changes to the FPU environment
+ * will be reflected in the task's state as expected.
+ */
+
+struct user_i387_struct {
+       long    cwd;
+       long    swd;
+       long    twd;
+       long    fip;
+       long    fcs;
+       long    foo;
+       long    fos;
+       long    st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+};
+
+struct user_fxsr_struct {
+       unsigned short  cwd;
+       unsigned short  swd;
+       unsigned short  twd;
+       unsigned short  fop;
+       long    fip;
+       long    fcs;
+       long    foo;
+       long    fos;
+       long    mxcsr;
+       long    reserved;
+       long    st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+       long    xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+       long    padding[56];
+};
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+struct user_regs_struct {
+       long ebx, ecx, edx, esi, edi, ebp, eax;
+       unsigned short ds, __ds, es, __es;
+       unsigned short fs, __fs, gs, __gs;
+       long orig_eax, eip;
+       unsigned short cs, __cs;
+       long eflags, esp;
+       unsigned short ss, __ss;
+};
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+   this will be used by gdb to figure out where the data and stack segments
+   are within the file, and what virtual addresses to use. */
+struct user{
+/* We start with the registers, to mimic the way that "memory" is returned
+   from the ptrace(3,...) function.  */
+  struct user_regs_struct regs;                /* Where the registers are actually stored */
+/* ptrace does not yet supply these.  Someday.... */
+  int u_fpvalid;               /* True if the math co-processor is being used. */
+                               /* Not yet supplied by ptrace. */
+  struct user_i387_struct i387;        /* Math Co-processor registers. */
+/* The rest of this junk is to help gdb figure out what goes where */
+  unsigned long int u_tsize;   /* Text segment size (pages). */
+  unsigned long int u_dsize;   /* Data segment size (pages). */
+  unsigned long int u_ssize;   /* Stack segment size (pages). */
+  unsigned long start_code;     /* Starting virtual address of text. */
+  unsigned long start_stack;   /* Starting virtual address of stack area.
+                                  This is actually the bottom of the stack,
+                                  the top of the stack is always found in the
+                                  esp register.  */
+  long int signal;                     /* Signal that caused the core dump. */
+  int reserved;                        /* No longer used */
+  struct user_pt_regs * u_ar0; /* Used by gdb to help find the values for */
+                               /* the registers. */
+  struct user_i387_struct* u_fpstate;  /* Math Co-processor pointer. */
+  unsigned long magic;         /* To uniquely identify a core file */
+  char u_comm[32];             /* User command that was responsible */
+  int u_debugreg[8];
+};
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _I386_USER_H */
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/vga.h b/xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
new file mode 100644 (file)
index 0000000..ef0c0e5
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ *     Access to VGA videoram
+ *
+ *     (c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+/*
+ *     On the PC, we can just recalculate addresses and then
+ *     access the videoram directly without any black magic.
+ */
+
+#define VGA_MAP_MEM(x) (unsigned long)phys_to_virt(x)
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
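+
+/*
+ * A usage sketch (illustration only): 0xb8000 is the standard colour
+ * text-mode buffer; 0x07 is the grey-on-black attribute byte.
+ */
+static inline void vga_poke_sketch(void)
+{
+        char *vram = (char *)VGA_MAP_MEM(0xb8000);
+
+        vga_writeb('A', vram);          /* character cell, top left */
+        vga_writeb(0x07, vram + 1);     /* its attribute byte */
+}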
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/xor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
new file mode 100644 (file)
index 0000000..5a99f54
--- /dev/null
@@ -0,0 +1,859 @@
+/*
+ * include/asm-i386/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High-speed RAID5 checksumming functions utilizing MMX instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+#define FPU_SAVE                                                       \
+  do {                                                                 \
+       if (!(current->flags & PF_USEDFPU))                             \
+               __asm__ __volatile__ (" clts;\n");                      \
+       __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0]));    \
+  } while (0)
+
+#define FPU_RESTORE                                                    \
+  do {                                                                 \
+       __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0]));         \
+       if (!(current->flags & PF_USEDFPU))                             \
+               stts();                                                 \
+  } while (0)
+
+#define LD(x,y)                "       movq   8*("#x")(%1), %%mm"#y"   ;\n"
+#define ST(x,y)                "       movq %%mm"#y",   8*("#x")(%1)   ;\n"
+#define XO1(x,y)       "       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
+#define XO2(x,y)       "       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
+#define XO3(x,y)       "       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
+#define XO4(x,y)       "       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
+
+
+static void
+xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+       ST(i,0)                                 \
+               XO1(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO1(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO1(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addl $128, %1         ;\n"
+       "       addl $128, %2         ;\n"
+       "       decl %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "r" (lines),
+         "r" (p1), "r" (p2)
+       : "memory");
+
+       FPU_RESTORE;
+}
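+
+/*
+ * Reference semantics, as a plain-C sketch (not the in-tree fallback):
+ * each variant XORs the extra source buffers into p1; the MMX version
+ * above merely processes 128 bytes per loop iteration.
+ */
+static inline void
+xor_ref_2_sketch(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+        unsigned long i, words = bytes / sizeof(unsigned long);
+
+        for (i = 0; i < words; i++)
+                p1[i] ^= p2[i];
+}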
+
+static void
+xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+       ST(i,0)                                 \
+               XO2(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO2(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO2(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addl $128, %1         ;\n"
+       "       addl $128, %2         ;\n"
+       "       addl $128, %3         ;\n"
+       "       decl %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+               XO2(i+1,1)                      \
+                       XO2(i+2,2)              \
+                               XO2(i+3,3)      \
+       XO3(i,0)                                \
+       ST(i,0)                                 \
+               XO3(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO3(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO3(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addl $128, %1         ;\n"
+       "       addl $128, %2         ;\n"
+       "       addl $128, %3         ;\n"
+       "       addl $128, %4         ;\n"
+       "       decl %0               ;\n"
+       "       jnz 1b                ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
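+/*
+ * In the five-source version below, `lines' is passed with a "g"-class
+ * constraint rather than "r": with the loop counter plus five source
+ * pointers, ia32 runs out of general registers, so the counter is
+ * allowed to live in memory.
+ */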
+static void
+xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+               XO2(i+1,1)                      \
+                       XO2(i+2,2)              \
+                               XO2(i+3,3)      \
+       XO3(i,0)                                \
+               XO3(i+1,1)                      \
+                       XO3(i+2,2)              \
+                               XO3(i+3,3)      \
+       XO4(i,0)                                \
+       ST(i,0)                                 \
+               XO4(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO4(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO4(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addl $128, %1         ;\n"
+       "       addl $128, %2         ;\n"
+       "       addl $128, %3         ;\n"
+       "       addl $128, %4         ;\n"
+       "       addl $128, %5         ;\n"
+       "       decl %0               ;\n"
+       "       jnz 1b                ;\n"
+       : "+g" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4), "+r" (p5)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef BLOCK
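+
+/*
+ * For reference, a plain C sketch of what these routines compute (the
+ * name xor_c_2 is illustrative, not part of the kernel): every word of
+ * p1 is xor'd with the corresponding word of p2.  The pII MMX versions
+ * above process 128 bytes per unrolled loop line; the p5 versions
+ * below use 64-byte lines.
+ */
+#if 0
+static void
+xor_c_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long i, words = bytes / sizeof(unsigned long);
+
+       for (i = 0; i < words; i++)
+               p1[i] ^= p2[i];
+}
+#endif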
+
+static void
+xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32                  ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+
+       "       addl $64, %1         ;\n"
+       "       addl $64, %2         ;\n"
+       "       decl %0              ;\n"
+       "       jnz 1b               ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+
+       "       addl $64, %1         ;\n"
+       "       addl $64, %2         ;\n"
+       "       addl $64, %3         ;\n"
+       "       decl %0              ;\n"
+       "       jnz 1b               ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor   (%4), %%mm0   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       pxor  8(%4), %%mm1   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       pxor 16(%4), %%mm2   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 24(%4), %%mm3   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       pxor 32(%4), %%mm4   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       pxor 40(%4), %%mm5   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%4), %%mm6   ;\n"
+       "       pxor 56(%4), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+
+       "       addl $64, %1         ;\n"
+       "       addl $64, %2         ;\n"
+       "       addl $64, %3         ;\n"
+       "       addl $64, %4         ;\n"
+       "       decl %0              ;\n"
+       "       jnz 1b               ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       pxor   (%4), %%mm0   ;\n"
+       "       pxor  8(%4), %%mm1   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       pxor   (%5), %%mm0   ;\n"
+       "       pxor  8(%5), %%mm1   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor 16(%4), %%mm2   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       pxor 16(%5), %%mm2   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 24(%4), %%mm3   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 24(%5), %%mm3   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%4), %%mm4   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       pxor 32(%5), %%mm4   ;\n"
+       "       pxor 40(%4), %%mm5   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       pxor 40(%5), %%mm5   ;\n"
+       "       pxor 48(%4), %%mm6   ;\n"
+       "       pxor 56(%4), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%5), %%mm6   ;\n"
+       "       pxor 56(%5), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+
+       "       addl $64, %1         ;\n"
+       "       addl $64, %2         ;\n"
+       "       addl $64, %3         ;\n"
+       "       addl $64, %4         ;\n"
+       "       addl $64, %5         ;\n"
+       "       decl %0              ;\n"
+       "       jnz 1b               ;\n"
+       : "+g" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4), "+r" (p5)
+       :
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static struct xor_block_template xor_block_pII_mmx = {
+       name: "pII_mmx",
+       do_2: xor_pII_mmx_2,
+       do_3: xor_pII_mmx_3,
+       do_4: xor_pII_mmx_4,
+       do_5: xor_pII_mmx_5,
+};
+
+static struct xor_block_template xor_block_p5_mmx = {
+       name: "p5_mmx",
+       do_2: xor_p5_mmx_2,
+       do_3: xor_p5_mmx_3,
+       do_4: xor_p5_mmx_4,
+       do_5: xor_p5_mmx_5,
+};
+
+#undef FPU_SAVE
+#undef FPU_RESTORE
+
+/*
+ * Cache-avoiding checksumming (XOR) functions utilizing KNI (SSE)
+ * instructions.
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define XMMS_SAVE                              \
+       __asm__ __volatile__ (                  \
+               "movl %%cr0,%0          ;\n\t"  \
+               "clts                   ;\n\t"  \
+               "movups %%xmm0,(%1)     ;\n\t"  \
+               "movups %%xmm1,0x10(%1) ;\n\t"  \
+               "movups %%xmm2,0x20(%1) ;\n\t"  \
+               "movups %%xmm3,0x30(%1) ;\n\t"  \
+               : "=r" (cr0)                    \
+               : "r" (xmm_save)                \
+               : "memory")
+
+#define XMMS_RESTORE                           \
+       __asm__ __volatile__ (                  \
+               "sfence                 ;\n\t"  \
+               "movups (%1),%%xmm0     ;\n\t"  \
+               "movups 0x10(%1),%%xmm1 ;\n\t"  \
+               "movups 0x20(%1),%%xmm2 ;\n\t"  \
+               "movups 0x30(%1),%%xmm3 ;\n\t"  \
+               "movl   %0,%%cr0        ;\n\t"  \
+               :                               \
+               : "r" (cr0), "r" (xmm_save)     \
+               : "memory")
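+
+/*
+ * XMMS_SAVE clears CR0.TS via clts so the %xmm registers can be used
+ * without faulting, then spills %xmm0-%xmm3 to a local buffer.
+ * XMMS_RESTORE fences outstanding stores, reloads the registers, and
+ * writes the saved value back to %cr0 so lazy FPU state switching
+ * still works for the interrupted task.
+ */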
+
+#define OFFS(x)                "16*("#x")"
+#define PF_OFFS(x)     "256+16*("#x")"
+#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%1)            ;\n"
+#define LD(x,y)                "       movaps   "OFFS(x)"(%1), %%xmm"#y"       ;\n"
+#define ST(x,y)                "       movaps %%xmm"#y",   "OFFS(x)"(%1)       ;\n"
+#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%2)            ;\n"
+#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%3)            ;\n"
+#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%4)            ;\n"
+#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%5)            ;\n"
+#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%6)            ;\n"
+#define XO1(x,y)       "       xorps   "OFFS(x)"(%2), %%xmm"#y"        ;\n"
+#define XO2(x,y)       "       xorps   "OFFS(x)"(%3), %%xmm"#y"        ;\n"
+#define XO3(x,y)       "       xorps   "OFFS(x)"(%4), %%xmm"#y"        ;\n"
+#define XO4(x,y)       "       xorps   "OFFS(x)"(%5), %%xmm"#y"        ;\n"
+#define XO5(x,y)       "       xorps   "OFFS(x)"(%6), %%xmm"#y"        ;\n"
+
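+/*
+ * Macro scheme for the SSE loops below: LD/ST move 16-byte words
+ * between memory and %xmm registers, XOn xors in the nth extra source,
+ * and PFn prefetches (prefetchnta) 256 bytes ahead -- one full loop
+ * iteration -- so data streams through the cache just ahead of use.
+ */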
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       int cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2)
+       :
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       int cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3)
+       :
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       int cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       addl $256, %4           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+       :
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       int cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               PF4(i)                                  \
+                               PF4(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               XO4(i,0)                                \
+                       XO4(i+1,1)                      \
+                               XO4(i+2,2)              \
+                                       XO4(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addl $256, %1           ;\n"
+        "       addl $256, %2           ;\n"
+        "       addl $256, %3           ;\n"
+        "       addl $256, %4           ;\n"
+        "       addl $256, %5           ;\n"
+        "       decl %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       : "+r" (lines),
+         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4), "+r" (p5)
+       :
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_pIII_sse = {
+        name: "pIII_sse",
+        do_2: xor_sse_2,
+        do_3: xor_sse_3,
+        do_4: xor_sse_4,
+        do_5: xor_sse_5,
+};
+
+/* Also try the generic routines.  */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+       do {                                            \
+               xor_speed(&xor_block_8regs);            \
+               xor_speed(&xor_block_32regs);           \
+               if (cpu_has_xmm)                        \
+                       xor_speed(&xor_block_pIII_sse); \
+               if (md_cpu_has_mmx()) {                 \
+                       xor_speed(&xor_block_pII_mmx);  \
+                       xor_speed(&xor_block_p5_mmx);   \
+               }                                       \
+       } while (0)
+
+/* We force the use of the SSE xor block because it can write around the
+   L2 cache.  We may also be able to load into L1 only, depending on how
+   the CPU deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+       (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
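+
+/*
+ * These templates are consumed by the RAID driver (drivers/md/xor.c),
+ * which at boot runs XOR_TRY_TEMPLATES, timing each candidate with
+ * xor_speed() and keeping the fastest; XOR_SELECT_TEMPLATE may then
+ * override that choice (here: always prefer the SSE block when
+ * cpu_has_xmm).
+ */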
diff --git a/xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h b/xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
new file mode 100644 (file)
index 0000000..67dbfb8
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * linux/include/linux/sunrpc/debug.h
+ *
+ * Debugging support for sunrpc module
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LINUX_SUNRPC_DEBUG_H_
+#define _LINUX_SUNRPC_DEBUG_H_
+
+#include <linux/config.h>
+
+#include <linux/timer.h>
+#include <linux/tqueue.h>
+
+/*
+ * Enable RPC debugging/profiling.
+ */
+/*#ifdef CONFIG_SYSCTL*/
+/*#define  RPC_DEBUG*/
+/*#endif*/
+/* #define  RPC_PROFILE */
+
+/*
+ * RPC debug facilities
+ */
+#define RPCDBG_XPRT            0x0001
+#define RPCDBG_CALL            0x0002
+#define RPCDBG_DEBUG           0x0004
+#define RPCDBG_NFS             0x0008
+#define RPCDBG_AUTH            0x0010
+#define RPCDBG_PMAP            0x0020
+#define RPCDBG_SCHED           0x0040
+#define RPCDBG_SVCSOCK         0x0100
+#define RPCDBG_SVCDSP          0x0200
+#define RPCDBG_MISC            0x0400
+#define RPCDBG_ALL             0x7fff
+
+#ifdef __KERNEL__
+
+/*
+ * Debugging macros etc
+ */
+#ifdef RPC_DEBUG
+extern unsigned int            rpc_debug;
+extern unsigned int            nfs_debug;
+extern unsigned int            nfsd_debug;
+extern unsigned int            nlm_debug;
+#endif
+
+#define dprintk(args...)       dfprintk(FACILITY, ## args)
+
+#undef ifdebug
+#ifdef RPC_DEBUG                       
+# define ifdebug(fac)          if (rpc_debug & RPCDBG_##fac)
+# define dfprintk(fac, args...)        do { ifdebug(fac) printk(args); } while(0)
+# define RPC_IFDEBUG(x)                x
+#else
+# define dfprintk(fac, args...)        do ; while (0)
+# define RPC_IFDEBUG(x)
+#endif
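+
+/*
+ * Typical usage (illustrative): a source file defines FACILITY before
+ * using dprintk(), e.g.
+ *
+ *     #define FACILITY        RPCDBG_XPRT
+ *     ...
+ *     dprintk("RPC: transport %p connected\n", xprt);
+ *
+ * and the message is emitted only when the RPCDBG_XPRT bit is set in
+ * rpc_debug.
+ */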
+
+#ifdef RPC_PROFILE
+# define pprintk(args...)      printk(args)
+#else
+# define pprintk(args...)      do ; while (0)
+#endif
+
+/*
+ * Sysctl interface for RPC debugging
+ */
+#ifdef RPC_DEBUG
+void           rpc_register_sysctl(void);
+void           rpc_unregister_sysctl(void);
+#endif
+
+#endif /* __KERNEL__ */
+
+/*
+ * Declarations for the sysctl debug interface, which allows reading and
+ * changing the debug flags for rpc, nfs, nfsd, and lockd. Since the sunrpc
+ * module currently registers its sysctl table dynamically, the sysctl path
+ * for module FOO is <CTL_SUNRPC, CTL_FOODEBUG>.
+ */
+#define CTL_SUNRPC     7249    /* arbitrary and hopefully unused */
+
+enum {
+       CTL_RPCDEBUG = 1,
+       CTL_NFSDEBUG,
+       CTL_NFSDDEBUG,
+       CTL_NLMDEBUG,
+};
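+
+/*
+ * For example, once registered, the rpc_debug flag word is reachable
+ * via the sysctl path <CTL_SUNRPC, CTL_RPCDEBUG>, i.e. 7249.1.
+ */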
+
+#endif /* _LINUX_SUNRPC_DEBUG_H_ */
diff --git a/xenolinux-2.4.16-sparse/kernel/panic.c b/xenolinux-2.4.16-sparse/kernel/panic.c
new file mode 100644 (file)
index 0000000..8a7237f
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ *  linux/kernel/panic.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ * This function is used through-out the kernel (including mm and fs)
+ * to indicate a major problem.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/sysrq.h>
+#include <linux/interrupt.h>
+
+asmlinkage void sys_sync(void);        /* it's really int */
+
+int panic_timeout;
+
+struct notifier_block *panic_notifier_list;
+
+static int __init panic_setup(char *str)
+{
+       panic_timeout = simple_strtoul(str, NULL, 0);
+       return 1;
+}
+
+__setup("panic=", panic_setup);
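+
+/*
+ * Booting with e.g. "panic=30" therefore makes a panicked machine
+ * reboot after 30 seconds; panic_timeout defaults to 0, which leaves
+ * the machine spinning in the loop at the end of panic().
+ */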
+
+/**
+ *     panic - halt the system
+ *     @fmt: The text string to print
+ *
+ *     Display a message, then perform cleanups. Functions in the panic
+ *     notifier list are called after the filesystem cache is flushed (when possible).
+ *
+ *     This function never returns.
+ */
+NORET_TYPE void panic(const char * fmt, ...)
+{
+       static char buf[1024];
+       va_list args;
+#if defined(CONFIG_ARCH_S390)
+        unsigned long caller = (unsigned long) __builtin_return_address(0);
+#endif
+
+       bust_spinlocks(1);
+       va_start(args, fmt);
+       vsnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+       printk(KERN_EMERG "Kernel panic: %s\n",buf);
+       if (in_interrupt())
+               printk(KERN_EMERG "In interrupt handler - not syncing\n");
+       else if (!current->pid)
+               printk(KERN_EMERG "In idle task - not syncing\n");
+       else
+               sys_sync();
+       bust_spinlocks(0);
+
+#ifdef CONFIG_SMP
+       smp_send_stop();
+#endif
+
+       notifier_call_chain(&panic_notifier_list, 0, NULL);
+
+       if (panic_timeout > 0)
+       {
+               /*
+                * Delay panic_timeout seconds before rebooting the machine.
+                * We can't use the "normal" timers since we just panicked.
+                */
+               printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+               mdelay(panic_timeout*1000);
+               /*
+                *      Should we run the reboot notifiers? For the moment I'm
+                *      choosing not to: they might crash, be corrupt, or do
+                *      more harm than good for other reasons.
+                */
+               machine_restart(NULL);
+       }
+#ifdef __sparc__
+       {
+               extern int stop_a_enabled;
+               /* Make sure the user can actually press L1-A */
+               stop_a_enabled = 1;
+               printk("Press L1-A to return to the boot prom\n");
+       }
+#endif
+#if defined(CONFIG_ARCH_S390)
+        disabled_wait(caller);
+#endif
+       sti();
+       for(;;) {
+               CHECK_EMERGENCY_SYNC
+#if defined(CONFIG_XENO)
+                HYPERVISOR_exit();
+#endif
+       }
+}
+
+/**
+ *     print_tainted - return a string to represent the kernel taint state.
+ *
+ *     The string is overwritten by the next call to print_tainted().
+ */
+const char *print_tainted(void)
+{
+       static char buf[20];
+       if (tainted) {
+               snprintf(buf, sizeof(buf), "Tainted: %c%c",
+                       tainted & 1 ? 'P' : 'G',
+                       tainted & 2 ? 'F' : ' ');
+       }
+       else
+               snprintf(buf, sizeof(buf), "Not tainted");
+       return(buf);
+}
+
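+/*
+ * Taint flags: bit 0 ('P' vs 'G') is set when a proprietary module has
+ * been loaded; bit 1 ('F') when a module load was forced.
+ */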
+int tainted = 0;
diff --git a/xenolinux-2.4.16-sparse/mk b/xenolinux-2.4.16-sparse/mk
new file mode 100755 (executable)
index 0000000..70cb988
--- /dev/null
@@ -0,0 +1,6 @@
+export INSTALL_MOD_PATH=/local/scratch/kaf24/xeno/linux-install
+ARCH=xeno make -j 4 bzImage
+ARCH=xeno make -j 4 modules
+ARCH=xeno INSTALL_MOD_PATH=/anfs/nta3/xeno-roots/kaf24/root0 really make modules_install
+ARCH=xeno INSTALL_MOD_PATH=/anfs/nta3/xeno-roots/kaf24/root1 really make modules_install
+cp arch/xeno/boot/image.gz /usr/groups/pegasus/boot/kaf24/meteors/vmlinux.gz