From: smh22@boulderdash.cl.cam.ac.uk Date: Wed, 20 Nov 2002 12:02:17 +0000 (+0000) Subject: bitkeeper revision 1.2 (3ddb79c9KusG02eh7i-uXkgY0IksKA) X-Git-Tag: beta1~231 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=4676bbf96dc8;p=xen.git bitkeeper revision 1.2 (3ddb79c9KusG02eh7i-uXkgY0IksKA) Import changeset --- diff --git a/.rootkeys b/.rootkeys index f93931bc9c..1544186af7 100644 --- a/.rootkeys +++ b/.rootkeys @@ -1,2 +1,402 @@ 3ddb6b0bKlMz_dz-M59a1mkUa1lASw BitKeeper/etc/config 3ddb6b0buTaC5zg1_a8FoAR9FWi_mw BitKeeper/etc/ignore +3ddb79c9_hgSp-gsQm8HqWM_9W3B_A BitKeeper/etc/logging_ok +3ddb79bcbOVHh38VJzc97-JEGD4dJQ xen-2.4.16/Makefile +3ddb79bcCa2VbsMp7mWKlhgwLQUQGA xen-2.4.16/README +3ddb79bcWnTwYsQRWl_PaneJfa6p0w xen-2.4.16/Rules.mk +3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen-2.4.16/arch/i386/Makefile +3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen-2.4.16/arch/i386/Rules.mk +3ddb79bcsjinG9k1KcvbVBuas1R2dA xen-2.4.16/arch/i386/apic.c +3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen-2.4.16/arch/i386/boot/boot.S +3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen-2.4.16/arch/i386/delay.c +3ddb79bcecupHj56ZbTa3B0FxDowMg xen-2.4.16/arch/i386/entry.S +3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen-2.4.16/arch/i386/extable.c +3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen-2.4.16/arch/i386/i387.c +3ddb79bcCAq6IpdkHueChoVTfXqEQQ xen-2.4.16/arch/i386/i8259.c +3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen-2.4.16/arch/i386/idle0_task.c +3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen-2.4.16/arch/i386/io_apic.c +3ddb79bc1uNlAtc-84Ioq4qfcnI_CQ xen-2.4.16/arch/i386/ioremap.c +3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen-2.4.16/arch/i386/irq.c +3ddb79bcHwuCQDjBICDTSis52hWguw xen-2.4.16/arch/i386/mm.c +3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen-2.4.16/arch/i386/mpparse.c +3ddb79bcnL-_Dtsbtjgxl7vJU3vBiQ xen-2.4.16/arch/i386/pci-dma.c +3ddb79bdeJ7_86z03yTAPIeeywOg3Q xen-2.4.16/arch/i386/pci-i386.c +3ddb79bdIKgipvGoqExEQ7jawfVowA xen-2.4.16/arch/i386/pci-i386.h +3ddb79bdHe6_Uij4-glW91vInNtBYQ xen-2.4.16/arch/i386/pci-irq.c +3ddb79bcZ_2FxINljqNSkqa17ISyJw xen-2.4.16/arch/i386/pci-pc.c +3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen-2.4.16/arch/i386/process.c +3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen-2.4.16/arch/i386/rwlock.c +3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen-2.4.16/arch/i386/setup.c +3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen-2.4.16/arch/i386/smp.c +3ddb79bcfUN3-UBCPzX26IU8bq-3aw xen-2.4.16/arch/i386/smpboot.c +3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen-2.4.16/arch/i386/time.c +3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen-2.4.16/arch/i386/trampoline.S +3ddb79bcOftONV9h4QCxXOfiT0h91w xen-2.4.16/arch/i386/traps.c +3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen-2.4.16/arch/i386/usercopy.c +3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen-2.4.16/arch/i386/xeno.lds +3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen-2.4.16/common/Makefile +3ddb79bddEYJbcURvqqcx99Yl2iAhQ xen-2.4.16/common/block.c +3ddb79bdrqnW93GR9gZk1OJe1qK-iQ xen-2.4.16/common/brlock.c +3ddb79bdLX_P6iB7ILiblRLWvebapg xen-2.4.16/common/dom0_ops.c +3ddb79bdYO5D8Av12NHqPeSviav7cg xen-2.4.16/common/domain.c +3ddb79bdeyutmaXEfpQvvxj7eQ0fCw xen-2.4.16/common/event.c +3ddb79bd9drcFPVxd4w2GPOIjLlXpA xen-2.4.16/common/kernel.c +3ddb79bduhSEZI8xa7IbGQCpap5y2A xen-2.4.16/common/lib.c +3ddb79bdS39UXxUtZnaScie83-7VTQ xen-2.4.16/common/memory.c +3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen-2.4.16/common/network.c +3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen-2.4.16/common/page_alloc.c +3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen-2.4.16/common/resource.c +3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen-2.4.16/common/slab.c +3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen-2.4.16/common/softirq.c +3ddb79bdQqFHtHRGEO2dsxGgo6eAhw xen-2.4.16/common/timer.c 
+3ddb79bd3zgV33PHdt-cgh3sxcb1hw xen-2.4.16/common/vsprintf.c +3ddb79c0ppNeJtjC4va8j41ADCnchA xen-2.4.16/drivers/Makefile +3ddb79beWzgPS8ozf2BL2g3ZkiWhhQ xen-2.4.16/drivers/block/Makefile +3ddb79be04dyXzyXqDbMRS_1funwXQ xen-2.4.16/drivers/block/blkpg.c +3ddb79beME_0abStePF6fU8XLuQnWw xen-2.4.16/drivers/block/elevator.c +3ddb79beNQVrdGyoI4njXhgAjD6a4A xen-2.4.16/drivers/block/genhd.c +3ddb79beyWwLRP_BiM2t1JKgr_plEw xen-2.4.16/drivers/block/ll_rw_blk.c +3ddb79bdhcqD9ebrslr0O0oHqTiiXg xen-2.4.16/drivers/ide/Makefile +3ddb79bdErDn_WC3G-fWxKNR3viLnA xen-2.4.16/drivers/ide/ide-disk.c +3ddb79bdIPNW36FrlId94jTXaW8HoA xen-2.4.16/drivers/ide/ide-dma.c +3ddb79be5Ysvhn4se_Z-LQY_hI6UPw xen-2.4.16/drivers/ide/ide-features.c +3ddb79bdh1ohsWYRH_KdaXr7cqs12w xen-2.4.16/drivers/ide/ide-geometry.c +3ddb79bdYcxXT-2UEaDcG0Ic4MIK1g xen-2.4.16/drivers/ide/ide-pci.c +3ddb79bdOXTbcImJo8DwmlNX88k78Q xen-2.4.16/drivers/ide/ide-probe.c +3ddb79bdDWFwINnKn29RlFDwGJhjYg xen-2.4.16/drivers/ide/ide-taskfile.c +3ddb79bdkDY1bSOYkToP1Cc49VdBxg xen-2.4.16/drivers/ide/ide.c +3ddb79bdPyAvT_WZTAFhaX0jp-yXSw xen-2.4.16/drivers/ide/ide_modes.h +3ddb79bfogeJNHTIepPjd8fy1TyoTw xen-2.4.16/drivers/net/3c509.c +3ddb79bfMlOcWUwjtg6oMYhGySHDDw xen-2.4.16/drivers/net/3c59x.c +3ddb79bfl_DWxZQFKiJ2BXrSedV4lg xen-2.4.16/drivers/net/8139cp.c +3ddb79bfLVGtyXNJS4NQg-lP21rndA xen-2.4.16/drivers/net/8139too.c +3ddb79c0tWiE8xIFHszxipeVCGKTSA xen-2.4.16/drivers/net/Makefile +3ddb79bfU-H1Hms4BuJEPPydjXUEaQ xen-2.4.16/drivers/net/Space.c +3ddb79c0GejJrp1U6W4G6dYi-RiH4A xen-2.4.16/drivers/net/eepro100.c +3ddb79bfKvn9mt0kofpkw0QaWjxO6A xen-2.4.16/drivers/net/net_init.c +3ddb79c0fQgORkFlqWZdP-6cDHyFIQ xen-2.4.16/drivers/net/pcnet32.c +3ddb79bf_CBcu3QWYwq4bNAOnM2RqQ xen-2.4.16/drivers/net/setup.c +3ddb79bfh8ucmq_HqRSaURalpeAmPg xen-2.4.16/drivers/net/tulip/.depend +3ddb79bfsJ-hdQ17EXTFiUOHisjNgQ xen-2.4.16/drivers/net/tulip/21142.c +3ddb79bf0lzTL-ywAdOO7vctTYAmJA xen-2.4.16/drivers/net/tulip/ChangeLog +3ddb79bfRbGBTu5mznHtxpFPtnQYSQ xen-2.4.16/drivers/net/tulip/Makefile +3ddb79bfLLamkCaJZDJ7i6qrCUhwBw xen-2.4.16/drivers/net/tulip/eeprom.c +3ddb79bf-zt39-zIUgWC9Kb4As--Ew xen-2.4.16/drivers/net/tulip/interrupt.c +3ddb79bfdr1I4DtFnXCzpaHkEkHE2Q xen-2.4.16/drivers/net/tulip/media.c +3ddb79bftPnJDLCAo0Do4KPr5raERA xen-2.4.16/drivers/net/tulip/pnic.c +3ddb79bf5lr4NIjy1oZOM_jhpxGidw xen-2.4.16/drivers/net/tulip/pnic2.c +3ddb79bfBhawkz-2DT9lakMPbqzljQ xen-2.4.16/drivers/net/tulip/timer.c +3ddb79bf7yw7hGBMM60aMgMgxY_G2g xen-2.4.16/drivers/net/tulip/tulip.h +3ddb79bfhUr3baP8Lf4oZjhBX7i5kw xen-2.4.16/drivers/net/tulip/tulip_core.c +3ddb79beUWngyIhMHgyPtuTem4o4JA xen-2.4.16/drivers/pci/Makefile +3ddb79beU9td0Mnm0VUMklerBa37qQ xen-2.4.16/drivers/pci/compat.c +3ddb79beHkGQE58z5t5gyUCYiwOxvw xen-2.4.16/drivers/pci/gen-devlist.c +3ddb79bfoQcFKLf5P6wZlDl36alWdQ xen-2.4.16/drivers/pci/names.c +3ddb79bfyX7-pD6XdxY_mdNrJR20iw xen-2.4.16/drivers/pci/pci.c +3ddb79bf2AS7YBGwooE_Kbv7XgUqNQ xen-2.4.16/drivers/pci/pci.ids +3ddb79bfGf5-CZSdzn0DGBYWjQiDjw xen-2.4.16/drivers/pci/proc.c +3ddb79bf7sTn85WtP_8Nc2YEmmVExQ xen-2.4.16/drivers/pci/quirks.c +3ddb79bfkVLMq5CWjZLACPDivqxq_w xen-2.4.16/drivers/pci/setup-bus.c +3ddb79bfl1H1arbB0pzAEC2uPmY_3g xen-2.4.16/drivers/pci/setup-irq.c +3ddb79bfJaf0bkE1Y67bnll8-kjEPg xen-2.4.16/drivers/pci/setup-res.c +3ddb79bfIcCWJsBDNcQQE3ok2Azn-Q xen-2.4.16/drivers/pci/syscall.c +3ddb79be3kwzyKagpMHGoXZFdan7dg xen-2.4.16/drivers/scsi/Makefile +3ddb79beXZxwKh7cGyPfr40bhDyRrA xen-2.4.16/drivers/scsi/constants.h +3ddb79beGiGljlTNq_kRnCBZECgC9Q 
xen-2.4.16/drivers/scsi/hosts.h +3ddb79bexarQo1tQ541PPUyK9HXNDA xen-2.4.16/drivers/scsi/scsi.c +3ddb79beBOiYxQUiWTHosepRlJyuGA xen-2.4.16/drivers/scsi/scsi.h +3ddb79beVTYJj6_KMxYLJmCP7p9MuQ xen-2.4.16/drivers/scsi/scsi_dma.c +3ddb79beDrImFCFGgB_GLgUbeuHjog xen-2.4.16/drivers/scsi/scsi_error.c +3ddb79bepDvUltYDsInaUsH9lII9Sw xen-2.4.16/drivers/scsi/scsi_ioctl.c +3ddb79berPStE_-ILQHgcl1BLDLywA xen-2.4.16/drivers/scsi/scsi_lib.c +3ddb79beRXjB7_nNUbJMIRyjDmeByQ xen-2.4.16/drivers/scsi/scsi_merge.c +3ddb79beGNb7Es1bATZAGsPZEu5F2Q xen-2.4.16/drivers/scsi/scsi_module.c +3ddb79beZ--AZB0twliIm3qmQJO8Zg xen-2.4.16/drivers/scsi/scsi_obsolete.c +3ddb79beQgG_st0eBZUX8AQI7kBkHA xen-2.4.16/drivers/scsi/scsi_obsolete.h +3ddb79beK65cNRldY0CFGXjZ3-A74Q xen-2.4.16/drivers/scsi/scsi_proc.c +3ddb79beeIuwGDE0Ldl8wy6mt86Bag xen-2.4.16/drivers/scsi/scsi_queue.c +3ddb79beQVxjXLLSY896cqce3j6Ehg xen-2.4.16/drivers/scsi/scsi_scan.c +3ddb79beVrSvakLg_9MSo22vJ_TGrA xen-2.4.16/drivers/scsi/scsi_syms.c +3ddb79beC6PIqDEaxAfO3bLKcmMLeA xen-2.4.16/drivers/scsi/scsicam.c +3ddb79bedAG8DPsr3S1N4IASxUuBug xen-2.4.16/drivers/scsi/sd.c +3ddb79beA27dAK0xtNh4k6SJniKnlA xen-2.4.16/drivers/scsi/sd.h +3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen-2.4.16/include/asm-i386/apic.h +3ddb79c3QJYWr8LLGdonLbWmNb9pQQ xen-2.4.16/include/asm-i386/apicdef.h +3ddb79c3OiG9eTsi9Dy3F_OkuRAzKA xen-2.4.16/include/asm-i386/atomic.h +3ddb79c3rM-Ote0Xn6Ytg8Y6YqAG-A xen-2.4.16/include/asm-i386/bitops.h +3ddb79c3pXaTAGGWSIZF9EnRV5PlRw xen-2.4.16/include/asm-i386/byteorder.h +3ddb79c3KhTI0F_Iw_hRL9QEyOVK-g xen-2.4.16/include/asm-i386/cache.h +3ddb79c2LLt11EQHjrd6sB7FUqvFfA xen-2.4.16/include/asm-i386/cpufeature.h +3ddb79c2ADvRmdexd9y3AYK9_NTx-Q xen-2.4.16/include/asm-i386/current.h +3ddb79c2jFkPAZTDmU35L6IUssYMgQ xen-2.4.16/include/asm-i386/debugreg.h +3ddb79c3r9-31dIsewPV3P3i8HALsQ xen-2.4.16/include/asm-i386/delay.h +3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen-2.4.16/include/asm-i386/desc.h +3ddb79c2O729EttZTYu1c8LcsUO_GQ xen-2.4.16/include/asm-i386/elf.h +3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen-2.4.16/include/asm-i386/fixmap.h +3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen-2.4.16/include/asm-i386/hardirq.h +3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen-2.4.16/include/asm-i386/hdreg.h +3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen-2.4.16/include/asm-i386/i387.h +3ddb79c3otbjpnqFDSzSeD0J-0xcwg xen-2.4.16/include/asm-i386/ide.h +3ddb79c3fQ_O3o5NHK2N8AJdk0Ea4Q xen-2.4.16/include/asm-i386/io.h +3ddb79c2TKeScYHQZreTdHqYNLbehQ xen-2.4.16/include/asm-i386/io_apic.h +3ddb79c3S9Tga4XZRPrD4-aN3XIV6w xen-2.4.16/include/asm-i386/ioctl.h +3ddb79c2L7rTlFzazOLW1XuSZefpFw xen-2.4.16/include/asm-i386/irq.h +3ddb79c3I98vWcQR8xEo34JMJ4Ahyw xen-2.4.16/include/asm-i386/mc146818rtc.h +3ddb79c3n_UbPuxlkNxvvLycClIkxA xen-2.4.16/include/asm-i386/mpspec.h +3ddb79c2wa0dA_LGigxOelSGbJ284Q xen-2.4.16/include/asm-i386/msr.h +3ddb79c3xjYnrv5t3VqYlR4tNEOl4Q xen-2.4.16/include/asm-i386/page.h +3ddb79c3ysKUbxZuwKBRK3WXU2TlEg xen-2.4.16/include/asm-i386/pci.h +3ddb79c3nm2zdzeO6Mj8g7ex3txgGw xen-2.4.16/include/asm-i386/pgalloc.h +3ddb79c2QF5-pZGzuX4QukPCDAl59A xen-2.4.16/include/asm-i386/processor.h +3ddb79c3mbqEM7QQr3zVq7NiBNhouA xen-2.4.16/include/asm-i386/ptrace.h +3ddb79c2plf7ciNgoNjU-RsbUzawsw xen-2.4.16/include/asm-i386/rwlock.h +3ddb79c2mJI9YuGMScjofPlD8EdtgA xen-2.4.16/include/asm-i386/scatterlist.h +3ddb79c3Hgbb2g8CyWLMCK-6_ZVQSQ xen-2.4.16/include/asm-i386/smp.h +3ddb79c3jn8ALV_S9W5aeTYUQRKBpg xen-2.4.16/include/asm-i386/smpboot.h +3ddb79c3e9DCEoR-WzNxcOQDzLu7BQ xen-2.4.16/include/asm-i386/softirq.h +3ddb79c3NiyQE2vQnyGiaBnNjBO1rA 
xen-2.4.16/include/asm-i386/spinlock.h +3ddb79c3ezddh34MdelJpa5tNR00Dw xen-2.4.16/include/asm-i386/system.h +3ddb79c4HugMq7IYGxcQKFBpKwKhzA xen-2.4.16/include/asm-i386/types.h +3ddb79c3M2n1ROZH6xk3HbyN4CPDqg xen-2.4.16/include/asm-i386/uaccess.h +3ddb79c3uPGcP_l_2xyGgBSWd5aC-Q xen-2.4.16/include/asm-i386/unaligned.h +3ddb79c2YTaZwOqWin9-QNgHge5RVw xen-2.4.16/include/hypervisor-ifs/block.h +3ddb79c25UE59iu4JJcbRalx95mvcg xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h +3ddb79c2oRPrzClk3zbTkRHlpumzKA xen-2.4.16/include/hypervisor-ifs/network.h +3ddb79c4qbCoOFHrv9sCGshbWzBVlQ xen-2.4.16/include/scsi/scsi.h +3ddb79c4R4iVwqIIeychVQYmIH4FUg xen-2.4.16/include/scsi/scsi_ioctl.h +3ddb79c4yw_mfd4Uikn3v_IOPRpa1Q xen-2.4.16/include/scsi/scsicam.h +3ddb79c4HKPMLvDBP9LxzPi_szVxGA xen-2.4.16/include/scsi/sg.h +3ddb79c0nTsjSpVK4ZVTI9WwN24xtQ xen-2.4.16/include/xeno/blk.h +3ddb79c0dVhTHLsv6CPTf4baKix4mA xen-2.4.16/include/xeno/blkdev.h +3ddb79c18ePBgitnOs7GiOCFilODVw xen-2.4.16/include/xeno/blkpg.h +3ddb79c2SisDOHDyTeK5-MV3m7pNbA xen-2.4.16/include/xeno/block.h +3ddb79c2JOriBs0mWh-Tlolq78tg3w xen-2.4.16/include/xeno/bootmem.h +3ddb79c1oOjpQbp68MW7yiUpoi-S-w xen-2.4.16/include/xeno/brlock.h +3ddb79c1x7Ie3kifu7dQRx8y7HVyvA xen-2.4.16/include/xeno/byteorder/big_endian.h +3ddb79c1qFXOEX1eD0yXJ_gsGkUt8w xen-2.4.16/include/xeno/byteorder/generic.h +3ddb79c1VbwFALNpgx6uC_iZKFHD-A xen-2.4.16/include/xeno/byteorder/little_endian.h +3ddb79c1VvNRMM35bpdZMekirCXP-A xen-2.4.16/include/xeno/byteorder/pdp_endian.h +3ddb79c116WbJV8bwGZXFFJy_GNNvw xen-2.4.16/include/xeno/byteorder/swab.h +3ddb79c1pwmlw8VXW8aaSKAVGVmjDA xen-2.4.16/include/xeno/byteorder/swabb.h +3ddb79c0c0cX_DZE209-Bb-Rx1v-Aw xen-2.4.16/include/xeno/cache.h +3ddb79c259jh8hE7vre_8NuE7nwNSA xen-2.4.16/include/xeno/config.h +3ddb79c1V44RD26YqCUm-kqIupM37A xen-2.4.16/include/xeno/ctype.h +3ddb79c05DdHQ0UxX_jKsXdR4QlMCA xen-2.4.16/include/xeno/delay.h +3ddb79c2PMeWTK86y4C3F4MzHw4A1g xen-2.4.16/include/xeno/dom0_ops.h +3ddb79c1uaWQZj551j1O0B5z8AnHOg xen-2.4.16/include/xeno/elevator.h +3ddb79c0HIghfBF8zFUdmXhOU8i6hA xen-2.4.16/include/xeno/errno.h +3ddb79c0rMjudDKkJku_mkm0J-BZgw xen-2.4.16/include/xeno/etherdevice.h +3ddb79c0T3X07lFnM9OSE-W5bqIDSQ xen-2.4.16/include/xeno/ethtool.h +3ddb79c1W0lQca8gRV7sN6j3iY4Luw xen-2.4.16/include/xeno/event.h +3ddb79c1J4I_AjNflZL-1c1jOIlSyg xen-2.4.16/include/xeno/genhd.h +3ddb79c1i-chIoeniqgYwMM3EgaR5w xen-2.4.16/include/xeno/hdreg.h +3ddb79c12GuUuaxBKiMuwf-Qvuwpng xen-2.4.16/include/xeno/hdsmart.h +3ddb79c0MM575N4YvMSiw9EqKH4JDA xen-2.4.16/include/xeno/ide.h +3ddb79c1yHLp08JhgPxIMcZ8DwN9hg xen-2.4.16/include/xeno/if.h +3ddb79c1RCWOkWPQRzbYVTX_e-E7CA xen-2.4.16/include/xeno/if_ether.h +3ddb79c2IYah7z7hkzPyOiG8szKkyw xen-2.4.16/include/xeno/if_packet.h +3ddb79c0GurNF9tDWqQbAwJFH8ugfA xen-2.4.16/include/xeno/init.h +3ddb79c1Vi5VleJAOKHAlY0G2zAsgw xen-2.4.16/include/xeno/interrupt.h +3ddb79c2J6EnruiygRhBCgftzMzTeQ xen-2.4.16/include/xeno/ioctl.h +3ddb79c1nzaWu8NoF4xCCMSFJR4MlA xen-2.4.16/include/xeno/ioport.h +3ddb79c2qAxCOABlkKtD8Txohe-qEw xen-2.4.16/include/xeno/irq.h +3ddb79c2b3qe-6Ann09FqZBF4IrJaQ xen-2.4.16/include/xeno/irq_cpustat.h +3ddb79c11w_O7z7YZJnzuDSxaK5LlA xen-2.4.16/include/xeno/kdev_t.h +3ddb79c1NfYlOrWNqgZkj9EwtFfJow xen-2.4.16/include/xeno/lib.h +3ddb79c18Ajy7micDGQQfJ0zWgEHtA xen-2.4.16/include/xeno/list.h +3ddb79c0_s2_wgV0cA6tztEaeyy1NA xen-2.4.16/include/xeno/major.h +3ddb79c1fsWuKI2sGlW5bqoG2lPVNA xen-2.4.16/include/xeno/mii.h +3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen-2.4.16/include/xeno/mm.h 
+3ddb79c13p9iHn1XAp0IS1qvj4yDsg xen-2.4.16/include/xeno/module.h +3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen-2.4.16/include/xeno/multiboot.h +3ddb79c0CLfAlJLg1ohdPD-Jjn-jxg xen-2.4.16/include/xeno/netdevice.h +3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen-2.4.16/include/xeno/pci.h +3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen-2.4.16/include/xeno/pci_ids.h +3ddb79c2byJwwNNkiES__A9H4Cvc4g xen-2.4.16/include/xeno/pkt_sched.h +3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen-2.4.16/include/xeno/prefetch.h +3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen-2.4.16/include/xeno/sched.h +3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen-2.4.16/include/xeno/skbuff.h +3ddb79c14dXIhP7C2ahnoD08K90G_w xen-2.4.16/include/xeno/slab.h +3ddb79c09xbS-xxfKxuV3JETIhBzmg xen-2.4.16/include/xeno/smp.h +3ddb79c1-yIt89RT02wIPp2xDR8YjQ xen-2.4.16/include/xeno/socket.h +3ddb79c2V2P9F2xMCzDJ9vbUofSg_Q xen-2.4.16/include/xeno/sockios.h +3ddb79c2iIcESrDAB8samy_yAh6olQ xen-2.4.16/include/xeno/spinlock.h +3ddb79c0BnA20PbgmuMPSGIBljNRQw xen-2.4.16/include/xeno/time.h +3ddb79c2HFkXuRxi1CriJtSFmY6Ybw xen-2.4.16/include/xeno/timer.h +3ddb79c2_m8lT9jDKse_tePj7zcnNQ xen-2.4.16/include/xeno/timex.h +3ddb79c2e2C14HkndNEJlYwXaPrF5A xen-2.4.16/include/xeno/tqueue.h +3ddb79c1-kVvF8cVa0k3ZHDdBMj01Q xen-2.4.16/include/xeno/types.h +3ddb79c2Ae5KpzhC9LCYG7mP_Vi4Aw xen-2.4.16/include/xeno/vif.h +3ddb79c4YQCQ6r0xNLLu0jfbM7pVmA xen-2.4.16/net/Makefile +3ddb79c4AkfDkTCw0comx4L8wsUOMg xen-2.4.16/net/dev.c +3ddb79c4x1L_soh8b-r_1jQW_37Icw xen-2.4.16/net/dev_mcast.c +3ddb79c4NSDwiQ-AmrYdxcRAwLPzwQ xen-2.4.16/net/eth.c +3ddb79c4KZhNxUuYJ7lul8cc-wRkyg xen-2.4.16/net/sch_generic.c +3ddb79c4TZj1wXPKQt36O72SddtBNQ xen-2.4.16/net/skbuff.c +3ddb79c4ARyIHqv3Y6YFckIUbyA8Tw xen-2.4.16/net/utils.c +3ddb79c4x8dvwPtzclghWAKFWpEBFA xen-2.4.16/tools/Makefile +3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen-2.4.16/tools/elf-reloc.c +3ddb79bbYMXGmQTsr5BeGS_RuZ5f_w xenolinux-2.4.16-sparse/Makefile +3ddb79b7e0ssyz3Q1GoqjDds-x1PLQ xenolinux-2.4.16-sparse/arch/xeno/Makefile +3ddb79b7_rLvYZU3tOY6Wwuw_Sg3_w xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile +3ddb79b8L4xnwrcvWk6nAbgKVbNkSA xenolinux-2.4.16-sparse/arch/xeno/config.in +3ddb79b7v_Be34as7_mlzFlw65hOjQ xenolinux-2.4.16-sparse/arch/xeno/defconfig +3ddb79b7KUvtx0knQJoRaBDZQeNidg xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile +3ddb79b6Rc0uAOGFthIFxq1KGWZ_Iw xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c +3ddb79b7LLVJBGynxHSOh9A9l97sug xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile +3ddb79b7UG2QiRAU-Wvc1Y_BLigu1Q xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c +3ddb79b75eo4PRXkT6Th9popt_SJhg xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile +3ddb79b7Xyaoep6U0kLvx6Kx7OauDw xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c +3ddb79b7PulSkF9m3c7K5MkxHRf4hA xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h +3ddb79b7s7yYBioHidSkIoHtQxYmOw xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile +3ddb79b7CpLL98ScdpbKkVBktlbCtQ xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c +3ddb79b7hqi9krq6h98lnpONHGzvEA xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile +3ddb79b7eyEv5bsN8EQkjIG0y11Q1A xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S +3ddb79b70XAg9bJwp0-DWHe0LtzlBw xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S +3ddb79b7dDsKjU22VxQ-C5BMFaMUmw xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c +3ddb79b7xzwEc8-lo1vu3BxB-gBURQ xenolinux-2.4.16-sparse/arch/xeno/kernel/i386_ksyms.c +3ddb79b7i7pfPEcy_zjDfW9JHD305g xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c +3ddb79b7Ti2i5ztQzM-w67zN-cJD8A 
xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c +3ddb79b7MEQGMZrsF94atNJZ4-OGzA xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c +3ddb79b7DOz-Mz5jsjRd5W8jN0XbPw xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c +3ddb79b7bOAPk_YAUUsruhVGO2GOOg xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c +3ddb79b7qf2WK6vMKcmOLIeKN5GSjg xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c +3ddb79b7HdaBR7yk_u51auihbr31aQ xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c +3ddb79b7yJunvnrgWD2VTy6yot0PMg xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c +3ddb79b7BIitpVygiksiMBQYvh5Z2A xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c +3ddb79b7DTevmwhNla67jZxjBSIKFg xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c +3ddb79b76mCf-gZPR4KLjL8ktZ37GA xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c +3ddb79b7xDsEKErRFeqcSm6eRrTEIg xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c +3ddb79b7V4dv_KAQu4Msa2Ebhd0aKw xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c +3ddb79b8oUnwncDaZuRWF3-n3jPsIA xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile +3ddb79b8BMxi8qW3_NT44SSd1uMD3Q xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S +3ddb79b8WcWel2g9zU9pBJb-yA8jBw xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c +3ddb79b8oxeiaIW6Au95OM0GlsMrMw xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c +3ddb79b8XPasnRhvK-_6xYksf3S6qA xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S +3ddb79b8HFLUh8mwcl4X44ta-ny1KA xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c +3ddb79b879qeoLlarHXvNIDEva6ssA xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c +3ddb79b869CY_yr4HymV6k98pfpMgg xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c +3ddb79b8S77yf1--Qa4C0ZYmqKXCww xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c +3ddb79b8ffZ79cU2ZzfA2ekeo6pqeg xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c +3ddb79b82kQ5oIXpxq3TUmlgxsLzLg xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c +3ddb79b8qdD_svLCCAja_oP2w4Tn8Q xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile +3ddb79b8ukY8dsPYmR8eNk-aCzFPsQ xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c +3ddb79b856Zta9b3s0bgUCGbG1blvQ xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c +3ddb79b85fpsKT8A9WYnuJg03b715g xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c +3ddb79b83Zj7Xn2QVhU4HeMuAC9FjA xenolinux-2.4.16-sparse/arch/xeno/mm/init.c +3ddb79b7aKdTkbr3u6aze8tVwGh_TQ xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds +3ddb79bbx682YH6vR2zbVOXwg73ULg xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c +3ddb79bcJfHdwrPsjqgI33_OsGdVCg xenolinux-2.4.16-sparse/drivers/block/rd.c +3ddb79bcpVu-IbnqwQqpRqsEbLpsuw xenolinux-2.4.16-sparse/drivers/char/tty_io.c +3ddb79bba_zKpuurHVeWfgDkyPoq8A xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c +3ddb79b8VFtfWSCrXKPN2K21zd_vtw xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h +3ddb79b8Zzi13p3OAPV25QgiC3THAQ xenolinux-2.4.16-sparse/include/asm-xeno/apic.h +3ddb79baZDlsdV_m6C5CXnWMl15p1g xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h +3ddb79baZM88u4CnriVA8ZXBdnMNvg xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h +3ddb79baYHyZsDCiXiq8Y8_XxHE-jQ xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h +3ddb79b8vFGtGb6pg3GZFXSiwOZfcg xenolinux-2.4.16-sparse/include/asm-xeno/boot.h +3ddb79baW8tf6PiBQUF50QQM5nY9sw xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h +3ddb79b80msOlzTZRoVudYdemzgOlA xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h +3ddb79b8brNSUEujnq8f_zr8kA-cUg xenolinux-2.4.16-sparse/include/asm-xeno/cache.h +3ddb79bayhr6C6prVhAYlFRChhf3wg xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h +3ddb79b8RNUaDbpPjdVVwKAsbiTBKQ xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h +3ddb79b8pJe4aNsUKkfHEoBT9Y-UMA 
xenolinux-2.4.16-sparse/include/asm-xeno/current.h +3ddb79b8KL7icUfxKRoWDIkHkLQ1kQ xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h +3ddb79baDUP_cRdFgqaH0rXUvMxx4A xenolinux-2.4.16-sparse/include/asm-xeno/delay.h +3ddb79b89CgBTFsS3joEJ1ZniSHEgA xenolinux-2.4.16-sparse/include/asm-xeno/desc.h +3ddb79ba6xyT4mJOYSp1Fg2l0ta93A xenolinux-2.4.16-sparse/include/asm-xeno/div64.h +3ddb79b80Z4ZUIqbD1Xu_t4OCuEHeQ xenolinux-2.4.16-sparse/include/asm-xeno/dma.h +3ddb79bac26NkKcPIEsfxETc5Snyag xenolinux-2.4.16-sparse/include/asm-xeno/elf.h +3ddb79ba722pCJ_g_xI8ebsE31IK-Q xenolinux-2.4.16-sparse/include/asm-xeno/errno.h +3ddb79b8vIpUpgaSNEneFkg5hYSvNg xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h +3ddb79b8c_oKu2_BGNJctM4DBET31Q xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h +3ddb79b8780YvqvK1g5KPIWzQ6P15w xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h +3ddb79bas-nFywnmilbUeT34PEAA0g xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h +3ddb79batzR40ZFY9dvgs5f1aM9I6g xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h +3ddb79b90xBgbeYgCcImS2ZxJakxBA xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h +3ddb79baXLZV3dUKQI2gIYpAy67RuA xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h +3ddb79b82xfEY3yBet-2FXY4p8b7yg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h +3ddb79b8KUSolAgH19qEzo1Ey0f1Ng xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h +3ddb79b8J0Y2UA8NKoN5Ng71WFQRIg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h +3ddb79bapQ9Z9ewa5O1pqAVaNBTazg xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h +3ddb79baL-pjPI8hg5xjPgd4__SlOA xenolinux-2.4.16-sparse/include/asm-xeno/i387.h +3ddb79ba66TwvG7HpbBo04fRhmj3KQ xenolinux-2.4.16-sparse/include/asm-xeno/ide.h +3ddb79bahFfCuRcmd9kBYA-CuGtCSg xenolinux-2.4.16-sparse/include/asm-xeno/init.h +3ddb79ba3wnwflaNW6QheYvxoj5S8Q xenolinux-2.4.16-sparse/include/asm-xeno/io.h +3ddb79ba1MhHpElCXFlijej2zWVk4g xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h +3ddb79baiyfcZN9rJwhq0UeFjI6GkQ xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h +3ddb79badReKYSok3yHShb4jg0vA-A xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h +3ddb79b9iuIxteTXg8_myIsrWF5uxg xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h +3ddb79baw5Dxo78880UMSanDV70WdA xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h +3ddb79ban9FUBuEaznRZvPfry3xnHQ xenolinux-2.4.16-sparse/include/asm-xeno/irq.h +3ddb79banJ5r-mqE7LZ8nPGTADaGBA xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h +3ddb79baoaoHGTW0oiBXFPUg4KWydw xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h +3ddb79bam2K6c7tS2HX6jis2Bqx71w xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h +3ddb79baQ8gGrZm2Jlo3I1ntD0H5HA xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h +3ddb79b9MeQEYrafy-Mx9OoeVFM_uw xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h +3ddb79baAsy5W-cJ9ML_w9chqqUh4A xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h +3ddb79b9uhsumPVE0wGgarfIkYDSFA xenolinux-2.4.16-sparse/include/asm-xeno/locks.h +3ddb79b954ISbF9e68hB0WTulCJRgg xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h +3ddb79b9EZDlC6RGn_y0OYr0nyQWlw xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h +3ddb79b9M6pTF4maDgh8TYbg_HHUbw xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h +3ddb79baA7dlps8FkicOUEXKEQuQsA xenolinux-2.4.16-sparse/include/asm-xeno/mman.h +3ddb79baKfF36-eRvkxnEvMRQRai-w xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h +3ddb79baQyKbT5U4EmZNePY9Txp-tA xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h +3ddb79bberC3Ghs4vy-06Pu-LSiWtw xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h 
+3ddb79bbsJLF10xQcKDoV8f_7gcOXg xenolinux-2.4.16-sparse/include/asm-xeno/module.h +3ddb79bbY5RffJ8_F1oC4VI7q3Eejg xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h +3ddb79b90vB4Vzzy_wL6SYXQMy9N9Q xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h +3ddb79bbZ5a_vTk0xcgOHuPokaqwMw xenolinux-2.4.16-sparse/include/asm-xeno/msr.h +3ddb79b9y1xAKaPiBd79MBcCopNNYw xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h +3ddb79b90hX0QBJlWQN_VsHtX5Ijkw xenolinux-2.4.16-sparse/include/asm-xeno/namei.h +3ddb79bbG2p9MNq7tuISz8md1Oj2lg xenolinux-2.4.16-sparse/include/asm-xeno/page.h +3ddb79bb_iUa2piFSwaB8YPw-rB5SQ xenolinux-2.4.16-sparse/include/asm-xeno/param.h +3ddb79b9Y8UU0S9AoDznoqqcYxg9-A xenolinux-2.4.16-sparse/include/asm-xeno/parport.h +3ddb79b9K9_edWxBHS7TdCpyDmfp6g xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h +3ddb79bahaS_P3UYp9VEU6kHxXbajA xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h +3ddb79b9MjXUB_rk29GJgaNY24feCw xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h +3ddb79bbPF2ENpNHBru8K3hyYVPmkQ xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h +3ddb79b9t9xKrOf8aP3X5jEit9tj-w xenolinux-2.4.16-sparse/include/asm-xeno/poll.h +3ddb79b9xHtTDWFaI9ncAtxyUth0Yg xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h +3ddb79b9VwZ9YsSpA7CkJmYXVadrCA xenolinux-2.4.16-sparse/include/asm-xeno/processor.h +3ddb79bbKaOkHrGG9j05AGlii-voaQ xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h +3ddb79bbVpJCVTXmc2yNf1rsC00YEg xenolinux-2.4.16-sparse/include/asm-xeno/resource.h +3ddb79bbtU0Kh27NbNpqKAIhshJvXQ xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h +3ddb79b97LhdmW6hYmybQOPDkK6plg xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h +3ddb79bbgRzM1NoXYbLoOCbZt8s5NA xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h +3ddb79b9xCi3F80Z6xxx6nzkCAo8vQ xenolinux-2.4.16-sparse/include/asm-xeno/segment.h +3ddb79b9WxySbCKDoMgTgPtn1G3BFw xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h +3ddb79b9gNK3dtgXONloSBaNgZPjAg xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h +3ddb79bb2UTDgfwju2J3weDXemw3LA xenolinux-2.4.16-sparse/include/asm-xeno/serial.h +3ddb79bbAPFyA_n4dNVCRoee8obOKA xenolinux-2.4.16-sparse/include/asm-xeno/setup.h +3ddb79b9XTOB5DwWBGzPLLR4rNEkDQ xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h +3ddb79b9-_jDlAj3qVZe4opi3zectQ xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h +3ddb79b9AW75ErwlTRX4McxO15sEaQ xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h +3ddb79b9NBJW-KAI3mgveUCr7sIOwA xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h +3ddb79bbj0i8tUVNMKtZVrLJqv3Nsw xenolinux-2.4.16-sparse/include/asm-xeno/signal.h +3ddb79bbfAmpotdy-No2dwGez2fnIg xenolinux-2.4.16-sparse/include/asm-xeno/smp.h +3ddb79b9pDERXiqSumFWMTFJ1X9xIw xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h +3ddb79bbuCOIWTlWEHgOTexEBbdDow xenolinux-2.4.16-sparse/include/asm-xeno/socket.h +3ddb79b9ExeUznVBlSn1e2nvOCrJ4A xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h +3ddb79b9kL3xvucBb-Gmg4_vo-99vw xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h +3ddb79b9rJ8AfSzGzA0arI8mazYLlQ xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h +3ddb79bbXaA_zUHNPkAKRNz1h0gIJw xenolinux-2.4.16-sparse/include/asm-xeno/stat.h +3ddb79b9G004IlCplrjWgF1aXbp8dA xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h +3ddb79bbRsy3GlCFrQEbVMVp--xlwQ xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h +3ddb79bb4xug4cDph6ODLQFQIan_sg xenolinux-2.4.16-sparse/include/asm-xeno/string.h +3ddb79b9JhjJtJUO3g5LmrHPkdxgKg xenolinux-2.4.16-sparse/include/asm-xeno/system.h +3ddb79b9tbjCU9zSbqKbbv4m8tijlg 
xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h +3ddb79bbi0mW10tH4xX1_KHXKM_xPg xenolinux-2.4.16-sparse/include/asm-xeno/termios.h +3ddb79b9JuR1VvNzlkyMlA-Dnlmy9Q xenolinux-2.4.16-sparse/include/asm-xeno/timex.h +3ddb79b9Bofq-p3sCTF0ELVuf_iBYA xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h +3ddb79b9tpBUqS8-S6euSqyk2hFkKg xenolinux-2.4.16-sparse/include/asm-xeno/types.h +3ddb79bb5bkAaEzD7pdqQZdWyA_0eQ xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h +3ddb79bbiDIz1dxgFixHKyGuqRqfDQ xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h +3ddb79ba_Smn-GiYtr5ZTMaZXn-AHg xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h +3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h +3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h +3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h +3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h +3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h +3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c +3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok new file mode 100644 index 0000000000..d8aa55ca1f --- /dev/null +++ b/BitKeeper/etc/logging_ok @@ -0,0 +1 @@ +smh22@boulderdash.cl.cam.ac.uk diff --git a/xen-2.4.16/Makefile b/xen-2.4.16/Makefile new file mode 100644 index 0000000000..7ede454ee8 --- /dev/null +++ b/xen-2.4.16/Makefile @@ -0,0 +1,39 @@ + +export BASEDIR := $(shell pwd) + +include Rules.mk + +default: $(TARGET) + +install: $(TARGET) + gzip -f -9 < $(TARGET) > $(TARGET).gz + cp $(TARGET).gz ../../install/images/image + +clean: delete-links + $(MAKE) -C tools clean + $(MAKE) -C common clean + $(MAKE) -C net clean + $(MAKE) -C drivers clean + $(MAKE) -C arch/$(ARCH) clean + rm -f *.o $(TARGET)* *~ core + +$(TARGET): make-links + $(MAKE) -C tools + $(MAKE) -C common + $(MAKE) -C net + $(MAKE) -C drivers + $(MAKE) -C arch/$(ARCH) + +make-links: + ln -sf xeno include/linux + ln -sf asm-$(ARCH) include/asm + +delete-links: + rm -f include/linux include/asm + +SUBDIRS =arch common drivers net +TAGS: + etags `find include/asm-$(ARCH) -name '*.h'` + find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a + find $(SUBDIRS) -name '*.[ch]' | xargs etags -a + diff --git a/xen-2.4.16/README b/xen-2.4.16/README new file mode 100644 index 0000000000..3518b8254a --- /dev/null +++ b/xen-2.4.16/README @@ -0,0 +1,145 @@ + +***************************************************** + Xeno Hypervisor (18/7/02) + +1) Tree layout +Looks rather like a simplified Linux :-) +Headers are in include/xeno and include/asm-<architecture>. +At build time we create symlinks: + include/linux -> include/xeno + include/asm -> include/asm-<architecture> +In this way, Linux device drivers should need less tweaking of +their #include lines. + +For source files, mapping between hypervisor and Linux is: + Linux Hypervisor + ----- ---------- + kernel/init/mm/lib -> common + net/* -> net/* + drivers/* -> drivers/* + arch/* -> arch/* + +Note that the use of #include <asm/...> and #include <linux/...> can +lead to confusion, as such files will often exist on the system include +path, even if a version doesn't exist within the hypervisor tree. +Unfortunately '-nostdinc' cannot be specified to the compiler, as that +prevents us using stdarg.h in the compiler's own header directory.
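(For illustration only: the usual way to square this circle, and the idiom Linux itself later adopted, is to keep '-nostdinc' but hand the compiler's private header directory back explicitly:

    CFLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

This assumes a gcc that understands '-print-file-name=include'; the Makefiles in this tree simply omit '-nostdinc' instead.)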
+ +We try not to modify things in driver/* as much as possible, so we can +easily take updates from Linux. arch/* is basically straight from +Linux, with fingers in Linux-specific pies hacked off. common/* has +a lot of Linux code in it, but certain subsystems (task maintenance, +low-level memory handling) have been replaced. net/* contains enough +Linux-like gloop to get network drivers to work with little/no +modification. + +2) Building +'make': Builds ELF executable called 'image' in base directory +'make install': gzip-compresses 'image' and copies it to TFTP server +'make clean': removes *all* build and target files + + +***************************************************** +Random thoughts and stuff from here down... + +Todo list +--------- +* Hypervisor need only directly map its own memory pool + (maybe 128MB, tops). That would need 0x08000000.... + This would allow 512MB Linux with plenty room for vmalloc'ed areas. +* Network device -- port drivers to hypervisor, implement virtual + driver for xeno-linux. Looks like Ethernet. + -- Hypervisor needs to do (at a minimum): + - packet filtering on tx (unicast IP only) + - packet demux on rx (unicast IP only) + - provide DHCP [maybe do something simpler?] + and ARP [at least for hypervisor IP address] + + +Segment descriptor tables +------------------------- +We want to allow guest OSes to specify GDT and LDT tables using their +own pages of memory (just like with page tables). So allow the following: + * new_table_entry(ptr, val) + [Allows insertion of a code, data, or LDT descriptor into given + location. Can simply be checked then poked, with no need to look at + page type.] + * new_GDT() -- relevant virtual pages are resolved to frames. Either + (i) page not present; or (ii) page is only mapped read-only and checks + out okay (then marked as special page). Old table is resolved first, + and the pages are unmarked (no longer special type). + * new_LDT() -- same as for new_GDT(), with same special page type. + +Page table updates must be hooked, so we look for updates to virtual page +addresses in the GDT/LDT range. If map to not present, then old physpage +has type_count decremented. If map to present, ensure read-only, check the +page, and set special type. + +Merge set_{LDT,GDT} into update_baseptr, by passing four args: + update_baseptrs(mask, ptab, gdttab, ldttab); +Update of ptab requires update of gtab (or set to internal default). +Update of gtab requires update of ltab (or set to internal default). + + +The hypervisor page cache +------------------------- +This will allow guest OSes to make use of spare pages in the system, but +allow them to be immediately used for any new domains or memory requests. +The idea is that, when a page is laundered and falls off Linux's clean_LRU +list, rather than freeing it, it becomes a candidate for passing down into +the hypervisor. In return, xeno-linux may ask for one of its previously- +cached pages back: + (page, new_id) = cache_query(page, old_id); +If the requested page couldn't be kept, a blank page is returned. +When would Linux make the query? Whenever it wants a page back without +the delay of going to disc. Also, whenever a page would otherwise be +flushed to disc. + +To try and add to the cache: (blank_page, new_id) = cache_query(page, NULL); + [NULL means "give me a blank page"]. +To try and retrieve from the cache: (page, new_id) = cache_query(x_page, id) + [we may request that x_page just be discarded, and therefore not impinge + on this domain's cache quota].
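For concreteness, the cache_query() interface sketched above could be rendered in C roughly as follows. This is purely speculative -- every name and type here is hypothetical, and nothing like it exists in the tree:

    /* Hypothetical rendering of the page-cache call described above. */
    typedef unsigned long cache_id_t;

    struct cache_reply {
        void      *page;    /* page handed back; blank if the old one is gone */
        cache_id_t new_id;  /* fresh cookie to quote on the next query */
    };

    /* One call both donates and reclaims, as in the notes above. */
    struct cache_reply cache_query(void *page, cache_id_t old_id);

    /* Add a laundered page to the cache, getting a blank page back
     * (an old_id of 0 plays the role of NULL above):
     *     reply = cache_query(page, 0);
     * Later, try to reclaim it without the delay of going to disc:
     *     reply = cache_query(x_page, reply.new_id);
     */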
+ + +Booting secondary processors +---------------------------- + +start_of_day (i386/setup.c) +smp_boot_cpus (i386/smpboot.c) + * initialises boot CPU data + * parses APIC tables + * for each cpu: + do_boot_cpu (i386/smpboot.c) + * forks a new idle process + * points initial stack inside new task struct + * points initial EIP at a trampoline in very low memory + * frobs remote APIC.... + +On other processor: + * trampoline sets GDT and IDT + * jumps at main boot address with magic register value + * after setting proper page and descriptor tables, jumps at... + initialize_secondary (i386/smpboot.c) + * simply reads ESP/EIP out of the (new) idle task + * this causes a jump to... + start_secondary (i386/smpboot.c) + * reset all processor state + * barrier, then write bitmasks to signal back to boot cpu + * then barrel into... + cpu_idle (i386/process.c) + [THIS IS PROBABLY REASONABLE -- BOOT CPU SHOULD KICK + SECONDARIES TO GET WORK DONE] + + +SMP capabilities +---------------- + +Current intention is to allow hypervisor to schedule on all processors in +SMP boxen, but to tie each domain to a single processor. This simplifies +many SMP intricacies both in terms of correctness and efficiency (eg. +TLB flushing, network packet delivery, ...). + +Clients can still make use of SMP by installing multiple domains on a single +machine, and treating it as a fast cluster (at the very least, the +hypervisor will have fast routing of locally-destined packets). diff --git a/xen-2.4.16/Rules.mk b/xen-2.4.16/Rules.mk new file mode 100644 index 0000000000..25c6e47f1f --- /dev/null +++ b/xen-2.4.16/Rules.mk @@ -0,0 +1,31 @@ + +ARCH := i386 + +TARGET := $(BASEDIR)/image +HDRS := $(wildcard $(BASEDIR)/include/xeno/*.h) +HDRS += $(wildcard $(BASEDIR)/include/scsi/*.h) +HDRS += $(wildcard $(BASEDIR)/include/hypervisor-ifs/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-$(ARCH)/*.h) + +C_SRCS := $(wildcard *.c) +S_SRCS := $(wildcard *.S) +OBJS := $(patsubst %.S,%.o,$(S_SRCS)) +OBJS += $(patsubst %.c,%.o,$(C_SRCS)) + +# Note that link order matters! +ALL_OBJS := $(BASEDIR)/common/common.o +ALL_OBJS += $(BASEDIR)/net/network.o +ALL_OBJS += $(BASEDIR)/drivers/pci/driver.o +ALL_OBJS += $(BASEDIR)/drivers/net/driver.o +ALL_OBJS += $(BASEDIR)/drivers/block/driver.o +ALL_OBJS += $(BASEDIR)/drivers/ide/driver.o +ALL_OBJS += $(BASEDIR)/arch/$(ARCH)/arch.o + +include $(BASEDIR)/arch/$(ARCH)/Rules.mk + +%.o: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -c $< -o $@ + +%.o: %.S $(HDRS) Makefile + $(CC) $(CFLAGS) -D__ASSEMBLY__ -c $< -o $@ + diff --git a/xen-2.4.16/arch/i386/Makefile b/xen-2.4.16/arch/i386/Makefile new file mode 100644 index 0000000000..6778324206 --- /dev/null +++ b/xen-2.4.16/arch/i386/Makefile @@ -0,0 +1,17 @@ + +include $(BASEDIR)/Rules.mk + +# What happens here? We link monitor object files together, starting +# at MONITOR_BASE (a very high address). But bootloader cannot put +# things there, so we initially load at LOAD_BASE. A hacky little +# tool called `elf-reloc' is used to modify segment offsets from +# MONITOR_BASE-relative to LOAD_BASE-relative. +# (NB. Linux gets round this by turning its image into raw binary, then +# wrapping that with a low-memory bootstrapper.) 
+default: boot/boot.o $(OBJS) + $(LD) -r -o arch.o $(OBJS) + $(LD) $(LDFLAGS) boot/boot.o $(ALL_OBJS) -o $(TARGET) + $(BASEDIR)/tools/elf-reloc $(MONITOR_BASE) $(LOAD_BASE) $(TARGET) + +clean: + rm -f *.o *~ core boot/*.o boot/*~ boot/core diff --git a/xen-2.4.16/arch/i386/Rules.mk b/xen-2.4.16/arch/i386/Rules.mk new file mode 100644 index 0000000000..172777193a --- /dev/null +++ b/xen-2.4.16/arch/i386/Rules.mk @@ -0,0 +1,14 @@ +######################################## +# x86-specific definitions + +CC := gcc +LD := ld +# Linker should relocate monitor to this address +MONITOR_BASE := 0xE0100000 +# Bootloader should load monitor to this real address +LOAD_BASE := 0x00100000 +CFLAGS := -fno-builtin -O3 -Wall -DMONITOR_BASE=$(MONITOR_BASE) +CFLAGS += -I$(BASEDIR)/include -D__KERNEL__ +LDFLAGS := -T xeno.lds -N + + diff --git a/xen-2.4.16/arch/i386/apic.c b/xen-2.4.16/arch/i386/apic.c new file mode 100644 index 0000000000..a09613bee4 --- /dev/null +++ b/xen-2.4.16/arch/i386/apic.c @@ -0,0 +1,836 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Using APIC to generate smp_local_timer_interrupt? */ +int using_apic_timer = 0; + +int get_maxlvt(void) +{ + unsigned int v, ver, maxlvt; + + v = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(v); + /* 82489DXs do not report # of LVT entries. */ + maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; + return maxlvt; +} + +void clear_local_APIC(void) +{ + int maxlvt; + unsigned long v; + + maxlvt = get_maxlvt(); + + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 3) { + v = apic_read(APIC_LVTERR); + apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); + } + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* + * Clean APIC state for other OSs: + */ + apic_write_around(APIC_LVTT, APIC_LVT_MASKED); + apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + apic_write_around(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); +} + +void __init connect_bsp_APIC(void) +{ + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. + * connect BSP's local APIC to INT and NMI lines. + */ + printk("leaving PIC mode, enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +} + +void disconnect_bsp_APIC(void) +{ + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect + * only on certain older boards). Note that APIC + * interrupts, including IPIs, won't work beyond + * this point! The only exception are INIT IPIs. 
+ */ + printk("disabling APIC mode, entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + } +} + +void disable_local_APIC(void) +{ + unsigned long value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write_around(APIC_SPIV, value); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + Dprintk("Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + Dprintk("Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + Dprintk("Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + Dprintk("Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* + * The next two are just to see if we have sane values. + * They're only really relevant if we're in Virtual Wire + * compatibility mode, but most boxes are anymore. + */ + reg0 = apic_read(APIC_LVT0); + Dprintk("Getting LVT0: %x\n", reg0); + reg1 = apic_read(APIC_LVT1); + Dprintk("Getting LVT1: %x\n", reg1); + + return 1; +} + +void __init sync_Arb_IDs(void) +{ + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + Dprintk("Synchronizing Arb IDs.\n"); + apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG + | APIC_DM_INIT); +} + +extern void __error_in_apic_c (void); + +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned long value, ver; + + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !cpu_has_apic) + return; + + value = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(value); + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write_around(APIC_SPIV, value); + + /* + * Set up the virtual wire mode. + */ + apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!APIC_INTEGRATED(ver)) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + apic_write_around(APIC_LVT1, value); +} + +void __init setup_local_APIC (void) +{ + unsigned long value, ver, maxlvt; + + value = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(value); + + if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) + __error_in_apic_c(); + + /* Double-check wether this APIC is really registered. */ + if (!test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map)) + BUG(); + + /* + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. 
"AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ + + /* + * In clustered apic mode, the firmware does this for us + * Put the APIC into flat delivery mode. + * Must be "all ones" explicitly for 82489DX. + */ + apic_write_around(APIC_DFR, 0xffffffff); + + /* + * Set up the logical destination ID. + */ + value = apic_read(APIC_LDR); + value &= ~APIC_LDR_MASK; + value |= (1<<(smp_processor_id()+24)); + apic_write_around(APIC_LDR, value); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write_around(APIC_TASKPRI, value); + + /* + * Now that we are all set up, enable the APIC + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + /* + * Enable APIC + */ + value |= APIC_SPIV_APIC_ENABLED; + + /* Enable focus processor (bit==0) */ + value &= ~APIC_SPIV_FOCUS_DISABLED; + + /* Set spurious IRQ vector */ + value |= SPURIOUS_APIC_VECTOR; + apic_write_around(APIC_SPIV, value); + + /* + * Set up LVT0, LVT1: + * + * set up through-local-APIC on the BP's LINT0. This is not + * strictly necessery in pure symmetric-IO mode, but sometimes + * we delegate interrupts to the 8259A. + */ + /* + * TODO: set up through-local-APIC from through-I/O-APIC? --macro + */ + value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; + if (!smp_processor_id()) { +/* && (pic_mode || !value)) { */ + value = APIC_DM_EXTINT; + printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); + } else { + value = APIC_DM_EXTINT | APIC_LVT_MASKED; + printk("masked ExtINT on CPU#%d\n", smp_processor_id()); + } + apic_write_around(APIC_LVT0, value); + + /* + * only the BP should see the LINT1 NMI signal, obviously. + */ + if (!smp_processor_id()) + value = APIC_DM_NMI; + else + value = APIC_DM_NMI | APIC_LVT_MASKED; + if (!APIC_INTEGRATED(ver)) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + apic_write_around(APIC_LVT1, value); + + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + maxlvt = get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + printk("ESR value before enabling vector: %08lx\n", value); + + value = ERROR_APIC_VECTOR; // enables sending errors + apic_write_around(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + printk("ESR value after enabling vector: %08lx\n", value); + } else { + printk("No ESR for 82489DX.\n"); + } +} + + +static inline void apic_pm_init1(void) { } +static inline void apic_pm_init2(void) { } + + +/* + * Detect and enable local APICs on non-SMP boards. + * Original code written by Keir Fraser. + */ + +static int __init detect_init_APIC (void) +{ + u32 h, l, features; + extern void get_cpu_vendor(struct cpuinfo_x86*); + + /* Workaround for us being called before identify_cpu(). */ + get_cpu_vendor(&boot_cpu_data); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || + (boot_cpu_data.x86 == 15 && cpu_has_apic) || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Some BIOSes disable the local APIC in the + * APIC_BASE MSR. This can only be done in + * software for Intel P6 and AMD K7 (Model > 1). 
+ */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk("Could not enable APIC!\n"); + return -1; + } + + set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_physical_apicid = 0; + + printk("Found and enabled local APIC!\n"); + + apic_pm_init1(); + + return 0; + + no_apic: + printk("No local APIC present or hardware disabled\n"); + return -1; +} + +void __init init_apic_mappings(void) +{ + unsigned long apic_phys = 0; + + /* + * If no local APIC can be found then set up a fake all zeroes page to + * simulate the local APIC and another one for the IO-APIC. + */ + if (!smp_found_config && detect_init_APIC()) { + apic_phys = get_free_page(GFP_KERNEL); + apic_phys = __pa(apic_phys); + } else + apic_phys = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + +#ifdef CONFIG_X86_IO_APIC + { + unsigned long ioapic_phys = 0, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + set_fixmap_nocache(idx, ioapic_phys); + Dprintk("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + } + } +#endif +} + +/* + * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts + * per second. We assume that the caller has already set up the local + * APIC. + * + * The APIC timer is not exactly sync with the external timer chip, it + * closely follows bus clocks. + */ + +/* + * The timer chip is already set up at HZ interrupts per second here, + * but we do not accept timer interrupts yet. We only allow the BP + * to calibrate. + */ +static unsigned int __init get_8254_timer_count(void) +{ + /*extern spinlock_t i8253_lock;*/ + /*unsigned long flags;*/ + + unsigned int count; + + /*spin_lock_irqsave(&i8253_lock, flags);*/ + + outb_p(0x00, 0x43); + count = inb_p(0x40); + count |= inb_p(0x40) << 8; + + /*spin_unlock_irqrestore(&i8253_lock, flags);*/ + + return count; +} + +void __init wait_8254_wraparound(void) +{ + unsigned int curr_count, prev_count=~0; + int delta; + + curr_count = get_8254_timer_count(); + + do { + prev_count = curr_count; + curr_count = get_8254_timer_count(); + delta = curr_count-prev_count; + + /* + * This limit for delta seems arbitrary, but it isn't, it's + * slightly above the level of error a buggy Mercury/Neptune + * chipset timer can cause. + */ + + } while (delta < 300); +} + +/* + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. 
+ */ + +#define APIC_DIVISOR 16 + +void __setup_APIC_LVTT(unsigned int clocks) +{ + unsigned int lvtt1_value, tmp_value; + + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | + APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + apic_write_around(APIC_LVTT, lvtt1_value); + + /* + * Divide PICLK by 16 + */ + tmp_value = apic_read(APIC_TDCR); + apic_write_around(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); + + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); +} + +void setup_APIC_timer(void * data) +{ + unsigned int clocks = (unsigned int) data, slice, t0, t1; + unsigned long flags; + int delta; + + __save_flags(flags); + __sti(); + /* + * ok, Intel has some smart code in their APIC that knows + * if a CPU was in 'hlt' lowpower mode, and this increases + * its APIC arbitration priority. To avoid the external timer + * IRQ APIC event being in synchron with the APIC clock we + * introduce an interrupt skew to spread out timer events. + * + * The number of slices within a 'big' timeslice is smp_num_cpus+1 + */ + + slice = clocks / (smp_num_cpus+1); + printk("cpu: %d, clocks: %d, slice: %d\n", + smp_processor_id(), clocks, slice); + + /* + * Wait for IRQ0's slice: + */ + wait_8254_wraparound(); + + __setup_APIC_LVTT(clocks); + + t0 = apic_read(APIC_TMICT)*APIC_DIVISOR; + /* Wait till TMCCT gets reloaded from TMICT... */ + do { + t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); + } while (delta >= 0); + /* Now wait for our slice for real. */ + do { + t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); + } while (delta < 0); + + __setup_APIC_LVTT(clocks); + + printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n", + smp_processor_id(), t0, t1, delta, slice, clocks); + + __restore_flags(flags); +} + +/* + * In this function we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +int __init calibrate_APIC_clock(void) +{ + unsigned long long t1 = 0, t2 = 0; + long tt1, tt2; + long result; + int i; + const int LOOPS = HZ/10; + + printk("calibrating APIC timer ...\n"); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + */ + __setup_APIC_LVTT(1000000000); + + /* + * The timer chip counts down to zero. Let's wait + * for a wraparound to start exact measurement: + * (the current tick might have been already half done) + */ + + wait_8254_wraparound(); + + /* + * We wrapped around just now. Let's start: + */ + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); + + /* + * Let's wait LOOPS wraprounds: + */ + for (i = 0; i < LOOPS; i++) + wait_8254_wraparound(); + + tt2 = apic_read(APIC_TMCCT); + rdtscll(t2); + + /* + * The APIC bus clock counter is 32 bits only, it + * might have overflown, but note that we use signed + * longs, thus no extra care needed. + * + * underflown to be exact, as the timer counts down ;) + */ + + result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + + printk("..... CPU clock speed is %ld.%04ld MHz.\n", + ((long)(t2-t1)/LOOPS)/(1000000/HZ), + ((long)(t2-t1)/LOOPS)%(1000000/HZ)); + + printk("..... 
host bus clock speed is %ld.%04ld MHz.\n", + result/(1000000/HZ), + result%(1000000/HZ)); + + return result; +} + +static unsigned int calibration_result; + +void __init setup_APIC_clocks (void) +{ + printk("Using local APIC timer interrupts.\n"); + using_apic_timer = 1; + + __cli(); + + calibration_result = calibrate_APIC_clock(); + /* + * Now set up the timer for real. + */ + setup_APIC_timer((void *)calibration_result); + + __sti(); + + /* and update all other cpus */ + smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); +} + +#undef APIC_DIVISOR + +/* + * Local timer interrupt handler. It does both profiling and + * process statistics/rescheduling. + * + * We do profiling in every local tick, statistics/rescheduling + * happen only every 'profiling multiplier' ticks. The default + * multiplier is 1 and it can be changed by writing the new multiplier + * value into /proc/profile. + */ + +inline void smp_local_timer_interrupt(struct pt_regs * regs) +{ + update_process_times(user_mode(regs)); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesnt support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +unsigned int apic_timer_irqs [NR_CPUS]; + +void smp_apic_timer_interrupt(struct pt_regs * regs) +{ + int cpu = smp_processor_id(); + + /* + * the NMI deadlock-detector uses this. + */ + apic_timer_irqs[cpu]++; + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + irq_enter(cpu, 0); + smp_local_timer_interrupt(regs); + irq_exit(cpu, 0); + + if (softirq_pending(cpu)) + do_softirq(); +} + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +asmlinkage void smp_spurious_interrupt(void) +{ + unsigned long v; + + /* + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. + */ + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); + + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk("spurious APIC interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ + +asmlinkage void smp_error_interrupt(void) +{ + unsigned long v, v1; + + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); + + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk ("APIC error on CPU%d: %02lx(%02lx)\n", + smp_processor_id(), v , v1); +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor (void) +{ + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. 
+ */ + if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + printk("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + return -1; + } + + verify_local_APIC(); + + connect_bsp_APIC(); + + phys_cpu_present_map = 1; + apic_write_around(APIC_ID, boot_cpu_physical_apicid); + + apic_pm_init2(); + + setup_local_APIC(); + +#ifdef CONFIG_X86_IO_APIC + if (smp_found_config && nr_ioapics) + setup_IO_APIC(); +#endif + setup_APIC_clocks(); + + return 0; +} diff --git a/xen-2.4.16/arch/i386/boot/boot.S b/xen-2.4.16/arch/i386/boot/boot.S new file mode 100644 index 0000000000..1b7323ee7e --- /dev/null +++ b/xen-2.4.16/arch/i386/boot/boot.S @@ -0,0 +1,241 @@ +#include +#include + +#define SECONDARY_CPU_FLAG 0xA5A5A5A5 + + .text + +ENTRY(start) + jmp hal_entry + + .align 4 + +/*** MULTIBOOT HEADER ****/ + /* Magic number indicating a Multiboot header. */ + .long 0x1BADB002 + /* Flags to bootloader (see Multiboot spec). */ + .long 0x00000006 + /* Checksum: must be the negated sum of the first two fields. */ + .long -0x1BADB008 + /* Unused loader addresses (ELF header has all this already).*/ + .long 0,0,0,0,0 + /* EGA text mode. */ + .long 1,0,0,0 + +hal_entry: + /* Set up a few descriptors: on entry only CS is guaranteed good. */ + lgdt %cs:nopaging_gdt_descr-__PAGE_OFFSET + mov $(__HYPERVISOR_DS),%ecx + mov %ecx,%ds + mov %ecx,%es + ljmp $(__HYPERVISOR_CS),$(1f)-__PAGE_OFFSET +1: lss stack_start-__PAGE_OFFSET,%esp + + /* Reset EFLAGS (subsumes CLI and CLD). */ + pushl $0 + popf + + /* CPU type checks. We need P6+. */ + mov $0x200000,%edx + pushfl + pop %ecx + and %edx,%ecx + jne bad_cpu # ID bit should be clear + pushl %edx + popfl + pushfl + pop %ecx + and %edx,%ecx + je bad_cpu # ID bit should be set + + /* Set up CR0. */ + mov %cr0,%ecx + and $0x00000011,%ecx # save ET and PE + or $0x00050022,%ecx # set AM, WP, NE and MP + mov %ecx,%cr0 + + /* Set up FPU. */ + fninit + + /* Set up CR4, except global flag which Intel requires should be */ + /* left until after paging is enabled (IA32 Manual Vol. 3, Sec. 2.5) */ + mov %cr4,%ecx + or mmu_cr4_features-__PAGE_OFFSET,%ecx + mov %ecx,mmu_cr4_features-__PAGE_OFFSET + and $0x7f,%ecx /* disable GLOBAL bit */ + mov %ecx,%cr4 + + /* Is this a non-boot processor? */ + cmp $(SECONDARY_CPU_FLAG),%ebx + jne continue_boot_cpu + + call start_paging + lidt idt_descr + jmp initialize_secondary + +continue_boot_cpu: + add $__PAGE_OFFSET,%ebx + push %ebx /* Multiboot info struct */ + push %eax /* Multiboot magic value */ + + /* Initialize BSS (no nasty surprises!) */ + mov $__bss_start-__PAGE_OFFSET,%edi + mov $_end-__PAGE_OFFSET,%ecx + sub %edi,%ecx + xor %eax,%eax + rep stosb + + /* Initialize low and high mappings of all memory with 4MB pages */ + mov $idle0_pg_table-__PAGE_OFFSET,%edi + mov $0x1e3,%eax /* PRESENT+RW+A+D+4MB+GLOBAL */ +1: mov %eax,__PAGE_OFFSET>>20(%edi) /* high mapping */ + stosl /* low mapping */ + add $(1< + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. 
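The Multiboot header constants in boot.S above must satisfy the spec rule that magic + flags + checksum wraps to zero in 32 bits. A quick standalone check of the constants (illustrative only, not part of the imported tree):

    #include <stdint.h>
    #include <stdio.h>

    /* Header words as used in boot.S above. */
    #define MB_MAGIC    0x1BADB002u
    #define MB_FLAGS    0x00000006u
    #define MB_CHECKSUM ((uint32_t)-(MB_MAGIC + MB_FLAGS))  /* == -0x1BADB008 */

    int main(void)
    {
        uint32_t sum = MB_MAGIC + MB_FLAGS + MB_CHECKSUM;   /* must be 0 */
        printf("checksum = 0x%08x, sum = 0x%08x\n", MB_CHECKSUM, sum);
        return sum != 0;   /* exit status 0 iff the header is consistent */
    }
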
+ */ + +#include +#include +#include +#include + +void __udelay(unsigned long usecs) +{ + unsigned long ticks = usecs * ticks_per_usec; + unsigned long s, e; + + rdtscl(s); + do + { + rep_nop(); + rdtscl(e); + } while ((e-s) < ticks); +} diff --git a/xen-2.4.16/arch/i386/entry.S b/xen-2.4.16/arch/i386/entry.S new file mode 100644 index 0000000000..996c1eb82f --- /dev/null +++ b/xen-2.4.16/arch/i386/entry.S @@ -0,0 +1,526 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * Stack layout in 'ret_from_system_call': + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ +/* The idea for callbacks from monitor -> guest OS. + * + * First, we require that all callbacks (either via a supplied + * interrupt-descriptor-table, or via the special event or failsafe callbacks + * in the shared-info-structure) are to ring 1. This just makes life easier, + * in that it means we don't have to do messy GDT/LDT lookups to find + * out which the privilege-level of the return code-selector. That code + * would just be a hassle to write, and would need to account for running + * off the end of the GDT/LDT, for example. The event callback has quite + * a constrained callback method: the guest OS provides a linear address + * which we call back to using the hard-coded __GUEST_CS descriptor (which + * is a ring 1 descriptor). For IDT callbacks, we check that the provided + * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as + * don't allow a guest OS to install ring-0 privileges into the GDT/LDT. + * It's up to the guest OS to ensure all returns via the IDT are to ring 1. + * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather + * than the correct ring) and bad things are bound to ensue -- IRET is + * likely to fault, and we may end up killing the domain (no harm can + * come to the hypervisor itself, though). + * + * When doing a callback, we check if the return CS is in ring 0. If so, + * callback is delayed until next return to ring != 0. + * If return CS is in ring 1, then we create a callback frame + * starting at return SS/ESP. The base of the frame does an intra-privilege + * interrupt-return. + * If return CS is in ring > 1, we create a callback frame starting + * at SS/ESP taken from appropriate section of the current TSS. The base + * of the frame does an inter-privilege interrupt-return. + * + * Note that the "failsafe callback" uses a special stackframe: + * { return_DS, return_ES, return_EIP, return_CS, return_EFLAGS, ... } + * That is, original values for DS/ES are placed on stack rather than + * in DS/ES themselves. Why? It saves us loading them, only to have them + * saved/restored in guest OS. Furthermore, if we load them we may cause + * a fault if they are invalid, which is a hassle to deal with. 
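The __udelay() above spins reading the TSC until enough ticks have elapsed. As a standalone sketch of the same busy-wait pattern, with a 64-bit counter read; ticks_per_usec here is a made-up placeholder for the rate Xen calibrates at boot:

    #include <stdint.h>

    static unsigned long ticks_per_usec = 2000;  /* placeholder: e.g. a 2 GHz TSC */

    static inline uint64_t rdtsc64(void)
    {
        uint32_t lo, hi;
        __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
        return ((uint64_t)hi << 32) | lo;
    }

    void udelay_sketch(unsigned long usecs)
    {
        uint64_t start = rdtsc64();
        uint64_t ticks = (uint64_t)usecs * ticks_per_usec;

        while (rdtsc64() - start < ticks)
            __asm__ __volatile__("rep; nop");  /* PAUSE hint, as rep_nop() does */
    }
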
We avoid + * that problem if we don't load them :-) This property allows us to use + * the failsafe callback as a fallback: if we ever fault on loading DS/ES + * on return to ring != 0, we can simply package it up as a return via + * the failsafe callback, and let the guest OS sort it out (perhaps by + * killing an application process). Note that we also do this for any + * faulting IRET -- just let the guest OS handle it via the event + * callback. + * + * We terminate a domain in the following cases: + * - creating a callback stack frame (due to bad ring-1 stack). + * - faulting IRET on entry to failsafe callback handler. + * So, each domain must keep its ring-1 %ss/%esp and failsafe callback + * handler in good order (absolutely no faults allowed!). + */ + +#include +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +/* Offsets in task_struct */ +PROCESSOR = 0 +STATE = 4 +HYP_EVENTS = 8 +DOMAIN = 12 +SHARED_INFO = 16 + +/* Offsets in shared_info_t */ +EVENTS = 0 +EVENTS_ENABLE = 4 +EVENT_ADDR = 8 +FAILSAFE_ADDR = 12 + +/* Offsets in guest_trap_bounce */ +GTB_ERROR_CODE = 0 +GTB_CR2 = 4 +GTB_FLAGS = 8 +GTB_CS = 10 +GTB_EIP = 12 +GTBF_TRAP = 1 +GTBF_TRAP_NOCODE = 2 +GTBF_TRAP_CR2 = 4 + +CF_MASK = 0x00000001 +IF_MASK = 0x00000200 +NT_MASK = 0x00004000 + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__HYPERVISOR_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +6: subl $4,%esp; \ + pushl %es; \ +5: pushl %ds; \ +4: pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + jmp failsafe_callback; \ +.previous; \ +.section __ex_table,"a"; \ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + +ENTRY(ret_from_newdomain) + GET_CURRENT(%ebx) + jmp test_all_events + + ALIGN +restore_all: + RESTORE_ALL + + ALIGN +ENTRY(hypervisor_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + andl $255,%eax + call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value + +test_all_events: + mov PROCESSOR(%ebx),%eax + shl $4,%eax # sizeof(irq_cpustat) == 16 + lea guest_trap_bounce(%eax),%edx + cli # tests must not race interrupts + xorl %ecx,%ecx + notl %ecx +test_softirqs: + mov PROCESSOR(%ebx),%eax + shl $4,%eax # sizeof(irq_cpustat) == 16 + test %ecx,SYMBOL_NAME(irq_stat)(%eax,1) + jnz process_softirqs +test_hyp_events: + test %ecx, HYP_EVENTS(%ebx) + jnz process_hyp_events +test_guest_events: + movl SHARED_INFO(%ebx),%eax + test %ecx,EVENTS(%eax) + jz restore_all + test %ecx,EVENTS_ENABLE(%eax) + jz restore_all + /* Prevent unnecessary reentry of event callback (stack overflow!) 
*/ + xorl %ecx,%ecx + movl %ecx,EVENTS_ENABLE(%eax) +/* %eax == shared_info, %ebx == task_struct, %edx == guest_trap_bounce */ +process_guest_events: + movl EVENT_ADDR(%eax),%eax + movl %eax,GTB_EIP(%edx) + movw $__GUEST_CS,GTB_CS(%edx) + call create_bounce_frame + jmp restore_all + + ALIGN +process_softirqs: + push %edx + call SYMBOL_NAME(do_softirq) + pop %edx + jmp test_hyp_events + + ALIGN +process_hyp_events: + sti + call SYMBOL_NAME(do_hyp_events) + jmp test_all_events + +/* No special register assumptions */ +failsafe_callback: + GET_CURRENT(%ebx) + mov PROCESSOR(%ebx),%eax + shl $4,%eax + lea guest_trap_bounce(%eax),%edx + movl SHARED_INFO(%ebx),%eax + movl FAILSAFE_ADDR(%eax),%eax + movl %eax,GTB_EIP(%edx) + movw $__GUEST_CS,GTB_CS(%edx) + call create_bounce_frame + subl $8,%esi # add DS/ES to failsafe stack frame + movl DS(%esp),%eax +FAULT3: movl %eax,(%esi) + movl ES(%esp),%eax +FAULT4: movl %eax,4(%esi) + movl %esi,OLDESP(%esp) + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + addl $12,%esp +FAULT5: iret + + +/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */ +/* {EIP, CS, EFLAGS, [ESP, SS]} */ +/* %edx == guest_trap_bounce, %ebx == task_struct */ +/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP. */ +create_bounce_frame: + mov CS+4(%esp),%cl + test $2,%cl + jz 1f /* jump if returning to an existing ring-1 activation */ + /* obtain ss/esp from TSS -- no current ring-1 activations */ + movl PROCESSOR(%ebx),%eax + shll $8,%eax /* multiply by 256 */ + addl $init_tss + 12,%eax + movl (%eax),%esi /* tss->esp1 */ +FAULT6: movl 4(%eax),%ds /* tss->ss1 */ + /* base of stack frame must contain ss/esp (inter-priv iret) */ + subl $8,%esi + movl OLDESP+4(%esp),%eax +FAULT7: movl %eax,(%esi) + movl OLDSS+4(%esp),%eax +FAULT8: movl %eax,4(%esi) + jmp 2f +1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */ + movl OLDESP+4(%esp),%esi +FAULT9: movl OLDSS+4(%esp),%ds +2: /* Construct a stack frame: EFLAGS, CS/EIP */ + subl $12,%esi + movl EIP+4(%esp),%eax +FAULT10:movl %eax,(%esi) + movl CS+4(%esp),%eax +FAULT11:movl %eax,4(%esi) + movl EFLAGS+4(%esp),%eax +FAULT12:movl %eax,8(%esi) + /* Rewrite our stack frame and return to ring 1. */ + movl %ds,OLDSS+4(%esp) + movl %esi,OLDESP+4(%esp) + movzwl %es:GTB_CS(%edx),%eax + movl %eax,CS+4(%esp) + movl %es:GTB_EIP(%edx),%eax + movl %eax,EIP+4(%esp) + ret + + +.section __ex_table,"a" + .align 4 + .long FAULT1, kill_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT2, kill_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT3, kill_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT4, kill_domain_fixup3 # Fault writing to ring-1 stack + .long FAULT5, kill_domain_fixup1 # Fault executing failsafe iret + .long FAULT6, kill_domain_fixup2 # Fault loading ring-1 stack selector + .long FAULT7, kill_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT8, kill_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT9, kill_domain_fixup2 # Fault loading ring-1 stack selector + .long FAULT10,kill_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT11,kill_domain_fixup2 # Fault writing to ring-1 stack + .long FAULT12,kill_domain_fixup2 # Fault writing to ring-1 stack +.previous + +# This handler kills domains which experience unrecoverable faults. 
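For reference, the frame that create_bounce_frame writes to the guest's ring-1 stack can be pictured as the struct below (lowest address first); this is an illustration of the assembly above, not code from the source. The ESP/SS pair exists only when the return crosses a privilege level, and process_guest_exception_and_events may push an error code (and CR2) below EIP:

    struct bounce_frame {         /* lowest address first */
        unsigned long eip;        /* from GTB_EIP */
        unsigned long cs;         /* from GTB_CS, e.g. __GUEST_CS */
        unsigned long eflags;     /* guest's saved EFLAGS */
        unsigned long esp;        /* only for an inter-privilege return */
        unsigned long ss;         /* ditto */
    };
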
+.section .fixup,"ax" +kill_domain_fixup1: + subl $4,%esp + SAVE_ALL + jmp kill_domain +kill_domain_fixup2: + addl $4,%esp +kill_domain_fixup3: + pushl %ss + popl %ds + jmp kill_domain +.previous + + ALIGN +process_guest_exception_and_events: + mov PROCESSOR(%ebx),%eax + shl $4,%eax # sizeof(irq_cpustat) == 16 + lea guest_trap_bounce(%eax),%edx + testb $~0,GTB_FLAGS(%edx) + jz test_all_events + call create_bounce_frame # just the basic frame + mov %es:GTB_FLAGS(%edx),%cl + test $GTBF_TRAP_NOCODE,%cl + jnz 2f + subl $4,%esi # push error_code onto guest frame + movl %es:GTB_ERROR_CODE(%edx),%eax +FAULT1: movl %eax,(%esi) + test $GTBF_TRAP_CR2,%cl + jz 1f + subl $4,%esi # push %cr2 onto guest frame + movl %es:GTB_CR2(%edx),%eax +FAULT2: movl %eax,(%esi) +1: movl %esi,OLDESP(%esp) +2: push %es # unclobber %ds + pop %ds + movb $0,GTB_FLAGS(%edx) + jmp test_all_events + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) + movb CS(%esp),%al + testb $3,%al # return to non-supervisor? + jne test_all_events + jmp restore_all + + ALIGN +ret_from_exception: + movb CS(%esp),%al + testb $3,%al # return to non-supervisor? + jne process_guest_exception_and_events + jmp restore_all + + ALIGN + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__HYPERVISOR_DS),%edx + movl %edx,%ds + movl %edx,%es + GET_CURRENT(%ebx) + call *%edi + addl $8,%esp + jmp ret_from_exception + +ENTRY(coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_error) + jmp error_code + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $ SYMBOL_NAME(do_simd_coprocessor_error) + jmp error_code + +ENTRY(device_not_available) + pushl $0 + pushl $SYMBOL_NAME(math_state_restore) + jmp error_code + +ENTRY(debug) + pushl $0 + pushl $ SYMBOL_NAME(do_debug) + jmp error_code + +ENTRY(nmi) + pushl %eax + SAVE_ALL + movl %esp,%edx + pushl $0 + pushl %edx + call SYMBOL_NAME(do_nmi) + addl $8,%esp + RESTORE_ALL + +ENTRY(int3) + pushl $0 + pushl $ SYMBOL_NAME(do_int3) + jmp error_code + +ENTRY(overflow) + pushl $0 + pushl $ SYMBOL_NAME(do_overflow) + jmp error_code + +ENTRY(bounds) + pushl $0 + pushl $ SYMBOL_NAME(do_bounds) + jmp error_code + +ENTRY(invalid_op) + pushl $0 + pushl $ SYMBOL_NAME(do_invalid_op) + jmp error_code + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) + jmp error_code + +ENTRY(double_fault) + pushl $ SYMBOL_NAME(do_double_fault) + jmp error_code + +ENTRY(invalid_TSS) + pushl $ SYMBOL_NAME(do_invalid_TSS) + jmp error_code + +ENTRY(segment_not_present) + pushl $ SYMBOL_NAME(do_segment_not_present) + jmp error_code + +ENTRY(stack_segment) + pushl $ SYMBOL_NAME(do_stack_segment) + jmp error_code + +ENTRY(general_protection) + pushl $ SYMBOL_NAME(do_general_protection) + jmp error_code + +ENTRY(alignment_check) + pushl $ SYMBOL_NAME(do_alignment_check) + jmp error_code + +ENTRY(page_fault) + pushl $ SYMBOL_NAME(do_page_fault) + jmp error_code + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ 
SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +.data +ENTRY(hypervisor_call_table) + .long SYMBOL_NAME(do_set_trap_table) + .long SYMBOL_NAME(do_process_page_updates) + .long SYMBOL_NAME(do_console_write) + .long SYMBOL_NAME(do_set_pagetable) + .long SYMBOL_NAME(do_set_guest_stack) + .long SYMBOL_NAME(do_net_update) + .long SYMBOL_NAME(do_fpu_taskswitch) + .long SYMBOL_NAME(do_yield) + .long SYMBOL_NAME(kill_domain) + .long SYMBOL_NAME(do_dom0_op) + .long SYMBOL_NAME(do_network_op) + .rept NR_syscalls-(.-hypervisor_call_table)/4 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff --git a/xen-2.4.16/arch/i386/extable.c b/xen-2.4.16/arch/i386/extable.c new file mode 100644 index 0000000000..4cd9f064c3 --- /dev/null +++ b/xen-2.4.16/arch/i386/extable.c @@ -0,0 +1,62 @@ +/* + * linux/arch/i386/mm/extable.c + */ + +#include +#include +#include +#include + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long +search_one_table(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + long diff; + + mid = (last - first) / 2 + first; + diff = mid->insn - value; + if (diff == 0) + return mid->fixup; + else if (diff < 0) + first = mid+1; + else + last = mid-1; + } + return 0; +} + +extern spinlock_t modlist_lock; + +unsigned long +search_exception_table(unsigned long addr) +{ + unsigned long ret = 0; + +#ifndef CONFIG_MODULES + /* There is only the kernel to search. */ + ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); + return ret; +#else + unsigned long flags; + /* The kernel is the last "module" -- no need to treat it special. 
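search_one_table() above is a plain bisection, which only works because the linker emits __ex_table sorted by faulting address. A self-contained toy run of the same logic (all addresses hypothetical):

    #include <stdio.h>

    struct exception_table_entry { unsigned long insn, fixup; };

    static unsigned long search(const struct exception_table_entry *first,
                                const struct exception_table_entry *last,
                                unsigned long value)
    {
        while (first <= last) {
            const struct exception_table_entry *mid = first + (last - first) / 2;
            if (mid->insn == value) return mid->fixup;
            if (mid->insn < value)  first = mid + 1;
            else                    last = mid - 1;
        }
        return 0;   /* not a whitelisted faulting instruction */
    }

    int main(void)
    {
        struct exception_table_entry t[] = {
            { 0x1000, 0x9000 }, { 0x1008, 0x9010 }, { 0x2000, 0x9020 },
        };
        printf("fixup = 0x%lx\n", search(t, t + 2, 0x1008));  /* prints 0x9010 */
        return 0;
    }
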
*/ + struct module *mp; + + spin_lock_irqsave(&modlist_lock, flags); + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) + continue; + ret = search_one_table(mp->ex_table_start, + mp->ex_table_end - 1, addr); + if (ret) + break; + } + spin_unlock_irqrestore(&modlist_lock, flags); + return ret; +#endif +} diff --git a/xen-2.4.16/arch/i386/i387.c b/xen-2.4.16/arch/i386/i387.c new file mode 100644 index 0000000000..dc94cc1dad --- /dev/null +++ b/xen-2.4.16/arch/i386/i387.c @@ -0,0 +1,50 @@ +/* + * linux/arch/i386/kernel/i387.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include + +void init_fpu(void) +{ + __asm__("fninit"); + if ( cpu_has_xmm ) load_mxcsr(0x1f80); + current->flags |= PF_DONEFPUINIT; +} + +static inline void __save_init_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxsave %0 ; fnclex" + : "=m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "fnsave %0 ; fwait" + : "=m" (tsk->thread.i387.fsave) ); + } + tsk->flags &= ~PF_USEDFPU; +} + +void save_init_fpu( struct task_struct *tsk ) +{ + __save_init_fpu(tsk); + stts(); +} + +void restore_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxrstor %0" + : : "m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "frstor %0" + : : "m" (tsk->thread.i387.fsave) ); + } +} diff --git a/xen-2.4.16/arch/i386/i8259.c b/xen-2.4.16/arch/i386/i8259.c new file mode 100644 index 0000000000..c9d4dfe670 --- /dev/null +++ b/xen-2.4.16/arch/i386/i8259.c @@ -0,0 +1,469 @@ +/****************************************************************************** + * i8259.c + * + * Well, this is required for SMP systems as well, as it build interrupt + * tables for IO APICS as well as uniprocessor 8259-alikes. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +/* + * Common place to define all x86 IRQ vectors + * + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that save + * register context and call do_IRQ(). do_IRQ() then does all the + * operations that are needed to keep the AT (or SMP IOAPIC) + * interrupt-controller happy. + */ + +BUILD_COMMON_IRQ() + +#define BI(x,y) \ + BUILD_IRQ(x##y) + +#define BUILD_16_IRQS(x) \ + BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ + BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ + BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ + BI(x,c) BI(x,d) BI(x,e) BI(x,f) + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x20-0x2f) + */ + BUILD_16_IRQS(0x0) + +#ifdef CONFIG_X86_IO_APIC +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. 
+ * + * (these are usually mapped into the 0x30-0xff vector range) + */ + BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) + BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) + BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) + BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) +#endif + +#undef BUILD_16_IRQS +#undef BI + + +/* + * The following vectors are part of the Linux architecture, there + * is no hardware IRQ pin equivalent for them, they are triggered + * through the ICC by us (IPIs) + */ +#ifdef CONFIG_SMP + BUILD_SMP_INTERRUPT(event_check_interrupt,EVENT_CHECK_VECTOR) + BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) + BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) +#endif + +/* + * every pentium local APIC has two 'local interrupts', with a + * soft-definable vector attached to both interrupts, one of + * which is a timer interrupt, the other one is error counter + * overflow. Linux uses the local APIC timer interrupt to get + * a much simpler SMP time architecture: + */ +#ifdef CONFIG_X86_LOCAL_APIC + BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) + BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) + BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#endif + +#define IRQ(x,y) \ + IRQ##x##y##_interrupt + +#define IRQLIST_16(x) \ + IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ + IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ + IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ + IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) + + void (*interrupt[NR_IRQS])(void) = { + IRQLIST_16(0x0), + +#ifdef CONFIG_X86_IO_APIC + IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3), + IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), + IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), + IRQLIST_16(0xc), IRQLIST_16(0xd) +#endif + }; + +#undef IRQ +#undef IRQLIST_16 + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + * this file should become arch/i386/kernel/irq.c when the old irq.c + * moves to arch independent land + */ + +spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; + +static void end_8259A_irq (unsigned int irq) +{ + if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) + enable_8259A_irq(irq); +} + +#define shutdown_8259A_irq disable_8259A_irq + +void mask_and_ack_8259A(unsigned int); + +static unsigned int startup_8259A_irq(unsigned int irq) +{ + enable_8259A_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type i8259A_irq_type = { + "XT-PIC", + startup_8259A_irq, + shutdown_8259A_irq, + enable_8259A_irq, + disable_8259A_irq, + mask_and_ack_8259A, + end_8259A_irq, + NULL +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +static unsigned int cached_irq_mask = 0xffff; + +#define __byte(x,y) (((unsigned char *)&(y))[x]) +#define cached_21 (__byte(0,cached_irq_mask)) +#define cached_A1 (__byte(1,cached_irq_mask)) + +/* + * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. + * + * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. + * this 'mixed mode' IRQ handling costs nothing because it's only used + * at IRQ setup time. 
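cached_irq_mask below keeps the soft copy of both PICs' mask bits in one 16-bit value; the __byte() aliasing relies on x86 being little-endian. A small standalone demonstration of how one update lands in the right OCW1 byte:

    #include <stdio.h>

    static unsigned int cached_irq_mask = 0xffff;     /* boot state: all masked */

    #define __byte(x,y) (((unsigned char *)&(y))[x])
    #define cached_21   (__byte(0, cached_irq_mask))  /* master PIC, port 0x21 */
    #define cached_A1   (__byte(1, cached_irq_mask))  /* slave PIC,  port 0xA1 */

    int main(void)
    {
        cached_irq_mask &= ~(1u << 4);    /* unmask IRQ4  -> master byte */
        cached_irq_mask &= ~(1u << 12);   /* unmask IRQ12 -> slave byte  */
        printf("OCW1 to 0x21: %02x, to 0xA1: %02x\n", cached_21, cached_A1);
        return 0;   /* prints ef / ef on little-endian x86 */
    }
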
+ */ +unsigned long io_apic_irqs; + +void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); +} + +int i8259A_irq_pending(unsigned int irq) +{ + unsigned int mask = 1<> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; +} + +void make_8259A_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + io_apic_irqs &= ~(1<> 8); + outb(0x0A,0xA0); /* back to the IRR register */ + return value; +} + +/* + * Careful! The 8259A is a fragile beast, it pretty + * much _has_ to be done exactly like this (mask it + * first, _then_ send the EOI, and the order of EOI + * to the two 8259s is important! + */ +void mask_and_ack_8259A(unsigned int irq) +{ + unsigned int irqmask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecesserily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + + handle_real_irq: + if (irq & 8) { + inb(0xA1); /* DUMMY - (do we need this?) */ + outb(cached_A1,0xA1); + outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */ + outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + } else { + inb(0x21); /* DUMMY - (do we need this?) */ + outb(cached_21,0x21); + outb(0x60+irq,0x20); /* 'Specific EOI' to master */ + } + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + + spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { + printk("spurious 8259A interrupt: IRQ%d.\n", irq); + spurious_irq_mask |= irqmask; + } + atomic_inc(&irq_err_count); + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; + } +} + +void __init init_8259A(int auto_eoi) +{ + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. 
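i8259A_irq_real() above polls the PIC's in-service register via OCW3 commands (0x0B selects the ISR for the next read, 0x0A switches back to IRR reporting). A sketch of that sequence, reconstructed on the assumption that it matches the stock Linux 2.4 routine, with outb(value, port)/inb(port) as assumed helpers from this tree's io.h:

    extern void outb(unsigned char value, unsigned short port);  /* assumed */
    extern unsigned char inb(unsigned short port);               /* assumed */

    /* Is `irq' currently marked in-service by the hardware? */
    static int irq_in_service(unsigned int irq)
    {
        int value;

        if (irq < 8) {                      /* master PIC */
            outb(0x0B, 0x20);               /* OCW3: next read returns ISR */
            value = inb(0x20) & (1 << irq);
            outb(0x0A, 0x20);               /* OCW3: back to the IRR */
        } else {                            /* slave PIC */
            outb(0x0B, 0xA0);
            value = inb(0xA0) & (1 << (irq - 8));
            outb(0x0A, 0xA0);
        }
        return value;
    }
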
+ */ + outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ + outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ + if (auto_eoi) + outb_p(0x03, 0x21); /* master does Auto EOI */ + else + outb_p(0x01, 0x21); /* master expects normal EOI */ + + outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ + outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ + outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode + is to be investigated) */ + + if (auto_eoi) + /* + * in AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_irq_type.ack = disable_8259A_irq; + else + i8259A_irq_type.ack = mask_and_ack_8259A; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_21, 0x21); /* restore master IRQ mask */ + outb(cached_A1, 0xA1); /* restore slave IRQ mask */ + + spin_unlock_irqrestore(&i8259A_lock, flags); +} + + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ + +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; + +void __init init_ISA_irqs (void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = 0; + irq_desc[i].depth = 1; + + if (i < 16) { + /* + * 16 old-style INTA-cycle interrupts: + */ + irq_desc[i].handler = &i8259A_irq_type; + } else { + /* + * 'high' PCI IRQs filled in on demand + */ + irq_desc[i].handler = &no_irq_type; + } + } +} + +void __init init_IRQ(void) +{ + int i; + + init_ISA_irqs(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < NR_IRQS; i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != HYPERVISOR_CALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } + +#ifdef CONFIG_SMP + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); + + /* IPI for invalidation */ + set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + + /* + * Set the clock to HZ Hz, we already have a valid + * vector now: + */ +#define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */ +#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ) + outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8 , 0x40); /* MSB */ + + setup_irq(2, &irq2); +} diff --git a/xen-2.4.16/arch/i386/idle0_task.c b/xen-2.4.16/arch/i386/idle0_task.c new file mode 100644 index 0000000000..0d2b9e40bf --- /dev/null +++ b/xen-2.4.16/arch/i386/idle0_task.c @@ -0,0 +1,20 @@ +#include +#include +#include + +/* + * Initial task structure. 
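The PIT programming at the end of init_IRQ() derives LATCH by rounding the 1.19318 MHz crystal rate to the nearest multiple of HZ. Worked through for HZ == 100 (an assumption here; the real value comes from this tree's config):

    #include <stdio.h>

    #define CLOCK_TICK_RATE 1193180                /* PIT crystal, Hz */

    int main(void)
    {
        int hz = 100;                              /* assumed HZ */
        int latch = (CLOCK_TICK_RATE + hz/2) / hz; /* rounded divisor: 11932 */
        printf("LATCH = %d -> actual tick rate = %.4f Hz\n",
               latch, (double)CLOCK_TICK_RATE / latch);   /* ~99.9983 Hz */
        return 0;
    }
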
XXX KAF: To get this 8192-byte aligned without + * linker tricks I copy it into aligned BSS area at boot time. + * Actual name idle0_task_union now declared in boot.S. + */ +struct task_struct first_task_struct = IDLE0_TASK(idle0_task_union.task); + +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; + diff --git a/xen-2.4.16/arch/i386/io_apic.c b/xen-2.4.16/arch/i386/io_apic.c new file mode 100644 index 0000000000..190e6be36c --- /dev/null +++ b/xen-2.4.16/arch/i386/io_apic.c @@ -0,0 +1,1487 @@ +/* + * Intel IO-APIC support for multi-Pentium hosts. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku and + * Hidemi Kishimoto , + * further tested and cleaned up by Zach Brown + * and Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + +/* + * This is performance-critical, we want to do it O(1) + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +static struct irq_pin_list { + int apic, pin, next; +} irq_2_pin[PIN_MAP_SIZE]; + +/* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. 
+ */ +static void add_pin_to_irq(unsigned int irq, int apic, int pin) +{ + static int first_free_entry = NR_IRQS; + struct irq_pin_list *entry = irq_2_pin + irq; + + while (entry->next) + entry = irq_2_pin + entry->next; + + if (entry->pin != -1) { + entry->next = first_free_entry; + entry = irq_2_pin + entry->next; + if (++first_free_entry >= PIN_MAP_SIZE) + panic("io_apic.c: whoops"); + } + entry->apic = apic; + entry->pin = pin; +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) + /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) + /* mask = 0 */ +DO_ACTION( __mask_and_edge, 0, = (reg & 0xffff7fff) | 0x00010000, ) + /* mask = 1, trigger = 0 */ +DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, ) + /* mask = 0, trigger = 1 */ + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + /* + * Disable it in the IO-APIC irq-routing table: + */ + memset(&entry, 0, sizeof(entry)); + entry.mask = 1; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void clear_IO_APIC (void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +} + +/* + * Find the IRQ entry number of a certain pin. + */ +static int __init find_irq_entry(int apic, int pin, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == type && + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && + mp_irqs[i].mpc_dstirq == pin) + return i; + + return -1; +} + +/* + * Find the pin to which IRQ[irq] (ISA) is connected + */ +static int __init find_isa_irq_pin(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + + return mp_irqs[i].mpc_dstirq; + } + return -1; +} + +/* + * Find a specific PCI IRQ entry. 
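add_pin_to_irq() above threads extra (apic, pin) pairs onto a chain indexed by IRQ, and the __DO_ACTION macro walks that chain. The traversal pulled out as a plain C sketch (fn is a hypothetical callback, for illustration only):

    struct irq_pin_list { int apic, pin, next; };
    extern struct irq_pin_list irq_2_pin[];    /* the table defined above */

    static void for_each_pin(unsigned int irq, void (*fn)(int apic, int pin))
    {
        struct irq_pin_list *entry = irq_2_pin + irq;

        for (;;) {
            if (entry->pin == -1)   /* head never used: IRQ has no pins */
                break;
            fn(entry->apic, entry->pin);
            if (!entry->next)       /* next == 0 terminates the chain */
                break;
            entry = irq_2_pin + entry->next;
        }
    }
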
+ * Not an __init, possibly needed by modules + */ +static int pin_2_irq(int idx, int apic, int pin); + +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +{ + int apic, i, best_guess = -1; + + Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (mp_bus_id_to_pci_bus[bus] == -1) { + printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + break; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && + !mp_irqs[i].mpc_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) + best_guess = irq; + } + } + return best_guess; +} + +/* + * EISA Edge/Level control register, ELCR + */ +static int __init EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +#define default_EISA_polarity(idx) (0) + +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + +/* PCI interrupts are always polarity one level triggered, + * when listed as conforming in the MP table. */ + +#define default_PCI_trigger(idx) (1) +#define default_PCI_polarity(idx) (1) + +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) (0) + +static int __init MPBIOS_polarity(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ + switch (mp_irqs[idx].mpc_irqflag & 3) + { + case 0: /* conforms, ie. 
bus-type dependent polarity */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + polarity = default_ISA_polarity(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + polarity = default_EISA_polarity(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + polarity = default_PCI_polarity(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + polarity = default_MCA_polarity(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + break; + } + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + return polarity; +} + +static int __init MPBIOS_trigger(int idx) +{ + int bus = mp_irqs[idx].mpc_srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ + switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ + { + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + { + trigger = default_ISA_trigger(idx); + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + trigger = default_PCI_trigger(idx); + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } + break; + } + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } + return trigger; +} + +static inline int irq_polarity(int idx) +{ + return MPBIOS_polarity(idx); +} + +static inline int irq_trigger(int idx) +{ + return MPBIOS_trigger(idx); +} + +static int pin_2_irq(int idx, int apic, int pin) +{ + int irq, i; + int bus = mp_irqs[idx].mpc_srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! 
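MPBIOS_polarity() and MPBIOS_trigger() above decode two 2-bit fields of the MP-table irqflag word; value 0 means "conforms to the bus" and is resolved per bus type as in the switches. The field extraction, shown in isolation:

    /* irqflag bits 0-1: polarity; bits 2-3: trigger mode.
     * 0 = conforms to bus, 1 = active high / edge,
     * 2 = reserved, 3 = active low / level. */
    static int mp_polarity_bits(unsigned short irqflag) { return irqflag & 3; }
    static int mp_trigger_bits(unsigned short irqflag)  { return (irqflag >> 2) & 3; }

    /* e.g. irqflag == 0x0f decodes as active low + level triggered,
     * the explicit setting a conforming PCI device would use. */
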
+ */ + if (mp_irqs[idx].mpc_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + + switch (mp_bus_id_to_type[bus]) + { + case MP_BUS_ISA: /* ISA pin */ + case MP_BUS_EISA: + case MP_BUS_MCA: + { + irq = mp_irqs[idx].mpc_srcbusirq; + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; + break; + } + default: + { + printk(KERN_ERR "unknown bus type %d.\n",bus); + irq = 0; + break; + } + } + + return irq; +} + +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic,pin,mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} + +int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 }; + +static int __init assign_irq_vector(int irq) +{ + static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; + if (IO_APIC_VECTOR(irq) > 0) + return IO_APIC_VECTOR(irq); +next: + current_vector += 8; + if (current_vector == HYPERVISOR_CALL_VECTOR) + goto next; + + if (current_vector > FIRST_SYSTEM_VECTOR) { + offset++; + current_vector = FIRST_DEVICE_VECTOR + offset; + } + + if (current_vector == FIRST_SYSTEM_VECTOR) + panic("ran out of interrupt sources!"); + + IO_APIC_VECTOR(irq) = current_vector; + return current_vector; +} + +extern void (*interrupt[NR_IRQS])(void); +static struct hw_interrupt_type ioapic_level_irq_type; +static struct hw_interrupt_type ioapic_edge_irq_type; + +void __init setup_IO_APIC_irqs(void) +{ + struct IO_APIC_route_entry entry; + int apic, pin, idx, irq, first_notcon = 1, vector; + unsigned long flags; + + printk(KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + /* + * add it to the IO-APIC irq-routing table: + */ + memset(&entry,0,sizeof(entry)); + + entry.delivery_mode = dest_LowestPrio; + entry.dest_mode = INT_DELIVERY_MODE; + entry.mask = 0; /* enable IRQ */ + entry.dest.logical.logical_dest = TARGET_CPUS; + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { + if (first_notcon) { + printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); + first_notcon = 0; + } else + printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin); + continue; + } + + entry.trigger = irq_trigger(idx); + entry.polarity = irq_polarity(idx); + + if (irq_trigger(idx)) { + entry.trigger = 1; + entry.mask = 1; + entry.dest.logical.logical_dest = TARGET_CPUS; + } + + irq = pin_2_irq(idx, apic, pin); + add_pin_to_irq(irq, apic, pin); + + if (!apic && !IO_APIC_IRQ(irq)) + continue; + + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; + + if (IO_APIC_irq_trigger(irq)) + irq_desc[irq].handler = &ioapic_level_irq_type; + else + irq_desc[irq].handler = &ioapic_edge_irq_type; + + set_intr_gate(vector, interrupt[irq]); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + } + + if (!first_notcon) + printk(" not connected.\n"); +} + +/* + * Set up the 8259A-master output pin as broadcast to all + * CPUs. 
+ */ +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +{ + struct IO_APIC_route_entry entry; + unsigned long flags; + + memset(&entry,0,sizeof(entry)); + + disable_8259A_irq(0); + + /* mask LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = INT_DELIVERY_MODE; + entry.mask = 0; /* unmask IRQ now */ + entry.dest.logical.logical_dest = TARGET_CPUS; + entry.delivery_mode = dest_LowestPrio; + entry.polarity = 0; + entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesnt have to know that behind the + * scene we have a 8259A-master in AEOI mode ... + */ + irq_desc[0].handler = &ioapic_edge_irq_type; + + /* + * Add it to the IO-APIC irq-routing table: + */ + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + enable_8259A_irq(0); +} + +void __init UNEXPECTED_IO_APIC(void) +{ + printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n"); + printk(KERN_WARNING " to linux-smp@vger.kernel.org\n"); +} + +void __init print_IO_APIC(void) +{ + int apic, i; + struct IO_APIC_reg_00 reg_00; + struct IO_APIC_reg_01 reg_01; + struct IO_APIC_reg_02 reg_02; + unsigned long flags; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (i = 0; i < nr_ioapics; i++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); + + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + *(int *)®_01 = io_apic_read(apic, 1); + if (reg_01.version >= 0x10) + *(int *)®_02 = io_apic_read(apic, 2); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk("\n"); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); + if (reg_00.__reserved_1 || reg_00.__reserved_2) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.entries); + if ( (reg_01.entries != 0x0f) && /* older (Neptune) boards */ + (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ + (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ + (reg_01.entries != 0x1f) && /* dual Xeon boards */ + (reg_01.entries != 0x22) && /* bigger Xeon boards */ + (reg_01.entries != 0x2E) && + (reg_01.entries != 0x3F) + ) + UNEXPECTED_IO_APIC(); + + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.version); + if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.version != 0x10) && /* oldest IO-APICs */ + (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ + (reg_01.version != 0x13) && /* Xeon IO-APICs */ + (reg_01.version != 0x20) /* Intel P64H (82806 AA) */ + ) + UNEXPECTED_IO_APIC(); + if (reg_01.__reserved_1 || reg_01.__reserved_2) + UNEXPECTED_IO_APIC(); + + if (reg_01.version >= 0x10) { + printk(KERN_DEBUG ".... 
register #02: %08X\n", *(int *)®_02); + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration); + if (reg_02.__reserved_1 || reg_02.__reserved_2) + UNEXPECTED_IO_APIC(); + } + + printk(KERN_DEBUG ".... IRQ redirection table:\n"); + + printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" + " Stat Dest Deli Vect: \n"); + + for (i = 0; i <= reg_01.entries; i++) { + struct IO_APIC_route_entry entry; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk(KERN_DEBUG " %02x %03X %02X ", + i, + entry.dest.logical.logical_dest, + entry.dest.physical.physical_dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector + ); + } + } + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < NR_IRQS; i++) { + struct irq_pin_list *entry = irq_2_pin + i; + if (entry->pin < 0) + continue; + printk(KERN_DEBUG "IRQ%d ", i); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } + printk("\n"); + } + + printk(KERN_INFO ".................................... done.\n"); + + return; +} + +static void print_APIC_bitfield (int base) +{ + unsigned int v; + int i, j; + + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } + + v = apic_read(APIC_ICR); + printk(KERN_DEBUG "... APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + + v = apic_read(APIC_LVTT); + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + printk("\n"); +} + +void print_all_local_APICs (void) +{ + smp_call_function(print_local_APIC, NULL, 1, 1); + print_local_APIC(NULL); +} + +void /*__init*/ print_PIC(void) +{ + extern spinlock_t i8259A_lock; + unsigned int v, flags; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); + + spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); + + outb(0x0b,0xa0); + outb(0x0b,0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); + + spin_unlock_irqrestore(&i8259A_lock, flags); + + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); +} + +static void __init enable_IO_APIC(void) +{ + struct IO_APIC_reg_01 reg_01; + int i; + unsigned long flags; + + for (i = 0; i < PIN_MAP_SIZE; i++) { + irq_2_pin[i].pin = -1; + irq_2_pin[i].next = 0; + } + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (i = 0; i < nr_ioapics; i++) { + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_01 = io_apic_read(i, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[i] = reg_01.entries+1; + } + + /* + * Do not trust the IO-APIC being empty at bootup + */ + clear_IO_APIC(); +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ + /* + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); + + disconnect_bsp_APIC(); +} + +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc (void) +{ + struct IO_APIC_reg_00 reg_00; + unsigned long phys_id_present_map = phys_cpu_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mpc_apicid; + + if (mp_ioapics[apic].mpc_apicid >= 0xf) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mpc_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.ID); + mp_ioapics[apic].mpc_apicid = reg_00.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mpc_apicid); + for (i = 0; i < 0xf; i++) + if (!(phys_id_present_map & (1 << i))) + break; + if (i >= 0xf) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + phys_id_present_map |= 1 << i; + mp_ioapics[apic].mpc_apicid = i; + } else { + printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid); + phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid; + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mpc_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_dstapic == old_id) + mp_irqs[i].mpc_dstapic + = mp_ioapics[apic].mpc_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mpc_apicid); + + reg_00.ID = mp_ioapics[apic].mpc_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, *(int *)®_00); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + *(int *)®_00 = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.ID != mp_ioapics[apic].mpc_apicid) + panic("could not set ID!\n"); + else + printk(" ok.\n"); + } +} + +/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. 
We do the following to work around the situation: + * + * - timer IRQ defaults to IO-APIC IRQ + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +static int __init timer_irq_works(void) +{ + unsigned int t1 = jiffies; + + sti(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); + + /* + * Expect a few ticks at least, to be sure some possible + * glue logic does not lock up after one or two first + * ticks in a non-ExtINT mode. Also the local APIC + * might have cached one ExtINT interrupt. Finally, at + * least one tick may be lost due to delays. + */ + if (jiffies - t1 > 4) + return 1; + + return 0; +} + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we do not have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +/* + * Edge triggered needs to resend any interrupt + * that was delayed but this is now handled in the device + * independent code. + */ +#define enable_edge_ioapic_irq unmask_IO_APIC_irq + +static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } + +/* + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need + * return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ + +static unsigned int startup_edge_ioapic_irq(unsigned int irq) +{ + int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + if (irq < 16) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +} + +#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq + +/* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ +static void ack_edge_ioapic_irq(unsigned int irq) +{ + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } + + +/* + * Level triggered interrupts can just be masked, + * and shutting down and starting up the interrupt + * is the same as enabling and disabling them -- except + * with a startup need to return a "was pending" value. + * + * Level triggered interrupts are special because we + * do not touch any IO-APIC register while handling + * them. We ack the APIC in the end-IRQ handler, not + * in the start-IRQ-handler. Protection against reentrance + * from the same interrupt is still provided, both by the + * generic IRQ layer and by the fact that an unacked local + * APIC does not accept IRQs. 
+ */ +static unsigned int startup_level_ioapic_irq (unsigned int irq) +{ + unmask_IO_APIC_irq(irq); + + return 0; /* don't check for pending */ +} + +#define shutdown_level_ioapic_irq mask_IO_APIC_irq +#define enable_level_ioapic_irq unmask_IO_APIC_irq +#define disable_level_ioapic_irq mask_IO_APIC_irq + +static void end_level_ioapic_irq (unsigned int irq) +{ + unsigned long v; + int i; + +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = IO_APIC_VECTOR(irq); + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { +#ifdef APIC_LOCKUP_DEBUG + struct irq_pin_list *entry; +#endif + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); +#ifdef APIC_LOCKUP_DEBUG + for (entry = irq_2_pin + irq;;) { + unsigned int reg; + + if (entry->pin == -1) + break; + reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2); + if (reg & 0x00004000) + printk(KERN_CRIT "Aieee!!! Remote IRR" + " still set after unlock!\n"); + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } +#endif + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} + +static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } + +static void set_ioapic_affinity (unsigned int irq, unsigned long mask) +{ + unsigned long flags; + /* + * Only the first 8 bits are valid. + */ + mask = mask << 24; + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ + +static struct hw_interrupt_type ioapic_edge_irq_type = { + "IO-APIC-edge", + startup_edge_ioapic_irq, + shutdown_edge_ioapic_irq, + enable_edge_ioapic_irq, + disable_edge_ioapic_irq, + ack_edge_ioapic_irq, + end_edge_ioapic_irq, + set_ioapic_affinity, +}; + +static struct hw_interrupt_type ioapic_level_irq_type = { + "IO-APIC-level", + startup_level_ioapic_irq, + shutdown_level_ioapic_irq, + enable_level_ioapic_irq, + disable_level_ioapic_irq, + mask_and_ack_level_ioapic_irq, + end_level_ioapic_irq, + set_ioapic_affinity, +}; + +static inline void init_IO_APIC_traps(void) +{ + int irq; + + /* + * NOTE! 
The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ + if (irq < 16) + make_8259A_irq(irq); + else + /* Strange. Oh, well.. */ + irq_desc[irq].handler = &no_irq_type; + } + } +} + +static void enable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +} + +static void disable_lapic_irq (unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); +} + +static void ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static void end_lapic_irq (unsigned int i) { /* nothing */ } + +static struct hw_interrupt_type lapic_irq_type = { + "local-APIC-edge", + NULL, /* startup_irq() not used for IRQ0 */ + NULL, /* shutdown_irq() not used for IRQ0 */ + enable_lapic_irq, + disable_lapic_irq, + ack_lapic_irq, + end_lapic_irq +}; + + +/* + * This looks a bit hackish but it's about the only one way of sending + * a few INTA cycles to 8259As and any associated glue logic. ICR does + * not support the ExtINT mode, unfortunately. We need to send these + * cycles as some i82489DX-based boards have glue logic that keeps the + * 8259A interrupt line asserted until INTA. 
--macro + */ +static inline void unlock_ExtINT_logic(void) +{ + int pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; + unsigned long flags; + + pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) + return; + + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + clear_IO_APIC_pin(0, pin); + + memset(&entry1, 0, sizeof(entry1)); + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ + entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; + entry1.vector = 0; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, + RTC_FREQ_SELECT); + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); + + i = 100; + while (i-- > 0) { + mdelay(10); + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) + i -= 10; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(0, pin); + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * This code may look a bit paranoid, but it's supposed to cooperate with + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. + */ +static inline void check_timer(void) +{ + extern int timer_ack; + int pin1, pin2; + int vector; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); + vector = assign_irq_vector(0); + set_intr_gate(vector, interrupt[0]); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI + * mode for the 8259A whenever interrupts are routed + * through I/O APICs. Also IRQ0 has to be enabled in + * the 8259A which implies the virtual wire has to be + * disabled in the local APIC. + */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + init_8259A(1); + timer_ack = 1; + enable_8259A_irq(0); + + pin1 = find_isa_irq_pin(0, mp_INT); + pin2 = find_isa_irq_pin(0, mp_ExtINT); + + printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + + if (pin1 != -1) { + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + unmask_IO_APIC_irq(0); + if (timer_irq_works()) { + return; + } + clear_IO_APIC_pin(0, pin1); + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + } + + printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); + if (pin2 != -1) { + printk("\n..... (found pin %d) ...", pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_IRQ0_pin(pin2, vector); + if (timer_irq_works()) { + printk("works.\n"); + return; + } + /* + * Cleanup, just in case ... 
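The CMOS writes in unlock_ExtINT_logic() above arm the RTC's periodic interrupt; a minimal sketch of the rate arithmetic, assuming the standard MC146818 programming model (rtc_rate_to_hz is an illustrative name):

    /* The low nibble of RTC_FREQ_SELECT picks the periodic rate:
     * freq = 32768 >> (rate - 1). The 0x6 written above therefore
     * requests 32768 >> 5 == 1024 interrupts per second. */
    static unsigned int rtc_rate_to_hz(unsigned int rate)
    {
        return rate ? (32768u >> (rate - 1)) : 0;
    }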
+ */ + clear_IO_APIC_pin(0, pin2); + } + printk(" failed.\n"); + + printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); + irq_desc[0].handler = &lapic_irq_type; + apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + printk(" works.\n"); + return; + } + apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + printk(" failed.\n"); + + printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); + + init_8259A(0); + make_8259A_irq(0); + apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + + unlock_ExtINT_logic(); + + if (timer_irq_works()) { + printk(" works.\n"); + return; + } + printk(" failed :(.\n"); + panic("IO-APIC + timer doesn't work! pester mingo@redhat.com"); +} + +/* + * IRQ's that are handled by the old PIC in all cases: + * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. + * Linux doesn't really care, as it's not actually used + * for any interrupt handling anyway. + */ +#define PIC_IRQS (1<<2) + +void __init setup_IO_APIC(void) +{ + enable_IO_APIC(); + + io_apic_irqs = ~PIC_IRQS; + printk("ENABLING IO-APIC IRQs\n"); + + /* + * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS + * mptable: + */ + setup_ioapic_ids_from_mpc(); + sync_Arb_IDs(); + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); + print_IO_APIC(); +} diff --git a/xen-2.4.16/arch/i386/ioremap.c b/xen-2.4.16/arch/i386/ioremap.c new file mode 100644 index 0000000000..aefe75e8e7 --- /dev/null +++ b/xen-2.4.16/arch/i386/ioremap.c @@ -0,0 +1,106 @@ +/* + * arch/i386/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ + +//#include +#include +#include +#include + +static unsigned long remap_base = 0; + +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY) + +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +static void new_l2e(l2_pgentry_t *pl2e) +{ + l1_pgentry_t *pl1e = (l1_pgentry_t *)get_free_page(GFP_KERNEL); + if ( !pl1e ) BUG(); + clear_page(pl1e); + *pl2e = mk_l2_pgentry(__pa(pl1e)|L2_PROT); +} + +void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +{ + unsigned long vaddr; + unsigned long offset, cur=0, last_addr; + l2_pgentry_t *pl2e; + l1_pgentry_t *pl1e; + + /* First time through, start allocating from end of real memory. */ + if ( !remap_base ) + remap_base = (unsigned long)phys_to_virt(MAX_USABLE_ADDRESS); + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Don't remap the low PCI/ISA area, it's always mapped.. + */ + if (phys_addr >= 0xA0000 && last_addr < 0x100000) + return phys_to_virt(phys_addr); + +#if 0 + /* + * Don't allow anybody to remap normal RAM that we're using.. + */ + if (phys_addr < virt_to_phys(high_memory)) { + char *t_addr, *t_end; + struct pfn_info *page; + + t_addr = __va(phys_addr); + t_end = t_addr + (size - 1); + + for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) + if(!PageReserved(page)) + return NULL; + } +#endif + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; + + /* + * Ok, go for it.. 
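The alignment bookkeeping in __ioremap() above rewards a worked example; a sketch with concrete numbers, assuming 4 KB pages (so PAGE_MASK == 0xfffff000):

    unsigned long phys = 0xFEC00040UL, size = 0x100UL;   /* caller's request */
    unsigned long last = phys + size - 1;                /* 0xFEC0013F */
    unsigned long off  = phys & 0xFFFUL;                 /* 0x040 */

    phys &= 0xFFFFF000UL;                                /* 0xFEC00000 */
    size  = ((last + 0xFFFUL) & 0xFFFFF000UL) - phys;    /* 0x1000: one page */
    /* __ioremap() finally returns (void *)(off + vaddr), so the low
     * bits of the physical address survive the page rounding. */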
+ */ + vaddr = remap_base; + remap_base += size; + pl2e = idle0_pg_table + l2_table_offset(vaddr); + if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e); + pl1e = l2_pgentry_to_l1(*pl2e++) + l1_table_offset(vaddr); + for ( ; ; ) + { + if ( !l1_pgentry_empty(*pl1e) ) BUG(); + *pl1e++ = mk_l1_pgentry((phys_addr+cur)|L1_PROT|flags); + cur += PAGE_SIZE; + if ( cur == size ) break; + if ( !((unsigned long)pl1e & (PAGE_SIZE-1)) ) + { + if ( l2_pgentry_empty(*pl2e) ) new_l2e(pl2e); + pl1e = l2_pgentry_to_l1(*pl2e++); + } + } + + flush_tlb_all(); + + return (void *) (offset + (char *)vaddr); +} + +void iounmap(void *addr) +{ + /* NOP for now. */ +} diff --git a/xen-2.4.16/arch/i386/irq.c b/xen-2.4.16/arch/i386/irq.c new file mode 100644 index 0000000000..e58fb8f2ad --- /dev/null +++ b/xen-2.4.16/arch/i386/irq.c @@ -0,0 +1,895 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setup_irqs with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = +{ [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +/* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. + */ +#if CONFIG_X86 + printk("unexpected IRQ trap at vector %02x\n", irq); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. 
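The "controller template" described at the top of this file is simply a vtable. Its definition lives in a header and is not part of this diff, but the shape can be inferred from the initializers used below, starting with no_irq_type. A sketch of that inferred shape; the field names are best guesses, not the header's text:

    struct hw_interrupt_type_sketch {
        const char   *typename;                    /* "none", "IO-APIC-edge", ... */
        unsigned int (*startup)(unsigned int irq); /* returns "was pending" */
        void         (*shutdown)(unsigned int irq);
        void         (*enable)(unsigned int irq);
        void         (*disable)(unsigned int irq);
        void         (*ack)(unsigned int irq);
        void         (*end)(unsigned int irq);
        void         (*set_affinity)(unsigned int irq, unsigned long mask);
    };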
+ */ + ack_APIC_irq(); +#endif +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = 0xff; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. Set this define to 0 if you + * want to check whether your system freezes. I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. 
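The double loop in get_irqlock() above is the classic test-and-test-and-set spin, isolated here as a sketch (spin_acquire_bit is an illustrative name; the bit primitives are the kernel's own):

    static inline void spin_acquire_bit(volatile unsigned long *word)
    {
        /* Outer step: one bus-locked read-modify-write. */
        while (test_and_set_bit(0, word)) {
            /* Inner step: spin on plain reads so the cache line stays
             * shared instead of bouncing on every iteration. */
            do {
                rep_nop();    /* PAUSE */
            } while (test_bit(0, word));
        }
    }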
+ */ + global_irq_holder = cpu; +} + +#define EFLAGS_IF_SHIFT 9 + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ +void __global_cli(void) +{ + unsigned int flags; + + __save_flags(flags); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count(cpu)) + get_irqlock(cpu); + } +} + +void __global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count(cpu)) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + int cpu = smp_processor_id(); + + __save_flags(flags); + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count(cpu)) { + if (local_enabled) + retval = 1; + if (global_irq_holder == cpu) + retval = 0; + } + return retval; +} + +void __global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. + */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. 
+ * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(struct pt_regs regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ + int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ + int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; + + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. 
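Distilled, the flag handshake that the loop below completes looks like this; handshake_sketch and run_handlers are hypothetical stand-ins (the latter for the handle_IRQ_event() call):

    /* Another CPU taking the same IRQ while IRQ_INPROGRESS is set only
     * marks IRQ_PENDING and returns; the owning CPU notices the mark
     * and loops, so the second edge is not lost. */
    static void handshake_sketch(irq_desc_t *desc, unsigned int irq)
    {
        spin_lock(&desc->lock);
        desc->status |= IRQ_PENDING;
        if (!(desc->status & IRQ_INPROGRESS)) {
            desc->status |= IRQ_INPROGRESS;
            do {
                desc->status &= ~IRQ_PENDING;
                spin_unlock(&desc->lock);
                run_handlers(irq);              /* hypothetical */
                spin_lock(&desc->lock);
            } while (desc->status & IRQ_PENDING);
            desc->status &= ~IRQ_INPROGRESS;
        }
        spin_unlock(&desc->lock);
    }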
+ */
+ for (;;) {
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(irq, &regs, action);
+ spin_lock(&desc->lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+
+ return 1;
+}
+
+/**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ */
+
+int request_irq(unsigned int irq,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+{
+ int retval;
+ struct irqaction * action;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = (struct irqaction *)
+ kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ kfree(action);
+
+ return retval;
+}
+
+/**
+ * free_irq - free an interrupt
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * Remove an interrupt handler. The handler is removed and if the
+ * interrupt line is no longer in use by any driver it is disabled.
+ * On a shared IRQ the caller must ensure the interrupt is disabled
+ * on the card it drives before calling this function. The function
+ * does not return until any executing interrupts for this IRQ
+ * have completed.
+ *
+ * This function may be called from interrupt context.
+ *
+ * Bugs: Attempting to free an irq in a handler for the same irq hangs
+ * the machine.
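A minimal usage sketch for the request_irq()/free_irq() pair documented above; the IRQ number, device name and cookie are hypothetical:

    static int my_dev;   /* any unique address works as the dev_id cookie */

    static void my_handler(int irq, void *dev_id, struct pt_regs *regs)
    {
        /* quiesce the board, then process the event */
    }

    static int my_attach(void)
    {
        if (request_irq(9, my_handler, SA_SHIRQ, "mydev", &my_dev))
            return -EBUSY;                /* line busy, or no memory */
        /* ... my_handler may fire from this point on ... */
        free_irq(9, &my_dev);             /* must pass the same cookie */
        return 0;
    }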
+ */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static spinlock_t probe_sem = SPIN_LOCK_UNLOCKED; + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long s=0, e=0; + + spin_lock(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger (20ms delay). */ + rdtscl(s); + do { + synchronize_irq(); + rdtscl(e); + } while ( ((e-s)/ticks_per_usec) < 20000 ); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger (100ms delay). + */ + rdtscl(s); + do { + synchronize_irq(); + rdtscl(e); + } while ( ((e-s)/ticks_per_usec) < 100000 ); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +/* + * Return a mask of triggered interrupts (this + * can handle only legacy ISA interrupts). + */ + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. 
+ * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. + */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (i < 16 && !(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + spin_unlock(&probe_sem); + + return mask & val; +} + +/* + * Return the one interrupt that triggered (this can + * handle any interrupt source). + */ + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldnt happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. + */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + spin_unlock(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + return 0; +} diff --git a/xen-2.4.16/arch/i386/mm.c b/xen-2.4.16/arch/i386/mm.c new file mode 100644 index 0000000000..967816b469 --- /dev/null +++ b/xen-2.4.16/arch/i386/mm.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include +#include + +static inline void set_pte_phys (unsigned long vaddr, + l1_pgentry_t entry) +{ + l2_pgentry_t *l2ent; + l1_pgentry_t *l1ent; + + l2ent = idle0_pg_table + l2_table_offset(vaddr); + l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr); + *l1ent = entry; + + /* It's enough to flush this one mapping. 
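The single-page flush noted above is much cheaper than a full TLB flush: on i386 it is typically one INVLPG instruction rather than a CR3 reload. A sketch of what __flush_tlb_one() usually expands to; the wrapper name is illustrative:

    static inline void flush_one_mapping(unsigned long vaddr)
    {
        /* Drop only the TLB entry for this linear address instead of
         * reloading CR3 and losing every cached translation. */
        __asm__ __volatile__("invlpg (%0)" : : "r" (vaddr) : "memory");
    }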
*/ + __flush_tlb_one(vaddr); +} + +void __set_fixmap (enum fixed_addresses idx, + l1_pgentry_t entry) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid __set_fixmap\n"); + return; + } + set_pte_phys(address, entry); +} + +static void __init fixrange_init (unsigned long start, + unsigned long end, l2_pgentry_t *pg_base) +{ + l2_pgentry_t *l2e; + int i; + unsigned long vaddr, page; + + vaddr = start; + i = l2_table_offset(vaddr); + l2e = pg_base + i; + + for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) + { + if ( !l2_pgentry_empty(*l2e) ) continue; + page = (unsigned long)get_free_page(GFP_KERNEL); + clear_page(page); + *l2e = mk_l2_pgentry(__pa(page) | PAGE_HYPERVISOR); + vaddr += 1 << L2_PAGETABLE_SHIFT; + } +} + +void __init paging_init(void) +{ + unsigned long addr; + + /* XXX initialised in boot.S */ + /*if ( cpu_has_pge ) set_in_cr4(X86_CR4_PGE);*/ + /*if ( cpu_has_pse ) set_in_cr4(X86_CR4_PSE);*/ + /*if ( cpu_has_pae ) set_in_cr4(X86_CR4_PAE);*/ + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + addr = FIXADDR_START & ~((1<thread.ss1 = ss; + current->thread.esp1 = esp; + t->ss1 = ss; + t->esp1 = esp; + + return 0; +} diff --git a/xen-2.4.16/arch/i386/mpparse.c b/xen-2.4.16/arch/i386/mpparse.c new file mode 100644 index 0000000000..c5cf58a312 --- /dev/null +++ b/xen-2.4.16/arch/i386/mpparse.c @@ -0,0 +1,630 @@ +/* + * Intel Multiprocessor Specificiation 1.1 and 1.4 + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Fixes + * Erich Boleyn : MP v1.4 and additional changes. + * Alan Cox : Added EBDA scanning + * Ingo Molnar : various cleanups and rewrites + * Maciej W. Rozycki : Bits for default MP configurations + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Have we found an MP table */ +int smp_found_config; + +/* + * Various Linux-internal data structures created from the + * MP-table. + */ +int apic_version [MAX_APICS]; +int mp_bus_id_to_type [MAX_MP_BUSSES]; +int mp_bus_id_to_node [MAX_MP_BUSSES]; +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; +int mp_current_pci_id; + +/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* MP IRQ source entries */ +int mp_irq_entries; + +int nr_ioapics; + +int pic_mode; +unsigned long mp_lapic_addr; + +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_logical_apicid = -1U; +/* Internal processor count */ +static unsigned int num_processors; + +/* Bitmask of physically existing CPUs */ +unsigned long phys_cpu_present_map; + +/* + * Intel MP BIOS table parsing routines: + */ + +/* + * Checksum an MP configuration block. 
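The invariant behind the routine that follows: the BIOS chooses a filler byte so each configuration structure sums to zero modulo 256, so any non-zero return means corruption. A sketch of the floating-pointer validity test this enables (mpf_valid is an illustrative name; smp_scan_config() later in this file performs the same checks inline):

    static int __init mpf_valid(struct intel_mp_floating *mpf)
    {
        return mpf->mpf_length == 1 &&               /* length in 16-byte units */
               mpf_checksum((unsigned char *)mpf, 16) == 0;
    }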
+ */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +/* + * Processor encoding in an MP configuration block + */ + +static char __init *mpc_family(int family,int model) +{ + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" + }; + + switch (family) { + case 0x04: + if (model < 10) + return model_defs[model]; + break; + + case 0x05: + return("Pentium(tm)"); + + case 0x06: + return("Pentium(tm) Pro"); + + case 0x0F: + if (model == 0x00) + return("Pentium 4(tm)"); + if (model == 0x0F) + return("Special controller"); + } + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; +} + +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... + */ + +static int mpc_record; + +void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver, logical_apicid; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + logical_apicid = m->mpc_apicid; + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family((m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver); + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + Dprintk(" Bootup CPU\n"); + boot_cpu_physical_apicid = m->mpc_apicid; + boot_cpu_logical_apicid = logical_apicid; + } + + num_processors++; + + if (m->mpc_apicid > MAX_APICS) { + printk("Processor #%d INVALID. (Max ID: %d).\n", + m->mpc_apicid, MAX_APICS); + return; + } + ver = m->mpc_apicver; + + phys_cpu_present_map |= 1 << m->mpc_apicid; + + /* + * Validate version + */ + if (ver == 0x0) { + printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. 
(tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; +} + +static void __init MP_bus_info (struct mpc_config_bus *m) +{ + char str[7]; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; + } else { + printk("Unknown bustype %s - ignoring\n", str); + } +} + +static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk("Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + if (!m->mpc_apicaddr) { + printk("WARNING: bogus zero I/O APIC address" + " found in MP table, skipping!\n"); + return; + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +{ + mp_irqs [mp_irq_entries] = *m; + Dprintk("Int: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, + m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +{ + Dprintk("Lint: type %d, pol %d, trig %d, bus %d," + " IRQ %02x, APIC ID %x, APIC LINT %02x\n", + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. + * Until then we do not have to add baggage. 
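For reference, the mpc_cpufeature word unpacked by MP_processor_info() above packs family, model and stepping into nibbles; a sketch assuming the conventional mask values CPU_FAMILY_MASK == 0xf00 and CPU_MODEL_MASK == 0xf0:

    unsigned int cpufeature = (6 << 8) | (5 << 4) | 2;  /* family 6, model 5, stepping 2 */
    int family = (cpufeature & 0xf00) >> 8;             /* 6 */
    int model  = (cpufeature & 0xf0)  >> 4;             /* 5 */
    /* mpc_family(6, 5) then reports "Pentium(tm) Pro", as it does for
     * every family-6 part per the table above. */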
+ */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); +} + + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc) +{ + char str[16]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { + panic("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 0; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { + panic("SMP mptable: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { + printk("SMP mptable: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { + printk("SMP mptable: null local APIC address!\n"); + return 0; + } + memcpy(str,mpc->mpc_oem,8); + str[8]=0; + printk("OEM ID: %s ",str); + + memcpy(str,mpc->mpc_productid,12); + str[12]=0; + printk("Product ID: %s ",str); + + printk("APIC at: 0x%lX\n", mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default. */ + mp_lapic_addr = mpc->mpc_lapic; + + /* + * Now process the configuration blocks. + */ + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + default: + { + count = mpc->mpc_length; + break; + } + } + ++mpc_record; + } + + if (!num_processors) + printk("SMP mptable: no processors registered!\n"); + return num_processors; +} + +static int __init ELCR_trigger(unsigned int irq) +{ + unsigned int port; + + port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; +} + +static void __init construct_default_ioirq_mptable(int mpc_default_type) +{ + struct mpc_config_intsrc intsrc; + int i; + int ELCR_fallback = 0; + + intsrc.mpc_type = MP_INTSRC; + intsrc.mpc_irqflag = 0; /* conforming */ + intsrc.mpc_srcbus = 0; + intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; + + intsrc.mpc_irqtype = mp_INT; + + /* + * If true, we have an ISA/PCI system with no IRQ entries + * in the MP table. To prevent the PCI interrupts from being set up + * incorrectly, we try to use the ELCR. The sanity check to see if + * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can + * never be level sensitive, so we simply see if the ELCR agrees. + * If it does, we assume it's valid. + */ + if (mpc_default_type == 5) { + printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); + + if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) + printk("ELCR contains invalid data... 
not using ELCR\n"); + else { + printk("Using ELCR to identify PCI interrupts\n"); + ELCR_fallback = 1; + } + } + + for (i = 0; i < 16; i++) { + switch (mpc_default_type) { + case 2: + if (i == 0 || i == 13) + continue; /* IRQ0 & IRQ13 not connected */ + /* fall through */ + default: + if (i == 2) + continue; /* IRQ2 is never connected */ + } + + if (ELCR_fallback) { + /* + * If the ELCR indicates a level-sensitive interrupt, we + * copy that information over to the MP table in the + * irqflag field (level sensitive, active high polarity). + */ + if (ELCR_trigger(i)) + intsrc.mpc_irqflag = 13; + else + intsrc.mpc_irqflag = 0; + } + + intsrc.mpc_srcbusirq = i; + intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ + MP_intsrc_info(&intsrc); + } + + intsrc.mpc_irqtype = mp_ExtINT; + intsrc.mpc_srcbusirq = 0; + intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ + MP_intsrc_info(&intsrc); +} + +static inline void __init construct_default_ISA_mptable(int mpc_default_type) +{ + struct mpc_config_processor processor; + struct mpc_config_bus bus; + struct mpc_config_ioapic ioapic; + struct mpc_config_lintsrc lintsrc; + int linttypes[2] = { mp_ExtINT, mp_NMI }; + int i; + + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; + /* Either an integrated APIC or a discrete 82489DX. */ + processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + processor.mpc_cpuflag = CPU_ENABLED; + processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | + (boot_cpu_data.x86_model << 4) | + boot_cpu_data.x86_mask; + processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { + processor.mpc_apicid = i; + MP_processor_info(&processor); + } + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + switch (mpc_default_type) { + default: + printk("???\nUnknown standard configuration %d\n", + mpc_default_type); + /* fall through */ + case 1: + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; + case 2: + case 6: + case 3: + memcpy(bus.mpc_bustype, "EISA ", 6); + break; + case 4: + case 7: + memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { + bus.mpc_busid = 1; + memcpy(bus.mpc_bustype, "PCI ", 6); + MP_bus_info(&bus); + } + + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; + ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); + + /* + * We set up most of the low 16 IO-APIC pins according to MPS rules. + */ + construct_default_ioirq_mptable(mpc_default_type); + + lintsrc.mpc_type = MP_LINTSRC; + lintsrc.mpc_irqflag = 0; /* conforming */ + lintsrc.mpc_srcbusid = 0; + lintsrc.mpc_srcbusirq = 0; + lintsrc.mpc_destapic = MP_APIC_ALL; + for (i = 0; i < 2; i++) { + lintsrc.mpc_irqtype = linttypes[i]; + lintsrc.mpc_destapiclint = i; + MP_lintsrc_info(&lintsrc); + } +} + +static struct intel_mp_floating *mpf_found; + +/* + * Scan the memory blocks for an SMP configuration block. + */ +void __init get_smp_config (void) +{ + struct intel_mp_floating *mpf = mpf_found; + + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(" IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(" Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + + /* + * Now see if we need to read further. 
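The ELCR lookup in ELCR_trigger() above decodes as follows: the two Edge/Level Control Registers sit at I/O ports 0x4d0 (IRQs 0-7) and 0x4d1 (IRQs 8-15), one bit per IRQ, a set bit meaning level-triggered. A sketch, worked through for IRQ 11 in the comments:

    static int elcr_level_triggered(unsigned int irq)
    {
        unsigned int port = 0x4d0 + (irq >> 3);   /* IRQ 11 -> 0x4d1 */
        unsigned int bit  = irq & 7;              /* IRQ 11 -> bit 3 */
        return (inb(port) >> bit) & 1;
    }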
+ */ + if (mpf->mpf_feature1 != 0) { + + printk("Default MP configuration #%d\n", mpf->mpf_feature1); + construct_default_ISA_mptable(mpf->mpf_feature1); + + } else if (mpf->mpf_physptr) { + + /* + * Read the physical hardware table. Anything here will + * override the defaults. + */ + if (!smp_read_mpc((void *)mpf->mpf_physptr)) { + smp_found_config = 0; + printk("BIOS bug, MP table errors detected!...\n"); + printk("... disabling SMP support. (tell your hw vendor)\n"); + return; + } + /* + * If there are no explicit MP IRQ entries, then we are + * broken. We set up most of the low 16 IO-APIC pins to + * ISA defaults and hope it will work. + */ + if (!mp_irq_entries) { + struct mpc_config_bus bus; + + printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); + + bus.mpc_type = MP_BUS; + bus.mpc_busid = 0; + memcpy(bus.mpc_bustype, "ISA ", 6); + MP_bus_info(&bus); + + construct_default_ioirq_mptable(0); + } + + } else + BUG(); + + printk("Processors: %d\n", num_processors); + /* + * Only use the first configuration found. + */ +} + +static int __init smp_scan_config (unsigned long base, unsigned long length) +{ + unsigned long *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + printk("Error: MPF size\n"); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + smp_found_config = 1; + printk("found SMP MP-table at %08lx\n", + virt_to_phys(mpf)); + reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + if (mpf->mpf_physptr) + reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); + mpf_found = mpf; + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +void __init find_intel_smp (void) +{ + /* + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; +} + +/* + * - Intel MP Configuration Table + * - or SGI Visual Workstation configuration + */ +void __init find_smp_config (void) +{ + find_intel_smp(); +} + diff --git a/xen-2.4.16/arch/i386/pci-dma.c b/xen-2.4.16/arch/i386/pci-dma.c new file mode 100644 index 0000000000..7cf3a4ee9d --- /dev/null +++ b/xen-2.4.16/arch/i386/pci-dma.c @@ -0,0 +1,37 @@ +/* + * Dynamic DMA mapping support. + * + * On i386 there is no hardware dynamic DMA address translation, + * so consistent alloc/free are merely page allocation/freeing. + * The rest of the dynamic DMA mapping interface is implemented + * in asm/pci.h. 
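A usage sketch for the two consistent-DMA helpers defined just below; the function name, device pointer and buffer size are hypothetical:

    static int ring_example(struct pci_dev *pdev)
    {
        dma_addr_t bus_addr;
        void *ring = pci_alloc_consistent(pdev, 4096, &bus_addr);

        if (ring == NULL)
            return -ENOMEM;
        /* The CPU uses 'ring'; the device is programmed with 'bus_addr'.
         * On i386 both name the same, already zeroed, memory. */
        pci_free_consistent(pdev, 4096, ring, bus_addr);
        return 0;
    }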
+ */ + +#include +#include +#include +#include +#include + +void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + + if (hwdev == NULL || hwdev->dma_mask != 0xffffffff) + gfp |= GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + return ret; +} + +void pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} diff --git a/xen-2.4.16/arch/i386/pci-i386.c b/xen-2.4.16/arch/i386/pci-i386.c new file mode 100644 index 0000000000..f2e9c43e64 --- /dev/null +++ b/xen-2.4.16/arch/i386/pci-i386.c @@ -0,0 +1,354 @@ +/* + * Low-Level PCI Access for i386 machines + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * Drew's work was sponsored by: + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1997--2000 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + * + * CHANGELOG : + * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION + * Revision 2.0 present on 's ASUS mainboard. + * + * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic + * Potter, potter@cao-vlsi.ibp.fr + * + * Jan 10, 1995 : Modified to store the information about configured pci + * devices into a list, which can be accessed via /proc/pci by + * Curtis Varner, cvarner@cs.ucr.edu + * + * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter. + * Alpha version. Intel & UMC chipset support only. + * + * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code + * moved to drivers/pci/pci.c. + * + * Dec 7, 1996 : Added support for direct configuration access of boards + * with Intel compatible access schemes (tsbogend@alpha.franken.de) + * + * Feb 3, 1997 : Set internal functions to static, save/restore flags + * avoid dead locks reading broken PCI BIOS, werner@suse.de + * + * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS + * (mj@atrey.karlin.mff.cuni.cz) + * + * May 7, 1997 : Added some missing cli()'s. [mj] + * + * Jun 20, 1997 : Corrected problems in "conf1" type accesses. + * (paubert@iram.es) + * + * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts + * and cleaned it up... Martin Mares + * + * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] + * + * May 1, 1998 : Support for peer host bridges. [mj] + * + * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space + * can be accessed from interrupts even on SMP systems. [mj] + * + * August 1998 : Better support for peer host bridges and more paranoid + * checks for direct hardware access. Ugh, this file starts to look as + * a large gallery of common hardware bug workarounds (watch the comments) + * -- the PCI specs themselves are sane, but most implementors should be + * hit hard with \hammer scaled \magstep5. [mj] + * + * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj] + * + * Feb 8, 1999 : Added UM8886BF I/O address fixup. 
[mj] + * + * August 1999 : New resource management and configuration access stuff. [mj] + * + * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges. + * Based on ideas by Chris Frantz and David Hinds. [mj] + * + * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi + * for a lot of patience during testing. [mj] + * + * Oct 8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj] + */ + +#include +//#include +#include +#include +#include +#include + +#include "pci-i386.h" + +void +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + u32 new, check; + int reg; + + new = res->start | (res->flags & PCI_REGION_FLAG_MASK); + if (resource < 6) { + reg = PCI_BASE_ADDRESS_0 + 4*resource; + } else if (resource == PCI_ROM_RESOURCE) { + res->flags |= PCI_ROM_ADDRESS_ENABLE; + new |= PCI_ROM_ADDRESS_ENABLE; + reg = dev->rom_base_reg; + } else { + /* Somebody might have asked allocation of a non-standard resource */ + return; + } + + pci_write_config_dword(dev, reg, new); + pci_read_config_dword(dev, reg, &check); + if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) { + printk(KERN_ERR "PCI: Error while updating region " + "%s/%d (%08x != %08x)\n", dev->slot_name, resource, + new, check); + } +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void +pcibios_align_resource(void *data, struct resource *res, unsigned long size) +{ + if (res->flags & IORESOURCE_IO) { + unsigned long start = res->start; + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. 
+ * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct list_head *ln; + struct pci_bus *bus; + struct pci_dev *dev; + int idx; + struct resource *r, *pr; + + /* Depth-First Search on bus tree */ + for (ln=bus_list->next; ln != bus_list; ln=ln->next) { + bus = pci_bus_b(ln); + if ((dev = bus->self)) { + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->start) + continue; + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) + printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name); + } + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +static void __init pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev; + int idx, disabled; + u16 command; + struct resource *r, *pr; + + pci_for_each_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for(idx = 0; idx < 6; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", + r->start, r->end, r->flags, disabled, pass); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name); + /* We'll assign a new address later */ + r->end -= r->start; + r->start = 0; + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & PCI_ROM_ADDRESS_ENABLE) { + /* Turn the ROM off, leave the resource region, but keep it unregistered. */ + u32 reg; + DBG("PCI: Switching off ROM of %s\n", dev->slot_name); + r->flags &= ~PCI_ROM_ADDRESS_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } + } +} + +static void __init pcibios_assign_resources(void) +{ + struct pci_dev *dev; + int idx; + struct resource *r; + + pci_for_each_dev(dev) { + int class = dev->class >> 8; + + /* Don't touch classless devices and host bridges */ + if (!class || class == PCI_CLASS_BRIDGE_HOST) + continue; + + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + + /* + * Don't touch IDE controllers and I/O ports of video cards! + */ + if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || + (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) + continue; + + /* + * We shall assign a new address to this resource, either because + * the BIOS forgot to do so or because we have decided the old + * address was unusable for some reason. 
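+ * (pcibios_allocate_resources() above encodes "unassigned" as
+ * start == 0 with a non-zero end, which is exactly the condition
+ * tested below before calling pci_assign_resource().)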
+ */ + if (!r->start && r->end) + pci_assign_resource(dev, idx); + } + + if (pci_probe & PCI_ASSIGN_ROMS) { + r = &dev->resource[PCI_ROM_RESOURCE]; + r->end -= r->start; + r->start = 0; + if (r->end) + pci_assign_resource(dev, PCI_ROM_RESOURCE); + } + } +} + +void __init pcibios_resource_survey(void) +{ + DBG("PCI: Allocating resources\n"); + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + pcibios_assign_resources(); +} + +int pcibios_enable_resources(struct pci_dev *dev) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain crappy BIOSes forget to set it properly. + */ +unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk("PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + diff --git a/xen-2.4.16/arch/i386/pci-i386.h b/xen-2.4.16/arch/i386/pci-i386.h new file mode 100644 index 0000000000..2c821af08f --- /dev/null +++ b/xen-2.4.16/arch/i386/pci-i386.h @@ -0,0 +1,69 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) 
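+/* Usage sketch (illustrative, "bus" is a hypothetical variable):
+   DBG("PCI: probing bus %02x\n", bus) prints via printk() when DEBUG
+   is defined above, and compiles to nothing otherwise. */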
+#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_NO_SORT 0x0100 +#define PCI_BIOS_SORT 0x0200 +#define PCI_NO_CHECKS 0x0400 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 + +extern unsigned int pci_probe; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; + +void pcibios_resource_survey(void); +int pcibios_enable_resources(struct pci_dev *); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops *pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give zero */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +void pcibios_irq_init(void); +void pcibios_fixup_irqs(void); +void pcibios_enable_irq(struct pci_dev *dev); diff --git a/xen-2.4.16/arch/i386/pci-irq.c b/xen-2.4.16/arch/i386/pci-irq.c new file mode 100644 index 0000000000..f08f3790bc --- /dev/null +++ b/xen-2.4.16/arch/i386/pci-irq.c @@ -0,0 +1,753 @@ +/* + * Low-Level PCI Support for PC -- Routing of Interrupts + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +//#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "pci-i386.h" + +#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) +#define PIRQ_VERSION 0x0100 + +static struct irq_routing_table *pirq_table; + +/* + * Never use: 0, 1, 2 (timer, keyboard, and cascade) + * Avoid using: 13, 14 and 15 (FP error and IDE). + * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) + */ +unsigned int pcibios_irq_mask = 0xfff8; + +static int pirq_penalty[16] = { + 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, + 0, 0, 0, 0, 1000, 100000, 100000, 100000 +}; + +struct irq_router { + char *name; + u16 vendor, device; + int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); +}; + +/* + * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. + */ + +static struct irq_routing_table * __init pirq_find_routing_table(void) +{ + u8 *addr; + struct irq_routing_table *rt; + int i; + u8 sum; + + for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + rt = (struct irq_routing_table *) addr; + if (rt->signature != PIRQ_SIGNATURE || + rt->version != PIRQ_VERSION || + rt->size % 16 || + rt->size < sizeof(struct irq_routing_table)) + continue; + sum = 0; + for(i=0; isize; i++) + sum += addr[i]; + if (!sum) { + DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt); + return rt; + } + } + return NULL; +} + +/* + * If we have a IRQ routing table, use it to search for peer host + * bridges. 
It's a gross hack, but since there are no other known + * ways how to get a list of buses, we have to go this way. + */ + +static void __init pirq_peer_trick(void) +{ + struct irq_routing_table *rt = pirq_table; + u8 busmap[256]; + int i; + struct irq_info *e; + + memset(busmap, 0, sizeof(busmap)); + for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + e = &rt->slots[i]; +#ifdef DEBUG + { + int j; + DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); + for(j=0; j<4; j++) + DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); + DBG("\n"); + } +#endif + busmap[e->bus] = 1; + } + for(i=1; i<256; i++) + /* + * It might be a secondary bus, but in this case its parent is already + * known (ascending bus order) and therefore pci_scan_bus returns immediately. + */ + if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL)) + printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + pcibios_last_bus = -1; +} + +/* + * Code for querying and setting of IRQ routes on various interrupt routers. + */ + +static void eisa_set_level_irq(unsigned int irq) +{ + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val = inb(port); + + if (!(val & mask)) { + DBG(" -> edge"); + outb(val | mask, port); + } +} + +/* + * Common IRQ routing practice: nybbles in config space, + * offset by some magic constant. + */ +static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + return (nr & 1) ? (x >> 4) : (x & 0xf); +} + +static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); + pci_write_config_byte(router, reg, x); +} + +/* + * ALI pirq entries are damn ugly, and completely undocumented. + * This has been figured out from pirq tables, and it's not a pretty + * picture. + */ +static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; + + return irqmap[read_config_nybble(router, 0x48, pirq-1)]; +} + +static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; + unsigned int val = irqmap[irq]; + + if (val) { + write_config_nybble(router, 0x48, pirq-1, val); + return 1; + } + return 0; +} + +/* + * The Intel PIIX4 pirq rules are fairly simple: "pirq" is + * just a pointer to the config space. + */ +static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + + pci_read_config_byte(router, pirq, &x); + return (x < 16) ? x : 0; +} + +static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + pci_write_config_byte(router, pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + */ +static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x55, pirq); +} + +static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x55, pirq, irq); + return 1; +} + +/* + * OPTI: high four bits are nibble pointer.. 
+ * I wonder what the low bits do? + */ +static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0xb8, pirq >> 4); +} + +static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0xb8, pirq >> 4, irq); + return 1; +} + +/* + * Cyrix: nibble offset 0x5C + */ +static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x5C, pirq-1); +} + +static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x5C, pirq-1, irq); + return 1; +} + +/* + * PIRQ routing for SiS 85C503 router used in several SiS chipsets + * According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997) + * the related registers work as follows: + * + * general: one byte per re-routable IRQ, + * bit 7 IRQ mapping enabled (0) or disabled (1) + * bits [6:4] reserved + * bits [3:0] IRQ to map to + * allowed: 3-7, 9-12, 14-15 + * reserved: 0, 1, 2, 8, 13 + * + * individual registers in device config space: + * + * 0x41/0x42/0x43/0x44: PCI INT A/B/C/D - bits as in general case + * + * 0x61: IDEIRQ: bits as in general case - but: + * bits [6:5] must be written 01 + * bit 4 channel-select primary (0), secondary (1) + * + * 0x62: USBIRQ: bits as in general case - but: + * bit 4 OHCI function disabled (0), enabled (1) + * + * 0x6a: ACPI/SCI IRQ - bits as in general case + * + * 0x7e: Data Acq. Module IRQ - bits as in general case + * + * Apparently there are systems implementing PCI routing table using both + * link values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, but register offsets + * like 0x62 as link values for USBIRQ e.g. So there is no simple + * "register = offset + pirq" relation. + * Currently we support PCI INTA..D and USBIRQ and try our best to handle + * both link mappings. + * IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by BIOS). + */ + +static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + int reg = pirq; + + switch(pirq) { + case 0x01: + case 0x02: + case 0x03: + case 0x04: + reg += 0x40; + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x62: + pci_read_config_byte(router, reg, &x); + if (reg != 0x62) + break; + if (!(x & 0x40)) + return 0; + break; + case 0x61: + case 0x6a: + case 0x7e: + printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n"); + return 0; + default: + printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq); + return 0; + } + return (x & 0x80) ? 0 : (x & 0x0f); +} + +static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + u8 x; + int reg = pirq; + + switch(pirq) { + case 0x01: + case 0x02: + case 0x03: + case 0x04: + reg += 0x40; + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x62: + x = (irq&0x0f) ? 
(irq&0x0f) : 0x80; + if (reg != 0x62) + break; + /* always mark OHCI enabled, as nothing else knows about this */ + x |= 0x40; + break; + case 0x61: + case 0x6a: + case 0x7e: + printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n"); + return 0; + default: + printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq); + return 0; + } + pci_write_config_byte(router, reg, x); + + return 1; +} + +/* + * VLSI: nibble offset 0x74 - educated guess due to routing table and + * config space of VLSI 82C534 PCI-bridge/router (1004:0102) + * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard + * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 + * for the busbridge to the docking station. + */ + +static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + return read_config_nybble(router, 0x74, pirq-1); +} + +static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + write_config_nybble(router, 0x74, pirq-1, irq); + return 1; +} + +/* + * ServerWorks: PCI interrupts mapped to system IRQ lines through Index + * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register + * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect + * register is a straight binary coding of desired PIC IRQ (low nibble). + * + * The 'link' value in the PIRQ table is already in the correct format + * for the Index register. There are some special index values: + * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, + * and 0x03 for SMBus. + */ +static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb_p(pirq, 0xc00); + return inb(0xc01) & 0xf; +} + +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + outb_p(pirq, 0xc00); + outb_p(irq, 0xc01); + return 1; +} + +/* Support for AMD756 PCI IRQ Routing + * Jhon H. Caicedo + * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... 
(jhcaiced) + * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) + * The AMD756 pirq rules are nibble-based + * offset 0x56 0-3 PIRQA 4-7 PIRQB + * offset 0x57 0-3 PIRQC 4-7 PIRQD + */ +static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 irq; + irq = 0; + if (pirq <= 4) + { + irq = read_config_nybble(router, 0x56, pirq - 1); + } + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + return irq; +} + +static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + if (pirq <= 4) + { + write_config_nybble(router, 0x56, pirq - 1, irq); + } + return 1; +} + +#ifdef CONFIG_PCI_BIOS + +static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + struct pci_dev *bridge; + int pin = pci_get_interrupt_pin(dev, &bridge); + return pcibios_set_irq_routing(bridge, pin, irq); +} + +static struct irq_router pirq_bios_router = + { "BIOS", 0, 0, NULL, pirq_bios_set }; + +#endif + +static struct irq_router pirq_routers[] = { + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0, pirq_piix_get, pirq_piix_set }, + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, pirq_piix_get, pirq_piix_set }, + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, pirq_piix_get, pirq_piix_set }, + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX, pirq_piix_get, pirq_piix_set }, + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_0, pirq_piix_get, pirq_piix_set }, + { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, pirq_piix_get, pirq_piix_set }, + + { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set }, + + { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set }, + { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, pirq_via_get, pirq_via_set }, + { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, pirq_via_get, pirq_via_set }, + + { "OPTI", PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700, pirq_opti_get, pirq_opti_set }, + + { "NatSemi", PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, pirq_cyrix_get, pirq_cyrix_set }, + { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set }, + { "VLSI 82C534", PCI_VENDOR_ID_VLSI, PCI_DEVICE_ID_VLSI_82C534, pirq_vlsi_get, pirq_vlsi_set }, + { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4, + pirq_serverworks_get, pirq_serverworks_set }, + { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5, + pirq_serverworks_get, pirq_serverworks_set }, + { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B, + pirq_amd756_get, pirq_amd756_set }, + + { "default", 0, 0, NULL, NULL } +}; + +static struct irq_router *pirq_router; +static struct pci_dev *pirq_router_dev; + +static void __init pirq_find_router(void) +{ + struct irq_routing_table *rt = pirq_table; + struct irq_router *r; + +#ifdef CONFIG_PCI_BIOS + if (!rt->signature) { + printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); + pirq_router = &pirq_bios_router; + return; + } +#endif + + DBG("PCI: Attempting to find IRQ router for %04x:%04x\n", + rt->rtr_vendor, rt->rtr_device); + + /* fall back to default router if nothing else found */ + pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1]; + + pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); + if 
(!pirq_router_dev) { + DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); + return; + } + + for(r=pirq_routers; r->vendor; r++) { + /* Exact match against router table entry? Use it! */ + if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) { + pirq_router = r; + break; + } + /* Match against router device entry? Use it as a fallback */ + if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) { + pirq_router = r; + } + } + printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", + pirq_router->name, + pirq_router_dev->vendor, + pirq_router_dev->device, + pirq_router_dev->slot_name); +} + +static struct irq_info *pirq_get_info(struct pci_dev *dev) +{ + struct irq_routing_table *rt = pirq_table; + int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); + struct irq_info *info; + + for (info = rt->slots; entries--; info++) + if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + return info; + return NULL; +} + +static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) +{ +} + +static int pcibios_lookup_irq(struct pci_dev *dev, int assign) +{ + u8 pin; + struct irq_info *info; + int i, pirq, newirq; + int irq = 0; + u32 mask; + struct irq_router *r = pirq_router; + struct pci_dev *dev2; + char *msg = NULL; + + if (!pirq_table) + return 0; + + /* Find IRQ routing entry */ + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (!pin) { + DBG(" -> no interrupt pin\n"); + return 0; + } + pin = pin - 1; + + DBG("IRQ for %s:%d", dev->slot_name, pin); + info = pirq_get_info(dev); + if (!info) { + DBG(" -> not found in routing table\n"); + return 0; + } + pirq = info->irq[pin].link; + mask = info->irq[pin].bitmap; + if (!pirq) { + DBG(" -> not routed\n"); + return 0; + } + DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); + mask &= pcibios_irq_mask; + + /* + * Find the best IRQ to assign: use the one + * reported by the device if possible. + */ + newirq = dev->irq; + if (!newirq && assign) { + for (i = 0; i < 16; i++) { + if (!(mask & (1 << i))) + continue; + if (pirq_penalty[i] < pirq_penalty[newirq] && + !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) { + free_irq(i, dev); + newirq = i; + } + } + } + DBG(" -> newirq=%d", newirq); + + /* Check if it is hardcoded */ + if ((pirq & 0xf0) == 0xf0) { + irq = pirq & 0xf; + DBG(" -> hardcoded IRQ %d\n", irq); + msg = "Hardcoded"; + } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) { + DBG(" -> got IRQ %d\n", irq); + msg = "Found"; + } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + DBG(" -> assigning IRQ %d", newirq); + if (r->set(pirq_router_dev, dev, pirq, newirq)) { + eisa_set_level_irq(newirq); + DBG(" ... OK\n"); + msg = "Assigned"; + irq = newirq; + } + } + + if (!irq) { + DBG(" ... failed\n"); + if (newirq && mask == (1 << newirq)) { + msg = "Guessed"; + irq = newirq; + } else + return 0; + } + printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name); + + /* Update IRQ for all devices with the same pirq value */ + pci_for_each_dev(dev2) { + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; + pin--; + info = pirq_get_info(dev2); + if (!info) + continue; + if (info->irq[pin].link == pirq) { + /* We refuse to override the dev->irq information. Give a warning! 
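+ * (The existing value may already be in use by a driver, so we
+ * presumably cannot reroute it safely; the conflict is only logged.)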
*/ + if (dev2->irq && dev2->irq != irq) { + printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", + dev2->slot_name, dev2->irq, irq); + continue; + } + dev2->irq = irq; + pirq_penalty[irq]++; + if (dev != dev2) + printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name); + } + } + return 1; +} + +void __init pcibios_irq_init(void) +{ + DBG("PCI: IRQ init\n"); + pirq_table = pirq_find_routing_table(); +#ifdef CONFIG_PCI_BIOS + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + pirq_table = pcibios_get_irq_routing_table(); +#endif + if (pirq_table) { + pirq_peer_trick(); + pirq_find_router(); + if (pirq_table->exclusive_irqs) { + int i; + for (i=0; i<16; i++) + if (!(pirq_table->exclusive_irqs & (1 << i))) + pirq_penalty[i] += 100; + } + /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ + if (io_apic_assign_pci_irqs) + pirq_table = NULL; + } +} + +void __init pcibios_fixup_irqs(void) +{ + struct pci_dev *dev; + u8 pin; + + DBG("PCI: IRQ fixup\n"); + pci_for_each_dev(dev) { + /* + * If the BIOS has set an out of range IRQ number, just ignore it. + * Also keep track of which IRQ's are already in use. + */ + if (dev->irq >= 16) { + DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq); + dev->irq = 0; + } + /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ + if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) + pirq_penalty[dev->irq] = 0; + pirq_penalty[dev->irq]++; + } + + pci_for_each_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); +#ifdef CONFIG_X86_IO_APIC + /* + * Recalculate IRQ numbers if we use the I/O APIC. + */ + if (io_apic_assign_pci_irqs) + { + int irq; + + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code detects such bridged + * busses itself so we should get into this branch reliably. + */ + if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev * bridge = dev->bus->self; + + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); + } + if (irq >= 0) { + printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); + dev->irq = irq; + } + } + } +#endif + /* + * Still no IRQ? Try to lookup one... + */ + if (pin && !dev->irq) + pcibios_lookup_irq(dev, 0); + } +} + +void pcibios_penalize_isa_irq(int irq) +{ + /* + * If any ISAPnP device reports an IRQ in its list of possible + * IRQ's, we try to avoid assigning it to PCI devices. 
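+ * (The +100 weight matches the penalty given to non-exclusive IRQs in
+ * pcibios_irq_init(), so such lines are deprioritised but not banned.)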
+ */ + pirq_penalty[irq] += 100; +} + +void pcibios_enable_irq(struct pci_dev *dev) +{ + u8 pin; + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + char *msg; + if (io_apic_assign_pci_irqs) + msg = " Probably buggy MP table."; + else if (pci_probe & PCI_BIOS_IRQ_SCAN) + msg = ""; + else + msg = " Please try using pci=biosirq."; + printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", + 'A' + pin - 1, dev->slot_name, msg); + } +} diff --git a/xen-2.4.16/arch/i386/pci-pc.c b/xen-2.4.16/arch/i386/pci-pc.c new file mode 100644 index 0000000000..7b02d3f4d3 --- /dev/null +++ b/xen-2.4.16/arch/i386/pci-pc.c @@ -0,0 +1,1276 @@ +/* + * Low-Level PCI Support for PC + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +//#include +#include +#include +#include +#include + +//#include +#include + +#include "pci-i386.h" + +#define __KERNEL_CS __HYPERVISOR_CS +#define __KERNEL_DS __HYPERVISOR_DS + +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; + +int pcibios_last_bus = -1; +struct pci_bus *pci_root_bus = NULL; +struct pci_ops *pci_root_ops = NULL; + +int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; +int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; + + +/* + * Functions for accessing PCI configuration space with type 1 accesses + */ + +#ifdef CONFIG_PCI_DIRECT + +#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ + (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) + +static int pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); + + switch (len) { + case 1: + *value = inb(0xCFC + (reg & 3)); + break; + case 2: + *value = inw(0xCFC + (reg & 2)); + break; + case 4: + *value = inl(0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); + + switch (len) { + case 1: + outb((u8)value, 0xCFC + (reg & 3)); + break; + case 2: + outw((u16)value, 0xCFC + (reg & 2)); + break; + case 4: + outl((u32)value, 0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_ADDRESS + +static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + + if (!value) + return -EINVAL; + + result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + if (!value) + return -EINVAL; + + result = pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static int 
pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + if (!value) + return -EINVAL; + + return pci_conf1_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_conf1_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_conf1 = { + pci_conf1_read_config_byte, + pci_conf1_read_config_word, + pci_conf1_read_config_dword, + pci_conf1_write_config_byte, + pci_conf1_write_config_word, + pci_conf1_write_config_dword +}; + + +/* + * Functions for accessing PCI configuration space with type 2 accesses + */ + +#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) + +static int pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + *value = inb(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + *value = inw(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + *value = inl(PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb (0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb (0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF2_ADDRESS + +static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + *value = (u8)data; + return result; +} + +static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + result = pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + *value = (u16)data; + return result; +} + +static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + return pci_conf2_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + 
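+/*
+ * Worked example of the type 2 mechanism above (illustrative only, not
+ * part of the original import): to reach bus 5, device 3, function 1,
+ * register 0x10, we write 0xF2 (0xF0 | fn << 1) to port 0xCF8, 0x05 to
+ * port 0xCFA, and then access I/O port 0xC310 (0xC000 | dev << 8 | reg).
+ * Only devices 0-15 are addressable this way, hence the (dev & 0x10)
+ * check rejecting the rest as PCIBIOS_DEVICE_NOT_FOUND.
+ */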
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_conf2_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static struct pci_ops pci_direct_conf2 = { + pci_conf2_read_config_byte, + pci_conf2_read_config_word, + pci_conf2_read_config_dword, + pci_conf2_write_config_byte, + pci_conf2_write_config_word, + pci_conf2_write_config_dword +}; + + +/* + * Before we decide to use direct hardware access mechanisms, we try to do some + * trivial checks to ensure it at least _seems_ to be working -- we just test + * whether bus 00 contains a host bridge (this is similar to checking + * techniques used in XFree86, but ours should be more reliable since we + * attempt to make use of direct access hints provided by the PCI BIOS). + * + * This should be close to trivial, but it isn't, because there are buggy + * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. + */ +static int __devinit pci_sanity_check(struct pci_ops *o) +{ + u16 x; + struct pci_bus bus; /* Fake bus and device */ + struct pci_dev dev; + + if (pci_probe & PCI_NO_CHECKS) + return 1; + bus.number = 0; + dev.bus = &bus; + for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++) + if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) && + (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) || + (!o->read_word(&dev, PCI_VENDOR_ID, &x) && + (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ))) + return 1; + DBG("PCI: Sanity check failed\n"); + return 0; +} + +static struct pci_ops * __devinit pci_check_direct(void) +{ + unsigned int tmp; + unsigned long flags; + + __save_flags(flags); __cli(); + + /* + * Check if configuration type 1 works. + */ + if (pci_probe & PCI_PROBE_CONF1) { + outb (0x01, 0xCFB); + tmp = inl (0xCF8); + outl (0x80000000, 0xCF8); + if (inl (0xCF8) == 0x80000000 && + pci_sanity_check(&pci_direct_conf1)) { + outl (tmp, 0xCF8); + __restore_flags(flags); + printk("PCI: Using configuration type 1\n"); + request_region(0xCF8, 8, "PCI conf1"); + return &pci_direct_conf1; + } + outl (tmp, 0xCF8); + } + + /* + * Check if configuration type 2 works. + */ + if (pci_probe & PCI_PROBE_CONF2) { + outb (0x00, 0xCFB); + outb (0x00, 0xCF8); + outb (0x00, 0xCFA); + if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 && + pci_sanity_check(&pci_direct_conf2)) { + __restore_flags(flags); + printk("PCI: Using configuration type 2\n"); + request_region(0xCF8, 4, "PCI conf2"); + return &pci_direct_conf2; + } + } + + __restore_flags(flags); + return NULL; +} + +#endif + +/* + * BIOS32 and PCI BIOS handling. 
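+ * Overview: pci_find_bios() below locates the BIOS32 Service Directory
+ * by signature scan, bios32_service() asks it for the "$PCI" service,
+ * and check_pcibios() then verifies the PCI BIOS entry point before the
+ * lcall-based config accessors are installed.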
+ */ + +#ifdef CONFIG_PCI_BIOS + +#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +#define PCIBIOS_READ_CONFIG_WORD 0xb109 +#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d +#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e +#define PCIBIOS_SET_PCI_HW_INT 0xb10f + +/* BIOS32 signature: "_32_" */ +#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) + +/* PCI signature: "PCI " */ +#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) + +/* PCI service signature: "$PCI" */ +#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) + +/* PCI BIOS hardware mechanism flags */ +#define PCIBIOS_HW_TYPE1 0x01 +#define PCIBIOS_HW_TYPE2 0x02 +#define PCIBIOS_HW_TYPE1_SPEC 0x10 +#define PCIBIOS_HW_TYPE2_SPEC 0x20 + +/* + * This is the standard structure used to identify the entry point + * to the BIOS32 Service Directory, as documented in + * Standard BIOS 32-bit Service Directory Proposal + * Revision 0.4 May 24, 1993 + * Phoenix Technologies Ltd. + * Norwood, MA + * and the PCI BIOS specification. + */ + +union bios32 { + struct { + unsigned long signature; /* _32_ */ + unsigned long entry; /* 32 bit physical address */ + unsigned char revision; /* Revision level, 0 */ + unsigned char length; /* Length in paragraphs should be 01 */ + unsigned char checksum; /* All bytes must add up to zero */ + unsigned char reserved[5]; /* Must be zero */ + } fields; + char chars[16]; +}; + +/* + * Physical address of the service directory. I don't know if we're + * allowed to have more than one of these or not, so just in case + * we'll make pcibios_present() take a memory start parameter and store + * the array there. 
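+ * (The {address, segment} pairs below double as 48-bit far pointers
+ * that the inline lcall sequences jump through.)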
+ */ + +static struct { + unsigned long address; + unsigned short segment; +} bios32_indirect = { 0, __KERNEL_CS }; + +/* + * Returns the entry point for the given service, NULL on error + */ + +static unsigned long bios32_service(unsigned long service) +{ + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; + + __save_flags(flags); __cli(); + __asm__("lcall *(%%edi); cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), + "D" (&bios32_indirect)); + __restore_flags(flags); + + switch (return_code) { + case 0: + return address + entry; + case 0x80: /* Not present */ + printk("bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk("bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; + } +} + +static struct { + unsigned long address; + unsigned short segment; +} pci_indirect = { 0, __KERNEL_CS }; + +static int pci_bios_present; + +static int __devinit check_pcibios(void) +{ + u32 signature, eax, ebx, ecx; + u8 status, major_ver, minor_ver, hw_mech; + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { + pci_indirect.address = pcibios_entry + PAGE_OFFSET; + + __save_flags(flags); __cli(); + __asm__( + "lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=d" (signature), + "=a" (eax), + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), + "D" (&pci_indirect) + : "memory"); + __restore_flags(flags); + + status = (eax >> 8) & 0xff; + hw_mech = eax & 0xff; + major_ver = (ebx >> 8) & 0xff; + minor_ver = ebx & 0xff; + if (pcibios_last_bus < 0) + pcibios_last_bus = ecx & 0xff; + DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", + status, hw_mech, major_ver, minor_ver, pcibios_last_bus); + if (status || signature != PCI_SIGNATURE) { + printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", + status, signature); + return 0; + } + printk("PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", + major_ver, minor_ver, pcibios_entry, pcibios_last_bus); +#ifdef CONFIG_PCI_DIRECT + if (!(hw_mech & PCIBIOS_HW_TYPE1)) + pci_probe &= ~PCI_PROBE_CONF1; + if (!(hw_mech & PCIBIOS_HW_TYPE2)) + pci_probe &= ~PCI_PROBE_CONF2; +#endif + return 1; + } + return 0; +} + +static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id, + unsigned short index, unsigned char *bus, unsigned char *device_fn) +{ + unsigned short bx; + unsigned short ret; + + __asm__("lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=b" (bx), + "=a" (ret) + : "1" (PCIBIOS_FIND_PCI_DEVICE), + "c" (device_id), + "d" (vendor), + "S" ((int) index), + "D" (&pci_indirect)); + *bus = (bx >> 8) & 0xff; + *device_fn = bx & 0xff; + return (int) (ret & 0xff00) >> 8; +} + +static int pci_bios_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = ((bus << 8) | (dev << 3) | fn); + + if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + 
__asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = ((bus << 8) | (dev << 3) | fn); + + if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_BYTE), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_WORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_DWORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + int result; + u32 data; + + if (!value) + return -EINVAL; + + result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, &data); + + *value = (u8)data; + + return result; +} + +static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + int result; + u32 data; + + if (!value) + return -EINVAL; + + result = pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, &data); + + *value = (u16)data; + + return result; +} + +static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + if (!value) + return -EINVAL; + + return pci_bios_read(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + +static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 1, value); +} + +static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 2, value); +} + +static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value) +{ + return pci_bios_write(0, dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), where, 4, value); +} + + +/* + * Function table for BIOS32 access + */ + +static struct pci_ops pci_bios_access = { + pci_bios_read_config_byte, + pci_bios_read_config_word, + pci_bios_read_config_dword, + pci_bios_write_config_byte, + pci_bios_write_config_word, + pci_bios_write_config_dword +}; + +/* + * Try to find PCI BIOS. 
+ */ + +static struct pci_ops * __devinit pci_find_bios(void) +{ + union bios32 *check; + unsigned char sum; + int i, length; + + /* + * Follow the standard procedure for locating the BIOS32 Service + * directory by scanning the permissible address range from + * 0xe0000 through 0xfffff for a valid BIOS32 structure. + */ + + for (check = (union bios32 *) __va(0xe0000); + check <= (union bios32 *) __va(0xffff0); + ++check) { + if (check->fields.signature != BIOS32_SIGNATURE) + continue; + length = check->fields.length * 16; + if (!length) + continue; + sum = 0; + for (i = 0; i < length ; ++i) + sum += check->chars[i]; + if (sum != 0) + continue; + if (check->fields.revision != 0) { + printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", + check->fields.revision, check); + continue; + } + DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); + if (check->fields.entry >= 0x100000) { + printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check); + return NULL; + } else { + unsigned long bios32_entry = check->fields.entry; + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); + bios32_indirect.address = bios32_entry + PAGE_OFFSET; + if (check_pcibios()) + return &pci_bios_access; + } + break; /* Hopefully more than one BIOS32 cannot happen... */ + } + + return NULL; +} + +/* + * Sort the device list according to PCI BIOS. Nasty hack, but since some + * fool forgot to define the `correct' device order in the PCI BIOS specs + * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels + * which used BIOS ordering, we are bound to do this... + */ + +static void __devinit pcibios_sort(void) +{ + LIST_HEAD(sorted_devices); + struct list_head *ln; + struct pci_dev *dev, *d; + int idx, found; + unsigned char bus, devfn; + + DBG("PCI: Sorting device list...\n"); + while (!list_empty(&pci_devices)) { + ln = pci_devices.next; + dev = pci_dev_g(ln); + idx = found = 0; + while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { + idx++; + for (ln=pci_devices.next; ln != &pci_devices; ln=ln->next) { + d = pci_dev_g(ln); + if (d->bus->number == bus && d->devfn == devfn) { + list_del(&d->global_list); + list_add_tail(&d->global_list, &sorted_devices); + if (d == dev) + found = 1; + break; + } + } + if (ln == &pci_devices) { + printk("PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); + /* + * We must not continue scanning as several buggy BIOSes + * return garbage after the last device. Grr. + */ + break; + } + } + if (!found) { + printk("PCI: Device %02x:%02x not found by BIOS\n", + dev->bus->number, dev->devfn); + list_del(&dev->global_list); + list_add_tail(&dev->global_list, &sorted_devices); + } + } + list_splice(&sorted_devices, &pci_devices); +} + +/* + * BIOS Functions for IRQ Routing + */ + +struct irq_routing_options { + u16 size; + struct irq_info *table; + u16 segment; +} __attribute__((packed)); + +struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) +{ + struct irq_routing_options opt; + struct irq_routing_table *rt = NULL; + int ret, map; + unsigned long page; + + if (!pci_bios_present) + return NULL; + page = __get_free_page(GFP_KERNEL); + if (!page) + return NULL; + opt.table = (struct irq_info *) page; + opt.size = PAGE_SIZE; + opt.segment = __KERNEL_DS; + + DBG("PCI: Fetching IRQ routing table... 
"); + __asm__("push %%es\n\t" + "push %%ds\n\t" + "pop %%es\n\t" + "lcall *(%%esi); cld\n\t" + "pop %%es\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret), + "=b" (map) + : "0" (PCIBIOS_GET_ROUTING_OPTIONS), + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect)); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) + printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); + else if (opt.size) { + rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); + if (rt) { + memset(rt, 0, sizeof(struct irq_routing_table)); + rt->size = opt.size + sizeof(struct irq_routing_table); + rt->exclusive_irqs = map; + memcpy(rt->slots, (void *) page, opt.size); + printk("PCI: Using BIOS Interrupt Routing Table\n"); + } + } + free_page(page); + return rt; +} + + +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) +{ + int ret; + + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), + "S" (&pci_indirect)); + return !(ret & 0xff00); +} + +#endif + +/* + * Several buggy motherboards address only 16 devices and mirror + * them to next 16 IDs. We try to detect this `feature' on all + * primary buses (those containing host bridges as they are + * expected to be unique) and remove the ghost devices. + */ + +static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) +{ + struct list_head *ln, *mn; + struct pci_dev *d, *e; + int mirror = PCI_DEVFN(16,0); + int seen_host_bridge = 0; + int i; + + DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); + for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { + d = pci_dev_b(ln); + if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) + seen_host_bridge++; + for (mn=ln->next; mn != &b->devices; mn=mn->next) { + e = pci_dev_b(mn); + if (e->devfn != d->devfn + mirror || + e->vendor != d->vendor || + e->device != d->device || + e->class != d->class) + continue; + for(i=0; iresource[i].start != d->resource[i].start || + e->resource[i].end != d->resource[i].end || + e->resource[i].flags != d->resource[i].flags) + continue; + break; + } + if (mn == &b->devices) + return; + } + if (!seen_host_bridge) + return; + printk("PCI: Ignoring ghost devices on bus %02x\n", b->number); + + ln = &b->devices; + while (ln->next != &b->devices) { + d = pci_dev_b(ln->next); + if (d->devfn >= mirror) { + list_del(&d->global_list); + list_del(&d->bus_list); + kfree(d); + } else + ln = ln->next; + } +} + +/* + * Discover remaining PCI buses in case there are peer host bridges. + * We use the number of last PCI bus provided by the PCI BIOS. + */ +static void __devinit pcibios_fixup_peer_bridges(void) +{ + int n; + struct pci_bus bus; + struct pci_dev dev; + u16 l; + + if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) + return; + DBG("PCI: Peer bridge fixup\n"); + for (n=0; n <= pcibios_last_bus; n++) { + if (pci_bus_exists(&pci_root_buses, n)) + continue; + bus.number = n; + bus.ops = pci_root_ops; + dev.bus = &bus; + for(dev.devfn=0; dev.devfn<256; dev.devfn += 8) + if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l); + printk("PCI: Discovered peer bus %02x\n", n); + pci_scan_bus(n, pci_root_ops, NULL); + break; + } + } +} + +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. 
+ */ + +static void __devinit pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. + */ + int pxb, reg; + u8 busno, suba, subb; + printk("PCI: Searching for i450NX host bridges on %s\n", d->slot_name); + reg = 0xd0; + for(pxb=0; pxb<2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + if (busno) + pci_scan_bus(busno, pci_root_ops, NULL); /* Bus A */ + if (suba < subb) + pci_scan_bus(suba+1, pci_root_ops, NULL); /* Bus B */ + } + pcibios_last_bus = -1; +} + +static void __devinit pci_fixup_i450gx(struct pci_dev *d) +{ + /* + * i450GX and i450KX -- Find and scan all secondary buses. + * (called separately for each PCI bridge found) + */ + u8 busno; + pci_read_config_byte(d, 0x4a, &busno); + printk("PCI: i440KX/GX host bridge %s: secondary bus %02x\n", d->slot_name, busno); + pci_scan_bus(busno, pci_root_ops, NULL); + pcibios_last_bus = -1; +} + +static void __devinit pci_fixup_umc_ide(struct pci_dev *d) +{ + /* + * UM8886BF IDE controller sets region type bits incorrectly, + * therefore they look like memory despite of them being I/O. + */ + int i; + + printk("PCI: Fixing base address flags for device %s\n", d->slot_name); + for(i=0; i<4; i++) + d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; +} + +static void __devinit pci_fixup_ide_bases(struct pci_dev *d) +{ + int i; + + /* + * PCI IDE controllers use non-standard I/O port decoding, respect it. + */ + if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) + return; + DBG("PCI: IDE base address fixup for %s\n", d->slot_name); + for(i=0; i<4; i++) { + struct resource *r = &d->resource[i]; + if ((r->start & ~0x80) == 0x374) { + r->start |= 2; + r->end = r->start; + } + } +} + +static void __devinit pci_fixup_ide_trash(struct pci_dev *d) +{ + int i; + + /* + * There exist PCI IDE controllers which have utter garbage + * in first four base registers. Ignore that. + */ + DBG("PCI: IDE base address trash cleared for %s\n", d->slot_name); + for(i=0; i<4; i++) + d->resource[i].start = d->resource[i].end = d->resource[i].flags = 0; +} + +static void __devinit pci_fixup_latency(struct pci_dev *d) +{ + /* + * SiS 5597 and 5598 chipsets require latency timer set to + * at most 32 to avoid lockups. + */ + DBG("PCI: Setting max latency to 32\n"); + pcibios_max_latency = 32; +} + +static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) +{ + /* + * PIIX4 ACPI device: hardwired IRQ9 + */ + d->irq = 9; +} + +/* + * Nobody seems to know what this does. Damn. + * + * But it does seem to fix some unspecified problem + * with 'movntq' copies on Athlons. 
+ * + * VIA 8363 chipset: + * - bit 7 at offset 0x55: Debug (RW) + */ +static void __init pci_fixup_via_athlon_bug(struct pci_dev *d) +{ + u8 v; + pci_read_config_byte(d, 0x55, &v); + if (v & 0x80) { + printk("Trying to stomp on Athlon bug...\n"); + v &= 0x7f; /* clear bit 55.7 */ + pci_write_config_byte(d, 0x55, v); + } +} + +struct pci_fixup pcibios_fixups[] = { + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513, pci_fixup_ide_trash }, + { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_athlon_bug }, + { 0 } +}; + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __devinit pcibios_fixup_bus(struct pci_bus *b) +{ + pcibios_fixup_ghosts(b); + pci_read_bridge_bases(b); +} + + +void __devinit pcibios_config_init(void) +{ + /* + * Try all known PCI access methods. Note that we support using + * both PCI BIOS and direct access, with a preference for direct. + */ + +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_PROBE_BIOS) + && ((pci_root_ops = pci_find_bios()))) { + pci_probe |= PCI_BIOS_SORT; + pci_bios_present = 1; + pci_config_read = pci_bios_read; + pci_config_write = pci_bios_write; + } +#endif + +#ifdef CONFIG_PCI_DIRECT + if ((pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2)) + && (pci_root_ops = pci_check_direct())) { + if (pci_root_ops == &pci_direct_conf1) { + pci_config_read = pci_conf1_read; + pci_config_write = pci_conf1_write; + } + else { + pci_config_read = pci_conf2_read; + pci_config_write = pci_conf2_write; + } + } +#endif + + return; +} + +void __init pcibios_init(void) +{ + if (!pci_root_ops) + pcibios_config_init(); + if (!pci_root_ops) { + printk("PCI: System does not support PCI\n"); + return; + } + + printk("PCI: Probing PCI hardware\n"); + pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL); + + pcibios_irq_init(); + pcibios_fixup_peer_bridges(); + pcibios_fixup_irqs(); + pcibios_resource_survey(); + +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) + pcibios_sort(); +#endif +} + +char * __devinit pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } +#ifdef CONFIG_PCI_BIOS + else if (!strcmp(str, "bios")) { + pci_probe = PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nobios")) { + pci_probe &= ~PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nosort")) { + pci_probe |= PCI_NO_SORT; + return NULL; + } else if (!strcmp(str, "biosirq")) { + pci_probe |= PCI_BIOS_IRQ_SCAN; + return NULL; + } +#endif +#ifdef CONFIG_PCI_DIRECT + else if (!strcmp(str, "conf1")) { + pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; + return NULL; + } + else if (!strcmp(str, "conf2")) { + pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; + return NULL; + } +#endif + else if (!strcmp(str, "rom")) { + pci_probe |= PCI_ASSIGN_ROMS; + return NULL; + } else if (!strcmp(str, "assign-busses")) { + pci_probe |= PCI_ASSIGN_ALL_BUSSES; 
+ return NULL; + } else if (!strncmp(str, "irqmask=", 8)) { + pcibios_irq_mask = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strncmp(str, "lastbus=", 8)) { + pcibios_last_bus = simple_strtol(str+8, NULL, 0); + return NULL; + } + return str; +} + +unsigned int pcibios_assign_all_busses(void) +{ + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; +} + +int pcibios_enable_device(struct pci_dev *dev) +{ + int err; + + if ((err = pcibios_enable_resources(dev)) < 0) + return err; + pcibios_enable_irq(dev); + return 0; +} diff --git a/xen-2.4.16/arch/i386/process.c b/xen-2.4.16/arch/i386/process.c new file mode 100644 index 0000000000..11cd5a1c23 --- /dev/null +++ b/xen-2.4.16/arch/i386/process.c @@ -0,0 +1,402 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#define __KERNEL_SYSCALLS__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +asmlinkage void ret_from_newdomain(void) __asm__("ret_from_newdomain"); + +int hlt_counter; + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * We use this if we don't have any better + * idle routine.. + */ +static void default_idle(void) +{ + if (!hlt_counter) { + __cli(); + if (!current->hyp_events) + safe_halt(); + else + __sti(); + } +} + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle (void) +{ + ASSERT(current->domain == IDLE_DOMAIN_ID); + current->has_cpu = 1; + + /* + * Declares CPU setup done to the boot processor. + * Therefore memory barrier to ensure state is visible. 
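A note on the direct-access path preferred by pcibios_config_init() above: configuration mechanism #1 works by writing a dword to port 0xCF8 that selects bus, device, function and register, after which the data appears at port 0xCFC. The following is a minimal standalone sketch of just the address encoding; pci_conf1_addr and the demo values are illustrative names, not code from this file.

#include <stdint.h>
#include <stdio.h>

#define PCI_CONF1_ADDR_PORT 0xCF8  /* address register */
#define PCI_CONF1_DATA_PORT 0xCFC  /* data register */

/* Build the dword written to 0xCF8: enable bit, bus, device, function,
 * and the dword-aligned register offset. */
static uint32_t pci_conf1_addr(uint8_t bus, uint8_t dev,
                               uint8_t fn, uint8_t reg)
{
    return 0x80000000u                      /* enable configuration cycle */
         | ((uint32_t)bus << 16)
         | ((uint32_t)(dev & 0x1f) << 11)
         | ((uint32_t)(fn & 0x07) << 8)
         | (reg & 0xfc);                    /* dword aligned */
}

int main(void)
{
    /* e.g. bus 0, device 2, function 0, vendor-ID register 0x00 */
    printf("CF8 word: 0x%08x\n", (unsigned)pci_conf1_addr(0, 2, 0, 0x00));
    return 0;
}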
+ */ + smp_mb(); + init_idle(); + + for ( ; ; ) + { + while (!current->hyp_events) + default_idle(); + do_hyp_events(); + } +} + +static long no_idt[2]; +static int reboot_mode; +int reboot_thru_bios = 0; + +#ifdef CONFIG_SMP +int reboot_smp = 0; +static int reboot_cpu = -1; +/* shamelessly grabbed from lib/vsprintf.c for readability */ +#define is_digit(c) ((c) >= '0' && (c) <= '9') +#endif + + +static inline void kb_wait(void) +{ + int i; + + for (i=0; i<0x10000; i++) + if ((inb_p(0x64) & 0x02) == 0) + break; +} + + +void machine_restart(char * __unused) +{ +#if CONFIG_SMP + int cpuid; + + cpuid = GET_APIC_ID(apic_read(APIC_ID)); + + if (reboot_smp) { + + /* check to see if reboot_cpu is valid + if it's not, default to the BSP */ + if ((reboot_cpu == -1) || + (reboot_cpu > (NR_CPUS -1)) || + !(phys_cpu_present_map & (1<] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); + if (regs->xcs & 3) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx\n",regs->eflags); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes); + + __asm__("movl %%cr0, %0": "=r" (cr0)); + __asm__("movl %%cr2, %0": "=r" (cr2)); + __asm__("movl %%cr3, %0": "=r" (cr3)); + /* This could fault if %cr4 does not exist */ + __asm__("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (cr4): "0" (0)); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + show_trace(&regs->esp); +} + +/* + * No need to lock the MM as we are the last user + */ +void release_segments(struct mm_struct *mm) +{ +#if 0 + void * ldt = mm->context.segments; + + /* + * free the LDT + */ + if (ldt) { + mm->context.segments = NULL; + clear_LDT(); + vfree(ldt); + } +#endif +} + + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* nothing to do ... */ +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + /* + * Forget coprocessor state.. + */ + clear_fpu(tsk); + tsk->flags &= ~PF_DONEFPUINIT; +} + +void release_thread(struct task_struct *dead_task) +{ +#if 0 + if (dead_task->mm) { + void * ldt = dead_task->mm->context.segments; + + // temporary debugging check + if (ldt) { + printk("WARNING: dead process %8s still has LDT? <%p>\n", + dead_task->comm, ldt); + BUG(); + } + } +#endif +}
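The cli/test/halt sequence in default_idle() further up exists to close a lost-wakeup race: the check of current->hyp_events and the halt must be atomic with respect to event delivery, which is what safe_halt()'s sti;hlt pairing provides (an interrupt cannot slip in between the sti and the hlt). Below is a userspace analogue of the same pattern, assuming only POSIX threads; the mutex stands in for cli/sti and the condition wait for hlt. It is a sketch of the idea, not this file's code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static int pending_events;

static void idle_once(void)
{
    pthread_mutex_lock(&lock);           /* ~ __cli() */
    if (!pending_events)
        pthread_cond_wait(&kick, &lock); /* ~ safe_halt(): atomically
                                            re-enables "interrupts" and sleeps */
    pthread_mutex_unlock(&lock);         /* ~ __sti() */
}

static void *waker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    pending_events = 1;                  /* ~ an event arriving */
    pthread_cond_signal(&kick);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, waker, NULL);
    idle_once();
    pthread_join(t, NULL);
    printf("woke with pending_events=%d\n", pending_events);
    return 0;
}

+/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed.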
+ */ +void copy_segments(struct task_struct *p, struct mm_struct *new_mm) +{ +#if 0 + struct mm_struct * old_mm; + void *old_ldt, *ldt; + + ldt = NULL; + old_mm = current->mm; + if (old_mm && (old_ldt = old_mm.context.segments) != NULL) { + /* + * Completely new LDT, we initialize it from the parent: + */ + ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if (!ldt) + printk(KERN_WARNING "ldt allocation failed\n"); + else + memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + } + new_mm.context.segments = ldt; + new_mm.context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */ +#endif +} + + +void new_thread(struct task_struct *p, + unsigned long start_pc, + unsigned long start_stack, + unsigned long start_info) +{ + struct pt_regs * regs; + + regs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + memset(regs, 0, sizeof(*regs)); + + /* + * Initial register values: + * DS,ES,FS,GS = __GUEST_DS + * CS:EIP = __GUEST_CS:start_pc + * SS:ESP = __GUEST_DS:start_stack + * ESI = start_info + * [EAX,EBX,ECX,EDX,EDI,EBP are zero] + */ + p->thread.fs = p->thread.gs = __GUEST_DS; + regs->xds = regs->xes = regs->xss = __GUEST_DS; + regs->xcs = __GUEST_CS; + regs->eip = start_pc; + regs->esp = start_stack; + regs->esi = start_info; + + p->thread.esp = (unsigned long) regs; + p->thread.esp0 = (unsigned long) (regs+1); + + p->thread.eip = (unsigned long) ret_from_newdomain; + + __save_flags(regs->eflags); + regs->eflags |= X86_EFLAGS_IF; +} + + +/* + * This special macro can be used to load a debugging register + */ +#define loaddebug(thread,register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (thread->debugreg[register])) + +/* + * switch_to(x,yn) should switch tasks from x to y. + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. Lazy FP saving no longer makes any sense + * with modern CPU's, and this simplifies a lot of things (SMP + * and UP become the same). + * + * NOTE! We used to use the x86 hardware context switching. The + * reason for not using it any more becomes apparent when you + * try to recover gracefully from saved state that is no longer + * valid (stale segment register values in particular). With the + * hardware task-switch, there is no way to fix up bad state in + * a reasonable manner. + * + * The fact that Intel documents the hardware task-switching to + * be slow is a fairly red herring - this code is not noticeably + * faster. However, there _is_ some room for improvement here, + * so the performance issues may eventually be a valid point. + * More important, however, is the fact that this allows us much + * more flexibility. + */ +/* NB. prev_p passed in %eax, next_p passed in %edx */ +void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; + struct tss_struct *tss = init_tss + smp_processor_id(); + + unlazy_fpu(prev_p); + + tss->esp0 = next->esp0; + tss->esp1 = next->esp1; + tss->ss1 = next->ss1; + + /* + * Save away %fs and %gs. No need to save %es and %ds, as + * those are always kernel segments while inside the kernel. + */ + asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); + asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); + + /* + * Restore %fs and %gs. 
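new_thread() above builds the domain's first register frame by carving a pt_regs out of the very top of the task's own stack block, so that the switch path can simply "return" through ret_from_newdomain into the new context. A standalone sketch of that pointer arithmetic follows, with fake_regs standing in for struct pt_regs and the register values being placeholders.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define THREAD_SIZE 8192               /* task struct and stack share this block */

struct fake_regs {                     /* stand-in for struct pt_regs */
    uint32_t eip, esp, esi, eflags;
};

int main(void)
{
    void *task = malloc(THREAD_SIZE);  /* task struct lives at the base */

    /* Place the frame flush against the top of the block, just as
     * ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p)) - 1 does. */
    struct fake_regs *regs =
        (struct fake_regs *)((char *)task + THREAD_SIZE) - 1;

    memset(regs, 0, sizeof(*regs));
    regs->eip = 0x1000;                /* start_pc */
    regs->esp = 0x2000;                /* start_stack */
    regs->esi = 0x3000;                /* start_info */

    printf("frame begins at offset %ld of %d\n",
           (long)((char *)regs - (char *)task), THREAD_SIZE);
    free(task);
    return 0;
}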
+ */ + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + + /* + * Now maybe reload the debug registers + */ + if (next->debugreg[7]){ + loaddebug(next, 0); + loaddebug(next, 1); + loaddebug(next, 2); + loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + loaddebug(next, 7); + } + +} diff --git a/xen-2.4.16/arch/i386/rwlock.c b/xen-2.4.16/arch/i386/rwlock.c new file mode 100644 index 0000000000..3b9b689c8a --- /dev/null +++ b/xen-2.4.16/arch/i386/rwlock.c @@ -0,0 +1,33 @@ +#include +#include + +#if defined(CONFIG_SMP) +asm( +" +.align 4 +.globl __write_lock_failed +__write_lock_failed: + " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax) +1: rep; nop + cmpl $" RW_LOCK_BIAS_STR ",(%eax) + jne 1b + + " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax) + jnz __write_lock_failed + ret + + +.align 4 +.globl __read_lock_failed +__read_lock_failed: + lock ; incl (%eax) +1: rep; nop + cmpl $1,(%eax) + js 1b + + lock ; decl (%eax) + js __read_lock_failed + ret +" +); +#endif diff --git a/xen-2.4.16/arch/i386/setup.c b/xen-2.4.16/arch/i386/setup.c new file mode 100644 index 0000000000..5b5c73f640 --- /dev/null +++ b/xen-2.4.16/arch/i386/setup.c @@ -0,0 +1,333 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpuinfo_x86 boot_cpu_data = { 0 }; +/* Lots of nice things, since we only target PPro+. */ +unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE; +unsigned long wait_init_idle; + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __init have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else if (!strcmp(v, "CyrixInstead")) + c->x86_vendor = X86_VENDOR_CYRIX; + else if (!strcmp(v, "UMC UMC UMC ")) + c->x86_vendor = X86_VENDOR_UMC; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; + else if (!strcmp(v, "NexGenDriven")) + c->x86_vendor = X86_VENDOR_NEXGEN; + else if (!strcmp(v, "RiseRiseRise")) + c->x86_vendor = X86_VENDOR_RISE; + else if (!strcmp(v, "GenuineTMx86") || + !strcmp(v, "TransmetaCPU")) + c->x86_vendor = X86_VENDOR_TRANSMETA; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, &c->x86_capability); +} + +static void __init init_amd(struct cpuinfo_x86 *c) +{ + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + switch(c->x86) + { + case 5: + panic("AMD K6 is not supported.\n"); + case 6: /* An Athlon/Duron. We can trust the BIOS probably */ + break; + } +} + +/* + * This does the hard work of actually picking apart the CPU stuff... 
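get_cpu_vendor() above matches the 12-byte vendor string that CPUID leaf 0 returns in EBX, EDX, ECX order, which is why identify_cpu() just below stores EBX at bytes 0-3, EDX at bytes 4-7 and ECX at bytes 8-11 of x86_vendor_id. A runnable userspace check of that byte ordering (GCC or Clang on x86):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char vendor[13];

    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return 1;

    memcpy(vendor + 0, &ebx, 4);   /* e.g. "Genu" */
    memcpy(vendor + 4, &edx, 4);   /* e.g. "ineI" */
    memcpy(vendor + 8, &ecx, 4);   /* e.g. "ntel" */
    vendor[12] = '\0';

    printf("vendor: %s (max basic leaf %u)\n", vendor, eax);
    return 0;
}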
+ */ +void __init identify_cpu(struct cpuinfo_x86 *c) +{ + int junk, i; + u32 xlvl, tfms; + + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if ( !have_cpuid_p() ) + panic("Ancient processors not supported\n"); + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + + if ( c->cpuid_level == 0 ) + panic("Decrepit CPUID not supported\n"); + + cpuid(0x00000001, &tfms, &junk, &junk, + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + printk("CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + switch ( c->x86_vendor ) { + case X86_VENDOR_INTEL: + init_intel(c); + break; + case X86_VENDOR_AMD: + init_amd(c); + break; + default: + panic("Only support Intel processors (P6+)\n"); + } + + printk("CPU caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } +} + + +unsigned long cpu_initialized; +void __init cpu_init(void) +{ + int nr = smp_processor_id(); + struct tss_struct * t = &init_tss[nr]; + + if ( test_and_set_bit(nr, &cpu_initialized) ) + panic("CPU#%d already initialized!!!\n", nr); + printk("Initializing CPU#%d\n", nr); + + __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); + __asm__ __volatile__("lidt %0": "=m" (idt_descr)); + + /* No nested task. */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + /* Set up and load the per-CPU TSS and LDT. */ + t->ss0 = __HYPERVISOR_DS; + t->esp0 = current->thread.esp0; + set_tss_desc(nr,t); + load_TR(nr); + __asm__ __volatile__("lldt %%ax"::"a" (0)); + + /* Clear all 6 debug registers. */ +#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); + CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); +#undef CD + + /* Stick the idle task on the run queue. */ + (void)wake_up(current); +} + +static void __init do_initcalls(void) +{ + initcall_t *call; + + call = &__initcall_start; + do { + (*call)(); + call++; + } while (call < &__initcall_end); +} + +/* + * IBM-compatible BIOSes place drive info tables at initial interrupt + * vectors 0x41 and 0x46. These are in the for of 16-bit-mode far ptrs. 
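identify_cpu() above unpacks CPUID leaf 1's EAX (the "tfms" word) by bit field: stepping in bits 0-3, model in bits 4-7, family in bits 8-11. A one-value decoder as a sanity check; the sample value is illustrative.

#include <stdio.h>

int main(void)
{
    unsigned int tfms = 0x0686;        /* e.g. a Pentium III: family 6, model 8, stepping 6 */
    unsigned int family   = (tfms >> 8) & 15;
    unsigned int model    = (tfms >> 4) & 15;
    unsigned int stepping = tfms & 15;

    printf("family %u, model %u, stepping %u\n", family, model, stepping);
    return 0;
}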
+ */ +struct drive_info_struct { unsigned char dummy[32]; } drive_info; +void get_bios_driveinfo(void) +{ + unsigned long seg, off, tab1, tab2; + + off = (unsigned long)*(unsigned short *)(4*0x41+0); + seg = (unsigned long)*(unsigned short *)(4*0x41+2); + tab1 = (seg<<4) + off; + + off = (unsigned long)*(unsigned short *)(4*0x46+0); + seg = (unsigned long)*(unsigned short *)(4*0x46+2); + tab2 = (seg<<4) + off; + + printk("Reading BIOS drive-info tables at 0x%05lx and 0x%05lx\n", + tab1, tab2); + + memcpy(drive_info.dummy+ 0, (char *)tab1, 16); + memcpy(drive_info.dummy+16, (char *)tab2, 16); +} + + +unsigned long pci_mem_start = 0x10000000; + +void __init start_of_day(void) +{ + extern void trap_init(void); + extern void init_IRQ(void); + extern void time_init(void); + extern void softirq_init(void); + extern void timer_bh(void); + extern void tqueue_bh(void); + extern void immediate_bh(void); + extern void init_timervecs(void); + extern int setup_network_devices(void); + extern void net_init(void); + + unsigned long low_mem_size; + + /* + * We do this early, but tables are in the lowest 1MB (usually + * 0xfe000-0xfffff). Therefore they're unlikely to ever get clobbered. + */ + get_bios_driveinfo(); + + /* Tell the PCI layer not to allocate too close to the RAM area.. */ + low_mem_size = ((max_page << PAGE_SHIFT) + 0xfffff) & ~0xfffff; + if ( low_mem_size > pci_mem_start ) pci_mem_start = low_mem_size; + + identify_cpu(&boot_cpu_data); /* get CPU type info */ + if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR); + if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); + find_smp_config(); /* find ACPI tables */ + smp_alloc_memory(); /* trampoline which other CPUs jump at */ + paging_init(); /* not much here now, but sets up fixmap */ + if ( smp_found_config ) get_smp_config(); + domain_init(); + trap_init(); /* + * installs trap (s/w exception) wrappers. + * Most route via entry.S and thence back into traps.c + * where a really simple handler does a panic. + * Instead, we'll want to pass most back to a domain. + */ + init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ + time_init(); /* installs software handler for HZ clock. */ + softirq_init(); + init_timervecs(); + init_bh(TIMER_BH, timer_bh); + init_bh(TQUEUE_BH, tqueue_bh); + init_bh(IMMEDIATE_BH, immediate_bh); + init_apic_mappings(); /* make APICs addressable in our pagetables. */ + + sti(); + +#ifndef CONFIG_SMP + APIC_init_uniprocessor(); +#else + smp_boot_cpus(); /* + * Does loads of stuff, including kicking the local + * APIC, and the IO APIC after other CPUs are booted. + * Each IRQ is preferably handled by IO-APIC, but + * fall thru to 8259A if we have to (but slower). + */ +#endif + + zap_low_mappings(); + kmem_cache_init(); + kmem_cache_sizes_init(max_page); +#ifdef CONFIG_PCI + pci_init(); +#endif + do_initcalls(); + if ( !setup_network_devices() ) + panic("Must have a network device!\n"); + net_init(); /* initializes virtual network system. */ + +#ifdef CONFIG_SMP + wait_init_idle = cpu_online_map; + clear_bit(smp_processor_id(), &wait_init_idle); + smp_threads_ready = 1; + smp_commence(); /* Tell other CPUs that state of the world is stable. */ + while (wait_init_idle) + { + cpu_relax(); + barrier(); + } +#endif +} diff --git a/xen-2.4.16/arch/i386/smp.c b/xen-2.4.16/arch/i386/smp.c new file mode 100644 index 0000000000..008d1aa83a --- /dev/null +++ b/xen-2.4.16/arch/i386/smp.c @@ -0,0 +1,578 @@ +/* + * Intel SMP support routines. 
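get_bios_driveinfo() above decodes real-mode far pointers out of the interrupt vector table: each 4-byte IVT slot stores a 16-bit offset followed by a 16-bit segment, and the linear address is (segment << 4) + offset. A standalone sketch over sample bytes rather than a live IVT; far_to_linear and the example value are illustrative.

#include <stdint.h>
#include <stdio.h>

static uint32_t far_to_linear(const uint8_t *ivt_slot)
{
    uint16_t off = (uint16_t)(ivt_slot[0] | (ivt_slot[1] << 8));
    uint16_t seg = (uint16_t)(ivt_slot[2] | (ivt_slot[3] << 8));
    return ((uint32_t)seg << 4) + off;   /* 20-bit real-mode address */
}

int main(void)
{
    /* a typical BIOS value, e.g. F000:E401 -> 0xFE401 */
    const uint8_t slot[4] = { 0x01, 0xE4, 0x00, 0xF0 };
    printf("drive table at 0x%05x\n", (unsigned)far_to_linear(slot));
    return 0;
}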
+ * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998-99, 2000 Ingo Molnar + * + * This code is released under the GNU General Public License version 2 or + * later. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Some notes on x86 processor bugs affecting SMP operation: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP errata are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP errata are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP errata are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. + * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deassering IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + * + * Pentium + * There is a marginal case where REP MOVS on 100MHz SMP + * machines with B stepping processors can fail. XXX should provide + * an L1cache=Writethrough or L1cache=off option. + * + * B stepping CPUs may hang. There are hardware work arounds + * for this. We warn about it in case your board doesnt have the work + * arounds. Basically thats so I can tell anyone with a B stepping + * CPU and SMP problems "tough". + * + * Specific items [From Pentium Processor Specification Update] + * + * 1AP. Linux doesn't use remote read + * 2AP. Linux doesn't trust APIC errors + * 3AP. We work around this + * 4AP. Linux never generated 3 interrupts of the same priority + * to cause a lost local interrupt. + * 5AP. Remote read is never used + * 6AP. not affected - worked around in hardware + * 7AP. not affected - worked around in hardware + * 8AP. worked around in hardware - we get explicit CS errors if not + * 9AP. only 'noapic' mode affected. Might generate spurious + * interrupts, we log only the first one and count the + * rest silently. + * 10AP. not affected - worked around in hardware + * 11AP. Linux reads the APIC between writes to avoid this, as per + * the documentation. Make sure you preserve this as it affects + * the C stepping chips too. + * 12AP. not affected - worked around in hardware + * 13AP. not affected - worked around in hardware + * 14AP. we always deassert INIT during bootup + * 15AP. not affected - worked around in hardware + * 16AP. not affected - worked around in hardware + * 17AP. not affected - worked around in hardware + * 18AP. not affected - worked around in hardware + * 19AP. not affected - worked around in BIOS + * + * If this sounds worrying believe me these bugs are either ___RARE___, + * or are signal timing bugs worked around in hardware and there's + * about nothing of note with C stepping upwards. + */ + +/* The 'big kernel lock' */ +spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; + +struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... 
NR_CPUS-1] = { 0 }}; + +/* + * the following functions deal with sending IPIs between CPUs. + * + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. + */ + +static inline int __prepare_ICR (unsigned int shortcut, int vector) +{ + return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; +} + +static inline int __prepare_ICR2 (unsigned int mask) +{ + return SET_APIC_DEST_FIELD(mask); +} + +static inline void __send_IPI_shortcut(unsigned int shortcut, int vector) +{ + /* + * Subtle. In the case of the 'never do double writes' workaround + * we have to lock out interrupts to be safe. As we don't care + * of the value read we use an atomic rmw access to avoid costly + * cli/sti. Otherwise we use an even cheaper single atomic write + * to the APIC. + */ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * No need to touch the target chip field + */ + cfg = __prepare_ICR(shortcut, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); +} + +void send_IPI_self(int vector) +{ + __send_IPI_shortcut(APIC_DEST_SELF, vector); +} + +static inline void send_IPI_mask_bitmask(int mask, int vector) +{ + unsigned long cfg; + unsigned long flags; + + __save_flags(flags); + __cli(); + + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(mask); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + + __restore_flags(flags); +} + +static inline void send_IPI_mask_sequence(int mask, int vector) +{ + unsigned long cfg, flags; + unsigned int query_cpu, query_mask; + + __save_flags(flags); + __cli(); + + for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { + query_mask = 1 << query_cpu; + if (query_mask & mask) { + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + /* + * prepare target chip field + */ + cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); + apic_write_around(APIC_ICR2, cfg); + + /* + * program the ICR + */ + cfg = __prepare_ICR(0, vector); + + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write_around(APIC_ICR, cfg); + } + } + __restore_flags(flags); +} + +static inline void send_IPI_mask(int mask, int vector) +{ + send_IPI_mask_bitmask(mask, vector); +} + +static inline void send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then + * we get an APIC send error if we try to broadcast. + * thus we have to avoid sending IPIs in this case. + */ + if (!(smp_num_cpus > 1)) + return; + + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +} + +static inline void send_IPI_all(int vector) +{ + __send_IPI_shortcut(APIC_DEST_ALLINC, vector); +} + +/* + * Smarter SMP flushing macros. + * c/o Linus Torvalds. + * + * These mean you can really definitely utterly forget about + * writing to user space from interrupts. (Its not allowed anyway). + * + * Optimizations Manfred Spraul + */ + +static volatile unsigned long flush_cpumask; +static struct mm_struct * flush_mm; +static unsigned long flush_va; +static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED; +#define FLUSH_ALL 0xffffffff + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm.cpu_vm_mask. 
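All of the senders above follow the same local-APIC sequence: poll the ICR delivery-status bit until idle, program the destination into ICR2, then write ICR, which launches the IPI. Below is a sketch of that ordering against a fake in-memory register block; the offsets and bit values follow the APIC architecture, but fake_apic and send_ipi_mask are inventions for illustration, not this file's code, and the example vector is arbitrary.

#include <stdint.h>
#include <stdio.h>

#define APIC_ICR_BUSY     (1u << 12)       /* delivery status */
#define APIC_DM_FIXED     0x000u
#define APIC_DEST_LOGICAL 0x800u

static uint32_t fake_apic[0x400 / 4];      /* stand-in for the MMIO page */
#define REG(off) fake_apic[(off) / 4]

static void send_ipi_mask(uint8_t mask, uint8_t vector)
{
    while (REG(0x300) & APIC_ICR_BUSY)     /* 1. wait for idle */
        ;
    REG(0x310) = (uint32_t)mask << 24;     /* 2. destination into ICR2 (bits 24-31) */
    REG(0x300) = APIC_DM_FIXED | APIC_DEST_LOGICAL | vector;
                                           /* 3. ICR write fires the IPI on real hw */
}

int main(void)
{
    send_ipi_mask(0x3, 0xfb);              /* CPUs 0-1, arbitrary example vector */
    printf("ICR=0x%08x ICR2=0x%08x\n",
           (unsigned)REG(0x300), (unsigned)REG(0x310));
    return 0;
}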
+ */ +static void inline leave_mm (unsigned long cpu) +{ + if (cpu_tlbstate[cpu].state == TLBSTATE_OK) + BUG(); + clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask); +} + +/* + * + * The flush IPI assumes that a thread switch happens in this order: + * [cpu0: the cpu that switches] + * 1) switch_mm() either 1a) or 1b) + * 1a) thread switch to a different mm + * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask); + * Stop ipi delivery for the old mm. This is not synchronized with + * the other cpus, but smp_invalidate_interrupt ignore flush ipis + * for the wrong mm, and in the worst case we perform a superflous + * tlb flush. + * 1a2) set cpu_tlbstate to TLBSTATE_OK + * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 + * was in lazy tlb mode. + * 1a3) update cpu_tlbstate[].active_mm + * Now cpu0 accepts tlb flushes for the new mm. + * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask); + * Now the other cpus will send tlb flush ipis. + * 1a4) change cr3. + * 1b) thread switch without mm change + * cpu_tlbstate[].active_mm is correct, cpu0 already handles + * flush ipis. + * 1b1) set cpu_tlbstate to TLBSTATE_OK + * 1b2) test_and_set the cpu bit in cpu_vm_mask. + * Atomically set the bit [other cpus will start sending flush ipis], + * and test the bit. + * 1b3) if the bit was 0: leave_mm was called, flush the tlb. + * 2) switch %%esp, ie current + * + * The interrupt must handle 2 special cases: + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. + * - the cpu performs speculative tlb reads, i.e. even if the cpu only + * runs in kernel space, the cpu could load tlb entries for user space + * pages. + * + * The good news is that cpu_tlbstate is local to each cpu, no + * write/read ordering problems. + */ + +/* + * TLB flush IPI: + * + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. + * 2) Leave the mm if we are in the lazy tlb mode. + */ + +asmlinkage void smp_invalidate_interrupt (void) +{ + unsigned long cpu = smp_processor_id(); + + if (!test_bit(cpu, &flush_cpumask)) + return; + /* + * This was a BUG() but until someone can quote me the + * line from the intel manual that guarantees an IPI to + * multiple CPUs is retried _only_ on the erroring CPUs + * its staying as a return + * + * BUG(); + */ + + if (flush_mm == cpu_tlbstate[cpu].active_mm) { + if (cpu_tlbstate[cpu].state == TLBSTATE_OK) { + if (flush_va == FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); + } else + leave_mm(cpu); + } + ack_APIC_irq(); + clear_bit(cpu, &flush_cpumask); +} + +static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, + unsigned long va) +{ + /* + * A couple of (to be removed) sanity checks: + * + * - we do not send IPIs to not-yet booted CPUs. + * - current CPU must not be in mask + * - mask must exist :) + */ + if (!cpumask) + BUG(); + if ((cpumask & cpu_online_map) != cpumask) + BUG(); + if (cpumask & (1 << smp_processor_id())) + BUG(); + if (!mm) + BUG(); + + /* + * i'm not happy about this global shared spinlock in the + * MM hot path, but we'll see how contended it is. + * Temporarily this turns IRQs off, so that lockups are + * detected by the NMI watchdog. + */ + spin_lock(&tlbstate_lock); + + flush_mm = mm; + flush_va = va; + atomic_set_mask(cpumask, &flush_cpumask); + /* + * We have to send the IPI only to + * CPUs affected. + */ + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + + while (flush_cpumask) + /* nothing. 
lockup detection does not belong here */; + + flush_mm = NULL; + flush_va = 0; + spin_unlock(&tlbstate_lock); +} + +void flush_tlb_current_task(void) +{ + struct mm_struct *mm = &current->mm; + unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + + local_flush_tlb(); + if (cpu_mask) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); +} + +void flush_tlb_mm (struct mm_struct * mm) +{ + unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + + if (current->active_mm == mm) + local_flush_tlb(); + if (cpu_mask) + flush_tlb_others(cpu_mask, mm, FLUSH_ALL); +} + +#if 0 +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id()); + + if (current->active_mm == mm) { + if(current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (cpu_mask) + flush_tlb_others(cpu_mask, mm, va); +} +#endif + +static inline void do_flush_tlb_all_local(void) +{ + unsigned long cpu = smp_processor_id(); + + __flush_tlb_all(); + if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY) + leave_mm(cpu); +} + +static void flush_tlb_all_ipi(void* info) +{ + do_flush_tlb_all_local(); +} + +void flush_tlb_all(void) +{ + smp_call_function (flush_tlb_all_ipi,0,1,1); + + do_flush_tlb_all_local(); +} + +void smp_send_event_check_mask(unsigned long cpu_mask) +{ + send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + */ +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static struct call_data_struct * call_data; + +/* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. + */ + +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +/* + * [SUMMARY] Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <nonatomic> currently unused. + * <wait> If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <<func>> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, or bottom halves. + */ +{ + struct call_data_struct data; + int cpus = smp_num_cpus-1; + + if (!cpus) + return 0; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + barrier(); + + spin_unlock(&call_lock); + + return 0; +} + +static void stop_this_cpu (void * dummy) +{ + /* + * Remove this CPU: + */ + clear_bit(smp_processor_id(), &cpu_online_map); + __cli(); + disable_local_APIC(); + for(;;) __asm__("hlt"); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system.
+ */ + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_num_cpus = 1; + + __cli(); + disable_local_APIC(); + __sti(); +} + +/* + * Nothing to do, as all the work is done automatically when + * we return from the interrupt. + */ +asmlinkage void smp_event_check_interrupt(void) +{ + ack_APIC_irq(); +} + +asmlinkage void smp_call_function_interrupt(void) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + ack_APIC_irq(); + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } +} + diff --git a/xen-2.4.16/arch/i386/smpboot.c b/xen-2.4.16/arch/i386/smpboot.c new file mode 100644 index 0000000000..ce4d3da402 --- /dev/null +++ b/xen-2.4.16/arch/i386/smpboot.c @@ -0,0 +1,942 @@ +/* + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999, 2000 Ingo Molnar + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. + * + * This code is released under the GNU General Public License version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIP report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. + * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. + * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. Rozycki : Bits for genuine 82489DX APICs + * Martin J. Bligh : Added support for multi-quad systems + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Set if we find a B stepping CPU */ +static int smp_b_stepping; + +/* Setup configured maximum number of CPUs to activate */ +static int max_cpus = -1; + +/* Total count of live CPUs */ +int smp_num_cpus = 1; + +/* Bitmask of currently online CPUs */ +unsigned long cpu_online_map; + +static volatile unsigned long cpu_callin_map; +static volatile unsigned long cpu_callout_map; + +/* Per CPU bogomips and other parameters */ +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; + +/* Set when the idlers are all forked */ +int smp_threads_ready; + +/* + * Trampoline 80x86 program as an array. + */ + +extern unsigned char trampoline_data []; +extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. 
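Stepping back to the pair completed in this hunk, smp_call_function() and smp_call_function_interrupt(): the protocol is a two-counter rendezvous. Each responder bumps "started" once it has copied call_data out of the shared pointer, and bumps "finished" after running the function when the caller asked to wait. Below is a runnable pthread model of the same handshake; the names are stand-ins and the spin loops are deliberately naive. Compile with -pthread.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

struct call_data {                       /* models call_data_struct above */
    void (*func)(void *);
    void *info;
    atomic_int started, finished;
    int wait;
};

static struct call_data *_Atomic call_data_ptr;

static void hello(void *info) { printf("responder ran func(%s)\n", (char *)info); }

static void *responder(void *arg)        /* ~ smp_call_function_interrupt() */
{
    struct call_data *d;
    (void)arg;
    while (!(d = atomic_load(&call_data_ptr)))
        ;                                /* ~ waiting for the IPI to arrive */
    void (*f)(void *) = d->func;         /* copy the data out first... */
    void *info = d->info;
    int wait = d->wait;
    atomic_fetch_add(&d->started, 1);    /* ...then let the initiator go */
    f(info);
    if (wait)
        atomic_fetch_add(&d->finished, 1);
    return NULL;
}

int main(void)                           /* ~ smp_call_function() with wait=1 */
{
    static struct call_data d = { hello, "hi", 0, 0, 1 };
    pthread_t t[NCPUS - 1];
    int i;

    for (i = 0; i < NCPUS - 1; i++)
        pthread_create(&t[i], NULL, responder, NULL);

    atomic_store(&call_data_ptr, &d);    /* ~ send_IPI_allbutself() */
    while (atomic_load(&d.started) != NCPUS - 1)
        ;                                /* all responders copied the data */
    while (atomic_load(&d.finished) != NCPUS - 1)
        ;                                /* wait==1: all have run func */

    for (i = 0; i < NCPUS - 1; i++)
        pthread_join(t[i], NULL);
    printf("rendezvous complete\n");
    return 0;
}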
+ */ + +static unsigned long __init setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} + +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +void __init smp_alloc_memory(void) +{ + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + trampoline_base = __va(0x90000); +} + +/* + * The bootstrap kernel entry code has set these up. Save them for + * a given CPU + */ + +void __init smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c = cpu_data + id; + + *c = boot_cpu_data; + c->pte_quick = 0; + c->pmd_quick = 0; + c->pgd_quick = 0; + c->pgtable_cache_sz = 0; + identify_cpu(c); + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; +} + +/* + * Architecture specific routine called by the kernel just before init is + * fired off. This allows the BP to have everything in order [we hope]. + * At the end of this all the APs will hit the system scheduling and off + * we go. Each AP will load the system gdt's and jump through the kernel + * init into idle(). At this point the scheduler will one day take over + * and give them jobs to do. smp_callin is a standard routine + * we use to track CPUs as they power up. + */ + +static atomic_t smp_commenced = ATOMIC_INIT(0); + +void __init smp_commence(void) +{ + /* + * Lets the callins below out of their loop. + */ + Dprintk("Setting commenced=1, go go go\n"); + + wmb(); + atomic_set(&smp_commenced,1); +} + +/* + * TSC synchronization. + * + * We first check wether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS]; + +#define NR_LOOPS 5 + +/* + * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit + * multiplication. Not terribly optimized but we need it at boot time only + * anyway. + * + * result == a / b + * == (a1 + a2*(2^32)) / b + * == a1/b + a2*(2^32/b) + * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b + * ^---- (this multiplication can overflow) + */ + +static unsigned long long div64 (unsigned long long a, unsigned long b0) +{ + unsigned int a1, a2; + unsigned long long res; + + a1 = ((unsigned int*)&a)[0]; + a2 = ((unsigned int*)&a)[1]; + + res = a1/b0 + + (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) + + a2 / b0 + + (a2 * (0xffffffff % b0)) / b0; + + return res; +} + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + int buggy = 0; + + printk("checking TSC synchronization across CPUs: "); + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. 
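div64() above rewrites a 64-by-32 divide as 32-bit pieces via a/b = a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1)%b))/b, with every term truncated, so the result can land slightly below the exact quotient; that is fine for boot-time TSC averaging. The kernel comment also flags that the final multiplication can overflow 32 bits, so the sketch below simply widens it. A quick standalone check against native 64-bit division:

#include <stdint.h>
#include <stdio.h>

static uint64_t div64_approx(uint64_t a, uint32_t b)
{
    uint32_t a1 = (uint32_t)a;          /* low 32 bits */
    uint32_t a2 = (uint32_t)(a >> 32);  /* high 32 bits */

    return a1 / b
         + (uint64_t)a2 * (0xffffffffu / b)
         + a2 / b
         + ((uint64_t)a2 * (0xffffffffu % b)) / b;   /* widened vs. the original */
}

int main(void)
{
    uint64_t a = 123456789012345ull;
    uint32_t b = 2500000;               /* e.g. ticks in a calibration window */

    printf("approx=%llu exact=%llu\n",
           (unsigned long long)div64_approx(a, b),
           (unsigned long long)(a / b));
    return 0;
}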
+ */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < smp_num_cpus; i++) { + t0 = tsc_values[i]; + sum += t0; + } + avg = div64(sum, smp_num_cpus); + + sum = 0; + for (i = 0; i < smp_num_cpus; i++) { + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. + */ + if (delta > 2*ticks_per_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = div64(delta, ticks_per_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", + i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * smp_num_cpus is not necessarily known at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != smp_num_cpus) mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb(); + } +} +#undef NR_LOOPS + +static atomic_t init_deasserted; + +void __init smp_callin(void) +{ + int cpuid, phys_id, i; + + /* + * If waken up by an INIT in an 82489DX configuration + * we may get here before an INIT-deassert IPI reaches + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ + while (!atomic_read(&init_deasserted)); + + /* + * (This works even if the APIC is not enabled.) + */ + phys_id = GET_APIC_ID(apic_read(APIC_ID)); + cpuid = smp_processor_id(); + if (test_and_set_bit(cpuid, &cpu_online_map)) { + printk("huh, phys CPU#%d, CPU#%d already present??\n", + phys_id, cpuid); + BUG(); + } + Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. + */ + + for ( i = 0; i < 200; i++ ) + { + if ( test_bit(cpuid, &cpu_callout_map) ) break; + mdelay(10); + } + + if (!test_bit(cpuid, &cpu_callout_map)) { + printk("BUG: CPU%d started up but did not get a callout!\n", + cpuid); + BUG(); + } + + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. 
(this is probably redundant on most + * boards) + */ + + Dprintk("CALLIN, before setup_local_APIC().\n"); + + setup_local_APIC(); + + __sti(); + +#ifdef CONFIG_MTRR + /* + * Must be done before calibration delay is computed + */ + mtrr_init_secondary_cpu (); +#endif + + Dprintk("Stack at about %p\n",&cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + + /* + * Allow the master to continue. + */ + set_bit(cpuid, &cpu_callin_map); + + /* + * Synchronize the TSC with the BP + */ + synchronize_tsc_ap(); +} + +int cpucount; + +/* + * Activate a secondary processor. + */ +int __init start_secondary(void *unused) +{ + extern void cpu_init(void); + + /* + * Dont put anything before smp_callin(), SMP + * booting is too fragile that we want to limit the + * things done here to the most necessary things. + */ + cpu_init(); + smp_callin(); + + while (!atomic_read(&smp_commenced)) + rep_nop(); + + /* + * low-memory mappings have been cleared, flush them from the local TLBs + * too. + */ + local_flush_tlb(); + + cpu_idle(); + BUG(); + + return 0; +} + +/* + * Everything has been set up for the secondary + * CPUs - they just need to reload everything + * from the task structure + * This function must not return. + */ +void __init initialize_secondary(void) +{ + /* + * We don't actually need to load the full TSS, + * basically just the stack pointer and the eip. + */ + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"r" (current->thread.esp),"r" (current->thread.eip)); +} + +extern struct { + void * esp; + unsigned short ss; +} stack_start; + +/* which physical APIC ID maps to which logical CPU number */ +volatile int physical_apicid_2_cpu[MAX_APICID]; +/* which logical CPU number maps to which physical APIC ID */ +volatile int cpu_2_physical_apicid[NR_CPUS]; + +/* which logical APIC ID maps to which logical CPU number */ +volatile int logical_apicid_2_cpu[MAX_APICID]; +/* which logical CPU number maps to which logical APIC ID */ +volatile int cpu_2_logical_apicid[NR_CPUS]; + +static inline void init_cpu_to_apicid(void) +/* Initialize all maps between cpu number and apicids */ +{ + int apicid, cpu; + + for (apicid = 0; apicid < MAX_APICID; apicid++) { + physical_apicid_2_cpu[apicid] = -1; + logical_apicid_2_cpu[apicid] = -1; + } + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpu_2_physical_apicid[cpu] = -1; + cpu_2_logical_apicid[cpu] = -1; + } +} + +static inline void map_cpu_to_boot_apicid(int cpu, int apicid) +/* + * set up a mapping between cpu and apicid. Uses logical apicids for multiquad, + * else physical apic ids + */ +{ + physical_apicid_2_cpu[apicid] = cpu; + cpu_2_physical_apicid[cpu] = apicid; +} + +static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid) +/* + * undo a mapping between cpu and apicid. Uses logical apicids for multiquad, + * else physical apic ids + */ +{ + physical_apicid_2_cpu[apicid] = -1; + cpu_2_physical_apicid[cpu] = -1; +} + +#if APIC_DEBUG +static inline void inquire_remote_apic(int apicid) +{ + int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; + char *names[] = { "ID", "VERSION", "SPIV" }; + int timeout, status; + + printk("Inquiring remote APIC #%d...\n", apicid); + + for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + printk("... APIC #%d %s: ", apicid, names[i]); + + /* + * Wait for idle. 
+ */ + apic_wait_icr_idle(); + + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); + apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); + + timeout = 0; + do { + udelay(100); + status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; + } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); + + switch (status) { + case APIC_ICR_RR_VALID: + status = apic_read(APIC_RRR); + printk("%08x\n", status); + break; + default: + printk("failed\n"); + } + } +} +#endif + + +static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip) +{ + unsigned long send_status = 0, accept_status = 0; + int maxlvt, timeout, num_starts, j; + + Dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on target chip + */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* + * Send IPI + */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT + | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + mdelay(10); + + Dprintk("Deasserting INIT.\n"); + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Send IPI */ + apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + atomic_set(&init_deasserted, 1); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't send the STARTUP IPIs. + */ + if (APIC_INTEGRATED(apic_version[phys_apicid])) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. + */ + Dprintk("#startup loops: %d.\n", num_starts); + + maxlvt = get_maxlvt(); + + for (j = 1; j <= num_starts; j++) { + Dprintk("Sending STARTUP #%d.\n",j); + + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + Dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); + + /* Boot on the stack */ + /* Kick the second */ + apic_write_around(APIC_ICR, APIC_DM_STARTUP + | (start_eip >> 12)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(300); + + Dprintk("Startup point 1.\n"); + + Dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + Dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. + */ + udelay(200); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + Dprintk("After Startup.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + return (send_status | accept_status); +} + +extern unsigned long cpu_initialized; + +static void __init do_boot_cpu (int apicid) +/* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
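wakeup_secondary_via_INIT() above encodes the AP entry point into each STARTUP IPI as (start_eip >> 12): the 8-bit vector names a 4KB page, and the AP begins executing in real mode at the base of that page. That is why the trampoline set up earlier (at 0x90000) must be page-aligned and below 1MB. A sketch of the encoding; startup_icr_word is an illustrative name, while APIC_DM_STARTUP is the standard delivery-mode value.

#include <stdint.h>
#include <stdio.h>

#define APIC_DM_STARTUP 0x600u

static uint32_t startup_icr_word(uint32_t start_eip)
{
    /* must be page-aligned and under 1MB for real-mode execution */
    if ((start_eip & 0xfff) || start_eip >= 0x100000)
        return 0;                          /* not encodable in a SIPI */
    return APIC_DM_STARTUP | (start_eip >> 12);
}

int main(void)
{
    uint32_t icr = startup_icr_word(0x90000);   /* the trampoline base used above */
    printf("ICR low word: 0x%08x (vector 0x%02x)\n",
           (unsigned)icr, (unsigned)(icr & 0xff));
    return 0;
}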
+ */ +{ + struct task_struct *idle; + unsigned long boot_error = 0; + int timeout, cpu; + unsigned long start_eip; + + cpu = ++cpucount; + /* + * We can't use kernel_thread since we must avoid to reschedule the child. + */ + if ( (idle = do_newdomain()) == NULL ) + panic("failed 'newdomain' for CPU %d", cpu); + + idle->processor = cpu; + idle->domain = IDLE_DOMAIN_ID; + idle->mm.pagetable = mk_pagetable((unsigned long)idle0_pg_table); + + map_cpu_to_boot_apicid(cpu, apicid); + + idle->thread.esp = idle->thread.esp0 = (unsigned long)idle + THREAD_SIZE; + idle->thread.eip = (unsigned long) start_secondary; + + /* start_eip had better be page-aligned! */ + start_eip = setup_trampoline(); + + /* So we see what's up */ + printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); + stack_start.esp = (void *) (1024+PAGE_SIZE+(char *)idle-__PAGE_OFFSET); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + atomic_set(&init_deasserted, 0); + + Dprintk("Setting warm reset code and vector.\n"); + + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + Dprintk("1.\n"); + *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + Dprintk("2.\n"); + *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + Dprintk("3.\n"); + + /* + * Be paranoid about clearing APIC errors. + */ + if (APIC_INTEGRATED(apic_version[apicid])) { + apic_read_around(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } + + /* + * Status is now clean + */ + boot_error = 0; + + /* + * Starting actual IPI sequence... + */ + + boot_error = wakeup_secondary_via_INIT(apicid, start_eip); + + if (!boot_error) { + /* + * allow APs to start initializing. + */ + Dprintk("Before Callout %d.\n", cpu); + set_bit(cpu, &cpu_callout_map); + Dprintk("After Callout %d.\n", cpu); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (test_bit(cpu, &cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (test_bit(cpu, &cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + printk("CPU%d has booted.\n", cpu); + } else { + boot_error= 1; + if (*((volatile unsigned char *)phys_to_virt(8192)) + == 0xA5) + /* trampoline started but...? */ + printk("Stuck ??\n"); + else + /* trampoline code not run */ + printk("Not responding.\n"); +#if APIC_DEBUG + inquire_remote_apic(apicid); +#endif + } + } + if (boot_error) { + /* Try to put things back the way they were before ... */ + unmap_cpu_to_boot_apicid(cpu, apicid); + clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */ + clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ + clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */ + cpucount--; + } + + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)phys_to_virt(8192)) = 0; +} + + +/* + * Cycle through the processors sending APIC IPIs to boot each. + */ + +static int boot_cpu_logical_apicid; +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio = NULL; + +void __init smp_boot_cpus(void) +{ + int apicid, bit; + +#ifdef CONFIG_MTRR + /* Must be done before other processors booted */ + mtrr_init_boot_cpu (); +#endif + /* Initialize the logical to physical CPU number mapping */ + init_cpu_to_apicid(); + + /* + * Setup boot CPU information + */ + smp_store_cpu_info(0); /* Final full version of the data */ + printk("CPU%d booted\n", 0); + + /* + * We have the boot CPU online for sure. 
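Alongside the SIPI, do_boot_cpu() above arms the BIOS warm-reset path: CMOS register 0xF is set to 0x0A ("jump via 40:67"), and the real-mode far pointer at physical 0x467 is aimed at the trampoline, split as segment = eip >> 4 (TRAMPOLINE_HIGH) and offset = eip & 0xf (TRAMPOLINE_LOW). A sketch of that split, under the assumption that the 0x467 slot is laid out offset-then-segment:

#include <stdint.h>
#include <stdio.h>

struct far_ptr { uint16_t off, seg; };     /* layout of the slot at 0x467 */

static struct far_ptr warm_reset_vector(uint32_t start_eip)
{
    struct far_ptr p;
    p.seg = (uint16_t)(start_eip >> 4);    /* TRAMPOLINE_HIGH above */
    p.off = (uint16_t)(start_eip & 0xf);   /* TRAMPOLINE_LOW above */
    return p;
}

int main(void)
{
    struct far_ptr p = warm_reset_vector(0x90000);
    printf("40:67 <- %04x:%04x\n", p.seg, p.off);
    return 0;
}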
+ */ + set_bit(0, &cpu_online_map); + boot_cpu_logical_apicid = logical_smp_processor_id(); + map_cpu_to_boot_apicid(0, boot_cpu_apicid); + + /* + * If we couldnt find an SMP configuration at boot time, + * get out of here now! + */ + if (!smp_found_config) { + printk("SMP motherboard not detected.\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + if (APIC_init_uniprocessor()) + printk("Local APIC not detected." + " Using dummy APIC emulation.\n"); + goto smp_done; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + */ + if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + phys_cpu_present_map |= (1 << hard_smp_processor_id()); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && + !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) { + printk("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + goto smp_done; + } + + verify_local_APIC(); + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + smp_found_config = 0; + printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + io_apic_irqs = 0; + cpu_online_map = phys_cpu_present_map = 1; + smp_num_cpus = 1; + goto smp_done; + } + + connect_bsp_APIC(); + setup_local_APIC(); + + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) + BUG(); + + /* + * Scan the CPU present map and fire up the other CPUs via do_boot_cpu + * + * In clustered apic mode, phys_cpu_present_map is a constructed thus: + * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the + * clustered apic ID. + */ + Dprintk("CPU present map: %lx\n", phys_cpu_present_map); + + for (bit = 0; bit < NR_CPUS; bit++) { + apicid = cpu_present_to_apicid(bit); + /* + * Don't even attempt to start the boot CPU! + */ + if (apicid == boot_cpu_apicid) + continue; + + if (!(phys_cpu_present_map & (1 << bit))) + continue; + if ((max_cpus >= 0) && (max_cpus <= cpucount+1)) + continue; + + do_boot_cpu(apicid); + + /* + * Make sure we unmap all failed CPUs + */ + if ((boot_apicid_to_cpu(apicid) == -1) && + (phys_cpu_present_map & (1 << bit))) + printk("CPU #%d not responding - cannot use it.\n", + apicid); + } + + /* + * Cleanup possible dangling ends... + */ + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; + + if (!cpucount) { + printk("Error: only one processor found.\n"); + } else { + printk("Total of %d processors activated.\n", cpucount+1); + } + smp_num_cpus = cpucount + 1; + + if (smp_b_stepping) + printk("WARNING: SMP operation may" + " be unreliable with B stepping processors.\n"); + Dprintk("Boot done.\n"); + + /* + * Here we can be sure that there is an IO-APIC in the system. Let's + * go and set it up: + */ + if ( nr_ioapics ) setup_IO_APIC(); + + /* Set up all local APIC timers in the system. */ + setup_APIC_clocks(); + + /* Synchronize the TSC with the AP(s). 
*/ + if ( cpucount ) synchronize_tsc_bp(); + + smp_done: + ; +} diff --git a/xen-2.4.16/arch/i386/time.c b/xen-2.4.16/arch/i386/time.c new file mode 100644 index 0000000000..0b7d3ead4e --- /dev/null +++ b/xen-2.4.16/arch/i386/time.c @@ -0,0 +1,143 @@ +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned long ticks_per_usec; /* TSC ticks per microsecond. */ + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; + +/* + * timer_interrupt() needs to keep up the real-time clock, + * as well as call the "do_timer()" routine every clocktick + */ +int timer_ack=0; +extern spinlock_t i8259A_lock; +static inline void do_timer_interrupt(int irq, + void *dev_id, struct pt_regs *regs) +{ +#ifdef CONFIG_X86_IO_APIC + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to reset the IRR bit for do_slow_gettimeoffset(). + * This will also deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock(&i8259A_lock); + outb(0x0c, 0x20); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(0x20); + spin_unlock(&i8259A_lock); + } +#endif + do_timer(regs); +} + +/* + * This is the same as the above, except we _also_ save the current + * Time Stamp Counter value at the time of the timer interrupt, so that + * we later on can estimate the time of day more exactly. + */ +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + do_timer_interrupt(irq, NULL, regs); +} + +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL}; + +/* ------ Calibrate the TSC ------- + * Return processor ticks per second / CALIBRATE_FRAC. + */ + +#define CLOCK_TICK_RATE 1193180 /* system crystal frequency (Hz) */ +#define CALIBRATE_FRAC 20 /* calibrate over 50ms */ +#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC) + +static unsigned long __init calibrate_tsc(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Now let's take care of CTC channel 2 + * + * Set the Gate high, program CTC channel 2 for mode 0, + * (interrupt on terminal count mode), binary count, + * load 5 * LATCH count, (LSB and MSB) to begin countdown. + */ + outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ + outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ + outb(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ + + { + unsigned long startlow, starthigh; + unsigned long endlow, endhigh; + unsigned long count; + + rdtsc(startlow,starthigh); + count = 0; + do { + count++; + } while ((inb(0x61) & 0x20) == 0); + rdtsc(endlow,endhigh); + + /* Error: ECTCNEVERSET */ + if (count <= 1) + goto bad_ctc; + + /* 64-bit subtract - gcc just messes up with long longs */ + __asm__("subl %2,%0\n\t" + "sbbl %3,%1" + :"=a" (endlow), "=d" (endhigh) + :"g" (startlow), "g" (starthigh), + "0" (endlow), "1" (endhigh)); + + /* Error: ECPUTOOFAST */ + if (endhigh) + goto bad_ctc; + + return endlow; + } + + /* + * The CTC wasn't reliable: we got a hit on the very first read, or the + * CPU was so fast/slow that the quotient wouldn't fit in 32 bits.. 
+ */ + bad_ctc: + return 0; +} + +void __init time_init(void) +{ + unsigned long ticks_per_frac = calibrate_tsc(); + + if ( !ticks_per_frac ) + panic("Error calibrating TSC\n"); + + ticks_per_usec = ticks_per_frac / (1000000/CALIBRATE_FRAC); + cpu_khz = ticks_per_frac / (1000/CALIBRATE_FRAC); + + printk("Detected %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + setup_irq(0, &irq0); +} diff --git a/xen-2.4.16/arch/i386/trampoline.S b/xen-2.4.16/arch/i386/trampoline.S new file mode 100644 index 0000000000..f0beef725a --- /dev/null +++ b/xen-2.4.16/arch/i386/trampoline.S @@ -0,0 +1,54 @@ +/* + * + * Trampoline.S Derived from Setup.S by Linus Torvalds + * + * 4 Jan 1997 Michael Chastain: changed to gnu as. + * + * Entry: CS:IP point to the start of our code, we are + * in real mode with no stack, but the rest of the + * trampoline page to make our stack and everything else + * is a mystery. + * + * On entry to trampoline_data, the processor is in real mode + * with 16-bit addressing and 16-bit data. CS has some value + * and IP is zero. Thus, data addresses need to be absolute + * (no relocation) and are taken with regard to r_base. + */ + +#include +#include + +.data + +.code16 + +ENTRY(trampoline_data) +r_base = . + mov %cs, %ax # Code and data in the same place + mov %ax, %ds + + movl $0xA5A5A5A5, %ebx # Flag an SMP trampoline + cli # We should be safe anyway + + movl $0xA5A5A5A5, trampoline_data - r_base + + lidt idt_48 - r_base # load idt with 0, 0 + lgdt gdt_48 - r_base # load gdt with whatever is appropriate + + xor %ax, %ax + inc %ax # protected mode (PE) bit + lmsw %ax # into protected mode + jmp flush_instr +flush_instr: + ljmpl $__HYPERVISOR_CS, $(MONITOR_BASE)-__PAGE_OFFSET + +idt_48: + .word 0 # idt limit = 0 + .word 0, 0 # idt base = 0L + +gdt_48: + .word 0x0800 # gdt limit = 2048, 256 GDT entries + .long gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU) + +.globl SYMBOL_NAME(trampoline_end) +SYMBOL_NAME_LABEL(trampoline_end) diff --git a/xen-2.4.16/arch/i386/traps.c b/xen-2.4.16/arch/i386/traps.c new file mode 100644 index 0000000000..e84690ad1a --- /dev/null +++ b/xen-2.4.16/arch/i386/traps.c @@ -0,0 +1,595 @@ +/* + * linux/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GTBF_TRAP 1 +#define GTBF_TRAP_NOCODE 2 +#define GTBF_TRAP_CR2 4 +struct guest_trap_bounce { + unsigned long error_code; /* 0 */ + unsigned long cr2; /* 4 */ + unsigned short flags; /* 8 */ + unsigned short cs; /* 10 */ + unsigned long eip; /* 12 */ +} guest_trap_bounce[NR_CPUS] = { { 0 } }; + +asmlinkage int hypervisor_call(void); +asmlinkage void lcall7(void); +asmlinkage void lcall27(void); + +/* + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. 
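
The byte offsets noted against guest_trap_bounce above matter: the assembly exit path presumably addresses the structure by constant offset. A compile-time cross-check one could add (a sketch using the negative-array-size trick; not part of the original source):

    #include <stddef.h>
    struct gtb_offset_check {
        char ec [offsetof(struct guest_trap_bounce, error_code) ==  0 ? 1 : -1];
        char cr2[offsetof(struct guest_trap_bounce, cr2)        ==  4 ? 1 : -1];
        char fl [offsetof(struct guest_trap_bounce, flags)      ==  8 ? 1 : -1];
        char cs [offsetof(struct guest_trap_bounce, cs)         == 10 ? 1 : -1];
        char eip[offsetof(struct guest_trap_bounce, eip)        == 12 ? 1 : -1];
    };
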
+ */ +struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 24; + +static inline int kernel_text_address(unsigned long addr) +{ + return ( 1 ); +} + +void show_trace(unsigned long * stack) +{ + int i; + unsigned long addr; + + if (!stack) + stack = (unsigned long*)&stack; + + printk("Call Trace: "); + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_trace_task(struct task_struct *tsk) +{ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ + if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) + return; + show_trace((unsigned long *)esp); +} + +void show_stack(unsigned long * esp) +{ + unsigned long *stack; + int i; + + // debugging aid: "show_stack(NULL);" prints the + // back trace for this cpu. + + if(esp==NULL) + esp=(unsigned long*)&esp; + + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + show_trace(esp); +} + +void show_registers(struct pt_regs *regs) +{ + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (&regs->esp); + ss = __HYPERVISOR_DS; + if ( regs->xcs & 3 ) + { + esp = regs->esp; + ss = regs->xss & 0xffff; + } + + printk("CPU: %d\nEIP: %04x:[<%08lx>] \nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); +} + + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + spin_lock_irq(&die_lock); + printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff); + show_registers(regs); + spin_unlock_irq(&die_lock); + panic("HYPERVISOR DEATH!!\n"); +} + +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +{ + if (!(3 & regs->xcs)) die(str, regs, err); +} + +static void inline do_trap(int trapnr, char *str, + struct pt_regs * regs, + long error_code, int use_error_code) +{ + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + unsigned long addr, fixup; + + if (!(regs->xcs & 3)) + goto fault_in_hypervisor; + + ti = current->thread.traps + trapnr; + if ( trapnr == 14 ) + { + /* page fault pushes %cr2 */ + gtb->flags = GTBF_TRAP_CR2; + __asm__ __volatile__ 
("movl %%cr2,%0" : "=r" (gtb->cr2) : ); + } + else + { + gtb->flags = use_error_code ? GTBF_TRAP : GTBF_TRAP_NOCODE; + } + gtb->error_code = error_code; + gtb->cs = ti->cs; + gtb->eip = ti->address; + return; + + fault_in_hypervisor: + + if ( (fixup = search_exception_table(regs->eip)) != 0 ) + { + regs->eip = fixup; + return; + } + + __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : ); + + if ( trapnr == 14 ) + { + unsigned long page; + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (page) : ); + printk(" pde = %08lx\n", page); + page = ((unsigned long *) __va(page))[addr >> 22]; + printk("*pde = %08lx\n", page); + if ( page & _PAGE_PRESENT ) + { + page &= PAGE_MASK; + page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT]; + printk(" *pte = %08lx\n", page); + } + } + + show_registers(regs); + panic("CPU%d FATAL TRAP: vector = %d (%s)\n" + "[error_code=%08x]\n" + "Faulting linear address might be %08lx\n", + smp_processor_id(), trapnr, str, + error_code, addr); +} + +#define DO_ERROR_NOCODE(trapnr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ +do_trap(trapnr, str, regs, error_code, 0); \ +} + +#define DO_ERROR(trapnr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ +do_trap(trapnr, str, regs, error_code, 1); \ +} + +DO_ERROR_NOCODE( 0, "divide error", divide_error) +DO_ERROR_NOCODE( 3, "int3", int3) +DO_ERROR_NOCODE( 4, "overflow", overflow) +DO_ERROR_NOCODE( 5, "bounds", bounds) +DO_ERROR_NOCODE( 6, "invalid operand", invalid_op) +DO_ERROR_NOCODE( 7, "device not available", device_not_available) +DO_ERROR( 8, "double fault", double_fault) +DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(10, "invalid TSS", invalid_TSS) +DO_ERROR(11, "segment not present", segment_not_present) +DO_ERROR(12, "stack segment", stack_segment) +DO_ERROR(14, "page fault", page_fault) +/* Vector 15 reserved by Intel */ +DO_ERROR_NOCODE(16, "fpu error", coprocessor_error) +DO_ERROR(17, "alignment check", alignment_check) +DO_ERROR_NOCODE(18, "machine check", machine_check) +DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + trap_info_t *ti; + unsigned long fixup; + + /* Bad shit if error in ring 0, or result of an interrupt. */ + if (!(regs->xcs & 3) || (error_code & 1)) + goto gp_in_kernel; + + if ( (error_code & 2) ) + { + /* This fault must be due to instruction. */ + ti = current->thread.traps + (error_code>>3); + if ( ti->dpl >= (regs->xcs & 3) ) + { + gtb->flags = GTBF_TRAP_NOCODE; + gtb->cs = ti->cs; + gtb->eip = ti->address; + regs->eip += 2; + return; + } + } + + /* Pass on GPF as is. */ + ti = current->thread.traps + 13; + gtb->flags = GTBF_TRAP; + gtb->error_code = error_code; + gtb->cs = ti->cs; + gtb->eip = ti->address; + return; + + gp_in_kernel: + if ( (fixup = search_exception_table(regs->eip)) != 0 ) + { + regs->eip = fixup; + return; + } + + die("general protection fault", regs, error_code); +} + +static void mem_parity_error(unsigned char reason, struct pt_regs * regs) +{ + printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); + printk("You probably have a hardware problem with your RAM chips\n"); + + /* Clear and disable the memory parity error line. 
*/ + reason = (reason & 0xf) | 4; + outb(reason, 0x61); +} + +static void io_check_error(unsigned char reason, struct pt_regs * regs) +{ + unsigned long i; + + printk("NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & 0xf) | 8; + outb(reason, 0x61); + i = 2000; + while (--i) udelay(1000); + reason &= ~8; + outb(reason, 0x61); +} + +static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +{ + printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); + printk("Dazed and confused, but trying to continue\n"); + printk("Do you have a strange power saving mode enabled?\n"); +} + +asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +{ + unsigned char reason = inb(0x61); + + if (!(reason & 0xc0)) { + unknown_nmi_error(reason, regs); + return; + } + if (reason & 0x80) + mem_parity_error(reason, regs); + if (reason & 0x40) + io_check_error(reason, regs); + /* + * Reassert NMI in case it became active meanwhile + * as it's edge-triggered. + */ + outb(0x8f, 0x70); + inb(0x71); /* dummy */ + outb(0x0f, 0x70); + inb(0x71); /* dummy */ +} + +asmlinkage void math_state_restore(struct pt_regs *regs, long error_code) +{ + __asm__ __volatile__("clts"); + + if ( !(current->flags & PF_USEDFPU) ) + { + if ( current->flags & PF_DONEFPUINIT ) + restore_fpu(current); + else + init_fpu(); + current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ + } + + if ( current->flags & PF_GUEST_STTS ) + { + struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id(); + gtb->flags = GTBF_TRAP_NOCODE; + gtb->cs = current->thread.traps[7].cs; + gtb->eip = current->thread.traps[7].address; + current->flags &= ~PF_GUEST_STTS; + } +} + + +/* + * Our handling of the processor debug registers is non-trivial. + * We do not clear them on entry and exit from the kernel. Therefore + * it is possible to get a watchpoint trap here from inside the kernel. + * However, the code in ./ptrace.c has ensured that the user can + * only set watchpoints on userspace addresses. Therefore the in-kernel + * watchpoint trap can only occur in code which is reading/writing + * from user space. Such code must not hold kernel locks (since it + * can equally take a page fault), therefore it is safe to call + * force_sig_info even though that claims and releases locks. + * + * Code in ./signal.c ensures that the debug control register + * is restored before we deliver any signal, and therefore that + * user code runs with the correct debug control register even though + * we clear it here. + * + * Being careful here means that we don't have to be as careful in a + * lot of more complicated places (task switching can be a bit lazy + * about restoring all the debug state, and ptrace doesn't have to + * find every occurrence of the TF bit that could be saved away even + * by user code) + */ +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + unsigned int condition; + struct task_struct *tsk = current; + + __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg[7]) + goto clear_dr7; + } + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg[6] = condition; + + panic("trap up to OS here, perhaps\n"); + + /* Disable additional traps. They'll be re-enabled when + * the signal is delivered. 
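
math_state_restore() above implements both halves of lazy FPU handling: the hypervisor's own fnsave/frstor laziness via PF_USEDFPU, and re-delivery of trap 7 to a guest that armed CR0.TS through do_fpu_taskswitch() (defined at the end of this file). A guest's trap-7 handler would therefore look roughly like this -- hypothetical guest-side code, with invented names, not part of this changeset:

    void guest_trap7_handler(void)
    {
        restore_my_fpu_context();      /* the guest's own lazy FPU switch */
        HYPERVISOR_fpu_taskswitch();   /* re-arm: hypervisor sets
                                          PF_GUEST_STTS and CR0.TS again  */
    }
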
+ */ + clear_dr7: + __asm__("movl %0,%%db7" + : /* no output */ + : "r" (0)); +} + + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ /* nothing */ } + + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \ +} while (0) + + +/* + * This needs to use 'idt_table' rather than 'idt', and + * thus use the _nonmapped_ version of the IDT, as the + * Pentium F0 0F bugfix can have resulted in the mapped + * IDT being write-protected. + */ +void set_intr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,0,addr); +} + +static void __init set_trap_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,15,0,addr); +} + +static void __init set_system_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,15,3,addr); +} + +static void __init set_call_gate(void *a, void *addr) +{ + _set_gate(a,12,3,addr); +} + +#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\ + *((gate_addr)+1) = ((base) & 0xff000000) | \ + (((base) & 0x00ff0000)>>16) | \ + ((limit) & 0xf0000) | \ + ((dpl)<<13) | \ + (0x00408000) | \ + ((type)<<8); \ + *(gate_addr) = (((base) & 0x0000ffff)<<16) | \ + ((limit) & 0x0ffff); } + +#define _set_tssldt_desc(n,addr,limit,type) \ +__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ + "movw %%ax,2(%2)\n\t" \ + "rorl $16,%%eax\n\t" \ + "movb %%al,4(%2)\n\t" \ + "movb %4,5(%2)\n\t" \ + "movb $0,6(%2)\n\t" \ + "movb %%ah,7(%2)\n\t" \ + "rorl $16,%%eax" \ + : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) + +void set_tss_desc(unsigned int n, void *addr) +{ + _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89); +} + +void set_ldt_desc(unsigned int n, void *addr, unsigned int size) +{ + _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); +} + +void __init trap_init(void) +{ + set_trap_gate(0,&divide_error); + set_trap_gate(1,&debug); + set_intr_gate(2,&nmi); + set_system_gate(3,&int3); /* int3-5 can be called from all */ + set_system_gate(4,&overflow); + set_system_gate(5,&bounds); + set_trap_gate(6,&invalid_op); + set_trap_gate(7,&device_not_available); + set_trap_gate(8,&double_fault); + set_trap_gate(9,&coprocessor_segment_overrun); + set_trap_gate(10,&invalid_TSS); + set_trap_gate(11,&segment_not_present); + set_trap_gate(12,&stack_segment); + set_trap_gate(13,&general_protection); + set_intr_gate(14,&page_fault); + set_trap_gate(15,&spurious_interrupt_bug); + set_trap_gate(16,&coprocessor_error); + set_trap_gate(17,&alignment_check); + set_trap_gate(18,&machine_check); + set_trap_gate(19,&simd_coprocessor_error); + + /* + * Cunning trick to allow arbitrary "INT n" handling. + * + * 1. 3 <= N <= 5 is trivial, as these are intended to be explicit. + * + * 2. All others, we set gate DPL == 0. Any use of "INT n" will thus + * cause a GPF with CS:EIP pointing at the faulting instruction. + * We can then peek at the instruction and check if it is of the + * form "0xCD ". If so, we fake out an exception to the + * guest OS. If the protected read page faults, we patch that up as + * a page fault to the guest OS. + * [NB. Of course we check the "soft DPL" to check that the guest OS + * wants to handle a particular 'n'. If not, we pass the GPF up + * to the guest OS untouched.] 
+ * + * 3. For efficiency, we may want to allow direct traps by the guest + * OS for certain critical vectors (eg. 0x80 in Linux). These must + * therefore not be mapped by hardware interrupts, and so we'd need + * a static list of them, which we add to on demand. + */ + + /* Only ring 1 can access monitor services. */ + _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call); + + /* + * Should be a barrier for any external CPU state. + */ + { + extern void cpu_init(void); + cpu_init(); + } +} + + +long do_set_trap_table(trap_info_t *traps) +{ + trap_info_t cur; + trap_info_t *dst = current->thread.traps; + + memset(dst, 0, sizeof(*dst) * 256); + + for ( ; ; ) + { + if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT; + if ( (cur.cs & 3) == 0 ) return -EPERM; + if ( cur.address == 0 ) break; + memcpy(dst+cur.vector, &cur, sizeof(cur)); + traps++; + } + + return(0); +} + + +long do_fpu_taskswitch(void) +{ + current->flags |= PF_GUEST_STTS; + stts(); + return 0; +} diff --git a/xen-2.4.16/arch/i386/usercopy.c b/xen-2.4.16/arch/i386/usercopy.c new file mode 100644 index 0000000000..56322f1b56 --- /dev/null +++ b/xen-2.4.16/arch/i386/usercopy.c @@ -0,0 +1,190 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen + * Copyright 1997 Linus Torvalds + */ +#include +#include +//#include + +#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + { + if(n<512) + __copy_user(to,from,n); + else + mmx_copy_user(to,from,n); + } + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + { + if(n<512) + __copy_user_zeroing(to,from,n); + else + mmx_copy_user_zeroing(to, from, n); + } + else + memset(to, 0, n); + return n; +} + +#else + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __copy_user(to,from,n); + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + prefetchw(to); + if (access_ok(VERIFY_READ, from, n)) + __copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +#endif + +/* + * Copy a null terminated string from userspace. 
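
A plain-C rendering of what the fixup-protected assembly below computes -- a sketch only; the real macro additionally tolerates faulting user pointers via the __ex_table section and yields -EFAULT instead of crashing:

    long strncpy_from_user_ref(char *dst, const char *src, long count)
    {
        long copied;
        for ( copied = 0; copied < count; copied++ )
        {
            char c = src[copied];       /* may fault in the real version  */
            dst[copied] = c;            /* the NUL, if reached, is copied */
            if ( c == '\0' )
                return copied;          /* length excluding the NUL       */
        }
        return count;                   /* buffer full, no NUL found      */
    }
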
+ */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +long +__strncpy_from_user(char *dst, const char *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} + +long +strncpy_from_user(char *dst, const char *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} + + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +unsigned long +clear_user(void *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} + +unsigned long +__clear_user(void *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ + +long strnlen_user(const char *s, long n) +{ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} diff --git a/xen-2.4.16/arch/i386/xeno.lds b/xen-2.4.16/arch/i386/xeno.lds new file mode 100644 index 0000000000..3f784a7c5d --- /dev/null +++ b/xen-2.4.16/arch/i386/xeno.lds @@ -0,0 +1,87 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(start) +SECTIONS +{ + . = 0xE0000000 + 0x100000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + .text.lock : { *(.text.lock) } /* out-of-line lock text */ + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . 
= ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/xen-2.4.16/common/Makefile b/xen-2.4.16/common/Makefile new file mode 100644 index 0000000000..12f1f7d2e9 --- /dev/null +++ b/xen-2.4.16/common/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o common.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/common/block.c b/xen-2.4.16/common/block.c new file mode 100644 index 0000000000..6ccdb107ed --- /dev/null +++ b/xen-2.4.16/common/block.c @@ -0,0 +1,21 @@ +/* block.c + * + * ring data structures for buffering messages between hypervisor and + * guestos's. + * + */ + +#include +#include + +/* + * create_block_ring + * + * domain: + * + * allocates space for a particular domain's block io ring. + */ +blk_ring_t *create_block_ring(int domain) +{ + printk ("XEN create block ring"); +} diff --git a/xen-2.4.16/common/brlock.c b/xen-2.4.16/common/brlock.c new file mode 100644 index 0000000000..e2bccec6a7 --- /dev/null +++ b/xen-2.4.16/common/brlock.c @@ -0,0 +1,69 @@ +/* + * + * linux/lib/brlock.c + * + * 'Big Reader' read-write spinlocks. See linux/brlock.h for details. + * + * Copyright 2000, Ingo Molnar + * Copyright 2000, David S. Miller + */ + +#include + +#ifdef CONFIG_SMP + +#include +#include + +#ifdef __BRLOCK_USE_ATOMICS + +brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] = + { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = RW_LOCK_UNLOCKED } }; + +void __br_write_lock (enum brlock_indices idx) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + write_lock(&__brlock_array[cpu_logical_map(i)][idx]); +} + +void __br_write_unlock (enum brlock_indices idx) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + write_unlock(&__brlock_array[cpu_logical_map(i)][idx]); +} + +#else /* ! __BRLOCK_USE_ATOMICS */ + +brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX] = + { [0 ... NR_CPUS-1] = { [0 ... __BR_IDX_MAX-1] = 0 } }; + +struct br_wrlock __br_write_locks[__BR_IDX_MAX] = + { [0 ... 
__BR_IDX_MAX-1] = { SPIN_LOCK_UNLOCKED } }; + +void __br_write_lock (enum brlock_indices idx) +{ + int i; + +again: + spin_lock(&__br_write_locks[idx].lock); + for (i = 0; i < smp_num_cpus; i++) + if (__brlock_array[cpu_logical_map(i)][idx] != 0) { + spin_unlock(&__br_write_locks[idx].lock); + barrier(); + cpu_relax(); + goto again; + } +} + +void __br_write_unlock (enum brlock_indices idx) +{ + spin_unlock(&__br_write_locks[idx].lock); +} + +#endif /* __BRLOCK_USE_ATOMICS */ + +#endif /* CONFIG_SMP */ diff --git a/xen-2.4.16/common/dom0_ops.c b/xen-2.4.16/common/dom0_ops.c new file mode 100644 index 0000000000..ee0984a360 --- /dev/null +++ b/xen-2.4.16/common/dom0_ops.c @@ -0,0 +1,90 @@ +/****************************************************************************** + * dom0_ops.c + * + * Process command requests from domain-0 guest OS. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include +#include + + +static unsigned int get_domnr(void) +{ + struct task_struct *p = &idle0_task; + unsigned long dom_mask = 0; + read_lock_irq(&tasklist_lock); + do { + if ( is_idle_task(p) ) continue; + set_bit(p->domain, &dom_mask); + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock_irq(&tasklist_lock); + return (dom_mask == ~0UL) ? 0 : ffz(dom_mask); +} + +long do_dom0_op(dom0_op_t *u_dom0_op) +{ + long ret = 0; + dom0_op_t op; + + if ( current->domain != 0 ) + return -EPERM; + + if ( copy_from_user(&op, u_dom0_op, sizeof(op)) ) + return -EFAULT; + + switch ( op.cmd ) + { + + case DOM0_NEWDOMAIN: + { + struct task_struct *p; + static unsigned int pro = 0; + unsigned int dom = get_domnr(); + ret = -ENOMEM; + if ( !dom ) break; + p = do_newdomain(); + if ( !p ) break; + p->domain = dom; + pro = (pro+1) % smp_num_cpus; + p->processor = pro; + ret = setup_guestos(p, &op.u.newdomain); /* Load guest OS into @p */ + if ( ret != 0 ) + { + p->state = TASK_DYING; + release_task(p); + break; + } + wake_up(p); /* Put @p on runqueue */ + reschedule(p); /* Force a scheduling decision on @p's CPU */ + ret = p->domain; + } + break; + + case DOM0_KILLDOMAIN: + { + unsigned int dom = op.u.killdomain.domain; + if ( dom == IDLE_DOMAIN_ID ) + { + ret = -EPERM; + } + else + { + ret = kill_other_domain(dom); + } + } + break; + + default: + ret = -ENOSYS; + + } + + return ret; +} diff --git a/xen-2.4.16/common/domain.c b/xen-2.4.16/common/domain.c new file mode 100644 index 0000000000..784b4e46e6 --- /dev/null +++ b/xen-2.4.16/common/domain.c @@ -0,0 +1,654 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; + +schedule_data_t schedule_data[NR_CPUS]; + +int wake_up(struct task_struct *p) +{ + unsigned long flags; + int ret = 0; + spin_lock_irqsave(&schedule_data[p->processor].lock, flags); + if ( __task_on_runqueue(p) ) goto out; + p->state = TASK_RUNNING; + __add_to_runqueue(p); + ret = 1; + + out: + spin_unlock_irqrestore(&schedule_data[p->processor].lock, flags); + return ret; +} + + +struct task_struct *do_newdomain(void) +{ + int retval; + struct task_struct *p = NULL; + + retval = -ENOMEM; + p = alloc_task_struct(); + if (!p) goto newdomain_out; + memset(p, 0, sizeof(*p)); + p->shared_info = (void *)get_free_page(GFP_KERNEL); + memset(p->shared_info, 0, sizeof(shared_info_t)); + + p->addr_limit = USER_DS; + p->state = TASK_UNINTERRUPTIBLE; + p->active_mm = &p->mm; + p->num_net_vifs = 0; + + /* + * KAF: Passing in newdomain struct to 
this function is gross! + * Therefore, for now we just allocate the single blk_ring + * before the multiple net_rings :-) + */ + p->blk_ring_base = (blk_ring_t *)(p->shared_info + 1); + p->net_ring_base = (net_ring_t *)(p->blk_ring_base + 1); + p->pg_head = p->pg_tail = p->tot_pages = 0; + write_lock_irq(&tasklist_lock); + SET_LINKS(p); + write_unlock_irq(&tasklist_lock); + + newdomain_out: + return(p); +} + + +void reschedule(struct task_struct *p) +{ + int cpu = p->processor; + struct task_struct *curr; + unsigned long flags; + + if ( p->has_cpu ) return; + + spin_lock_irqsave(&schedule_data[cpu].lock, flags); + curr = schedule_data[cpu].curr; + if ( is_idle_task(curr) ) + { + set_bit(_HYP_EVENT_NEED_RESCHED, &curr->hyp_events); + spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); +#ifdef CONFIG_SMP + if ( cpu != smp_processor_id() ) smp_send_event_check_cpu(cpu); +#endif + } + else + { + spin_unlock_irqrestore(&schedule_data[cpu].lock, flags); + } +} + + +static void process_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + wake_up(p); +} + +long schedule_timeout(long timeout) +{ + struct timer_list timer; + unsigned long expire; + + switch (timeout) + { + case MAX_SCHEDULE_TIMEOUT: + /* + * This special case exists for the caller's convenience. Nothing + * more. We could take MAX_SCHEDULE_TIMEOUT from one of the negative + * values, but I'd like to return a valid offset (>=0) to allow + * the caller to do everything it wants with the retval. + */ + schedule(); + goto out; + default: + /* + * Another bit of paranoia. Note that the retval will be 0 since no + * piece of the kernel is supposed to check for a negative retval of + * schedule_timeout() (since it should never happen anyway). You just + * have the printk() that will tell you if something has gone wrong, + * and where. + */ + if (timeout < 0) + { + printk(KERN_ERR "schedule_timeout: wrong timeout " + "value %lx from %p\n", timeout, + __builtin_return_address(0)); + current->state = TASK_RUNNING; + goto out; + } + } + + expire = timeout + jiffies; + + init_timer(&timer); + timer.expires = expire; + timer.data = (unsigned long) current; + timer.function = process_timeout; + + add_timer(&timer); + schedule(); + del_timer_sync(&timer); + + timeout = expire - jiffies; + + out: + return timeout < 0 ? 0 : timeout; +} + + +long do_yield(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return 0; +} + +/* Get a pointer to the specified domain. Consider replacing this + * with a hash lookup later. + * + * Also, kill_other_domain should call this instead of scanning on its own. + */ +struct task_struct *find_domain_by_id(unsigned int dom) +{ + struct task_struct *p = &idle0_task; + + read_lock_irq(&tasklist_lock); + do { + if ( (p->domain == dom) ) { + read_unlock_irq(&tasklist_lock); + return (p); + } + } while ( (p = p->next_task) != &idle0_task ); + read_unlock_irq(&tasklist_lock); + + return 0; +} + + +void kill_domain_with_errmsg(const char *err) +{ + printk("DOM%d FATAL ERROR: %s\n", + current->domain, err); + kill_domain(); +} + + +/* Kill the currently executing domain. 
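
The full teardown sequence, for orientation: kill_domain() below marks the current task TASK_DYING and calls schedule(); the scheduler tail then notices prev->state == TASK_DYING and calls release_task(), which unlinks the task, destroys its network vifs, and frees the shared_info page and the task struct. For a foreign domain, kill_other_domain() instead posts _EVENT_DIE and lets the victim run this same path against itself, as in the DOM0_KILLDOMAIN case above:

    ret = kill_other_domain(dom);      /* -ESRCH if no such domain */
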
*/ +void kill_domain(void) +{ + if ( current->domain == 0 ) + { + extern void machine_restart(char *); + printk("Domain 0 killed: rebooting machine!\n"); + machine_restart(0); + } + + printk("Killing domain %d\n", current->domain); + current->state = TASK_DYING; + schedule(); + BUG(); /* never get here */ +} + + +long kill_other_domain(unsigned int dom) +{ + struct task_struct *p = &idle0_task; + unsigned long cpu_mask = 0; + long ret = -ESRCH; + + read_lock_irq(&tasklist_lock); + do { + if ( p->domain == dom ) + { + cpu_mask = mark_guest_event(p, _EVENT_DIE); + ret = 0; + break; + } + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock_irq(&tasklist_lock); + + hyp_event_notify(cpu_mask); + + return ret; +} + + +/* Release resources belonging to task @p. */ +void release_task(struct task_struct *p) +{ + ASSERT(!__task_on_runqueue(p)); + ASSERT(p->state == TASK_DYING); + ASSERT(!p->has_cpu); + write_lock_irq(&tasklist_lock); + REMOVE_LINKS(p); + write_unlock_irq(&tasklist_lock); + + /* + * Safe! Only queue skbuffs with tasklist_lock held. + * Only access shared_info with tasklist_lock held. + * And free_task_struct() only releases if refcnt == 0. + */ + while ( p->num_net_vifs ) + { + destroy_net_vif(p); + } + free_page((unsigned long)p->shared_info); + free_task_struct(p); +} + + +asmlinkage void schedule(void) +{ + struct task_struct *prev, *next, *p; + struct list_head *tmp; + int this_cpu; + + need_resched_back: + prev = current; + this_cpu = prev->processor; + + spin_lock_irq(&schedule_data[this_cpu].lock); + + ASSERT(!in_interrupt()); + ASSERT(__task_on_runqueue(prev)); + + if ( !prev->counter ) + { + prev->counter = 2; + __move_last_runqueue(prev); + } + + switch ( prev->state ) + { + case TASK_INTERRUPTIBLE: + if ( signal_pending(prev) ) + { + prev->state = TASK_RUNNING; + break; + } + default: + __del_from_runqueue(prev); + case TASK_RUNNING:; + } + clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); + + next = NULL; + list_for_each(tmp, &schedule_data[smp_processor_id()].runqueue) { + p = list_entry(tmp, struct task_struct, run_list); + next = p; + break; + } + + prev->has_cpu = 0; + next->has_cpu = 1; + + schedule_data[this_cpu].prev = prev; + schedule_data[this_cpu].curr = next; + + spin_unlock_irq(&schedule_data[this_cpu].lock); + + if ( unlikely(prev == next) ) + { + /* We won't go through the normal tail, so do this by hand */ + prev->policy &= ~SCHED_YIELD; + goto same_process; + } + + prepare_to_switch(); + switch_to(prev, next); + prev = schedule_data[this_cpu].prev; + + prev->policy &= ~SCHED_YIELD; + if ( prev->state == TASK_DYING ) release_task(prev); + + same_process: + if ( test_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events) ) + goto need_resched_back; + return; +} + + +static unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes) +{ + + struct list_head *temp; + struct pfn_info *pf; + unsigned int alloc_pfns; + unsigned int req_pages; + + /* how many pages do we need to alloc? */ + req_pages = kbytes >> (PAGE_SHIFT - 10); + + /* is there enough mem to serve the request? 
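
The kbytes-to-pages conversion above: with 4 kB pages PAGE_SHIFT is 12, so the shift amount is (12 - 10) = 2, a divide by four. The default dom0 allocation of 16000 kB therefore becomes

    req_pages = 16000 >> 2 = 4000 page frames    /* 16000 kB / 4 kB */
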
*/ + if(req_pages > free_pfns) + return -1; + + /* allocate pages and build a thread through frame_table */ + temp = free_list.next; + printk("bd240 debug: DOM%d requesting %d pages\n", p->domain, req_pages); + for(alloc_pfns = req_pages; alloc_pfns; alloc_pfns--){ + pf = list_entry(temp, struct pfn_info, list); + pf->flags |= p->domain; + temp = temp->next; + list_del(&pf->list); + + if(p->pg_tail){ + pf->next = p->pg_tail; + (frame_table + pf->next)->prev = p->pg_tail = (pf - frame_table); + } else { + p->pg_head = (pf - frame_table); + p->pg_tail = p->pg_head; + pf->next = 0; + pf->prev = 0; + } + pf->prev = 0; + + free_pfns--; + } + + p->tot_pages = req_pages; + + return 0; +} + +/* + * Initial load map: + * start_address: + * OS image + * .... + * stack_start: + * start_info: + * + * page tables: + * + * end_address: + * shared_info: + * + */ +#define MB_PER_DOMAIN 16 +#include +#include +extern int nr_mods; +extern module_t *mod; +extern unsigned char *cmdline; +int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) +{ +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) +#define ALLOC_PAGE_FROM_DOMAIN() \ + ({ alloc_address -= PAGE_SIZE; __va(alloc_address); }) + char *src, *dst; + int i, dom = p->domain; + unsigned long start_address = MAX_MONITOR_ADDRESS; + unsigned long cur_address, end_address, alloc_address, vaddr; + unsigned long virt_load_address, virt_stack_address, virt_shinfo_address; + unsigned long virt_ftable_start_addr = 0, virt_ftable_end_addr; + unsigned int ft_size = 0; + start_info_t *virt_startinfo_address; + unsigned long long time; + l2_pgentry_t *l2tab; + l1_pgentry_t *l1tab = NULL; + struct pfn_info *page = NULL; + net_ring_t *net_ring; + blk_ring_t *blk_ring; + + if ( strncmp(__va(mod[0].mod_start), "XenoGues", 8) ) + { + printk("DOM%d: Invalid guest OS image\n", dom); + return -1; + } + + virt_load_address = *(unsigned long *)__va(mod[0].mod_start + 8); + if ( (virt_load_address & (PAGE_SIZE-1)) ) + { + printk("DOM%d: Guest OS load address not page-aligned (%08lx)\n", + dom, virt_load_address); + return -1; + } + + if ( alloc_new_dom_mem(p, params->memory_kb) ) return -ENOMEM; + + /* temporary, *_address have to be reimplemented in another way + * as we can no longer expect contiguous addr space + */ + start_address = p->pg_head << PAGE_SHIFT; + alloc_address = end_address = start_address + (p->tot_pages << PAGE_SHIFT); + + /* start_address += (dom * MB_PER_DOMAIN) << 20; */ /* MB -> bytes */ + /* alloc_address = end_address = start_address + (MB_PER_DOMAIN << 20); */ + + if ( (mod[nr_mods-1].mod_end-mod[0].mod_start) > + ((end_address-start_address)>>1) ) + { + printk("DOM%d: Guest OS image is too large\n" + " (%luMB is greater than %luMB limit for a\n" + " %luMB address space)\n", + dom, (mod[nr_mods-1].mod_end-mod[0].mod_start)>>20, + (end_address-start_address)>>21, + (end_address-start_address)>>20); + /* XXX Should release memory from alloc_new_dom_mem here XXX */ + return -1; + } + + /* Set up initial mappings. 
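
The guest image format assumed by the checks earlier in setup_guestos(), written out as a struct for clarity -- the struct name is invented here, but the layout is exactly what the code reads: magic at byte 0, load address at byte 8, image body from byte 12:

    struct xeno_image_hdr {
        char          magic[8];     /* must be "XenoGues"                */
        unsigned long load_addr;    /* page-aligned virtual load address */
        /* guest OS text/data follows at byte offset 12 */
    };
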
*/ + printk("DOM%d: Mapping physmem %08lx -> %08lx (%luMB)\n", dom, + start_address, end_address, (end_address-start_address)>>20); + printk("DOM%d: Guest OS virtual load address is %08lx\n", dom, + virt_load_address); + + l2tab = (l2_pgentry_t *)ALLOC_PAGE_FROM_DOMAIN(); + memcpy(l2tab, idle0_pg_table, sizeof(idle0_pg_table)); + memset(l2tab, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE*sizeof(l2_pgentry_t)); + p->mm.pagetable = mk_pagetable((unsigned long)l2tab); + + /* + * NB. The upper limit on this loop does one extra page. This is to + * make sure a pte exists when we want to map the shared_info struct. + */ + + /* bd240: not only one extra page but one + num of pages required for + * frame_table if domain 0 is in question. this ugly for loop + * condition is going to change once domain building is moved out + * of hypervisor. + */ + + if(dom == 0) + ft_size = frame_table_size; + + l2tab = pagetable_ptr(p->mm.pagetable) + + l2_table_offset(virt_load_address); + for ( cur_address = start_address; + cur_address != (end_address + PAGE_SIZE + ft_size); + cur_address += PAGE_SIZE ) + { + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) + { + l1tab = (l1_pgentry_t *)ALLOC_PAGE_FROM_DOMAIN(); + clear_page(l1tab); + l1tab += l1_table_offset( + virt_load_address + cur_address - start_address); + *l2tab++ = mk_l2_pgentry(__pa(l1tab)|L2_PROT); + } + *l1tab++ = mk_l1_pgentry(cur_address|L1_PROT); + + /* New domain doesn't own shared_info page, or frame_table. */ + if ( cur_address < end_address ) + { + page = frame_table + (cur_address >> PAGE_SHIFT); + page->flags = dom | PGT_writeable_page; + page->type_count = page->tot_count = 1; + } + } + + /* Pages that are part of page tables must be read-only. */ + vaddr = virt_load_address + alloc_address - start_address; + l2tab = pagetable_ptr(p->mm.pagetable) + l2_table_offset(vaddr); + l1tab = l2_pgentry_to_l1(*l2tab++) + l1_table_offset(vaddr); + for ( cur_address = alloc_address; + cur_address != end_address; + cur_address += PAGE_SIZE ) + { + *l1tab++ = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW); + if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) + l1tab = l2_pgentry_to_l1(*l2tab++); + page = frame_table + (cur_address >> PAGE_SHIFT); + page->flags = dom | PGT_l1_page_table; + page->tot_count++; + } + page->flags = dom | PGT_l2_page_table; + + /* Map in the the shared info structure. */ + virt_shinfo_address = end_address - start_address + virt_load_address; + l2tab = pagetable_ptr(p->mm.pagetable) + + l2_table_offset(virt_shinfo_address); + l1tab = l2_pgentry_to_l1(*l2tab) + + l1_table_offset(virt_shinfo_address); + *l1tab = mk_l1_pgentry(__pa(p->shared_info)|L1_PROT); + + /* Set up shared info area. 
*/ + rdtscll(time); + p->shared_info->wall_time = time; + p->shared_info->domain_time = time; + p->shared_info->ticks_per_ms = ticks_per_usec * 1000; + + /* for DOM0, setup mapping of frame table */ + if ( dom == 0 ) + { + virt_ftable_start_addr = virt_load_address + virt_shinfo_address + PAGE_SIZE; + virt_ftable_end_addr = virt_ftable_start_addr + frame_table_size; + for(cur_address = virt_ftable_start_addr; + cur_address < virt_ftable_end_addr; + cur_address += PAGE_SIZE) + { + l2tab = pagetable_ptr(p->mm.pagetable) + l2_table_offset(cur_address); + l1tab = l2_pgentry_to_l1(*l2tab) + l1_table_offset(cur_address); + *l1tab = mk_l1_pgentry(__pa(cur_address)|L1_PROT); + } + } + + virt_startinfo_address = (start_info_t *) + (alloc_address - start_address - PAGE_SIZE + virt_load_address); + virt_stack_address = (unsigned long)virt_startinfo_address; + + /* Install the new page tables. */ + __cli(); + __asm__ __volatile__ ( + "mov %%eax,%%cr3" + : : "a" (__pa(pagetable_ptr(p->mm.pagetable)))); + + /* Copy the guest OS image. */ + src = (char *)__va(mod[0].mod_start + 12); + dst = (char *)virt_load_address; + while ( src < (char *)__va(mod[nr_mods-1].mod_end) ) *dst++ = *src++; + + /* Set up start info area. */ + memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address)); + virt_startinfo_address->nr_pages = (end_address-start_address)>>PAGE_SHIFT; + virt_startinfo_address->shared_info = + (shared_info_t *)virt_shinfo_address; + virt_startinfo_address->pt_base = + end_address - PAGE_SIZE - start_address + virt_load_address; + virt_startinfo_address->phys_base = start_address; + /* NB. Next field will be NULL if dom != 0. */ + virt_startinfo_address->frame_table = virt_ftable_start_addr; + + /* Add virtual network interfaces and point to them in startinfo. */ + while (params->num_vifs-- > 0) { + net_ring = create_net_vif(dom); + if (!net_ring) panic("no network ring!\n"); + } + virt_startinfo_address->net_rings = p->net_ring_base; + virt_startinfo_address->num_net_rings = p->num_net_vifs; + + /* Add block io interface */ + virt_startinfo_address->blk_ring = p->blk_ring_base; + + /* We tell the OS about any modules we were given. */ + if ( nr_mods > 1 ) + { + virt_startinfo_address->mod_start = + (mod[1].mod_start-mod[0].mod_start-12) + virt_load_address; + virt_startinfo_address->mod_len = + mod[nr_mods-1].mod_end - mod[1].mod_start; + } + + dst = virt_startinfo_address->cmd_line; + if ( mod[0].string ) + { + char *modline = (char *)__va(mod[0].string); + for ( i = 0; i < 255; i++ ) + { + if ( modline[i] == '\0' ) break; + *dst++ = modline[i]; + } + } + *dst = '\0'; + + if ( opt_nfsroot ) + { + unsigned char boot[150]; + unsigned char ipbase[20], nfsserv[20], gateway[20], netmask[20]; + unsigned char nfsroot[70]; + snprintf(nfsroot, 70, opt_nfsroot, dom); + snprintf(boot, sizeof(boot), + " root=/dev/nfs ip=%s:%s:%s:%s::eth0:off nfsroot=%s", + quad_to_str(opt_ipbase + dom, ipbase), + quad_to_str(opt_nfsserv, nfsserv), + quad_to_str(opt_gateway, gateway), + quad_to_str(opt_netmask, netmask), + nfsroot); + strcpy(dst, boot); + } + + /* Reinstate the caller's page tables. 
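
An example of the command line assembled above, with hypothetical option values -- ipbase 10.0.0.2, nfsserv and gateway 10.0.0.1, netmask 255.255.255.0, nfsroot "/export/dom%d", dom == 1. Since the guest's address is ipbase + dom, dst receives:

     root=/dev/nfs ip=10.0.0.3:10.0.0.1:10.0.0.1:255.255.255.0::eth0:off nfsroot=/export/dom1
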
*/ + __asm__ __volatile__ ( + "mov %%eax,%%cr3" + : : "a" (__pa(pagetable_ptr(current->mm.pagetable)))); + __sti(); + + new_thread(p, + (unsigned long)virt_load_address, + (unsigned long)virt_stack_address, + (unsigned long)virt_startinfo_address); + + return 0; +} + + +void __init domain_init(void) +{ + int i; + for ( i = 0; i < NR_CPUS; i++ ) + { + INIT_LIST_HEAD(&schedule_data[i].runqueue); + spin_lock_init(&schedule_data[i].lock); + schedule_data[i].prev = &idle0_task; + schedule_data[i].curr = &idle0_task; + } +} + + + +#if 0 + unsigned long s = (mod[ 0].mod_start + (PAGE_SIZE-1)) & PAGE_MASK; + unsigned long e = (mod[nr_mods-1].mod_end + (PAGE_SIZE-1)) & PAGE_MASK; + while ( s != e ) + { + free_pages((unsigned long)__va(s), 0); + s += PAGE_SIZE; + } +#endif + diff --git a/xen-2.4.16/common/event.c b/xen-2.4.16/common/event.c new file mode 100644 index 0000000000..6a81c63f8b --- /dev/null +++ b/xen-2.4.16/common/event.c @@ -0,0 +1,33 @@ +/****************************************************************************** + * event.c + * + * A nice interface for passing per-domain asynchronous events. + * These events are handled in the hypervisor, prior to return + * to the guest OS. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include + +typedef void (*hyp_event_callback_fn_t)(void); + +extern void schedule(void); +extern void flush_rx_queue(void); + +/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */ +static hyp_event_callback_fn_t event_call_fn[] = +{ + schedule, + flush_rx_queue, + kill_domain +}; + +/* Handle outstanding events for the currently-executing domain. */ +void do_hyp_events(void) +{ + int nr; + while ( (nr = ffs(current->hyp_events)) != 0 ) + (event_call_fn[nr-1])(); +} diff --git a/xen-2.4.16/common/kernel.c b/xen-2.4.16/common/kernel.c new file mode 100644 index 0000000000..3e860e836b --- /dev/null +++ b/xen-2.4.16/common/kernel.c @@ -0,0 +1,358 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* VGA text definitions. */ +#define COLUMNS 80 +#define LINES 24 +#define ATTRIBUTE 7 +#define VIDEO __va(0xB8000) + +static int xpos, ypos; +static volatile unsigned char *video; + +spinlock_t console_lock = SPIN_LOCK_UNLOCKED; + +struct e820entry { + unsigned long addr_lo, addr_hi; /* start of memory segment */ + unsigned long size_lo, size_hi; /* size of memory segment */ + unsigned long type; /* type of memory segment */ +}; + +/* Used by domain.c:setup_guestos */ +int nr_mods; +module_t *mod; + +void init_serial(void); +void start_of_day(void); + +/* Command line options and variables. 
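
The parser in cmain() below accepts space-separated name=value pairs after the image name; a representative boot line, with hypothetical values, would be:

    xeno.gz ipbase=10.0.0.2 nfsserv=10.0.0.1 gateway=10.0.0.1 netmask=255.255.255.0 nfsroot=/export/dom%d dom0_mem=32000

IP-typed options are packed into an unsigned long by str_to_quad() (common/lib.c), string options are copied verbatim (the %d in nfsroot is expanded per-domain later, in setup_guestos()), and dom0_mem is parsed as a decimal kilobyte count.
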
*/ +unsigned long opt_ipbase=0, opt_nfsserv=0, opt_gateway=0, opt_netmask=0; +unsigned char opt_nfsroot[50]=""; +unsigned int opt_dom0_mem = 16000; /* default kbytes for DOM0 */ +enum { OPT_IP, OPT_STR, OPT_UINT }; +static struct { + unsigned char *name; + int type; + void *var; +} opts[] = { + { "ipbase", OPT_IP, &opt_ipbase }, + { "nfsserv", OPT_IP, &opt_nfsserv }, + { "gateway", OPT_IP, &opt_gateway }, + { "netmask", OPT_IP, &opt_netmask }, + { "nfsroot", OPT_STR, &opt_nfsroot }, + { "dom0_mem", OPT_UINT, &opt_dom0_mem }, + { NULL, 0, NULL } +}; + +void cmain (unsigned long magic, multiboot_info_t *mbi) +{ + struct task_struct *new_dom; + dom0_newdomain_t dom0_params; + unsigned long max_page, remaining_hypervisor_memory; + unsigned char *cmdline; + int i; + + init_serial(); + cls(); + + if ( magic != MULTIBOOT_BOOTLOADER_MAGIC ) + { + printf("Invalid magic number: 0x%x\n", (unsigned)magic); + return; + } + + /* + * We require some kind of memory and module information. + * The rest we can fake! + */ + if ( (mbi->flags & 9) != 9 ) + { + printf("Bad flags passed by bootloader: 0x%x\n", (unsigned)mbi->flags); + return; + } + + if ( mbi->mods_count == 0 ) + { + printf("Require at least one module!\n"); + return; + } + + /* Are mmap_* valid? */ +#if 0 + if ( (mbi->flags & (1<<6)) ) + { + memory_map_t *mmap = (memory_map_t *)mbi->mmap_addr; + struct e820entry *e820 = E820_MAP; + + while ( (unsigned long)mmap < (mbi->mmap_addr + mbi->mmap_length) ) + { + e820->addr_lo = mmap->base_addr_low; + e820->addr_hi = mmap->base_addr_high; + e820->size_lo = mmap->length_low; + e820->size_hi = mmap->length_high; + e820->type = mmap->type; + e820++; + mmap = (memory_map_t *) + ((unsigned long)mmap + mmap->size + sizeof (mmap->size)); + } + } +#endif + + nr_mods = mbi->mods_count; + mod = (module_t *)__va(mbi->mods_addr); + + /* Parse the command line. */ + cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL); + if ( cmdline != NULL ) + { + unsigned char *opt_end, *opt; + while ( *cmdline == ' ' ) cmdline++; + cmdline = strchr(cmdline, ' '); + while ( cmdline != NULL ) + { + while ( *cmdline == ' ' ) cmdline++; + if ( (opt = strchr(cmdline, '=')) == NULL ) break; + *opt++ = '\0'; + opt_end = strchr(opt, ' '); + if ( opt_end != NULL ) *opt_end++ = '\0'; + for ( i = 0; opts[i].name != NULL; i++ ) + { + if ( strcmp(opts[i].name, cmdline ) == 0 ) + { + if ( opts[i].type == OPT_IP ) + { + *(unsigned long *)opts[i].var = str_to_quad(opt); + } + else if(opts[i].type == OPT_STR) + { + strcpy(opts[i].var, opt); + } + else /* opts[i].type == OPT_UINT */ + { + *(unsigned int *)opts[i].var = simple_strtol(opt, (char **)&opt, 10); + } + break; + } + } + cmdline = opt_end; + } + } + + memcpy(&idle0_task_union, &first_task_struct, sizeof(first_task_struct)); + + max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10); + if ( max_page > (MAX_USABLE_ADDRESS >> PAGE_SHIFT) ) + max_page = MAX_USABLE_ADDRESS >> PAGE_SHIFT; + /* mem_upper is address of first memory hole in high memory, minus 1MB. */ + /* PS. mem_upper is in kB. */ + remaining_hypervisor_memory = init_frametable(max_page); + printk("Initialised %luMB of memory on a %luMB machine\n", + max_page >> (20-PAGE_SHIFT), (mbi->mem_upper>>10)+1); + + init_page_allocator(mod[nr_mods-1].mod_end, remaining_hypervisor_memory); + + /* These things will get done by do_newdomain() for all other tasks. 
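
Arithmetic behind the sizing above: mem_upper counts kB above the 1 MB hole, so on a hypothetical 64 MB machine the bootloader reports mem_upper == 64512 and

    max_page = (64512 + 1024) >> (PAGE_SHIFT - 10)
             = 65536 >> 2
             = 16384 page frames                /* 65536 kB / 4 kB */

which matches the "Initialised 64MB of memory" message: 16384 >> (20 - 12) == 64.
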
*/ + current->shared_info = (void *)get_free_page(GFP_KERNEL); + memset(current->shared_info, 0, sizeof(shared_info_t)); + set_fs(USER_DS); + current->num_net_vifs = 0; + + start_of_day(); + + /* Create initial domain 0. */ + dom0_params.num_vifs = 1; + dom0_params.memory_kb = opt_dom0_mem; + + new_dom = do_newdomain(); + if ( new_dom == NULL ) panic("Error creating domain 0\n"); + new_dom->processor = 0; + new_dom->domain = 0; + if ( setup_guestos(new_dom, &dom0_params) != 0 ) + { + panic("Could not set up DOM0 guest OS\n"); + } + wake_up(new_dom); + + cpu_idle(); +} + + +#define SERIAL_BASE 0x3f8 +#define RX_BUF 0 +#define TX_HOLD 0 +#define INT_ENABLE 1 +#define INT_IDENT 2 +#define DATA_FORMAT 3 +#define LINE_CTL 4 +#define LINE_STATUS 5 +#define LINE_IN 6 +#define DIVISOR_LO 0 +#define DIVISOR_HI 1 + +void init_serial(void) +{ + /* 9600 baud, no parity, 1 stop bit, 8 data bits. */ + outb(0x83, SERIAL_BASE+DATA_FORMAT); + outb(12, SERIAL_BASE+DIVISOR_LO); + outb(0, SERIAL_BASE+DIVISOR_HI); + outb(0x03, SERIAL_BASE+DATA_FORMAT); + + /* No interrupts. */ + outb(0x00, SERIAL_BASE+INT_ENABLE); +} + + +void putchar_serial(unsigned char c) +{ + if ( c == '\n' ) putchar_serial('\r'); + if ( (c != '\n') && (c != '\r') && ((c < 32) || (c > 126)) ) return; + while ( !(inb(SERIAL_BASE+LINE_STATUS)&(1<<5)) ) barrier(); + outb(c, SERIAL_BASE+TX_HOLD); +} + + +/* Clear the screen and initialize VIDEO, XPOS and YPOS. */ +void cls (void) +{ + int i; + + video = (unsigned char *) VIDEO; + + for (i = 0; i < COLUMNS * LINES * 2; i++) + *(video + i) = 0; + + xpos = 0; + ypos = 0; + + outw(10+(1<<(5+8)), 0x3d4); /* cursor off */ +} + + +/* Put the character C on the screen. */ +static void putchar (int c) +{ + static char zeroarr[2*COLUMNS] = { 0 }; + + putchar_serial(c); + + if (c == '\n' || c == '\r') + { + newline: + xpos = 0; + ypos++; + if (ypos >= LINES) + { + ypos = LINES-1; + memcpy((char*)video, + (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); + memcpy((char*)video + (LINES-1)*2*COLUMNS, + zeroarr, 2*COLUMNS); + } + return; + } + + *(video + (xpos + ypos * COLUMNS) * 2) = c & 0xFF; + *(video + (xpos + ypos * COLUMNS) * 2 + 1) = ATTRIBUTE; + + xpos++; + if (xpos >= COLUMNS) + goto newline; +} + +static inline void __putstr(const char *str) +{ + while ( *str ) putchar(*str++); +} + +void printf (const char *fmt, ...) +{ + va_list args; + char buf[1024], *p; + unsigned long flags; + + va_start(args, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + p = buf; + spin_lock_irqsave(&console_lock, flags); + while ( *p ) putchar(*p++); + spin_unlock_irqrestore(&console_lock, flags); +} + +void panic(const char *fmt, ...) +{ + va_list args; + char buf[1024], *p; + unsigned long flags; + extern void machine_restart(char *); + + va_start(args, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + /* Spit out multiline message in one go. */ + spin_lock_irqsave(&console_lock, flags); + __putstr("\n****************************************\n"); + p = buf; + while ( *p ) putchar(*p++); + __putstr("Aieee! CPU"); + putchar((char)smp_processor_id() + '0'); + __putstr(" is toast...\n"); + __putstr("****************************************\n\n"); + __putstr("Reboot in five seconds...\n"); + spin_unlock_irqrestore(&console_lock, flags); + + mdelay(5000); + machine_restart(0); +} + +/* No-op syscall. 
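
The serial setup earlier in this file picks its speed through the 16550 divisor latch: baud = 115200 / divisor, so the divisor of 12 written by init_serial() gives the advertised 9600 baud. To run the console at 115200 instead one would write (a sketch, reusing the same port macros):

    outb(0x83, SERIAL_BASE+DATA_FORMAT);   /* DLAB on: expose divisor  */
    outb(1,    SERIAL_BASE+DIVISOR_LO);    /* 115200 / 1 = 115200 baud */
    outb(0,    SERIAL_BASE+DIVISOR_HI);
    outb(0x03, SERIAL_BASE+DATA_FORMAT);   /* DLAB off: 8n1, as before */
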
*/ +asmlinkage long sys_ni_syscall(void) +{ + return -ENOSYS; +} + + +long do_console_write(char *str, int count) +{ +#define SIZEOF_BUF 256 + unsigned char safe_str[SIZEOF_BUF]; + unsigned long flags; + int i; + unsigned char prev = '\n'; + + if ( count > SIZEOF_BUF ) count = SIZEOF_BUF; + + if ( copy_from_user(safe_str, str, count) ) + return -EFAULT; + + spin_lock_irqsave(&console_lock, flags); + for ( i = 0; i < count; i++ ) + { + if ( prev == '\n' ) + { + __putstr("DOM"); + putchar(current->domain+'0'); + __putstr(": "); + } + if ( !safe_str[i] ) break; + putchar(prev = safe_str[i]); + } + if ( prev != '\n' ) putchar('\n'); + spin_unlock_irqrestore(&console_lock, flags); + + return(0); +} diff --git a/xen-2.4.16/common/lib.c b/xen-2.4.16/common/lib.c new file mode 100644 index 0000000000..2a55b992e8 --- /dev/null +++ b/xen-2.4.16/common/lib.c @@ -0,0 +1,171 @@ + +#include +#include + +int memcmp(const void * cs,const void * ct,size_t count) +{ + const unsigned char *su1, *su2; + signed char res = 0; + + for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +void * memcpy(void * dest,const void *src,size_t count) +{ + char *tmp = (char *) dest, *s = (char *) src; + + while (count--) + *tmp++ = *s++; + + return dest; +} + +int strncmp(const char * cs,const char * ct,size_t count) +{ + register signed char __res = 0; + + while (count) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + count--; + } + + return __res; +} + +int strcmp(const char * cs,const char * ct) +{ + register signed char __res; + + while (1) { + if ((__res = *cs - *ct++) != 0 || !*cs++) + break; + } + + return __res; +} + +char * strcpy(char * dest,const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +char * strncpy(char * dest,const char *src,size_t count) +{ + char *tmp = dest; + + while (count-- && (*dest++ = *src++) != '\0') + /* nothing */; + + return tmp; +} + +void * memset(void * s,int c,size_t count) +{ + char *xs = (char *) s; + + while (count--) + *xs++ = c; + + return s; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +size_t strlen(const char * s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * strchr(const char * s, int c) +{ + for(; *s != (char) c; ++s) + if (*s == '\0') + return NULL; + return (char *) s; +} + +char * strstr(const char * s1,const char * s2) +{ + int l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *) s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1,s2,l2)) + return (char *) s1; + s1++; + } + return NULL; +} + + +/* for inc/ctype.h */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ 
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + + +unsigned long str_to_quad(unsigned char *s) +{ + unsigned long quad = 0; + do { + quad <<= 8; + quad |= simple_strtol(s, (char **)&s, 10); + } + while ( *s++ == '.' ); + return quad; +} + + +unsigned char *quad_to_str(unsigned long q, unsigned char *s) +{ + sprintf(s, "%ld.%ld.%ld.%ld", + (q>>24)&255, (q>>16)&255, (q>>8)&255, (q>>0)&255); + return s; +} + + diff --git a/xen-2.4.16/common/memory.c b/xen-2.4.16/common/memory.c new file mode 100644 index 0000000000..2d6069861e --- /dev/null +++ b/xen-2.4.16/common/memory.c @@ -0,0 +1,654 @@ +/****************************************************************************** + * memory.c + * + * Copyright (c) 2002 K A Fraser + * + * A description of the page table API: + * + * Domains trap to process_page_updates with a list of update requests. + * This is a list of (ptr, val) pairs, where the requested operation + * is *ptr = val. The exceptions are when ptr is PGREQ_ADD_BASEPTR, or + * PGREQ_REMOVE_BASEPTR. + * + * Reference counting of pages: + * ---------------------------- + * Each page has two refcounts: tot_count and type_count. + * + * TOT_COUNT is the obvious reference count. It counts all uses of a + * physical page frame by a domain, including uses as a page directory, + * a page table, or simple mappings via a PTE. This count prevents a + * domain from releasing a frame back to the hypervisor's free pool when + * it is still referencing it! + * + * TYPE_COUNT is more subtle. A frame can be put to one of three + * mutually-exclusive uses: it might be used as a page directory, or a + * page table, or it may be mapped writeable by the domain [of course, a + * frame may not be used in any of these three ways!]. + * So, type_count is a count of the number of times a frame is being + * referred to in its current incarnation. Therefore, a page can only + * change its type when its type count is zero. + * + * A further note on writeable page mappings: + * ------------------------------------------ + * For simplicity, the count of writeable mappings for a page may not + * correspond to reality. The 'writeable count' is incremented for every + * PTE which maps the page with the _PAGE_RW flag set. However, for + * write access to be possible the page directory entry must also have + * its _PAGE_RW bit set. We do not check this as it complicates the + * reference counting considerably [consider the case of multiple + * directory entries referencing a single page table, some with the RW + * bit set, others not -- it starts getting a bit messy]. + * In normal use, this simplification shouldn't be a problem. + * However, the logic can be added if required. + * + * One more note on read-only page mappings: + * ----------------------------------------- + * We want domains to be able to map pages for read-only access. The + * main reason is that page tables and directories should be readable + * by a domain, but it would not be safe for them to be writeable. + * However, domains have free access to rings 1 & 2 of the Intel + * privilege model. 
In terms of page protection, these are considered + * to be part of 'supervisor mode'. The WP bit in CR0 controls whether + * read-only restrictions are respected in supervisor mode -- if the + * bit is clear then any mapped page is writeable. + * + * We get round this by always setting the WP bit and disallowing + * updates to it. This is very unlikely to cause a problem for guest + * OS's, which will generally use the WP bit to simplify copy-on-write + * implementation (in that case, OS wants a fault when it writes to + * an application-supplied buffer). + */ + + +/* + * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE. + * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.] + * ----------------------------------------------------------------------- + * + * ********* + * UPDATE 15/7/02: Interface has changed --updates now specify physical + * address of page-table entry, rather than specifying a virtual address, + * so hypervisor no longer "walks" the page tables. Therefore the + * solution below cannot work. Another possibility is to add a new entry + * to our "struct page" which says to which top-level page table each + * lower-level page table or writeable mapping belongs. If it belongs to more + * than one, we'd probably just flush on all processors running the domain. + * ********* + * + * ** 1 ** + * The problem involves creating new page tables which might be mapped + * writeable in the TLB of another processor. As an example, a domain might be + * running in two contexts (ie. on two processors) simultaneously, using the + * same top-level page table in both contexts. Now, if context 1 sends an + * update request [make page P read-only, add a reference to page P as a page + * table], that will succeed if there was only one writeable mapping of P. + * However, that mapping may persist in the TLB of context 2. + * + * Solution: when installing a new page table, we must flush foreign TLBs as + * necessary. Naive solution is to flush on any processor running our domain. + * Cleverer solution is to flush on any processor running same top-level page + * table, but this will sometimes fail (consider two different top-level page + * tables which have a shared lower-level page table). + * + * A better solution: when squashing a write reference, check how many times + * that lowest-level table entry is referenced by ORing refcounts of tables + * down the page-table hierarchy. If results is != 1, we require flushing all + * instances of current domain if a new table is installed (because the + * lowest-level entry may be referenced by many top-level page tables). + * However, common case will be that result == 1, so we only need to flush + * processors with the same top-level page table. Make choice at + * table-installation time based on a `flush_level' flag, which is + * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this + * to FLUSH_NONE, while squashed write mappings can only promote up + * to more aggressive flush types. + * + * ** 2 ** + * Same problem occurs when removing a page table, at level 1 say, then + * making it writeable. Need a TLB flush between otherwise another processor + * might write an illegal mapping into the old table, while yet another + * processor can use the illegal mapping because of a stale level-2 TLB + * entry. So, removal of a table reference sets 'flush_level' appropriately, + * and a flush occurs on next addition of a fresh write mapping. 
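+ *
+ * As a sketch of the promotion lattice described above (no such field
+ * exists in this changeset yet; the names follow the text):
+ *
+ *     enum { FLUSH_NONE = 0, FLUSH_PAGETABLE, FLUSH_DOMAIN } flush_level;
+ *
+ *     if ( needed > flush_level )   -- squashing a write mapping may
+ *         flush_level = needed;     -- only promote the level...
+ *     ...
+ *     flush_level = FLUSH_NONE;     -- ...and a TLB flush resets it.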
+ * + * BETTER SOLUTION FOR BOTH 1 AND 2: + * When type_refcnt goes to zero, leave old type in place (don't set to + * PGT_none). Then, only flush if making a page table of a page with + * (cnt=0,type=PGT_writeable), or when adding a write mapping for a page + * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages + * with refcnt==0 to be reset to PGT_none. Need an array for the purpose, + * added to when a type_refcnt goes to zero, and emptied on a TLB flush. + * Either have per-domain table, or force TLB flush at end of each + * call to 'process_page_updates'. + * Most OSes will always keep a writeable reference hanging around, and + * page table structure is fairly static, so this mechanism should be + * fairly cheap. + * + * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]: + * If we need to force an intermediate flush, those other processors + * spin until we complete, then do a single TLB flush. They can spin on + * the lock protecting 'process_page_updates', and continue when that + * is freed. Saves cost of setting up and servicing an IPI: later + * communication is synchronous. Processors trying to install the domain + * or domain&pagetable would also enter the spin. + * + * ** 3 ** + * Indeed, this problem generalises to reusing page tables at different + * levels of the hierarchy (conceptually, the guest OS can use the + * hypervisor to introduce illegal table entries by proxy). Consider + * unlinking a level-1 page table and reintroducing at level 2 with no + * TLB flush. Hypervisor can add a reference to some other level-1 table + * with the RW bit set. This is fine in the level-2 context, but some + * other processor may still be using that table in level-1 context + * (due to a stale TLB entry). At level 1 it may look like the + * processor has write access to the other level-1 page table! Therefore + * can add illegal values there with impunity :-( + * + * Fortunately, the solution above generalises to this extended problem. + */ + +/* + * UPDATE 12.11.02.: We no longer have struct page and mem_map. These + * have been replaced by struct pfn_info and frame_table respectively. + * + * system_free_list is a list_head linking all system owned free pages. + * it is initialized in init_frametable. + * + * Boris Dragovic. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 1 +#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a ) +#else +#define MEM_LOG(_f, _a...) ((void)0) +#endif + +/* 'get' checks parameter for validity before inc'ing refcnt. */ +static int get_l2_table(unsigned long page_nr); +static int get_l1_table(unsigned long page_nr); +static int get_page(unsigned long page_nr, int writeable); +static int inc_page_refcnt(unsigned long page_nr, unsigned int type); +/* 'put' does no checking because if refcnt not zero, entity must be valid. 
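+ * As an illustrative sketch of the pairing: a successful 'get'
+ * validates the frame and takes both refcounts, and must eventually
+ * be balanced by the matching 'put', e.g.
+ *
+ *     if ( get_l1_table(pfn) == 0 )
+ *     {
+ *         ... pfn is now pinned as an L1 page table ...
+ *         put_l1_table(pfn);
+ *     }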
*/ +static int put_l2_table(unsigned long page_nr); +static void put_l1_table(unsigned long page_nr); +static void put_page(unsigned long page_nr, int writeable); +static int dec_page_refcnt(unsigned long page_nr, unsigned int type); + +static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t); +static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); + +/* frame table size and its size in pages */ +frame_table_t * frame_table; +unsigned long frame_table_size; +unsigned long max_page; + +struct list_head free_list; +unsigned int free_pfns; + + +/* + * init_frametable: + * Initialise per-frame memory information. The return value + * is the amount of memory available for use by the rest of Xen. + * The very highest frames are reserved for the per-frame info. + * This function should be called before initialising the + * page allocator! + */ +unsigned long __init init_frametable(unsigned long nr_pages) +{ + struct pfn_info *pf; + unsigned long page_index; + + max_page = nr_pages; + frame_table_size = nr_pages * sizeof(struct pfn_info); + frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK; + free_pfns = nr_pages - (MAX_MONITOR_ADDRESS >> PAGE_SHIFT); + + frame_table = phys_to_virt(MAX_MONITOR_ADDRESS - frame_table_size); + memset(frame_table, 0, frame_table_size); + + /* Put all domain-allocatable memory on a free list. */ + INIT_LIST_HEAD(&free_list); + for( page_index = MAX_MONITOR_ADDRESS >> PAGE_SHIFT; + page_index < nr_pages; + page_index++ ) + { + pf = list_entry(&frame_table[page_index].list, struct pfn_info, list); + list_add_tail(&pf->list, &free_list); + } + + /* Return the remaing Xen-allocatable memory. */ + return(MAX_MONITOR_ADDRESS - frame_table_size); +} + + +/* Return original refcnt, or -1 on error. */ +static int inc_page_refcnt(unsigned long page_nr, unsigned int type) +{ + struct pfn_info *page; + unsigned long flags; + + if ( page_nr >= max_page ) + { + MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page); + return(-1); + } + page = frame_table + page_nr; + flags = page->flags; + if ( (flags & PG_domain_mask) != current->domain ) + { + MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask); + return(-1); + } + if ( (flags & PG_type_mask) != type ) + { + if ( page_type_count(page) != 0 ) + { + MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld", + page_nr << PAGE_SHIFT, + flags & PG_type_mask, type, page_type_count(page)); + return(-1); + } + page->flags |= type; + } + + get_page_tot(page); + return(get_page_type(page)); +} + +/* Return new refcnt, or -1 on error. */ +static int dec_page_refcnt(unsigned long page_nr, unsigned int type) +{ + struct pfn_info *page; + int ret; + + if ( page_nr >= max_page ) + { + MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page); + return(-1); + } + page = frame_table + page_nr; + if ( (page->flags & (PG_type_mask | PG_domain_mask)) != + (type | current->domain) ) + { + MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)", + page->flags & PG_domain_mask, page->flags & PG_type_mask, + type); + return(-1); + } + ASSERT(page_type_count(page) != 0); + if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask; + put_page_tot(page); + return(ret); +} + + +static int get_l2_table(unsigned long page_nr) +{ + l2_pgentry_t *p_l2_entry, l2_entry; + int i, ret=0; + + ret = inc_page_refcnt(page_nr, PGT_l2_page_table); + if ( ret != 0 ) return((ret < 0) ? ret : 0); + + /* NEW level-2 page table! Deal with every PDE in the table. 
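+ * (A zero return from inc_page_refcnt() above means this is the
+ * frame's first use as an L2 table, so every present PDE must now be
+ * validated: PSE/GLOBAL bits are rejected, and each referenced L1
+ * table gets its own reference via get_l1_table().)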
*/ + p_l2_entry = (l2_pgentry_t *)__va(page_nr << PAGE_SHIFT); + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + { + l2_entry = *p_l2_entry++; + if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue; + if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) ) + { + MEM_LOG("Bad L2 page type settings %04lx", + l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)); + return(-1); + } + ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry)); + if ( ret ) return(ret); + } + + /* Now we simply slap in our high mapping. */ + memcpy(p_l2_entry, idle0_pg_table + DOMAIN_ENTRIES_PER_L2_PAGETABLE, + HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t)); + + return(ret); +} + +static int get_l1_table(unsigned long page_nr) +{ + l1_pgentry_t *p_l1_entry, l1_entry; + int i, ret; + + /* Update ref count for page pointed at by PDE. */ + ret = inc_page_refcnt(page_nr, PGT_l1_page_table); + if ( ret != 0 ) return((ret < 0) ? ret : 0); + + /* NEW level-1 page table! Deal with every PTE in the table. */ + p_l1_entry = (l1_pgentry_t *)__va(page_nr << PAGE_SHIFT); + for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) + { + l1_entry = *p_l1_entry++; + if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue; + if ( (l1_pgentry_val(l1_entry) & + (_PAGE_GLOBAL|_PAGE_PAT)) ) + { + MEM_LOG("Bad L1 page type settings %04lx", + l1_pgentry_val(l1_entry) & + (_PAGE_GLOBAL|_PAGE_PAT)); + return(-1); + } + ret = get_page(l1_pgentry_to_pagenr(l1_entry), + l1_pgentry_val(l1_entry) & _PAGE_RW); + if ( ret ) return(ret); + } + + return(ret); +} + +static int get_page(unsigned long page_nr, int writeable) +{ + struct pfn_info *page; + unsigned long flags; + + /* Update ref count for page pointed at by PTE. */ + if ( page_nr >= max_page ) + { + MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page); + return(-1); + } + page = frame_table + page_nr; + flags = page->flags; + if ( (flags & PG_domain_mask) != current->domain ) + { + MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask); + return(-1); + } + + if ( writeable ) + { + if ( (flags & PG_type_mask) != PGT_writeable_page ) + { + if ( page_type_count(page) != 0 ) + { + MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld", + flags & PG_type_mask, PGT_writeable_page, + page_type_count(page)); + return(-1); + } + page->flags |= PGT_writeable_page; + } + get_page_type(page); + } + + get_page_tot(page); + + return(0); +} + +static int put_l2_table(unsigned long page_nr) +{ + l2_pgentry_t *p_l2_entry, l2_entry; + int i, ret; + + ret = dec_page_refcnt(page_nr, PGT_l2_page_table); + if ( ret != 0 ) return((ret < 0) ? ret : 0); + + /* We had last reference to level-2 page table. Free the PDEs. */ + p_l2_entry = (l2_pgentry_t *)__va(page_nr << PAGE_SHIFT); + for ( i = 0; i < HYPERVISOR_ENTRIES_PER_L2_PAGETABLE; i++ ) + { + l2_entry = *p_l2_entry++; + if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) + put_l1_table(l2_pgentry_to_pagenr(l2_entry)); + } + + return(0); +} + +static void put_l1_table(unsigned long page_nr) +{ + l1_pgentry_t *p_l1_entry, l1_entry; + int i; + + if ( dec_page_refcnt(page_nr, PGT_l1_page_table) != 0 ) return; + + /* We had last reference to level-1 page table. Free the PTEs. 
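+ * ("Free" here means dropping the references get_l1_table() took:
+ * each present PTE releases one tot_count on its target frame, and
+ * writeable PTEs also release the PGT_writeable_page type count.)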
*/ + p_l1_entry = (l1_pgentry_t *)__va(page_nr << PAGE_SHIFT); + for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) + { + l1_entry = *p_l1_entry++; + if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) + { + put_page(l1_pgentry_to_pagenr(l1_entry), + l1_pgentry_val(l1_entry) & _PAGE_RW); + } + } +} + +static void put_page(unsigned long page_nr, int writeable) +{ + struct pfn_info *page; + ASSERT(page_nr < max_page); + page = frame_table + page_nr; + ASSERT((page->flags & PG_domain_mask) == current->domain); + ASSERT((((page->flags & PG_type_mask) == PGT_writeable_page) && + (page_type_count(page) != 0)) || + (((page->flags & PG_type_mask) == PGT_none) && + (page_type_count(page) == 0))); + ASSERT((!writeable) || (page_type_count(page) != 0)); + if ( writeable && (put_page_type(page) == 0) ) + page->flags &= ~PG_type_mask; + put_page_tot(page); +} + + +static int mod_l2_entry(l2_pgentry_t *p_l2_entry, l2_pgentry_t new_l2_entry) +{ + l2_pgentry_t old_l2_entry = *p_l2_entry; + + if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >= + DOMAIN_ENTRIES_PER_L2_PAGETABLE ) + { + MEM_LOG("Illegal L2 update attempt in hypervisor area %p\n", + p_l2_entry); + goto fail; + } + + if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) ) + { + if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) ) + { + MEM_LOG("Bad L2 entry val %04lx", + l2_pgentry_val(new_l2_entry) & + (_PAGE_GLOBAL|_PAGE_PSE)); + goto fail; + } + /* Differ in mapping (bits 12-31) or presence (bit 0)? */ + if ( ((l2_pgentry_val(old_l2_entry) ^ + l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 ) + { + if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) + { + put_l1_table(l2_pgentry_to_pagenr(old_l2_entry)); + } + + if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) ) + goto fail; + } + } + else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) + { + put_l1_table(l2_pgentry_to_pagenr(old_l2_entry)); + } + + *p_l2_entry++ = new_l2_entry; + + return(0); + fail: + return(-1); +} + + +static int mod_l1_entry(l1_pgentry_t *p_l1_entry, l1_pgentry_t new_l1_entry) +{ + l1_pgentry_t old_l1_entry = *p_l1_entry; + + if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) ) + { + if ( (l1_pgentry_val(new_l1_entry) & + (_PAGE_GLOBAL|_PAGE_PAT)) ) + { + MEM_LOG("Bad L1 entry val %04lx", + l1_pgentry_val(new_l1_entry) & + (_PAGE_GLOBAL|_PAGE_PAT)); + goto fail; + } + /* + * Differ in mapping (bits 12-31), writeable (bit 1), or + * presence (bit 0)? + */ + if ( ((l1_pgentry_val(old_l1_entry) ^ + l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 ) + { + if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) + { + put_page(l1_pgentry_to_pagenr(old_l1_entry), + l1_pgentry_val(old_l1_entry) & _PAGE_RW); + } + + if ( get_page(l1_pgentry_to_pagenr(new_l1_entry), + l1_pgentry_val(new_l1_entry) & _PAGE_RW) ) + goto fail; + } + } + else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) + { + put_page(l1_pgentry_to_pagenr(old_l1_entry), + l1_pgentry_val(old_l1_entry) & _PAGE_RW); + } + + *p_l1_entry++ = new_l1_entry; + + return(0); + fail: + return(-1); +} + + +/* Apply updates to page table @pagetable_id within the current domain. 
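+ * (Requests actually name physical PTE/PDE addresses directly -- see
+ * the interface comment at the top of this file.) A guest-side call
+ * might look like the following sketch; HYPERVISOR_pt_update() is a
+ * hypothetical trap wrapper, not defined in this changeset:
+ *
+ *     page_update_request_t req[2];
+ *     req[0].ptr = pte_machine_addr;    -- ordinary update: *ptr = val
+ *     req[0].val = new_pte;
+ *     req[1].ptr = PGREQ_ADD_BASEPTR;   -- pin a new top-level table
+ *     req[1].val = new_l2_machine_addr;
+ *     HYPERVISOR_pt_update(req, 2);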
*/ +int do_process_page_updates(page_update_request_t *updates, int count) +{ + page_update_request_t cur; + unsigned long flags, pfn; + struct pfn_info *page; + int err = 0, i; + + for ( i = 0; i < count; i++ ) + { + if ( copy_from_user(&cur, updates, sizeof(cur)) ) + { + kill_domain_with_errmsg("Cannot read page update request"); + } + + err = 1; + + pfn = cur.ptr >> PAGE_SHIFT; + if ( !pfn ) + { + switch ( cur.ptr ) + { + case PGREQ_ADD_BASEPTR: + err = get_l2_table(cur.val >> PAGE_SHIFT); + break; + case PGREQ_REMOVE_BASEPTR: + if ( cur.val == __pa(pagetable_ptr(current->mm.pagetable)) ) + { + MEM_LOG("Attempt to remove current baseptr! %08lx", + cur.val); + } + else + { + err = put_l2_table(cur.val >> PAGE_SHIFT); + } + break; + } + } + else if ( (cur.ptr & (sizeof(l1_pgentry_t)-1)) || (pfn >= max_page) ) + { + MEM_LOG("Page out of range (%08lx>%08lx) or misalign %08lx", + pfn, max_page, cur.ptr); + } + else + { + page = frame_table + pfn; + flags = page->flags; + if ( (flags & PG_domain_mask) == current->domain ) + { + switch ( (flags & PG_type_mask) ) + { + case PGT_l1_page_table: + err = mod_l1_entry((l1_pgentry_t *)__va(cur.ptr), + mk_l1_pgentry(cur.val)); + break; + case PGT_l2_page_table: + err = mod_l2_entry((l2_pgentry_t *)__va(cur.ptr), + mk_l2_pgentry(cur.val)); + break; + } + } + } + + if ( err ) + { + kill_domain_with_errmsg("Illegal page update request"); + } + + updates++; + } + + __asm__ __volatile__ ("movl %%eax,%%cr3" : : + "a" (__pa(pagetable_ptr(current->mm.pagetable)))); + return(0); +} + + +int do_set_pagetable(unsigned long ptr) +{ + struct pfn_info *page; + unsigned long pfn, flags; + + if ( (ptr & ~PAGE_MASK) ) + { + MEM_LOG("Misaligned new baseptr %08lx", ptr); + return -1; + } + pfn = ptr >> PAGE_SHIFT; + if ( pfn >= max_page ) + { + MEM_LOG("Page out of range (%08lx>%08lx)", pfn, max_page); + return -1; + } + page = frame_table + (ptr >> PAGE_SHIFT); + flags = page->flags; + if ( (flags & (PG_domain_mask|PG_type_mask)) != + (current->domain|PGT_l2_page_table) ) + { + MEM_LOG("Page %08lx bad type/domain (dom=%ld) " + "(type %08lx != expected %08x)", + ptr, flags & PG_domain_mask, flags & PG_type_mask, + PGT_l2_page_table); + return -1; + } + current->mm.pagetable = mk_pagetable((unsigned long)__va(ptr)); + __asm__ __volatile__ ("movl %%eax,%%cr3" : : "a" (ptr)); + return 0; +} diff --git a/xen-2.4.16/common/network.c b/xen-2.4.16/common/network.c new file mode 100644 index 0000000000..7fc7b99370 --- /dev/null +++ b/xen-2.4.16/common/network.c @@ -0,0 +1,109 @@ +/* net_ring.c + * + * ring data structures for buffering messages between hypervisor and + * guestos's. As it stands this is only used for network buffer exchange. 
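+ * Layout note: each vif owns one net_ring_t slot in its domain's
+ * net_ring_base array, i.e. vif i's ring is dom_task->net_ring_base+i;
+ * create_net_vif() below fills in the next free slot and returns it.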
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/* vif globals */
+int sys_vif_count;
+kmem_cache_t *net_vif_cache;
+
+net_ring_t *create_net_vif(int domain)
+{
+    net_vif_t *new_vif;
+    net_ring_t *new_ring;
+    struct task_struct *dom_task;
+
+    if ( !(dom_task = find_domain_by_id(domain)) )
+    {
+        return NULL;
+    }
+
+    if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+    {
+        return NULL;
+    }
+    dom_task->net_vif_list[dom_task->num_net_vifs] = new_vif;
+
+    new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
+    memset(new_ring, 0, sizeof(net_ring_t));
+
+    dom_task->net_vif_list[dom_task->num_net_vifs]->net_ring = new_ring;
+    skb_queue_head_init(
+        &dom_task->net_vif_list[dom_task->num_net_vifs]->skb_list);
+    dom_task->net_vif_list[dom_task->num_net_vifs]->id = sys_vif_count++;
+    dom_task->num_net_vifs++;
+
+    return new_ring;
+}
+
+/* delete the last vif in the given domain. There doesn't seem to be any
+ * reason (yet) to be able to axe an arbitrary vif, by vif id.
+ */
+void destroy_net_vif(struct task_struct *p)
+{
+    struct sk_buff *skb;
+    int i;
+
+    if ( p->num_net_vifs <= 0 ) return; // nothing to do.
+
+    i = --p->num_net_vifs;
+    while ( (skb = skb_dequeue(&p->net_vif_list[i]->skb_list)) != NULL )
+    {
+        kfree_skb(skb);
+    }
+    kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
+}
+
+
+/*
+ * This is the hook function to handle guest-invoked traps requesting
+ * changes to the network system.
+ */
+
+long do_network_op(network_op_t *u_network_op)
+{
+    long ret=0;
+    network_op_t op;
+
+    if ( current->domain != 0 )
+        return -EPERM;
+
+    if ( copy_from_user(&op, u_network_op, sizeof(op)) )
+        return -EFAULT;
+
+    switch ( op.cmd )
+    {
+
+    case NETWORK_OP_ADDRULE:
+    {
+        printk("received addrule request from guestos!\n");
+    }
+    break;
+
+    case NETWORK_OP_DELETERULE:
+    {
+        printk("received deleterule request from guestos!\n");
+    }
+    break;
+
+    default:
+        ret = -ENOSYS;
+    }
+
+    return ret;
+}
+
+void __init net_init (void)
+{
+    sys_vif_count = 0;
+    net_vif_cache = kmem_cache_create("net_vif_cache", sizeof(net_vif_t),
+                                      0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+}
diff --git a/xen-2.4.16/common/page_alloc.c b/xen-2.4.16/common/page_alloc.c
new file mode 100644
index 0000000000..b7cd2c1a7b
--- /dev/null
+++ b/xen-2.4.16/common/page_alloc.c
@@ -0,0 +1,272 @@
+/******************************************************************************
+ * page_alloc.c
+ *
+ * Simple buddy allocator for Xenoserver hypervisor.
+ *
+ * Copyright (c) 2002 K A Fraser
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+static spinlock_t alloc_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*********************
+ * ALLOCATION BITMAP
+ *  One bit per page of memory. Bit set => page is allocated.
+ */
+
+static unsigned long *alloc_bitmap;
+#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
+
+#define allocated_in_map(_pn) \
+(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
+
+
+/*
+ * Hint regarding bitwise arithmetic in map_{alloc,free}:
+ *  -(1<<n)  sets all bits >= n.
+ *  (1<<n)-1 sets all bits <  n.
+ * Variable names in map_{alloc,free}:
+ *  *_idx == Index into `alloc_bitmap' array.
+ *  *_off == Bit offset within an element of the `alloc_bitmap' array.
+ */
+
+static void map_alloc(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
+    }
+    else
+    {
+        alloc_bitmap[curr_idx] |= -(1<<start_off);
+        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
+        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
+    }
+}
+
+
+static void map_free(unsigned long first_page, unsigned long nr_pages)
+{
+    unsigned long start_off, end_off, curr_idx, end_idx;
+
+    curr_idx  = first_page / PAGES_PER_MAPWORD;
+    start_off = first_page & (PAGES_PER_MAPWORD-1);
+    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
+    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);
+
+    if ( curr_idx == end_idx )
+    {
+        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
+    }
+    else
+    {
+        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
+        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
+        alloc_bitmap[curr_idx] &= -(1<<end_off);
+    }
+}
+
+
+/*************************
+ * BINARY BUDDY ALLOCATOR
+ */
+
+typedef struct chunk_head_st chunk_head_t;
+typedef struct chunk_tail_st chunk_tail_t;
+
+struct chunk_head_st {
+    chunk_head_t  *next;
+    chunk_head_t **pprev;
+    int            level;
+};
+
+struct chunk_tail_st {
+    int level;
+};
+
+/* Linked lists of free chunks of different powers-of-two in size. */
+#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
+static chunk_head_t *free_list[FREELIST_SIZE];
+static chunk_head_t  free_tail[FREELIST_SIZE];
+#define FREELIST_EMPTY(_l) ((_l)->next == NULL)
+
+#define round_pgdown(_p)  ((_p)&PAGE_MASK)
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+
+
+/* Initialise allocator, placing addresses [@min,@max] in free pool.
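+ * Worked example of the carving loop below (numbers illustrative):
+ * with min=0x100000 and range=0x300000, the first pass is limited by
+ * min's alignment to a 1MB chunk (i=20); the second pass takes the
+ * remaining 2MB chunk (i=21). Each chunk is stamped, head and tail,
+ * with its free-list level (i - PAGE_SHIFT).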
+ */
+void __init init_page_allocator(unsigned long min, unsigned long max)
+{
+    int i;
+    unsigned long range, bitmap_size;
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+
+    for ( i = 0; i < FREELIST_SIZE; i++ )
+    {
+        free_list[i]       = &free_tail[i];
+        free_tail[i].pprev = &free_list[i];
+        free_tail[i].next  = NULL;
+    }
+
+    min = round_pgup (min);
+    max = round_pgdown(max);
+
+    /* Allocate space for the allocation bitmap. */
+    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
+    bitmap_size  = round_pgup(bitmap_size);
+    alloc_bitmap = (unsigned long *)__va(min);
+    min         += bitmap_size;
+    range        = max - min;
+
+    /* All allocated by default. */
+    memset(alloc_bitmap, ~0, bitmap_size);
+    /* Free up the memory we've been given to play with. */
+    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
+
+    /* The buddy lists are addressed in high memory. */
+    min += PAGE_OFFSET;
+    max += PAGE_OFFSET;
+
+    while ( range != 0 )
+    {
+        /*
+         * Next chunk is limited by alignment of min, but also
+         * must not be bigger than remaining range.
+         */
+        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
+            if ( min & (1<<i) ) break;
+
+        ch = (chunk_head_t *)min;
+        min   += (1<<i);
+        range -= (1<<i);
+        ct = (chunk_tail_t *)min-1;
+        i -= PAGE_SHIFT;
+        ch->level       = i;
+        ch->next        = free_list[i];
+        ch->pprev       = &free_list[i];
+        ch->next->pprev = &ch->next;
+        free_list[i]    = ch;
+        ct->level       = i;
+    }
+}
+
+
+/* Allocate 2^@order contiguous pages. */
+unsigned long __get_free_pages(int mask, int order)
+{
+    int i;
+    chunk_head_t *alloc_ch, *spare_ch;
+    chunk_tail_t *spare_ct;
+    unsigned long flags;
+
+    spin_lock_irqsave(&alloc_lock, flags);
+
+    /* Find smallest order which can satisfy the request. */
+    for ( i = order; FREELIST_EMPTY(free_list[i]); i++ )
+    {
+        if ( i == FREELIST_SIZE )
+            panic("Out of memory!\n");
+    }
+
+    /* Unlink a chunk. */
+    alloc_ch = free_list[i];
+    free_list[i] = alloc_ch->next;
+    alloc_ch->next->pprev = alloc_ch->pprev;
+
+    /* We may have to break the chunk a number of times. */
+    while ( i != order )
+    {
+        /* Split into two equal parts. */
+        i--;
+        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
+        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
+
+        /* Create new header for spare chunk. */
+        spare_ch->level = i;
+        spare_ch->next  = free_list[i];
+        spare_ch->pprev = &free_list[i];
+        spare_ct->level = i;
+
+        /* Link in the spare chunk. */
+        spare_ch->next->pprev = &spare_ch->next;
+        free_list[i] = spare_ch;
+    }
+
+    map_alloc(__pa(alloc_ch)>>PAGE_SHIFT, 1<<order);
+
+    spin_unlock_irqrestore(&alloc_lock, flags);
+
+    return((unsigned long)alloc_ch);
+}
+
+
+/* Free 2^@order pages at virtual address @p. */
+void __free_pages(unsigned long p, int order)
+{
+    unsigned long size = 1 << (order + PAGE_SHIFT);
+    chunk_head_t *ch;
+    chunk_tail_t *ct;
+    unsigned long flags;
+    unsigned long pagenr = __pa(p) >> PAGE_SHIFT;
+
+    spin_lock_irqsave(&alloc_lock, flags);
+
+    map_free(pagenr, 1<<order);
+
+    /* Merge chunks as far as possible. */
+    for ( ; ; )
+    {
+        if ( (p & size) )
+        {
+            /* Merge with predecessor block? */
+            if ( allocated_in_map(pagenr-1) ) break;
+            ct = (chunk_tail_t *)p - 1;
+            if ( ct->level != order ) break;
+            ch = (chunk_head_t *)(p - size);
+            p -= size;
+            pagenr -= 1<<order; /* keep the map index in step with p */
+        }
+        else
+        {
+            /* Merge with successor block? */
+            if ( allocated_in_map(pagenr+(1<<order)) ) break;
+            ch = (chunk_head_t *)(p + size);
+            if ( ch->level != order ) break;
+        }
+
+        /* Okay, unlink the neighbour. */
+        *ch->pprev = ch->next;
+        ch->next->pprev = ch->pprev;
+
+        order++;
+        size <<= 1;
+    }
+
+    /* Okay, add the final chunk to the appropriate free list. */
+    ch = (chunk_head_t *)p;
+    ct = (chunk_tail_t *)(p+size)-1;
+    ct->level = order;
+    ch->level = order;
+    ch->pprev = &free_list[order];
+    ch->next  = free_list[order];
+    ch->next->pprev = &ch->next;
+    free_list[order] = ch;
+
+    spin_unlock_irqrestore(&alloc_lock, flags);
+}
diff --git a/xen-2.4.16/common/resource.c b/xen-2.4.16/common/resource.c
new file mode 100644
index 0000000000..15f29999ff
--- /dev/null
+++ b/xen-2.4.16/common/resource.c
@@ -0,0 +1,329 @@
+/*
+ * linux/kernel/resource.c
+ *
+ * Copyright (C) 1999 Linus Torvalds
+ * Copyright (C) 1999 Martin Mares
+ *
+ * Arbitrary resource management.
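+ *
+ * Typical use, as a sketch (both helpers are defined below): claim a
+ * busy I/O-port range and later release it again --
+ *
+ *     struct resource *r;
+ *     r = __request_region(&ioport_resource, 0x3f8, 8, "com1");
+ *     if ( r != NULL )
+ *         __release_region(&ioport_resource, 0x3f8, 8);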
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO }; +struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM }; + +static rwlock_t resource_lock = RW_LOCK_UNLOCKED; + +/* + * This generates reports for /proc/ioports and /proc/iomem + */ +static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end) +{ + if (offset < 0) + offset = 0; + + while (entry) { + const char *name = entry->name; + unsigned long from, to; + + if ((int) (end-buf) < 80) + return buf; + + from = entry->start; + to = entry->end; + if (!name) + name = ""; + + buf += sprintf(buf, fmt + offset, from, to, name); + if (entry->child) + buf = do_resource_list(entry->child, fmt, offset-2, buf, end); + entry = entry->sibling; + } + + return buf; +} + +int get_resource_list(struct resource *root, char *buf, int size) +{ + char *fmt; + int retval; + + fmt = " %08lx-%08lx : %s\n"; + if (root->end < 0x10000) + fmt = " %04lx-%04lx : %s\n"; + read_lock(&resource_lock); + retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf; + read_unlock(&resource_lock); + return retval; +} + +/* Return the conflict entry if you can't request it */ +static struct resource * __request_resource(struct resource *root, struct resource *new) +{ + unsigned long start = new->start; + unsigned long end = new->end; + struct resource *tmp, **p; + + if (end < start) + return root; + if (start < root->start) + return root; + if (end > root->end) + return root; + p = &root->child; + for (;;) { + tmp = *p; + if (!tmp || tmp->start > end) { + new->sibling = tmp; + *p = new; + new->parent = root; + return NULL; + } + p = &tmp->sibling; + if (tmp->end < start) + continue; + return tmp; + } +} + +static int __release_resource(struct resource *old) +{ + struct resource *tmp, **p; + + p = &old->parent->child; + for (;;) { + tmp = *p; + if (!tmp) + break; + if (tmp == old) { + *p = tmp->sibling; + old->parent = NULL; + return 0; + } + p = &tmp->sibling; + } + return -EINVAL; +} + +int request_resource(struct resource *root, struct resource *new) +{ + struct resource *conflict; + + write_lock(&resource_lock); + conflict = __request_resource(root, new); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +int release_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old); + write_unlock(&resource_lock); + return retval; +} + +int check_resource(struct resource *root, unsigned long start, unsigned long len) +{ + struct resource *conflict, tmp; + + tmp.start = start; + tmp.end = start + len - 1; + write_lock(&resource_lock); + conflict = __request_resource(root, &tmp); + if (!conflict) + __release_resource(&tmp); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +/* + * Find empty slot in the resource tree given range and alignment. 
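+ * Candidate starts are rounded up to the requested alignment with
+ * (start + align - 1) & ~(align - 1); e.g. align=0x100 rounds a gap
+ * starting at 0x3f9 up to 0x400 before the size test is applied.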
+ */ +static int find_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align, + void (*alignf)(void *, struct resource *, unsigned long), + void *alignf_data) +{ + struct resource *this = root->child; + + new->start = root->start; + for(;;) { + if (this) + new->end = this->start; + else + new->end = root->end; + if (new->start < min) + new->start = min; + if (new->end > max) + new->end = max; + new->start = (new->start + align - 1) & ~(align - 1); + if (alignf) + alignf(alignf_data, new, size); + if (new->start < new->end && new->end - new->start + 1 >= size) { + new->end = new->start + size - 1; + return 0; + } + if (!this) + break; + new->start = this->end + 1; + this = this->sibling; + } + return -EBUSY; +} + +/* + * Allocate empty slot in the resource tree given range and alignment. + */ +int allocate_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align, + void (*alignf)(void *, struct resource *, unsigned long), + void *alignf_data) +{ + int err; + + write_lock(&resource_lock); + err = find_resource(root, new, size, min, max, align, alignf, alignf_data); + if (err >= 0 && __request_resource(root, new)) + err = -EBUSY; + write_unlock(&resource_lock); + return err; +} + +/* + * This is compatibility stuff for IO resources. + * + * Note how this, unlike the above, knows about + * the IO flag meanings (busy etc). + * + * Request-region creates a new busy region. + * + * Check-region returns non-zero if the area is already busy + * + * Release-region releases a matching busy region. + */ +struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) +{ + struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); + + if (res) { + memset(res, 0, sizeof(*res)); + res->name = name; + res->start = start; + res->end = start + n - 1; + res->flags = IORESOURCE_BUSY; + + write_lock(&resource_lock); + + for (;;) { + struct resource *conflict; + + conflict = __request_resource(parent, res); + if (!conflict) + break; + if (conflict != parent) { + parent = conflict; + if (!(conflict->flags & IORESOURCE_BUSY)) + continue; + } + + /* Uhhuh, that didn't work out.. */ + kfree(res); + res = NULL; + break; + } + write_unlock(&resource_lock); + } + return res; +} + +int __check_region(struct resource *parent, unsigned long start, unsigned long n) +{ + struct resource * res; + + res = __request_region(parent, start, n, "check-region"); + if (!res) + return -EBUSY; + + release_resource(res); + kfree(res); + return 0; +} + +void __release_region(struct resource *parent, unsigned long start, unsigned long n) +{ + struct resource **p; + unsigned long end; + + p = &parent->child; + end = start + n - 1; + + for (;;) { + struct resource *res = *p; + + if (!res) + break; + if (res->start <= start && res->end >= end) { + if (!(res->flags & IORESOURCE_BUSY)) { + p = &res->child; + continue; + } + if (res->start != start || res->end != end) + break; + *p = res->sibling; + kfree(res); + return; + } + p = &res->sibling; + } + printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end); +} + +#if 0 +/* + * Called from init/main.c to reserve IO ports. 
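+ * Usage sketch (only if this #if 0 block is enabled): booting with
+ * "reserve=0x300,32" marks ports 0x300-0x31f busy so no driver can
+ * claim them; up to MAXRESERVE such ranges are honoured.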
+ */ +#define MAXRESERVE 4 +static int __init reserve_setup(char *str) +{ + static int reserved = 0; + static struct resource reserve[MAXRESERVE]; + + for (;;) { + int io_start, io_num; + int x = reserved; + + if (get_option (&str, &io_start) != 2) + break; + if (get_option (&str, &io_num) == 0) + break; + if (x < MAXRESERVE) { + struct resource *res = reserve + x; + res->name = "reserved"; + res->start = io_start; + res->end = io_start + io_num - 1; + res->flags = IORESOURCE_BUSY; + res->child = NULL; + if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0) + reserved = x+1; + } + } + return 1; +} + +__setup("reserve=", reserve_setup); +#endif diff --git a/xen-2.4.16/common/slab.c b/xen-2.4.16/common/slab.c new file mode 100644 index 0000000000..98a79bb052 --- /dev/null +++ b/xen-2.4.16/common/slab.c @@ -0,0 +1,1839 @@ +/* + * linux/mm/slab.c + * Written by Mark Hemment, 1996/97. + * (markhe@nextd.demon.co.uk) + * + * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli + * + * Major cleanup, different bufctl logic, per-cpu arrays + * (c) 2000 Manfred Spraul + * + * An implementation of the Slab Allocator as described in outline in; + * UNIX Internals: The New Frontiers by Uresh Vahalia + * Pub: Prentice Hall ISBN 0-13-101908-2 + * or with a little more detail in; + * The Slab Allocator: An Object-Caching Kernel Memory Allocator + * Jeff Bonwick (Sun Microsystems). + * Presented at: USENIX Summer 1994 Technical Conference + * + * + * The memory is organized in caches, one cache for each object type. + * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct) + * Each cache consists out of many slabs (they are small (usually one + * page long) and always contiguous), and each slab contains multiple + * initialized objects. + * + * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM, + * normal). If you need a special memory type, then must create a new + * cache for that memory type. + * + * In order to reduce fragmentation, the slabs are sorted in 3 groups: + * full slabs with 0 free objects + * partial slabs + * empty slabs with no allocated objects + * + * If partial slabs exist, then new allocations come from these slabs, + * otherwise from empty slabs or new slabs are allocated. + * + * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache + * during kmem_cache_destroy(). The caller must prevent concurrent allocs. + * + * On SMP systems, each cache has a short per-cpu head array, most allocs + * and frees go into that array, and if that array overflows, then 1/2 + * of the entries in the array are given back into the global cache. + * This reduces the number of spinlock operations. + * + * The c_cpuarray may not be read with enabled local interrupts. + * + * SMP synchronization: + * constructors and destructors are called without any locking. + * Several members in kmem_cache_t and slab_t never change, they + * are accessed without any locking. + * The per-cpu arrays are never accessed from the wrong cpu, no locking. + * The non-constant members are protected with a per-cache irq spinlock. + * + * Further notes from the original documentation: + * + * 11 April '97. Started multi-threading - markhe + * The global cache-chain is protected by the semaphore 'cache_chain_sem'. + * The sem is only needed when accessing/extending the cache-chain, which + * can never happen inside an interrupt (kmem_cache_create(), + * kmem_cache_shrink() and kmem_cache_reap()). 
+ * + * To prevent kmem_cache_shrink() trying to shrink a 'growing' cache (which + * maybe be sleeping and therefore not holding the semaphore/lock), the + * growing field is used. This also prevents reaping from a cache. + * + * At present, each engine can be growing a cache. This should be blocked. + * + */ + +/* + * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, + * SLAB_RED_ZONE & SLAB_POISON. + * 0 for faster, smaller code (especially in the critical paths). + * + * STATS - 1 to collect stats for /proc/slabinfo. + * 0 for faster, smaller code (especially in the critical paths). + * + * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef CONFIG_DEBUG_SLAB +#define DEBUG 1 +#define STATS 1 +#define FORCED_DEBUG 1 +#else +#define DEBUG 0 +#define STATS 0 +#define FORCED_DEBUG 0 +#endif + +/* + * Parameters for kmem_cache_reap + */ +#define REAP_SCANLEN 10 +#define REAP_PERFECT 10 + +/* Shouldn't this be in a header file somewhere? */ +#define BYTES_PER_WORD sizeof(void *) + +/* Legal flag mask for kmem_cache_create(). */ +#if DEBUG +# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ + SLAB_POISON | SLAB_HWCACHE_ALIGN | \ + SLAB_NO_REAP | SLAB_CACHE_DMA) +#else +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA) +#endif + +/* + * kmem_bufctl_t: + * + * Bufctl's are used for linking objs within a slab + * linked offsets. + * + * This implementaion relies on "struct page" for locating the cache & + * slab an object belongs to. + * This allows the bufctl structure to be small (one int), but limits + * the number of objects a slab (not a cache) can contain when off-slab + * bufctls are used. The limit is the size of the largest general cache + * that does not use off-slab slabs. + * For 32bit archs with 4 kB pages, is this 56. + * This is not serious, as it is only for large objects, when it is unwise + * to have too many per slab. + * Note: This limit can be raised by introducing a general cache whose size + * is less than 512 (PAGE_SIZE<<3), but greater than 256. + */ + +#define BUFCTL_END 0xffffFFFF +#define SLAB_LIMIT 0xffffFFFE +typedef unsigned int kmem_bufctl_t; + +/* Max number of objs-per-slab for caches which use off-slab slabs. + * Needed to avoid a possible looping condition in kmem_cache_grow(). + */ +static unsigned long offslab_limit; + +/* + * slab_t + * + * Manages the objs in a slab. Placed either at the beginning of mem allocated + * for a slab, or allocated from an general cache. + * Slabs are chained into three list: fully used, partial, fully free slabs. + */ +typedef struct slab_s { + struct list_head list; + unsigned long colouroff; + void *s_mem; /* including colour offset */ + unsigned int inuse; /* num of objs active in slab */ + kmem_bufctl_t free; +} slab_t; + +#define slab_bufctl(slabp) \ + ((kmem_bufctl_t *)(((slab_t*)slabp)+1)) + +/* + * cpucache_t + * + * Per cpu structures + * The limit is stored in the per-cpu structure to reduce the data cache + * footprint. + */ +typedef struct cpucache_s { + unsigned int avail; + unsigned int limit; +} cpucache_t; + +#define cc_entry(cpucache) \ + ((void **)(((cpucache_t*)(cpucache))+1)) +#define cc_data(cachep) \ + ((cachep)->cpudata[smp_processor_id()]) +/* + * kmem_cache_t + * + * manages a cache. 
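+ *
+ * Typical cache lifecycle, as a sketch (names illustrative; compare
+ * net_init() in common/network.c for the create step):
+ *
+ *     kmem_cache_t *cp = kmem_cache_create("my_cache", sizeof(my_t),
+ *                                          0, SLAB_HWCACHE_ALIGN,
+ *                                          NULL, NULL);
+ *     void *obj = kmem_cache_alloc(cp, GFP_KERNEL);
+ *     kmem_cache_free(cp, obj);
+ *     kmem_cache_destroy(cp);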
+ */ + +#define CACHE_NAMELEN 20 /* max name length for a slab cache */ + +struct kmem_cache_s { +/* 1) each alloc & free */ + /* full, partial first, then free */ + struct list_head slabs_full; + struct list_head slabs_partial; + struct list_head slabs_free; + unsigned int objsize; + unsigned int flags; /* constant flags */ + unsigned int num; /* # of objs per slab */ + spinlock_t spinlock; +#ifdef CONFIG_SMP + unsigned int batchcount; +#endif + +/* 2) slab additions /removals */ + /* order of pgs per slab (2^n) */ + unsigned int gfporder; + + /* force GFP flags, e.g. GFP_DMA */ + unsigned int gfpflags; + + size_t colour; /* cache colouring range */ + unsigned int colour_off; /* colour offset */ + unsigned int colour_next; /* cache colouring */ + kmem_cache_t *slabp_cache; + unsigned int growing; + unsigned int dflags; /* dynamic flags */ + + /* constructor func */ + void (*ctor)(void *, kmem_cache_t *, unsigned long); + + /* de-constructor func */ + void (*dtor)(void *, kmem_cache_t *, unsigned long); + + unsigned long failures; + +/* 3) cache creation/removal */ + char name[CACHE_NAMELEN]; + struct list_head next; +#ifdef CONFIG_SMP +/* 4) per-cpu data */ + cpucache_t *cpudata[NR_CPUS]; +#endif +#if STATS + unsigned long num_active; + unsigned long num_allocations; + unsigned long high_mark; + unsigned long grown; + unsigned long reaped; + unsigned long errors; +#ifdef CONFIG_SMP + atomic_t allochit; + atomic_t allocmiss; + atomic_t freehit; + atomic_t freemiss; +#endif +#endif +}; + +/* internal c_flags */ +#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */ +#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */ + +/* c_dflags (dynamic flags). Need to hold the spinlock to access this member */ +#define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */ + +#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) +#define OPTIMIZE(x) ((x)->flags & CFLGS_OPTIMIZE) +#define GROWN(x) ((x)->dlags & DFLGS_GROWN) + +#if STATS +#define STATS_INC_ACTIVE(x) ((x)->num_active++) +#define STATS_DEC_ACTIVE(x) ((x)->num_active--) +#define STATS_INC_ALLOCED(x) ((x)->num_allocations++) +#define STATS_INC_GROWN(x) ((x)->grown++) +#define STATS_INC_REAPED(x) ((x)->reaped++) +#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ + (x)->high_mark = (x)->num_active; \ + } while (0) +#define STATS_INC_ERR(x) ((x)->errors++) +#else +#define STATS_INC_ACTIVE(x) do { } while (0) +#define STATS_DEC_ACTIVE(x) do { } while (0) +#define STATS_INC_ALLOCED(x) do { } while (0) +#define STATS_INC_GROWN(x) do { } while (0) +#define STATS_INC_REAPED(x) do { } while (0) +#define STATS_SET_HIGH(x) do { } while (0) +#define STATS_INC_ERR(x) do { } while (0) +#endif + +#if STATS && defined(CONFIG_SMP) +#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) +#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) +#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) +#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss) +#else +#define STATS_INC_ALLOCHIT(x) do { } while (0) +#define STATS_INC_ALLOCMISS(x) do { } while (0) +#define STATS_INC_FREEHIT(x) do { } while (0) +#define STATS_INC_FREEMISS(x) do { } while (0) +#endif + +#if DEBUG +/* Magic nums for obj red zoning. + * Placed in the first word before and the first word after an obj. 
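+ * Layout of a red-zoned object, as a sketch (kmem_cache_create()
+ * grows objsize by 2*BYTES_PER_WORD to make room):
+ *
+ *     [guard word][object payload ...][guard word]
+ *
+ * A guard word that no longer holds the expected magic value flags
+ * an overrun of a neighbouring object or a double free.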
+ */
+#define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
+#define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
+
+/* ...and for poisoning */
+#define POISON_BYTE 0x5a /* byte value for poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+#endif
+
+/* maximum size of an obj (in 2^order pages) */
+#define MAX_OBJ_ORDER 5 /* 32 pages */
+
+/*
+ * Do not go above this order unless 0 objects fit into the slab.
+ */
+#define BREAK_GFP_ORDER_HI 2
+#define BREAK_GFP_ORDER_LO 1
+static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+
+/*
+ * Absolute limit for the gfp order
+ */
+#define MAX_GFP_ORDER 5 /* 32 pages */
+
+
+/* Macros for storing/retrieving the cachep and or slab from the
+ * global 'mem_map'. These are used to find the slab an obj belongs to.
+ * With kfree(), these are used to find the cache which an obj belongs to.
+ */
+#define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
+#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
+#define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
+#define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
+
+/* Size description struct for general caches. */
+typedef struct cache_sizes {
+    size_t        cs_size;
+    kmem_cache_t *cs_cachep;
+    kmem_cache_t *cs_dmacachep;
+} cache_sizes_t;
+
+static cache_sizes_t cache_sizes[] = {
+#if PAGE_SIZE == 4096
+    {    32, NULL, NULL},
+#endif
+    {    64, NULL, NULL},
+    {   128, NULL, NULL},
+    {   256, NULL, NULL},
+    {   512, NULL, NULL},
+    {  1024, NULL, NULL},
+    {  2048, NULL, NULL},
+    {  4096, NULL, NULL},
+    {  8192, NULL, NULL},
+    { 16384, NULL, NULL},
+    { 32768, NULL, NULL},
+    { 65536, NULL, NULL},
+    {131072, NULL, NULL},
+    {     0, NULL, NULL}
+};
+
+/* internal cache of cache description objs */
+static kmem_cache_t cache_cache = {
+    slabs_full:    LIST_HEAD_INIT(cache_cache.slabs_full),
+    slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
+    slabs_free:    LIST_HEAD_INIT(cache_cache.slabs_free),
+    objsize:       sizeof(kmem_cache_t),
+    flags:         SLAB_NO_REAP,
+    spinlock:      SPIN_LOCK_UNLOCKED,
+    colour_off:    L1_CACHE_BYTES,
+    name:          "kmem_cache",
+};
+
+/* Guard access to the cache-chain. */
+/* KAF: No semaphores, as we'll never wait around for I/O. */
+static spinlock_t cache_chain_sem;
+#define init_MUTEX(_m) spin_lock_init(_m)
+#define down(_m)       spin_lock_irqsave(_m,spin_flags)
+#define up(_m)         spin_unlock_irqrestore(_m,spin_flags)
+
+/* Place maintainer for reaping. */
+static kmem_cache_t *clock_searchp = &cache_cache;
+
+#define cache_chain (cache_cache.next)
+
+#ifdef CONFIG_SMP
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static int g_cpucache_up;
+
+static void enable_cpucache (kmem_cache_t *cachep);
+static void enable_all_cpucaches (void);
+#endif
+
+/* Calculate the num objs, wastage, and bytes left over for a given slab size. */
+static void kmem_cache_estimate (unsigned long gfporder, size_t size,
+                                 int flags, size_t *left_over, unsigned int *num)
+{
+    int i;
+    size_t wastage = PAGE_SIZE<<gfporder;
+    size_t extra = 0;
+    size_t base = 0;
+
+    if (!(flags & CFLGS_OFF_SLAB)) {
+        base = sizeof(slab_t);
+        extra = sizeof(kmem_bufctl_t);
+    }
+    i = 0;
+    while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
+        i++;
+    if (i > 0)
+        i--;
+
+    if (i > SLAB_LIMIT)
+        i = SLAB_LIMIT;
+
+    *num = i;
+    wastage -= i*size;
+    wastage -= L1_CACHE_ALIGN(base+i*extra);
+    *left_over = wastage;
+}
+
+/* Initialisation - setup the `cache' cache.
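+ * Bootstrap note: cache_cache is the statically-initialised cache
+ * from which every other kmem_cache_t descriptor is later allocated,
+ * so only its per-slab object count and colouring are computed here.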
+ */
+void __init kmem_cache_init(void)
+{
+    size_t left_over;
+
+    init_MUTEX(&cache_chain_sem);
+    INIT_LIST_HEAD(&cache_chain);
+
+    kmem_cache_estimate(0, cache_cache.objsize, 0,
+                        &left_over, &cache_cache.num);
+    if (!cache_cache.num)
+        BUG();
+
+    cache_cache.colour = left_over/cache_cache.colour_off;
+    cache_cache.colour_next = 0;
+}
+
+
+/* Initialisation - setup remaining internal and general caches.
+ * Called after the gfp() functions have been enabled, and before smp_init().
+ */
+void __init kmem_cache_sizes_init(unsigned long num_physpages)
+{
+    cache_sizes_t *sizes = cache_sizes;
+    char name[20];
+    /*
+     * Fragmentation resistance on low memory - only use bigger
+     * page orders on machines with more than 32MB of memory.
+     */
+    if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+        slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+    do {
+        /* For performance, all the general caches are L1 aligned.
+         * This should be particularly beneficial on SMP boxes, as it
+         * eliminates "false sharing".
+         * Note for systems short on memory removing the alignment will
+         * allow tighter packing of the smaller caches. */
+        sprintf(name,"size-%Zd",sizes->cs_size);
+        if (!(sizes->cs_cachep =
+              kmem_cache_create(name, sizes->cs_size,
+                                0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
+            BUG();
+        }
+
+        /* Inc off-slab bufctl limit until the ceiling is hit. */
+        if (!(OFF_SLAB(sizes->cs_cachep))) {
+            offslab_limit = sizes->cs_size-sizeof(slab_t);
+            offslab_limit /= 2;
+        }
+        sprintf(name, "size-%Zd(DMA)",sizes->cs_size);
+        sizes->cs_dmacachep = kmem_cache_create(name, sizes->cs_size, 0,
+                                                SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL);
+        if (!sizes->cs_dmacachep)
+            BUG();
+        sizes++;
+    } while (sizes->cs_size);
+}
+
+int __init kmem_cpucache_init(void)
+{
+#ifdef CONFIG_SMP
+    g_cpucache_up = 1;
+    enable_all_cpucaches();
+#endif
+    return 0;
+}
+
+/*__initcall(kmem_cpucache_init);*/
+
+/* Interface to system's page allocator. No need to hold the cache-lock.
+ */
+static inline void * kmem_getpages (kmem_cache_t *cachep, unsigned long flags)
+{
+    void *addr;
+
+    /*
+     * If we requested dmaable memory, we will get it. Even if we
+     * did not request dmaable memory, we might get it, but that
+     * would be relatively rare and ignorable.
+     */
+    flags |= cachep->gfpflags;
+    addr = (void*) __get_free_pages(flags, cachep->gfporder);
+    /* Assume that now we have the pages no one else can legally
+     * mess with the 'struct page's.
+     * However vm_scan() might try to test the structure to see if
+     * it is a named-page or buffer-page. The members it tests are
+     * of no interest here.....
+     */
+    return addr;
+}
+
+/* Interface to system's page release. */
+static inline void kmem_freepages (kmem_cache_t *cachep, void *addr)
+{
+    unsigned long i = (1<<cachep->gfporder);
+    struct pfn_info *page = virt_to_page(addr);
+
+    /* free_pages() does not clear the type bit - we do that.
+     * The pages have been unlinked from their cache-slab,
+     * but their 'struct page's might be accessed in
+     * vm_scan(). Shouldn't be a worry.
+ */ + while (i--) { + PageClearSlab(page); + page++; + } + + free_pages((unsigned long)addr, cachep->gfporder); +} + +#if DEBUG +static inline void kmem_poison_obj (kmem_cache_t *cachep, void *addr) +{ + int size = cachep->objsize; + if (cachep->flags & SLAB_RED_ZONE) { + addr += BYTES_PER_WORD; + size -= 2*BYTES_PER_WORD; + } + memset(addr, POISON_BYTE, size); + *(unsigned char *)(addr+size-1) = POISON_END; +} + +static inline int kmem_check_poison_obj (kmem_cache_t *cachep, void *addr) +{ + int size = cachep->objsize; + void *end; + if (cachep->flags & SLAB_RED_ZONE) { + addr += BYTES_PER_WORD; + size -= 2*BYTES_PER_WORD; + } + end = memchr(addr, POISON_END, size); + if (end != (addr+size-1)) + return 1; + return 0; +} +#endif + +/* Destroy all the objs in a slab, and release the mem back to the system. + * Before calling the slab must have been unlinked from the cache. + * The cache-lock is not held/needed. + */ +static void kmem_slab_destroy (kmem_cache_t *cachep, slab_t *slabp) +{ + if (cachep->dtor +#if DEBUG + || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE) +#endif + ) { + int i; + for (i = 0; i < cachep->num; i++) { + void* objp = slabp->s_mem+cachep->objsize*i; +#if DEBUG + if (cachep->flags & SLAB_RED_ZONE) { + if (*((unsigned long*)(objp)) != RED_MAGIC1) + BUG(); + if (*((unsigned long*)(objp + cachep->objsize + -BYTES_PER_WORD)) != RED_MAGIC1) + BUG(); + objp += BYTES_PER_WORD; + } +#endif + if (cachep->dtor) + (cachep->dtor)(objp, cachep, 0); +#if DEBUG + if (cachep->flags & SLAB_RED_ZONE) { + objp -= BYTES_PER_WORD; + } + if ((cachep->flags & SLAB_POISON) && + kmem_check_poison_obj(cachep, objp)) + BUG(); +#endif + } + } + + kmem_freepages(cachep, slabp->s_mem-slabp->colouroff); + if (OFF_SLAB(cachep)) + kmem_cache_free(cachep->slabp_cache, slabp); +} + +/** + * kmem_cache_create - Create a cache. + * @name: A string which is used in /proc/slabinfo to identify this cache. + * @size: The size of objects to be created in this cache. + * @offset: The offset to use within the page. + * @flags: SLAB flags + * @ctor: A constructor for the objects. + * @dtor: A destructor for the objects. + * + * Returns a ptr to the cache on success, NULL on failure. + * Cannot be called within a int, but can be interrupted. + * The @ctor is run when new pages are allocated by the cache + * and the @dtor is run before the pages are handed back. + * The flags are + * + * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) + * to catch references to uninitialised memory. + * + * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check + * for buffer overruns. + * + * %SLAB_NO_REAP - Don't automatically reap this cache when we're under + * memory pressure. + * + * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware + * cacheline. This can be beneficial if you're counting cycles as closely + * as davem. + */ +kmem_cache_t * +kmem_cache_create (const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), + void (*dtor)(void*, kmem_cache_t *, unsigned long)) +{ + const char *func_nm = KERN_ERR "kmem_create: "; + size_t left_over, align, slab_size; + kmem_cache_t *cachep = NULL; + unsigned long spin_flags; + + /* + * Sanity checks... these are all serious usage bugs. 
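+ * (A missing or overlong name, an object smaller than a machine word
+ * or larger than 1<<MAX_OBJ_ORDER pages, a dtor without a ctor, or an
+ * offset outside the object all BUG() here rather than fail quietly.)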
+ */
+    if ((!name) ||
+        ((strlen(name) >= CACHE_NAMELEN - 1)) ||
+        (size < BYTES_PER_WORD) ||
+        (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
+        (dtor && !ctor) ||
+        (offset < 0 || offset > size))
+            BUG();
+
+#if DEBUG
+    if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
+        /* No constructor, but initial state check requested */
+        printk("%sNo con, but init state check requested - %s\n", func_nm, name);
+        flags &= ~SLAB_DEBUG_INITIAL;
+    }
+
+    if ((flags & SLAB_POISON) && ctor) {
+        /* request for poisoning, but we can't do that with a constructor */
+        printk("%sPoisoning requested, but con given - %s\n", func_nm, name);
+        flags &= ~SLAB_POISON;
+    }
+#if FORCED_DEBUG
+    if (size < (PAGE_SIZE>>3))
+        /*
+         * do not red zone large object, causes severe
+         * fragmentation.
+         */
+        flags |= SLAB_RED_ZONE;
+    if (!ctor)
+        flags |= SLAB_POISON;
+#endif
+#endif
+
+    /*
+     * Always check flags: a caller might be expecting debug
+     * support which isn't available.
+     */
+    if (flags & ~CREATE_MASK)
+        BUG();
+
+    /* Get cache's description obj. */
+    cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
+    if (!cachep)
+        goto opps;
+    memset(cachep, 0, sizeof(kmem_cache_t));
+
+    /* Check that size is in terms of words. This is needed to avoid
+     * unaligned accesses for some archs when redzoning is used, and makes
+     * sure any on-slab bufctl's are also correctly aligned.
+     */
+    if (size & (BYTES_PER_WORD-1)) {
+        size += (BYTES_PER_WORD-1);
+        size &= ~(BYTES_PER_WORD-1);
+        printk("%sForcing size word alignment - %s\n", func_nm, name);
+    }
+
+#if DEBUG
+    if (flags & SLAB_RED_ZONE) {
+        /*
+         * There is no point trying to honour cache alignment
+         * when redzoning.
+         */
+        flags &= ~SLAB_HWCACHE_ALIGN;
+        size += 2*BYTES_PER_WORD; /* words for redzone */
+    }
+#endif
+    align = BYTES_PER_WORD;
+    if (flags & SLAB_HWCACHE_ALIGN)
+        align = L1_CACHE_BYTES;
+
+    /* Determine if the slab management is 'on' or 'off' slab. */
+    if (size >= (PAGE_SIZE>>3))
+        /*
+         * Size is large, assume best to place the slab management obj
+         * off-slab (should allow better packing of objs).
+         */
+        flags |= CFLGS_OFF_SLAB;
+
+    if (flags & SLAB_HWCACHE_ALIGN) {
+        /* Need to adjust size so that objs are cache aligned. */
+        /* Small obj size, can get at least two per cache line. */
+        /* FIXME: only power of 2 supported, was better */
+        while (size < align/2)
+            align /= 2;
+        size = (size+align-1)&(~(align-1));
+    }
+
+    /* Calculate size (in pages) of slabs, and the num of objs per slab.
+     * This could be made much more intelligent. For now, try to avoid
+     * using high page-orders for slabs. When the gfp() funcs are more
+     * friendly towards high-order requests, this should be changed.
+     */
+    do {
+        unsigned int break_flag = 0;
+cal_wastage:
+        kmem_cache_estimate(cachep->gfporder, size, flags,
+                            &left_over, &cachep->num);
+        if (break_flag)
+            break;
+        if (cachep->gfporder >= MAX_GFP_ORDER)
+            break;
+        if (!cachep->num)
+            goto next;
+        if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
+            /* Oops, this num of objs will cause problems. */
+            cachep->gfporder--;
+            break_flag++;
+            goto cal_wastage;
+        }
+
+        /*
+         * Large num of objs is good, but v. large slabs are currently
+         * bad for the gfp()s.
+         */
+        if (cachep->gfporder >= slab_break_gfp_order)
+            break;
+
+        if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
+            break; /* Acceptable internal fragmentation.
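+                      (left_over*8 <= slab bytes, i.e. at most
+                      12.5% of the slab is wasted).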
*/ +next: + cachep->gfporder++; + } while (1); + + if (!cachep->num) { + printk("kmem_cache_create: couldn't create cache %s.\n", name); + kmem_cache_free(&cache_cache, cachep); + cachep = NULL; + goto opps; + } + slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t)); + + /* + * If the slab has been placed off-slab, and we have enough space then + * move it on-slab. This is at the expense of any extra colouring. + */ + if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { + flags &= ~CFLGS_OFF_SLAB; + left_over -= slab_size; + } + + /* Offset must be a multiple of the alignment. */ + offset += (align-1); + offset &= ~(align-1); + if (!offset) + offset = L1_CACHE_BYTES; + cachep->colour_off = offset; + cachep->colour = left_over/offset; + + /* init remaining fields */ + if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB)) + flags |= CFLGS_OPTIMIZE; + + cachep->flags = flags; + cachep->gfpflags = 0; + if (flags & SLAB_CACHE_DMA) + cachep->gfpflags |= GFP_DMA; + spin_lock_init(&cachep->spinlock); + cachep->objsize = size; + INIT_LIST_HEAD(&cachep->slabs_full); + INIT_LIST_HEAD(&cachep->slabs_partial); + INIT_LIST_HEAD(&cachep->slabs_free); + + if (flags & CFLGS_OFF_SLAB) + cachep->slabp_cache = kmem_find_general_cachep(slab_size,0); + cachep->ctor = ctor; + cachep->dtor = dtor; + /* Copy name over so we don't have problems with unloaded modules */ + strcpy(cachep->name, name); + +#ifdef CONFIG_SMP + if (g_cpucache_up) + enable_cpucache(cachep); +#endif + /* Need the semaphore to access the chain. */ + down(&cache_chain_sem); + { + struct list_head *p; + + list_for_each(p, &cache_chain) { + kmem_cache_t *pc = list_entry(p, kmem_cache_t, next); + + /* The name field is constant - no lock needed. */ + if (!strcmp(pc->name, name)) + BUG(); + } + } + + /* There is no reason to lock our new cache before we + * link it in - no one knows about it yet... + */ + list_add(&cachep->next, &cache_chain); + up(&cache_chain_sem); +opps: + return cachep; +} + + +#if DEBUG +/* + * This check if the kmem_cache_t pointer is chained in the cache_cache + * list. -arca + */ +static int is_chained_kmem_cache(kmem_cache_t * cachep) +{ + struct list_head *p; + int ret = 0; + unsigned long spin_flags; + + /* Find the cache in the chain of caches. */ + down(&cache_chain_sem); + list_for_each(p, &cache_chain) { + if (p == &cachep->next) { + ret = 1; + break; + } + } + up(&cache_chain_sem); + + return ret; +} +#else +#define is_chained_kmem_cache(x) 1 +#endif + +#ifdef CONFIG_SMP +/* + * Waits for all CPUs to execute func(). 
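+ * (The local CPU runs func() directly with irqs disabled; the
+ * smp_call_function() call below makes every other CPU run it too
+ * and waits for them to finish.)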
+ */ +static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg) +{ + local_irq_disable(); + func(arg); + local_irq_enable(); + + if (smp_call_function(func, arg, 1, 1)) + BUG(); +} +typedef struct ccupdate_struct_s +{ + kmem_cache_t *cachep; + cpucache_t *new[NR_CPUS]; +} ccupdate_struct_t; + +static void do_ccupdate_local(void *info) +{ + ccupdate_struct_t *new = (ccupdate_struct_t *)info; + cpucache_t *old = cc_data(new->cachep); + + cc_data(new->cachep) = new->new[smp_processor_id()]; + new->new[smp_processor_id()] = old; +} + +static void free_block (kmem_cache_t* cachep, void** objpp, int len); + +static void drain_cpu_caches(kmem_cache_t *cachep) +{ + ccupdate_struct_t new; + int i; + unsigned long spin_flags; + + memset(&new.new,0,sizeof(new.new)); + + new.cachep = cachep; + + down(&cache_chain_sem); + smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); + + for (i = 0; i < smp_num_cpus; i++) { + cpucache_t* ccold = new.new[cpu_logical_map(i)]; + if (!ccold || (ccold->avail == 0)) + continue; + local_irq_disable(); + free_block(cachep, cc_entry(ccold), ccold->avail); + local_irq_enable(); + ccold->avail = 0; + } + smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); + up(&cache_chain_sem); +} + +#else +#define drain_cpu_caches(cachep) do { } while (0) +#endif + +static int __kmem_cache_shrink(kmem_cache_t *cachep) +{ + slab_t *slabp; + int ret; + + drain_cpu_caches(cachep); + + spin_lock_irq(&cachep->spinlock); + + /* If the cache is growing, stop shrinking. */ + while (!cachep->growing) { + struct list_head *p; + + p = cachep->slabs_free.prev; + if (p == &cachep->slabs_free) + break; + + slabp = list_entry(cachep->slabs_free.prev, slab_t, list); +#if DEBUG + if (slabp->inuse) + BUG(); +#endif + list_del(&slabp->list); + + spin_unlock_irq(&cachep->spinlock); + kmem_slab_destroy(cachep, slabp); + spin_lock_irq(&cachep->spinlock); + } + ret = !list_empty(&cachep->slabs_full) || !list_empty(&cachep->slabs_partial); + spin_unlock_irq(&cachep->spinlock); + return ret; +} + +/** + * kmem_cache_shrink - Shrink a cache. + * @cachep: The cache to shrink. + * + * Releases as many slabs as possible for a cache. + * To help debugging, a zero exit status indicates all slabs were released. + */ +int kmem_cache_shrink(kmem_cache_t *cachep) +{ + if (!cachep || !is_chained_kmem_cache(cachep)) + BUG(); + + return __kmem_cache_shrink(cachep); +} + +/** + * kmem_cache_destroy - delete a cache + * @cachep: the cache to destroy + * + * Remove a kmem_cache_t object from the slab cache. + * Returns 0 on success. + * + * It is expected this function will be called by a module when it is + * unloaded. This will remove the cache completely, and avoid a duplicate + * cache being allocated each time a module is loaded and unloaded, if the + * module doesn't have persistent in-kernel storage across loads and unloads. + * + * The caller must guarantee that noone will allocate memory from the cache + * during the kmem_cache_destroy(). + */ +int kmem_cache_destroy (kmem_cache_t * cachep) +{ + unsigned long spin_flags; + + if (!cachep || cachep->growing) + BUG(); + + /* Find the cache in the chain of caches. 
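+	 * The cache is unlinked from the chain before it is shrunk, so
+	 * no new user can find it; if the shrink below fails, it is
+	 * re-linked and the destroy is abandoned.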
*/ + down(&cache_chain_sem); + /* the chain is never empty, cache_cache is never destroyed */ + if (clock_searchp == cachep) + clock_searchp = list_entry(cachep->next.next, + kmem_cache_t, next); + list_del(&cachep->next); + up(&cache_chain_sem); + + if (__kmem_cache_shrink(cachep)) { + printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n", + cachep); + down(&cache_chain_sem); + list_add(&cachep->next,&cache_chain); + up(&cache_chain_sem); + return 1; + } +#ifdef CONFIG_SMP + { + int i; + for (i = 0; i < NR_CPUS; i++) + kfree(cachep->cpudata[i]); + } +#endif + kmem_cache_free(&cache_cache, cachep); + + return 0; +} + +/* Get the memory for a slab management obj. */ +static inline slab_t * kmem_cache_slabmgmt (kmem_cache_t *cachep, + void *objp, int colour_off, int local_flags) +{ + slab_t *slabp; + + if (OFF_SLAB(cachep)) { + /* Slab management obj is off-slab. */ + slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags); + if (!slabp) + return NULL; + } else { + /* FIXME: change to + slabp = objp + * if you enable OPTIMIZE + */ + slabp = objp+colour_off; + colour_off += L1_CACHE_ALIGN(cachep->num * + sizeof(kmem_bufctl_t) + sizeof(slab_t)); + } + slabp->inuse = 0; + slabp->colouroff = colour_off; + slabp->s_mem = objp+colour_off; + + return slabp; +} + +static inline void kmem_cache_init_objs (kmem_cache_t * cachep, + slab_t * slabp, unsigned long ctor_flags) +{ + int i; + + for (i = 0; i < cachep->num; i++) { + void* objp = slabp->s_mem+cachep->objsize*i; +#if DEBUG + if (cachep->flags & SLAB_RED_ZONE) { + *((unsigned long*)(objp)) = RED_MAGIC1; + *((unsigned long*)(objp + cachep->objsize - + BYTES_PER_WORD)) = RED_MAGIC1; + objp += BYTES_PER_WORD; + } +#endif + + /* + * Constructors are not allowed to allocate memory from + * the same cache which they are a constructor for. + * Otherwise, deadlock. They must also be threaded. + */ + if (cachep->ctor) + cachep->ctor(objp, cachep, ctor_flags); +#if DEBUG + if (cachep->flags & SLAB_RED_ZONE) + objp -= BYTES_PER_WORD; + if (cachep->flags & SLAB_POISON) + /* need to poison the objs */ + kmem_poison_obj(cachep, objp); + if (cachep->flags & SLAB_RED_ZONE) { + if (*((unsigned long*)(objp)) != RED_MAGIC1) + BUG(); + if (*((unsigned long*)(objp + cachep->objsize - + BYTES_PER_WORD)) != RED_MAGIC1) + BUG(); + } +#endif + slab_bufctl(slabp)[i] = i+1; + } + slab_bufctl(slabp)[i-1] = BUFCTL_END; + slabp->free = 0; +} + +/* + * Grow (by 1) the number of slabs within a cache. This is called by + * kmem_cache_alloc() when there are no active objs left in a cache. + */ +static int kmem_cache_grow (kmem_cache_t * cachep, int flags) +{ + slab_t *slabp; + struct pfn_info *page; unsigned int i; + void *objp; + size_t offset; + unsigned int local_flags; + unsigned long ctor_flags; + unsigned long save_flags; + + /* Be lazy and only check for valid flags here, + * keeping it out of the critical path in kmem_cache_alloc(). + */ + if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) + BUG(); + if (flags & SLAB_NO_GROW) + return 0; + +#if 0 + if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) + BUG(); +#endif + + ctor_flags = SLAB_CTOR_CONSTRUCTOR; + local_flags = (flags & SLAB_LEVEL_MASK); + if (local_flags == SLAB_ATOMIC) + /* + * Not allowed to sleep. Need to tell a constructor about + * this - it might need to know... + */ + ctor_flags |= SLAB_CTOR_ATOMIC; + + /* About to mess with non-constant members - lock. */ + spin_lock_irqsave(&cachep->spinlock, save_flags); + + /* Get colour for the slab, and cal the next value. 
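+	 * E.g. with colour_off 32 and colour 4 (hypothetical values),
+	 * successive slabs start at offsets 0, 32, 64, 96 and then wrap,
+	 * spreading objects of different slabs across cache lines.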
*/ + offset = cachep->colour_next; + cachep->colour_next++; + if (cachep->colour_next >= cachep->colour) + cachep->colour_next = 0; + offset *= cachep->colour_off; + cachep->dflags |= DFLGS_GROWN; + + cachep->growing++; + spin_unlock_irqrestore(&cachep->spinlock, save_flags); + + /* A series of memory allocations for a new slab. + * Neither the cache-chain semaphore, or cache-lock, are + * held, but the incrementing c_growing prevents this + * cache from being reaped or shrunk. + * Note: The cache could be selected in for reaping in + * kmem_cache_reap(), but when the final test is made the + * growing value will be seen. + */ + + /* Get mem for the objs. */ + if (!(objp = kmem_getpages(cachep, flags))) + goto failed; + + /* Get slab management. */ + if (!(slabp = kmem_cache_slabmgmt(cachep, objp, offset, local_flags))) + goto opps1; + + /* Nasty!!!!!! I hope this is OK. */ + i = 1 << cachep->gfporder; + page = virt_to_page(objp); + do { + SET_PAGE_CACHE(page, cachep); + SET_PAGE_SLAB(page, slabp); + PageSetSlab(page); + page++; + } while (--i); + + kmem_cache_init_objs(cachep, slabp, ctor_flags); + + spin_lock_irqsave(&cachep->spinlock, save_flags); + cachep->growing--; + + /* Make slab active. */ + list_add_tail(&slabp->list, &cachep->slabs_free); + STATS_INC_GROWN(cachep); + cachep->failures = 0; + + spin_unlock_irqrestore(&cachep->spinlock, save_flags); + return 1; +opps1: + kmem_freepages(cachep, objp); +failed: + spin_lock_irqsave(&cachep->spinlock, save_flags); + cachep->growing--; + spin_unlock_irqrestore(&cachep->spinlock, save_flags); + return 0; +} + +/* + * Perform extra freeing checks: + * - detect double free + * - detect bad pointers. + * Called with the cache-lock held. + */ + +#if DEBUG +static int kmem_extra_free_checks (kmem_cache_t * cachep, + slab_t *slabp, void * objp) +{ + int i; + unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize; + + if (objnr >= cachep->num) + BUG(); + if (objp != slabp->s_mem + objnr*cachep->objsize) + BUG(); + + /* Check slab's freelist to see if this obj is there. */ + for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { + if (i == objnr) + BUG(); + } + return 0; +} +#endif + +static inline void kmem_cache_alloc_head(kmem_cache_t *cachep, int flags) +{ + if (flags & SLAB_DMA) { + if (!(cachep->gfpflags & GFP_DMA)) + BUG(); + } else { + if (cachep->gfpflags & GFP_DMA) + BUG(); + } +} + +static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep, + slab_t *slabp) +{ + void *objp; + + STATS_INC_ALLOCED(cachep); + STATS_INC_ACTIVE(cachep); + STATS_SET_HIGH(cachep); + + /* get obj pointer */ + slabp->inuse++; + objp = slabp->s_mem + slabp->free*cachep->objsize; + slabp->free=slab_bufctl(slabp)[slabp->free]; + + if (unlikely(slabp->free == BUFCTL_END)) { + list_del(&slabp->list); + list_add(&slabp->list, &cachep->slabs_full); + } +#if DEBUG + if (cachep->flags & SLAB_POISON) + if (kmem_check_poison_obj(cachep, objp)) + BUG(); + if (cachep->flags & SLAB_RED_ZONE) { + /* Set alloc red-zone, and check old one. */ + if (xchg((unsigned long *)objp, RED_MAGIC2) != + RED_MAGIC1) + BUG(); + if (xchg((unsigned long *)(objp+cachep->objsize - + BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1) + BUG(); + objp += BYTES_PER_WORD; + } +#endif + return objp; +} + +/* + * Returns a ptr to an obj in the given cache. 
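+ * A partial slab is used first; failing that, a free slab is moved
+ * onto the partial list; failing that, we jump to alloc_new_slab.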
+ * caller must guarantee synchronization + * #define for the goto optimization 8-) + */ +#define kmem_cache_alloc_one(cachep) \ +({ \ + struct list_head * slabs_partial, * entry; \ + slab_t *slabp; \ + \ + slabs_partial = &(cachep)->slabs_partial; \ + entry = slabs_partial->next; \ + if (unlikely(entry == slabs_partial)) { \ + struct list_head * slabs_free; \ + slabs_free = &(cachep)->slabs_free; \ + entry = slabs_free->next; \ + if (unlikely(entry == slabs_free)) \ + goto alloc_new_slab; \ + list_del(entry); \ + list_add(entry, slabs_partial); \ + } \ + \ + slabp = list_entry(entry, slab_t, list); \ + kmem_cache_alloc_one_tail(cachep, slabp); \ +}) + +#ifdef CONFIG_SMP +void* kmem_cache_alloc_batch(kmem_cache_t* cachep, int flags) +{ + int batchcount = cachep->batchcount; + cpucache_t* cc = cc_data(cachep); + + spin_lock(&cachep->spinlock); + while (batchcount--) { + struct list_head * slabs_partial, * entry; + slab_t *slabp; + /* Get slab alloc is to come from. */ + slabs_partial = &(cachep)->slabs_partial; + entry = slabs_partial->next; + if (unlikely(entry == slabs_partial)) { + struct list_head * slabs_free; + slabs_free = &(cachep)->slabs_free; + entry = slabs_free->next; + if (unlikely(entry == slabs_free)) + break; + list_del(entry); + list_add(entry, slabs_partial); + } + + slabp = list_entry(entry, slab_t, list); + cc_entry(cc)[cc->avail++] = + kmem_cache_alloc_one_tail(cachep, slabp); + } + spin_unlock(&cachep->spinlock); + + if (cc->avail) + return cc_entry(cc)[--cc->avail]; + return NULL; +} +#endif + +static inline void * __kmem_cache_alloc (kmem_cache_t *cachep, int flags) +{ + unsigned long save_flags; + void* objp; + + kmem_cache_alloc_head(cachep, flags); +try_again: + local_irq_save(save_flags); +#ifdef CONFIG_SMP + { + cpucache_t *cc = cc_data(cachep); + + if (cc) { + if (cc->avail) { + STATS_INC_ALLOCHIT(cachep); + objp = cc_entry(cc)[--cc->avail]; + } else { + STATS_INC_ALLOCMISS(cachep); + objp = kmem_cache_alloc_batch(cachep,flags); + if (!objp) + goto alloc_new_slab_nolock; + } + } else { + spin_lock(&cachep->spinlock); + objp = kmem_cache_alloc_one(cachep); + spin_unlock(&cachep->spinlock); + } + } +#else + objp = kmem_cache_alloc_one(cachep); +#endif + local_irq_restore(save_flags); + return objp; +alloc_new_slab: +#ifdef CONFIG_SMP + spin_unlock(&cachep->spinlock); +alloc_new_slab_nolock: +#endif + local_irq_restore(save_flags); + if (kmem_cache_grow(cachep, flags)) + /* Someone may have stolen our objs. Doesn't matter, we'll + * just come back here again. + */ + goto try_again; + return NULL; +} + +/* + * Release an obj back to its cache. If the obj has a constructed + * state, it should be in this state _before_ it is released. 
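+ * (e.g. an object with a constructor is handed back in its
+ * constructed state, so a later kmem_cache_alloc() of it can skip
+ * the re-initialisation.)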
+ * - caller is responsible for the synchronization + */ + +#if DEBUG +# define CHECK_NR(pg) \ + do { \ + if (!VALID_PAGE(pg)) { \ + printk(KERN_ERR "kfree: out of range ptr %lxh.\n", \ + (unsigned long)objp); \ + BUG(); \ + } \ + } while (0) +# define CHECK_PAGE(page) \ + do { \ + CHECK_NR(page); \ + if (!PageSlab(page)) { \ + printk(KERN_ERR "kfree: bad ptr %lxh.\n", \ + (unsigned long)objp); \ + BUG(); \ + } \ + } while (0) + +#else +# define CHECK_PAGE(pg) do { } while (0) +#endif + +static inline void kmem_cache_free_one(kmem_cache_t *cachep, void *objp) +{ + slab_t* slabp; + + CHECK_PAGE(virt_to_page(objp)); + /* reduces memory footprint + * + if (OPTIMIZE(cachep)) + slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1))); + else + */ + slabp = GET_PAGE_SLAB(virt_to_page(objp)); + +#if DEBUG + if (cachep->flags & SLAB_DEBUG_INITIAL) + /* Need to call the slab's constructor so the + * caller can perform a verify of its state (debugging). + * Called without the cache-lock held. + */ + cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); + + if (cachep->flags & SLAB_RED_ZONE) { + objp -= BYTES_PER_WORD; + if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2) + /* Either write before start, or a double free. */ + BUG(); + if (xchg((unsigned long *)(objp+cachep->objsize - + BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2) + /* Either write past end, or a double free. */ + BUG(); + } + if (cachep->flags & SLAB_POISON) + kmem_poison_obj(cachep, objp); + if (kmem_extra_free_checks(cachep, slabp, objp)) + return; +#endif + { + unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize; + + slab_bufctl(slabp)[objnr] = slabp->free; + slabp->free = objnr; + } + STATS_DEC_ACTIVE(cachep); + + /* fixup slab chains */ + { + int inuse = slabp->inuse; + if (unlikely(!--slabp->inuse)) { + /* Was partial or full, now empty. */ + list_del(&slabp->list); + list_add(&slabp->list, &cachep->slabs_free); + } else if (unlikely(inuse == cachep->num)) { + /* Was full. */ + list_del(&slabp->list); + list_add(&slabp->list, &cachep->slabs_partial); + } + } +} + +#ifdef CONFIG_SMP +static inline void __free_block (kmem_cache_t* cachep, + void** objpp, int len) +{ + for ( ; len > 0; len--, objpp++) + kmem_cache_free_one(cachep, *objpp); +} + +static void free_block (kmem_cache_t* cachep, void** objpp, int len) +{ + spin_lock(&cachep->spinlock); + __free_block(cachep, objpp, len); + spin_unlock(&cachep->spinlock); +} +#endif + +/* + * __kmem_cache_free + * called with disabled ints + */ +static inline void __kmem_cache_free (kmem_cache_t *cachep, void* objp) +{ +#ifdef CONFIG_SMP + cpucache_t *cc = cc_data(cachep); + + CHECK_PAGE(virt_to_page(objp)); + if (cc) { + int batchcount; + if (cc->avail < cc->limit) { + STATS_INC_FREEHIT(cachep); + cc_entry(cc)[cc->avail++] = objp; + return; + } + STATS_INC_FREEMISS(cachep); + batchcount = cachep->batchcount; + cc->avail -= batchcount; + free_block(cachep, + &cc_entry(cc)[cc->avail],batchcount); + cc_entry(cc)[cc->avail++] = objp; + return; + } else { + free_block(cachep, &objp, 1); + } +#else + kmem_cache_free_one(cachep, objp); +#endif +} + +/** + * kmem_cache_alloc - Allocate an object + * @cachep: The cache to allocate from. + * @flags: See kmalloc(). + * + * Allocate an object from this cache. The flags are only relevant + * if the cache has no available objects. + */ +void * kmem_cache_alloc (kmem_cache_t *cachep, int flags) +{ + return __kmem_cache_alloc(cachep, flags); +} + +/** + * kmalloc - allocate memory + * @size: how many bytes of memory are required. 
+ * @flags: the type of memory to allocate. + * + * kmalloc is the normal method of allocating memory + * in the kernel. + * + * The @flags argument may be one of: + * + * %GFP_USER - Allocate memory on behalf of user. May sleep. + * + * %GFP_KERNEL - Allocate normal kernel ram. May sleep. + * + * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers. + * + * Additionally, the %GFP_DMA flag may be set to indicate the memory + * must be suitable for DMA. This can mean different things on different + * platforms. For example, on i386, it means that the memory must come + * from the first 16MB. + */ +void * kmalloc (size_t size, int flags) +{ + cache_sizes_t *csizep = cache_sizes; + + for (; csizep->cs_size; csizep++) { + if (size > csizep->cs_size) + continue; + return __kmem_cache_alloc(flags & GFP_DMA ? + csizep->cs_dmacachep : csizep->cs_cachep, flags); + } + return NULL; +} + +/** + * kmem_cache_free - Deallocate an object + * @cachep: The cache the allocation was from. + * @objp: The previously allocated object. + * + * Free an object which was previously allocated from this + * cache. + */ +void kmem_cache_free (kmem_cache_t *cachep, void *objp) +{ + unsigned long flags; +#if DEBUG + CHECK_PAGE(virt_to_page(objp)); + if (cachep != GET_PAGE_CACHE(virt_to_page(objp))) + BUG(); +#endif + + local_irq_save(flags); + __kmem_cache_free(cachep, objp); + local_irq_restore(flags); +} + +/** + * kfree - free previously allocated memory + * @objp: pointer returned by kmalloc. + * + * Don't free memory not originally allocated by kmalloc() + * or you will run into trouble. + */ +void kfree (const void *objp) +{ + kmem_cache_t *c; + unsigned long flags; + + if (!objp) + return; + local_irq_save(flags); + CHECK_PAGE(virt_to_page(objp)); + c = GET_PAGE_CACHE(virt_to_page(objp)); + __kmem_cache_free(c, (void*)objp); + local_irq_restore(flags); +} + +kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) +{ + cache_sizes_t *csizep = cache_sizes; + + /* This function could be moved to the header file, and + * made inline so consumers can quickly determine what + * cache pointer they require. + */ + for ( ; csizep->cs_size; csizep++) { + if (size > csizep->cs_size) + continue; + break; + } + return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep; +} + +#ifdef CONFIG_SMP + +/* called with cache_chain_sem acquired. */ +static int kmem_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount) +{ + ccupdate_struct_t new; + int i; + + /* + * These are admin-provided, so we are more graceful. 
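+ * A hypothetical admin request, mirroring enable_cpucache()'s
+ * defaults for mid-sized objects, would be
+ *
+ *	kmem_tune_cpucache(cachep, 124, 62);
+ *
+ * i.e. a per-CPU limit of 124 objects, refilled/flushed 62 at a time;
+ * the checks below accept or reject such a request.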
+ */ + if (limit < 0) + return -EINVAL; + if (batchcount < 0) + return -EINVAL; + if (batchcount > limit) + return -EINVAL; + if (limit != 0 && !batchcount) + return -EINVAL; + + memset(&new.new,0,sizeof(new.new)); + if (limit) { + for (i = 0; i< smp_num_cpus; i++) { + cpucache_t* ccnew; + + ccnew = kmalloc(sizeof(void*)*limit+ + sizeof(cpucache_t), GFP_KERNEL); + if (!ccnew) + goto oom; + ccnew->limit = limit; + ccnew->avail = 0; + new.new[cpu_logical_map(i)] = ccnew; + } + } + new.cachep = cachep; + spin_lock_irq(&cachep->spinlock); + cachep->batchcount = batchcount; + spin_unlock_irq(&cachep->spinlock); + + smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); + + for (i = 0; i < smp_num_cpus; i++) { + cpucache_t* ccold = new.new[cpu_logical_map(i)]; + if (!ccold) + continue; + local_irq_disable(); + free_block(cachep, cc_entry(ccold), ccold->avail); + local_irq_enable(); + kfree(ccold); + } + return 0; +oom: + for (i--; i >= 0; i--) + kfree(new.new[cpu_logical_map(i)]); + return -ENOMEM; +} + +static void enable_cpucache (kmem_cache_t *cachep) +{ + int err; + int limit; + + /* FIXME: optimize */ + if (cachep->objsize > PAGE_SIZE) + return; + if (cachep->objsize > 1024) + limit = 60; + else if (cachep->objsize > 256) + limit = 124; + else + limit = 252; + + err = kmem_tune_cpucache(cachep, limit, limit/2); + if (err) + printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", + cachep->name, -err); +} + +static void enable_all_cpucaches (void) +{ + struct list_head* p; + unsigned long spin_flags; + + down(&cache_chain_sem); + + p = &cache_cache.next; + do { + kmem_cache_t* cachep = list_entry(p, kmem_cache_t, next); + + enable_cpucache(cachep); + p = cachep->next.next; + } while (p != &cache_cache.next); + + up(&cache_chain_sem); +} +#endif + +/** + * kmem_cache_reap - Reclaim memory from caches. + * @gfp_mask: the type of memory required. + * + * Called from do_try_to_free_pages() and __alloc_pages() + */ +int kmem_cache_reap (int gfp_mask) +{ + slab_t *slabp; + kmem_cache_t *searchp; + kmem_cache_t *best_cachep; + unsigned int best_pages; + unsigned int best_len; + unsigned int scan; + int ret = 0; + unsigned long spin_flags; + + down(&cache_chain_sem); + + scan = REAP_SCANLEN; + best_len = 0; + best_pages = 0; + best_cachep = NULL; + searchp = clock_searchp; + do { + unsigned int pages; + struct list_head* p; + unsigned int full_free; + + /* It's safe to test this without holding the cache-lock. */ + if (searchp->flags & SLAB_NO_REAP) + goto next; + spin_lock_irq(&searchp->spinlock); + if (searchp->growing) + goto next_unlock; + if (searchp->dflags & DFLGS_GROWN) { + searchp->dflags &= ~DFLGS_GROWN; + goto next_unlock; + } +#ifdef CONFIG_SMP + { + cpucache_t *cc = cc_data(searchp); + if (cc && cc->avail) { + __free_block(searchp, cc_entry(cc), cc->avail); + cc->avail = 0; + } + } +#endif + + full_free = 0; + p = searchp->slabs_free.next; + while (p != &searchp->slabs_free) { + slabp = list_entry(p, slab_t, list); +#if DEBUG + if (slabp->inuse) + BUG(); +#endif + full_free++; + p = p->next; + } + + /* + * Try to avoid slabs with constructors and/or + * more than one page per slab (as it can be difficult + * to get high orders from gfp()). 
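+	 * E.g. ten free order-0 slabs from a cache with a constructor
+	 * score (10*4+1)/5 = 8 pages here rather than 10.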
+ */
+        pages = full_free * (1<<searchp->gfporder);
+        if (searchp->ctor)
+            pages = (pages*4+1)/5;
+        if (searchp->gfporder)
+            pages = (pages*4+1)/5;
+        if (pages > best_pages) {
+            best_cachep = searchp;
+            best_len = full_free;
+            best_pages = pages;
+            if (pages >= REAP_PERFECT) {
+                clock_searchp = list_entry(searchp->next.next,
+                                           kmem_cache_t,next);
+                goto perfect;
+            }
+        }
+next_unlock:
+        spin_unlock_irq(&searchp->spinlock);
+next:
+        searchp = list_entry(searchp->next.next,kmem_cache_t,next);
+    } while (--scan && searchp != clock_searchp);
+
+    clock_searchp = searchp;
+
+    if (!best_cachep)
+        /* couldn't find anything to reap */
+        goto out;
+
+    spin_lock_irq(&best_cachep->spinlock);
+perfect:
+    /* free only 50% of the free slabs */
+    best_len = (best_len + 1)/2;
+    for (scan = 0; scan < best_len; scan++) {
+        struct list_head *p;
+
+        if (best_cachep->growing)
+            break;
+        p = best_cachep->slabs_free.prev;
+        if (p == &best_cachep->slabs_free)
+            break;
+        slabp = list_entry(p,slab_t,list);
+#if DEBUG
+        if (slabp->inuse)
+            BUG();
+#endif
+        list_del(&slabp->list);
+        STATS_INC_REAPED(best_cachep);
+
+        /* Safe to drop the lock. The slab is no longer linked to the
+         * cache.
+         */
+        spin_unlock_irq(&best_cachep->spinlock);
+        kmem_slab_destroy(best_cachep, slabp);
+        spin_lock_irq(&best_cachep->spinlock);
+    }
+    spin_unlock_irq(&best_cachep->spinlock);
+    ret = scan * (1 << best_cachep->gfporder);
+out:
+    up(&cache_chain_sem);
+    return ret;
+}
+
diff --git a/xen-2.4.16/common/softirq.c b/xen-2.4.16/common/softirq.c
new file mode 100644
index 0000000000..35932711ec
--- /dev/null
+++ b/xen-2.4.16/common/softirq.c
@@ -0,0 +1,334 @@
+/*
+ *	linux/kernel/softirq.c
+ *
+ *	Copyright (C) 1992 Linus Torvalds
+ *
+ * Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
+ * due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ */
+
+#include
+#include
+#include
+//#include
+#include
+//#include
+#include
+#include
+
+/*
+   - No shared variables, all the data are CPU local.
+   - If a softirq needs serialization, let it serialize itself
+     by its own spinlocks.
+   - Even if softirq is serialized, only local cpu is marked for
+     execution. Hence, we get something sort of weak cpu binding.
+     Though it is still not clear, will it result in better locality
+     or will not.
+   - These softirqs are not masked by global cli() and start_bh_atomic()
+     (by clear reasons). Hence, old parts of code still using global locks
+     MUST NOT use softirqs, but insert interfacing routines acquiring
+     global locks. F.e. look at BHs implementation.
+
+   Examples:
+   - NET RX softirq. It is multithreaded and does not require
+     any global serialization.
+   - NET TX softirq. It kicks software netdevice queues, hence
+     it is logically serialized per device, but this serialization
+     is invisible to common code.
+   - Tasklets: serialized wrt itself.
+   - Bottom halves: globally serialized, grr...
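+
+   A minimal client of this interface (hypothetical softirq number
+   MY_SOFTIRQ and handler my_action) would call
+	open_softirq(MY_SOFTIRQ, my_action, NULL);
+   once at init time, and then raise_softirq(MY_SOFTIRQ) to have
+   my_action run on the local CPU soon afterwards.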
+ */ + +irq_cpustat_t irq_stat[NR_CPUS]; + +static struct softirq_action softirq_vec[32] __cacheline_aligned; + + +asmlinkage void do_softirq() +{ + int cpu = smp_processor_id(); + __u32 pending; + long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + pending = softirq_pending(cpu); + + while (pending) { + struct softirq_action *h; + + local_bh_disable(); +restart: + /* Reset the pending bitmask before enabling irqs */ + softirq_pending(cpu) = 0; + + local_irq_enable(); + + h = softirq_vec; + + do { + if (pending & 1) + h->action(h); + h++; + pending >>= 1; + } while (pending); + + local_irq_disable(); + + pending = softirq_pending(cpu); + if (pending) goto restart; + __local_bh_enable(); + } + + local_irq_restore(flags); +} + +/* + * This function must run with irq disabled! + */ +inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +{ + __cpu_raise_softirq(cpu, nr); + +#ifdef CONFIG_SMP + if ( cpu != smp_processor_id() ) + smp_send_event_check_cpu(cpu); +#endif +} + +void raise_softirq(unsigned int nr) +{ + long flags; + + local_irq_save(flags); + cpu_raise_softirq(smp_processor_id(), nr); + local_irq_restore(flags); +} + +void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +{ + softirq_vec[nr].data = data; + softirq_vec[nr].action = action; +} + + +/* Tasklets */ + +struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; +struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; + +void __tasklet_schedule(struct tasklet_struct *t) +{ + int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_restore(flags); +} + +void __tasklet_hi_schedule(struct tasklet_struct *t) +{ + int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_restore(flags); +} + +static void tasklet_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = NULL; + local_irq_enable(); + + while (list) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + BUG(); + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + + local_irq_disable(); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_enable(); + } +} + +static void tasklet_hi_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = NULL; + local_irq_enable(); + + while (list) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) + BUG(); + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + + local_irq_disable(); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + __cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_enable(); + } +} + + +void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data) +{ + t->next = NULL; + t->state = 0; 
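+	/* a zero count means enabled: tasklet_action() above runs t->func
+	 * only while atomic_read(&t->count) == 0 */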
+ atomic_set(&t->count, 0); + t->func = func; + t->data = data; +} + +void tasklet_kill(struct tasklet_struct *t) +{ + if (in_interrupt()) + printk("Attempt to kill tasklet from interrupt\n"); + + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + set_current_state(TASK_RUNNING); + do { + current->policy |= SCHED_YIELD; + schedule(); + } while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } + tasklet_unlock_wait(t); + clear_bit(TASKLET_STATE_SCHED, &t->state); +} + + + +/* Old style BHs */ + +static void (*bh_base[32])(void); +struct tasklet_struct bh_task_vec[32]; + +/* BHs are serialized by spinlock global_bh_lock. + + It is still possible to make synchronize_bh() as + spin_unlock_wait(&global_bh_lock). This operation is not used + by kernel now, so that this lock is not made private only + due to wait_on_irq(). + + It can be removed only after auditing all the BHs. + */ +spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED; + +static void bh_action(unsigned long nr) +{ + int cpu = smp_processor_id(); + + if (!spin_trylock(&global_bh_lock)) + goto resched; + + if (!hardirq_trylock(cpu)) + goto resched_unlock; + + if (bh_base[nr]) + bh_base[nr](); + + hardirq_endlock(cpu); + spin_unlock(&global_bh_lock); + return; + +resched_unlock: + spin_unlock(&global_bh_lock); +resched: + mark_bh(nr); +} + +void init_bh(int nr, void (*routine)(void)) +{ + bh_base[nr] = routine; + mb(); +} + +void remove_bh(int nr) +{ + tasklet_kill(bh_task_vec+nr); + bh_base[nr] = NULL; +} + +void __init softirq_init() +{ + int i; + + for (i=0; i<32; i++) + tasklet_init(bh_task_vec+i, bh_action, i); + + open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); + open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); +} + +void __run_task_queue(task_queue *list) +{ + struct list_head head, *next; + unsigned long flags; + + spin_lock_irqsave(&tqueue_lock, flags); + list_add(&head, list); + list_del_init(list); + spin_unlock_irqrestore(&tqueue_lock, flags); + + next = head.next; + while (next != &head) { + void (*f) (void *); + struct tq_struct *p; + void *data; + + p = list_entry(next, struct tq_struct, list); + next = next->next; + f = p->routine; + data = p->data; + wmb(); + p->sync = 0; + if (f) + f(data); + } +} + diff --git a/xen-2.4.16/common/timer.c b/xen-2.4.16/common/timer.c new file mode 100644 index 0000000000..388275307a --- /dev/null +++ b/xen-2.4.16/common/timer.c @@ -0,0 +1,621 @@ +/* + * linux/kernel/timer.c + * + * Kernel internal timers, kernel timekeeping, basic process system calls + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. + * + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + * Copyright (C) 1998 Andrea Arcangeli + * 1999-03-10 Improved NTP compatibility by Ulrich Windl + */ + +#include +#include +#include +#include +#include +#include +#include +//#include +#include +//#include + +#include + +#include + +/* + * Timekeeping variables + */ + +long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ + +/* The current time */ +struct timeval xtime __attribute__ ((aligned (16))); + +/* Don't completely fail for HZ > 500. */ +int tickadj = 500/HZ ? 
: 1; /* microsecs */ + +DECLARE_TASK_QUEUE(tq_timer); +DECLARE_TASK_QUEUE(tq_immediate); + +/* + * phase-lock loop variables + */ +/* TIME_ERROR prevents overwriting the CMOS clock */ +int time_state = TIME_OK; /* clock synchronization status */ +int time_status = STA_UNSYNC; /* clock status bits */ +long time_offset; /* time adjustment (us) */ +long time_constant = 2; /* pll time constant */ +long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ +long time_precision = 1; /* clock precision (us) */ +long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ +long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ +long time_phase; /* phase offset (scaled us) */ +long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; + /* frequency offset (scaled ppm)*/ +long time_adj; /* tick adjust (scaled 1 / HZ) */ +long time_reftime; /* time at last adjustment (s) */ + +long time_adjust; +long time_adjust_step; + +unsigned long event; + +unsigned long volatile jiffies; + +unsigned int * prof_buffer; +unsigned long prof_len; +unsigned long prof_shift; + +/* + * Event timer code + */ +#define TVN_BITS 6 +#define TVR_BITS 8 +#define TVN_SIZE (1 << TVN_BITS) +#define TVR_SIZE (1 << TVR_BITS) +#define TVN_MASK (TVN_SIZE - 1) +#define TVR_MASK (TVR_SIZE - 1) + +struct timer_vec { + int index; + struct list_head vec[TVN_SIZE]; +}; + +struct timer_vec_root { + int index; + struct list_head vec[TVR_SIZE]; +}; + +static struct timer_vec tv5; +static struct timer_vec tv4; +static struct timer_vec tv3; +static struct timer_vec tv2; +static struct timer_vec_root tv1; + +static struct timer_vec * const tvecs[] = { + (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 +}; + +#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) + +void init_timervecs (void) +{ + int i; + + for (i = 0; i < TVN_SIZE; i++) { + INIT_LIST_HEAD(tv5.vec + i); + INIT_LIST_HEAD(tv4.vec + i); + INIT_LIST_HEAD(tv3.vec + i); + INIT_LIST_HEAD(tv2.vec + i); + } + for (i = 0; i < TVR_SIZE; i++) + INIT_LIST_HEAD(tv1.vec + i); +} + +static unsigned long timer_jiffies; + +static inline void internal_add_timer(struct timer_list *timer) +{ + /* + * must be cli-ed when calling this + */ + unsigned long expires = timer->expires; + unsigned long idx = expires - timer_jiffies; + struct list_head * vec; + + if (idx < TVR_SIZE) { + int i = expires & TVR_MASK; + vec = tv1.vec + i; + } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { + int i = (expires >> TVR_BITS) & TVN_MASK; + vec = tv2.vec + i; + } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; + vec = tv3.vec + i; + } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; + vec = tv4.vec + i; + } else if ((signed long) idx < 0) { + /* can happen if you add a timer with expires == jiffies, + * or you set a timer to go off in the past + */ + vec = tv1.vec + tv1.index; + } else if (idx <= 0xffffffffUL) { + int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + vec = tv5.vec + i; + } else { + /* Can only get here on architectures with 64-bit jiffies */ + INIT_LIST_HEAD(&timer->list); + return; + } + /* + * Timers are FIFO! 
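+ * New timers go on the tail of their bucket (list_add() at
+ * vec->prev), so timers with the same expiry run in insertion order.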
+ */ + list_add(&timer->list, vec->prev); +} + +/* Initialize both explicitly - let's try to have them in the same cache line */ +spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_SMP +volatile struct timer_list * volatile running_timer; +#define timer_enter(t) do { running_timer = t; mb(); } while (0) +#define timer_exit() do { running_timer = NULL; } while (0) +#define timer_is_running(t) (running_timer == t) +#define timer_synchronize(t) while (timer_is_running(t)) barrier() +#else +#define timer_enter(t) do { } while (0) +#define timer_exit() do { } while (0) +#endif + +void add_timer(struct timer_list *timer) +{ + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + if (timer_pending(timer)) + goto bug; + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return; +bug: + spin_unlock_irqrestore(&timerlist_lock, flags); + printk("bug: kernel timer added twice at %p.\n", + __builtin_return_address(0)); +} + +static inline int detach_timer (struct timer_list *timer) +{ + if (!timer_pending(timer)) + return 0; + list_del(&timer->list); + return 1; +} + +int mod_timer(struct timer_list *timer, unsigned long expires) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + timer->expires = expires; + ret = detach_timer(timer); + internal_add_timer(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; +} + +int del_timer(struct timer_list * timer) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&timerlist_lock, flags); + ret = detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; +} + +#ifdef CONFIG_SMP +void sync_timers(void) +{ + spin_unlock_wait(&global_bh_lock); +} + +/* + * SMP specific function to delete periodic timer. + * Caller must disable by some means restarting the timer + * for new. Upon exit the timer is not queued and handler is not running + * on any CPU. It returns number of times, which timer was deleted + * (for reference counting). + */ + +int del_timer_sync(struct timer_list * timer) +{ + int ret = 0; + + for (;;) { + unsigned long flags; + int running; + + spin_lock_irqsave(&timerlist_lock, flags); + ret += detach_timer(timer); + timer->list.next = timer->list.prev = 0; + running = timer_is_running(timer); + spin_unlock_irqrestore(&timerlist_lock, flags); + + if (!running) + break; + + timer_synchronize(timer); + } + + return ret; +} +#endif + + +static inline void cascade_timers(struct timer_vec *tv) +{ + /* cascade all the timers from tv up one level */ + struct list_head *head, *curr, *next; + + head = tv->vec + tv->index; + curr = head->next; + /* + * We are removing _all_ timers from the list, so we don't have to + * detach them individually, just clear the list afterwards. 
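+ * Each one is re-hashed by internal_add_timer(); e.g. a timer
+ * cascading out of tv2 now has idx < TVR_SIZE and drops into one of
+ * tv1's 256 buckets.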
+ */ + while (curr != head) { + struct timer_list *tmp; + + tmp = list_entry(curr, struct timer_list, list); + next = curr->next; + list_del(curr); // not needed + internal_add_timer(tmp); + curr = next; + } + INIT_LIST_HEAD(head); + tv->index = (tv->index + 1) & TVN_MASK; +} + +static inline void run_timer_list(void) +{ + spin_lock_irq(&timerlist_lock); + while ((long)(jiffies - timer_jiffies) >= 0) { + struct list_head *head, *curr; + if (!tv1.index) { + int n = 1; + do { + cascade_timers(tvecs[n]); + } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); + } +repeat: + head = tv1.vec + tv1.index; + curr = head->next; + if (curr != head) { + struct timer_list *timer; + void (*fn)(unsigned long); + unsigned long data; + + timer = list_entry(curr, struct timer_list, list); + fn = timer->function; + data= timer->data; + + detach_timer(timer); + timer->list.next = timer->list.prev = NULL; + timer_enter(timer); + spin_unlock_irq(&timerlist_lock); + fn(data); + spin_lock_irq(&timerlist_lock); + timer_exit(); + goto repeat; + } + ++timer_jiffies; + tv1.index = (tv1.index + 1) & TVR_MASK; + } + spin_unlock_irq(&timerlist_lock); +} + +spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED; + +void tqueue_bh(void) +{ + run_task_queue(&tq_timer); +} + +void immediate_bh(void) +{ + run_task_queue(&tq_immediate); +} + +/* + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + */ +static void second_overflow(void) +{ + long ltemp; + + /* Bump the maxerror field */ + time_maxerror += time_tolerance >> SHIFT_USEC; + if ( time_maxerror > NTP_PHASE_LIMIT ) { + time_maxerror = NTP_PHASE_LIMIT; + time_status |= STA_UNSYNC; + } + + /* + * Leap second processing. If in leap-insert state at + * the end of the day, the system clock is set back one + * second; if in leap-delete state, the system clock is + * set ahead one second. The microtime() routine or + * external clock driver will insure that reported time + * is always monotonic. The ugly divides should be + * replaced. + */ + switch (time_state) { + + case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; + break; + + case TIME_INS: + if (xtime.tv_sec % 86400 == 0) { + xtime.tv_sec--; + time_state = TIME_OOP; + printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); + } + break; + + case TIME_DEL: + if ((xtime.tv_sec + 1) % 86400 == 0) { + xtime.tv_sec++; + time_state = TIME_WAIT; + printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); + } + break; + + case TIME_OOP: + time_state = TIME_WAIT; + break; + + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + } + + /* + * Compute the phase adjustment for the next second. In + * PLL mode, the offset is reduced by a fixed factor + * times the time constant. In FLL mode the offset is + * used directly. In either mode, the maximum phase + * adjustment for each second is clamped so as to spread + * the adjustment over not more than the number of + * seconds between updates. 
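+ * Roughly, in PLL mode (STA_FLL clear):
+ *
+ *	ltemp = |time_offset| >> (SHIFT_KG + time_constant);
+ *
+ * clamped to (MAXPHASE / MINSEC) << SHIFT_UPDATE, as coded below.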
+ */ + if (time_offset < 0) { + ltemp = -time_offset; + if (!(time_status & STA_FLL)) + ltemp >>= SHIFT_KG + time_constant; + if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) + ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; + time_offset += ltemp; + time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + } else { + ltemp = time_offset; + if (!(time_status & STA_FLL)) + ltemp >>= SHIFT_KG + time_constant; + if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) + ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; + time_offset -= ltemp; + time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + } + + if (ltemp < 0) + time_adj -= -ltemp >> + (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); + else + time_adj += ltemp >> + (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); + +#if HZ == 100 + /* Compensate for (HZ==100) != (1 << SHIFT_HZ). + * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) + */ + if (time_adj < 0) + time_adj -= (-time_adj >> 2) + (-time_adj >> 5); + else + time_adj += (time_adj >> 2) + (time_adj >> 5); +#endif +} + +/* in the NTP reference this is called "hardclock()" */ +static void update_wall_time_one_tick(void) +{ + if ( (time_adjust_step = time_adjust) != 0 ) { + /* We are doing an adjtime thing. + * + * Prepare time_adjust_step to be within bounds. + * Note that a positive time_adjust means we want the clock + * to run faster. + * + * Limit the amount of the step to be in the range + * -tickadj .. +tickadj + */ + if (time_adjust > tickadj) + time_adjust_step = tickadj; + else if (time_adjust < -tickadj) + time_adjust_step = -tickadj; + + /* Reduce by this step the amount of time left */ + time_adjust -= time_adjust_step; + } + xtime.tv_usec += tick + time_adjust_step; + /* + * Advance the phase, once it gets to one microsecond, then + * advance the tick more. + */ + time_phase += time_adj; + if (time_phase <= -FINEUSEC) { + long ltemp = -time_phase >> SHIFT_SCALE; + time_phase += ltemp << SHIFT_SCALE; + xtime.tv_usec -= ltemp; + } + else if (time_phase >= FINEUSEC) { + long ltemp = time_phase >> SHIFT_SCALE; + time_phase -= ltemp << SHIFT_SCALE; + xtime.tv_usec += ltemp; + } +} + +/* + * Using a loop looks inefficient, but "ticks" is + * usually just one (we shouldn't be losing ticks, + * we're doing this this way mainly for interrupt + * latency reasons, not because we think we'll + * have lots of lost timer ticks + */ +static void update_wall_time(unsigned long ticks) +{ + do { + ticks--; + update_wall_time_one_tick(); + } while (ticks); + + if (xtime.tv_usec >= 1000000) { + xtime.tv_usec -= 1000000; + xtime.tv_sec++; + second_overflow(); + } +} + +static inline void do_process_times(struct task_struct *p, + unsigned long user, unsigned long system) +{ + //unsigned long psecs; + +// psecs = (p->times.tms_utime += user); + //psecs += (p->times.tms_stime += system); +} + + +void update_one_process(struct task_struct *p, unsigned long user, + unsigned long system, int cpu) +{ +// p->per_cpu_utime[cpu] += user; +// p->per_cpu_stime[cpu] += system; + do_process_times(p, user, system); +} + +/* + * Called from the timer interrupt handler to charge one tick to the current + * process. user_tick is 1 if the tick is user time, 0 for system. 
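+ * (Hence system = user_tick ^ 1 below: each tick is charged either
+ * to user time or to system time, never both.)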
+ */ +void update_process_times(int user_tick) +{ + struct task_struct *p = current; + int cpu = smp_processor_id(), system = user_tick ^ 1; + + update_one_process(p, user_tick, system, cpu); + + if ( --p->counter <= 0 ) + { + p->counter = 0; + set_bit(_HYP_EVENT_NEED_RESCHED, &p->hyp_events); + } +} + + +/* jiffies at the most recent update of wall time */ +unsigned long wall_jiffies; + +/* + * This spinlock protect us from races in SMP while playing with xtime. -arca + */ +rwlock_t xtime_lock = RW_LOCK_UNLOCKED; + +static inline void update_times(void) +{ + unsigned long ticks; + + /* + * update_times() is run from the raw timer_bh handler so we + * just know that the irqs are locally enabled and so we don't + * need to save/restore the flags of the local CPU here. -arca + */ + write_lock_irq(&xtime_lock); + + ticks = jiffies - wall_jiffies; + if (ticks) { + wall_jiffies += ticks; + update_wall_time(ticks); + } + write_unlock_irq(&xtime_lock); +} + +void timer_bh(void) +{ + update_times(); + run_timer_list(); +} + +#include +#include +#include +#include +#include +#include +#include + +void do_timer(struct pt_regs *regs) +{ + struct task_struct *p; + shared_info_t *s; + unsigned long long wall; + unsigned long cpu_mask = 0; + + (*(unsigned long *)&jiffies)++; + + if ( !using_apic_timer ) + update_process_times(user_mode(regs)); + + rdtscll(wall); + + read_lock(&tasklist_lock); + p = &idle0_task; + do { + s = p->shared_info; + s->wall_time = s->domain_time = wall; + cpu_mask |= mark_guest_event(p, _EVENT_TIMER); + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock(&tasklist_lock); + + guest_event_notify(cpu_mask); + + mark_bh(TIMER_BH); + if (TQ_ACTIVE(tq_timer)) + mark_bh(TQUEUE_BH); +} + +void get_fast_time(struct timeval * tm) +{ + *tm=xtime; +} diff --git a/xen-2.4.16/common/vsprintf.c b/xen-2.4.16/common/vsprintf.c new file mode 100644 index 0000000000..fe17225088 --- /dev/null +++ b/xen-2.4.16/common/vsprintf.c @@ -0,0 +1,713 @@ +/* + * linux/lib/vsprintf.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */ +/* + * Wirzenius wrote this portably, Torvalds fucked it up :-) + */ + +/* + * Fri Jul 13 2001 Crutcher Dunnavant + * - changed to provide snprintf and vsnprintf functions + */ + +#include +#include +#include + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? 
*cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtol - convert a string to a signed long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp,char **endp,unsigned int base) +{ + unsigned long long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp) + ? toupper(*cp) : *cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +/** + * simple_strtoll - convert a string to a signed long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long long simple_strtoll(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoull(cp+1,endp,base); + return simple_strtoull(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & LARGE) ? large_digits : small_digits; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return buf; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else + { + /* XXX KAF: force unsigned mod and div. 
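+		 * For unsigned conversions num can arrive with its top bit
+		 * set, and signed % and / on such a value would misbehave;
+		 * the casts below make both operations unsigned.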
*/ + unsigned long num2=(unsigned long)num; + unsigned int base2=(unsigned int)base; + while (num2 != 0) { tmp[i++] = digits[num2%base2]; num2 /= base2; } + } + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +/** +* vsnprintf - Format a string and place it in a buffer +* @buf: The buffer to place the result into +* @size: The size of the buffer, including the trailing null space +* @fmt: The format string to use +* @args: Arguments for the format string +* +* Call this function if you are already dealing with a va_list. +* You probably want snprintf instead. + */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + if (qualifier == 'l' && *fmt == 'l') { + qualifier = 'L'; + ++fmt; + } + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) { + while (len < 
field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} + +/** + * vsprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @args: Arguments for the format string + * + * Call this function if you are already dealing with a va_list. + * You probably want sprintf instead. + */ +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, 0xFFFFFFFFUL, fmt, args); +} + + +/** + * sprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @...: Arguments for the format string + */ +int sprintf(char * buf, const char *fmt, ...) 
+{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +int vsscanf(const char * buf, const char * fmt, va_list args) +{ + const char *str = buf; + char *next; + int num = 0; + int qualifier; + int base; + int field_width = -1; + int is_sign = 0; + + while(*fmt && *str) { + /* skip any white space in format */ + /* white space in format matchs any amount of + * white space, including none, in the input. + */ + if (isspace(*fmt)) { + while (isspace(*fmt)) + ++fmt; + while (isspace(*str)) + ++str; + } + + /* anything that is not a conversion must match exactly */ + if (*fmt != '%' && *fmt) { + if (*fmt++ != *str++) + break; + continue; + } + + if (!*fmt) + break; + ++fmt; + + /* skip this conversion. + * advance both strings to next white space + */ + if (*fmt == '*') { + while (!isspace(*fmt) && *fmt) + fmt++; + while (!isspace(*str) && *str) + str++; + continue; + } + + /* get field width */ + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + + /* get conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z') { + qualifier = *fmt; + fmt++; + } + base = 10; + is_sign = 0; + + if (!*fmt || !*str) + break; + + switch(*fmt++) { + case 'c': + { + char *s = (char *) va_arg(args,char*); + if (field_width == -1) + field_width = 1; + do { + *s++ = *str++; + } while(field_width-- > 0 && *str); + num++; + } + continue; + case 's': + { + char *s = (char *) va_arg(args, char *); + if(field_width == -1) + field_width = INT_MAX; + /* first, skip leading white space in buffer */ + while (isspace(*str)) + str++; + + /* now copy until next white space */ + while (*str && !isspace(*str) && field_width--) { + *s++ = *str++; + } + *s = '\0'; + num++; + } + continue; + case 'n': + /* return number of characters read so far */ + { + int *i = (int *)va_arg(args,int*); + *i = str - buf; + } + continue; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + case 'd': + case 'i': + is_sign = 1; + case 'u': + break; + case '%': + /* looking for '%' in str */ + if (*str++ != '%') + return num; + continue; + default: + /* invalid format; stop here */ + return num; + } + + /* have some sort of integer conversion. + * first, skip white space in buffer. 
+ */ + while (isspace(*str)) + str++; + + if (!*str || !isdigit(*str)) + break; + + switch(qualifier) { + case 'h': + if (is_sign) { + short *s = (short *) va_arg(args,short *); + *s = (short) simple_strtol(str,&next,base); + } else { + unsigned short *s = (unsigned short *) va_arg(args, unsigned short *); + *s = (unsigned short) simple_strtoul(str, &next, base); + } + break; + case 'l': + if (is_sign) { + long *l = (long *) va_arg(args,long *); + *l = simple_strtol(str,&next,base); + } else { + unsigned long *l = (unsigned long*) va_arg(args,unsigned long*); + *l = simple_strtoul(str,&next,base); + } + break; + case 'L': + if (is_sign) { + long long *l = (long long*) va_arg(args,long long *); + *l = simple_strtoll(str,&next,base); + } else { + unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*); + *l = simple_strtoull(str,&next,base); + } + break; + case 'Z': + { + size_t *s = (size_t*) va_arg(args,size_t*); + *s = (size_t) simple_strtoul(str,&next,base); + } + break; + default: + if (is_sign) { + int *i = (int *) va_arg(args, int*); + *i = (int) simple_strtol(str,&next,base); + } else { + unsigned int *i = (unsigned int*) va_arg(args, unsigned int*); + *i = (unsigned int) simple_strtoul(str,&next,base); + } + break; + } + num++; + + if (!next) + break; + str = next; + } + return num; +} + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char * buf, const char * fmt, ...) +{ + va_list args; + int i; + + va_start(args,fmt); + i = vsscanf(buf,fmt,args); + va_end(args); + return i; +} diff --git a/xen-2.4.16/drivers/Makefile b/xen-2.4.16/drivers/Makefile new file mode 100644 index 0000000000..5aa320fcbe --- /dev/null +++ b/xen-2.4.16/drivers/Makefile @@ -0,0 +1,12 @@ + +default: + $(MAKE) -C pci + $(MAKE) -C net + $(MAKE) -C block + $(MAKE) -C ide + +clean: + $(MAKE) -C pci clean + $(MAKE) -C net clean + $(MAKE) -C block clean + $(MAKE) -C ide clean diff --git a/xen-2.4.16/drivers/block/Makefile b/xen-2.4.16/drivers/block/Makefile new file mode 100644 index 0000000000..574b7d2d79 --- /dev/null +++ b/xen-2.4.16/drivers/block/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o driver.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/drivers/block/blkpg.c b/xen-2.4.16/drivers/block/blkpg.c new file mode 100644 index 0000000000..2e27a1aa2b --- /dev/null +++ b/xen-2.4.16/drivers/block/blkpg.c @@ -0,0 +1,315 @@ +/* + * Partition table and disk geometry handling + * + * This obsoletes the partition-handling code in genhd.c: + * Userspace can look at a disk in arbitrary format and tell + * the kernel what partitions there are on the disk, and how + * these should be numbered. + * It also allows one to repartition a disk that is being used. + * + * A single ioctl with lots of subfunctions: + * + * Device number stuff: + * get_whole_disk() (given the device number of a partition, find + * the device number of the encompassing disk) + * get_all_partitions() (given the device number of a disk, return the + * device numbers of all its known partitions) + * + * Partition stuff: + * add_partition() + * delete_partition() + * test_partition_in_use() (also for test_disk_in_use) + * + * Geometry stuff: + * get_geometry() + * set_geometry() + * get_bios_drivedata() + * + * For today, only the partition stuff - aeb, 990515 + */ + +#include +#include +#include +/*#include */ /* for BLKRASET, ... 
*/
+#include /* for capable() */
+#include /* for set_device_ro() */
+#include
+#include
+/*#include */ /* for is_swap_partition() */
+#include /* for EXPORT_SYMBOL */
+
+#include
+
+#define is_mounted(_dev) (0)
+#define is_swap_partition(_dev) (0)
+
+#define fsync_dev(_dev) (panic("fsync_dev???"))
+#define invalidate_buffers(_dev) (panic("invalidate_buffers???"))
+
+/*
+ * What is the data describing a partition?
+ *
+ * 1. a device number (kdev_t)
+ * 2. a starting sector and number of sectors (hd_struct)
+ * given in the part[] array of the gendisk structure for the drive.
+ *
+ * The number of sectors is replicated in the sizes[] array of
+ * the gendisk structure for the major, which again is copied to
+ * the blk_size[][] array.
+ * (However, hd_struct has the number of 512-byte sectors,
+ * g->sizes[] and blk_size[][] have the number of 1024-byte blocks.)
+ * Note that several drives may have the same major.
+ */
+
+/*
+ * Add a partition.
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find drive
+ * EBUSY: proposed partition overlaps an existing one
+ * or has the same number as an existing one
+ * 0: all OK.
+ */
+int add_partition(kdev_t dev, struct blkpg_partition *p) {
+	struct gendisk *g;
+	long long ppstart, pplength;
+	long pstart, plength;
+	int i, drive, first_minor, end_minor, minor;
+
+	/* convert bytes to sectors, check for fit in a hd_struct */
+	ppstart = (p->start >> 9);
+	pplength = (p->length >> 9);
+	pstart = ppstart;
+	plength = pplength;
+	if (pstart != ppstart || plength != pplength
+	    || pstart < 0 || plength < 0)
+		return -EINVAL;
+
+	/* find the drive major */
+	g = get_gendisk(dev);
+	if (!g)
+		return -ENXIO;
+
+	/* existing drive? */
+	drive = (MINOR(dev) >> g->minor_shift);
+	first_minor = (drive << g->minor_shift);
+	end_minor = first_minor + g->max_p;
+	if (drive >= g->nr_real)
+		return -ENXIO;
+
+	/* drive and partition number OK? */
+	if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+		return -EINVAL;
+
+	/* partition number in use? */
+	minor = first_minor + p->pno;
+	if (g->part[minor].nr_sects != 0)
+		return -EBUSY;
+
+	/* overlap? */
+	for (i=first_minor+1; i<end_minor; i++)
+		if (!(pstart+plength <= g->part[i].start_sect ||
+		      pstart >= g->part[i].start_sect + g->part[i].nr_sects))
+			return -EBUSY;
+
+	/* all seems OK */
+	g->part[minor].start_sect = pstart;
+	g->part[minor].nr_sects = plength;
+	if (g->sizes)
+		g->sizes[minor] = (plength >> (BLOCK_SIZE_BITS - 9));
+#ifdef DEVFS_MUST_DIE
+	devfs_register_partitions (g, first_minor, 0);
+#endif
+	return 0;
+}
+
+/*
+ * Delete a partition given by partition number
+ *
+ * returns: EINVAL: bad parameters
+ * ENXIO: cannot find partition
+ * EBUSY: partition is busy
+ * 0: all OK.
+ *
+ * Note that the dev argument refers to the entire disk, not the partition.
+ */
+int del_partition(kdev_t dev, struct blkpg_partition *p) {
+	struct gendisk *g;
+	kdev_t devp;
+	int drive, first_minor, minor;
+
+	/* find the drive major */
+	g = get_gendisk(dev);
+	if (!g)
+		return -ENXIO;
+
+	/* drive and partition number OK? */
+	drive = (MINOR(dev) >> g->minor_shift);
+	first_minor = (drive << g->minor_shift);
+	if (first_minor != MINOR(dev) || p->pno <= 0 || p->pno >= g->max_p)
+		return -EINVAL;
+
+	/* existing drive and partition? */
+	minor = first_minor + p->pno;
+	if (drive >= g->nr_real || g->part[minor].nr_sects == 0)
+		return -ENXIO;
+
+	/* partition in use? Incomplete check for now.
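+ * (Aside: is_mounted() and is_swap_partition() are stubbed out to 0 by
+ * the #defines near the top of this file, so under Xen this busy check
+ * currently never rejects a deletion.)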
*/ + devp = MKDEV(MAJOR(dev), minor); + if (is_mounted(devp) || is_swap_partition(devp)) + return -EBUSY; + + /* all seems OK */ + fsync_dev(devp); + invalidate_buffers(devp); + + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; + if (g->sizes) + g->sizes[minor] = 0; +#ifdef DEVFS_MUST_DIE + devfs_register_partitions (g, first_minor, 0); +#endif + + return 0; +} + +int blkpg_ioctl(kdev_t dev, struct blkpg_ioctl_arg *arg) +{ + struct blkpg_ioctl_arg a; + struct blkpg_partition p; + int len; + + if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) + return -EFAULT; + + switch (a.op) { + case BLKPG_ADD_PARTITION: + case BLKPG_DEL_PARTITION: + len = a.datalen; + if (len < sizeof(struct blkpg_partition)) + return -EINVAL; + if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition))) + return -EFAULT; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (a.op == BLKPG_ADD_PARTITION) + return add_partition(dev, &p); + else + return del_partition(dev, &p); + default: + return -EINVAL; + } +} + +/* + * Common ioctl's for block devices + */ + +int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) +{ +#if 1 + printk("May want to check out blk_ioctl...\n"); + return -EINVAL; +#else + struct gendisk *g; + u64 ullval = 0; + int intval; + + if (!dev) + return -EINVAL; + + switch (cmd) { + case BLKROSET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (get_user(intval, (int *)(arg))) + return -EFAULT; + set_device_ro(dev, intval); + return 0; + case BLKROGET: + intval = (is_read_only(dev) != 0); + return put_user(intval, (int *)(arg)); + + case BLKRASET: + if(!capable(CAP_SYS_ADMIN)) + return -EACCES; + if(arg > 0xff) + return -EINVAL; + read_ahead[MAJOR(dev)] = arg; + return 0; + case BLKRAGET: + if (!arg) + return -EINVAL; + return put_user(read_ahead[MAJOR(dev)], (long *) arg); + + case BLKFLSBUF: + if(!capable(CAP_SYS_ADMIN)) + return -EACCES; + fsync_dev(dev); + invalidate_buffers(dev); + return 0; + + case BLKSSZGET: + /* get block device sector size as needed e.g. by fdisk */ + intval = get_hardsect_size(dev); + return put_user(intval, (int *) arg); + + case BLKGETSIZE: + case BLKGETSIZE64: + g = get_gendisk(dev); + if (g) + ullval = g->part[MINOR(dev)].nr_sects; + + if (cmd == BLKGETSIZE) + return put_user((unsigned long)ullval, (unsigned long *)arg); + else + return put_user(ullval << 9, (u64 *)arg); +#if 0 + case BLKRRPART: /* Re-read partition tables */ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return reread_partitions(dev, 1); +#endif + + case BLKPG: + return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg); + + case BLKELVGET: + return blkelvget_ioctl(&blk_get_queue(dev)->elevator, + (blkelv_ioctl_arg_t *) arg); + case BLKELVSET: + return blkelvset_ioctl(&blk_get_queue(dev)->elevator, + (blkelv_ioctl_arg_t *) arg); + + case BLKBSZGET: + /* get the logical block size (cf. 
BLKSSZGET) */ + intval = BLOCK_SIZE; + if (blksize_size[MAJOR(dev)]) + intval = blksize_size[MAJOR(dev)][MINOR(dev)]; + return put_user (intval, (int *) arg); + + case BLKBSZSET: + /* set the logical block size */ + if (!capable (CAP_SYS_ADMIN)) + return -EACCES; + if (!dev || !arg) + return -EINVAL; + if (get_user (intval, (int *) arg)) + return -EFAULT; + if (intval > PAGE_SIZE || intval < 512 || + (intval & (intval - 1))) + return -EINVAL; + if (is_mounted (dev) || is_swap_partition (dev)) + return -EBUSY; + set_blocksize (dev, intval); + return 0; + + default: + return -EINVAL; + } +#endif +} + +EXPORT_SYMBOL(blk_ioctl); diff --git a/xen-2.4.16/drivers/block/elevator.c b/xen-2.4.16/drivers/block/elevator.c new file mode 100644 index 0000000000..281e8f8b8d --- /dev/null +++ b/xen-2.4.16/drivers/block/elevator.c @@ -0,0 +1,224 @@ +/* + * linux/drivers/block/elevator.c + * + * Block device elevator/IO-scheduler. + * + * Copyright (C) 2000 Andrea Arcangeli SuSE + * + * 30042000 Jens Axboe : + * + * Split the elevator a bit so that it is possible to choose a different + * one or even write a new "plug in". There are three pieces: + * - elevator_fn, inserts a new request in the queue list + * - elevator_merge_fn, decides whether a new buffer can be merged with + * an existing request + * - elevator_dequeue_fn, called when a request is taken off the active list + * + * 20082000 Dave Jones : + * Removed tests for max-bomb-segments, which was breaking elvtune + * when run without -bN + * + */ + +#include +#include +/*#include */ +#include +#include +#include +#include +#include + +/* + * This is a bit tricky. It's given that bh and rq are for the same + * device, but the next request might of course not be. Run through + * the tests below to check if we want to insert here if we can't merge + * bh into an existing request + */ +inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, + struct list_head *head) +{ + struct list_head *next; + struct request *next_rq; + + next = rq->queue.next; + if (next == head) + return 0; + + /* + * if the device is different (usually on a different partition), + * just check if bh is after rq + */ + next_rq = blkdev_entry_to_request(next); + if (next_rq->rq_dev != rq->rq_dev) + return bh->b_rsector > rq->sector; + + /* + * ok, rq, next_rq and bh are on the same device. 
if bh is in between + * the two, this is the sweet spot + */ + if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + return 1; + + /* + * next_rq is ordered wrt rq, but bh is not in between the two + */ + if (next_rq->sector > rq->sector) + return 0; + + /* + * next_rq and rq not ordered, if we happen to be either before + * next_rq or after rq insert here anyway + */ + if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + return 1; + + return 0; +} + + +int elevator_linus_merge(request_queue_t *q, struct request **req, + struct list_head * head, + struct buffer_head *bh, int rw, + int max_sectors) +{ + struct list_head *entry = &q->queue_head; + unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + + while ((entry = entry->prev) != head) { + struct request *__rq = blkdev_entry_to_request(entry); + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) + break; + + if (__rq->waiting) + continue; + if (__rq->rq_dev != bh->b_rdev) + continue; + if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) + *req = __rq; + if (__rq->cmd != rw) + continue; + if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->elevator_sequence < count) + break; + if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + ret = ELEVATOR_BACK_MERGE; + *req = __rq; + break; + } else if (__rq->sector - count == bh->b_rsector) { + ret = ELEVATOR_FRONT_MERGE; + __rq->elevator_sequence -= count; + *req = __rq; + break; + } + } + + return ret; +} + +void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) +{ + struct list_head *entry = &req->queue, *head = &q->queue_head; + + /* + * second pass scan of requests that got passed over, if any + */ + while ((entry = entry->next) != head) { + struct request *tmp = blkdev_entry_to_request(entry); + tmp->elevator_sequence -= count; + } +} + +void elevator_linus_merge_req(struct request *req, struct request *next) +{ + if (next->elevator_sequence < req->elevator_sequence) + req->elevator_sequence = next->elevator_sequence; +} + +/* + * See if we can find a request that this buffer can be coalesced with. 
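+ * For instance (illustrative numbers only): given a queued request
+ * covering sectors [100,108) and a new 4-sector buffer, b_rsector == 108
+ * satisfies the back-merge test (__rq->sector + __rq->nr_sectors ==
+ * bh->b_rsector), while b_rsector == 96 satisfies the front-merge test
+ * (__rq->sector - count == bh->b_rsector).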
+ */ +int elevator_noop_merge(request_queue_t *q, struct request **req, + struct list_head * head, + struct buffer_head *bh, int rw, + int max_sectors) +{ + struct list_head *entry; + unsigned int count = bh->b_size >> 9; + + if (list_empty(&q->queue_head)) + return ELEVATOR_NO_MERGE; + + entry = &q->queue_head; + while ((entry = entry->prev) != head) { + struct request *__rq = blkdev_entry_to_request(entry); + + if (__rq->cmd != rw) + continue; + if (__rq->rq_dev != bh->b_rdev) + continue; + if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->waiting) + continue; + if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } else if (__rq->sector - count == bh->b_rsector) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + + *req = blkdev_entry_to_request(q->queue_head.prev); + return ELEVATOR_NO_MERGE; +} + +void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} + +void elevator_noop_merge_req(struct request *req, struct request *next) {} + +int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +{ + blkelv_ioctl_arg_t output; + + output.queue_ID = elevator->queue_ID; + output.read_latency = elevator->read_latency; + output.write_latency = elevator->write_latency; + output.max_bomb_segments = 0; + + if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) + return -EFAULT; + + return 0; +} + +int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +{ + blkelv_ioctl_arg_t input; + + if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) + return -EFAULT; + + if (input.read_latency < 0) + return -EINVAL; + if (input.write_latency < 0) + return -EINVAL; + + elevator->read_latency = input.read_latency; + elevator->write_latency = input.write_latency; + return 0; +} + +void elevator_init(elevator_t * elevator, elevator_t type) +{ + static unsigned int queue_ID; + + *elevator = type; + elevator->queue_ID = queue_ID++; +} diff --git a/xen-2.4.16/drivers/block/genhd.c b/xen-2.4.16/drivers/block/genhd.c new file mode 100644 index 0000000000..427c2cb312 --- /dev/null +++ b/xen-2.4.16/drivers/block/genhd.c @@ -0,0 +1,219 @@ +/* + * Code extracted from + * linux/kernel/hd.c + * + * Copyright (C) 1991-1998 Linus Torvalds + * + * devfs support - jj, rgooch, 980122 + * + * Moved partition checking code to fs/partitions* - Russell King + * (linux@arm.uk.linux.org) + */ + +/* + * TODO: rip out the remaining init crap from this file --hch + */ + +#include +#include +/*#include */ +#include +#include +#include +#include +#include + + +static rwlock_t gendisk_lock; + +/* + * Global kernel list of partitioning information. + * + * XXX: you should _never_ access this directly. + * the only reason this is exported is source compatiblity. + */ +/*static*/ struct gendisk *gendisk_head; +static struct gendisk *gendisk_array[MAX_BLKDEV]; + +EXPORT_SYMBOL(gendisk_head); + + +/** + * add_gendisk - add partitioning information to kernel list + * @gp: per-device partitioning information + * + * This function registers the partitioning information in @gp + * with the kernel. + */ +void +add_gendisk(struct gendisk *gp) +{ + struct gendisk *sgp; + + write_lock(&gendisk_lock); + + /* + * In 2.5 this will go away. Fix the drivers who rely on + * old behaviour. 
+ */ + + for (sgp = gendisk_head; sgp; sgp = sgp->next) + { + if (sgp == gp) + { +// printk(KERN_ERR "add_gendisk: device major %d is buggy and added a live gendisk!\n", +// sgp->major) + goto out; + } + } + gendisk_array[gp->major] = gp; + gp->next = gendisk_head; + gendisk_head = gp; +out: + write_unlock(&gendisk_lock); +} + +EXPORT_SYMBOL(add_gendisk); + + +/** + * del_gendisk - remove partitioning information from kernel list + * @gp: per-device partitioning information + * + * This function unregisters the partitioning information in @gp + * with the kernel. + */ +void +del_gendisk(struct gendisk *gp) +{ + struct gendisk **gpp; + + write_lock(&gendisk_lock); + gendisk_array[gp->major] = NULL; + for (gpp = &gendisk_head; *gpp; gpp = &((*gpp)->next)) + if (*gpp == gp) + break; + if (*gpp) + *gpp = (*gpp)->next; + write_unlock(&gendisk_lock); +} + +EXPORT_SYMBOL(del_gendisk); + + +/** + * get_gendisk - get partitioning information for a given device + * @dev: device to get partitioning information for + * + * This function gets the structure containing partitioning + * information for the given device @dev. + */ +struct gendisk * +get_gendisk(kdev_t dev) +{ + struct gendisk *gp = NULL; + int maj = MAJOR(dev); + + read_lock(&gendisk_lock); + if ((gp = gendisk_array[maj])) + goto out; + + /* This is needed for early 2.4 source compatiblity. --hch */ + for (gp = gendisk_head; gp; gp = gp->next) + if (gp->major == maj) + break; +out: + read_unlock(&gendisk_lock); + return gp; +} + +EXPORT_SYMBOL(get_gendisk); + + +/** + * walk_gendisk - issue a command for every registered gendisk + * @walk: user-specified callback + * @data: opaque data for the callback + * + * This function walks through the gendisk chain and calls back + * into @walk for every element. + */ +int +walk_gendisk(int (*walk)(struct gendisk *, void *), void *data) +{ + struct gendisk *gp; + int error = 0; + + read_lock(&gendisk_lock); + for (gp = gendisk_head; gp; gp = gp->next) + if ((error = walk(gp, data))) + break; + read_unlock(&gendisk_lock); + + return error; +} + + +#ifdef CONFIG_PROC_FS +int +get_partition_list(char *page, char **start, off_t offset, int count) +{ + struct gendisk *gp; + struct hd_struct *hd; + char buf[64]; + int len, n; + + len = sprintf(page, "major minor #blocks name\n\n"); + + read_lock(&gendisk_lock); + for (gp = gendisk_head; gp; gp = gp->next) { + for (n = 0; n < (gp->nr_real << gp->minor_shift); n++) { + if (gp->part[n].nr_sects == 0) + continue; + + hd = &gp->part[n]; disk_round_stats(hd); + len += sprintf(page + len, + "%4d %4d %10d %s\n", gp->major, + n, gp->sizes[n], disk_name(gp, n, buf)); + + if (len < offset) + offset -= len, len = 0; + else if (len >= offset + count) + goto out; + } + } + +out: + read_unlock(&gendisk_lock); + *start = page + offset; + len -= offset; + if (len < 0) + len = 0; + return len > count ? 
count : len; +} +#endif + + +extern int blk_dev_init(void); +extern int net_dev_init(void); +extern void console_map_init(void); +extern int atmdev_init(void); + +int __init device_init(void) +{ + rwlock_init(&gendisk_lock); + blk_dev_init(); + sti(); +#ifdef CONFIG_NET + net_dev_init(); +#endif +#ifdef CONFIG_ATM + (void) atmdev_init(); +#endif +#ifdef CONFIG_VT + console_map_init(); +#endif + return 0; +} + +__initcall(device_init); diff --git a/xen-2.4.16/drivers/block/ll_rw_blk.c b/xen-2.4.16/drivers/block/ll_rw_blk.c new file mode 100644 index 0000000000..0ee8477c71 --- /dev/null +++ b/xen-2.4.16/drivers/block/ll_rw_blk.c @@ -0,0 +1,1466 @@ +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE + * Queue request tables / lock, selectable elevator, Jens Axboe + * kernel-doc documentation started by NeilBrown - July2000 + */ + +/* + * This handles all read/write requests to block devices + */ +#include +#include +#include +#include +/*#include */ +#include +/*#include */ +#include +/*#include */ +#include +/*#include */ +/*#include */ + +#include +#include +#include +/*#include */ +#include +#include + +/* This will die as all synchronous stuff is coming to an end */ +#define end_buffer_io_sync NULL +#define complete(_r) panic("completion.h stuff may be needed...") + +/* + * MAC Floppy IWM hooks + */ + +#ifdef CONFIG_MAC_FLOPPY_IWM +extern int mac_floppy_init(void); +#endif + +/* + * For the allocated request tables + */ +static kmem_cache_t *request_cachep; + +/* + * The "disk" task queue is used to start the actual requests + * after a plug + */ +DECLARE_TASK_QUEUE(tq_disk); + +/* + * Protect the request list against multiple users.. + * + * With this spinlock the Linux block IO subsystem is 100% SMP threaded + * from the IRQ event side, and almost 100% SMP threaded from the syscall + * side (we still have protect against block device array operations, and + * the do_request() side is casually still unsafe. The kernel lock protects + * this part currently.). + * + * there is a fair chance that things will work just OK if these functions + * are called with no global kernel lock held ... + */ +spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; + +/* This specifies how many sectors to read ahead on the disk. */ + +int read_ahead[MAX_BLKDEV]; + +/* blk_dev_struct is: + * *request_fn + * *current_request + */ +struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int * blk_size[MAX_BLKDEV]; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. + */ +int * blksize_size[MAX_BLKDEV]; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi devices and read by the msdos fs driver. + * Other uses may appear later. 
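+ *
+ * A lookup therefore follows this pattern (a sketch; compare
+ * get_max_sectors() below):
+ *
+ *	int ssize = 512;
+ *	if (hardsect_size[MAJOR(dev)])
+ *		ssize = hardsect_size[MAJOR(dev)][MINOR(dev)];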
*/
+int * hardsect_size[MAX_BLKDEV];
+
+/*
+ * The following tunes the read-ahead algorithm in mm/filemap.c
+ */
+int * max_readahead[MAX_BLKDEV];
+
+/*
+ * Max number of sectors per request
+ */
+int * max_sectors[MAX_BLKDEV];
+
+static inline int get_max_sectors(kdev_t dev)
+{
+	if (!max_sectors[MAJOR(dev)])
+		return MAX_SECTORS;
+	return max_sectors[MAJOR(dev)][MINOR(dev)];
+}
+
+inline request_queue_t *blk_get_queue(kdev_t dev)
+{
+	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
+
+	if (bdev->queue)
+		return bdev->queue(dev);
+	else
+		return &blk_dev[MAJOR(dev)].request_queue;
+}
+
+static int __blk_cleanup_queue(struct request_list *list)
+{
+	struct list_head *head = &list->free;
+	struct request *rq;
+	int i = 0;
+
+	while (!list_empty(head)) {
+		rq = list_entry(head->next, struct request, queue);
+		list_del(&rq->queue);
+		kmem_cache_free(request_cachep, rq);
+		i++;
+	};
+
+	if (i != list->count)
+		printk("request list leak!\n");
+
+	list->count = 0;
+	return i;
+}
+
+/**
+ * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
+ * @q: the request queue to be released
+ *
+ * Description:
+ * blk_cleanup_queue is the pair to blk_init_queue(). It should
+ * be called when a request queue is being released; typically
+ * when a block device is being de-registered. Currently, its
+ * primary task is to free all the &struct request structures that
+ * were allocated to the queue.
+ * Caveat:
+ * Hopefully the low level driver will have finished any
+ * outstanding requests first...
+ **/
+void blk_cleanup_queue(request_queue_t * q)
+{
+	int count = q->nr_requests;
+
+	count -= __blk_cleanup_queue(&q->rq[READ]);
+	count -= __blk_cleanup_queue(&q->rq[WRITE]);
+
+	if (count)
+		printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+
+	memset(q, 0, sizeof(*q));
+}
+
+/**
+ * blk_queue_headactive - indicate whether head of request queue may be active
+ * @q: The queue which this applies to.
+ * @active: A flag indicating whether the head of the queue is active.
+ *
+ * Description:
+ * The driver for a block device may choose to leave the currently active
+ * request on the request queue, removing it only when it has completed.
+ * The queue handling routines assume this by default for safety reasons
+ * and will not involve the head of the request queue in any merging or
+ * reordering of requests when the queue is unplugged (and thus may be
+ * working on this particular request).
+ *
+ * If a driver removes requests from the queue before processing them, then
+ * it may indicate that it does so, thereby allowing the head of the queue
+ * to be involved in merging and reordering. This is done by calling
+ * blk_queue_headactive() with an @active flag of %0.
+ *
+ * If a driver processes several requests at once, it must remove them (or
+ * at least all but one of them) from the request queue.
+ *
+ * When a queue is plugged the head will be assumed to be inactive.
+ **/
+
+void blk_queue_headactive(request_queue_t * q, int active)
+{
+	q->head_active = active;
+}
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q: the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ * The normal way for &struct buffer_heads to be passed to a device
+ * driver is for them to be collected into requests on a request
+ * queue, and then to allow the device driver to select requests
+ * off that queue when it is ready. This works well for many block
+ * devices.
However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory", either by calling bh_kmap() to get + * a kernel mapping, to by calling create_bounce() to create a + * buffer in normal memory. + **/ + +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +{ + q->make_request_fn = mfn; +} + +static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +{ + if (req->nr_segments < max_segments) { + req->nr_segments++; + return 1; + } + return 0; +} + +static int ll_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_front_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_merge_requests_fn(request_queue_t *q, struct request *req, + struct request *next, int max_segments) +{ + int total_segments = req->nr_segments + next->nr_segments; + + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + total_segments--; + + if (total_segments > max_segments) + return 0; + + req->nr_segments = total_segments; + return 1; +} + +/* + * "plug" the device if there are no outstanding requests: this will + * force the transfer to start only after we have put all the requests + * on the list. + * + * This is called with interrupts off and no requests on the queue. + * (and with the request spinlock acquired) + */ +static void generic_plug_device(request_queue_t *q, kdev_t dev) +{ + /* + * no need to replug device + */ + if (!list_empty(&q->queue_head) || q->plugged) + return; + + q->plugged = 1; + queue_task(&q->plug_tq, &tq_disk); +} + +/* + * remove the plug and let it rip.. + */ +static inline void __generic_unplug_device(request_queue_t *q) +{ + if (q->plugged) { + q->plugged = 0; + if (!list_empty(&q->queue_head)) + q->request_fn(q); + } +} + +void generic_unplug_device(void *data) +{ + request_queue_t *q = (request_queue_t *) data; + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __generic_unplug_device(q); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/** blk_grow_request_list + * @q: The &request_queue_t + * @nr_requests: how many requests are desired + * + * More free requests are added to the queue's free lists, bringing + * the total number of requests to @nr_requests. + * + * The requests are added equally to the request queue's read + * and write freelists. + * + * This function can sleep. + * + * Returns the (new) number of requests which the queue has available. + */ +int blk_grow_request_list(request_queue_t *q, int nr_requests) +{ + unsigned long flags; + /* Several broken drivers assume that this function doesn't sleep, + * this causes system hangs during boot. + * As a temporary fix, make the the function non-blocking. 
+ */ + spin_lock_irqsave(&io_request_lock, flags); + while (q->nr_requests < nr_requests) { + struct request *rq; + int rw; + + rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC); + if (rq == NULL) + break; + memset(rq, 0, sizeof(*rq)); + rq->rq_status = RQ_INACTIVE; + rw = q->nr_requests & 1; + list_add(&rq->queue, &q->rq[rw].free); + q->rq[rw].count++; + q->nr_requests++; + } + q->batch_requests = q->nr_requests / 4; + if (q->batch_requests > 32) + q->batch_requests = 32; + spin_unlock_irqrestore(&io_request_lock, flags); + return q->nr_requests; +} + +static void blk_init_free_list(request_queue_t *q) +{ + /*struct sysinfo si;*/ + /*int megs;*/ /* Total memory, in megabytes */ + int nr_requests; + + INIT_LIST_HEAD(&q->rq[READ].free); + INIT_LIST_HEAD(&q->rq[WRITE].free); + q->rq[READ].count = 0; + q->rq[WRITE].count = 0; + q->nr_requests = 0; + +#if 0 + si_meminfo(&si); + megs = si.totalram >> (20 - PAGE_SHIFT); + nr_requests = 128; + if (megs < 32) + nr_requests /= 2; +#else + nr_requests = 128; +#endif + blk_grow_request_list(q, nr_requests); + +#if 0 + init_waitqueue_head(&q->wait_for_requests[0]); + init_waitqueue_head(&q->wait_for_requests[1]); +#endif + spin_lock_init(&q->queue_lock); +} + +static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); + +/** + * blk_init_queue - prepare a request queue for use with a block device + * @q: The &request_queue_t to be initialised + * @rfn: The function to be called to process requests that have been + * placed on the queue. + * + * Description: + * If a block device wishes to use the standard request handling procedures, + * which sorts requests and coalesces adjacent requests, then it must + * call blk_init_queue(). The function @rfn will be called when there + * are requests on the queue that need to be processed. If the device + * supports plugging, then @rfn may not be called immediately when requests + * are available on the queue, but may be called at some time later instead. + * Plugged queues are generally unplugged when a buffer belonging to one + * of the requests on the queue is needed, or due to memory pressure. + * + * @rfn is not required, or even expected, to remove all requests off the + * queue, but only as many as it can handle at a time. If it does leave + * requests on the queue, it is responsible for arranging that the requests + * get dealt with eventually. + * + * A global spin lock $io_request_lock must be held while manipulating the + * requests on the request queue. + * + * The request on the head of the queue is by default assumed to be + * potentially active, and it is not considered for re-ordering or merging + * whenever the given queue is unplugged. This behaviour can be changed with + * blk_queue_headactive(). + * + * Note: + * blk_init_queue() must be paired with a blk_cleanup_queue() call + * when the block device is deactivated (such as at module unload). + **/ +void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +{ + INIT_LIST_HEAD(&q->queue_head); + elevator_init(&q->elevator, ELEVATOR_LINUS); + blk_init_free_list(q); + q->request_fn = rfn; + q->back_merge_fn = ll_back_merge_fn; + q->front_merge_fn = ll_front_merge_fn; + q->merge_requests_fn = ll_merge_requests_fn; + q->make_request_fn = __make_request; + q->plug_tq.sync = 0; + q->plug_tq.routine = &generic_unplug_device; + q->plug_tq.data = q; + q->plugged = 0; + /* + * These booleans describe the queue properties. We set the + * default (and most common) values here. 
Other drivers can + * use the appropriate functions to alter the queue properties. + * as appropriate. + */ + q->plug_device_fn = generic_plug_device; + q->head_active = 1; +} + +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +/* + * Get a free request. io_request_lock must be held and interrupts + * disabled on the way in. Returns NULL if there are no free requests. + */ +static struct request *get_request(request_queue_t *q, int rw) +{ + struct request *rq = NULL; + struct request_list *rl = q->rq + rw; + + if (!list_empty(&rl->free)) { + rq = blkdev_free_rq(&rl->free); + list_del(&rq->queue); + rl->count--; + rq->rq_status = RQ_ACTIVE; + rq->cmd = rw; + rq->special = NULL; + rq->q = q; + } + + return rq; +} + +/* + * Here's the request allocation design: + * + * 1: Blocking on request exhaustion is a key part of I/O throttling. + * + * 2: We want to be `fair' to all requesters. We must avoid starvation, and + * attempt to ensure that all requesters sleep for a similar duration. Hence + * no stealing requests when there are other processes waiting. + * + * 3: We also wish to support `batching' of requests. So when a process is + * woken, we want to allow it to allocate a decent number of requests + * before it blocks again, so they can be nicely merged (this only really + * matters if the process happens to be adding requests near the head of + * the queue). + * + * 4: We want to avoid scheduling storms. This isn't really important, because + * the system will be I/O bound anyway. But it's easy. + * + * There is tension between requirements 2 and 3. Once a task has woken, + * we don't want to allow it to sleep as soon as it takes its second request. + * But we don't want currently-running tasks to steal all the requests + * from the sleepers. We handle this with wakeup hysteresis around + * 0 .. batch_requests and with the assumption that request taking is much, + * much faster than request freeing. + * + * So here's what we do: + * + * a) A READA requester fails if free_requests < batch_requests + * + * We don't want READA requests to prevent sleepers from ever + * waking. Note that READA is used extremely rarely - a few + * filesystems use it for directory readahead. + * + * When a process wants a new request: + * + * b) If free_requests == 0, the requester sleeps in FIFO manner. + * + * b) If 0 < free_requests < batch_requests and there are waiters, + * we still take a request non-blockingly. This provides batching. + * + * c) If free_requests >= batch_requests, the caller is immediately + * granted a new request. + * + * When a request is released: + * + * d) If free_requests < batch_requests, do nothing. + * + * f) If free_requests >= batch_requests, wake up a single waiter. + * + * The net effect is that when a process is woken at the batch_requests level, + * it will be able to take approximately (batch_requests) requests before + * blocking again (at the tail of the queue). + * + * This all assumes that the rate of taking requests is much, much higher + * than the rate of releasing them. Which is very true. + * + * -akpm, Feb 2002. 
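+ *
+ * Condensed into pseudocode (a sketch of the rules above, keyed to
+ * their labels):
+ *
+ *	allocate: if (READA && free_requests < batch_requests) fail;    (a)
+ *	          else if (free_requests == 0) sleep in FIFO order;     (b)
+ *	          else take a request without blocking;                 (b,c)
+ *	release:  if (free_requests >= batch_requests) wake one waiter; (f)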
+ */ + +static struct request *__get_request_wait(request_queue_t *q, int rw) +{ +#if 0 + register struct request *rq; + /*DECLARE_WAITQUEUE(wait, current);*/ + + generic_unplug_device(q); + add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (q->rq[rw].count == 0) + schedule(); + spin_lock_irq(&io_request_lock); + rq = get_request(q,rw); + spin_unlock_irq(&io_request_lock); + } while (rq == NULL); + remove_wait_queue(&q->wait_for_requests[rw], &wait); + current->state = TASK_RUNNING; + return rq; +#else + panic("__get_request_wait shouldn't be depended on"); + return 0; +#endif +} + +/* RO fail safe mechanism */ + +static long ro_bits[MAX_BLKDEV][8]; + +int is_read_only(kdev_t dev) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return 0; + return ro_bits[major][minor >> 5] & (1 << (minor & 31)); +} + +void set_device_ro(kdev_t dev,int flag) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return; + if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); + else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); +} + +inline void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io) +{ + /*unsigned int major = MAJOR(dev);*/ + unsigned int index; + + index = disk_index(dev); +#if 0 + if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) + return; +#endif + +#if 0 + kstat.dk_drive[major][index] += new_io; + if (rw == READ) { + kstat.dk_drive_rio[major][index] += new_io; + kstat.dk_drive_rblk[major][index] += nr_sectors; + } else if (rw == WRITE) { + kstat.dk_drive_wio[major][index] += new_io; + kstat.dk_drive_wblk[major][index] += nr_sectors; + } else + printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); +#endif +} + +/* Return up to two hd_structs on which to do IO accounting for a given + * request. On a partitioned device, we want to account both against + * the partition and against the whole disk. */ +static void locate_hd_struct(struct request *req, + struct hd_struct **hd1, + struct hd_struct **hd2) +{ + struct gendisk *gd; + + *hd1 = NULL; + *hd2 = NULL; + + gd = get_gendisk(req->rq_dev); + if (gd && gd->part) { + /* Mask out the partition bits: account for the entire disk */ + int devnr = MINOR(req->rq_dev) >> gd->minor_shift; + int whole_minor = devnr << gd->minor_shift; + *hd1 = &gd->part[whole_minor]; + if (whole_minor != MINOR(req->rq_dev)) + *hd2= &gd->part[MINOR(req->rq_dev)]; + } +} + +/* Round off the performance stats on an hd_struct. The average IO + * queue length and utilisation statistics are maintained by observing + * the current state of the queue length and the amount of time it has + * been in this state for. Normally, that accounting is done on IO + * completion, but that can result in more than a second's worth of IO + * being accounted for within any one second, leading to >100% + * utilisation. To deal with that, we do a round-off before returning + * the results when reading /proc/partitions, accounting immediately for + * all queue usage up to the current jiffies and restarting the counters + * again. 
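+ *
+ * Concretely (illustrative numbers): with 3 IOs in flight for 10 jiffies,
+ * aveq grows by 30 and io_ticks by 10; utilisation over a sampling
+ * interval is then io_ticks/interval, and average queue depth is
+ * aveq/interval.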
*/ +void disk_round_stats(struct hd_struct *hd) +{ + unsigned long now = jiffies; + + hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change)); + hd->last_queue_change = now; + + if (hd->ios_in_flight) + hd->io_ticks += (now - hd->last_idle_time); + hd->last_idle_time = now; +} + + +static inline void down_ios(struct hd_struct *hd) +{ + disk_round_stats(hd); + --hd->ios_in_flight; +} + +static inline void up_ios(struct hd_struct *hd) +{ + disk_round_stats(hd); + ++hd->ios_in_flight; +} + +static void account_io_start(struct hd_struct *hd, struct request *req, + int merge, int sectors) +{ + switch (req->cmd) { + case READ: + if (merge) + hd->rd_merges++; + hd->rd_sectors += sectors; + break; + case WRITE: + if (merge) + hd->wr_merges++; + hd->wr_sectors += sectors; + break; + } + if (!merge) + up_ios(hd); +} + +static void account_io_end(struct hd_struct *hd, struct request *req) +{ + unsigned long duration = jiffies - req->start_time; + switch (req->cmd) { + case READ: + hd->rd_ticks += duration; + hd->rd_ios++; + break; + case WRITE: + hd->wr_ticks += duration; + hd->wr_ios++; + break; + } + down_ios(hd); +} + +void req_new_io(struct request *req, int merge, int sectors) +{ + struct hd_struct *hd1, *hd2; + locate_hd_struct(req, &hd1, &hd2); + if (hd1) + account_io_start(hd1, req, merge, sectors); + if (hd2) + account_io_start(hd2, req, merge, sectors); +} + +void req_finished_io(struct request *req) +{ + struct hd_struct *hd1, *hd2; + locate_hd_struct(req, &hd1, &hd2); + if (hd1) + account_io_end(hd1, req); + if (hd2) + account_io_end(hd2, req); +} + +/* + * add-request adds a request to the linked list. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. + * + * By this point, req->cmd is always either READ/WRITE, never READA, + * which is important for drive_stat_acct() above. + */ +static inline void add_request(request_queue_t * q, struct request * req, + struct list_head *insert_here) +{ + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + BUG(); + } + + /* + * elevator indicated where it wants this request to be + * inserted at elevator_merge time + */ + list_add(&req->queue, insert_here); +} + +/* + * Must be called with io_request_lock held and interrupts disabled + */ +void blkdev_release_request(struct request *req) +{ + request_queue_t *q = req->q; + int rw = req->cmd; + + req->rq_status = RQ_INACTIVE; + req->q = NULL; + + /* + * Request may not have originated from ll_rw_blk. if not, + * assume it has free buffers and check waiters + */ + if (q) { + list_add(&req->queue, &q->rq[rw].free); +#if 0 + if (++q->rq[rw].count >= q->batch_requests && + waitqueue_active(&q->wait_for_requests[rw])) + wake_up(&q->wait_for_requests[rw]); +#endif + } +} + +/* + * Has to be called with the request spinlock acquired + */ +static void attempt_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + struct request *next; + struct hd_struct *hd1, *hd2; + + next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) + return; + if (req->cmd != next->cmd + || req->rq_dev != next->rq_dev + || req->nr_sectors + next->nr_sectors > max_sectors + || next->waiting) + return; + /* + * If we are not allowed to merge these requests, then + * return. 
If we are allowed to merge, then the count + * will have been updated to the appropriate number, + * and we shouldn't do it here too. + */ + if (!q->merge_requests_fn(q, req, next, max_segments)) + return; + + q->elevator.elevator_merge_req_fn(req, next); + req->bhtail->b_reqnext = next->bh; + req->bhtail = next->bhtail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + list_del(&next->queue); + + /* One last thing: we have removed a request, so we now have one + less expected IO to complete for accounting purposes. */ + + locate_hd_struct(req, &hd1, &hd2); + if (hd1) + down_ios(hd1); + if (hd2) + down_ios(hd2); + blkdev_release_request(next); +} + +static inline void attempt_back_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + if (&req->queue == q->queue_head.prev) + return; + attempt_merge(q, req, max_sectors, max_segments); +} + +static inline void attempt_front_merge(request_queue_t * q, + struct list_head * head, + struct request *req, + int max_sectors, + int max_segments) +{ + struct list_head * prev; + + prev = req->queue.prev; + if (head == prev) + return; + attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); +} + +static int __make_request(request_queue_t * q, int rw, + struct buffer_head * bh) +{ + unsigned int sector, count; + int max_segments = MAX_SEGMENTS; + struct request * req, *freereq = NULL; + int rw_ahead, max_sectors, el_ret; + struct list_head *head, *insert_here; + int latency; + elevator_t *elevator = &q->elevator; + + count = bh->b_size >> 9; + sector = bh->b_rsector; + + rw_ahead = 0; /* normal case; gets changed below for READA */ + switch (rw) { + case READA: +#if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */ + rw_ahead = 1; +#endif + rw = READ; /* drop into READ */ + case READ: + case WRITE: + latency = elevator_request_latency(elevator, rw); + break; + default: + BUG(); + goto end_io; + } + + /* We'd better have a real physical mapping! + Check this bit only if the buffer was dirty and just locked + down by us so at this point flushpage will block and + won't clear the mapped bit under us. */ + if (!buffer_mapped(bh)) + BUG(); + + /* + * Temporary solution - in 2.5 this will be done by the lowlevel + * driver. Create a bounce buffer if the buffer data points into + * high memory - keep the original buffer otherwise. + */ +#if CONFIG_HIGHMEM + bh = create_bounce(rw, bh); +#endif + +/* look for a free request. 
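+ * (The freereq/"goto again" dance below exists because the allocator
+ * must drop io_request_lock before sleeping in __get_request_wait();
+ * a request obtained while unlocked is either used on the retry or
+ * returned to the free list via blkdev_release_request() at out:.)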
*/ + /* + * Try to coalesce the new request with old requests + */ + max_sectors = get_max_sectors(bh->b_rdev); + +again: + req = NULL; + head = &q->queue_head; + /* + * Now we acquire the request spinlock, we have to be mega careful + * not to schedule or do something nonatomic + */ + spin_lock_irq(&io_request_lock); + + insert_here = head->prev; + if (list_empty(head)) { + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + goto get_rq; + } else if (q->head_active && !q->plugged) + head = head->next; + + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + switch (el_ret) { + + case ELEVATOR_BACK_MERGE: + if (!q->back_merge_fn(q, req, bh, max_segments)) { + insert_here = &req->queue; + break; + } + elevator->elevator_merge_cleanup_fn(q, req, count); + req->bhtail->b_reqnext = bh; + req->bhtail = bh; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + req_new_io(req, 1, count); + attempt_back_merge(q, req, max_sectors, max_segments); + goto out; + + case ELEVATOR_FRONT_MERGE: + if (!q->front_merge_fn(q, req, bh, max_segments)) { + insert_here = req->queue.prev; + break; + } + elevator->elevator_merge_cleanup_fn(q, req, count); + bh->b_reqnext = req->bh; + req->bh = bh; + req->buffer = bh->b_data; + req->current_nr_sectors = count; + req->sector = req->hard_sector = sector; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + req_new_io(req, 1, count); + attempt_front_merge(q, head, req, max_sectors, max_segments); + goto out; + + /* + * elevator says don't/can't merge. get new request + */ + case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; + break; + + default: + printk("elevator returned crap (%d)\n", el_ret); + BUG(); + } + +get_rq: + if (freereq) { + req = freereq; + freereq = NULL; + } else { + /* + * See description above __get_request_wait() + */ + if (rw_ahead) { + if (q->rq[rw].count < q->batch_requests) { + spin_unlock_irq(&io_request_lock); + goto end_io; + } + req = get_request(q, rw); + if (req == NULL) + BUG(); + } else { + req = get_request(q, rw); + if (req == NULL) { + spin_unlock_irq(&io_request_lock); + freereq = __get_request_wait(q, rw); + goto again; + } + } + } + +/* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; + req->cmd = rw; + req->errors = 0; + req->hard_sector = req->sector = sector; + req->hard_nr_sectors = req->nr_sectors = count; + req->current_nr_sectors = count; + req->nr_segments = 1; /* Always 1 for a new request. */ + req->nr_hw_segments = 1; /* Always 1 for a new request. */ + req->buffer = bh->b_data; + req->waiting = NULL; + req->bh = bh; + req->bhtail = bh; + req->rq_dev = bh->b_rdev; + req->start_time = jiffies; + req_new_io(req, 0, count); + blk_started_io(count); + add_request(q, req, insert_here); +out: + if (freereq) + blkdev_release_request(freereq); + spin_unlock_irq(&io_request_lock); + return 0; +end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + return 0; +} + +/** + * generic_make_request: hand a buffer head to it's device driver for I/O + * @rw: READ, WRITE, or READA - what sort of I/O is desired. + * @bh: The buffer head describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. 
It is passed a &struct buffer_head and a &rw value. The + * %READ and %WRITE options are (hopefully) obvious in meaning. The + * %READA value means that a read is required, but that the driver is + * free to fail the request if, for example, it cannot get needed + * resources immediately. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bh->b_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that b_page, + * b_addr, b_size are set to describe the memory buffer, that b_rdev + * and b_rsector are set to describe the device address, and the + * b_end_io and optionally b_private are set to describe how + * completion notification should be signaled. BH_Mapped should also + * be set (to confirm that b_dev and b_blocknr are valid). + * + * generic_make_request and the drivers it calls may use b_reqnext, + * and may change b_rdev and b_rsector. So the values of these fields + * should NOT be depended on after the call to generic_make_request. + * Because of this, the caller should record the device address + * information in b_dev and b_blocknr. + * + * Apart from those fields mentioned above, no other fields, and in + * particular, no other flags, are changed by generic_make_request or + * any lower level drivers. + * */ +void generic_make_request (int rw, struct buffer_head * bh) +{ + int major = MAJOR(bh->b_rdev); + int minorsize = 0; + request_queue_t *q; + + if (!bh->b_end_io) + BUG(); + + /* Test device size, when known. */ + if (blk_size[major]) + minorsize = blk_size[major][MINOR(bh->b_rdev)]; + if (minorsize) { + unsigned long maxsector = (minorsize << 1) + 1; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; + + if (maxsector < count || maxsector - count < sector) { + /* Yecch */ + bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); + + /* This may well happen - the kernel calls bread() + without checking the size of the device, e.g., + when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", + kdevname(bh->b_rdev), rw, + (sector + count)>>1, minorsize); + + /* Yecch again */ + bh->b_end_io(bh, 0); + return; + } + } + + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + */ + /* NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + do { + q = blk_get_queue(bh->b_rdev); + if (!q) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%ld)\n", + kdevname(bh->b_rdev), bh->b_rsector); + buffer_IO_error(bh); + break; + } + } while (q->make_request_fn(q, rw, bh)); +} + + +/** + * submit_bh: submit a buffer_head to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + * submit_bh() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. + * + * The extra functionality provided by submit_bh is to determine + * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. + * This is is appropriate for IO requests that come from the buffer + * cache and page cache which (currently) always use aligned blocks. 
+ */ +void submit_bh(int rw, struct buffer_head * bh) +{ + int count = bh->b_size >> 9; + + if (!test_bit(BH_Lock, &bh->b_state)) + BUG(); + + set_bit(BH_Req, &bh->b_state); + set_bit(BH_Launder, &bh->b_state); + + /* + * First step, 'identity mapping' - RAID or LVM might + * further remap this. + */ + bh->b_rdev = bh->b_dev; + bh->b_rsector = bh->b_blocknr * count; + + generic_make_request(rw, bh); + +#if 0 + switch (rw) { + case WRITE: + kstat.pgpgout += count; + break; + default: + kstat.pgpgin += count; + break; + } +#endif +} + +/** + * ll_rw_block: low-level access to block devices + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @nr: number of &struct buffer_heads in the array + * @bhs: array of pointers to &struct buffer_head + * + * ll_rw_block() takes an array of pointers to &struct buffer_heads, + * and requests an I/O operation on them, either a %READ or a %WRITE. + * The third %READA option is described in the documentation for + * generic_make_request() which ll_rw_block() calls. + * + * This function provides extra functionality that is not in + * generic_make_request() that is relevant to buffers in the buffer + * cache or page cache. In particular it drops any buffer that it + * cannot get a lock on (with the BH_Lock state bit), any buffer that + * appears to be clean when doing a write request, and any buffer that + * appears to be up-to-date when doing read request. Further it marks + * as clean buffers that are processed for writing (the buffer cache + * wont assume that they are actually clean until the buffer gets + * unlocked). + * + * ll_rw_block sets b_end_io to simple completion handler that marks + * the buffer up-to-date (if approriate), unlocks the buffer and wakes + * any waiters. As client that needs a more interesting completion + * routine should call submit_bh() (or generic_make_request()) + * directly. + * + * Caveat: + * All of the buffers must be for the same device, and must also be + * of the current approved size for the device. */ + +void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) +{ + unsigned int major; + int correct_size; + int i; + + if (!nr) + return; + + major = MAJOR(bhs[0]->b_dev); + + /* Determine correct block size for this device. */ + correct_size = get_hardsect_size(bhs[0]->b_dev); + + /* Verify requested block sizes. */ + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + if (bh->b_size % correct_size) { + printk(KERN_NOTICE "ll_rw_block: device %s: " + "only %d-char blocks implemented (%u)\n", + kdevname(bhs[0]->b_dev), + correct_size, bh->b_size); + goto sorry; + } + } + + if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) { + printk(KERN_NOTICE "Can't write to read-only device %s\n", + kdevname(bhs[0]->b_dev)); + goto sorry; + } + + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + + /* Only one thread can actually submit the I/O. */ + if (test_and_set_bit(BH_Lock, &bh->b_state)) + continue; + + /* We have the buffer lock */ + atomic_inc(&bh->b_count); + bh->b_end_io = end_buffer_io_sync; + + switch(rw) { + case WRITE: + if (!atomic_set_buffer_clean(bh)) + /* Hmmph! Nothing to write */ + goto end_io; + __mark_buffer_clean(bh); + break; + + case READA: + case READ: + if (buffer_uptodate(bh)) + /* Hmmph! Already have it */ + goto end_io; + break; + default: + BUG(); + end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + continue; + } + + submit_bh(rw, bh); + } + return; + +sorry: + /* Make sure we don't get infinite dirty retries.. 
*/ + for (i = 0; i < nr; i++) + mark_buffer_clean(bhs[i]); +} + +#ifdef CONFIG_STRAM_SWAP +extern int stram_device_init (void); +#endif + + +/** + * end_that_request_first - end I/O on one buffer. + * @req: the request being processed + * @uptodate: 0 for I/O error + * @name: the name printed for an I/O error + * + * Description: + * Ends I/O on the first buffer attached to @req, and sets it up + * for the next buffer_head (if any) in the cluster. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + * + * Caveat: + * Drivers implementing their own end_request handling must call + * blk_finished_io() appropriately. + **/ + +int end_that_request_first (struct request *req, int uptodate, char *name) +{ + struct buffer_head * bh; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s (%s), sector %lu\n", + kdevname(req->rq_dev), name, req->sector); + + if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, uptodate); + if ((bh = req->bh) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bh->b_size >> 9; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bh->b_data; + return 1; + } + } + return 0; +} + +void end_that_request_last(struct request *req) +{ + if (req->waiting != NULL) + complete(req->waiting); + req_finished_io(req); + + blkdev_release_request(req); +} + +int __init blk_dev_init(void) +{ + struct blk_dev_struct *dev; + + request_cachep = kmem_cache_create("blkdev_requests", + sizeof(struct request), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!request_cachep) + panic("Can't create request pool slab cache\n"); + + for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) + dev->queue = NULL; + + memset(ro_bits,0,sizeof(ro_bits)); + memset(max_readahead, 0, sizeof(max_readahead)); + memset(max_sectors, 0, sizeof(max_sectors)); + +#ifdef CONFIG_AMIGA_Z2RAM + z2_init(); +#endif +#ifdef CONFIG_STRAM_SWAP + stram_device_init(); +#endif +#ifdef CONFIG_ISP16_CDI + isp16_init(); +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) + ide_init(); /* this MUST precede hd_init */ +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) + hd_init(); +#endif +#ifdef CONFIG_BLK_DEV_PS2 + ps2esdi_init(); +#endif +#ifdef CONFIG_BLK_DEV_XD + xd_init(); +#endif +#ifdef CONFIG_BLK_DEV_MFM + mfm_init(); +#endif +#ifdef CONFIG_PARIDE + { extern void paride_init(void); paride_init(); }; +#endif +#ifdef CONFIG_MAC_FLOPPY + swim3_init(); +#endif +#ifdef CONFIG_BLK_DEV_SWIM_IOP + swimiop_init(); +#endif +#ifdef CONFIG_AMIGA_FLOPPY + amiga_floppy_init(); +#endif +#ifdef CONFIG_ATARI_FLOPPY + atari_floppy_init(); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init(); +#else +#if defined(__i386__) /* Do we even need this? 
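+				   Port 0x3f2 is the PC floppy
+				   controller's digital output register;
+				   writing 0xc turns all drive motors off
+				   while leaving the controller enabled.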
*/ + outb_p(0xc, 0x3f2); +#endif +#endif +#ifdef CONFIG_CDU31A + cdu31a_init(); +#endif +#ifdef CONFIG_ATARI_ACSI + acsi_init(); +#endif +#ifdef CONFIG_MCD + mcd_init(); +#endif +#ifdef CONFIG_MCDX + mcdx_init(); +#endif +#ifdef CONFIG_SBPCD + sbpcd_init(); +#endif +#ifdef CONFIG_AZTCD + aztcd_init(); +#endif +#ifdef CONFIG_CDU535 + sony535_init(); +#endif +#ifdef CONFIG_GSCD + gscd_init(); +#endif +#ifdef CONFIG_CM206 + cm206_init(); +#endif +#ifdef CONFIG_OPTCD + optcd_init(); +#endif +#ifdef CONFIG_SJCD + sjcd_init(); +#endif +#ifdef CONFIG_APBLOCK + ap_init(); +#endif +#ifdef CONFIG_DDV + ddv_init(); +#endif +#ifdef CONFIG_MDISK + mdisk_init(); +#endif +#ifdef CONFIG_DASD + dasd_init(); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) + tapeblock_init(); +#endif +#ifdef CONFIG_BLK_DEV_XPRAM + xpram_init(); +#endif + +#ifdef CONFIG_SUN_JSFLASH + jsfd_init(); +#endif + return 0; +}; + +EXPORT_SYMBOL(io_request_lock); +EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(end_that_request_last); +EXPORT_SYMBOL(blk_grow_request_list); +EXPORT_SYMBOL(blk_init_queue); +EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(blk_cleanup_queue); +EXPORT_SYMBOL(blk_queue_headactive); +EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(generic_make_request); +EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(req_finished_io); +EXPORT_SYMBOL(generic_unplug_device); diff --git a/xen-2.4.16/drivers/ide/Makefile b/xen-2.4.16/drivers/ide/Makefile new file mode 100644 index 0000000000..574b7d2d79 --- /dev/null +++ b/xen-2.4.16/drivers/ide/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o driver.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/drivers/ide/ide-disk.c b/xen-2.4.16/drivers/ide/ide-disk.c new file mode 100644 index 0000000000..984e53cd67 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-disk.c @@ -0,0 +1,1550 @@ +/* + * linux/drivers/ide/ide-disk.c Version 1.10 June 9, 2000 + * + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) + */ + +/* + * Mostly written by Mark Lord + * and Gadi Oxman + * and Andre Hedrick + * + * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c. + * + * Version 1.00 move disk only code from ide.c to ide-disk.c + * support optional byte-swapping of all data + * Version 1.01 fix previous byte-swapping code + * Version 1.02 remove ", LBA" from drive identification msgs + * Version 1.03 fix display of id->buf_size for big-endian + * Version 1.04 add /proc configurable settings and S.M.A.R.T support + * Version 1.05 add capacity support for ATA3 >= 8GB + * Version 1.06 get boot-up messages to show full cyl count + * Version 1.07 disable door-locking if it fails + * Version 1.08 fixed CHS/LBA translations for ATA4 > 8GB, + * process of adding new ATA4 compliance. + * fixed problems in allowing fdisk to see + * the entire disk. 
+ * Version 1.09 added increment of rq->sector in ide_multwrite + * added UDMA 3/4 reporting + * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 added 48-bit lba + * Version 1.12 adding taskfile io access method + */ + +#define IDEDISK_VERSION "1.12" + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_BLK_DEV_PDC4030 +#define IS_PDC4030_DRIVE (HWIF(drive)->chipset == ide_pdc4030) +#else +#define IS_PDC4030_DRIVE (0) /* auto-NULLs out pdc4030 code */ +#endif + +#ifdef CONFIG_IDE_TASKFILE_IO +# undef __TASKFILE__IO /* define __TASKFILE__IO */ +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#ifndef __TASKFILE__IO + +static void idedisk_bswap_data (void *buffer, int wcount) +{ + u16 *p = buffer; + + while (wcount--) { + *p = *p << 8 | *p >> 8; p++; + *p = *p << 8 | *p >> 8; p++; + } +} + +static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ide_input_data(drive, buffer, wcount); + if (drive->bswap) + idedisk_bswap_data(buffer, wcount); +} + +static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + if (drive->bswap) { + idedisk_bswap_data(buffer, wcount); + ide_output_data(drive, buffer, wcount); + idedisk_bswap_data(buffer, wcount); + } else + ide_output_data(drive, buffer, wcount); +} + +#endif /* __TASKFILE__IO */ + +/* + * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" + * value for this drive (from its reported identification information). + * + * Returns: 1 if lba_capacity looks sensible + * 0 otherwise + * + * It is called only once for each drive. + */ +static int lba_capacity_is_ok (struct hd_driveid *id) +{ + unsigned long lba_sects, chs_sects, head, tail; + + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + printk("48-bit Drive: %llu \n", id->lba_capacity_2); + return 1; + } + + /* + * The ATA spec tells large drives to return + * C/H/S = 16383/16/63 independent of their size. + * Some drives can be jumpered to use 15 heads instead of 16. + * Some drives can be jumpered to use 4092 cyls instead of 16383. 
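+ *
+ * Worked example (hypothetical drive): a 20.0 GB unit reports
+ * C/H/S = 16383/16/63 (16,514,064 sectors) yet lba_capacity =
+ * 39,102,336; the geometry matches the capped pattern and the LBA
+ * figure is >= 16383*63*16, so the check below trusts it.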
+ */ + if ((id->cyls == 16383 + || (id->cyls == 4092 && id->cur_cyls == 16383)) && + id->sectors == 63 && + (id->heads == 15 || id->heads == 16) && + id->lba_capacity >= 16383*63*id->heads) + return 1; + + lba_sects = id->lba_capacity; + chs_sects = id->cyls * id->heads * id->sectors; + + /* perform a rough sanity check on lba_sects: within 10% is OK */ + if ((lba_sects - chs_sects) < chs_sects/10) + return 1; + + /* some drives have the word order reversed */ + head = ((lba_sects >> 16) & 0xffff); + tail = (lba_sects & 0xffff); + lba_sects = (head | (tail << 16)); + if ((lba_sects - chs_sects) < chs_sects/10) { + id->lba_capacity = lba_sects; + return 1; /* lba_capacity is (now) good */ + } + + return 0; /* lba_capacity value may be bad */ +} + +#ifndef __TASKFILE__IO + +/* + * read_intr() is the handler for disk read/multread interrupts + */ +static ide_startstop_t read_intr (ide_drive_t *drive) +{ + byte stat; + int i; + unsigned int msect, nsect; + struct request *rq; + + /* new way for dealing with premature shared PCI interrupts */ + if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "read_intr", stat); + } + /* no data yet, so wait for another interrupt */ + ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); + return ide_started; + } + msect = drive->mult_count; + +read_next: + rq = HWGROUP(drive)->rq; + if (msect) { + if ((nsect = rq->current_nr_sectors) > msect) + nsect = msect; + msect -= nsect; + } else + nsect = 1; + idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); +#ifdef DEBUG + printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, rq->sector+nsect-1, + (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); +#endif + rq->sector += nsect; + rq->buffer += nsect<<9; + rq->errors = 0; + i = (rq->nr_sectors -= nsect); + if (((long)(rq->current_nr_sectors -= nsect)) <= 0) + ide_end_request(1, HWGROUP(drive)); + if (i > 0) { + if (msect) + goto read_next; + ide_set_handler (drive, &read_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; +} + +/* + * write_intr() is the handler for disk write interrupts + */ +static ide_startstop_t write_intr (ide_drive_t *drive) +{ + byte stat; + int i; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + struct request *rq = hwgroup->rq; + + if (!OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) { + printk("%s: write_intr error1: nr_sectors=%ld, stat=0x%02x\n", drive->name, rq->nr_sectors, stat); + } else { +#ifdef DEBUG + printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n", + drive->name, rq->sector, (unsigned long) rq->buffer, + rq->nr_sectors-1); +#endif + if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { + rq->sector++; + rq->buffer += 512; + rq->errors = 0; + i = --rq->nr_sectors; + --rq->current_nr_sectors; + if (((long)rq->current_nr_sectors) <= 0) + ide_end_request(1, hwgroup); + if (i > 0) { + idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; + } + return ide_stopped; /* the original code did this here (?) */ + } + return ide_error(drive, "write_intr", stat); +} + +/* + * ide_multwrite() transfers a block of up to mcount sectors of data + * to a drive as part of a disk multiple-sector write operation. + * + * Returns 0 on success. + * + * Note that we may be called from two contexts - the do_rw_disk context + * and IRQ context. 
The IRQ can happen any time after we've output the
+ * full "mcount" number of sectors, so we must make sure we update the
+ * state _before_ we output the final part of the data!
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+	ide_hwgroup_t *hwgroup= HWGROUP(drive);
+	struct request *rq = &hwgroup->wrq;
+
+	do {
+		char *buffer;
+		int nsect = rq->current_nr_sectors;
+
+		if (nsect > mcount)
+			nsect = mcount;
+		mcount -= nsect;
+		buffer = rq->buffer;
+
+		rq->sector += nsect;
+		rq->buffer += nsect << 9;
+		rq->nr_sectors -= nsect;
+		rq->current_nr_sectors -= nsect;
+
+		/* Do we move to the next bh after this? */
+		if (!rq->current_nr_sectors) {
+			struct buffer_head *bh = rq->bh->b_reqnext;
+
+			/* end early if we ran out of requests */
+			if (!bh) {
+				mcount = 0;
+			} else {
+				rq->bh = bh;
+				rq->current_nr_sectors = bh->b_size >> 9;
+				rq->buffer = bh->b_data;
+			}
+		}
+
+		/*
+		 * Ok, we're all set up for the interrupt
+		 * re-entering us on the last transfer.
+		 */
+		idedisk_output_data(drive, buffer, nsect<<7);
+	} while (mcount);
+
+	return 0;
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ */
+static ide_startstop_t multwrite_intr (ide_drive_t *drive)
+{
+	byte stat;
+	int i;
+	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+	struct request *rq = &hwgroup->wrq;
+
+	if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+		if (stat & DRQ_STAT) {
+			/*
+			 * The drive wants data. Remember rq is the copy
+			 * of the request
+			 */
+			if (rq->nr_sectors) {
+				if (ide_multwrite(drive, drive->mult_count))
+					return ide_stopped;
+				ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL);
+				return ide_started;
+			}
+		} else {
+			/*
+			 * If the copy has all the blocks completed then
+			 * we can end the original request.
+			 */
+			if (!rq->nr_sectors) {	/* all done? */
+				rq = hwgroup->rq;
+				for (i = rq->nr_sectors; i > 0;){
+					i -= rq->current_nr_sectors;
+					ide_end_request(1, hwgroup);
+				}
+				return ide_stopped;
+			}
+		}
+		return ide_stopped;	/* the original code did this here (?) */
+	}
+	return ide_error(drive, "multwrite_intr", stat);
+}
+#endif /* __TASKFILE__IO */
+
+#ifdef __TASKFILE__IO
+
+static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block);
+static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block);
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+	if (rq->cmd == READ)
+		goto good_command;
+	if (rq->cmd == WRITE)
+		goto good_command;
+
+	printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+	ide_end_request(0, HWGROUP(drive));
+	return ide_stopped;
+
+good_command:
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+	if (IS_PDC4030_DRIVE) {
+		extern ide_startstop_t promise_rw_disk(ide_drive_t *, struct request *, unsigned long);
+		return promise_rw_disk(drive, rq, block);
+	}
+#endif /* CONFIG_BLK_DEV_PDC4030 */
+
+	if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing))	/* 48-bit LBA */
+		return lba_48_rw_disk(drive, rq, (unsigned long long) block);
+	if (drive->select.b.lba)	/* 28-bit LBA */
+		return lba_28_rw_disk(drive, rq, (unsigned long) block);
+
+	/* 28-bit CHS : DIE DIE DIE piece of legacy crap!!!
*/ + return chs_rw_disk(drive, rq, (unsigned long) block); +} + +static task_ioreg_t get_command (ide_drive_t *drive, int cmd) +{ + int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0; + +#if 1 + lba48bit = drive->addressing; +#endif + + if ((cmd == READ) && (drive->using_dma)) + return (lba48bit) ? WIN_READDMA_EXT : WIN_READDMA; + else if ((cmd == READ) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTREAD_EXT : WIN_MULTREAD; + else if (cmd == READ) + return (lba48bit) ? WIN_READ_EXT : WIN_READ; + else if ((cmd == WRITE) && (drive->using_dma)) + return (lba48bit) ? WIN_WRITEDMA_EXT : WIN_WRITEDMA; + else if ((cmd == WRITE) && (drive->mult_count)) + return (lba48bit) ? WIN_MULTWRITE_EXT : WIN_MULTWRITE; + else if (cmd == WRITE) + return (lba48bit) ? WIN_WRITE_EXT : WIN_WRITE; + else + return WIN_NOP; +} + +static ide_startstop_t chs_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + unsigned int track = (block / drive->sect); + unsigned int sect = (block % drive->sect) + 1; + unsigned int head = (track % drive->head); + unsigned int cyl = (track / drive->head); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = sect; + taskfile.low_cylinder = cyl; + taskfile.high_cylinder = (cyl>>8); + taskfile.device_head = head; + taskfile.device_head |= drive->select.all; + taskfile.command = command; + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +static ide_startstop_t lba_28_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = (rq->nr_sectors==256)?0x00:rq->nr_sectors; + taskfile.sector_number = block; + taskfile.low_cylinder = (block>>=8); + taskfile.high_cylinder = (block>>=8); + taskfile.device_head = ((block>>8)&0x0f); + taskfile.device_head |= drive->select.all; + taskfile.command = command; + + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? 
"read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +/* + * 268435455 == 137439 MB or 28bit limit + * 320173056 == 163929 MB or 48bit addressing + * 1073741822 == 549756 MB or 48bit addressing fake drive + */ + +static ide_startstop_t lba_48_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long long block) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_task_t args; + + task_ioreg_t command = get_command(drive, rq->cmd); + + memset(&taskfile, 0, sizeof(task_struct_t)); + memset(&hobfile, 0, sizeof(hob_struct_t)); + + taskfile.sector_count = rq->nr_sectors; + hobfile.sector_count = (rq->nr_sectors>>8); + + if (rq->nr_sectors == 65536) { + taskfile.sector_count = 0x00; + hobfile.sector_count = 0x00; + } + + taskfile.sector_number = block; /* low lba */ + taskfile.low_cylinder = (block>>=8); /* mid lba */ + taskfile.high_cylinder = (block>>=8); /* hi lba */ + hobfile.sector_number = (block>>=8); /* low lba */ + hobfile.low_cylinder = (block>>=8); /* mid lba */ + hobfile.high_cylinder = (block>>=8); /* hi lba */ + taskfile.device_head = drive->select.all; + hobfile.device_head = taskfile.device_head; + hobfile.control = (drive->ctl|0x80); + taskfile.command = command; + +#ifdef DEBUG + printk("%s: %sing: ", drive->name, (rq->cmd==READ) ? "read" : "writ"); + if (lba) printk("LBAsect=%lld, ", block); + else printk("CHS=%d/%d/%d, ", cyl, head, sect); + printk("sectors=%ld, ", rq->nr_sectors); + printk("buffer=0x%08lx\n", (unsigned long) rq->buffer); +#endif + + memcpy(args.tfRegister, &taskfile, sizeof(struct hd_drive_task_hdr)); + memcpy(args.hobRegister, &hobfile, sizeof(struct hd_drive_hob_hdr)); + args.command_type = ide_cmd_type_parser(&args); + args.prehandler = ide_pre_handler_parser(&taskfile, &hobfile); + args.handler = ide_handler_parser(&taskfile, &hobfile); + args.posthandler = NULL; + args.rq = (struct request *) rq; + args.block = block; + rq->special = NULL; + rq->special = (ide_task_t *)&args; + + return do_rw_taskfile(drive, &args); +} + +#else /* !__TASKFILE__IO */ +/* + * do_rw_disk() issues READ and WRITE commands to a disk, + * using LBA if supported, or CHS otherwise, to address sectors. + * It also takes care of issuing special DRIVE_CMDs. 
+ */ +static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) +{ + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); + +#ifdef CONFIG_BLK_DEV_PDC4030 + if (drive->select.b.lba || IS_PDC4030_DRIVE) { +#else /* !CONFIG_BLK_DEV_PDC4030 */ + if (drive->select.b.lba) { +#endif /* CONFIG_BLK_DEV_PDC4030 */ + + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + task_ioreg_t tasklets[10]; + + tasklets[0] = 0; + tasklets[1] = 0; + tasklets[2] = rq->nr_sectors; + tasklets[3] = (rq->nr_sectors>>8); + if (rq->nr_sectors == 65536) { + tasklets[2] = 0x00; + tasklets[3] = 0x00; + } + tasklets[4] = (task_ioreg_t) block; + tasklets[5] = (task_ioreg_t) (block>>8); + tasklets[6] = (task_ioreg_t) (block>>16); + tasklets[7] = (task_ioreg_t) (block>>24); + tasklets[8] = (task_ioreg_t) 0; + tasklets[9] = (task_ioreg_t) 0; +// tasklets[8] = (task_ioreg_t) (block>>32); +// tasklets[9] = (task_ioreg_t) (block>>40); +#ifdef DEBUG + printk("%s: %sing: LBAsect=%lu, sectors=%ld, buffer=0x%08lx, LBAsect=0x%012lx\n", + drive->name, + (rq->cmd==READ)?"read":"writ", + block, + rq->nr_sectors, + (unsigned long) rq->buffer, + block); + printk("%s: 0x%02x%02x 0x%02x%02x%02x%02x%02x%02x\n", + drive->name, tasklets[3], tasklets[2], + tasklets[9], tasklets[8], tasklets[7], + tasklets[6], tasklets[5], tasklets[4]); +#endif + OUT_BYTE(tasklets[1], IDE_FEATURE_REG); + OUT_BYTE(tasklets[3], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[7], IDE_SECTOR_REG); + OUT_BYTE(tasklets[8], IDE_LCYL_REG); + OUT_BYTE(tasklets[9], IDE_HCYL_REG); + + OUT_BYTE(tasklets[0], IDE_FEATURE_REG); + OUT_BYTE(tasklets[2], IDE_NSECTOR_REG); + OUT_BYTE(tasklets[4], IDE_SECTOR_REG); + OUT_BYTE(tasklets[5], IDE_LCYL_REG); + OUT_BYTE(tasklets[6], IDE_HCYL_REG); + OUT_BYTE(0x00|drive->select.all,IDE_SELECT_REG); + } else { +#ifdef DEBUG + printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", + block, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); + OUT_BYTE(block,IDE_SECTOR_REG); + OUT_BYTE(block>>=8,IDE_LCYL_REG); + OUT_BYTE(block>>=8,IDE_HCYL_REG); + OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG); + } + } else { + unsigned int sect,head,cyl,track; + track = block / drive->sect; + sect = block % drive->sect + 1; + OUT_BYTE(sect,IDE_SECTOR_REG); + head = track % drive->head; + cyl = track / drive->head; + + OUT_BYTE(0x00, IDE_FEATURE_REG); + OUT_BYTE((rq->nr_sectors==256)?0x00:rq->nr_sectors,IDE_NSECTOR_REG); + OUT_BYTE(cyl,IDE_LCYL_REG); + OUT_BYTE(cyl>>8,IDE_HCYL_REG); + OUT_BYTE(head|drive->select.all,IDE_SELECT_REG); +#ifdef DEBUG + printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n", + drive->name, (rq->cmd==READ)?"read":"writ", cyl, + head, sect, rq->nr_sectors, (unsigned long) rq->buffer); +#endif + } +#ifdef CONFIG_BLK_DEV_PDC4030 + if (IS_PDC4030_DRIVE) { + extern ide_startstop_t do_pdc4030_io(ide_drive_t *, struct request *); + return do_pdc4030_io (drive, rq); + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ + if (rq->cmd == READ) { +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive))) + return ide_started; +#endif /* CONFIG_BLK_DEV_IDEDMA */ + ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? 
WIN_MULTREAD_EXT : WIN_READ_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG); + } + return ide_started; + } + if (rq->cmd == WRITE) { + ide_startstop_t startstop; +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive))) + return ide_started; +#endif /* CONFIG_BLK_DEV_IDEDMA */ + if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) { + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE_EXT : WIN_WRITE_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG); + } + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, + drive->mult_count ? "MULTWRITE" : "WRITE"); + return startstop; + } + if (!drive->unmask) + __cli(); /* local CPU only */ + if (drive->mult_count) { + ide_hwgroup_t *hwgroup = HWGROUP(drive); + /* + * Ugh.. this part looks ugly because we MUST set up + * the interrupt handler before outputting the first block + * of data to be written. If we hit an error (corrupted buffer list) + * in ide_multwrite(), then we need to remove the handler/timer + * before returning. Fortunately, this NEVER happens (right?). + * + * Except when you get an error it seems... + */ + hwgroup->wrq = *rq; /* scratchpad */ + ide_set_handler(drive, &multwrite_intr, WAIT_CMD, NULL); + if (ide_multwrite(drive, drive->mult_count)) { + unsigned long flags; + spin_lock_irqsave(&io_request_lock, flags); + hwgroup->handler = NULL; + del_timer(&hwgroup->timer); + spin_unlock_irqrestore(&io_request_lock, flags); + return ide_stopped; + } + } else { + ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); + idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + } + return ide_started; + } + printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd); + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; +} + +#endif /* __TASKFILE__IO */ + +static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + MOD_INC_USE_COUNT; + if (drive->removable && drive->usage == 1) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_DOORLOCK; + check_disk_change(inode->i_rdev); + /* + * Ignore the return code from door_lock, + * since the open() has already succeeded, + * and the door_lock is irrelevant at this point. 
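+		 * If the WIN_DOORLOCK command ever fails, the test below
+		 * switches doorlocking off for good.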
+		 */
+		if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+			drive->doorlocking = 0;
+	}
+	return 0;
+}
+
+static int do_idedisk_flushcache(ide_drive_t *drive);
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+	if (drive->removable && !drive->usage) {
+		struct hd_drive_task_hdr taskfile;
+		struct hd_drive_hob_hdr hobfile;
+		memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr));
+		memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr));
+		taskfile.command = WIN_DOORUNLOCK;
+		invalidate_bdev(inode->i_bdev, 0);
+		if (drive->doorlocking && ide_wait_taskfile(drive, &taskfile, &hobfile, NULL))
+			drive->doorlocking = 0;
+	}
+	if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache)
+		if (do_idedisk_flushcache(drive))
+			printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n",
+				drive->name);
+	MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{
+	return drive->removable;	/* if removable, always assume it was changed */
+}
+
+static void idedisk_revalidate (ide_drive_t *drive)
+{
+	grok_partitions(HWIF(drive)->gd, drive->select.b.unit,
+			1<<PARTN_BITS, current_capacity(drive));
+}
+
+/*
+ * Queries for true maximum capacity of the drive.
+ * Returns maximum LBA address (> 0) of the drive, 0 if failed.
+ */
+static unsigned long idedisk_read_native_max_address(ide_drive_t *drive)
+{
+	ide_task_t args;
+	unsigned long addr = 0;
+
+	if (!(drive->id->command_set_1 & 0x0400) &&
+	    !(drive->id->cfs_enable_2 & 0x0100))
+		return addr;
+
+	/* Create IDE/ATA command request structure */
+	memset(&args, 0, sizeof(ide_task_t));
+	args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+	args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX;
+	args.handler = task_no_data_intr;
+
+	/* submit command request */
+	ide_raw_taskfile(drive, &args, NULL);
+
+	/* if OK, compute maximum address value */
+	if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+		addr = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24)
+		     | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16)
+		     | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8)
+		     | ((args.tfRegister[IDE_SECTOR_OFFSET] ));
+	}
+	addr++;	/* since the return value is (maxlba - 1), we add 1 */
+	return addr;
+}
+
+static unsigned long long idedisk_read_native_max_address_ext(ide_drive_t *drive)
+{
+	ide_task_t args;
+	unsigned long long addr = 0;
+
+	/* Create IDE/ATA command request structure */
+	memset(&args, 0, sizeof(ide_task_t));
+
+	args.tfRegister[IDE_SELECT_OFFSET] = 0x40;
+	args.tfRegister[IDE_COMMAND_OFFSET] = WIN_READ_NATIVE_MAX_EXT;
+	args.handler = task_no_data_intr;
+
+	/* submit command request */
+	ide_raw_taskfile(drive, &args, NULL);
+
+	/* if OK, compute maximum address value */
+	if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) {
+		u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) |
+			   ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) |
+			    (args.hobRegister[IDE_SECTOR_OFFSET_HOB]);
+		u32 low  = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) |
+			   ((args.tfRegister[IDE_LCYL_OFFSET])<<8) |
+			    (args.tfRegister[IDE_SECTOR_OFFSET]);
+		addr = ((__u64)high << 24) | low;
+	}
+	addr++;	/* since the return value is (maxlba - 1), we add 1 */
+	return addr;
+}
+
+#ifdef CONFIG_IDEDISK_STROKE
+/*
+ * Sets maximum virtual LBA address of the drive.
+ * Returns new maximum virtual LBA address (> 0) or 0 on failure.
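+ *
+ * Example (illustrative): clipping a drive to 0x02345678 sectors packs
+ * addr_req - 1 = 0x02345677 as SECTOR=0x77, LCYL=0x56, HCYL=0x34 plus
+ * select nibble 0x2, then issues WIN_SET_MAX; the drive echoes the
+ * accepted maximum back in the same registers.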
+ */ +static unsigned long idedisk_set_max_address(ide_drive_t *drive, unsigned long addr_req) +{ + ide_task_t args; + unsigned long addr_set = 0; + + addr_req--; + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff); + args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >> 8) & 0xff); + args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >> 16) & 0xff); + args.tfRegister[IDE_SELECT_OFFSET] = ((addr_req >> 24) & 0x0f) | 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX; + args.handler = task_no_data_intr; + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + /* if OK, read new maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + addr_set = ((args.tfRegister[IDE_SELECT_OFFSET] & 0x0f) << 24) + | ((args.tfRegister[ IDE_HCYL_OFFSET] ) << 16) + | ((args.tfRegister[ IDE_LCYL_OFFSET] ) << 8) + | ((args.tfRegister[IDE_SECTOR_OFFSET] )); + } + addr_set++; + return addr_set; +} + +static unsigned long long idedisk_set_max_address_ext(ide_drive_t *drive, unsigned long long addr_req) +{ + ide_task_t args; + unsigned long long addr_set = 0; + + addr_req--; + /* Create IDE/ATA command request structure */ + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_SECTOR_OFFSET] = ((addr_req >> 0) & 0xff); + args.tfRegister[IDE_LCYL_OFFSET] = ((addr_req >>= 8) & 0xff); + args.tfRegister[IDE_HCYL_OFFSET] = ((addr_req >>= 8) & 0xff); + args.tfRegister[IDE_SELECT_OFFSET] = 0x40; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SET_MAX_EXT; + args.hobRegister[IDE_SECTOR_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_LCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_HCYL_OFFSET_HOB] = ((addr_req >>= 8) & 0xff); + args.hobRegister[IDE_SELECT_OFFSET_HOB] = 0x40; + args.hobRegister[IDE_CONTROL_OFFSET_HOB]= (drive->ctl|0x80); + args.handler = task_no_data_intr; + /* submit command request */ + ide_raw_taskfile(drive, &args, NULL); + /* if OK, compute maximum address value */ + if ((args.tfRegister[IDE_STATUS_OFFSET] & 0x01) == 0) { + u32 high = ((args.hobRegister[IDE_HCYL_OFFSET_HOB])<<16) | + ((args.hobRegister[IDE_LCYL_OFFSET_HOB])<<8) | + (args.hobRegister[IDE_SECTOR_OFFSET_HOB]); + u32 low = ((args.tfRegister[IDE_HCYL_OFFSET])<<16) | + ((args.tfRegister[IDE_LCYL_OFFSET])<<8) | + (args.tfRegister[IDE_SECTOR_OFFSET]); + addr_set = ((__u64)high << 24) | low; + } + return addr_set; +} + +/* + * Tests if the drive supports Host Protected Area feature. + * Returns true if supported, false otherwise. + */ +static inline int idedisk_supports_host_protected_area(ide_drive_t *drive) +{ + int flag = (drive->id->cfs_enable_1 & 0x0400) ? 1 : 0; + printk("%s: host protected area => %d\n", drive->name, flag); + return flag; +} + +#endif /* CONFIG_IDEDISK_STROKE */ + +/* + * Compute drive->capacity, the full capacity of the drive + * Called with drive->id != NULL. + * + * To compute capacity, this uses either of + * + * 1. CHS value set by user (whatever user sets will be trusted) + * 2. LBA value from target drive (require new ATA feature) + * 3. LBA value from system BIOS (new one is OK, old one may break) + * 4. CHS value from system BIOS (traditional style) + * + * in above order (i.e., if value of higher priority is available, + * reset will be ignored). 
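+ *
+ * For instance (hypothetical drive): when cfs_enable_2 bit 10 is set,
+ * the 48-bit lba_capacity_2 value wins; geometry is then forced to
+ * 255 heads x 63 sectors and cyl is derived as capacity / (255 * 63).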
+ */ +static void init_idedisk_capacity (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + unsigned long capacity = drive->cyl * drive->head * drive->sect; + unsigned long set_max = idedisk_read_native_max_address(drive); + unsigned long long capacity_2 = capacity; + unsigned long long set_max_ext; + + drive->capacity48 = 0; + drive->select.b.lba = 0; + + if (id->cfs_enable_2 & 0x0400) { + capacity_2 = id->lba_capacity_2; + drive->head = drive->bios_head = 255; + drive->sect = drive->bios_sect = 63; + drive->cyl = (unsigned int) capacity_2 / (drive->head * drive->sect); + drive->select.b.lba = 1; + set_max_ext = idedisk_read_native_max_address_ext(drive); + if (set_max_ext > capacity_2) { +#ifdef CONFIG_IDEDISK_STROKE + set_max_ext = idedisk_read_native_max_address_ext(drive); + set_max_ext = idedisk_set_max_address_ext(drive, set_max_ext); + if (set_max_ext) { + drive->capacity48 = capacity_2 = set_max_ext; + drive->cyl = (unsigned int) set_max_ext / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity_2 = capacity_2; + } +#else /* !CONFIG_IDEDISK_STROKE */ + printk("%s: setmax_ext LBA %llu, native %llu\n", + drive->name, set_max_ext, capacity_2); +#endif /* CONFIG_IDEDISK_STROKE */ + } + drive->bios_cyl = drive->cyl; + drive->capacity48 = capacity_2; + drive->capacity = (unsigned long) capacity_2; + return; + /* Determine capacity, and use LBA if the drive properly supports it */ + } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { + capacity = id->lba_capacity; + drive->cyl = capacity / (drive->head * drive->sect); + drive->select.b.lba = 1; + } + + if (set_max > capacity) { +#ifdef CONFIG_IDEDISK_STROKE + set_max = idedisk_read_native_max_address(drive); + set_max = idedisk_set_max_address(drive, set_max); + if (set_max) { + drive->capacity = capacity = set_max; + drive->cyl = set_max / (drive->head * drive->sect); + drive->select.b.lba = 1; + drive->id->lba_capacity = capacity; + } +#else /* !CONFIG_IDEDISK_STROKE */ + printk("%s: setmax LBA %lu, native %lu\n", + drive->name, set_max, capacity); +#endif /* CONFIG_IDEDISK_STROKE */ + } + + drive->capacity = capacity; + + if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400)) { + drive->capacity48 = id->lba_capacity_2; + drive->head = 255; + drive->sect = 63; + drive->cyl = (unsigned long)(drive->capacity48) / (drive->head * drive->sect); + } +} + +static unsigned long idedisk_capacity (ide_drive_t *drive) +{ + if (drive->id->cfs_enable_2 & 0x0400) + return (drive->capacity48 - drive->sect0); + return (drive->capacity - drive->sect0); +} + +static ide_startstop_t idedisk_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + + if (s->b.set_geometry) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + ide_handler_t *handler = NULL; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + s->b.set_geometry = 0; + taskfile.sector_number = drive->sect; + taskfile.low_cylinder = drive->cyl; + taskfile.high_cylinder = drive->cyl>>8; + taskfile.device_head = ((drive->head-1)|drive->select.all)&0xBF; + if (!IS_PDC4030_DRIVE) { + taskfile.sector_count = drive->sect; + taskfile.command = WIN_SPECIFY; + handler = ide_handler_parser(&taskfile, &hobfile); + } + do_taskfile(drive, &taskfile, &hobfile, handler); + } else if (s->b.recalibrate) { + s->b.recalibrate = 0; + if (!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, 
sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->sect; + taskfile.command = WIN_RESTORE; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } + } else if (s->b.set_multmode) { + s->b.set_multmode = 0; + if (drive->id && drive->mult_req > drive->id->max_multsect) + drive->mult_req = drive->id->max_multsect; + if (!IS_PDC4030_DRIVE) { + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + do_taskfile(drive, &taskfile, &hobfile, ide_handler_parser(&taskfile, &hobfile)); + } + } else if (s->all) { + int special = s->all; + s->all = 0; + printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special); + return ide_stopped; + } + return IS_PDC4030_DRIVE ? ide_stopped : ide_started; +} + +static void idedisk_pre_reset (ide_drive_t *drive) +{ + int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; + + drive->special.all = 0; + drive->special.b.set_geometry = legacy; + drive->special.b.recalibrate = legacy; + if (OK_TO_RESET_CONTROLLER) + drive->mult_count = 0; + if (!drive->keep_settings && !drive->using_dma) + drive->mult_req = 0; + if (drive->mult_req != drive->mult_count) + drive->special.b.set_multmode = 1; +} + +#ifdef CONFIG_PROC_FS + +static int smart_enable(ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_ENABLE; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int get_smart_values(ide_drive_t *drive, byte *buf) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_VALUES; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + (void) smart_enable(drive); + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); +} + +static int get_smart_thresholds(ide_drive_t *drive, byte *buf) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = SMART_READ_THRESHOLDS; + taskfile.sector_count = 0x01; + taskfile.low_cylinder = SMART_LCYL_PASS; + taskfile.high_cylinder = SMART_HCYL_PASS; + taskfile.command = WIN_SMART; + (void) smart_enable(drive); + return ide_wait_taskfile(drive, &taskfile, &hobfile, buf); +} + +static int proc_idedisk_read_cache + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *) data; + char *out = page; + int len; + + if (drive->id) + len = sprintf(out,"%i\n", drive->id->buf_size / 2); + else + len = sprintf(out,"(none)\n"); + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static int proc_idedisk_read_smart_thresholds + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *)data; + int len 
= 0, i = 0; + + if (!get_smart_thresholds(drive, page)) { + unsigned short *val = (unsigned short *) page; + char *out = ((char *)val) + (SECTOR_WORDS * 4); + page = out; + do { + out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n'); + val += 1; + } while (i < (SECTOR_WORDS * 2)); + len = out - page; + } + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static int proc_idedisk_read_smart_values + (char *page, char **start, off_t off, int count, int *eof, void *data) +{ + ide_drive_t *drive = (ide_drive_t *)data; + int len = 0, i = 0; + + if (!get_smart_values(drive, page)) { + unsigned short *val = (unsigned short *) page; + char *out = ((char *)val) + (SECTOR_WORDS * 4); + page = out; + do { + out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n'); + val += 1; + } while (i < (SECTOR_WORDS * 2)); + len = out - page; + } + PROC_IDE_READ_RETURN(page,start,off,count,eof,len); +} + +static ide_proc_entry_t idedisk_proc[] = { + { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, + { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, + { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_smart_values, NULL }, + { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_smart_thresholds, NULL }, + { NULL, 0, NULL, NULL } +}; + +#else + +#define idedisk_proc NULL + +#endif /* CONFIG_PROC_FS */ + +static int set_multcount(ide_drive_t *drive, int arg) +{ +#ifdef __TASKFILE__IO + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + + if (drive->special.b.set_multmode) + return -EBUSY; + + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.sector_count = drive->mult_req; + taskfile.command = WIN_SETMULT; + drive->mult_req = arg; + drive->special.b.set_multmode = 1; + ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +#else /* !__TASKFILE__IO */ + struct request rq; + + if (drive->special.b.set_multmode) + return -EBUSY; + ide_init_drive_cmd (&rq); + rq.cmd = IDE_DRIVE_CMD; + drive->mult_req = arg; + drive->special.b.set_multmode = 1; + (void) ide_do_drive_cmd (drive, &rq, ide_wait); +#endif /* __TASKFILE__IO */ + return (drive->mult_count == arg) ? 0 : -EIO; +} + +static int set_nowerr(ide_drive_t *drive, int arg) +{ + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; + drive->nowerr = arg; + drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; + spin_unlock_irq(&io_request_lock); + return 0; +} + +static int write_cache (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.feature = (arg) ? 
SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; + taskfile.command = WIN_SETFEATURES; + + if (!(drive->id->cfs_enable_2 & 0x3000)) + return 1; + + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->wcache = arg; + return 0; +} + +static int do_idedisk_standby (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + taskfile.command = WIN_STANDBYNOW1; + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int do_idedisk_flushcache (ide_drive_t *drive) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + if (drive->id->cfs_enable_2 & 0x2400) { + taskfile.command = WIN_FLUSH_CACHE_EXT; + } else { + taskfile.command = WIN_FLUSH_CACHE; + } + return ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); +} + +static int set_acoustic (ide_drive_t *drive, int arg) +{ + struct hd_drive_task_hdr taskfile; + struct hd_drive_hob_hdr hobfile; + memset(&taskfile, 0, sizeof(struct hd_drive_task_hdr)); + memset(&hobfile, 0, sizeof(struct hd_drive_hob_hdr)); + + taskfile.feature = (arg)?SETFEATURES_EN_AAM:SETFEATURES_DIS_AAM; + taskfile.sector_count = arg; + + taskfile.command = WIN_SETFEATURES; + (void) ide_wait_taskfile(drive, &taskfile, &hobfile, NULL); + drive->acoustic = arg; + return 0; +} + +static int probe_lba_addressing (ide_drive_t *drive, int arg) +{ + drive->addressing = 0; + + if (!(drive->id->cfs_enable_2 & 0x0400)) + return -EIO; + + drive->addressing = arg; + return 0; +} + +static int set_lba_addressing (ide_drive_t *drive, int arg) +{ + return (probe_lba_addressing(drive, arg)); +} + +static void idedisk_add_settings(ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; +#if 0 + int major = HWIF(drive)->major; + int minor = drive->select.b.unit << PARTN_BITS; +#endif + + ide_add_setting(drive, "bios_cyl", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->bios_cyl, NULL); + ide_add_setting(drive, "bios_head", SETTING_RW, -1, -1, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); + ide_add_setting(drive, "bios_sect", SETTING_RW, -1, -1, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); + ide_add_setting(drive, "address", SETTING_RW, HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, TYPE_INTA, 0, 2, 1, 1, &drive->addressing, set_lba_addressing); + ide_add_setting(drive, "bswap", SETTING_READ, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->bswap, NULL); + ide_add_setting(drive, "multcount", id ? SETTING_RW : SETTING_READ, HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, TYPE_BYTE, 0, id ? 
id->max_multsect : 0, 1, 1, &drive->mult_count, set_multcount);
+	ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr);
+#if 0
+	ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 1, &read_ahead[major], NULL);
+	ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL);
+	ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 1, &max_sectors[major][minor], NULL);
+#endif
+	ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL);
+	ide_add_setting(drive, "wcache", SETTING_RW, HDIO_GET_WCACHE, HDIO_SET_WCACHE, TYPE_BYTE, 0, 1, 1, 1, &drive->wcache, write_cache);
+	ide_add_setting(drive, "acoustic", SETTING_RW, HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, TYPE_BYTE, 0, 254, 1, 1, &drive->acoustic, set_acoustic);
+	ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL);
+	ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL);
+}
+
+static void idedisk_setup (ide_drive_t *drive)
+{
+	int i;
+
+	struct hd_driveid *id = drive->id;
+	unsigned long capacity;
+
+	idedisk_add_settings(drive);
+
+	if (id == NULL)
+		return;
+
+	/*
+	 * CompactFlash cards and their brethren look just like hard drives
+	 * to us, but they are removable and don't have a doorlock mechanism.
+	 */
+	if (drive->removable && !drive_is_flashcard(drive)) {
+		/*
+		 * Removable disks (e.g. SYQUEST); ignore 'WD' drives
+		 */
+		if (id->model[0] != 'W' || id->model[1] != 'D') {
+			drive->doorlocking = 1;
+		}
+	}
+	for (i = 0; i < MAX_DRIVES; ++i) {
+		ide_hwif_t *hwif = HWIF(drive);
+
+		if (drive != &hwif->drives[i]) continue;
+#ifdef DEVFS_MUST_DIE
+		hwif->gd->de_arr[i] = drive->de;
+#endif
+		if (drive->removable)
+			hwif->gd->flags[i] |= GENHD_FL_REMOVABLE;
+		break;
+	}
+
+	/* Extract geometry if we did not already have one for the drive */
+	if (!drive->cyl || !drive->head || !drive->sect) {
+		drive->cyl = drive->bios_cyl = id->cyls;
+		drive->head = drive->bios_head = id->heads;
+		drive->sect = drive->bios_sect = id->sectors;
+	}
+
+	/* Handle logical geometry translation by the drive */
+	if ((id->field_valid & 1) && id->cur_cyls &&
+	    id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) {
+		drive->cyl = id->cur_cyls;
+		drive->head = id->cur_heads;
+		drive->sect = id->cur_sectors;
+	}
+
+	/* Use physical geometry if what we have still makes no sense */
+	if (drive->head > 16 && id->heads && id->heads <= 16) {
+		drive->cyl = id->cyls;
+		drive->head = id->heads;
+		drive->sect = id->sectors;
+	}
+
+	/* calculate drive capacity, and select LBA if possible */
+	init_idedisk_capacity (drive);
+
+	/*
+	 * if possible, give fdisk access to more of the drive,
+	 * by correcting bios_cyls:
+	 */
+	capacity = idedisk_capacity (drive);
+	if ((capacity >= (drive->bios_cyl * drive->bios_sect * drive->bios_head)) &&
+	    (!drive->forced_geom) && drive->bios_sect && drive->bios_head)
+		drive->bios_cyl = (capacity / drive->bios_sect) / drive->bios_head;
+	printk (KERN_INFO "XEN %s: %ld sectors", drive->name, capacity);
+
+	/* Give size in megabytes (MB), not mebibytes (MiB). */
+	/* We compute the exact rounded value, avoiding overflow.
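+	   With 512-byte sectors, MB = capacity/1953.125; since
+	   1953.125 = 1218750/624 and 624/625 = 1 - 1/625, this equals
+	   (capacity - capacity/625)/1950, and adding 974 (roughly half
+	   of 1950) before the divide rounds to the nearest MB.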
*/ + printk (" (%ld MB)", (capacity - capacity/625 + 974)/1950); + + /* Only print cache size when it was specified */ + if (id->buf_size) + printk (" w/%dKiB Cache", id->buf_size/2); + + printk(", CHS=%d/%d/%d", + drive->bios_cyl, drive->bios_head, drive->bios_sect); +#ifdef CONFIG_BLK_DEV_IDEDMA + if (drive->using_dma) + (void) HWIF(drive)->dmaproc(ide_dma_verbose, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + printk("\n"); + + drive->mult_count = 0; + if (id->max_multsect) { +#ifdef CONFIG_IDEDISK_MULTI_MODE + id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0; + id->multsect_valid = id->multsect ? 1 : 0; + drive->mult_req = id->multsect_valid ? id->max_multsect : INITIAL_MULT_COUNT; + drive->special.b.set_multmode = drive->mult_req ? 1 : 0; +#else /* original, pre IDE-NFG, per request of AC */ + drive->mult_req = INITIAL_MULT_COUNT; + if (drive->mult_req > id->max_multsect) + drive->mult_req = id->max_multsect; + if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) + drive->special.b.set_multmode = 1; +#endif /* CONFIG_IDEDISK_MULTI_MODE */ + } + drive->no_io_32bit = id->dword_io ? 1 : 0; + if (drive->id->cfs_enable_2 & 0x3000) + write_cache(drive, (id->cfs_enable_2 & 0x3000)); + (void) probe_lba_addressing(drive, 1); +} + +static int idedisk_cleanup (ide_drive_t *drive) +{ + if ((drive->id->cfs_enable_2 & 0x3000) && drive->wcache) + if (do_idedisk_flushcache(drive)) + printk (KERN_INFO "%s: Write Cache FAILED Flushing!\n", + drive->name); + return ide_unregister_subdriver(drive); +} + +int idedisk_reinit(ide_drive_t *drive); + +/* + * IDE subdriver functions, registered with ide.c + */ +static ide_driver_t idedisk_driver = { + name: "ide-disk", + version: IDEDISK_VERSION, + media: ide_disk, + busy: 0, + supports_dma: 1, + supports_dsc_overlap: 0, + cleanup: idedisk_cleanup, + standby: do_idedisk_standby, + flushcache: do_idedisk_flushcache, + do_request: do_rw_disk, + end_request: NULL, + ioctl: NULL, + open: idedisk_open, + release: idedisk_release, + media_change: idedisk_media_change, + revalidate: idedisk_revalidate, + pre_reset: idedisk_pre_reset, + capacity: idedisk_capacity, + special: idedisk_special, + /*proc: idedisk_proc,*/ + reinit: idedisk_reinit, + ata_prebuilder: NULL, + atapi_prebuilder: NULL, +}; + +int idedisk_init (void); +static ide_module_t idedisk_module = { + IDE_DRIVER_MODULE, + idedisk_init, + &idedisk_driver, + NULL +}; + +MODULE_DESCRIPTION("ATA DISK Driver"); + +int idedisk_reinit (ide_drive_t *drive) +{ + int failed = 0; + + MOD_INC_USE_COUNT; + + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + return 1; + } + DRIVER(drive)->busy++; + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; + return 1; + } + DRIVER(drive)->busy--; + failed--; + + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + return 0; +} + +static void __exit idedisk_exit (void) +{ + ide_drive_t *drive; + int failed = 0; + + while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, &idedisk_driver, failed)) != NULL) { + if (idedisk_cleanup (drive)) { + printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name); + failed++; + } + /* We must remove proc entries defined in this module. 
+ Otherwise we oops while accessing these entries */ +#ifdef CONFIG_PROC_FS + if (drive->proc) + ide_remove_proc_entries(drive->proc, idedisk_proc); +#endif + } + ide_unregister_module(&idedisk_module); +} + +int idedisk_init (void) +{ + ide_drive_t *drive; + int failed = 0; + + MOD_INC_USE_COUNT; + while ((drive = ide_scan_devices (ide_disk, idedisk_driver.name, NULL, failed++)) != NULL) { + if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) { + printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name); + continue; + } + DRIVER(drive)->busy++; + idedisk_setup(drive); + if ((!drive->head || drive->head > 16) && !drive->select.b.lba) { + printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head); + (void) idedisk_cleanup(drive); + DRIVER(drive)->busy--; + continue; + } + DRIVER(drive)->busy--; + failed--; + } + ide_register_module(&idedisk_module); + MOD_DEC_USE_COUNT; + return 0; +} + +module_init(idedisk_init); +module_exit(idedisk_exit); +MODULE_LICENSE("GPL"); diff --git a/xen-2.4.16/drivers/ide/ide-dma.c b/xen-2.4.16/drivers/ide/ide-dma.c new file mode 100644 index 0000000000..f0cd6cc286 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-dma.c @@ -0,0 +1,867 @@ +/* + * linux/drivers/ide/ide-dma.c Version 4.10 June 9, 2000 + * + * Copyright (c) 1999-2000 Andre Hedrick + * May be copied or modified under the terms of the GNU General Public License + */ + +/* + * Special Thanks to Mark for his Six years of work. + * + * Copyright (c) 1995-1998 Mark Lord + * May be copied or modified under the terms of the GNU General Public License + */ + +/* + * This module provides support for the bus-master IDE DMA functions + * of various PCI chipsets, including the Intel PIIX (i82371FB for + * the 430 FX chipset), the PIIX3 (i82371SB for the 430 HX/VX and + * 440 chipsets), and the PIIX4 (i82371AB for the 430 TX chipset) + * ("PIIX" stands for "PCI ISA IDE Xcellerator"). + * + * Pretty much the same code works for other IDE PCI bus-mastering chipsets. + * + * DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies). + * + * By default, DMA support is prepared for use, but is currently enabled only + * for drives which already have DMA enabled (UltraDMA or mode 2 multi/single), + * or which are recognized as "good" (see table below). Drives with only mode0 + * or mode1 (multi/single) DMA should also work with this chipset/driver + * (eg. MC2112A) but are not enabled by default. + * + * Use "hdparm -i" to view modes supported by a given drive. + * + * The hdparm-3.5 (or later) utility can be used for manually enabling/disabling + * DMA support, but must be (re-)compiled against this kernel version or later. + * + * To enable DMA, use "hdparm -d1 /dev/hd?" on a per-drive basis after booting. + * If problems arise, ide.c will disable DMA operation after a few retries. + * This error recovery mechanism works and has been extremely well exercised. + * + * IDE drives, depending on their vintage, may support several different modes + * of DMA operation. The boot-time modes are indicated with a "*" in + * the "hdparm -i" listing, and can be changed with *knowledgeable* use of + * the "hdparm -X" feature. There is seldom a need to do this, as drives + * normally power-up with their "best" PIO/DMA modes enabled. 
+ * + * Testing has been done with a rather extensive number of drives, + * with Quantum & Western Digital models generally outperforming the pack, + * and Fujitsu & Conner (and some Seagate which are really Conner) drives + * showing more lackluster throughput. + * + * Keep an eye on /var/adm/messages for "DMA disabled" messages. + * + * Some people have reported trouble with Intel Zappa motherboards. + * This can be fixed by upgrading the AMI BIOS to version 1.00.04.BS0, + * available from ftp://ftp.intel.com/pub/bios/10004bs0.exe + * (thanks to Glen Morrell for researching this). + * + * Thanks to "Christopher J. Reimer" for + * fixing the problem with the BIOS on some Acer motherboards. + * + * Thanks to "Benoit Poulot-Cazajous" for testing + * "TX" chipset compatibility and for providing patches for the "TX" chipset. + * + * Thanks to Christian Brunner for taking a good first crack + * at generic DMA -- his patches were referred to when preparing this code. + * + * Most importantly, thanks to Robert Bringman + * for supplying a Promise UDMA board & WD UDMA drive for this work! + * + * And, yes, Intel Zappa boards really *do* use both PIIX IDE ports. + * + * check_drive_lists(ide_drive_t *drive, int good_bad) + * + * ATA-66/100 and recovery functions, I forgot the rest...... + * SELECT_READ_WRITE(hwif,drive,func) for active tuning based on IO direction. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Long lost data from 2.0.34 that is now in 2.0.39 + * + * This was used in ./drivers/block/triton.c to do DMA Base address setup + * when PnP failed. Oh the things we forget. I believe this was part + * of SFF-8038i that has been withdrawn from public access... 
:-(( + */ +#define DEFAULT_BMIBA 0xe800 /* in case BIOS did not init it */ +#define DEFAULT_BMCRBA 0xcc00 /* VIA's default value */ +#define DEFAULT_BMALIBA 0xd400 /* ALI's default value */ + +extern char *ide_dmafunc_verbose(ide_dma_action_t dmafunc); + +#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS + +struct drive_list_entry { + char * id_model; + char * id_firmware; +}; + +struct drive_list_entry drive_whitelist [] = { + + { "Micropolis 2112A" , "ALL" }, + { "CONNER CTMA 4000" , "ALL" }, + { "CONNER CTT8000-A" , "ALL" }, + { "ST34342A" , "ALL" }, + { 0 , 0 } +}; + +struct drive_list_entry drive_blacklist [] = { + + { "WDC AC11000H" , "ALL" }, + { "WDC AC22100H" , "ALL" }, + { "WDC AC31000H" , "ALL" }, + { "WDC AC32500H" , "ALL" }, + { "WDC AC33100H" , "ALL" }, + { "WDC AC31600H" , "ALL" }, + { "WDC AC32100H" , "24.09P07" }, + { "WDC AC23200L" , "21.10N21" }, + { "Compaq CRD-8241B" , "ALL" }, + { "CRD-8400B" , "ALL" }, + { "CRD-8480B", "ALL" }, + { "CRD-8480C", "ALL" }, + { "CRD-8482B", "ALL" }, + { "CRD-84" , "ALL" }, + { "SanDisk SDP3B" , "ALL" }, + { "SanDisk SDP3B-64" , "ALL" }, + { "SANYO CD-ROM CRD" , "ALL" }, + { "HITACHI CDR-8" , "ALL" }, + { "HITACHI CDR-8335" , "ALL" }, + { "HITACHI CDR-8435" , "ALL" }, + { "Toshiba CD-ROM XM-6202B" , "ALL" }, + { "CD-532E-A" , "ALL" }, + { "E-IDE CD-ROM CR-840", "ALL" }, + { "CD-ROM Drive/F5A", "ALL" }, + { "RICOH CD-R/RW MP7083A", "ALL" }, + { "WPI CDD-820", "ALL" }, + { "SAMSUNG CD-ROM SC-148C", "ALL" }, + { "SAMSUNG CD-ROM SC-148F", "ALL" }, + { "SAMSUNG CD-ROM SC", "ALL" }, + { "SanDisk SDP3B-64" , "ALL" }, + { "SAMSUNG CD-ROM SN-124", "ALL" }, + { "PLEXTOR CD-R PX-W8432T", "ALL" }, + { "ATAPI CD-ROM DRIVE 40X MAXIMUM", "ALL" }, + { "_NEC DV5800A", "ALL" }, + { 0 , 0 } + +}; + +int in_drive_list(struct hd_driveid *id, struct drive_list_entry * drive_table) +{ + for ( ; drive_table->id_model ; drive_table++) + if ((!strcmp(drive_table->id_model, id->model)) && + ((!strstr(drive_table->id_firmware, id->fw_rev)) || + (!strcmp(drive_table->id_firmware, "ALL")))) + return 1; + return 0; +} + +#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ + +/* + * good_dma_drives() lists the model names (from "hdparm -i") + * of drives which do not support mode2 DMA but which are + * known to work fine with this interface under Linux. + */ +const char *good_dma_drives[] = {"Micropolis 2112A", + "CONNER CTMA 4000", + "CONNER CTT8000-A", + "ST34342A", /* for Sun Ultra */ + NULL}; + +/* + * bad_dma_drives() lists the model names (from "hdparm -i") + * of drives which supposedly support (U)DMA but which are + * known to corrupt data with this interface under Linux. + * + * This is an empirical list. Its generated from bug reports. That means + * while it reflects actual problem distributions it doesn't answer whether + * the drive or the controller, or cabling, or software, or some combination + * thereof is the fault. If you don't happen to agree with the kernel's + * opinion of your drive - use hdparm to turn DMA on. + */ +const char *bad_dma_drives[] = {"WDC AC11000H", + "WDC AC22100H", + "WDC AC32100H", + "WDC AC32500H", + "WDC AC33100H", + "WDC AC31600H", + NULL}; + +#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ + +/* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. 
Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * The driver reverts to PIO mode for individual requests that exceed + * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling + * 100% of all crazy scenarios here is not necessary. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. + */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +/* + * dma_intr() is the handler for disk read/write DMA interrupts + */ +ide_startstop_t ide_dma_intr (ide_drive_t *drive) +{ + int i; + byte stat, dma_stat; + + dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive); + stat = GET_STAT(); /* get drive status */ + if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { + if (!dma_stat) { + struct request *rq = HWGROUP(drive)->rq; + rq = HWGROUP(drive)->rq; + for (i = rq->nr_sectors; i > 0;) { + i -= rq->current_nr_sectors; + ide_end_request(1, HWGROUP(drive)); + } + return ide_stopped; + } + printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n", + drive->name, dma_stat); + } + return ide_error(drive, "dma_intr", stat); +} + +static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) +{ + struct buffer_head *bh; + struct scatterlist *sg = hwif->sg_table; + int nents = 0; + + if (hwif->sg_dma_active) + BUG(); + + if (rq->cmd == READ) + hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; + else + hwif->sg_dma_direction = PCI_DMA_TODEVICE; + bh = rq->bh; + do { + unsigned char *virt_addr = bh->b_data; + unsigned int size = bh->b_size; + + if (nents >= PRD_ENTRIES) + return 0; + + while ((bh = bh->b_reqnext) != NULL) { + if ((virt_addr + size) != (unsigned char *) bh->b_data) + break; + size += bh->b_size; + } + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = virt_addr; + sg[nents].length = size; + nents++; + } while (bh != NULL); + + return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); +} + +static int ide_raw_build_sglist (ide_hwif_t *hwif, struct request *rq) +{ + struct scatterlist *sg = hwif->sg_table; + int nents = 0; + ide_task_t *args = rq->special; + unsigned char *virt_addr = rq->buffer; + int sector_count = rq->nr_sectors; + +// if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA) || +// (args->tfRegister[IDE_COMMAND_OFFSET] == WIN_WRITEDMA_EXT)) + if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE) + hwif->sg_dma_direction = PCI_DMA_TODEVICE; + else + hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; + + if (sector_count > 128) { + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = virt_addr; + sg[nents].length = 128 * SECTOR_SIZE; + nents++; + virt_addr = virt_addr + (128 * SECTOR_SIZE); + sector_count -= 128; + } + memset(&sg[nents], 0, sizeof(*sg)); + sg[nents].address = virt_addr; + sg[nents].length = sector_count * SECTOR_SIZE; + nents++; + + return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); +} + +/* + * ide_build_dmatable() prepares a dma request. + * Returns 0 if all went okay, returns 1 otherwise. 
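ide_build_sglist() above coalesces the request's buffer_head chain: a buffer that begins exactly where the previous one ends is folded into the same scatterlist entry before pci_map_sg() translates the list to bus addresses. As a sketch of just the merge condition (hypothetical helper, not part of the driver):

static int bh_contiguous_sketch(struct buffer_head *a, struct buffer_head *b)
{
	/* mergeable when b starts exactly where a ends in virtual memory */
	return ((unsigned char *) a->b_data) + a->b_size ==
	       (unsigned char *) b->b_data;
}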
+ * May also be invoked from trm290.c + */ +int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) +{ + unsigned int *table = HWIF(drive)->dmatable_cpu; +#ifdef CONFIG_BLK_DEV_TRM290 + unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290); +#else + const int is_trm290_chipset = 0; +#endif + unsigned int count = 0; + int i; + struct scatterlist *sg; + + if (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) + HWIF(drive)->sg_nents = i = ide_raw_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + else + HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + + if (!i) + return 0; + + sg = HWIF(drive)->sg_table; + while (i && sg_dma_len(sg)) { + u32 cur_addr; + u32 cur_len; + + cur_addr = sg_dma_address(sg); + cur_len = sg_dma_len(sg); + + /* + * Fill in the dma table, without crossing any 64kB boundaries. + * Most hardware requires 16-bit alignment of all blocks, + * but the trm290 requires 32-bit alignment. + */ + + while (cur_len) { + if (count++ >= PRD_ENTRIES) { + printk("%s: DMA table too small\n", drive->name); + goto use_pio_instead; + } else { + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length of 0x0000 as 64KB, + * but at least one (e.g. CS5530) misinterprets it as zero (!). + * So here we break the 64KB entry into two 32KB entries instead. + */ + if (count++ >= PRD_ENTRIES) { + printk("%s: DMA table too small\n", drive->name); + goto use_pio_instead; + } + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; + } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; + } + } + + sg++; + i--; + } + + if (count) { + if (!is_trm290_chipset) + *--table |= cpu_to_le32(0x80000000); + return count; + } + printk("%s: empty DMA table?\n", drive->name); +use_pio_instead: + pci_unmap_sg(HWIF(drive)->pci_dev, + HWIF(drive)->sg_table, + HWIF(drive)->sg_nents, + HWIF(drive)->sg_dma_direction); + HWIF(drive)->sg_dma_active = 0; + return 0; /* revert to PIO for this request */ +} + +/* Teardown mappings after DMA has completed. */ +void ide_destroy_dmatable (ide_drive_t *drive) +{ + struct pci_dev *dev = HWIF(drive)->pci_dev; + struct scatterlist *sg = HWIF(drive)->sg_table; + int nents = HWIF(drive)->sg_nents; + + pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); + HWIF(drive)->sg_dma_active = 0; +} + +/* + * For both Blacklisted and Whitelisted drives. + * This is setup to be called as an extern for future support + * to other special driver code. 
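The table ide_build_dmatable() fills above is a standard bus-master IDE physical region descriptor (PRD) list: 8-byte entries, each two little-endian 32-bit words, cpu_to_le32()-converted as in the loop above. A sketch of the layout the code implies (the struct name is illustrative; the driver writes raw u32 words):

struct prd_entry_sketch {
	u32 addr;	/* physical base of the region; an entry never
			   crosses a 64KB boundary (hence the split above) */
	u32 xcount;	/* bits 0..15: byte count, where 0x0000 is read by
			   most chipsets as 64KB; bit 31 (0x80000000) marks
			   the last entry, ORed in after the loop */
};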
+ */ +int check_drive_lists (ide_drive_t *drive, int good_bad) +{ + struct hd_driveid *id = drive->id; + +#ifdef CONFIG_IDEDMA_NEW_DRIVE_LISTINGS + if (good_bad) { + return in_drive_list(id, drive_whitelist); + } else { + int blacklist = in_drive_list(id, drive_blacklist); + if (blacklist) + printk("%s: Disabling (U)DMA for %s\n", drive->name, id->model); + return(blacklist); + } +#else /* !CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ + const char **list; + + if (good_bad) { + /* Consult the list of known "good" drives */ + list = good_dma_drives; + while (*list) { + if (!strcmp(*list++,id->model)) + return 1; + } + } else { + /* Consult the list of known "bad" drives */ + list = bad_dma_drives; + while (*list) { + if (!strcmp(*list++,id->model)) { + printk("%s: Disabling (U)DMA for %s\n", + drive->name, id->model); + return 1; + } + } + } +#endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ + return 0; +} + +int report_drive_dmaing (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + + if ((id->field_valid & 4) && (eighty_ninty_three(drive)) && + (id->dma_ultra & (id->dma_ultra >> 14) & 3)) { + if ((id->dma_ultra >> 15) & 1) { + printk(", UDMA(mode 7)"); /* UDMA BIOS-enabled! */ + } else { + printk(", UDMA(133)"); /* UDMA BIOS-enabled! */ + } + } else if ((id->field_valid & 4) && (eighty_ninty_three(drive)) && + (id->dma_ultra & (id->dma_ultra >> 11) & 7)) { + if ((id->dma_ultra >> 13) & 1) { + printk(", UDMA(100)"); /* UDMA BIOS-enabled! */ + } else if ((id->dma_ultra >> 12) & 1) { + printk(", UDMA(66)"); /* UDMA BIOS-enabled! */ + } else { + printk(", UDMA(44)"); /* UDMA BIOS-enabled! */ + } + } else if ((id->field_valid & 4) && + (id->dma_ultra & (id->dma_ultra >> 8) & 7)) { + if ((id->dma_ultra >> 10) & 1) { + printk(", UDMA(33)"); /* UDMA BIOS-enabled! */ + } else if ((id->dma_ultra >> 9) & 1) { + printk(", UDMA(25)"); /* UDMA BIOS-enabled! */ + } else { + printk(", UDMA(16)"); /* UDMA BIOS-enabled! */ + } + } else if (id->field_valid & 4) { + printk(", (U)DMA"); /* Can be BIOS-enabled! */ + } else { + printk(", DMA"); + } + return 1; +} + +static int config_drive_for_dma (ide_drive_t *drive) +{ + int config_allows_dma = 1; + struct hd_driveid *id = drive->id; + ide_hwif_t *hwif = HWIF(drive); + +#ifdef CONFIG_IDEDMA_ONLYDISK + if (drive->media != ide_disk) + config_allows_dma = 0; +#endif + + if (id && (id->capability & 1) && hwif->autodma && config_allows_dma) { + /* Consult the list of known "bad" drives */ + if (ide_dmaproc(ide_dma_bad_drive, drive)) + return hwif->dmaproc(ide_dma_off, drive); + + /* Enable DMA on any drive that has UltraDMA (mode 6/7/?) 
enabled */
+		if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+			if ((id->dma_ultra & (id->dma_ultra >> 14) & 2))
+				return hwif->dmaproc(ide_dma_on, drive);
+		/* Enable DMA on any drive that has UltraDMA (mode 3/4/5) enabled */
+		if ((id->field_valid & 4) && (eighty_ninty_three(drive)))
+			if ((id->dma_ultra & (id->dma_ultra >> 11) & 7))
+				return hwif->dmaproc(ide_dma_on, drive);
+		/* Enable DMA on any drive that has UltraDMA (mode 0/1/2) enabled */
+		if (id->field_valid & 4)	/* UltraDMA */
+			if ((id->dma_ultra & (id->dma_ultra >> 8) & 7))
+				return hwif->dmaproc(ide_dma_on, drive);
+		/* Enable DMA on any drive that has mode2 DMA (multi or single) enabled */
+		if (id->field_valid & 2)	/* regular DMA */
+			if ((id->dma_mword & 0x404) == 0x404 || (id->dma_1word & 0x404) == 0x404)
+				return hwif->dmaproc(ide_dma_on, drive);
+		/* Consult the list of known "good" drives */
+		if (ide_dmaproc(ide_dma_good_drive, drive))
+			return hwif->dmaproc(ide_dma_on, drive);
+	}
+	return hwif->dmaproc(ide_dma_off_quietly, drive);
+}
+
+#ifndef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+/*
+ * dma status bits: 1 = DMAing, 2 = error, 4 = intr
+ */
+static int dma_timer_expiry (ide_drive_t *drive)
+{
+	byte dma_stat = inb(HWIF(drive)->dma_base+2);
+
+#ifdef DEBUG
+	printk("%s: dma_timer_expiry: dma status == 0x%02x\n", drive->name, dma_stat);
+#endif /* DEBUG */
+
+#if 0
+	HWGROUP(drive)->expiry = NULL;	/* one free ride for now */
+#endif
+
+	if (dma_stat & 2) {	/* ERROR */
+		byte stat = GET_STAT();
+		return ide_error(drive, "dma_timer_expiry", stat);
+	}
+	if (dma_stat & 1)	/* DMAing */
+		return WAIT_CMD;
+	return 0;
+}
+#else /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+static ide_startstop_t ide_dma_timeout_recovery (ide_drive_t *drive)
+{
+	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+	ide_hwif_t *hwif = HWIF(drive);
+	int enable_dma = drive->using_dma;
+	unsigned long flags;
+	ide_startstop_t startstop;
+
+	spin_lock_irqsave(&io_request_lock, flags);
+	hwgroup->handler = NULL;
+	del_timer(&hwgroup->timer);
+	spin_unlock_irqrestore(&io_request_lock, flags);
+
+	drive->waiting_for_dma = 0;
+
+	startstop = ide_do_reset(drive);
+
+	if ((enable_dma) && !(drive->using_dma))
+		(void) hwif->dmaproc(ide_dma_on, drive);
+
+	return startstop;
+}
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+
+/*
+ * ide_dmaproc() initiates/aborts DMA read/write operations on a drive.
+ *
+ * The caller is assumed to have selected the drive and programmed the drive's
+ * sector address using CHS or LBA. All that remains is to prepare for DMA
+ * and then issue the actual read/write DMA/PIO command to the drive.
+ *
+ * For ATAPI devices, we just prepare for DMA and return. The caller should
+ * then issue the packet command to the drive and call us again with
+ * ide_dma_begin afterwards.
+ *
+ * Returns 0 if all went well.
+ * Returns 1 if DMA read/write could not be started, in which case
+ * the caller should revert to PIO for the current request.
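For reference alongside config_drive_for_dma() above and ide_dmaproc() below: identify word dma_ultra carries the supported UDMA modes in its low bits and the single currently-enabled mode in bits 8 and up, which is what the shift-and-mask tests probe; the engine itself is driven through the small SFF-8038i register block at dma_base. A sketch of both, with illustrative names only:

/* Sketch only: per-mode form of the dma_ultra tests used above. */
static int udma_mode_enabled_sketch(struct hd_driveid *id, int mode)
{
	if (!(id->field_valid & 4))	/* UDMA words of the ID block not valid */
		return 0;
	/* bit N: mode N supported; bit N+8: mode N currently enabled */
	return (id->dma_ultra & (1 << mode)) &&
	       (id->dma_ultra & (1 << (mode + 8)));
}

/* Sketch only: the bus-master register block ide_dmaproc() programs,
 * one per channel (the second channel sits 8 bytes further in). */
#define BM_COMMAND_SKETCH(base)	((base) + 0)	/* bit 0: start/stop engine;
						   bit 3: 1 = read from device */
#define BM_STATUS_SKETCH(base)	((base) + 2)	/* bit 0: DMA active; bits 1,2:
						   error/interrupt (write 1 to clear);
						   bits 5,6: drive 0/1 DMA-capable;
						   bit 7: simplex-only controller */
#define BM_PRDT_SKETCH(base)	((base) + 4)	/* 32-bit physical PRD table pointer */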
+ * May also be invoked from trm290.c + */ +int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) +{ +// ide_hwgroup_t *hwgroup = HWGROUP(drive); + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte unit = (drive->select.b.unit & 0x01); + unsigned int count, reading = 0; + byte dma_stat; + + switch (func) { + case ide_dma_off: + printk("%s: DMA disabled\n", drive->name); + case ide_dma_off_quietly: + outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); + case ide_dma_on: + drive->using_dma = (func == ide_dma_on); + if (drive->using_dma) + outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); + return 0; + case ide_dma_check: + return config_drive_for_dma (drive); + case ide_dma_read: + reading = 1 << 3; + case ide_dma_write: + SELECT_READ_WRITE(hwif,drive,func); + if (!(count = ide_build_dmatable(drive, func))) + return 1; /* try PIO instead of DMA */ + outl(hwif->dmatable_dma, dma_base + 4); /* PRD table */ + outb(reading, dma_base); /* specify r/w */ + outb(inb(dma_base+2)|6, dma_base+2); /* clear INTR & ERROR flags */ + drive->waiting_for_dma = 1; + if (drive->media != ide_disk) + return 0; +#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT + ide_set_handler(drive, &ide_dma_intr, 2*WAIT_CMD, NULL); /* issue cmd to drive */ +#else /* !CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ + ide_set_handler(drive, &ide_dma_intr, WAIT_CMD, dma_timer_expiry); /* issue cmd to drive */ +#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ + if ((HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASKFILE) && + (drive->addressing == 1)) { + ide_task_t *args = HWGROUP(drive)->rq->special; + OUT_BYTE(args->tfRegister[IDE_COMMAND_OFFSET], IDE_COMMAND_REG); + } else if (drive->addressing) { + OUT_BYTE(reading ? WIN_READDMA_EXT : WIN_WRITEDMA_EXT, IDE_COMMAND_REG); + } else { + OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG); + } + return HWIF(drive)->dmaproc(ide_dma_begin, drive); + case ide_dma_begin: + /* Note that this is done *after* the cmd has + * been issued to the drive, as per the BM-IDE spec. + * The Promise Ultra33 doesn't work correctly when + * we do this part before issuing the drive cmd. + */ + outb(inb(dma_base)|1, dma_base); /* start DMA */ + return 0; + case ide_dma_end: /* returns 1 on error, 0 otherwise */ + drive->waiting_for_dma = 0; + outb(inb(dma_base)&~1, dma_base); /* stop DMA */ + dma_stat = inb(dma_base+2); /* get DMA status */ + outb(dma_stat|6, dma_base+2); /* clear the INTR & ERROR bits */ + ide_destroy_dmatable(drive); /* purge DMA mappings */ + return (dma_stat & 7) != 4 ? (0x10 | dma_stat) : 0; /* verify good DMA status */ + case ide_dma_test_irq: /* returns 1 if dma irq issued, 0 otherwise */ + dma_stat = inb(dma_base+2); +#if 0 /* do not set unless you know what you are doing */ + if (dma_stat & 4) { + byte stat = GET_STAT(); + outb(dma_base+2, dma_stat & 0xE4); + } +#endif + return (dma_stat & 4) == 4; /* return 1 if INTR asserted */ + case ide_dma_bad_drive: + case ide_dma_good_drive: + return check_drive_lists(drive, (func == ide_dma_good_drive)); + case ide_dma_verbose: + return report_drive_dmaing(drive); + case ide_dma_timeout: + // FIXME: Many IDE chipsets do not permit command file register access + // FIXME: while the bus-master function is still active. + // FIXME: To prevent deadlock with those chipsets, we must be extremely + // FIXME: careful here (and in ide_intr() as well) to NOT access any + // FIXME: registers from the 0x1Fx/0x17x sets before terminating the + // FIXME: bus-master operation via the bus-master control reg. 
+		// FIXME: Otherwise, chipset deadlock will occur, and some systems will
+		// FIXME: lock up completely!!
+#ifdef CONFIG_BLK_DEV_IDEDMA_TIMEOUT
+		/*
+		 * Have to issue an abort and requeue the request.
+		 * The DMA engine got turned off by a goofy ASIC, and
+		 * we have to clean up the mess, and here is as good
+		 * as any. Do it globally for all chipsets.
+		 */
+		outb(0x00, dma_base);		/* stop DMA */
+		dma_stat = inb(dma_base+2);	/* get DMA status */
+		outb(dma_stat|6, dma_base+2);	/* clear the INTR & ERROR bits */
+		printk("%s: %s: Let's do it again! " \
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+			drive->name, ide_dmafunc_verbose(func),
+			GET_STAT(), dma_stat);
+
+		if (dma_stat & 0xF0)
+			return ide_dma_timeout_recovery(drive);
+
+		printk("%s: %s: (restart_request) Let's do it again! " \
+			"stat = 0x%02x, dma_stat = 0x%02x\n",
+			drive->name, ide_dmafunc_verbose(func),
+			GET_STAT(), dma_stat);
+
+		return restart_request(drive);	// BUG: return types do not match!!
+//#else
+//		return HWGROUP(drive)->handler(drive);
+#endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */
+	case ide_dma_retune:
+	case ide_dma_lostirq:
+		printk("ide_dmaproc: chipset supported %s func only: %d\n", ide_dmafunc_verbose(func), func);
+		return 1;
+	default:
+		printk("ide_dmaproc: unsupported %s func: %d\n", ide_dmafunc_verbose(func), func);
+		return 1;
+	}
+}
+
+/*
+ * Needed for allowing full modular support of ide-driver
+ */
+int ide_release_dma (ide_hwif_t *hwif)
+{
+	if (hwif->dmatable_cpu) {
+		pci_free_consistent(hwif->pci_dev,
+				    PRD_ENTRIES * PRD_BYTES,
+				    hwif->dmatable_cpu,
+				    hwif->dmatable_dma);
+		hwif->dmatable_cpu = NULL;
+	}
+	if (hwif->sg_table) {
+		kfree(hwif->sg_table);
+		hwif->sg_table = NULL;
+	}
+	if ((hwif->dma_extra) && (hwif->channel == 0))
+		release_region((hwif->dma_base + 16), hwif->dma_extra);
+	release_region(hwif->dma_base, 8);
+	return 1;
+}
+
+/*
+ * This can be called for a dynamically installed interface. Don't __init it
+ */
+
+void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+{
+	printk(" %s: BM-DMA at 0x%04lx-0x%04lx", hwif->name, dma_base, dma_base + num_ports - 1);
+	if (check_region(dma_base, num_ports)) {
+		printk(" -- ERROR, PORT ADDRESSES ALREADY IN USE\n");
+		return;
+	}
+	request_region(dma_base, num_ports, hwif->name);
+	hwif->dma_base = dma_base;
+	hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev,
+						  PRD_ENTRIES * PRD_BYTES,
+						  &hwif->dmatable_dma);
+	if (hwif->dmatable_cpu == NULL)
+		goto dma_alloc_failure;
+
+	hwif->sg_table = kmalloc(sizeof(struct scatterlist) * PRD_ENTRIES,
+				 GFP_KERNEL);
+	if (hwif->sg_table == NULL) {
+		pci_free_consistent(hwif->pci_dev, PRD_ENTRIES * PRD_BYTES,
+				    hwif->dmatable_cpu, hwif->dmatable_dma);
+		goto dma_alloc_failure;
+	}
+
+	hwif->dmaproc = &ide_dmaproc;
+
+	if (hwif->chipset != ide_trm290) {
+		byte dma_stat = inb(dma_base+2);
+		printk(", BIOS settings: %s:%s, %s:%s",
+		       hwif->drives[0].name, (dma_stat & 0x20) ? "DMA" : "pio",
+		       hwif->drives[1].name, (dma_stat & 0x40) ?
"DMA" : "pio"); + } + printk("\n"); + return; + +dma_alloc_failure: + printk(" -- ERROR, UNABLE TO ALLOCATE DMA TABLES\n"); +} + +/* + * Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space: + */ +unsigned long __init ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name) +{ + unsigned long dma_base = 0; + struct pci_dev *dev = hwif->pci_dev; + +#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED + int second_chance = 0; + +second_chance_to_dma: +#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */ + + if (hwif->mate && hwif->mate->dma_base) { + dma_base = hwif->mate->dma_base - (hwif->channel ? 0 : 8); + } else { + dma_base = pci_resource_start(dev, 4); + if (!dma_base) { + printk("%s: dma_base is invalid (0x%04lx)\n", name, dma_base); + dma_base = 0; + } + } + +#ifdef CONFIG_BLK_DEV_IDEDMA_FORCED + if ((!dma_base) && (!second_chance)) { + unsigned long set_bmiba = 0; + second_chance++; + switch(dev->vendor) { + case PCI_VENDOR_ID_AL: + set_bmiba = DEFAULT_BMALIBA; break; + case PCI_VENDOR_ID_VIA: + set_bmiba = DEFAULT_BMCRBA; break; + case PCI_VENDOR_ID_INTEL: + set_bmiba = DEFAULT_BMIBA; break; + default: + return dma_base; + } + pci_write_config_dword(dev, 0x20, set_bmiba|1); + goto second_chance_to_dma; + } +#endif /* CONFIG_BLK_DEV_IDEDMA_FORCED */ + + if (dma_base) { + if (extra) /* PDC20246, PDC20262, HPT343, & HPT366 */ + request_region(dma_base+16, extra, name); + dma_base += hwif->channel ? 8 : 0; + hwif->dma_extra = extra; + + switch(dev->device) { + case PCI_DEVICE_ID_AL_M5219: + case PCI_DEVICE_ID_AMD_VIPER_7409: + case PCI_DEVICE_ID_CMD_643: + outb(inb(dma_base+2) & 0x60, dma_base+2); + if (inb(dma_base+2) & 0x80) { + printk("%s: simplex device: DMA forced\n", name); + } + break; + default: + /* + * If the device claims "simplex" DMA, + * this means only one of the two interfaces + * can be trusted with DMA at any point in time. + * So we should enable DMA only on one of the + * two interfaces. + */ + if ((inb(dma_base+2) & 0x80)) { /* simplex device? */ + if ((!hwif->drives[0].present && !hwif->drives[1].present) || + (hwif->mate && hwif->mate->dma_base)) { + printk("%s: simplex device: DMA disabled\n", name); + dma_base = 0; + } + } + } + } + return dma_base; +} diff --git a/xen-2.4.16/drivers/ide/ide-features.c b/xen-2.4.16/drivers/ide/ide-features.c new file mode 100644 index 0000000000..a60af2dad3 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-features.c @@ -0,0 +1,384 @@ +/* + * linux/drivers/block/ide-features.c Version 0.04 June 9, 2000 + * + * Copyright (C) 1999-2000 Linus Torvalds & authors (see below) + * + * Copyright (C) 1999-2000 Andre Hedrick + * + * Extracts if ide.c to address the evolving transfer rate code for + * the SETFEATURES_XFER callouts. Various parts of any given function + * are credited to previous ATA-IDE maintainers. + * + * Auto-CRC downgrade for Ultra DMA(ing) + * + * May be copied or modified under the terms of the GNU General Public License + */ + +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * A Verbose noise maker for debugging on the attempted transfer rates. 
+ */
+char *ide_xfer_verbose (byte xfer_rate)
+{
+	switch(xfer_rate) {
+		case XFER_UDMA_7:	return("UDMA 7");
+		case XFER_UDMA_6:	return("UDMA 6");
+		case XFER_UDMA_5:	return("UDMA 5");
+		case XFER_UDMA_4:	return("UDMA 4");
+		case XFER_UDMA_3:	return("UDMA 3");
+		case XFER_UDMA_2:	return("UDMA 2");
+		case XFER_UDMA_1:	return("UDMA 1");
+		case XFER_UDMA_0:	return("UDMA 0");
+		case XFER_MW_DMA_2:	return("MW DMA 2");
+		case XFER_MW_DMA_1:	return("MW DMA 1");
+		case XFER_MW_DMA_0:	return("MW DMA 0");
+		case XFER_SW_DMA_2:	return("SW DMA 2");
+		case XFER_SW_DMA_1:	return("SW DMA 1");
+		case XFER_SW_DMA_0:	return("SW DMA 0");
+		case XFER_PIO_4:	return("PIO 4");
+		case XFER_PIO_3:	return("PIO 3");
+		case XFER_PIO_2:	return("PIO 2");
+		case XFER_PIO_1:	return("PIO 1");
+		case XFER_PIO_0:	return("PIO 0");
+		case XFER_PIO_SLOW:	return("PIO SLOW");
+		default:		return("XFER ERROR");
+	}
+}
+
+/*
+ * Media type, as a fixed-width string for log output.
+ */
+char *ide_media_verbose (ide_drive_t *drive)
+{
+	switch (drive->media) {
+		case ide_scsi:		return("scsi   ");
+		case ide_disk:		return("disk   ");
+		case ide_optical:	return("optical");
+		case ide_cdrom:		return("cdrom  ");
+		case ide_tape:		return("tape   ");
+		case ide_floppy:	return("floppy ");
+		default:		return("???????");
+	}
+}
+
+/*
+ * A Verbose noise maker for debugging on the attempted dmaing calls.
+ */
+char *ide_dmafunc_verbose (ide_dma_action_t dmafunc)
+{
+	switch (dmafunc) {
+		case ide_dma_read:	return("ide_dma_read");
+		case ide_dma_write:	return("ide_dma_write");
+		case ide_dma_begin:	return("ide_dma_begin");
+		case ide_dma_end:	return("ide_dma_end");
+		case ide_dma_check:	return("ide_dma_check");
+		case ide_dma_on:	return("ide_dma_on");
+		case ide_dma_off:	return("ide_dma_off");
+		case ide_dma_off_quietly:	return("ide_dma_off_quietly");
+		case ide_dma_test_irq:	return("ide_dma_test_irq");
+		case ide_dma_bad_drive:	return("ide_dma_bad_drive");
+		case ide_dma_good_drive:	return("ide_dma_good_drive");
+		case ide_dma_verbose:	return("ide_dma_verbose");
+		case ide_dma_retune:	return("ide_dma_retune");
+		case ide_dma_lostirq:	return("ide_dma_lostirq");
+		case ide_dma_timeout:	return("ide_dma_timeout");
+		default:		return("unknown");
+	}
+}
+
+/*
+ * Step the drive down one UDMA mode after accumulated CRC errors.
+ */
+byte ide_auto_reduce_xfer (ide_drive_t *drive)
+{
+	if (!drive->crc_count)
+		return drive->current_speed;
+	drive->crc_count = 0;
+
+	switch(drive->current_speed) {
+		case XFER_UDMA_7:	return XFER_UDMA_6;
+		case XFER_UDMA_6:	return XFER_UDMA_5;
+		case XFER_UDMA_5:	return XFER_UDMA_4;
+		case XFER_UDMA_4:	return XFER_UDMA_3;
+		case XFER_UDMA_3:	return XFER_UDMA_2;
+		case XFER_UDMA_2:	return XFER_UDMA_1;
+		case XFER_UDMA_1:	return XFER_UDMA_0;
+		/*
+		 * OOPS: we do not drop to non-Ultra DMA modes;
+		 * without iCRCs available we force
+		 * the system to PIO and make the user
+		 * invoke the ATA-1/ATA-2 DMA modes.
+		 */
+		case XFER_UDMA_0:
+		default:		return XFER_PIO_4;
+	}
+}
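ide_auto_reduce_xfer() above steps a drive down one UDMA mode once CRC errors have accumulated, deliberately falling back to PIO 4 rather than to the multiword DMA modes. A sketch of how an error path might use it (the helper name and the threshold are hypothetical):

static void udma_crc_downgrade_sketch(ide_drive_t *drive)
{
	byte new_speed;

	if (++drive->crc_count < 10)	/* hypothetical threshold */
		return;
	new_speed = ide_auto_reduce_xfer(drive);	/* also resets crc_count */
	if (new_speed != drive->current_speed)
		(void) ide_config_drive_speed(drive, new_speed);
}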
+
+/*
+ * Update the drive's ID DMA fields after a transfer-mode change.
+ */
+int ide_driveid_update (ide_drive_t *drive)
+{
+	/*
+	 * Re-read drive->id for possible DMA mode
+	 * change (copied from ide-probe.c)
+	 */
+	struct hd_driveid *id;
+	unsigned long timeout, flags;
+
+	SELECT_MASK(HWIF(drive), drive, 1);
+	if (IDE_CONTROL_REG)
+		OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+	ide_delay_50ms();
+	OUT_BYTE(WIN_IDENTIFY, IDE_COMMAND_REG);
+	timeout = jiffies + WAIT_WORSTCASE;
+	do {
+		if (0 < (signed long)(jiffies - timeout)) {
+			SELECT_MASK(HWIF(drive), drive, 0);
+			return 0;	/* drive timed-out */
+		}
+		ide_delay_50ms();	/* give drive a breather */
+	} while (IN_BYTE(IDE_ALTSTATUS_REG) & BUSY_STAT);
+	ide_delay_50ms();	/* wait for IRQ and DRQ_STAT */
+	if (!OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+		SELECT_MASK(HWIF(drive), drive, 0);
+		printk("%s: CHECK for good STATUS\n", drive->name);
+		return 0;
+	}
+	__save_flags(flags);	/* local CPU only */
+	__cli();		/* local CPU only; some systems need this */
+	SELECT_MASK(HWIF(drive), drive, 0);
+	id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
+	if (!id) {
+		__restore_flags(flags);	/* local CPU only */
+		return 0;
+	}
+	ide_input_data(drive, id, SECTOR_WORDS);
+	(void) GET_STAT();	/* clear drive IRQ */
+	ide__sti();		/* local CPU only */
+	__restore_flags(flags);	/* local CPU only */
+	ide_fix_driveid(id);
+	if (id) {
+		drive->id->dma_ultra = id->dma_ultra;
+		drive->id->dma_mword = id->dma_mword;
+		drive->id->dma_1word = id->dma_1word;
+		/* anything more ? */
+		kfree(id);
+	}
+
+	return 1;
+}
+
+/*
+ * Verify that we are doing an approved SETFEATURES_XFER with respect
+ * to the hardware being able to support the request. Since some hardware
+ * can improperly report capabilities, we check to see if the host adapter
+ * in combination with the device (usually a disk) properly detects
+ * and acknowledges each end of the ribbon.
+ */
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args)
+{
+	if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+	    (args->tfRegister[IDE_SECTOR_OFFSET] > XFER_UDMA_2) &&
+	    (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER)) {
+		if (!HWIF(drive)->udma_four) {
+			printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", HWIF(drive)->name);
+			return 1;
+		}
+#ifndef CONFIG_IDEDMA_IVB
+		if ((drive->id->hw_config & 0x6000) == 0) {
+#else /* !CONFIG_IDEDMA_IVB */
+		if (((drive->id->hw_config & 0x2000) == 0) ||
+		    ((drive->id->hw_config & 0x4000) == 0)) {
+#endif /* CONFIG_IDEDMA_IVB */
+			printk("%s: Speed warning: UDMA 3/4/5 is not functional.\n", drive->name);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Backside of HDIO_DRIVE_CMD call of SETFEATURES_XFER.
+ * 1 : Safe to update drive->id DMA registers.
+ * 0 : OOPs not allowed.
+ */
+int set_transfer (ide_drive_t *drive, ide_task_t *args)
+{
+	if ((args->tfRegister[IDE_COMMAND_OFFSET] == WIN_SETFEATURES) &&
+	    (args->tfRegister[IDE_SECTOR_OFFSET] >= XFER_SW_DMA_0) &&
+	    (args->tfRegister[IDE_FEATURE_OFFSET] == SETFEATURES_XFER) &&
+	    (drive->id->dma_ultra ||
+	     drive->id->dma_mword ||
+	     drive->id->dma_1word))
+		return 1;
+
+	return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_IDEDMA
+/*
+ * All hosts that use the 80c ribbon must use this check!
+ */ +byte eighty_ninty_three (ide_drive_t *drive) +{ +#ifdef CONFIG_BLK_DEV_IDEPCI + if (HWIF(drive)->pci_devid.vid==0x105a) + return(HWIF(drive)->udma_four); +#endif + /* PDC202XX: that's because some HDD will return wrong info */ + return ((byte) ((HWIF(drive)->udma_four) && +#ifndef CONFIG_IDEDMA_IVB + (drive->id->hw_config & 0x4000) && +#endif /* CONFIG_IDEDMA_IVB */ + (drive->id->hw_config & 0x6000)) ? 1 : 0); +} +#endif // CONFIG_BLK_DEV_IDEDMA + +/* + * Similar to ide_wait_stat(), except it never calls ide_error internally. + * This is a kludge to handle the new ide_config_drive_speed() function, + * and should not otherwise be used anywhere. Eventually, the tuneproc's + * should be updated to return ide_startstop_t, in which case we can get + * rid of this abomination again. :) -ml + * + * It is gone.......... + * + * const char *msg == consider adding for verbose errors. + */ +int ide_config_drive_speed (ide_drive_t *drive, byte speed) +{ + ide_hwif_t *hwif = HWIF(drive); + int i, error = 1; + byte stat; + +#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI) + byte unit = (drive->select.b.unit & 0x01); + outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2); +#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */ + + /* + * Don't use ide_wait_cmd here - it will + * attempt to set_geometry and recalibrate, + * but for some reason these don't work at + * this point (lost interrupt). + */ + /* + * Select the drive, and issue the SETFEATURES command + */ + disable_irq(hwif->irq); /* disable_irq_nosync ?? */ + udelay(1); + SELECT_DRIVE(HWIF(drive), drive); + SELECT_MASK(HWIF(drive), drive, 0); + udelay(1); + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl | 2, IDE_CONTROL_REG); + OUT_BYTE(speed, IDE_NSECTOR_REG); + OUT_BYTE(SETFEATURES_XFER, IDE_FEATURE_REG); + OUT_BYTE(WIN_SETFEATURES, IDE_COMMAND_REG); + if ((IDE_CONTROL_REG) && (drive->quirk_list == 2)) + OUT_BYTE(drive->ctl, IDE_CONTROL_REG); + udelay(1); + /* + * Wait for drive to become non-BUSY + */ + if ((stat = GET_STAT()) & BUSY_STAT) { + unsigned long flags, timeout; + __save_flags(flags); /* local CPU only */ + ide__sti(); /* local CPU only -- for jiffies */ + timeout = jiffies + WAIT_CMD; + while ((stat = GET_STAT()) & BUSY_STAT) { + if (0 < (signed long)(jiffies - timeout)) + break; + } + __restore_flags(flags); /* local CPU only */ + } + + /* + * Allow status to settle, then read it again. + * A few rare drives vastly violate the 400ns spec here, + * so we'll wait up to 10usec for a "good" status + * rather than expensively fail things immediately. + * This fix courtesy of Matthew Faupel & Niccolo Rigacci. 
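eighty_ninty_three() above gates UDMA modes 3 and higher on identify word 93 (hw_config); hence the 0x2000/0x4000/0x6000 masks here and in ide_ata66_check(). A sketch of the drive-side 80-wire test, under the bit meanings the two CONFIG_IDEDMA_IVB variants imply (helper name is illustrative):

static int drive_sees_80wire_sketch(struct hd_driveid *id)
{
	/* word 93: bit 13 (0x2000) reflects the cable-detect level and
	   bit 14 (0x4000) marks the word as valid; the default build
	   accepts either bit via the combined 0x6000 mask */
	return (id->hw_config & 0x6000) != 0;
}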
+ */ + for (i = 0; i < 10; i++) { + udelay(1); + if (OK_STAT((stat = GET_STAT()), DRIVE_READY, BUSY_STAT|DRQ_STAT|ERR_STAT)) { + error = 0; + break; + } + } + + SELECT_MASK(HWIF(drive), drive, 0); + + enable_irq(hwif->irq); + + if (error) { + (void) ide_dump_status(drive, "set_drive_speed_status", stat); + return error; + } + + drive->id->dma_ultra &= ~0xFF00; + drive->id->dma_mword &= ~0x0F00; + drive->id->dma_1word &= ~0x0F00; + +#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI) + if (speed > XFER_PIO_4) { + outb(inb(hwif->dma_base+2)|(1<<(5+unit)), hwif->dma_base+2); + } else { + outb(inb(hwif->dma_base+2) & ~(1<<(5+unit)), hwif->dma_base+2); + } +#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */ + + switch(speed) { + case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break; + case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break; + case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break; + case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break; + case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break; + case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break; + case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break; + case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break; + case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break; + case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break; + case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break; + case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break; + case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break; + case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break; + default: break; + } + return error; +} + +EXPORT_SYMBOL(ide_auto_reduce_xfer); +EXPORT_SYMBOL(ide_driveid_update); +EXPORT_SYMBOL(ide_ata66_check); +EXPORT_SYMBOL(set_transfer); +#ifdef CONFIG_BLK_DEV_IDEDMA +EXPORT_SYMBOL(eighty_ninty_three); +#endif // CONFIG_BLK_DEV_IDEDMA +EXPORT_SYMBOL(ide_config_drive_speed); + diff --git a/xen-2.4.16/drivers/ide/ide-geometry.c b/xen-2.4.16/drivers/ide/ide-geometry.c new file mode 100644 index 0000000000..22428288b6 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-geometry.c @@ -0,0 +1,222 @@ +/* + * linux/drivers/ide/ide-geometry.c + */ +#include +#include +#include +#include + +#ifdef CONFIG_BLK_DEV_IDE + +/* + * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc + * controller that is BIOS compatible with ST-506, and thus showing up in our + * BIOS table, but not register compatible, and therefore not present in CMOS. + * + * Furthermore, we will assume that our ST-506 drives are the primary + * drives in the system -- the ones reflected as drive 1 or 2. The first + * drive is stored in the high nibble of CMOS byte 0x12, the second in the low + * nibble. This will be either a 4 bit drive type or 0xf indicating use byte + * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS. A non-zero value + * means we have an AT controller hard disk for that drive. + * + * Of course, there is no guarantee that either drive is actually on the + * "primary" IDE interface, but we don't bother trying to sort that out here. + * If a drive is not actually on the primary interface, then these parameters + * will be ignored. This results in the user having to supply the logical + * drive geometry as a boot parameter for each drive not on the primary i/f. + */ +/* + * The only "perfect" way to handle this would be to modify the setup.[cS] code + * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info + * for us during initialization. 
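probe_cmos_for_drives() below acts on the CMOS encoding this comment describes: byte 0x12 packs a 4-bit drive type per drive, with 0xf redirecting to an 8-bit type byte elsewhere. The nibble test in isolation, as a sketch (hypothetical helper):

static int cmos_drive_present_sketch(unsigned char cmos12, int unit)
{
	/* high nibble = first drive, low nibble = second drive */
	unsigned char type = (unit == 0) ? (cmos12 >> 4) : (cmos12 & 0x0f);
	/* non-zero means an AT-compatible disk is recorded; 0xf means the
	   full type lives in byte 0x19 (drive 1) or 0x1a (drive 2) */
	return type != 0;
}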
I have the necessary docs -- any takers? -ml + */ +/* + * I did this, but it doesnt work - there is no reasonable way to find the + * correspondence between the BIOS numbering of the disks and the Linux + * numbering. -aeb + * + * The code below is bad. One of the problems is that drives 1 and 2 + * may be SCSI disks (even when IDE disks are present), so that + * the geometry we read here from BIOS is attributed to the wrong disks. + * Consequently, also the former "drive->present = 1" below was a mistake. + * + * Eventually the entire routine below should be removed. + * + * 17-OCT-2000 rjohnson@analogic.com Added spin-locks for reading CMOS + * chip. + */ + +void probe_cmos_for_drives (ide_hwif_t *hwif) +{ +#ifdef __i386__ + extern struct drive_info_struct drive_info; + byte cmos_disks, *BIOS = (byte *) &drive_info; + int unit; + unsigned long flags; + +#ifdef CONFIG_BLK_DEV_PDC4030 + if (hwif->chipset == ide_pdc4030 && hwif->channel != 0) + return; +#endif /* CONFIG_BLK_DEV_PDC4030 */ + spin_lock_irqsave(&rtc_lock, flags); + cmos_disks = CMOS_READ(0x12); + spin_unlock_irqrestore(&rtc_lock, flags); + /* Extract drive geometry from CMOS+BIOS if not already setup */ + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if ((cmos_disks & (0xf0 >> (unit*4))) + && !drive->present && !drive->nobios) { + unsigned short cyl = *(unsigned short *)BIOS; + unsigned char head = *(BIOS+2); + unsigned char sect = *(BIOS+14); + if (cyl > 0 && head > 0 && sect > 0 && sect < 64) { + drive->cyl = drive->bios_cyl = cyl; + drive->head = drive->bios_head = head; + drive->sect = drive->bios_sect = sect; + drive->ctl = *(BIOS+8); + } else { + printk("hd%c: C/H/S=%d/%d/%d from BIOS ignored\n", + unit+'a', cyl, head, sect); + } + } + BIOS += 16; + } +#endif +} +#endif /* CONFIG_BLK_DEV_IDE */ + + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) + +extern ide_drive_t * get_info_ptr(kdev_t); +extern unsigned long current_capacity (ide_drive_t *); + +/* + * If heads is nonzero: find a translation with this many heads and S=63. + * Otherwise: find out how OnTrack Disk Manager would translate the disk. + */ +static void +ontrack(ide_drive_t *drive, int heads, unsigned int *c, int *h, int *s) { + static const byte dm_head_vals[] = {4, 8, 16, 32, 64, 128, 255, 0}; + const byte *headp = dm_head_vals; + unsigned long total; + + /* + * The specs say: take geometry as obtained from Identify, + * compute total capacity C*H*S from that, and truncate to + * 1024*255*63. Now take S=63, H the first in the sequence + * 4, 8, 16, 32, 64, 128, 255 such that 63*H*1024 >= total. + * [Please tell aeb@cwi.nl in case this computes a + * geometry different from what OnTrack uses.] + */ + total = DRIVER(drive)->capacity(drive); + + *s = 63; + + if (heads) { + *h = heads; + *c = total / (63 * heads); + return; + } + + while (63 * headp[0] * 1024 < total && headp[1] != 0) + headp++; + *h = headp[0]; + *c = total / (63 * headp[0]); +} + +/* + * This routine is called from the partition-table code in pt/msdos.c. + * It has two tasks: + * (i) to handle Ontrack DiskManager by offsetting everything by 63 sectors, + * or to handle EZdrive by remapping sector 0 to sector 1. + * (ii) to invent a translated geometry. + * Part (i) is suppressed if the user specifies the "noremap" option + * on the command line. + * Part (ii) is suppressed if the user specifies an explicit geometry. 
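As a worked example of the translation ontrack() above computes (illustrative numbers, not from this changeset): for a drive reporting total = 4,194,304 sectors, S is fixed at 63 and H must be the first of 4, 8, 16, 32, 64, 128, 255 with 63*H*1024 >= total. Since 63*64*1024 = 4,128,768 falls short and 63*128*1024 = 8,257,536 suffices, H = 128 is chosen, giving C = 4,194,304 / (63*128) = 520 cylinders, comfortably under the 1024-cylinder BIOS limit.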
+ * + * The ptheads parameter is either 0 or tells about the number of + * heads shown by the end of the first nonempty partition. + * If this is either 16, 32, 64, 128, 240 or 255 we'll believe it. + * + * The xparm parameter has the following meaning: + * 0 = convert to CHS with fewer than 1024 cyls + * using the same method as Ontrack DiskManager. + * 1 = same as "0", plus offset everything by 63 sectors. + * -1 = similar to "0", plus redirect sector 0 to sector 1. + * 2 = convert to a CHS geometry with "ptheads" heads. + * + * Returns 0 if the translation was not possible, if the device was not + * an IDE disk drive, or if a geometry was "forced" on the commandline. + * Returns 1 if the geometry translation was successful. + */ +int ide_xlate_1024 (kdev_t i_rdev, int xparm, int ptheads, const char *msg) +{ + ide_drive_t *drive; + const char *msg1 = ""; + int heads = 0; + int c, h, s; + int transl = 1; /* try translation */ + int ret = 0; + + drive = get_info_ptr(i_rdev); + if (!drive) + return 0; + + /* remap? */ + if (drive->remap_0_to_1 != 2) { + if (xparm == 1) { /* DM */ + drive->sect0 = 63; + msg1 = " [remap +63]"; + ret = 1; + } else if (xparm == -1) { /* EZ-Drive */ + if (drive->remap_0_to_1 == 0) { + drive->remap_0_to_1 = 1; + msg1 = " [remap 0->1]"; + ret = 1; + } + } + } + + /* There used to be code here that assigned drive->id->CHS + to drive->CHS and that to drive->bios_CHS. However, + some disks have id->C/H/S = 4092/16/63 but are larger than 2.1 GB. + In such cases that code was wrong. Moreover, + there seems to be no reason to do any of these things. */ + + /* translate? */ + if (drive->forced_geom) + transl = 0; + + /* does ptheads look reasonable? */ + if (ptheads == 32 || ptheads == 64 || ptheads == 128 || + ptheads == 240 || ptheads == 255) + heads = ptheads; + + if (xparm == 2) { + if (!heads || + (drive->bios_head >= heads && drive->bios_sect == 63)) + transl = 0; + } + if (xparm == -1) { + if (drive->bios_head > 16) + transl = 0; /* we already have a translation */ + } + + if (transl) { + ontrack(drive, heads, &c, &h, &s); + drive->bios_cyl = c; + drive->bios_head = h; + drive->bios_sect = s; + ret = 1; + } + + drive->part[0].nr_sects = current_capacity(drive); + + if (ret) + printk("%s%s [%d/%d/%d]", msg, msg1, + drive->bios_cyl, drive->bios_head, drive->bios_sect); + return ret; +} +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ diff --git a/xen-2.4.16/drivers/ide/ide-pci.c b/xen-2.4.16/drivers/ide/ide-pci.c new file mode 100644 index 0000000000..c68ff36847 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-pci.c @@ -0,0 +1,1003 @@ +/* + * linux/drivers/ide/ide-pci.c Version 1.05 June 9, 2000 + * + * Copyright (c) 1998-2000 Andre Hedrick + * + * Copyright (c) 1995-1998 Mark Lord + * May be copied or modified under the terms of the GNU General Public License + */ + +/* + * This module provides support for automatic detection and + * configuration of all PCI IDE interfaces present in a system. 
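The probe code in this file is table-driven: each chipset gets an ide_pci_device_t entry (defined further down) whose per-channel enablebits name a PCI config register, a mask, and the value that means "channel enabled". A sketch of how such an entry can be tested, using the PIIX rows below (reg 0x41/0x43, mask and value 0x80) as the example (helper name is illustrative):

static int channel_enabled_sketch(struct pci_dev *dev, ide_pci_enablebit_t *e)
{
	byte b;

	if (!e->mask)		/* an all-zero entry means "always enabled" */
		return 1;
	pci_read_config_byte(dev, e->reg, &b);
	return (b & e->mask) == e->val;
}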
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DEVID_PIIXa ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0}) +#define DEVID_PIIXb ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_1}) +#define DEVID_MPIIX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX}) +#define DEVID_PIIX3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1}) +#define DEVID_PIIX4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB}) +#define DEVID_ICH0 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AB_1}) +#define DEVID_PIIX4E2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_1}) +#define DEVID_ICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801AA_1}) +#define DEVID_PIIX4U2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82372FB_1}) +#define DEVID_PIIX4NX ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX}) +#define DEVID_ICH2 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_9}) +#define DEVID_ICH2M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_8}) +#define DEVID_ICH3M ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10}) +#define DEVID_ICH3 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_11}) +#define DEVID_ICH4 ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_11}) +#define DEVID_CICH ((ide_pci_devid_t){PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801E_11}) +#define DEVID_VIA_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C561}) +#define DEVID_MR_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576_1}) +#define DEVID_VP_IDE ((ide_pci_devid_t){PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1}) +#define DEVID_PDC20246 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20246}) +#define DEVID_PDC20262 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20262}) +#define DEVID_PDC20265 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20265}) +#define DEVID_PDC20267 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20267}) +#define DEVID_PDC20268 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20268}) +#define DEVID_PDC20270 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20270}) +#define DEVID_PDC20269 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20269}) +#define DEVID_PDC20275 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20275}) +#define DEVID_PDC20276 ((ide_pci_devid_t){PCI_VENDOR_ID_PROMISE, PCI_DEVICE_ID_PROMISE_20276}) +#define DEVID_RZ1000 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000}) +#define DEVID_RZ1001 ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001}) +#define DEVID_SAMURAI ((ide_pci_devid_t){PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE}) +#define DEVID_CMD640 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_640}) +#define DEVID_CMD643 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_643}) +#define DEVID_CMD646 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_646}) +#define DEVID_CMD648 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_648}) +#define DEVID_CMD649 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_649}) +#define DEVID_CMD680 ((ide_pci_devid_t){PCI_VENDOR_ID_CMD, PCI_DEVICE_ID_CMD_680}) +#define DEVID_SIS5513 
((ide_pci_devid_t){PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5513}) +#define DEVID_OPTI621 ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621}) +#define DEVID_OPTI621V ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C558}) +#define DEVID_OPTI621X ((ide_pci_devid_t){PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C825}) +#define DEVID_TRM290 ((ide_pci_devid_t){PCI_VENDOR_ID_TEKRAM, PCI_DEVICE_ID_TEKRAM_DC290}) +#define DEVID_NS87410 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87410}) +#define DEVID_NS87415 ((ide_pci_devid_t){PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_87415}) +#define DEVID_HT6565 ((ide_pci_devid_t){PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565}) +#define DEVID_AEC6210 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP850UF}) +#define DEVID_AEC6260 ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860}) +#define DEVID_AEC6260R ((ide_pci_devid_t){PCI_VENDOR_ID_ARTOP, PCI_DEVICE_ID_ARTOP_ATP860R}) +#define DEVID_W82C105 ((ide_pci_devid_t){PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105}) +#define DEVID_UM8673F ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8673F}) +#define DEVID_UM8886A ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886A}) +#define DEVID_UM8886BF ((ide_pci_devid_t){PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF}) +#define DEVID_HPT34X ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT343}) +#define DEVID_HPT366 ((ide_pci_devid_t){PCI_VENDOR_ID_TTI, PCI_DEVICE_ID_TTI_HPT366}) +#define DEVID_ALI15X3 ((ide_pci_devid_t){PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M5229}) +#define DEVID_CY82C693 ((ide_pci_devid_t){PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693}) +#define DEVID_HINT ((ide_pci_devid_t){0x3388, 0x8013}) +#define DEVID_CS5530 ((ide_pci_devid_t){PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE}) +#define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401}) +#define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409}) +#define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411}) +#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441}) +#define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841}) +#define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1}) +#define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE}) +#define DEVID_CSB5 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE}) +#define DEVID_ITE8172G ((ide_pci_devid_t){PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_IT8172G}) + +#define IDE_IGNORE ((void *)-1) +#define IDE_NO_DRIVER ((void *)-2) + +#ifdef CONFIG_BLK_DEV_AEC62XX +extern unsigned int pci_init_aec62xx(struct pci_dev *, const char *); +extern unsigned int ata66_aec62xx(ide_hwif_t *); +extern void ide_init_aec62xx(ide_hwif_t *); +extern void ide_dmacapable_aec62xx(ide_hwif_t *, unsigned long); +#define PCI_AEC62XX &pci_init_aec62xx +#define ATA66_AEC62XX &ata66_aec62xx +#define INIT_AEC62XX &ide_init_aec62xx +#define DMA_AEC62XX &ide_dmacapable_aec62xx +#else +#define PCI_AEC62XX NULL +#define ATA66_AEC62XX NULL +#define INIT_AEC62XX IDE_NO_DRIVER +#define DMA_AEC62XX NULL +#endif + +#ifdef CONFIG_BLK_DEV_ALI15X3 +extern unsigned int pci_init_ali15x3(struct pci_dev *, const char *); +extern unsigned int ata66_ali15x3(ide_hwif_t *); +extern void ide_init_ali15x3(ide_hwif_t *); +extern void ide_dmacapable_ali15x3(ide_hwif_t *, 
unsigned long); +#define PCI_ALI15X3 &pci_init_ali15x3 +#define ATA66_ALI15X3 &ata66_ali15x3 +#define INIT_ALI15X3 &ide_init_ali15x3 +#define DMA_ALI15X3 &ide_dmacapable_ali15x3 +#else +#define PCI_ALI15X3 NULL +#define ATA66_ALI15X3 NULL +#define INIT_ALI15X3 IDE_NO_DRIVER +#define DMA_ALI15X3 NULL +#endif + +#ifdef CONFIG_BLK_DEV_AMD74XX +extern unsigned int pci_init_amd74xx(struct pci_dev *, const char *); +extern unsigned int ata66_amd74xx(ide_hwif_t *); +extern void ide_init_amd74xx(ide_hwif_t *); +extern void ide_dmacapable_amd74xx(ide_hwif_t *, unsigned long); +#define PCI_AMD74XX &pci_init_amd74xx +#define ATA66_AMD74XX &ata66_amd74xx +#define INIT_AMD74XX &ide_init_amd74xx +#define DMA_AMD74XX &ide_dmacapable_amd74xx +#else +#define PCI_AMD74XX NULL +#define ATA66_AMD74XX NULL +#define INIT_AMD74XX IDE_NO_DRIVER +#define DMA_AMD74XX NULL +#endif + +#ifdef CONFIG_BLK_DEV_CMD64X +extern unsigned int pci_init_cmd64x(struct pci_dev *, const char *); +extern unsigned int ata66_cmd64x(ide_hwif_t *); +extern void ide_init_cmd64x(ide_hwif_t *); +extern void ide_dmacapable_cmd64x(ide_hwif_t *, unsigned long); +#define PCI_CMD64X &pci_init_cmd64x +#define ATA66_CMD64X &ata66_cmd64x +#define INIT_CMD64X &ide_init_cmd64x +#else +#define PCI_CMD64X NULL +#define ATA66_CMD64X NULL +#ifdef __sparc_v9__ +#define INIT_CMD64X IDE_IGNORE +#else +#define INIT_CMD64X IDE_NO_DRIVER +#endif +#endif + +#ifdef CONFIG_BLK_DEV_CY82C693 +extern unsigned int pci_init_cy82c693(struct pci_dev *, const char *); +extern void ide_init_cy82c693(ide_hwif_t *); +#define PCI_CY82C693 &pci_init_cy82c693 +#define INIT_CY82C693 &ide_init_cy82c693 +#else +#define PCI_CY82C693 NULL +#define INIT_CY82C693 IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_CS5530 +extern unsigned int pci_init_cs5530(struct pci_dev *, const char *); +extern void ide_init_cs5530(ide_hwif_t *); +#define PCI_CS5530 &pci_init_cs5530 +#define INIT_CS5530 &ide_init_cs5530 +#else +#define PCI_CS5530 NULL +#define INIT_CS5530 IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_HPT34X +extern unsigned int pci_init_hpt34x(struct pci_dev *, const char *); +extern void ide_init_hpt34x(ide_hwif_t *); +#define PCI_HPT34X &pci_init_hpt34x +#define INIT_HPT34X &ide_init_hpt34x +#else +#define PCI_HPT34X NULL +#define INIT_HPT34X IDE_IGNORE +#endif + +#ifdef CONFIG_BLK_DEV_HPT366 +extern byte hpt363_shared_irq; +extern byte hpt363_shared_pin; +extern unsigned int pci_init_hpt366(struct pci_dev *, const char *); +extern unsigned int ata66_hpt366(ide_hwif_t *); +extern void ide_init_hpt366(ide_hwif_t *); +extern void ide_dmacapable_hpt366(ide_hwif_t *, unsigned long); +#define PCI_HPT366 &pci_init_hpt366 +#define ATA66_HPT366 &ata66_hpt366 +#define INIT_HPT366 &ide_init_hpt366 +#define DMA_HPT366 &ide_dmacapable_hpt366 +#else +static byte hpt363_shared_irq; +static byte hpt363_shared_pin; +#define PCI_HPT366 NULL +#define ATA66_HPT366 NULL +#define INIT_HPT366 IDE_NO_DRIVER +#define DMA_HPT366 NULL +#endif + +#ifdef CONFIG_BLK_DEV_NS87415 +extern void ide_init_ns87415(ide_hwif_t *); +#define INIT_NS87415 &ide_init_ns87415 +#else +#define INIT_NS87415 IDE_IGNORE +#endif + +#ifdef CONFIG_BLK_DEV_OPTI621 +extern void ide_init_opti621(ide_hwif_t *); +#define INIT_OPTI621 &ide_init_opti621 +#else +#define INIT_OPTI621 IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_PDC_ADMA +extern unsigned int pci_init_pdcadma(struct pci_dev *, const char *); +extern unsigned int ata66_pdcadma(ide_hwif_t *); +extern void ide_init_pdcadma(ide_hwif_t *); +extern void 
ide_dmacapable_pdcadma(ide_hwif_t *, unsigned long); +#define PCI_PDCADMA &pci_init_pdcadma +#define ATA66_PDCADMA &ata66_pdcadma +#define INIT_PDCADMA &ide_init_pdcadma +#define DMA_PDCADMA &ide_dmacapable_pdcadma +#else +#define PCI_PDCADMA IDE_IGNORE +#define ATA66_PDCADMA IDE_IGNORE +#define INIT_PDCADMA IDE_IGNORE +#define DMA_PDCADMA IDE_IGNORE +#endif + +#ifdef CONFIG_BLK_DEV_PDC202XX +extern unsigned int pci_init_pdc202xx(struct pci_dev *, const char *); +extern unsigned int ata66_pdc202xx(ide_hwif_t *); +extern void ide_init_pdc202xx(ide_hwif_t *); +#define PCI_PDC202XX &pci_init_pdc202xx +#define ATA66_PDC202XX &ata66_pdc202xx +#define INIT_PDC202XX &ide_init_pdc202xx +#else +#define PCI_PDC202XX NULL +#define ATA66_PDC202XX NULL +#define INIT_PDC202XX NULL +#endif + +#ifdef CONFIG_BLK_DEV_PIIX +extern unsigned int pci_init_piix(struct pci_dev *, const char *); +extern unsigned int ata66_piix(ide_hwif_t *); +extern void ide_init_piix(ide_hwif_t *); +#define PCI_PIIX &pci_init_piix +#define ATA66_PIIX &ata66_piix +#define INIT_PIIX &ide_init_piix +#else +#define PCI_PIIX NULL +#define ATA66_PIIX NULL +#define INIT_PIIX IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_IT8172 +extern unsigned int pci_init_it8172(struct pci_dev *, const char *); +extern unsigned int ata66_it8172(ide_hwif_t *); +extern void ide_init_it8172(ide_hwif_t *); +#define PCI_IT8172 &pci_init_it8172 +#define INIT_IT8172 &ide_init_it8172 +#else +#define PCI_IT8172 NULL +#define ATA66_IT8172 NULL +#define INIT_IT8172 NULL +#endif + +#ifdef CONFIG_BLK_DEV_RZ1000 +extern void ide_init_rz1000(ide_hwif_t *); +#define INIT_RZ1000 &ide_init_rz1000 +#else +#define INIT_RZ1000 IDE_IGNORE +#endif + +#define INIT_SAMURAI NULL + +#ifdef CONFIG_BLK_DEV_SVWKS +extern unsigned int pci_init_svwks(struct pci_dev *, const char *); +extern unsigned int ata66_svwks(ide_hwif_t *); +extern void ide_init_svwks(ide_hwif_t *); +#define PCI_SVWKS &pci_init_svwks +#define ATA66_SVWKS &ata66_svwks +#define INIT_SVWKS &ide_init_svwks +#else +#define PCI_SVWKS NULL +#define ATA66_SVWKS NULL +#define INIT_SVWKS IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_SIS5513 +extern unsigned int pci_init_sis5513(struct pci_dev *, const char *); +extern unsigned int ata66_sis5513(ide_hwif_t *); +extern void ide_init_sis5513(ide_hwif_t *); +#define PCI_SIS5513 &pci_init_sis5513 +#define ATA66_SIS5513 &ata66_sis5513 +#define INIT_SIS5513 &ide_init_sis5513 +#else +#define PCI_SIS5513 NULL +#define ATA66_SIS5513 NULL +#define INIT_SIS5513 IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_SLC90E66 +extern unsigned int pci_init_slc90e66(struct pci_dev *, const char *); +extern unsigned int ata66_slc90e66(ide_hwif_t *); +extern void ide_init_slc90e66(ide_hwif_t *); +#define PCI_SLC90E66 &pci_init_slc90e66 +#define ATA66_SLC90E66 &ata66_slc90e66 +#define INIT_SLC90E66 &ide_init_slc90e66 +#else +#define PCI_SLC90E66 NULL +#define ATA66_SLC90E66 NULL +#define INIT_SLC90E66 IDE_NO_DRIVER +#endif + +#ifdef CONFIG_BLK_DEV_SL82C105 +extern unsigned int pci_init_sl82c105(struct pci_dev *, const char *); +extern void dma_init_sl82c105(ide_hwif_t *, unsigned long); +extern void ide_init_sl82c105(ide_hwif_t *); +#define PCI_W82C105 &pci_init_sl82c105 +#define DMA_W82C105 &dma_init_sl82c105 +#define INIT_W82C105 &ide_init_sl82c105 +#else +#define PCI_W82C105 NULL +#define DMA_W82C105 NULL +#define INIT_W82C105 IDE_IGNORE +#endif + +#ifdef CONFIG_BLK_DEV_TRM290 +extern void ide_init_trm290(ide_hwif_t *); +#define INIT_TRM290 &ide_init_trm290 +#else +#define INIT_TRM290 
IDE_IGNORE +#endif + +#ifdef CONFIG_BLK_DEV_VIA82CXXX +extern unsigned int pci_init_via82cxxx(struct pci_dev *, const char *); +extern unsigned int ata66_via82cxxx(ide_hwif_t *); +extern void ide_init_via82cxxx(ide_hwif_t *); +extern void ide_dmacapable_via82cxxx(ide_hwif_t *, unsigned long); +#define PCI_VIA82CXXX &pci_init_via82cxxx +#define ATA66_VIA82CXXX &ata66_via82cxxx +#define INIT_VIA82CXXX &ide_init_via82cxxx +#define DMA_VIA82CXXX &ide_dmacapable_via82cxxx +#else +#define PCI_VIA82CXXX NULL +#define ATA66_VIA82CXXX NULL +#define INIT_VIA82CXXX IDE_NO_DRIVER +#define DMA_VIA82CXXX NULL +#endif + +typedef struct ide_pci_enablebit_s { + byte reg; /* byte pci reg holding the enable-bit */ + byte mask; /* mask to isolate the enable-bit */ + byte val; /* value of masked reg when "enabled" */ +} ide_pci_enablebit_t; + +typedef struct ide_pci_device_s { + ide_pci_devid_t devid; + char *name; + unsigned int (*init_chipset)(struct pci_dev *dev, const char *name); + unsigned int (*ata66_check)(ide_hwif_t *hwif); + void (*init_hwif)(ide_hwif_t *hwif); + void (*dma_init)(ide_hwif_t *hwif, unsigned long dmabase); + ide_pci_enablebit_t enablebits[2]; + byte bootable; + unsigned int extra; +} ide_pci_device_t; + +static ide_pci_device_t ide_pci_chipsets[] __initdata = { + {DEVID_PIIXa, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIXb, "PIIX", NULL, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_MPIIX, "MPIIX", NULL, NULL, INIT_PIIX, NULL, {{0x6D,0x80,0x80}, {0x6F,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIX3, "PIIX3", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIX4, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH0, "ICH0", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIX4E2, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH, "ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIX4U2, "PIIX4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_PIIX4NX, "PIIX4", PCI_PIIX, NULL, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH2, "ICH2", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH2M, "ICH2M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH3M, "ICH3M", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH3, "ICH3", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_ICH4, "ICH4", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_CICH, "C-ICH", PCI_PIIX, ATA66_PIIX, INIT_PIIX, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_VIA_IDE, "VIA_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_MR_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 }, + {DEVID_VP_IDE, "VP_IDE", PCI_VIA82CXXX, ATA66_VIA82CXXX,INIT_VIA82CXXX, DMA_VIA82CXXX, {{0x40,0x02,0x02}, {0x40,0x01,0x01}}, ON_BOARD, 0 }, +#ifndef CONFIG_PDC202XX_FORCE + {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, 
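/*
 * How ide_setup_pci_device() consumes the enablebits pairs declared in
 * ide_pci_enablebit_t above: a port counts as enabled when the masked
 * PCI config byte equals the expected value, and an all-zero triple
 * means "always enabled".  A minimal sketch (port_enabled is a
 * hypothetical helper, not part of this file; the 0x41/0x80/0x80
 * triple is the PIIX entry from this table):
 */
#if 0
static int port_enabled(struct pci_dev *dev, ide_pci_enablebit_t *e)
{
	byte tmp;

	if (!e->reg)				/* {0x00,0x00,0x00} */
		return 1;			/* always enabled */
	if (pci_read_config_byte(dev, e->reg, &tmp))
		return 0;			/* config read failed */
	return (tmp & e->mask) == e->val;	/* e.g. (cfg[0x41] & 0x80) == 0x80 */
}
#endif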
NULL, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 16 }, + {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 }, + {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 48 }, + {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 48 }, +#else /* !CONFIG_PDC202XX_FORCE */ + {DEVID_PDC20246,"PDC20246", PCI_PDC202XX, NULL, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 16 }, + {DEVID_PDC20262,"PDC20262", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 }, + {DEVID_PDC20265,"PDC20265", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 }, + {DEVID_PDC20267,"PDC20267", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x50,0x02,0x02}, {0x50,0x04,0x04}}, OFF_BOARD, 48 }, +#endif + {DEVID_PDC20268,"PDC20268", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + /* Promise used a different PCI ident for the raid card apparently to try and + prevent Linux detecting it and using our own raid code. We want to detect + it for the ataraid drivers, so we have to list both here.. */ + {DEVID_PDC20270,"PDC20270", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_PDC20269,"PDC20269", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_PDC20275,"PDC20275", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_PDC20276,"PDC20276", PCI_PDC202XX, ATA66_PDC202XX, INIT_PDC202XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_RZ1000, "RZ1000", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_RZ1001, "RZ1001", NULL, NULL, INIT_RZ1000, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_SAMURAI, "SAMURAI", NULL, NULL, INIT_SAMURAI, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CMD640, "CMD640", NULL, NULL, IDE_IGNORE, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_NS87410, "NS87410", NULL, NULL, NULL, NULL, {{0x43,0x08,0x08}, {0x47,0x08,0x08}}, ON_BOARD, 0 }, + {DEVID_SIS5513, "SIS5513", PCI_SIS5513, ATA66_SIS5513, INIT_SIS5513, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, ON_BOARD, 0 }, + {DEVID_CMD643, "CMD643", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CMD646, "CMD646", PCI_CMD64X, NULL, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x51,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_CMD648, "CMD648", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CMD649, "CMD649", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#ifndef CONFIG_BLK_DEV_CMD680 + {DEVID_CMD680, "CMD680", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#else /* CONFIG_BLK_DEV_CMD680 */ + {DEVID_CMD680, "CMD680", PCI_CMD64X, ATA66_CMD64X, INIT_CMD64X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, +#endif /* !CONFIG_BLK_DEV_CMD680 */ + {DEVID_HT6565, "HT6565", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, 
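/*
 * The ON_BOARD/OFF_BOARD/NEVER_BOARD column feeds ide_match_hwif()
 * below: a nonzero "bootable" value lets the interface claim any free
 * slot, including the default ide0/ide1 ports, while zero pushes it to
 * ide2 and up so the legacy 0x1f0/0x170 ports stay available.  In the
 * stock 2.4 driver these are plain macros along the following lines
 * (an assumption -- the definitions are not part of this hunk):
 */
#if 0
#define ON_BOARD	1
#define NEVER_BOARD	0
#ifdef CONFIG_BLK_DEV_OFFBOARD
#define OFF_BOARD	ON_BOARD	/* let add-in cards claim ide0/ide1 */
#else
#define OFF_BOARD	NEVER_BOARD
#endif
#endif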
ON_BOARD, 0 }, + {DEVID_OPTI621, "OPTI621", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 }, + {DEVID_OPTI621X,"OPTI621X", NULL, NULL, INIT_OPTI621, NULL, {{0x45,0x80,0x00}, {0x40,0x08,0x00}}, ON_BOARD, 0 }, + {DEVID_TRM290, "TRM290", NULL, NULL, INIT_TRM290, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_NS87415, "NS87415", NULL, NULL, INIT_NS87415, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_AEC6210, "AEC6210", PCI_AEC62XX, NULL, INIT_AEC62XX, DMA_AEC62XX, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 }, + {DEVID_AEC6260, "AEC6260", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 0 }, + {DEVID_AEC6260R,"AEC6260R", PCI_AEC62XX, ATA66_AEC62XX, INIT_AEC62XX, NULL, {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}}, OFF_BOARD, 0 }, + {DEVID_W82C105, "W82C105", PCI_W82C105, NULL, INIT_W82C105, DMA_W82C105, {{0x40,0x01,0x01}, {0x40,0x10,0x10}}, ON_BOARD, 0 }, + {DEVID_UM8673F, "UM8673F", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_UM8886A, "UM8886A", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_UM8886BF,"UM8886BF", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_HPT34X, "HPT34X", PCI_HPT34X, NULL, INIT_HPT34X, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, NEVER_BOARD, 16 }, + {DEVID_HPT366, "HPT366", PCI_HPT366, ATA66_HPT366, INIT_HPT366, DMA_HPT366, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 240 }, + {DEVID_ALI15X3, "ALI15X3", PCI_ALI15X3, ATA66_ALI15X3, INIT_ALI15X3, DMA_ALI15X3, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CY82C693,"CY82C693", PCI_CY82C693, NULL, INIT_CY82C693, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_HINT, "HINT_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CS5530, "CS5530", PCI_CS5530, NULL, INIT_CS5530, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7441, "AMD7441", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, + {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, + {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_CSB5, "ServerWorks CSB5", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, + {DEVID_ITE8172G,"IT8172G", PCI_IT8172, NULL, INIT_IT8172, NULL, {{0x00,0x00,0x00}, {0x40,0x00,0x01}}, ON_BOARD, 0 }, + {IDE_PCI_DEVID_NULL, "PCI_IDE", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }}; + +/* + * This allows offboard ide-pci cards the enable a BIOS, verify interrupt + * settings of split-mirror pci-config space, place chipset into init-mode, + * and/or preserve an interrupt if the card is not native ide 
support. + */ +static unsigned int __init ide_special_settings (struct pci_dev *dev, const char *name) +{ + switch(dev->device) { + case PCI_DEVICE_ID_TTI_HPT366: + case PCI_DEVICE_ID_PROMISE_20246: + case PCI_DEVICE_ID_PROMISE_20262: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20270: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20276: + case PCI_DEVICE_ID_ARTOP_ATP850UF: + case PCI_DEVICE_ID_ARTOP_ATP860: + case PCI_DEVICE_ID_ARTOP_ATP860R: + return dev->irq; + default: + break; + } + return 0; +} + +/* + * Match a PCI IDE port against an entry in ide_hwifs[], + * based on io_base port if possible. + */ +static ide_hwif_t __init *ide_match_hwif (unsigned long io_base, byte bootable, const char *name) +{ + int h; + ide_hwif_t *hwif; + + /* + * Look for a hwif with matching io_base specified using + * parameters to ide_setup(). + */ + for (h = 0; h < MAX_HWIFS; ++h) { + hwif = &ide_hwifs[h]; + if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) { + if (hwif->chipset == ide_generic) + return hwif; /* a perfect match */ + } + } + /* + * Look for a hwif with matching io_base default value. + * If chipset is "ide_unknown", then claim that hwif slot. + * Otherwise, some other chipset has already claimed it.. :( + */ + for (h = 0; h < MAX_HWIFS; ++h) { + hwif = &ide_hwifs[h]; + if (hwif->io_ports[IDE_DATA_OFFSET] == io_base) { + if (hwif->chipset == ide_unknown) + return hwif; /* match */ + printk("%s: port 0x%04lx already claimed by %s\n", name, io_base, hwif->name); + return NULL; /* already claimed */ + } + } + /* + * Okay, there is no hwif matching our io_base, + * so we'll just claim an unassigned slot. + * Give preference to claiming other slots before claiming ide0/ide1, + * just in case there's another interface yet-to-be-scanned + * which uses ports 1f0/170 (the ide0/ide1 defaults). + * + * Unless there is a bootable card that does not use the standard + * ports 1f0/170 (the ide0/ide1 defaults). The (bootable) flag. 
+ */ + if (bootable) { + for (h = 0; h < MAX_HWIFS; ++h) { + hwif = &ide_hwifs[h]; + if (hwif->chipset == ide_unknown) + return hwif; /* pick an unused entry */ + } + } else { + for (h = 2; h < MAX_HWIFS; ++h) { + hwif = ide_hwifs + h; + if (hwif->chipset == ide_unknown) + return hwif; /* pick an unused entry */ + } + } + for (h = 0; h < 2; ++h) { + hwif = ide_hwifs + h; + if (hwif->chipset == ide_unknown) + return hwif; /* pick an unused entry */ + } + printk("%s: too many IDE interfaces, no room in table\n", name); + return NULL; +} + +static int __init ide_setup_pci_baseregs (struct pci_dev *dev, const char *name) +{ + byte reg, progif = 0; + + /* + * Place both IDE interfaces into PCI "native" mode: + */ + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) { + if ((progif & 0xa) != 0xa) { + printk("%s: device not capable of full native PCI mode\n", name); + return 1; + } + printk("%s: placing both ports into native PCI mode\n", name); + (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) { + printk("%s: rewrite of PROGIF failed, wanted 0x%04x, got 0x%04x\n", name, progif|5, progif); + return 1; + } + } + /* + * Setup base registers for IDE command/control spaces for each interface: + */ + for (reg = 0; reg < 4; reg++) { + struct resource *res = dev->resource + reg; + if ((res->flags & IORESOURCE_IO) == 0) + continue; + if (!res->start) { + printk("%s: Missing I/O address #%d\n", name, reg); + return 1; + } + } + return 0; +} + +/* + * ide_setup_pci_device() looks at the primary/secondary interfaces + * on a PCI IDE device and, if they are enabled, prepares the IDE driver + * for use with them. This generic code works for most PCI chipsets. + * + * One thing that is not standardized is the location of the + * primary/secondary interface "enable/disable" bits. For chipsets that + * we "know" about, this information is in the ide_pci_device_t struct; + * for all other chipsets, we just assume both interfaces are enabled. + */ +static void __init ide_setup_pci_device (struct pci_dev *dev, ide_pci_device_t *d) +{ + unsigned int port, at_least_one_hwif_enabled = 0, autodma = 0, pciirq = 0; + unsigned short pcicmd = 0, tried_config = 0; + byte tmp = 0; + ide_hwif_t *hwif, *mate = NULL; + unsigned int class_rev; + static int secondpdc = 0; + +#ifdef CONFIG_IDEDMA_AUTO + if (!noautodma) + autodma = 1; +#endif + + if (d->init_hwif == IDE_NO_DRIVER) { + printk(KERN_WARNING "%s: detected chipset, but driver not compiled in!\n", d->name); + d->init_hwif = NULL; + } + + if (pci_enable_device(dev)) { + printk(KERN_WARNING "%s: (ide_setup_pci_device:) Could not enable device.\n", d->name); + return; + } + +check_if_enabled: + if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) { + printk("%s: error accessing PCI regs\n", d->name); + return; + } + if (!(pcicmd & PCI_COMMAND_IO)) { /* is device disabled? */ + /* + * PnP BIOS was *supposed* to have set this device up for us, + * but we can do it ourselves, so long as the BIOS has assigned an IRQ + * (or possibly the device is using a "legacy header" for IRQs). + * Maybe the user deliberately *disabled* the device, + * but we'll eventually ignore it again if no drives respond. 
+ */ + if (tried_config++ + || ide_setup_pci_baseregs(dev, d->name) + || pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) { + printk("%s: device disabled (BIOS)\n", d->name); + return; + } + autodma = 0; /* default DMA off if we had to configure it here */ + goto check_if_enabled; + } + if (tried_config) + printk("%s: device enabled (Linux)\n", d->name); + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xff; + + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X)) { + /* see comments in hpt34x.c on why..... */ + char *chipset_names[] = {"HPT343", "HPT345"}; + strcpy(d->name, chipset_names[(pcicmd & PCI_COMMAND_MEMORY) ? 1 : 0]); + d->bootable = (pcicmd & PCI_COMMAND_MEMORY) ? OFF_BOARD : NEVER_BOARD; + } + + printk("%s: chipset revision %d\n", d->name, class_rev); + + /* + * Can we trust the reported IRQ? + */ + pciirq = dev->irq; + +#ifdef CONFIG_PDC202XX_FORCE + if (dev->class >> 8 == PCI_CLASS_STORAGE_RAID) { + /* + * By rights we want to ignore Promise FastTrak and SuperTrak + * series here, those use own driver. + */ + if (dev->vendor == PCI_VENDOR_ID_PROMISE) { + printk(KERN_INFO "ide: Skipping Promise RAID controller.\n"); + return; + } + } +#endif /* CONFIG_PDC202XX_FORCE */ + if ((dev->class & ~(0xfa)) != ((PCI_CLASS_STORAGE_IDE << 8) | 5)) { + printk("%s: not 100%% native mode: will probe irqs later\n", d->name); + /* + * This allows offboard ide-pci cards the enable a BIOS, + * verify interrupt settings of split-mirror pci-config + * space, place chipset into init-mode, and/or preserve + * an interrupt if the card is not native ide support. + */ + pciirq = (d->init_chipset) ? d->init_chipset(dev, d->name) : ide_special_settings(dev, d->name); + } else if (tried_config) { + printk("%s: will probe irqs later\n", d->name); + pciirq = 0; + } else if (!pciirq) { + printk("%s: bad irq (%d): will probe later\n", d->name, pciirq); + pciirq = 0; + } else { + if (d->init_chipset) + (void) d->init_chipset(dev, d->name); +#ifdef __sparc__ + printk("%s: 100%% native mode on irq %s\n", + d->name, __irq_itoa(pciirq)); +#else + printk("%s: 100%% native mode on irq %d\n", d->name, pciirq); +#endif + } + + /* + * Set up the IDE ports + */ + for (port = 0; port <= 1; ++port) { + unsigned long base = 0, ctl = 0; + ide_pci_enablebit_t *e = &(d->enablebits[port]); + + /* + * If this is a Promise FakeRaid controller, the 2nd controller will be marked as + * disabled while it is actually there and enabled by the bios for raid purposes. + * Skip the normal "is it enabled" test for those. + */ + if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265)) && (secondpdc++==1) && (port==1) ) + goto controller_ok; + if ((IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262)) && (secondpdc++==1) && (port==1) ) + goto controller_ok; + + if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) || (tmp & e->mask) != e->val)) + continue; /* port not enabled */ +controller_ok: + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) && (port) && (class_rev < 0x03)) + return; + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE || (dev->class & (port ? 4 : 1)) != 0) { + ctl = dev->resource[(2*port)+1].start; + base = dev->resource[2*port].start; + if (!(ctl & PCI_BASE_ADDRESS_IO_MASK) || + !(base & PCI_BASE_ADDRESS_IO_MASK)) { + printk("%s: IO baseregs (BIOS) are reported as MEM, report to .\n", d->name); +#if 0 + /* FIXME! This really should check that it really gets the IO/MEM part right! 
*/ + continue; +#endif + } + } + if ((ctl && !base) || (base && !ctl)) { + printk("%s: inconsistent baseregs (BIOS) for port %d, skipping\n", d->name, port); + continue; + } + if (!ctl) + ctl = port ? 0x374 : 0x3f4; /* use default value */ + if (!base) + base = port ? 0x170 : 0x1f0; /* use default value */ + if ((hwif = ide_match_hwif(base, d->bootable, d->name)) == NULL) + continue; /* no room in ide_hwifs[] */ + if (hwif->io_ports[IDE_DATA_OFFSET] != base) { + ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL); + memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports)); + hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET]; + } + hwif->chipset = ide_pci; + hwif->pci_dev = dev; + hwif->pci_devid = d->devid; + hwif->channel = port; + if (!hwif->irq) + hwif->irq = pciirq; + if (mate) { + hwif->mate = mate; + mate->mate = hwif; + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210)) { + hwif->serialized = 1; + mate->serialized = 1; + } + } + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886BF) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8673F)) { + hwif->irq = hwif->channel ? 15 : 14; + goto bypass_umc_dma; + } + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_MPIIX)) + goto bypass_piix_dma; + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDCADMA)) + goto bypass_legacy_dma; + if (hwif->udma_four) { + printk("%s: ATA-66/100 forced bit set (WARNING)!!\n", d->name); + } else { + hwif->udma_four = (d->ata66_check) ? d->ata66_check(hwif) : 0; + } +#ifdef CONFIG_BLK_DEV_IDEDMA + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_SIS5513) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PIIX4NX) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_VIA_IDE) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_MR_IDE) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_VP_IDE)) + autodma = 0; + if (autodma) + hwif->autodma = 1; + + if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20246) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20262) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20265) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20267) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20268) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20269) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20275) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20276) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6210) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_AEC6260R) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT34X) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD646) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD648) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD649) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_CMD680) || + IDE_PCI_DEVID_EQ(d->devid, DEVID_OSB4) || + ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 0x80))) { + unsigned long dma_base = ide_get_or_set_dma_base(hwif, (!mate && d->extra) ? 
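/*
 * Layout of the bus-master DMA block whose base is being fetched in
 * the call being split across these lines: BAR 4 provides 8 bytes of
 * command/status/PRD-pointer registers per channel (hence the
 * ide_setup_dma(hwif, dma_base, 8) below), and d->extra reserves
 * additional space past that standard block for chips that need it,
 * e.g. the 240 bytes listed for HPT366 above.  A rough sketch,
 * assuming a non-zero dma_base for the whole function:
 */
#if 0
unsigned long primary   = dma_base;	/* port 0: cmd, status, PRD pointer */
unsigned long secondary = dma_base + 8;	/* port 1: same registers, offset 8 */
#endif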
d->extra : 0, d->name); + if (dma_base && !(pcicmd & PCI_COMMAND_MASTER)) { + /* + * Set up BM-DMA capability (PnP BIOS should have done this) + */ + if (!IDE_PCI_DEVID_EQ(d->devid, DEVID_CS5530)) + hwif->autodma = 0; /* default DMA off if we had to configure it here */ + (void) pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_MASTER); + if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) || !(pcicmd & PCI_COMMAND_MASTER)) { + printk("%s: %s error updating PCICMD\n", hwif->name, d->name); + dma_base = 0; + } + } + if (dma_base) { + if (d->dma_init) { + d->dma_init(hwif, dma_base); + } else { + ide_setup_dma(hwif, dma_base, 8); + } + } else { + printk("%s: %s Bus-Master DMA disabled (BIOS)\n", hwif->name, d->name); + } + } +#endif /* CONFIG_BLK_DEV_IDEDMA */ +bypass_legacy_dma: +bypass_piix_dma: +bypass_umc_dma: + if (d->init_hwif) /* Call chipset-specific routine for each enabled hwif */ + d->init_hwif(hwif); + mate = hwif; + at_least_one_hwif_enabled = 1; + } + if (!at_least_one_hwif_enabled) + printk("%s: neither IDE port enabled (BIOS)\n", d->name); +} + +static void __init pdc20270_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d) +{ + struct pci_dev *dev2 = NULL, *findev; + ide_pci_device_t *d2; + + if ((dev->bus->self && + dev->bus->self->vendor == PCI_VENDOR_ID_DEC) && + (dev->bus->self->device == PCI_DEVICE_ID_DEC_21150)) { + if (PCI_SLOT(dev->devfn) & 2) { + return; + } + d->extra = 0; + pci_for_each_dev(findev) { + if ((findev->vendor == dev->vendor) && + (findev->device == dev->device) && + (PCI_SLOT(findev->devfn) & 2)) { + byte irq = 0, irq2 = 0; + dev2 = findev; + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + pci_read_config_byte(dev2, PCI_INTERRUPT_LINE, &irq2); + if (irq != irq2) { + dev2->irq = dev->irq; + pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, irq); + } + + } + } + } + + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn); + ide_setup_pci_device(dev, d); + if (!dev2) + return; + d2 = d; + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn); + ide_setup_pci_device(dev2, d2); +} + +static void __init hpt366_device_order_fixup (struct pci_dev *dev, ide_pci_device_t *d) +{ + struct pci_dev *dev2 = NULL, *findev; + ide_pci_device_t *d2; + unsigned char pin1 = 0, pin2 = 0; + unsigned int class_rev; + char *chipset_names[] = {"HPT366", "HPT366", "HPT368", "HPT370", "HPT370A", "HPT372"}; + + if (PCI_FUNC(dev->devfn) & 1) + return; + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + class_rev &= 0xff; + if (class_rev > 5) + class_rev = 5; + + strcpy(d->name, chipset_names[class_rev]); + + switch(class_rev) { + case 4: + case 3: printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn); + ide_setup_pci_device(dev, d); + return; + default: break; + } + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin1); + pci_for_each_dev(findev) { + if ((findev->vendor == dev->vendor) && + (findev->device == dev->device) && + ((findev->devfn - dev->devfn) == 1) && + (PCI_FUNC(findev->devfn) & 1)) { + dev2 = findev; + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2); + hpt363_shared_pin = (pin1 != pin2) ? 1 : 0; + hpt363_shared_irq = (dev->irq == dev2->irq) ? 1 : 0; + if (hpt363_shared_pin && hpt363_shared_irq) { + d->bootable = ON_BOARD; + printk("%s: onboard version of chipset, pin1=%d pin2=%d\n", d->name, pin1, pin2); +#if 0 + /* I forgot why I did this once, but it fixed something. 
*/ + pci_write_config_byte(dev2, PCI_INTERRUPT_PIN, dev->irq); + printk("PCI: %s: Fixing interrupt %d pin %d to ZERO \n", d->name, dev2->irq, pin2); + pci_write_config_byte(dev2, PCI_INTERRUPT_LINE, 0); +#endif + } + break; + } + } + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn); + ide_setup_pci_device(dev, d); + if (!dev2) + return; + d2 = d; + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d2->name, dev2->bus->number, dev2->devfn); + ide_setup_pci_device(dev2, d2); +} + +/* + * ide_scan_pcibus() gets invoked at boot time from ide.c. + * It finds all PCI IDE controllers and calls ide_setup_pci_device for them. + */ +void __init ide_scan_pcidev (struct pci_dev *dev) +{ + ide_pci_devid_t devid; + ide_pci_device_t *d; + + devid.vid = dev->vendor; + devid.did = dev->device; + for (d = ide_pci_chipsets; d->devid.vid && !IDE_PCI_DEVID_EQ(d->devid, devid); ++d); + if (d->init_hwif == IDE_IGNORE) + printk("%s: ignored by ide_scan_pci_device() (uses own driver)\n", d->name); + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_OPTI621V) && !(PCI_FUNC(dev->devfn) & 1)) + return; + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_CY82C693) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE))) + return; /* CY82C693 is more than only a IDE controller */ + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_ITE8172G) && (!(PCI_FUNC(dev->devfn) & 1) || !((dev->class >> 8) == PCI_CLASS_STORAGE_IDE))) + return; /* IT8172G is also more than only an IDE controller */ + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_UM8886A) && !(PCI_FUNC(dev->devfn) & 1)) + return; /* UM8886A/BF pair */ + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_HPT366)) + hpt366_device_order_fixup(dev, d); + else if (IDE_PCI_DEVID_EQ(d->devid, DEVID_PDC20270)) + pdc20270_device_order_fixup(dev, d); + else if (!IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL) || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE) { + if (IDE_PCI_DEVID_EQ(d->devid, IDE_PCI_DEVID_NULL)) + printk("%s: unknown IDE controller on PCI bus %02x device %02x, VID=%04x, DID=%04x\n", + d->name, dev->bus->number, dev->devfn, devid.vid, devid.did); + else + printk("%s: IDE controller on PCI bus %02x dev %02x\n", d->name, dev->bus->number, dev->devfn); + ide_setup_pci_device(dev, d); + } +} + +void __init ide_scan_pcibus (int scan_direction) +{ + struct pci_dev *dev; + + if (!scan_direction) { + pci_for_each_dev(dev) { + ide_scan_pcidev(dev); + } + } else { + pci_for_each_dev_reverse(dev) { + ide_scan_pcidev(dev); + } + } +} diff --git a/xen-2.4.16/drivers/ide/ide-probe.c b/xen-2.4.16/drivers/ide/ide-probe.c new file mode 100644 index 0000000000..e83157ec01 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-probe.c @@ -0,0 +1,1023 @@ +/* + * linux/drivers/ide/ide-probe.c Version 1.07 March 18, 2001 + * + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) + */ + +/* + * Mostly written by Mark Lord + * and Gadi Oxman + * and Andre Hedrick + * + * See linux/MAINTAINERS for address of current maintainer. + * + * This is the IDE probe module, as evolved from hd.c and ide.c. 
+ * + * Version 1.00 move drive probing code from ide.c to ide-probe.c + * Version 1.01 fix compilation problem for m68k + * Version 1.02 increase WAIT_PIDENTIFY to avoid CD-ROM locking at boot + * by Andrea Arcangeli + * Version 1.03 fix for (hwif->chipset == ide_4drives) + * Version 1.04 fixed buggy treatments of known flash memory cards + * + * Version 1.05 fix for (hwif->chipset == ide_pdc4030) + * added ide6/7/8/9 + * allowed for secondary flash card to be detectable + * with new flag : drive->ata_flash : 1; + * Version 1.06 stream line request queue and prep for cascade project. + * Version 1.07 max_sect <= 255; slower disks would get behind and + * then fall over when they get to 256. Paul G. + */ + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static inline void do_identify (ide_drive_t *drive, byte cmd) +{ + int bswap = 1; + struct hd_driveid *id; + + id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_ATOMIC); /* called with interrupts disabled! */ + if (!id) { + printk(KERN_WARNING "(ide-probe::do_identify) Out of memory.\n"); + goto err_kmalloc; + } + + ide_input_data(drive, id, SECTOR_WORDS); /* read 512 bytes of id info */ + ide__sti(); /* local CPU only */ + ide_fix_driveid(id); + + if (id->word156 == 0x4d42) { + printk("%s: drive->id->word156 == 0x%04x \n", drive->name, drive->id->word156); + } + + if (!drive->forced_lun) + drive->last_lun = id->last_lun & 0x7; +#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) + /* + * EATA SCSI controllers do a hardware ATA emulation: + * Ignore them if there is a driver for them available. + */ + if ((id->model[0] == 'P' && id->model[1] == 'M') + || (id->model[0] == 'S' && id->model[1] == 'K')) { + printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model); + goto err_misc; + } +#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */ + + /* + * WIN_IDENTIFY returns little-endian info, + * WIN_PIDENTIFY *usually* returns little-endian info. + */ + if (cmd == WIN_PIDENTIFY) { + if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ + || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ + || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ + bswap ^= 1; /* Vertos drives may still be weird */ + } + ide_fixstring (id->model, sizeof(id->model), bswap); + ide_fixstring (id->fw_rev, sizeof(id->fw_rev), bswap); + ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap); + + if (strstr(id->model, "E X A B Y T E N E S T")) + goto err_misc; + + id->model[sizeof(id->model)-1] = '\0'; /* we depend on this a lot! 
*/ + printk("%s: %s, ", drive->name, id->model); + drive->present = 1; + + /* + * Check for an ATAPI device + */ + if (cmd == WIN_PIDENTIFY) { + byte type = (id->config >> 8) & 0x1f; + printk("ATAPI "); +#ifdef CONFIG_BLK_DEV_PDC4030 + if (HWIF(drive)->channel == 1 && HWIF(drive)->chipset == ide_pdc4030) { + printk(" -- not supported on 2nd Promise port\n"); + goto err_misc; + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ + switch (type) { + case ide_floppy: + if (!strstr(id->model, "CD-ROM")) { + if (!strstr(id->model, "oppy") && !strstr(id->model, "poyp") && !strstr(id->model, "ZIP")) + printk("cdrom or floppy?, assuming "); + if (drive->media != ide_cdrom) { + printk ("FLOPPY"); + break; + } + } + type = ide_cdrom; /* Early cdrom models used zero */ + case ide_cdrom: + drive->removable = 1; +#ifdef CONFIG_PPC + /* kludge for Apple PowerBook internal zip */ + if (!strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) { + printk ("FLOPPY"); + type = ide_floppy; + break; + } +#endif + printk ("CD/DVD-ROM"); + break; + case ide_tape: + printk ("TAPE"); + break; + case ide_optical: + printk ("OPTICAL"); + drive->removable = 1; + break; + default: + printk("UNKNOWN (type %d)", type); + break; + } + printk (" drive\n"); + drive->media = type; + return; + } + + /* + * Not an ATAPI device: looks like a "regular" hard disk + */ + if (id->config & (1<<7)) + drive->removable = 1; + /* + * Prevent long system lockup probing later for non-existant + * slave drive if the hwif is actually a flash memory card of some variety: + */ + if (drive_is_flashcard(drive)) { + ide_drive_t *mate = &HWIF(drive)->drives[1^drive->select.b.unit]; + if (!mate->ata_flash) { + mate->present = 0; + mate->noprobe = 1; + } + } + drive->media = ide_disk; + printk("ATA DISK drive\n"); + QUIRK_LIST(HWIF(drive),drive); + return; + +err_misc: + kfree(id); +err_kmalloc: + drive->present = 0; + return; +} + +/* + * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive + * and waits for a response. It also monitors irqs while this is + * happening, in hope of automatically determining which one is + * being used by the interface. + * + * Returns: 0 device was identified + * 1 device timed-out (no response to identify request) + * 2 device aborted the command (refused to identify itself) + */ +static int actual_try_to_identify (ide_drive_t *drive, byte cmd) +{ + int rc; + ide_ioreg_t hd_status; + unsigned long timeout; + byte s, a; + + if (IDE_CONTROL_REG) { + /* take a deep breath */ + ide_delay_50ms(); + a = IN_BYTE(IDE_ALTSTATUS_REG); + s = IN_BYTE(IDE_STATUS_REG); + if ((a ^ s) & ~INDEX_STAT) { + printk("%s: probing with STATUS(0x%02x) instead of ALTSTATUS(0x%02x)\n", drive->name, s, a); + hd_status = IDE_STATUS_REG; /* ancient Seagate drives, broken interfaces */ + } else { + hd_status = IDE_ALTSTATUS_REG; /* use non-intrusive polling */ + } + } else { + ide_delay_50ms(); + hd_status = IDE_STATUS_REG; + } + + /* set features register for atapi identify command to be sure of reply */ + if ((cmd == WIN_PIDENTIFY)) + OUT_BYTE(0,IDE_FEATURE_REG); /* disable dma & overlap */ + +#if CONFIG_BLK_DEV_PDC4030 + if (HWIF(drive)->chipset == ide_pdc4030) { + /* DC4030 hosted drives need their own identify... */ + extern int pdc4030_identify(ide_drive_t *); + if (pdc4030_identify(drive)) { + return 1; + } + } else +#endif /* CONFIG_BLK_DEV_PDC4030 */ + OUT_BYTE(cmd,IDE_COMMAND_REG); /* ask drive for ID */ + timeout = ((cmd == WIN_IDENTIFY) ? 
WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; + timeout += jiffies; + do { + if (0 < (signed long)(jiffies - timeout)) { + return 1; /* drive timed-out */ + } + ide_delay_50ms(); /* give drive a breather */ + } while (IN_BYTE(hd_status) & BUSY_STAT); + + ide_delay_50ms(); /* wait for IRQ and DRQ_STAT */ + if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only; some systems need this */ + do_identify(drive, cmd); /* drive returned ID */ + rc = 0; /* drive responded with ID */ + (void) GET_STAT(); /* clear drive IRQ */ + __restore_flags(flags); /* local CPU only */ + } else + rc = 2; /* drive refused ID */ + return rc; +} + +static int try_to_identify (ide_drive_t *drive, byte cmd) +{ + int retval; + int autoprobe = 0; + unsigned long cookie = 0; + + if (IDE_CONTROL_REG && !HWIF(drive)->irq) { + autoprobe = 1; + cookie = probe_irq_on(); + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* enable device irq */ + } + + retval = actual_try_to_identify(drive, cmd); + + if (autoprobe) { + int irq; + OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */ + (void) GET_STAT(); /* clear drive IRQ */ + udelay(5); + irq = probe_irq_off(cookie); + if (!HWIF(drive)->irq) { + if (irq > 0) { + HWIF(drive)->irq = irq; + } else { /* Mmmm.. multiple IRQs.. don't know which was ours */ + printk("%s: IRQ probe failed (0x%lx)\n", drive->name, cookie); +#ifdef CONFIG_BLK_DEV_CMD640 +#ifdef CMD640_DUMP_REGS + if (HWIF(drive)->chipset == ide_cmd640) { + printk("%s: Hmmm.. probably a driver problem.\n", drive->name); + CMD640_DUMP_REGS; + } +#endif /* CMD640_DUMP_REGS */ +#endif /* CONFIG_BLK_DEV_CMD640 */ + } + } + } + return retval; +} + + +/* + * do_probe() has the difficult job of finding a drive if it exists, + * without getting hung up if it doesn't exist, without trampling on + * ethernet cards, and without leaving any IRQs dangling to haunt us later. + * + * If a drive is "known" to exist (from CMOS or kernel parameters), + * but does not respond right away, the probe will "hang in there" + * for the maximum wait time (about 30 seconds), otherwise it will + * exit much more quickly. + * + * Returns: 0 device was identified + * 1 device timed-out (no response to identify request) + * 2 device aborted the command (refused to identify itself) + * 3 bad status from device (possible for ATAPI drives) + * 4 probe was not attempted because failure was obvious + */ +static int do_probe (ide_drive_t *drive, byte cmd) +{ + int rc; + ide_hwif_t *hwif = HWIF(drive); + if (drive->present) { /* avoid waiting for inappropriate probes */ + if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) + return 4; + } +#ifdef DEBUG + printk("probing for %s: present=%d, media=%d, probetype=%s\n", + drive->name, drive->present, drive->media, + (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); +#endif + ide_delay_50ms(); /* needed for some systems (e.g. crw9624 as drive0 with disk as slave) */ + SELECT_DRIVE(hwif,drive); + ide_delay_50ms(); + if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) { + if (drive->select.b.unit != 0) { + SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */ + ide_delay_50ms(); /* allow BUSY_STAT to assert & clear */ + } + return 3; /* no i/f present: mmm.. 
this should be a 4 -ml */ + } + + if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT) + || drive->present || cmd == WIN_PIDENTIFY) + { + if ((rc = try_to_identify(drive,cmd))) /* send cmd and wait */ + rc = try_to_identify(drive,cmd); /* failed: try again */ + if (rc == 1 && cmd == WIN_PIDENTIFY && drive->autotune != 2) { + unsigned long timeout; + printk("%s: no response (status = 0x%02x), resetting drive\n", drive->name, GET_STAT()); + ide_delay_50ms(); + OUT_BYTE (drive->select.all, IDE_SELECT_REG); + ide_delay_50ms(); + OUT_BYTE(WIN_SRST, IDE_COMMAND_REG); + timeout = jiffies; + while ((GET_STAT() & BUSY_STAT) && time_before(jiffies, timeout + WAIT_WORSTCASE)) + ide_delay_50ms(); + rc = try_to_identify(drive, cmd); + } + if (rc == 1) + printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT()); + (void) GET_STAT(); /* ensure drive irq is clear */ + } else { + rc = 3; /* not present or maybe ATAPI */ + } + if (drive->select.b.unit != 0) { + SELECT_DRIVE(hwif,&hwif->drives[0]); /* exit with drive0 selected */ + ide_delay_50ms(); + (void) GET_STAT(); /* ensure drive irq is clear */ + } + return rc; +} + +/* + * + */ +static void enable_nest (ide_drive_t *drive) +{ + unsigned long timeout; + + printk("%s: enabling %s -- ", HWIF(drive)->name, drive->id->model); + SELECT_DRIVE(HWIF(drive), drive); + ide_delay_50ms(); + OUT_BYTE(EXABYTE_ENABLE_NEST, IDE_COMMAND_REG); + timeout = jiffies + WAIT_WORSTCASE; + do { + if (time_after(jiffies, timeout)) { + printk("failed (timeout)\n"); + return; + } + ide_delay_50ms(); + } while (GET_STAT() & BUSY_STAT); + ide_delay_50ms(); + if (!OK_STAT(GET_STAT(), 0, BAD_STAT)) + printk("failed (status = 0x%02x)\n", GET_STAT()); + else + printk("success\n"); + if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */ + (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */ + } +} + +/* + * probe_for_drive() tests for existence of a given drive using do_probe(). + * + * Returns: 0 no device was found + * 1 device was found (note: drive->present might still be 0) + */ +static inline byte probe_for_drive (ide_drive_t *drive) +{ + if (drive->noprobe) /* skip probing? */ + return drive->present; + if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */ + (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */ + } + if (drive->id && strstr(drive->id->model, "E X A B Y T E N E S T")) + enable_nest(drive); + if (!drive->present) + return 0; /* drive not found */ + if (drive->id == NULL) { /* identification failed? */ + if (drive->media == ide_disk) { + printk ("%s: non-IDE drive, CHS=%d/%d/%d\n", + drive->name, drive->cyl, drive->head, drive->sect); + } else if (drive->media == ide_cdrom) { + printk("%s: ATAPI cdrom (?)\n", drive->name); + } else { + drive->present = 0; /* nuke it */ + } + } + return 1; /* drive was found */ +} + +/* + * Calculate the region that this interface occupies, + * handling interfaces where the registers may not be + * ordered sanely. We deal with the CONTROL register + * separately. 
+ */ +static int hwif_check_regions (ide_hwif_t *hwif) +{ + int region_errors = 0; + + hwif->straight8 = 0; + region_errors = ide_check_region(hwif->io_ports[IDE_DATA_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_ERROR_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_LCYL_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_HCYL_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_SELECT_OFFSET], 1); + region_errors += ide_check_region(hwif->io_ports[IDE_STATUS_OFFSET], 1); + + if (hwif->io_ports[IDE_CONTROL_OFFSET]) + region_errors += ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1); +#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC) + if (hwif->io_ports[IDE_IRQ_OFFSET]) + region_errors += ide_check_region(hwif->io_ports[IDE_IRQ_OFFSET], 1); +#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */ + /* + * If any errors are return, we drop the hwif interface. + */ + return(region_errors); +} + +static void hwif_register (ide_hwif_t *hwif) +{ + if (((unsigned long)hwif->io_ports[IDE_DATA_OFFSET] | 7) == + ((unsigned long)hwif->io_ports[IDE_STATUS_OFFSET])) { + ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 8, hwif->name); + hwif->straight8 = 1; + goto jump_straight8; + } + + if (hwif->io_ports[IDE_DATA_OFFSET]) + ide_request_region(hwif->io_ports[IDE_DATA_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_ERROR_OFFSET]) + ide_request_region(hwif->io_ports[IDE_ERROR_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_NSECTOR_OFFSET]) + ide_request_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_SECTOR_OFFSET]) + ide_request_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_LCYL_OFFSET]) + ide_request_region(hwif->io_ports[IDE_LCYL_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_HCYL_OFFSET]) + ide_request_region(hwif->io_ports[IDE_HCYL_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_SELECT_OFFSET]) + ide_request_region(hwif->io_ports[IDE_SELECT_OFFSET], 1, hwif->name); + if (hwif->io_ports[IDE_STATUS_OFFSET]) + ide_request_region(hwif->io_ports[IDE_STATUS_OFFSET], 1, hwif->name); + +jump_straight8: + if (hwif->io_ports[IDE_CONTROL_OFFSET]) + ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name); +#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC) + if (hwif->io_ports[IDE_IRQ_OFFSET]) + ide_request_region(hwif->io_ports[IDE_IRQ_OFFSET], 1, hwif->name); +#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */ +} + +/* + * This routine only knows how to look for drive units 0 and 1 + * on an interface, so any setting of MAX_DRIVES > 2 won't work here. 
+ */ +static void probe_hwif (ide_hwif_t *hwif) +{ + unsigned int unit; + unsigned long flags; + + if (hwif->noprobe) + return; +#ifdef CONFIG_BLK_DEV_IDE + if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) { + extern void probe_cmos_for_drives(ide_hwif_t *); + + probe_cmos_for_drives (hwif); + } +#endif + + if ((hwif->chipset != ide_4drives || !hwif->mate->present) && +#if CONFIG_BLK_DEV_PDC4030 + (hwif->chipset != ide_pdc4030 || hwif->channel == 0) && +#endif /* CONFIG_BLK_DEV_PDC4030 */ + (hwif_check_regions(hwif))) { + int msgout = 0; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if (drive->present) { + drive->present = 0; + printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name); + msgout = 1; + } + } + if (!msgout) + printk("%s: ports already in use, skipping probe\n", hwif->name); + return; + } + + __save_flags(flags); /* local CPU only */ + __sti(); /* local CPU only; needed for jiffies and irq probing */ + /* + * Second drive should only exist if first drive was found, + * but a lot of cdrom drives are configured as single slaves. + */ + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + (void) probe_for_drive (drive); + if (drive->present && !hwif->present) { + hwif->present = 1; + if (hwif->chipset != ide_4drives || !hwif->mate->present) { + hwif_register(hwif); + } + } + } + if (hwif->io_ports[IDE_CONTROL_OFFSET] && hwif->reset) { + unsigned long timeout = jiffies + WAIT_WORSTCASE; + byte stat; + + printk("%s: reset\n", hwif->name); + OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]); + udelay(10); + OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]); + do { + ide_delay_50ms(); + stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); + } while ((stat & BUSY_STAT) && 0 < (signed long)(timeout - jiffies)); + + } + __restore_flags(flags); /* local CPU only */ + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + if (drive->present) { + ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc; + if (tuneproc != NULL && drive->autotune == 1) + tuneproc(drive, 255); /* auto-tune PIO mode */ + } + } +} + +#if MAX_HWIFS > 1 +/* + * save_match() is used to simplify logic in init_irq() below. + * + * A loophole here is that we may not know about a particular + * hwif's irq until after that hwif is actually probed/initialized.. + * This could be a problem for the case where an hwif is on a + * dual interface that requires serialization (eg. cmd640) and another + * hwif using one of the same irqs is initialized beforehand. + * + * This routine detects and reports such situations, but does not fix them. + */ +static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match) +{ + ide_hwif_t *m = *match; + + if (m && m->hwgroup && m->hwgroup != new->hwgroup) { + if (!new->hwgroup) + return; + printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name); + } + if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */ + *match = new; +} +#endif /* MAX_HWIFS > 1 */ + +/* + * init request queue + */ +static void ide_init_queue(ide_drive_t *drive) +{ + request_queue_t *q = &drive->queue; + + q->queuedata = HWGROUP(drive); + blk_init_queue(q, do_ide_request); + + if (drive->media == ide_disk) { +#ifdef CONFIG_BLK_DEV_ELEVATOR_NOOP + elevator_init(&q->elevator, ELEVATOR_NOOP); +#endif + } +} + +/* + * This routine sets up the irq for an ide interface, and creates a new + * hwgroup for the irq/hwif if none was previously assigned. 
+ * + * Much of the code is for correctly detecting/handling irq sharing + * and irq serialization situations. This is somewhat complex because + * it handles static as well as dynamic (PCMCIA) IDE interfaces. + * + * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with + * interrupts completely disabled. This can be bad for interrupt latency, + * but anything else has led to problems on some machines. We re-enable + * interrupts as much as we can safely do in most places. + */ +static int init_irq (ide_hwif_t *hwif) +{ + unsigned long flags; + unsigned int index; + ide_hwgroup_t *hwgroup, *new_hwgroup; + ide_hwif_t *match = NULL; + + + /* Allocate the buffer and potentially sleep first */ + + new_hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL); + + save_flags(flags); /* all CPUs */ + cli(); /* all CPUs */ + + hwif->hwgroup = NULL; +#if MAX_HWIFS > 1 + /* + * Group up with any other hwifs that share our irq(s). + */ + for (index = 0; index < MAX_HWIFS; index++) { + ide_hwif_t *h = &ide_hwifs[index]; + if (h->hwgroup) { /* scan only initialized hwif's */ + if (hwif->irq == h->irq) { + hwif->sharing_irq = h->sharing_irq = 1; + if (hwif->chipset != ide_pci || h->chipset != ide_pci) { + save_match(hwif, h, &match); + } + } + if (hwif->serialized) { + if (hwif->mate && hwif->mate->irq == h->irq) + save_match(hwif, h, &match); + } + if (h->serialized) { + if (h->mate && hwif->irq == h->mate->irq) + save_match(hwif, h, &match); + } + } + } +#endif /* MAX_HWIFS > 1 */ + /* + * If we are still without a hwgroup, then form a new one + */ + if (match) { + hwgroup = match->hwgroup; + if(new_hwgroup) + kfree(new_hwgroup); + } else { + hwgroup = new_hwgroup; + if (!hwgroup) { + restore_flags(flags); /* all CPUs */ + return 1; + } + memset(hwgroup, 0, sizeof(ide_hwgroup_t)); + hwgroup->hwif = hwif->next = hwif; + hwgroup->rq = NULL; + hwgroup->handler = NULL; + hwgroup->drive = NULL; + hwgroup->busy = 0; + init_timer(&hwgroup->timer); + hwgroup->timer.function = &ide_timer_expiry; + hwgroup->timer.data = (unsigned long) hwgroup; + } + + /* + * Allocate the irq, if not already obtained for another hwif + */ + if (!match || match->irq != hwif->irq) { +#ifdef CONFIG_IDEPCI_SHARE_IRQ + int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? SA_SHIRQ : SA_INTERRUPT; +#else /* !CONFIG_IDEPCI_SHARE_IRQ */ + int sa = IDE_CHIPSET_IS_PCI(hwif->chipset) ? 
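/*
 * Two details worth noting in init_irq() so far: the hwgroup buffer is
 * allocated with GFP_KERNEL *before* cli(), because that allocation
 * may sleep and sleeping with interrupts disabled is not allowed; and
 * hwifs that share an IRQ (or must be serialized with their mate) are
 * linked into one hwgroup, a ring through hwif->next on which only one
 * request is active at a time.  A sketch of the resulting ring
 * invariant (illustrative walk only):
 */
#if 0
ide_hwif_t *h = hwgroup->hwif;
do {
	/* every member was judged by save_match() to share, or be
	 * serialized with, the group's irq */
	h = h->next;
} while (h != hwgroup->hwif);
#endif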
SA_INTERRUPT|SA_SHIRQ : SA_INTERRUPT; +#endif /* CONFIG_IDEPCI_SHARE_IRQ */ + + if (hwif->io_ports[IDE_CONTROL_OFFSET]) + OUT_BYTE(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]); /* clear nIEN */ + + if (ide_request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwgroup)) { + if (!match) + kfree(hwgroup); + restore_flags(flags); /* all CPUs */ + return 1; + } + } + + /* + * Everything is okay, so link us into the hwgroup + */ + hwif->hwgroup = hwgroup; + hwif->next = hwgroup->hwif->next; + hwgroup->hwif->next = hwif; + + for (index = 0; index < MAX_DRIVES; ++index) { + ide_drive_t *drive = &hwif->drives[index]; + if (!drive->present) + continue; + if (!hwgroup->drive) + hwgroup->drive = drive; + drive->next = hwgroup->drive->next; + hwgroup->drive->next = drive; + ide_init_queue(drive); + } + if (!hwgroup->hwif) { + hwgroup->hwif = HWIF(hwgroup->drive); +#ifdef DEBUG + printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name); +#endif + } + restore_flags(flags); /* all CPUs; safe now that hwif->hwgroup is set up */ + +#if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__) + printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name, + hwif->io_ports[IDE_DATA_OFFSET], + hwif->io_ports[IDE_DATA_OFFSET]+7, + hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq); +#elif defined(__sparc__) + printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %s", hwif->name, + hwif->io_ports[IDE_DATA_OFFSET], + hwif->io_ports[IDE_DATA_OFFSET]+7, + hwif->io_ports[IDE_CONTROL_OFFSET], __irq_itoa(hwif->irq)); +#else + printk("%s at %p on irq 0x%08x", hwif->name, + hwif->io_ports[IDE_DATA_OFFSET], hwif->irq); +#endif /* __mc68000__ && CONFIG_APUS */ + if (match) + printk(" (%sed with %s)", + hwif->sharing_irq ? "shar" : "serializ", match->name); + printk("\n"); + return 0; +} + +/* + * init_gendisk() (as opposed to ide_geninit) is called for each major device, + * after probing for drives, to allocate partition tables and other data + * structures needed for the routines in genhd.c. ide_geninit() gets called + * somewhat later, during the partition check. + */ +static void init_gendisk (ide_hwif_t *hwif) +{ + struct gendisk *gd; + unsigned int unit, units, minors; + int *bs, *max_sect; /* , *max_ra; */ +#ifdef DEVFS_MUST_DIE + extern devfs_handle_t ide_devfs_handle; +#endif + +#if 1 + units = MAX_DRIVES; +#else + /* figure out maximum drive number on the interface */ + for (units = MAX_DRIVES; units > 0; --units) { + if (hwif->drives[units-1].present) + break; + } +#endif + + minors = units * (1<<PARTN_BITS); + gd = kmalloc (sizeof(struct gendisk), GFP_KERNEL); + if (!gd) + goto err_kmalloc_gd; + gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); + if (!gd->sizes) + goto err_kmalloc_gd_sizes; + gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); + if (!gd->part) + goto err_kmalloc_gd_part; + bs = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!bs) + goto err_kmalloc_bs; + max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!max_sect) + goto err_kmalloc_max_sect; +#if 0 + max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); + if (!max_ra) + goto err_kmalloc_max_ra; +#endif + + memset(gd->part, 0, minors * sizeof(struct hd_struct)); + + /* cdroms and msdos f/s are examples of non-1024 blocksizes */ + blksize_size[hwif->major] = bs; + max_sectors[hwif->major] = max_sect; + /*max_readahead[hwif->major] = max_ra;*/ + for (unit = 0; unit < minors; ++unit) { + *bs++ = BLOCK_SIZE; + /* + * IDE can do up to 128K per request == 256 + */ + *max_sect++ = ((hwif->chipset == ide_pdc4030) ?
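/*
 * The minor-number math in init_gendisk() above: each unit owns
 * 2^PARTN_BITS consecutive minors -- the whole-disk node first, then
 * its partitions -- hence minors = units * (1<<PARTN_BITS).  With the
 * usual 2.4 value of PARTN_BITS == 6 (an assumption; it is defined
 * elsewhere, not in this hunk), unit 0 is minor 0 with partitions
 * 1..63 and unit 1 starts at minor 64.  Sketch:
 */
#if 0
static int whole_disk_minor(int unit)
{
	return unit << PARTN_BITS;	/* 0 -> 0, 1 -> 64 for PARTN_BITS == 6 */
}
#endif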
127 : 128); + /* *max_ra++ = vm_max_readahead; */ + } + + for (unit = 0; unit < units; ++unit) + hwif->drives[unit].part = &gd->part[unit << PARTN_BITS]; + + gd->major = hwif->major; /* our major device number */ + gd->major_name = IDE_MAJOR_NAME; /* treated special in genhd.c */ + gd->minor_shift = PARTN_BITS; /* num bits for partitions */ + gd->max_p = 1<<PARTN_BITS; /* 1 + max partitions / drive */ + gd->nr_real = units; /* current num real drives */ + gd->real_devices= hwif; /* ptr to internal data */ + gd->next = NULL; /* linked list of major devs */ + gd->fops = ide_fops; /* file operations */ + gd->flags = kmalloc (sizeof *gd->flags * units, GFP_KERNEL); + if (gd->flags) + memset (gd->flags, 0, sizeof *gd->flags * units); +#ifdef DEVFS_MUST_DIE + gd->de_arr = kmalloc (sizeof *gd->de_arr * units, GFP_KERNEL); + if (gd->de_arr) + memset (gd->de_arr, 0, sizeof *gd->de_arr * units); +#endif + + hwif->gd = gd; + add_gendisk(gd); + + for (unit = 0; unit < units; ++unit) { +#if 1 + char name[64]; + ide_add_generic_settings(hwif->drives + unit); + hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit); + sprintf (name, "host%d/bus%d/target%d/lun%d", + (hwif->channel && hwif->mate) ? + hwif->mate->index : hwif->index, + hwif->channel, unit, hwif->drives[unit].lun); +#ifdef DEVFS_MUST_DIE + if (hwif->drives[unit].present) + hwif->drives[unit].de = devfs_mk_dir(ide_devfs_handle, name, NULL); +#endif +#else + if (hwif->drives[unit].present) { + char name[64]; + + ide_add_generic_settings(hwif->drives + unit); + hwif->drives[unit].dn = ((hwif->channel ? 2 : 0) + unit); + sprintf (name, "host%d/bus%d/target%d/lun%d", + (hwif->channel && hwif->mate) ? hwif->mate->index : hwif->index, + hwif->channel, unit, hwif->drives[unit].lun); + hwif->drives[unit].de = + devfs_mk_dir (ide_devfs_handle, name, NULL); + } +#endif + } + return; + +#if 0 +err_kmalloc_max_ra: + kfree(max_sect); +#endif +err_kmalloc_max_sect: + kfree(bs); +err_kmalloc_bs: + kfree(gd->part); +err_kmalloc_gd_part: + kfree(gd->sizes); +err_kmalloc_gd_sizes: + kfree(gd); +err_kmalloc_gd: + printk(KERN_WARNING "(ide::init_gendisk) Out of memory\n"); + return; +} + +static int hwif_init (ide_hwif_t *hwif) +{ + if (!hwif->present) + return 0; + if (!hwif->irq) { + if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) + { + printk("%s: DISABLED, NO IRQ\n", hwif->name); + return (hwif->present = 0); + } + } +#ifdef CONFIG_BLK_DEV_HD + if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) { + printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name); + return (hwif->present = 0); + } +#endif /* CONFIG_BLK_DEV_HD */ + + hwif->present = 0; /* we set it back to 1 if all is ok below */ + +#ifdef DEVFS_MUST_DIE + if (devfs_register_blkdev (hwif->major, hwif->name, ide_fops)) { + printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major); + return (hwif->present = 0); + } +#endif + + if (init_irq(hwif)) { + int i = hwif->irq; + /* + * It failed to initialise. Find the default IRQ for + * this port and try that.
+ */ + if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) { + printk("%s: Disabled unable to get IRQ %d.\n", hwif->name, i); + (void) unregister_blkdev (hwif->major, hwif->name); + return (hwif->present = 0); + } + if (init_irq(hwif)) { + printk("%s: probed IRQ %d and default IRQ %d failed.\n", + hwif->name, i, hwif->irq); + (void) unregister_blkdev (hwif->major, hwif->name); + return (hwif->present = 0); + } + printk("%s: probed IRQ %d failed, using default.\n", + hwif->name, hwif->irq); + } + + init_gendisk(hwif); + blk_dev[hwif->major].data = hwif; + blk_dev[hwif->major].queue = ide_get_queue; +#if 0 + read_ahead[hwif->major] = 8; /* (4kB) */ +#endif + hwif->present = 1; /* success */ + +#if (DEBUG_SPINLOCK > 0) +{ + static int done = 0; + if (!done++) + printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ +} +#endif + return hwif->present; +} + +void export_ide_init_queue (ide_drive_t *drive) +{ + ide_init_queue(drive); +} + +byte export_probe_for_drive (ide_drive_t *drive) +{ + return probe_for_drive(drive); +} + +EXPORT_SYMBOL(export_ide_init_queue); +EXPORT_SYMBOL(export_probe_for_drive); + +int ideprobe_init (void); +static ide_module_t ideprobe_module = { + IDE_PROBE_MODULE, + ideprobe_init, + NULL +}; + +int ideprobe_init (void) +{ + unsigned int index; + int probe[MAX_HWIFS]; + + MOD_INC_USE_COUNT; + memset(probe, 0, MAX_HWIFS * sizeof(int)); + for (index = 0; index < MAX_HWIFS; ++index) + probe[index] = !ide_hwifs[index].present; + + /* + * Probe for drives in the usual way.. CMOS/BIOS, then poke at ports + */ + for (index = 0; index < MAX_HWIFS; ++index) + if (probe[index]) + probe_hwif(&ide_hwifs[index]); + for (index = 0; index < MAX_HWIFS; ++index) + if (probe[index]) + hwif_init(&ide_hwifs[index]); + if (!ide_probe) + ide_probe = &ideprobe_module; + MOD_DEC_USE_COUNT; + return 0; +} + +#ifdef MODULE +extern int (*ide_xlate_1024_hook)(kdev_t, int, int, const char *); + +int init_module (void) +{ + unsigned int index; + + for (index = 0; index < MAX_HWIFS; ++index) + ide_unregister(index); + ideprobe_init(); + create_proc_ide_interfaces(); + ide_xlate_1024_hook = ide_xlate_1024; + return 0; +} + +void cleanup_module (void) +{ + ide_probe = NULL; + ide_xlate_1024_hook = 0; +} +MODULE_LICENSE("GPL"); +#endif /* MODULE */ diff --git a/xen-2.4.16/drivers/ide/ide-taskfile.c b/xen-2.4.16/drivers/ide/ide-taskfile.c new file mode 100644 index 0000000000..34bfacebfe --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide-taskfile.c @@ -0,0 +1,1722 @@ +/* + * linux/drivers/ide/ide-taskfile.c Version 0.20 Oct 11, 2000 + * + * Copyright (C) 2000 Michael Cornwell + * Copyright (C) 2000 Andre Hedrick + * + * May be copied or modified under the terms of the GNU General Public License + * + * IDE_DEBUG(__LINE__); + */ + +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_IDE_TASKFILE_IO +# define __TASKFILE__IO +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#define DEBUG_TASKFILE 0 /* unset when fixed */ + +#if DEBUG_TASKFILE +#define DTF(x...) printk(##x) +#else +#define DTF(x...) 
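/*
 * Note on the DTF() debug macro just above: with DEBUG_TASKFILE set to
 * 1, the "printk(##x)" branch will not compile -- with gcc's named
 * variadic macros the arguments are used as plain "x", and a leading
 * ## is an invalid token paste.  The conventional form would be the
 * following (a sketch; DEBUG_TASKFILE is 0 here, so the broken branch
 * is never built):
 */
#if 0
#define DTF(x...)	printk(x)
#endif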
+#endif + +inline u32 task_read_24 (ide_drive_t *drive) +{ + return (IN_BYTE(IDE_HCYL_REG)<<16) | + (IN_BYTE(IDE_LCYL_REG)<<8) | + IN_BYTE(IDE_SECTOR_REG); +} + +static void ata_bswap_data (void *buffer, int wcount) +{ + u16 *p = buffer; + + while (wcount--) { + *p = *p << 8 | *p >> 8; p++; + *p = *p << 8 | *p >> 8; p++; + } +} + +#if SUPPORT_VLB_SYNC +/* + * Some localbus EIDE interfaces require a special access sequence + * when using 32-bit I/O instructions to transfer data. We call this + * the "vlb_sync" sequence, which consists of three successive reads + * of the sector count register location, with interrupts disabled + * to ensure that the reads all happen together. + */ +static inline void task_vlb_sync (ide_ioreg_t port) { + (void) inb (port); + (void) inb (port); + (void) inb (port); +} +#endif /* SUPPORT_VLB_SYNC */ + +/* + * This is used for most PIO data transfers *from* the IDE interface + */ +void ata_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + task_vlb_sync(IDE_NSECTOR_REG); + insl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + insl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + *ptr++ = inw_p(IDE_DATA_REG); + *ptr++ = inw_p(IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + insw(IDE_DATA_REG, buffer, wcount<<1); + } +} + +/* + * This is used for most PIO data transfers *to* the IDE interface + */ +void ata_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + task_vlb_sync(IDE_NSECTOR_REG); + outsl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + outsl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + outw_p(*ptr++, IDE_DATA_REG); + outw_p(*ptr++, IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + outsw(IDE_DATA_REG, buffer, wcount<<1); + } +} + + +static inline void taskfile_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + ata_input_data(drive, buffer, wcount); + if (drive->bswap) + ata_bswap_data(buffer, wcount); +} + +static inline void taskfile_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + if (drive->bswap) { + ata_bswap_data(buffer, wcount); + ata_output_data(drive, buffer, wcount); + ata_bswap_data(buffer, wcount); + } else { + ata_output_data(drive, buffer, wcount); + } +} + +ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) +{ + task_struct_t *taskfile = (task_struct_t *) task->tfRegister; + hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister; + struct hd_driveid *id = drive->id; + byte HIHI = (drive->addressing) ? 
0xE0 : 0xEF; + + /* (ks/hs): Moved to start, do not use for multiple out commands */ + if (task->handler != task_mulout_intr) { + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */ + SELECT_MASK(HWIF(drive), drive, 0); + } + + if ((id->command_set_2 & 0x0400) && + (id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(hobfile->feature, IDE_FEATURE_REG); + OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG); + OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG); + OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG); + OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG); + } + + OUT_BYTE(taskfile->feature, IDE_FEATURE_REG); + OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG); + /* refers to number of sectors to transfer */ + OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG); + /* refers to sector offset or start sector */ + OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG); + OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG); + + OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG); + if (task->handler != NULL) { +#if 0 + ide_set_handler (drive, task->handler, WAIT_CMD, NULL); + OUT_BYTE(taskfile->command, IDE_COMMAND_REG); + /* + * warning check for race between handler and prehandler for + * writing first block of data. however since we are well + * inside the boundaries of the seek, we should be okay. + */ + if (task->prehandler != NULL) { + return task->prehandler(drive, task->rq); + } +#else + ide_startstop_t startstop; + + ide_set_handler (drive, task->handler, WAIT_CMD, NULL); + OUT_BYTE(taskfile->command, IDE_COMMAND_REG); + + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", + drive->name, + drive->mult_count ? "MULTWRITE" : "WRITE"); + return startstop; + } + /* (ks/hs): Fixed Multi Write */ + if ((taskfile->command != WIN_MULTWRITE) && + (taskfile->command != WIN_MULTWRITE_EXT)) { + struct request *rq = HWGROUP(drive)->rq; + /* For Write_sectors we need to stuff the first sector */ + taskfile_output_data(drive, rq->buffer, SECTOR_WORDS); + rq->current_nr_sectors--; + } else { + /* Stuff first sector(s) by implicitly calling the handler */ + if (!(drive_is_ready(drive))) { + /* FIXME: Replace hard-coded 100, error handling? */ + int i; + for (i=0; i<100; i++) { + if (drive_is_ready(drive)) + break; + } + } + return task->handler(drive); + } +#endif + } else { + /* for dma commands we down set the handler */ + if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive))); + } + + return ide_started; +} + +void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler) +{ + struct hd_driveid *id = drive->id; + byte HIHI = (drive->addressing) ? 
0xE0 : 0xEF; + + /* (ks/hs): Moved to start, do not use for multiple out commands */ + if (*handler != task_mulout_intr) { + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */ + SELECT_MASK(HWIF(drive), drive, 0); + } + + if ((id->command_set_2 & 0x0400) && + (id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(hobfile->feature, IDE_FEATURE_REG); + OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG); + OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG); + OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG); + OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG); + } + + OUT_BYTE(taskfile->feature, IDE_FEATURE_REG); + OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG); + /* refers to number of sectors to transfer */ + OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG); + /* refers to sector offset or start sector */ + OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG); + OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG); + + OUT_BYTE((taskfile->device_head & HIHI) | drive->select.all, IDE_SELECT_REG); + if (handler != NULL) { + ide_set_handler (drive, handler, WAIT_CMD, NULL); + OUT_BYTE(taskfile->command, IDE_COMMAND_REG); + } else { + /* for dma commands we down set the handler */ + if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive))); + } +} + +#if 0 +ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task) +{ + task_struct_t *taskfile = (task_struct_t *) task->tfRegister; + hob_struct_t *hobfile = (hob_struct_t *) task->hobRegister; + struct hd_driveid *id = drive->id; + + /* + * (KS) Check taskfile in/out flags. + * If set, then execute as it is defined. + * If not set, then define default settings. 
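+ * (A caller can therefore zero both flag words and get a full standard pass-through of the taskfile.)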
+ * The default values are: + * write and read all taskfile registers (except data) + * write and read the hob registers (sector,nsector,lcyl,hcyl) + */ + if (task->tf_out_flags.all == 0) { + task->tf_out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS; + if ((id->command_set_2 & 0x0400) && + (id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + task->tf_out_flags.all |= (IDE_HOB_STD_OUT_FLAGS << 8); + } + } + + if (task->tf_in_flags.all == 0) { + task->tf_in_flags.all = IDE_TASKFILE_STD_IN_FLAGS; + if ((id->command_set_2 & 0x0400) && + (id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + task->tf_in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8); + } + } + + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl, IDE_CONTROL_REG); /* clear nIEN */ + SELECT_MASK(HWIF(drive), drive, 0); + + if (task->tf_out_flags.b.data) { + unsigned short data = taskfile->data + (hobfile->data << 8); + OUT_WORD (data, IDE_DATA_REG); + } + + /* (KS) send hob registers first */ + if (task->tf_out_flags.b.nsector_hob) + OUT_BYTE(hobfile->sector_count, IDE_NSECTOR_REG); + if (task->tf_out_flags.b.sector_hob) + OUT_BYTE(hobfile->sector_number, IDE_SECTOR_REG); + if (task->tf_out_flags.b.lcyl_hob) + OUT_BYTE(hobfile->low_cylinder, IDE_LCYL_REG); + if (task->tf_out_flags.b.hcyl_hob) + OUT_BYTE(hobfile->high_cylinder, IDE_HCYL_REG); + + + /* (KS) Send now the standard registers */ + if (task->tf_out_flags.b.error_feature) + OUT_BYTE(taskfile->feature, IDE_FEATURE_REG); + /* refers to number of sectors to transfer */ + if (task->tf_out_flags.b.nsector) + OUT_BYTE(taskfile->sector_count, IDE_NSECTOR_REG); + /* refers to sector offset or start sector */ + if (task->tf_out_flags.b.sector) + OUT_BYTE(taskfile->sector_number, IDE_SECTOR_REG); + if (task->tf_out_flags.b.lcyl) + OUT_BYTE(taskfile->low_cylinder, IDE_LCYL_REG); + if (task->tf_out_flags.b.hcyl) + OUT_BYTE(taskfile->high_cylinder, IDE_HCYL_REG); + + /* + * (KS) Do not modify the specified taskfile. We want to have a + * universal pass through, so we must execute ALL specified values. + * + * (KS) The drive head register is mandatory. + * Don't care about the out flags ! + */ + OUT_BYTE(taskfile->device_head | drive->select.all, IDE_SELECT_REG); + if (task->handler != NULL) { +#if 0 + ide_set_handler (drive, task->handler, WAIT_CMD, NULL); + OUT_BYTE(taskfile->command, IDE_COMMAND_REG); + /* + * warning check for race between handler and prehandler for + * writing first block of data. however since we are well + * inside the boundaries of the seek, we should be okay. + */ + if (task->prehandler != NULL) { + return task->prehandler(drive, task->rq); + } +#else + ide_startstop_t startstop; + + ide_set_handler (drive, task->handler, WAIT_CMD, NULL); + + /* + * (KS) The drive command register is also mandatory. + * Don't care about the out flags ! + */ + OUT_BYTE(taskfile->command, IDE_COMMAND_REG); + + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", + drive->name, + drive->mult_count ?
"MULTWRITE" : "WRITE"); + return startstop; + } + /* (ks/hs): Fixed Multi Write */ + if ((taskfile->command != WIN_MULTWRITE) && + (taskfile->command != WIN_MULTWRITE_EXT)) { + struct request *rq = HWGROUP(drive)->rq; + /* For Write_sectors we need to stuff the first sector */ + taskfile_output_data(drive, rq->buffer, SECTOR_WORDS); + rq->current_nr_sectors--; + } else { + /* Stuff first sector(s) by implicitly calling the handler */ + if (!(drive_is_ready(drive))) { + /* FIXME: Replace hard-coded 100, error handling? */ + int i; + for (i=0; i<100; i++) { + if (drive_is_ready(drive)) + break; + } + } + return task->handler(drive); + } +#endif + } else { + /* for dma commands we down set the handler */ + if (drive->using_dma && !(HWIF(drive)->dmaproc(((taskfile->command == WIN_WRITEDMA) || (taskfile->command == WIN_WRITEDMA_EXT)) ? ide_dma_write : ide_dma_read, drive))); + } + + return ide_started; +} +#endif + +#if 0 +/* + * Error reporting, in human readable form (luxurious, but a memory hog). + */ +byte taskfile_dump_status (ide_drive_t *drive, const char *msg, byte stat) +{ + unsigned long flags; + byte err = 0; + + __save_flags (flags); /* local CPU only */ + ide__sti(); /* local CPU only */ + printk("%s: %s: status=0x%02x", drive->name, msg, stat); +#if FANCY_STATUS_DUMPS + printk(" { "); + if (stat & BUSY_STAT) + printk("Busy "); + else { + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("DeviceFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); + } + printk("}"); +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { + err = GET_ERR(); + printk("%s: %s: error=0x%02x", drive->name, msg, err); +#if FANCY_STATUS_DUMPS + if (drive->media == ide_disk) { + printk(" { "); + if (err & ABRT_ERR) printk("DriveStatusError "); + if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector "); + if (err & ECC_ERR) printk("UncorrectableError "); + if (err & ID_ERR) printk("SectorIdNotFound "); + if (err & TRK0_ERR) printk("TrackZeroNotFound "); + if (err & MARK_ERR) printk("AddrMarkNotFound "); + printk("}"); + if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) { + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + __u64 sectors = 0; + u32 low = 0, high = 0; + low = task_read_24(drive); + OUT_BYTE(0x80, IDE_CONTROL_REG); + high = task_read_24(drive); + sectors = ((__u64)high << 24) | low; + printk(", LBAsect=%lld", sectors); + } else { + byte cur = IN_BYTE(IDE_SELECT_REG); + if (cur & 0x40) { /* using LBA? */ + printk(", LBAsect=%ld", (unsigned long) + ((cur&0xf)<<24) + |(IN_BYTE(IDE_HCYL_REG)<<16) + |(IN_BYTE(IDE_LCYL_REG)<<8) + | IN_BYTE(IDE_SECTOR_REG)); + } else { + printk(", CHS=%d/%d/%d", + (IN_BYTE(IDE_HCYL_REG)<<8) + + IN_BYTE(IDE_LCYL_REG), + cur & 0xf, + IN_BYTE(IDE_SECTOR_REG)); + } + } + if (HWGROUP(drive)->rq) + printk(", sector=%llu", (__u64) HWGROUP(drive)->rq->sector); + } + } +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + } + __restore_flags (flags); /* local CPU only */ + return err; +} + +/* + * Clean up after success/failure of an explicit taskfile operation. 
+ */ +void ide_end_taskfile (ide_drive_t *drive, byte stat, byte err) +{ + unsigned long flags; + struct request *rq; + ide_task_t *args; + task_ioreg_t command; + + spin_lock_irqsave(&io_request_lock, flags); + rq = HWGROUP(drive)->rq; + spin_unlock_irqrestore(&io_request_lock, flags); + args = (ide_task_t *) rq->special; + + command = args->tfRegister[IDE_COMMAND_OFFSET]; + + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + + args->tfRegister[IDE_ERROR_OFFSET] = err; + args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG); + args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG); + args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG); + args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG); + args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG); + args->tfRegister[IDE_STATUS_OFFSET] = stat; + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB); + args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG); + args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG); + args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG); + args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG); + args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG); + } + +/* taskfile_settings_update(drive, args, command); */ + + spin_lock_irqsave(&io_request_lock, flags); + blkdev_dequeue_request(rq); + HWGROUP(drive)->rq = NULL; + end_that_request_last(rq); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * try_to_flush_leftover_data() is invoked in response to a drive + * unexpectedly having its DRQ_STAT bit set. As an alternative to + * resetting the drive, this routine tries to clear the condition + * by read a sector's worth of data from the drive. Of course, + * this may not help if the drive is *waiting* for data from *us*. + */ +void task_try_to_flush_leftover_data (ide_drive_t *drive) +{ + int i = (drive->mult_count ? drive->mult_count : 1) * SECTOR_WORDS; + + if (drive->media != ide_disk) + return; + while (i > 0) { + u32 buffer[16]; + unsigned int wcount = (i > 16) ? 16 : i; + i -= wcount; + taskfile_input_data (drive, buffer, wcount); + } +} + +/* + * taskfile_error() takes action based on the error returned by the drive. 
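+ * Depending on the status/error bits it either fails the request, schedules a recalibrate (ERROR_RECAL), or requests a full drive reset (ERROR_RESET).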
+ */ +ide_startstop_t taskfile_error (ide_drive_t *drive, const char *msg, byte stat) +{ + struct request *rq; + byte err; + + err = taskfile_dump_status(drive, msg, stat); + if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) + return ide_stopped; + /* retry only "normal" I/O: */ + if (rq->cmd == IDE_DRIVE_TASKFILE) { + rq->errors = 1; + ide_end_taskfile(drive, stat, err); + return ide_stopped; + } + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ + rq->errors |= ERROR_RESET; + } else { + if (drive->media == ide_disk && (stat & ERR_STAT)) { + /* err has different meaning on cdrom and tape */ + if (err == ABRT_ERR) { + if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY) + return ide_stopped; /* some newer drives don't support WIN_SPECIFY */ + } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) { + drive->crc_count++; /* UDMA crc error -- just retry the operation */ + } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */ + rq->errors = ERROR_MAX; + else if (err & TRK0_ERR) /* help it find track zero */ + rq->errors |= ERROR_RECAL; + } + if ((stat & DRQ_STAT) && rq->cmd != WRITE) + task_try_to_flush_leftover_data(drive); + } + if (GET_STAT() & (BUSY_STAT|DRQ_STAT)) + OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */ + + if (rq->errors >= ERROR_MAX) { + if (drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); + else + ide_end_request(0, HWGROUP(drive)); + } else { + if ((rq->errors & ERROR_RESET) == ERROR_RESET) { + ++rq->errors; + return ide_do_reset(drive); + } + if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) + drive->special.b.recalibrate = 1; + ++rq->errors; + } + return ide_stopped; +} +#endif + +/* + * Handler for special commands without a data phase from ide-disk + */ + +/* + * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. + */ +ide_startstop_t set_multmode_intr (ide_drive_t *drive) +{ + byte stat; + + if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) { + drive->mult_count = drive->mult_req; + } else { + drive->mult_req = drive->mult_count = 0; + drive->special.b.recalibrate = 1; + (void) ide_dump_status(drive, "set_multmode", stat); + } + return ide_stopped; +} + +/* + * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. + */ +ide_startstop_t set_geometry_intr (ide_drive_t *drive) +{ + byte stat; + + if (OK_STAT(stat=GET_STAT(),READY_STAT,BAD_STAT)) + return ide_stopped; + + if (stat & (ERR_STAT|DRQ_STAT)) + return ide_error(drive, "set_geometry_intr", stat); + + ide_set_handler(drive, &set_geometry_intr, WAIT_CMD, NULL); + return ide_started; +} + +/* + * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. 
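+ * A bad completion status is routed through ide_error(); otherwise the recalibrate is treated as done.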
+ */ +ide_startstop_t recal_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + + if (!OK_STAT(stat,READY_STAT,BAD_STAT)) + return ide_error(drive, "recal_intr", stat); + return ide_stopped; +} + +/* + * Handler for commands without a data phase + */ +ide_startstop_t task_no_data_intr (ide_drive_t *drive) +{ + ide_task_t *args = HWGROUP(drive)->rq->special; + byte stat = GET_STAT(); + + ide__sti(); /* local CPU only */ + + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) + return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ + + if (args) + ide_end_drive_cmd (drive, stat, GET_ERR()); + + return ide_stopped; +} + +/* + * Handler for command with PIO data-in phase + */ +ide_startstop_t task_in_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + char *pBuf = NULL; + + if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "task_in_intr", stat); + } + if (!(stat & BUSY_STAT)) { + DTF("task_in_intr to Soon wait for next interrupt\n"); + ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL); + return ide_started; + } + } + DTF("stat: %02x\n", stat); + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Read: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors); + + drive->io_32bit = 0; + taskfile_input_data(drive, pBuf, SECTOR_WORDS); + drive->io_32bit = io_32bit; + + if (--rq->current_nr_sectors <= 0) { + /* (hs): swapped next 2 lines */ + DTF("Request Ended stat: %02x\n", GET_STAT()); + ide_end_request(1, HWGROUP(drive)); + } else { + ide_set_handler(drive, &task_in_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; +} + +#undef ALTSTAT_SCREW_UP + +#ifdef ALTSTAT_SCREW_UP +/* + * (ks/hs): Poll Alternate Status Register to ensure + * that drive is not busy. + */ +byte altstat_multi_busy (ide_drive_t *drive, byte stat, const char *msg) +{ + int i; + + DTF("multi%s: ASR = %x\n", msg, stat); + if (stat & BUSY_STAT) { + /* (ks/hs): FIXME: Replace hard-coded 100, error handling? */ + for (i=0; i<100; i++) { + stat = GET_ALTSTAT(); + if ((stat & BUSY_STAT) == 0) + break; + } + } + /* + * (ks/hs): Read Status AFTER Alternate Status Register + */ + return(GET_STAT()); +} + +/* + * (ks/hs): Poll Alternate status register to wait for drive + * to become ready for next transfer + */ +byte altstat_multi_poll (ide_drive_t *drive, byte stat, const char *msg) +{ + /* (ks/hs): FIXME: Error handling, time-out? 
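+ * As written, the loop below polls the alternate status register with no bound, so a drive that never clears BUSY_STAT hangs this path.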
*/ + while (stat & BUSY_STAT) + stat = GET_ALTSTAT(); + DTF("multi%s: nsect=1, ASR = %x\n", msg, stat); + return(GET_STAT()); /* (ks/hs): Clear pending IRQ */ +} +#endif /* ALTSTAT_SCREW_UP */ + +/* + * Handler for command with Read Multiple + */ +ide_startstop_t task_mulin_intr (ide_drive_t *drive) +{ + unsigned int msect, nsect; + +#ifdef ALTSTAT_SCREW_UP + byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "read"); +#else + byte stat = GET_STAT(); +#endif /* ALTSTAT_SCREW_UP */ + + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + char *pBuf = NULL; + + if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "task_mulin_intr", stat); + } + /* no data yet, so wait for another interrupt */ + ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL); + return ide_started; + } + + /* (ks/hs): Fixed Multi-Sector transfer */ + msect = drive->mult_count; + +#ifdef ALTSTAT_SCREW_UP + /* + * Screw the request we do not support bad data-phase setups! + * Either read and learn the ATA standard or crash yourself! + */ + if (!msect) { + /* + * (ks/hs): Drive supports multi-sector transfer, + * drive->mult_count was not set + */ + nsect = 1; + while (rq->current_nr_sectors) { + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Multiread: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + stat = altstat_multi_poll(drive, GET_ALTSTAT(), "read"); + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } +#endif /* ALTSTAT_SCREW_UP */ + + nsect = (rq->current_nr_sectors > msect) ? msect : rq->current_nr_sectors; + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + + DTF("Multiread: %p, nsect: %d , rq->current_nr_sectors: %ld\n", + pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_input_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + if (rq->current_nr_sectors != 0) { + ide_set_handler(drive, &task_mulin_intr, WAIT_CMD, NULL); + return ide_started; + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; +} + +ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq) +{ + ide_task_t *args = rq->special; + ide_startstop_t startstop; + + if (ide_wait_stat(&startstop, drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) { + printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name, drive->mult_count ? "MULTWRITE" : "WRITE"); + return startstop; + } + + /* (ks/hs): Fixed Multi Write */ + if ((args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE) && + (args->tfRegister[IDE_COMMAND_OFFSET] != WIN_MULTWRITE_EXT)) { + /* For Write_sectors we need to stuff the first sector */ + taskfile_output_data(drive, rq->buffer, SECTOR_WORDS); + rq->current_nr_sectors--; + return ide_started; + } else { + /* + * (ks/hs): Stuff the first sector(s) + * by implicitly calling the handler + */ + if (!(drive_is_ready(drive))) { + int i; + /* + * (ks/hs): FIXME: Replace hard-coded + * 100, error handling? 
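+ * (A bounded poll: after at most 100 status reads the code stops waiting and invokes the handler regardless.)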
+ */ + for (i=0; i<100; i++) { + if (drive_is_ready(drive)) + break; + } + } + return args->handler(drive); + } + return ide_started; +} + +/* + * Handler for command with PIO data-out phase + */ +ide_startstop_t task_out_intr (ide_drive_t *drive) +{ + byte stat = GET_STAT(); + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + char *pBuf = NULL; + + if (!rq->current_nr_sectors) { + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } + + if (!OK_STAT(stat,DRIVE_READY,drive->bad_wstat)) { + return ide_error(drive, "task_out_intr", stat); + } + if ((rq->current_nr_sectors==1) ^ (stat & DRQ_STAT)) { + rq = HWGROUP(drive)->rq; + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("write: %p, rq->current_nr_sectors: %d\n", pBuf, (int) rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_output_data(drive, pBuf, SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors--; + } + + if (rq->current_nr_sectors <= 0) { + ide_end_request(1, HWGROUP(drive)); + } else { + ide_set_handler(drive, &task_out_intr, WAIT_CMD, NULL); + return ide_started; + } + return ide_stopped; +} + +/* + * Handler for command write multiple + * Called directly from execute_drive_cmd for the first bunch of sectors, + * afterwards only by the ISR + */ +ide_startstop_t task_mulout_intr (ide_drive_t *drive) +{ + unsigned int msect, nsect; + +#ifdef ALTSTAT_SCREW_UP + byte stat = altstat_multi_busy(drive, GET_ALTSTAT(), "write"); +#else + byte stat = GET_STAT(); +#endif /* ALTSTAT_SCREW_UP */ + + byte io_32bit = drive->io_32bit; + struct request *rq = HWGROUP(drive)->rq; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + char *pBuf = NULL; + + /* + * (ks/hs): Handle last IRQ on multi-sector transfer, + * occurs after all data was sent + */ + if (rq->current_nr_sectors == 0) { + if (stat & (ERR_STAT|DRQ_STAT)) + return ide_error(drive, "task_mulout_intr", stat); + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } + + if (!OK_STAT(stat,DATA_READY,BAD_R_STAT)) { + if (stat & (ERR_STAT|DRQ_STAT)) { + return ide_error(drive, "task_mulout_intr", stat); + } + /* no data yet, so wait for another interrupt */ + if (hwgroup->handler == NULL) + ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL); + return ide_started; + } + + /* (ks/hs): See task_mulin_intr */ + msect = drive->mult_count; + +#ifdef ALTSTAT_SCREW_UP + /* + * Screw the request we do not support bad data-phase setups! + * Either read and learn the ATA standard or crash yourself! + */ + if (!msect) { + nsect = 1; + while (rq->current_nr_sectors) { + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Multiwrite: %p, nsect: %d, rq->current_nr_sectors: %ld\n", pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + stat = altstat_multi_poll(drive, GET_ALTSTAT(), "write"); + } + ide_end_request(1, HWGROUP(drive)); + return ide_stopped; + } +#endif /* ALTSTAT_SCREW_UP */ + + nsect = (rq->current_nr_sectors > msect) ? 
msect : rq->current_nr_sectors; + pBuf = rq->buffer + ((rq->nr_sectors - rq->current_nr_sectors) * SECTOR_SIZE); + DTF("Multiwrite: %p, nsect: %d , rq->current_nr_sectors: %ld\n", + pBuf, nsect, rq->current_nr_sectors); + drive->io_32bit = 0; + taskfile_output_data(drive, pBuf, nsect * SECTOR_WORDS); + drive->io_32bit = io_32bit; + rq->errors = 0; + rq->current_nr_sectors -= nsect; + if (hwgroup->handler == NULL) + ide_set_handler(drive, &task_mulout_intr, WAIT_CMD, NULL); + return ide_started; +} + +/* Called by internal to feature out type of command being called */ +ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile) +{ + switch(taskfile->command) { + /* IDE_DRIVE_TASK_RAW_WRITE */ + case CFA_WRITE_MULTI_WO_ERASE: + case WIN_MULTWRITE: + case WIN_MULTWRITE_EXT: +// case WIN_WRITEDMA: +// case WIN_WRITEDMA_QUEUED: +// case WIN_WRITEDMA_EXT: +// case WIN_WRITEDMA_QUEUED_EXT: + /* IDE_DRIVE_TASK_OUT */ + case WIN_WRITE: + case WIN_WRITE_VERIFY: + case WIN_WRITE_BUFFER: + case CFA_WRITE_SECT_WO_ERASE: + case WIN_DOWNLOAD_MICROCODE: + return &pre_task_out_intr; + /* IDE_DRIVE_TASK_OUT */ + case WIN_SMART: + if (taskfile->feature == SMART_WRITE_LOG_SECTOR) + return &pre_task_out_intr; + default: + break; + } + return(NULL); +} + +/* Called by internal to feature out type of command being called */ +ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile) +{ + switch(taskfile->command) { + case WIN_IDENTIFY: + case WIN_PIDENTIFY: + case CFA_TRANSLATE_SECTOR: + case WIN_READ_BUFFER: + case WIN_READ: + case WIN_READ_EXT: + return &task_in_intr; + case WIN_SECURITY_DISABLE: + case WIN_SECURITY_ERASE_UNIT: + case WIN_SECURITY_SET_PASS: + case WIN_SECURITY_UNLOCK: + case WIN_DOWNLOAD_MICROCODE: + case CFA_WRITE_SECT_WO_ERASE: + case WIN_WRITE_BUFFER: + case WIN_WRITE_VERIFY: + case WIN_WRITE: + case WIN_WRITE_EXT: + return &task_out_intr; + case WIN_MULTREAD: + case WIN_MULTREAD_EXT: + return &task_mulin_intr; + case CFA_WRITE_MULTI_WO_ERASE: + case WIN_MULTWRITE: + case WIN_MULTWRITE_EXT: + return &task_mulout_intr; + case WIN_SMART: + switch(taskfile->feature) { + case SMART_READ_VALUES: + case SMART_READ_THRESHOLDS: + case SMART_READ_LOG_SECTOR: + return &task_in_intr; + case SMART_WRITE_LOG_SECTOR: + return &task_out_intr; + default: + return &task_no_data_intr; + } + case CFA_REQ_EXT_ERROR_CODE: + case CFA_ERASE_SECTORS: + case WIN_VERIFY: + case WIN_VERIFY_EXT: + case WIN_SEEK: + return &task_no_data_intr; + case WIN_SPECIFY: + return &set_geometry_intr; + case WIN_RESTORE: + return &recal_intr; + case WIN_DIAGNOSE: + case WIN_FLUSH_CACHE: + case WIN_FLUSH_CACHE_EXT: + case WIN_STANDBYNOW1: + case WIN_STANDBYNOW2: + case WIN_SLEEPNOW1: + case WIN_SLEEPNOW2: + case WIN_SETIDLE1: + case WIN_CHECKPOWERMODE1: + case WIN_CHECKPOWERMODE2: + case WIN_GETMEDIASTATUS: + case WIN_MEDIAEJECT: + return &task_no_data_intr; + case WIN_SETMULT: + return &set_multmode_intr; + case WIN_READ_NATIVE_MAX: + case WIN_SET_MAX: + case WIN_READ_NATIVE_MAX_EXT: + case WIN_SET_MAX_EXT: + case WIN_SECURITY_ERASE_PREPARE: + case WIN_SECURITY_FREEZE_LOCK: + case WIN_DOORLOCK: + case WIN_DOORUNLOCK: + case WIN_SETFEATURES: + return &task_no_data_intr; + case DISABLE_SEAGATE: + case EXABYTE_ENABLE_NEST: + return &task_no_data_intr; +#ifdef CONFIG_BLK_DEV_IDEDMA + case WIN_READDMA: + case WIN_IDENTIFY_DMA: + case WIN_READDMA_QUEUED: + case WIN_READDMA_EXT: + case WIN_READDMA_QUEUED_EXT: + case WIN_WRITEDMA: + case 
WIN_WRITEDMA_QUEUED: + case WIN_WRITEDMA_EXT: + case WIN_WRITEDMA_QUEUED_EXT: +#endif + case WIN_FORMAT: + case WIN_INIT: + case WIN_DEVICE_RESET: + case WIN_QUEUED_SERVICE: + case WIN_PACKETCMD: + default: + return(NULL); + } +} + +/* Called by ioctl to feature out type of command being called */ +int ide_cmd_type_parser (ide_task_t *args) +{ + struct hd_drive_task_hdr *taskfile = (struct hd_drive_task_hdr *) args->tfRegister; + struct hd_drive_hob_hdr *hobfile = (struct hd_drive_hob_hdr *) args->hobRegister; + + args->prehandler = ide_pre_handler_parser(taskfile, hobfile); + args->handler = ide_handler_parser(taskfile, hobfile); + + switch(args->tfRegister[IDE_COMMAND_OFFSET]) { + case WIN_IDENTIFY: + case WIN_PIDENTIFY: + return IDE_DRIVE_TASK_IN; + case CFA_TRANSLATE_SECTOR: + case WIN_READ: + case WIN_READ_BUFFER: + return IDE_DRIVE_TASK_IN; + case WIN_WRITE: + case WIN_WRITE_VERIFY: + case WIN_WRITE_BUFFER: + case CFA_WRITE_SECT_WO_ERASE: + case WIN_DOWNLOAD_MICROCODE: + return IDE_DRIVE_TASK_RAW_WRITE; + case WIN_MULTREAD: + return IDE_DRIVE_TASK_IN; + case CFA_WRITE_MULTI_WO_ERASE: + case WIN_MULTWRITE: + return IDE_DRIVE_TASK_RAW_WRITE; + case WIN_SECURITY_DISABLE: + case WIN_SECURITY_ERASE_UNIT: + case WIN_SECURITY_SET_PASS: + case WIN_SECURITY_UNLOCK: + return IDE_DRIVE_TASK_OUT; + case WIN_SMART: + args->tfRegister[IDE_LCYL_OFFSET] = SMART_LCYL_PASS; + args->tfRegister[IDE_HCYL_OFFSET] = SMART_HCYL_PASS; + switch(args->tfRegister[IDE_FEATURE_OFFSET]) { + case SMART_READ_VALUES: + case SMART_READ_THRESHOLDS: + case SMART_READ_LOG_SECTOR: + return IDE_DRIVE_TASK_IN; + case SMART_WRITE_LOG_SECTOR: + return IDE_DRIVE_TASK_OUT; + default: + return IDE_DRIVE_TASK_NO_DATA; + } +#ifdef CONFIG_BLK_DEV_IDEDMA + case WIN_READDMA: + case WIN_IDENTIFY_DMA: + case WIN_READDMA_QUEUED: + case WIN_READDMA_EXT: + case WIN_READDMA_QUEUED_EXT: + return IDE_DRIVE_TASK_IN; + case WIN_WRITEDMA: + case WIN_WRITEDMA_QUEUED: + case WIN_WRITEDMA_EXT: + case WIN_WRITEDMA_QUEUED_EXT: + return IDE_DRIVE_TASK_RAW_WRITE; +#endif + case WIN_SETFEATURES: + switch(args->tfRegister[IDE_FEATURE_OFFSET]) { + case SETFEATURES_XFER: + return IDE_DRIVE_TASK_SET_XFER; + case SETFEATURES_DIS_DEFECT: + case SETFEATURES_EN_APM: + case SETFEATURES_DIS_MSN: + case SETFEATURES_EN_RI: + case SETFEATURES_EN_SI: + case SETFEATURES_DIS_RPOD: + case SETFEATURES_DIS_WCACHE: + case SETFEATURES_EN_DEFECT: + case SETFEATURES_DIS_APM: + case SETFEATURES_EN_MSN: + case SETFEATURES_EN_RLA: + case SETFEATURES_PREFETCH: + case SETFEATURES_EN_RPOD: + case SETFEATURES_DIS_RI: + case SETFEATURES_DIS_SI: + default: + return IDE_DRIVE_TASK_NO_DATA; + } + case WIN_NOP: + case CFA_REQ_EXT_ERROR_CODE: + case CFA_ERASE_SECTORS: + case WIN_VERIFY: + case WIN_VERIFY_EXT: + case WIN_SEEK: + case WIN_SPECIFY: + case WIN_RESTORE: + case WIN_DIAGNOSE: + case WIN_FLUSH_CACHE: + case WIN_FLUSH_CACHE_EXT: + case WIN_STANDBYNOW1: + case WIN_STANDBYNOW2: + case WIN_SLEEPNOW1: + case WIN_SLEEPNOW2: + case WIN_SETIDLE1: + case DISABLE_SEAGATE: + case WIN_CHECKPOWERMODE1: + case WIN_CHECKPOWERMODE2: + case WIN_GETMEDIASTATUS: + case WIN_MEDIAEJECT: + case WIN_SETMULT: + case WIN_READ_NATIVE_MAX: + case WIN_SET_MAX: + case WIN_READ_NATIVE_MAX_EXT: + case WIN_SET_MAX_EXT: + case WIN_SECURITY_ERASE_PREPARE: + case WIN_SECURITY_FREEZE_LOCK: + case EXABYTE_ENABLE_NEST: + case WIN_DOORLOCK: + case WIN_DOORUNLOCK: + return IDE_DRIVE_TASK_NO_DATA; + case WIN_FORMAT: + case WIN_INIT: + case WIN_DEVICE_RESET: + case WIN_QUEUED_SERVICE: + case WIN_PACKETCMD: + default: + 
return IDE_DRIVE_TASK_INVALID; + } +} + +/* + * This function is intended to be used prior to invoking ide_do_drive_cmd(). + */ +void ide_init_drive_taskfile (struct request *rq) +{ + memset(rq, 0, sizeof(*rq)); + rq->cmd = IDE_DRIVE_TASK_NO_DATA; +} + +/* + * This is kept for internal use only !!! + * This is an internal call and nobody in user-space has a damn + * reason to call this taskfile. + * + * ide_raw_taskfile is the one that user-space executes. + */ +int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf) +{ + struct request rq; + ide_task_t args; + + memset(&args, 0, sizeof(ide_task_t)); + + args.tfRegister[IDE_DATA_OFFSET] = taskfile->data; + args.tfRegister[IDE_FEATURE_OFFSET] = taskfile->feature; + args.tfRegister[IDE_NSECTOR_OFFSET] = taskfile->sector_count; + args.tfRegister[IDE_SECTOR_OFFSET] = taskfile->sector_number; + args.tfRegister[IDE_LCYL_OFFSET] = taskfile->low_cylinder; + args.tfRegister[IDE_HCYL_OFFSET] = taskfile->high_cylinder; + args.tfRegister[IDE_SELECT_OFFSET] = taskfile->device_head; + args.tfRegister[IDE_COMMAND_OFFSET] = taskfile->command; + + args.hobRegister[IDE_DATA_OFFSET_HOB] = hobfile->data; + args.hobRegister[IDE_FEATURE_OFFSET_HOB] = hobfile->feature; + args.hobRegister[IDE_NSECTOR_OFFSET_HOB] = hobfile->sector_count; + args.hobRegister[IDE_SECTOR_OFFSET_HOB] = hobfile->sector_number; + args.hobRegister[IDE_LCYL_OFFSET_HOB] = hobfile->low_cylinder; + args.hobRegister[IDE_HCYL_OFFSET_HOB] = hobfile->high_cylinder; + args.hobRegister[IDE_SELECT_OFFSET_HOB] = hobfile->device_head; + args.hobRegister[IDE_CONTROL_OFFSET_HOB] = hobfile->control; + + ide_init_drive_taskfile(&rq); + /* This is kept for internal use only !!! */ + args.command_type = ide_cmd_type_parser (&args); + if (args.command_type != IDE_DRIVE_TASK_NO_DATA) + rq.current_nr_sectors = rq.nr_sectors = (hobfile->sector_count << 8) | taskfile->sector_count; + + rq.cmd = IDE_DRIVE_TASKFILE; + rq.buffer = buf; + rq.special = &args; + return ide_do_drive_cmd(drive, &rq, ide_wait); +} + +int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *args, byte *buf) +{ + struct request rq; + ide_init_drive_taskfile(&rq); + rq.cmd = IDE_DRIVE_TASKFILE; + rq.buffer = buf; + + if (args->command_type != IDE_DRIVE_TASK_NO_DATA) + rq.current_nr_sectors = rq.nr_sectors = (args->hobRegister[IDE_NSECTOR_OFFSET_HOB] << 8) | args->tfRegister[IDE_NSECTOR_OFFSET]; + + rq.special = args; + return ide_do_drive_cmd(drive, &rq, ide_wait); +} + + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG +char * ide_ioctl_verbose (unsigned int cmd) +{ + return("unknown"); +} + +char * ide_task_cmd_verbose (byte task) +{ + return("unknown"); +} +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + +/* + * The taskfile glue table + * + * reqtask.data_phase reqtask.req_cmd + * args.command_type args.handler + * + * TASKFILE_P_OUT_DMAQ ?? ?? + * TASKFILE_P_IN_DMAQ ?? ?? + * TASKFILE_P_OUT_DMA ?? ?? + * TASKFILE_P_IN_DMA ?? ?? + * TASKFILE_P_OUT ?? ?? + * TASKFILE_P_IN ?? ?? + * + * TASKFILE_OUT_DMAQ IDE_DRIVE_TASK_RAW_WRITE NULL + * TASKFILE_IN_DMAQ IDE_DRIVE_TASK_IN NULL + * + * TASKFILE_OUT_DMA IDE_DRIVE_TASK_RAW_WRITE NULL + * TASKFILE_IN_DMA IDE_DRIVE_TASK_IN NULL + * + * TASKFILE_IN_OUT ?? ?? 
+ * + * TASKFILE_MULTI_OUT IDE_DRIVE_TASK_RAW_WRITE task_mulout_intr + * TASKFILE_MULTI_IN IDE_DRIVE_TASK_IN task_mulin_intr + * + * TASKFILE_OUT IDE_DRIVE_TASK_RAW_WRITE task_out_intr + * TASKFILE_OUT IDE_DRIVE_TASK_OUT task_out_intr + * + * TASKFILE_IN IDE_DRIVE_TASK_IN task_in_intr + * TASKFILE_NO_DATA IDE_DRIVE_TASK_NO_DATA task_no_data_intr + * + * IDE_DRIVE_TASK_SET_XFER task_no_data_intr + * IDE_DRIVE_TASK_INVALID + * + */ + +#define MAX_DMA (256*SECTOR_WORDS) + +int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + ide_task_request_t *req_task; + ide_task_t args; + + byte *outbuf = NULL; + byte *inbuf = NULL; + task_ioreg_t *argsptr = args.tfRegister; + task_ioreg_t *hobsptr = args.hobRegister; + int err = 0; + int tasksize = sizeof(struct ide_task_request_s); + int taskin = 0; + int taskout = 0; + + req_task = kmalloc(tasksize, GFP_KERNEL); + if (req_task == NULL) return -ENOMEM; + memset(req_task, 0, tasksize); + if (copy_from_user(req_task, (void *) arg, tasksize)) { + kfree(req_task); + return -EFAULT; + } + + taskout = (int) req_task->out_size; + taskin = (int) req_task->in_size; + + if (taskout) { + int outtotal = tasksize; + outbuf = kmalloc(taskout, GFP_KERNEL); + if (outbuf == NULL) { + err = -ENOMEM; + goto abort; + } + memset(outbuf, 0, taskout); + if (copy_from_user(outbuf, (void *)arg + outtotal, taskout)) { + err = -EFAULT; + goto abort; + } + } + + if (taskin) { + int intotal = tasksize + taskout; + inbuf = kmalloc(taskin, GFP_KERNEL); + if (inbuf == NULL) { + err = -ENOMEM; + goto abort; + } + memset(inbuf, 0, taskin); + if (copy_from_user(inbuf, (void *)arg + intotal , taskin)) { + err = -EFAULT; + goto abort; + } + } + + memset(argsptr, 0, HDIO_DRIVE_TASK_HDR_SIZE); + memset(hobsptr, 0, HDIO_DRIVE_HOB_HDR_SIZE); + memcpy(argsptr, req_task->io_ports, HDIO_DRIVE_TASK_HDR_SIZE); + memcpy(hobsptr, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE); + + args.tf_in_flags = req_task->in_flags; + args.tf_out_flags = req_task->out_flags; + args.data_phase = req_task->data_phase; + args.command_type = req_task->req_cmd; + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG + DTF("%s: ide_ioctl_cmd %s: ide_task_cmd %s\n", + drive->name, + ide_ioctl_verbose(cmd), + ide_task_cmd_verbose(args.tfRegister[IDE_COMMAND_OFFSET])); +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + + switch(req_task->data_phase) { + case TASKFILE_OUT_DMAQ: + case TASKFILE_OUT_DMA: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + break; + case TASKFILE_IN_DMAQ: + case TASKFILE_IN_DMA: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; + case TASKFILE_IN_OUT: +#if 0 + args.prehandler = &pre_task_out_intr; + args.handler = &task_out_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + args.prehandler = NULL; + args.handler = &task_in_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; +#else + err = -EFAULT; + goto abort; +#endif + case TASKFILE_MULTI_OUT: + if (drive->mult_count) { + args.prehandler = &pre_task_out_intr; + args.handler = &task_mulout_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + } else { + /* (hs): give up if multcount is not set */ + printk("%s: %s Multimode Write " \ + "multcount is not set\n", + drive->name, __FUNCTION__); + err = -EPERM; + goto abort; + } + break; + case 
TASKFILE_OUT: + args.prehandler = &pre_task_out_intr; + args.handler = &task_out_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, outbuf); + break; + case TASKFILE_MULTI_IN: + if (drive->mult_count) { + args.prehandler = NULL; + args.handler = &task_mulin_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + } else { + /* (hs): give up if multcount is not set */ + printk("%s: %s Multimode Read failure " \ + "multcount is not set\n", + drive->name, __FUNCTION__); + err = -EPERM; + goto abort; + } + break; + case TASKFILE_IN: + args.prehandler = NULL; + args.handler = &task_in_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, inbuf); + break; + case TASKFILE_NO_DATA: + args.prehandler = NULL; + args.handler = &task_no_data_intr; + args.posthandler = NULL; + err = ide_raw_taskfile(drive, &args, NULL); + break; + default: + args.prehandler = NULL; + args.handler = NULL; + args.posthandler = NULL; + err = -EFAULT; + goto abort; + } + + memcpy(req_task->io_ports, &(args.tfRegister), HDIO_DRIVE_TASK_HDR_SIZE); + memcpy(req_task->hob_ports, &(args.hobRegister), HDIO_DRIVE_HOB_HDR_SIZE); + req_task->in_flags = args.tf_in_flags; + req_task->out_flags = args.tf_out_flags; + + if (copy_to_user((void *)arg, req_task, tasksize)) { + err = -EFAULT; + goto abort; + } + if (taskout) { + int outtotal = tasksize; + if (copy_to_user((void *)arg+outtotal, outbuf, taskout)) { + err = -EFAULT; + goto abort; + } + } + if (taskin) { + int intotal = tasksize + taskout; + if (copy_to_user((void *)arg+intotal, inbuf, taskin)) { + err = -EFAULT; + goto abort; + } + } +abort: + kfree(req_task); + if (outbuf != NULL) + kfree(outbuf); + if (inbuf != NULL) + kfree(inbuf); + return err; +} + +EXPORT_SYMBOL(task_read_24); +EXPORT_SYMBOL(do_rw_taskfile); +EXPORT_SYMBOL(do_taskfile); +// EXPORT_SYMBOL(flagged_taskfile); + +//EXPORT_SYMBOL(ide_end_taskfile); + +EXPORT_SYMBOL(set_multmode_intr); +EXPORT_SYMBOL(set_geometry_intr); +EXPORT_SYMBOL(recal_intr); + +EXPORT_SYMBOL(task_no_data_intr); +EXPORT_SYMBOL(task_in_intr); +EXPORT_SYMBOL(task_mulin_intr); +EXPORT_SYMBOL(pre_task_out_intr); +EXPORT_SYMBOL(task_out_intr); +EXPORT_SYMBOL(task_mulout_intr); + +EXPORT_SYMBOL(ide_init_drive_taskfile); +EXPORT_SYMBOL(ide_wait_taskfile); +EXPORT_SYMBOL(ide_raw_taskfile); +EXPORT_SYMBOL(ide_pre_handler_parser); +EXPORT_SYMBOL(ide_handler_parser); +EXPORT_SYMBOL(ide_cmd_type_parser); +EXPORT_SYMBOL(ide_taskfile_ioctl); + +#ifdef CONFIG_PKT_TASK_IOCTL + +#if 0 +{ + +{ /* start cdrom */ + + struct cdrom_info *info = drive->driver_data; + + if (info->dma) { + if (info->cmd == READ) { + info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive); + } else if (info->cmd == WRITE) { + info->dma = !HWIF(drive)->dmaproc(ide_dma_write, drive); + } else { + printk("ide-cd: DMA set, but not allowed\n"); + } + } + + /* Set up the controller registers. 
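+ * For ATAPI packet commands the transfer length is programmed into the cylinder pair (LCYL = low byte, HCYL = high byte) rather than as a sector count.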
*/ + OUT_BYTE (info->dma, IDE_FEATURE_REG); + OUT_BYTE (0, IDE_NSECTOR_REG); + OUT_BYTE (0, IDE_SECTOR_REG); + + OUT_BYTE (xferlen & 0xff, IDE_LCYL_REG); + OUT_BYTE (xferlen >> 8 , IDE_HCYL_REG); + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl, IDE_CONTROL_REG); + + if (info->dma) + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + + if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) { + ide_set_handler (drive, handler, WAIT_CMD, cdrom_timer_expiry); + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */ + return ide_started; + } else { + OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */ + return (*handler) (drive); + } + +} /* end cdrom */ + +{ /* start floppy */ + + idefloppy_floppy_t *floppy = drive->driver_data; + idefloppy_bcount_reg_t bcount; + int dma_ok = 0; + + floppy->pc=pc; /* Set the current packet command */ + + pc->retries++; + pc->actually_transferred=0; /* We haven't transferred any data yet */ + pc->current_position=pc->buffer; + bcount.all = IDE_MIN(pc->request_transfer, 63 * 1024); + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { + (void) HWIF(drive)->dmaproc(ide_dma_off, drive); + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_IDEDMA */ + +} /* end floppy */ + +{ /* start tape */ + + idetape_tape_t *tape = drive->driver_data; + +#ifdef CONFIG_BLK_DEV_IDEDMA + if (test_and_clear_bit (PC_DMA_ERROR, &pc->flags)) { + printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n"); + (void) HWIF(drive)->dmaproc(ide_dma_off, drive); + } + if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma) + dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); +#endif /* CONFIG_BLK_DEV_IDEDMA */ + + if (IDE_CONTROL_REG) + OUT_BYTE (drive->ctl,IDE_CONTROL_REG); + OUT_BYTE (dma_ok ? 
1:0,IDE_FEATURE_REG); /* Use PIO/DMA */ + OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG); + OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG); + OUT_BYTE (drive->select.all,IDE_SELECT_REG); +#ifdef CONFIG_BLK_DEV_IDEDMA + if (dma_ok) { /* Begin DMA, if necessary */ + set_bit (PC_DMA_IN_PROGRESS, &pc->flags); + (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive)); + } +#endif /* CONFIG_BLK_DEV_IDEDMA */ + if (test_bit(IDETAPE_DRQ_INTERRUPT, &tape->flags)) { + ide_set_handler(drive, &idetape_transfer_pc, IDETAPE_WAIT_CMD, NULL); + OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG); + return ide_started; + } else { + OUT_BYTE(WIN_PACKETCMD, IDE_COMMAND_REG); + return idetape_transfer_pc(drive); + } + +} /* end tape */ + +} +#endif + +int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ +#if 0 + switch(req_task->data_phase) { + case TASKFILE_P_OUT_DMAQ: + case TASKFILE_P_IN_DMAQ: + case TASKFILE_P_OUT_DMA: + case TASKFILE_P_IN_DMA: + case TASKFILE_P_OUT: + case TASKFILE_P_IN: + } +#endif + return -ENOMSG; +} + +EXPORT_SYMBOL(pkt_taskfile_ioctl); + +#endif /* CONFIG_PKT_TASK_IOCTL */ diff --git a/xen-2.4.16/drivers/ide/ide.c b/xen-2.4.16/drivers/ide/ide.c new file mode 100644 index 0000000000..af3694bf85 --- /dev/null +++ b/xen-2.4.16/drivers/ide/ide.c @@ -0,0 +1,4167 @@ +/* + * linux/drivers/ide/ide.c Version 6.31 June 9, 2000 + * + * Copyright (C) 1994-1998 Linus Torvalds & authors (see below) + */ + +/* + * Mostly written by Mark Lord + * and Gadi Oxman + * and Andre Hedrick + * + * See linux/MAINTAINERS for address of current maintainer. + * + * This is the multiple IDE interface driver, as evolved from hd.c. + * It supports up to MAX_HWIFS IDE interfaces, on one or more IRQs (usually 14 & 15). + * There can be up to two drives per interface, as per the ATA-2 spec. + * + * Primary: ide0, port 0x1f0; major=3; hda is minor=0; hdb is minor=64 + * Secondary: ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64 + * Tertiary: ide2, port 0x???; major=33; hde is minor=0; hdf is minor=64 + * Quaternary: ide3, port 0x???; major=34; hdg is minor=0; hdh is minor=64 + * ... + * + * From hd.c: + * | + * | It traverses the request-list, using interrupts to jump between functions. + * | As nearly all functions can be called within interrupts, we may not sleep. + * | Special care is recommended. Have Fun! + * | + * | modified by Drew Eckhardt to check nr of hd's from the CMOS. + * | + * | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * | in the early extended-partition checks and added DM partitions. + * | + * | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI). + * | + * | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads", + * | and general streamlining by Mark Lord (mlord@pobox.com). + * + * October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by: + * + * Mark Lord (mlord@pobox.com) (IDE Perf.Pkg) + * Delman Lee (delman@ieee.org) ("Mr. atdisk2") + * Scott Snyder (snyder@fnald0.fnal.gov) (ATAPI IDE cd-rom) + * + * This was a rewrite of just about everything from hd.c, though some original + * code is still sprinkled about. Think of it as a major evolution, with + * inspiration from lots of linux users, esp. hamish@zot.apana.org.au + * + * Version 1.0 ALPHA initial code, primary i/f working okay + * Version 1.3 BETA dual i/f on shared irq tested & working! + * Version 1.4 BETA added auto probing for irq(s) + * Version 1.5 BETA added ALPHA (untested) support for IDE cd-roms, + * ... 
+ * Version 5.50 allow values as small as 20 for idebus= + * Version 5.51 force non io_32bit in drive_cmd_intr() + * change delay_10ms() to delay_50ms() to fix problems + * Version 5.52 fix incorrect invalidation of removable devices + * add "hdx=slow" command line option + * Version 5.60 start to modularize the driver; the disk and ATAPI + * drivers can be compiled as loadable modules. + * move IDE probe code to ide-probe.c + * move IDE disk code to ide-disk.c + * add support for generic IDE device subdrivers + * add m68k code from Geert Uytterhoeven + * probe all interfaces by default + * add ioctl to (re)probe an interface + * Version 6.00 use per device request queues + * attempt to optimize shared hwgroup performance + * add ioctl to manually adjust bandwidth algorithms + * add kerneld support for the probe module + * fix bug in ide_error() + * fix bug in the first ide_get_lock() call for Atari + * don't flush leftover data for ATAPI devices + * Version 6.01 clear hwgroup->active while the hwgroup sleeps + * support HDIO_GETGEO for floppies + * Version 6.02 fix ide_ack_intr() call + * check partition table on floppies + * Version 6.03 handle bad status bit sequencing in ide_wait_stat() + * Version 6.10 deleted old entries from this list of updates + * replaced triton.c with ide-dma.c generic PCI DMA + * added support for BIOS-enabled UltraDMA + * rename all "promise" things to "pdc4030" + * fix EZ-DRIVE handling on small disks + * Version 6.11 fix probe error in ide_scan_devices() + * fix ancient "jiffies" polling bugs + * mask all hwgroup interrupts on each irq entry + * Version 6.12 integrate ioctl and proc interfaces + * fix parsing of "idex=" command line parameter + * Version 6.13 add support for ide4/ide5 courtesy rjones@orchestream.com + * Version 6.14 fixed IRQ sharing among PCI devices + * Version 6.15 added SMP awareness to IDE drivers + * Version 6.16 fixed various bugs; even more SMP friendly + * Version 6.17 fix for newest EZ-Drive problem + * Version 6.18 default unpartitioned-disk translation now "BIOS LBA" + * Version 6.19 Re-design for a UNIFORM driver for all platforms, + * model based on suggestions from Russell King and + * Geert Uytterhoeven + * Promise DC4030VL now supported. + * add support for ide6/ide7 + * delay_50ms() changed to ide_delay_50ms() and exported. + * Version 6.20 Added/Fixed Generic ATA-66 support and hwif detection. + * Added hdx=flash to allow for second flash disk + * detection w/o the hang loop. + * Added support for ide8/ide9 + * Added idex=ata66 for the quirky chipsets that are + * ATA-66 compliant, but have yet to determine a method + * of verification of the 80c cable presence. + * Specifically Promise's PDC20262 chipset. + * Version 6.21 Fixing/Fixed SMP spinlock issue with insight from an old + * hat that clarified original low level driver design. + * Version 6.30 Added SMP support; fixed multmode issues. 
-ml + * Version 6.31 Debug Share INTR's and request queue streaming + * Native ATA-100 support + * Prep for Cascades Project + * + * Some additional driver compile-time options are in ./include/linux/ide.h + * + * To do, in likely order of completion: + * - modify kernel to obtain BIOS geometry for drives on 2nd/3rd/4th i/f + * + */ + +#define REVISION "Revision: 6.31" +#define VERSION "Id: ide.c 6.31 2000/06/09" + +#undef REALLY_SLOW_IO /* most systems can safely undef this */ + +#define _IDE_C /* Tell ide.h it's really us */ + +#include +#include +#include +#include +/*#include */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*#include */ +/*#include */ +/*#include */ + +#include +#include +#include +#include +#include + +#include "ide_modes.h" + +#ifdef CONFIG_KMOD +#include +#endif /* CONFIG_KMOD */ + +#ifdef CONFIG_IDE_TASKFILE_IO +# define __TASKFILE__IO +#else /* CONFIG_IDE_TASKFILE_IO */ +# undef __TASKFILE__IO +#endif /* CONFIG_IDE_TASKFILE_IO */ + +#ifdef __TASKFILE__IO +#else /* !__TASKFILE__IO */ +#endif /* __TASKFILE__IO */ + + + + +/* XXXXXXXXXXXX This may be replaced by fs/block_dev.c versions!!! XXXXX */ +/* (only included here so the hypervisor will link :-) */ +int check_disk_change(kdev_t dev) { return 0; } +int unregister_blkdev(unsigned int major, const char * name) { return 0; } +/* And these ones are from fs/inode.c... */ +int invalidate_device(kdev_t dev, int do_sync) { return 0; } +/* fs/buffer.c... */ +void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) { } +/* fs/partitions/check.c... */ +void grok_partitions(struct gendisk *dev, int drive, + unsigned minors, long size) { } +void register_disk(struct gendisk *dev, kdev_t first, + unsigned minors, struct block_device_operations *ops, + long size) { } +/* fs/devices.c... */ +const char * kdevname(kdev_t dev) { return NULL; } +/* End of XXXXXX region */ + + + + +/* default maximum number of failures */ +#define IDE_DEFAULT_MAX_FAILURES 1 + +static const byte ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR, IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR }; + +static int idebus_parameter; /* holds the "idebus=" parameter */ +static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ +static int initializing; /* set while initializing built-in drivers */ + +#ifdef CONFIG_BLK_DEV_IDEPCI +static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ +#endif /* CONFIG_BLK_DEV_IDEPCI */ + +#if defined(__mc68000__) || defined(CONFIG_APUS) +/* + * ide_lock is used by the Atari code to obtain access to the IDE interrupt, + * which is shared between several drivers. + */ +static int ide_lock; +#endif /* __mc68000__ || CONFIG_APUS */ + +int noautodma = 0; + +/* + * ide_modules keeps track of the available IDE chipset/probe/driver modules. + */ +ide_module_t *ide_modules; +ide_module_t *ide_probe; + +/* + * This is declared extern in ide.h, for access by other IDE modules: + */ +ide_hwif_t ide_hwifs[MAX_HWIFS]; /* master data repository */ + +#if (DISK_RECOVERY_TIME > 0) +/* + * For really screwy hardware (hey, at least it *can* be used with Linux) + * we can enforce a minimum delay time between successive operations. 
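+ * read_timer() below builds a fine-grained timestamp by latching and reading the 8254 PIT (ports 0x43/0x40) and combining the count with jiffies (11932 PIT ticks per 10ms tick at HZ=100).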
+ */ +static unsigned long read_timer (void) +{ + unsigned long t, flags; + int i; + + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + t = jiffies * 11932; + outb_p(0, 0x43); + i = inb_p(0x40); + i |= inb(0x40) << 8; + __restore_flags(flags); /* local CPU only */ + return (t - i); +} +#endif /* DISK_RECOVERY_TIME */ + +static inline void set_recovery_timer (ide_hwif_t *hwif) +{ +#if (DISK_RECOVERY_TIME > 0) + hwif->last_time = read_timer(); +#endif /* DISK_RECOVERY_TIME */ +} + +/* + * Do not even *think* about calling this! + */ +static void init_hwif_data (unsigned int index) +{ + unsigned int unit; + hw_regs_t hw; + ide_hwif_t *hwif = &ide_hwifs[index]; + + /* bulk initialize hwif & drive info with zeros */ + memset(hwif, 0, sizeof(ide_hwif_t)); + memset(&hw, 0, sizeof(hw_regs_t)); + + /* fill in any non-zero initial values */ + hwif->index = index; + ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq); + memcpy(&hwif->hw, &hw, sizeof(hw)); + memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports)); + hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET]; +#ifdef CONFIG_BLK_DEV_HD + if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) + hwif->noprobe = 1; /* may be overridden by ide_setup() */ +#endif /* CONFIG_BLK_DEV_HD */ + hwif->major = ide_hwif_to_major[index]; + hwif->name[0] = 'i'; + hwif->name[1] = 'd'; + hwif->name[2] = 'e'; + hwif->name[3] = '0' + index; + hwif->bus_state = BUSSTATE_ON; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + + drive->media = ide_disk; + drive->select.all = (unit<<4)|0xa0; + drive->hwif = hwif; + drive->ctl = 0x08; + drive->ready_stat = READY_STAT; + drive->bad_wstat = BAD_W_STAT; + drive->special.b.recalibrate = 1; + drive->special.b.set_geometry = 1; + drive->name[0] = 'h'; + drive->name[1] = 'd'; + drive->name[2] = 'a' + (index * MAX_DRIVES) + unit; + drive->max_failures = IDE_DEFAULT_MAX_FAILURES; + /*init_waitqueue_head(&drive->wqueue);*/ + } +} + +/* + * init_ide_data() sets reasonable default values into all fields + * of all instances of the hwifs and drives, but only on the first call. + * Subsequent calls have no effect (they don't wipe out anything). + * + * This routine is normally called at driver initialization time, + * but may also be called MUCH earlier during kernel "command-line" + * parameter processing. As such, we cannot depend on any other parts + * of the kernel (such as memory allocation) to be functioning yet. + * + * This is too bad, as otherwise we could dynamically allocate the + * ide_drive_t structs as needed, rather than always consuming memory + * for the max possible number (MAX_HWIFS * MAX_DRIVES) of them. + */ +#define MAGIC_COOKIE 0x12345678 +static void __init init_ide_data (void) +{ + unsigned int index; + static unsigned long magic_cookie = MAGIC_COOKIE; + + if (magic_cookie != MAGIC_COOKIE) + return; /* already initialized */ + magic_cookie = 0; + + /* Initialise all interface structures */ + for (index = 0; index < MAX_HWIFS; ++index) + init_hwif_data(index); + + /* Add default hw interfaces */ + ide_init_default_hwifs(); + + idebus_parameter = 0; + system_bus_speed = 0; +} + +/* + * CompactFlash cards and their brethern pretend to be removable hard disks, except: + * (1) they never have a slave unit, and + * (2) they don't have doorlock mechanisms. + * This test catches them, and is invoked elsewhere when setting appropriate config bits. 
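+ * (For illustration: a CompactFlash card reports 0x848a in the first
+ * word of its IDENTIFY data, the CFA signature the test below keys on;
+ * the model-string compares catch older cards that predate it.)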
+ * + * FIXME: This treatment is probably applicable for *all* PCMCIA (PC CARD) devices, + * so in linux 2.3.x we should change this to just treat all PCMCIA drives this way, + * and get rid of the model-name tests below (too big of an interface change for 2.2.x). + * At that time, we might also consider parameterizing the timeouts and retries, + * since these are MUCH faster than mechanical drives. -M.Lord + */ +int drive_is_flashcard (ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + + if (drive->removable && id != NULL) { + if (id->config == 0x848a) return 1; /* CompactFlash */ + if (!strncmp(id->model, "KODAK ATA_FLASH", 15) /* Kodak */ + || !strncmp(id->model, "Hitachi CV", 10) /* Hitachi */ + || !strncmp(id->model, "SunDisk SDCFB", 13) /* SunDisk */ + || !strncmp(id->model, "HAGIWARA HPC", 12) /* Hagiwara */ + || !strncmp(id->model, "LEXAR ATA_FLASH", 15) /* Lexar */ + || !strncmp(id->model, "ATA_FLASH", 9)) /* Simple Tech */ + { + return 1; /* yes, it is a flash memory card */ + } + } + return 0; /* no, it is not a flash memory card */ +} + +/* + * ide_system_bus_speed() returns what we think is the system VESA/PCI + * bus speed (in MHz). This is used for calculating interface PIO timings. + * The default is 40 for known PCI systems, 50 otherwise. + * The "idebus=xx" parameter can be used to override this value. + * The actual value to be used is computed/displayed the first time through. + */ +int ide_system_bus_speed (void) +{ + if (!system_bus_speed) { + if (idebus_parameter) + system_bus_speed = idebus_parameter; /* user supplied value */ +#ifdef CONFIG_PCI + else if (pci_present()) + system_bus_speed = 33; /* safe default value for PCI */ +#endif /* CONFIG_PCI */ + else + system_bus_speed = 50; /* safe default value for VESA and PCI */ + printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed, + idebus_parameter ? "" : "; override with idebus=xx"); + } + return system_bus_speed; +} + +#if SUPPORT_VLB_SYNC +/* + * Some localbus EIDE interfaces require a special access sequence + * when using 32-bit I/O instructions to transfer data. We call this + * the "vlb_sync" sequence, which consists of three successive reads + * of the sector count register location, with interrupts disabled + * to ensure that the reads all happen together. 
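+ * The sequence is requested per drive through its io_32bit setting:
+ * bit 0 enables 32-bit transfers and bit 1 asks for vlb_sync, which
+ * is why the transfer routines below test "io_32bit & 2".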
+ */ +static inline void do_vlb_sync (ide_ioreg_t port) { + (void) inb (port); + (void) inb (port); + (void) inb (port); +} +#endif /* SUPPORT_VLB_SYNC */ + +/* + * This is used for most PIO data transfers *from* the IDE interface + */ +void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit; + + /* first check if this controller has defined a special function + * for handling polled ide transfers + */ + + if(HWIF(drive)->ideproc) { + HWIF(drive)->ideproc(ideproc_ide_input_data, + drive, buffer, wcount); + return; + } + + io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + do_vlb_sync(IDE_NSECTOR_REG); + insl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + insl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + *ptr++ = inw_p(IDE_DATA_REG); + *ptr++ = inw_p(IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + insw(IDE_DATA_REG, buffer, wcount<<1); + } +} + +/* + * This is used for most PIO data transfers *to* the IDE interface + */ +void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount) +{ + byte io_32bit; + + if(HWIF(drive)->ideproc) { + HWIF(drive)->ideproc(ideproc_ide_output_data, + drive, buffer, wcount); + return; + } + + io_32bit = drive->io_32bit; + + if (io_32bit) { +#if SUPPORT_VLB_SYNC + if (io_32bit & 2) { + unsigned long flags; + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + do_vlb_sync(IDE_NSECTOR_REG); + outsl(IDE_DATA_REG, buffer, wcount); + __restore_flags(flags); /* local CPU only */ + } else +#endif /* SUPPORT_VLB_SYNC */ + outsl(IDE_DATA_REG, buffer, wcount); + } else { +#if SUPPORT_SLOW_DATA_PORTS + if (drive->slow) { + unsigned short *ptr = (unsigned short *) buffer; + while (wcount--) { + outw_p(*ptr++, IDE_DATA_REG); + outw_p(*ptr++, IDE_DATA_REG); + } + } else +#endif /* SUPPORT_SLOW_DATA_PORTS */ + outsw(IDE_DATA_REG, buffer, wcount<<1); + } +} + +/* + * The following routines are mainly used by the ATAPI drivers. + * + * These routines will round up any request for an odd number of bytes, + * so if an odd bytecount is specified, be sure that there's at least one + * extra byte allocated for the buffer. 
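+ * For example, a request for 17 bytes actually transfers 18: the
+ * routines below move bytecount/4 longwords first, then one final
+ * 16-bit word for the two or three bytes that remain, so the buffer
+ * must really be 18 bytes long in that case.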
+ */ +void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +{ + if(HWIF(drive)->ideproc) { + HWIF(drive)->ideproc(ideproc_atapi_input_bytes, + drive, buffer, bytecount); + return; + } + + ++bytecount; +#if defined(CONFIG_ATARI) || defined(CONFIG_Q40) + if (MACH_IS_ATARI || MACH_IS_Q40) { + /* Atari has a byte-swapped IDE interface */ + insw_swapw(IDE_DATA_REG, buffer, bytecount / 2); + return; + } +#endif /* CONFIG_ATARI */ + ide_input_data (drive, buffer, bytecount / 4); + if ((bytecount & 0x03) >= 2) + insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1); +} + +void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount) +{ + if(HWIF(drive)->ideproc) { + HWIF(drive)->ideproc(ideproc_atapi_output_bytes, + drive, buffer, bytecount); + return; + } + + ++bytecount; +#if defined(CONFIG_ATARI) || defined(CONFIG_Q40) + if (MACH_IS_ATARI || MACH_IS_Q40) { + /* Atari has a byte-swapped IDE interface */ + outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2); + return; + } +#endif /* CONFIG_ATARI */ + ide_output_data (drive, buffer, bytecount / 4); + if ((bytecount & 0x03) >= 2) + outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1); +} + +/* + * Needed for PCI irq sharing + */ +//static inline +int drive_is_ready (ide_drive_t *drive) +{ + byte stat = 0; + if (drive->waiting_for_dma) + return HWIF(drive)->dmaproc(ide_dma_test_irq, drive); +#if 0 + udelay(1); /* need to guarantee 400ns since last command was issued */ +#endif + +#ifdef CONFIG_IDEPCI_SHARE_IRQ + /* + * We do a passive status test under shared PCI interrupts on + * cards that truly share the ATA side interrupt, but may also share + * an interrupt with another pci card/device. We make no assumptions + * about possible isa-pnp and pci-pnp issues yet. + */ + if (IDE_CONTROL_REG) + stat = GET_ALTSTAT(); + else +#endif /* CONFIG_IDEPCI_SHARE_IRQ */ + stat = GET_STAT(); /* Note: this may clear a pending IRQ!! */ + + if (stat & BUSY_STAT) + return 0; /* drive busy: definitely not interrupting */ + return 1; /* drive ready: *might* be interrupting */ +} + +/* + * This is our end_request replacement function. + */ +void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + struct request *rq; + unsigned long flags; + ide_drive_t *drive = hwgroup->drive; + + spin_lock_irqsave(&io_request_lock, flags); + rq = hwgroup->rq; + + /* + * decide whether to reenable DMA -- 3 is a random magic for now, + * if we DMA timeout more than 3 times, just stay in PIO + */ + if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) { + drive->state = 0; + hwgroup->hwif->dmaproc(ide_dma_on, drive); + } + + if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + add_blkdev_randomness(MAJOR(rq->rq_dev)); + blkdev_dequeue_request(rq); + hwgroup->rq = NULL; + end_that_request_last(rq); + } + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * This should get invoked any time we exit the driver to + * wait for an interrupt response from a drive. handler() points + * at the appropriate code to handle the next interrupt, and a + * timer is started to prevent us from waiting forever in case + * something goes wrong (see the ide_timer_expiry() handler later on). 
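+ *
+ * A typical caller is ide_cmd() below, which does
+ *	ide_set_handler(drive, handler, WAIT_CMD, NULL);
+ * before writing the command byte, so that the handler (usually
+ * drive_cmd_intr) runs when the drive raises its completion interrupt.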
+ */
+void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
+		      unsigned int timeout, ide_expiry_t *expiry)
+{
+	unsigned long flags;
+	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+
+	spin_lock_irqsave(&io_request_lock, flags);
+	if (hwgroup->handler != NULL) {
+		printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n",
+			drive->name, hwgroup->handler, handler);
+	}
+	hwgroup->handler	= handler;
+	hwgroup->expiry		= expiry;
+	hwgroup->timer.expires	= jiffies + timeout;
+	add_timer(&hwgroup->timer);
+	spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * current_capacity() returns the capacity (in sectors) of a drive
+ * according to its current geometry/LBA settings.
+ */
+unsigned long current_capacity (ide_drive_t *drive)
+{
+	if (!drive->present)
+		return 0;
+	if (drive->driver != NULL)
+		return DRIVER(drive)->capacity(drive);
+	return 0;
+}
+
+extern struct block_device_operations ide_fops[];
+/*
+ * ide_geninit() is called exactly *once* for each interface.
+ */
+void ide_geninit (ide_hwif_t *hwif)
+{
+	unsigned int unit;
+	struct gendisk *gd = hwif->gd;
+
+	for (unit = 0; unit < MAX_DRIVES; ++unit) {
+		ide_drive_t *drive = &hwif->drives[unit];
+
+		if (!drive->present)
+			continue;
+		if (drive->media!=ide_disk && drive->media!=ide_floppy)
+			continue;
+		register_disk(gd,MKDEV(hwif->major,unit<<PARTN_BITS),
+#ifdef CONFIG_BLK_DEV_ISAPNP
+			(drive->forced_geom && drive->noprobe) ? 1 :
+#endif /* CONFIG_BLK_DEV_ISAPNP */
+			1<<PARTN_BITS, ide_fops,
+			current_capacity(drive));
+	}
+}
+
+static ide_startstop_t do_reset1 (ide_drive_t *, int);		/* needed below */
+
+/*
+ * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an atapi drive reset operation. If the drive has not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
+ */
+static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
+{
+	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+	byte stat;
+
+	SELECT_DRIVE(HWIF(drive),drive);
+	udelay (10);
+
+	if (OK_STAT(stat=GET_STAT(), 0, BUSY_STAT)) {
+		printk("%s: ATAPI reset complete\n", drive->name);
+	} else {
+		if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) {
+			ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL);
+			return ide_started;	/* continue polling */
+		}
+		hwgroup->poll_timeout = 0;	/* end of polling */
+		printk("%s: ATAPI reset timed-out, status=0x%02x\n", drive->name, stat);
+		return do_reset1 (drive, 1);	/* do it the old fashioned way */
+	}
+	hwgroup->poll_timeout = 0;	/* done polling */
+	return ide_stopped;
+}
+
+/*
+ * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
+ * during an ide reset operation. If the drives have not yet responded,
+ * and we have not yet hit our maximum waiting time, then the timer is restarted
+ * for another 50ms.
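+ * Polling gives up once hwgroup->poll_timeout (set by do_reset1() to
+ * jiffies + WAIT_WORSTCASE, i.e. up to 30 seconds) has passed.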
+ */ +static ide_startstop_t reset_pollfunc (ide_drive_t *drive) +{ + ide_hwgroup_t *hwgroup = HWGROUP(drive); + ide_hwif_t *hwif = HWIF(drive); + byte tmp; + + if (!OK_STAT(tmp=GET_STAT(), 0, BUSY_STAT)) { + if (0 < (signed long)(hwgroup->poll_timeout - jiffies)) { + ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL); + return ide_started; /* continue polling */ + } + printk("%s: reset timed-out, status=0x%02x\n", hwif->name, tmp); + drive->failures++; + } else { + printk("%s: reset: ", hwif->name); + if ((tmp = GET_ERR()) == 1) { + printk("success\n"); + drive->failures = 0; + } else { + drive->failures++; +#if FANCY_STATUS_DUMPS + printk("master: "); + switch (tmp & 0x7f) { + case 1: printk("passed"); + break; + case 2: printk("formatter device error"); + break; + case 3: printk("sector buffer error"); + break; + case 4: printk("ECC circuitry error"); + break; + case 5: printk("controlling MPU error"); + break; + default:printk("error (0x%02x?)", tmp); + } + if (tmp & 0x80) + printk("; slave: failed"); + printk("\n"); +#else + printk("failed\n"); +#endif /* FANCY_STATUS_DUMPS */ + } + } + hwgroup->poll_timeout = 0; /* done polling */ + return ide_stopped; +} + +static void check_dma_crc (ide_drive_t *drive) +{ + if (drive->crc_count) { + (void) HWIF(drive)->dmaproc(ide_dma_off_quietly, drive); + if ((HWIF(drive)->speedproc) != NULL) + HWIF(drive)->speedproc(drive, ide_auto_reduce_xfer(drive)); + if (drive->current_speed >= XFER_SW_DMA_0) + (void) HWIF(drive)->dmaproc(ide_dma_on, drive); + } else { + (void) HWIF(drive)->dmaproc(ide_dma_off, drive); + } +} + +static void pre_reset (ide_drive_t *drive) +{ + if (drive->driver != NULL) + DRIVER(drive)->pre_reset(drive); + + if (!drive->keep_settings) { + if (drive->using_dma) { + check_dma_crc(drive); + } else { + drive->unmask = 0; + drive->io_32bit = 0; + } + return; + } + if (drive->using_dma) + check_dma_crc(drive); +} + +/* + * do_reset1() attempts to recover a confused drive by resetting it. + * Unfortunately, resetting a disk drive actually resets all devices on + * the same interface, so it can really be thought of as resetting the + * interface rather than resetting the drive. + * + * ATAPI devices have their own reset mechanism which allows them to be + * individually reset without clobbering other devices on the same interface. + * + * Unfortunately, the IDE interface does not generate an interrupt to let + * us know when the reset operation has finished, so we must poll for this. + * Equally poor, though, is the fact that this may a very long time to complete, + * (up to 30 seconds worstcase). So, instead of busy-waiting here for it, + * we set a timer to poll at 50ms intervals. + */ +static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) +{ + unsigned int unit; + unsigned long flags; + ide_hwif_t *hwif = HWIF(drive); + ide_hwgroup_t *hwgroup = HWGROUP(drive); + + __save_flags(flags); /* local CPU only */ + __cli(); /* local CPU only */ + + /* For an ATAPI device, first try an ATAPI SRST. */ + if (drive->media != ide_disk && !do_not_try_atapi) { + pre_reset(drive); + SELECT_DRIVE(hwif,drive); + udelay (20); + OUT_BYTE (WIN_SRST, IDE_COMMAND_REG); + hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; + ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20, NULL); + __restore_flags (flags); /* local CPU only */ + return ide_started; + } + + /* + * First, reset any device state data we were maintaining + * for any of the drives on this interface. 
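+	 * (Via pre_reset() this also decides, with check_dma_crc() above,
+	 * whether each drive keeps DMA across the reset: a drive that has
+	 * been accumulating UDMA CRC errors is first dropped to a slower
+	 * transfer mode.)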
+ */ + for (unit = 0; unit < MAX_DRIVES; ++unit) + pre_reset(&hwif->drives[unit]); + +#if OK_TO_RESET_CONTROLLER + if (!IDE_CONTROL_REG) { + __restore_flags(flags); + return ide_stopped; + } + /* + * Note that we also set nIEN while resetting the device, + * to mask unwanted interrupts from the interface during the reset. + * However, due to the design of PC hardware, this will cause an + * immediate interrupt due to the edge transition it produces. + * This single interrupt gives us a "fast poll" for drives that + * recover from reset very quickly, saving us the first 50ms wait time. + */ + OUT_BYTE(drive->ctl|6,IDE_CONTROL_REG); /* set SRST and nIEN */ + udelay(10); /* more than enough time */ + if (drive->quirk_list == 2) { + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear SRST and nIEN */ + } else { + OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* clear SRST, leave nIEN */ + } + udelay(10); /* more than enough time */ + hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; + ide_set_handler (drive, &reset_pollfunc, HZ/20, NULL); + + /* + * Some weird controller like resetting themselves to a strange + * state when the disks are reset this way. At least, the Winbond + * 553 documentation says that + */ + if (hwif->resetproc != NULL) + hwif->resetproc(drive); + +#endif /* OK_TO_RESET_CONTROLLER */ + + __restore_flags (flags); /* local CPU only */ + return ide_started; +} + +/* + * ide_do_reset() is the entry point to the drive/interface reset code. + */ +ide_startstop_t ide_do_reset (ide_drive_t *drive) +{ + return do_reset1 (drive, 0); +} + +static inline u32 read_24 (ide_drive_t *drive) +{ + return (IN_BYTE(IDE_HCYL_REG)<<16) | + (IN_BYTE(IDE_LCYL_REG)<<8) | + IN_BYTE(IDE_SECTOR_REG); +} + +/* + * Clean up after success/failure of an explicit drive cmd + */ +void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err) +{ + unsigned long flags; + struct request *rq; + + spin_lock_irqsave(&io_request_lock, flags); + rq = HWGROUP(drive)->rq; + spin_unlock_irqrestore(&io_request_lock, flags); + + switch(rq->cmd) { + case IDE_DRIVE_CMD: + { + byte *args = (byte *) rq->buffer; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + args[0] = stat; + args[1] = err; + args[2] = IN_BYTE(IDE_NSECTOR_REG); + } + break; + } + case IDE_DRIVE_TASK: + { + byte *args = (byte *) rq->buffer; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + args[0] = stat; + args[1] = err; + args[2] = IN_BYTE(IDE_NSECTOR_REG); + args[3] = IN_BYTE(IDE_SECTOR_REG); + args[4] = IN_BYTE(IDE_LCYL_REG); + args[5] = IN_BYTE(IDE_HCYL_REG); + args[6] = IN_BYTE(IDE_SELECT_REG); + } + break; + } + case IDE_DRIVE_TASKFILE: + { + ide_task_t *args = (ide_task_t *) rq->special; + rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); + if (args) { + if (args->tf_in_flags.b.data) { + unsigned short data = IN_WORD(IDE_DATA_REG); + args->tfRegister[IDE_DATA_OFFSET] = (data) & 0xFF; + args->hobRegister[IDE_DATA_OFFSET_HOB] = (data >> 8) & 0xFF; + } + args->tfRegister[IDE_ERROR_OFFSET] = err; + args->tfRegister[IDE_NSECTOR_OFFSET] = IN_BYTE(IDE_NSECTOR_REG); + args->tfRegister[IDE_SECTOR_OFFSET] = IN_BYTE(IDE_SECTOR_REG); + args->tfRegister[IDE_LCYL_OFFSET] = IN_BYTE(IDE_LCYL_REG); + args->tfRegister[IDE_HCYL_OFFSET] = IN_BYTE(IDE_HCYL_REG); + args->tfRegister[IDE_SELECT_OFFSET] = IN_BYTE(IDE_SELECT_REG); + args->tfRegister[IDE_STATUS_OFFSET] = stat; + + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG_HOB); + 
args->hobRegister[IDE_FEATURE_OFFSET_HOB] = IN_BYTE(IDE_FEATURE_REG); + args->hobRegister[IDE_NSECTOR_OFFSET_HOB] = IN_BYTE(IDE_NSECTOR_REG); + args->hobRegister[IDE_SECTOR_OFFSET_HOB] = IN_BYTE(IDE_SECTOR_REG); + args->hobRegister[IDE_LCYL_OFFSET_HOB] = IN_BYTE(IDE_LCYL_REG); + args->hobRegister[IDE_HCYL_OFFSET_HOB] = IN_BYTE(IDE_HCYL_REG); + } + } + break; + } + default: + break; + } + spin_lock_irqsave(&io_request_lock, flags); + blkdev_dequeue_request(rq); + HWGROUP(drive)->rq = NULL; + end_that_request_last(rq); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * Error reporting, in human readable form (luxurious, but a memory hog). + */ +byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat) +{ + unsigned long flags; + byte err = 0; + + __save_flags (flags); /* local CPU only */ + ide__sti(); /* local CPU only */ + printk("%s: %s: status=0x%02x", drive->name, msg, stat); +#if FANCY_STATUS_DUMPS + printk(" { "); + if (stat & BUSY_STAT) + printk("Busy "); + else { + if (stat & READY_STAT) printk("DriveReady "); + if (stat & WRERR_STAT) printk("DeviceFault "); + if (stat & SEEK_STAT) printk("SeekComplete "); + if (stat & DRQ_STAT) printk("DataRequest "); + if (stat & ECC_STAT) printk("CorrectedError "); + if (stat & INDEX_STAT) printk("Index "); + if (stat & ERR_STAT) printk("Error "); + } + printk("}"); +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { + err = GET_ERR(); + printk("%s: %s: error=0x%02x", drive->name, msg, err); +#if FANCY_STATUS_DUMPS + if (drive->media == ide_disk) { + printk(" { "); + if (err & ABRT_ERR) printk("DriveStatusError "); + if (err & ICRC_ERR) printk("%s", (err & ABRT_ERR) ? "BadCRC " : "BadSector "); + if (err & ECC_ERR) printk("UncorrectableError "); + if (err & ID_ERR) printk("SectorIdNotFound "); + if (err & TRK0_ERR) printk("TrackZeroNotFound "); + if (err & MARK_ERR) printk("AddrMarkNotFound "); + printk("}"); + if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || (err & (ECC_ERR|ID_ERR|MARK_ERR))) { + if ((drive->id->command_set_2 & 0x0400) && + (drive->id->cfs_enable_2 & 0x0400) && + (drive->addressing == 1)) { + __u64 sectors = 0; + u32 low = 0, high = 0; + low = read_24(drive); + OUT_BYTE(drive->ctl|0x80, IDE_CONTROL_REG); + high = read_24(drive); + + sectors = ((__u64)high << 24) | low; + printk(", LBAsect=%llu, high=%d, low=%d", + (unsigned long long) sectors, + high, low); + } else { + byte cur = IN_BYTE(IDE_SELECT_REG); + if (cur & 0x40) { /* using LBA? */ + printk(", LBAsect=%ld", (unsigned long) + ((cur&0xf)<<24) + |(IN_BYTE(IDE_HCYL_REG)<<16) + |(IN_BYTE(IDE_LCYL_REG)<<8) + | IN_BYTE(IDE_SECTOR_REG)); + } else { + printk(", CHS=%d/%d/%d", + (IN_BYTE(IDE_HCYL_REG)<<8) + + IN_BYTE(IDE_LCYL_REG), + cur & 0xf, + IN_BYTE(IDE_SECTOR_REG)); + } + } + if (HWGROUP(drive) && HWGROUP(drive)->rq) + printk(", sector=%ld", HWGROUP(drive)->rq->sector); + } + } +#endif /* FANCY_STATUS_DUMPS */ + printk("\n"); + } + __restore_flags (flags); /* local CPU only */ + return err; +} + +/* + * try_to_flush_leftover_data() is invoked in response to a drive + * unexpectedly having its DRQ_STAT bit set. As an alternative to + * resetting the drive, this routine tries to clear the condition + * by read a sector's worth of data from the drive. Of course, + * this may not help if the drive is *waiting* for data from *us*. + */ +static void try_to_flush_leftover_data (ide_drive_t *drive) +{ + int i = (drive->mult_count ? 
drive->mult_count : 1) * SECTOR_WORDS; + + if (drive->media != ide_disk) + return; + while (i > 0) { + u32 buffer[16]; + unsigned int wcount = (i > 16) ? 16 : i; + i -= wcount; + ide_input_data (drive, buffer, wcount); + } +} + +/* + * ide_error() takes action based on the error returned by the drive. + */ +ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat) +{ + struct request *rq; + byte err; + + err = ide_dump_status(drive, msg, stat); + if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) + return ide_stopped; + /* retry only "normal" I/O: */ + if (rq->cmd == IDE_DRIVE_CMD || rq->cmd == IDE_DRIVE_TASK) { + rq->errors = 1; + ide_end_drive_cmd(drive, stat, err); + return ide_stopped; + } + if (rq->cmd == IDE_DRIVE_TASKFILE) { + rq->errors = 1; + ide_end_drive_cmd(drive, stat, err); +// ide_end_taskfile(drive, stat, err); + return ide_stopped; + } + + if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { /* other bits are useless when BUSY */ + rq->errors |= ERROR_RESET; + } else { + if (drive->media == ide_disk && (stat & ERR_STAT)) { + /* err has different meaning on cdrom and tape */ + if (err == ABRT_ERR) { + if (drive->select.b.lba && IN_BYTE(IDE_COMMAND_REG) == WIN_SPECIFY) + return ide_stopped; /* some newer drives don't support WIN_SPECIFY */ + } else if ((err & (ABRT_ERR | ICRC_ERR)) == (ABRT_ERR | ICRC_ERR)) { + drive->crc_count++; /* UDMA crc error -- just retry the operation */ + } else if (err & (BBD_ERR | ECC_ERR)) /* retries won't help these */ + rq->errors = ERROR_MAX; + else if (err & TRK0_ERR) /* help it find track zero */ + rq->errors |= ERROR_RECAL; + } + if ((stat & DRQ_STAT) && rq->cmd != WRITE) + try_to_flush_leftover_data(drive); + } + if (GET_STAT() & (BUSY_STAT|DRQ_STAT)) + OUT_BYTE(WIN_IDLEIMMEDIATE,IDE_COMMAND_REG); /* force an abort */ + + if (rq->errors >= ERROR_MAX) { + if (drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); + else + ide_end_request(0, HWGROUP(drive)); + } else { + if ((rq->errors & ERROR_RESET) == ERROR_RESET) { + ++rq->errors; + return ide_do_reset(drive); + } + if ((rq->errors & ERROR_RECAL) == ERROR_RECAL) + drive->special.b.recalibrate = 1; + ++rq->errors; + } + return ide_stopped; +} + +/* + * Issue a simple drive command + * The drive must be selected beforehand. + */ +void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler) +{ + ide_set_handler (drive, handler, WAIT_CMD, NULL); + if (IDE_CONTROL_REG) + OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear nIEN */ + SELECT_MASK(HWIF(drive),drive,0); + OUT_BYTE(nsect,IDE_NSECTOR_REG); + OUT_BYTE(cmd,IDE_COMMAND_REG); +} + +/* + * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD. + */ +static ide_startstop_t drive_cmd_intr (ide_drive_t *drive) +{ + struct request *rq = HWGROUP(drive)->rq; + byte *args = (byte *) rq->buffer; + byte stat = GET_STAT(); + int retries = 10; + + ide__sti(); /* local CPU only */ + if ((stat & DRQ_STAT) && args && args[3]) { + byte io_32bit = drive->io_32bit; + drive->io_32bit = 0; + ide_input_data(drive, &args[4], args[3] * SECTOR_WORDS); + drive->io_32bit = io_32bit; + while (((stat = GET_STAT()) & BUSY_STAT) && retries--) + udelay(100); + } + + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) + return ide_error(drive, "drive_cmd", stat); /* calls ide_end_drive_cmd */ + ide_end_drive_cmd (drive, stat, GET_ERR()); + return ide_stopped; +} + +/* + * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT + * commands to a drive. 
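+ * (Each pending bit in drive->special requests one such command;
+ * init_hwif_data(), for instance, presets the recalibrate and
+ * set_geometry bits, so a freshly probed disk gets those commands
+ * before normal I/O starts.)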
It used to do much more, but has been scaled back. + */ +static ide_startstop_t do_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + +#ifdef DEBUG + printk("%s: do_special: 0x%02x\n", drive->name, s->all); +#endif + if (s->b.set_tune) { + ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc; + s->b.set_tune = 0; + if (tuneproc != NULL) + tuneproc(drive, drive->tune_req); + } else if (drive->driver != NULL) { + return DRIVER(drive)->special(drive); + } else if (s->all) { + printk("%s: bad special flag: 0x%02x\n", drive->name, s->all); + s->all = 0; + } + return ide_stopped; +} + +/* + * This routine busy-waits for the drive status to be not "busy". + * It then checks the status for all of the "good" bits and none + * of the "bad" bits, and if all is okay it returns 0. All other + * cases return 1 after invoking ide_error() -- caller should just return. + * + * This routine should get fixed to not hog the cpu during extra long waits.. + * That could be done by busy-waiting for the first jiffy or two, and then + * setting a timer to wake up at half second intervals thereafter, + * until timeout is achieved, before timing out. + */ +int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout) { + byte stat; + int i; + unsigned long flags; + + /* bail early if we've exceeded max_failures */ + if (drive->max_failures && (drive->failures > drive->max_failures)) { + *startstop = ide_stopped; + return 1; + } + + udelay(1); /* spec allows drive 400ns to assert "BUSY" */ + if ((stat = GET_STAT()) & BUSY_STAT) { + __save_flags(flags); /* local CPU only */ + ide__sti(); /* local CPU only */ + timeout += jiffies; + while ((stat = GET_STAT()) & BUSY_STAT) { + if (0 < (signed long)(jiffies - timeout)) { + __restore_flags(flags); /* local CPU only */ + *startstop = ide_error(drive, "status timeout", stat); + return 1; + } + } + __restore_flags(flags); /* local CPU only */ + } + /* + * Allow status to settle, then read it again. + * A few rare drives vastly violate the 400ns spec here, + * so we'll wait up to 10usec for a "good" status + * rather than expensively fail things immediately. + * This fix courtesy of Matthew Faupel & Niccolo Rigacci. + */ + for (i = 0; i < 10; i++) { + udelay(1); + if (OK_STAT((stat = GET_STAT()), good, bad)) + return 0; + } + *startstop = ide_error(drive, "status error", stat); + return 1; +} + +/* + * execute_drive_cmd() issues a special drive command, + * usually initiated by ioctl() from the external hdparm program. 
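+ * For the plain IDE_DRIVE_CMD case, rq->buffer holds four bytes:
+ * args[0] = command, args[1] = sector count, args[2] = feature, and
+ * args[3] = the number of 512-byte data blocks the drive returns
+ * into &args[4] afterwards (see drive_cmd_intr() above).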
+ */ +static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq) +{ + switch(rq->cmd) { + case IDE_DRIVE_TASKFILE: + { + ide_task_t *args = rq->special; + + if (!(args)) break; + +#ifdef CONFIG_IDE_TASK_IOCTL_DEBUG + { + printk(KERN_INFO "%s: ", drive->name); +// printk("TF.0=x%02x ", args->tfRegister[IDE_DATA_OFFSET]); + printk("TF.1=x%02x ", args->tfRegister[IDE_FEATURE_OFFSET]); + printk("TF.2=x%02x ", args->tfRegister[IDE_NSECTOR_OFFSET]); + printk("TF.3=x%02x ", args->tfRegister[IDE_SECTOR_OFFSET]); + printk("TF.4=x%02x ", args->tfRegister[IDE_LCYL_OFFSET]); + printk("TF.5=x%02x ", args->tfRegister[IDE_HCYL_OFFSET]); + printk("TF.6=x%02x ", args->tfRegister[IDE_SELECT_OFFSET]); + printk("TF.7=x%02x\n", args->tfRegister[IDE_COMMAND_OFFSET]); + printk(KERN_INFO "%s: ", drive->name); +// printk("HTF.0=x%02x ", args->hobRegister[IDE_DATA_OFFSET_HOB]); + printk("HTF.1=x%02x ", args->hobRegister[IDE_FEATURE_OFFSET_HOB]); + printk("HTF.2=x%02x ", args->hobRegister[IDE_NSECTOR_OFFSET_HOB]); + printk("HTF.3=x%02x ", args->hobRegister[IDE_SECTOR_OFFSET_HOB]); + printk("HTF.4=x%02x ", args->hobRegister[IDE_LCYL_OFFSET_HOB]); + printk("HTF.5=x%02x ", args->hobRegister[IDE_HCYL_OFFSET_HOB]); + printk("HTF.6=x%02x ", args->hobRegister[IDE_SELECT_OFFSET_HOB]); + printk("HTF.7=x%02x\n", args->hobRegister[IDE_CONTROL_OFFSET_HOB]); + } +#endif /* CONFIG_IDE_TASK_IOCTL_DEBUG */ + +// if (args->tf_out_flags.all == 0) { + do_taskfile(drive, + (struct hd_drive_task_hdr *)&args->tfRegister, + (struct hd_drive_hob_hdr *)&args->hobRegister, + args->handler); +// } else { +// return flagged_taskfile(drive, args); +// } + + if (((args->command_type == IDE_DRIVE_TASK_RAW_WRITE) || + (args->command_type == IDE_DRIVE_TASK_OUT)) && + args->prehandler && args->handler) + return args->prehandler(drive, rq); + return ide_started; + } + case IDE_DRIVE_TASK: + { + byte *args = rq->buffer; + byte sel; + + if (!(args)) break; +#ifdef DEBUG + printk("%s: DRIVE_TASK_CMD ", drive->name); + printk("cmd=0x%02x ", args[0]); + printk("fr=0x%02x ", args[1]); + printk("ns=0x%02x ", args[2]); + printk("sc=0x%02x ", args[3]); + printk("lcyl=0x%02x ", args[4]); + printk("hcyl=0x%02x ", args[5]); + printk("sel=0x%02x\n", args[6]); +#endif + OUT_BYTE(args[1], IDE_FEATURE_REG); + OUT_BYTE(args[3], IDE_SECTOR_REG); + OUT_BYTE(args[4], IDE_LCYL_REG); + OUT_BYTE(args[5], IDE_HCYL_REG); + sel = (args[6] & ~0x10); + if (drive->select.b.unit) + sel |= 0x10; + OUT_BYTE(sel, IDE_SELECT_REG); + ide_cmd(drive, args[0], args[2], &drive_cmd_intr); + return ide_started; + } + case IDE_DRIVE_CMD: + { + byte *args = rq->buffer; + + if (!(args)) break; +#ifdef DEBUG + printk("%s: DRIVE_CMD ", drive->name); + printk("cmd=0x%02x ", args[0]); + printk("sc=0x%02x ", args[1]); + printk("fr=0x%02x ", args[2]); + printk("xx=0x%02x\n", args[3]); +#endif + if (args[0] == WIN_SMART) { + OUT_BYTE(0x4f, IDE_LCYL_REG); + OUT_BYTE(0xc2, IDE_HCYL_REG); + OUT_BYTE(args[2],IDE_FEATURE_REG); + OUT_BYTE(args[1],IDE_SECTOR_REG); + ide_cmd(drive, args[0], args[3], &drive_cmd_intr); + return ide_started; + } + OUT_BYTE(args[2],IDE_FEATURE_REG); + ide_cmd(drive, args[0], args[1], &drive_cmd_intr); + return ide_started; + } + default: + break; + } + /* + * NULL is actually a valid way of waiting for + * all current requests to be flushed from the queue. 
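+	 * (A zero-filled request, as prepared by ide_init_drive_cmd() below
+	 * and queued with action ide_wait, is therefore not serviced until
+	 * everything ahead of it has completed: a cheap way to drain the
+	 * queue.)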
+ */ +#ifdef DEBUG + printk("%s: DRIVE_CMD (null)\n", drive->name); +#endif + ide_end_drive_cmd(drive, GET_STAT(), GET_ERR()); + return ide_stopped; +} + +/* + * start_request() initiates handling of a new I/O request + * needed to reverse the perverted changes anonymously made back + * 2.3.99-pre6 + */ +static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) +{ + ide_startstop_t startstop; + unsigned long block, blockend; + unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; + ide_hwif_t *hwif = HWIF(drive); + +#ifdef DEBUG + printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); +#endif + /* bail early if we've exceeded max_failures */ + if (drive->max_failures && (drive->failures > drive->max_failures)) { + goto kill_rq; + } + + if (unit >= MAX_DRIVES) { + printk("%s: bad device number: %s\n", hwif->name, kdevname(rq->rq_dev)); + goto kill_rq; + } +#ifdef DEBUG + if (rq->bh && !buffer_locked(rq->bh)) { + printk("%s: block not locked\n", drive->name); + goto kill_rq; + } +#endif + block = rq->sector; + blockend = block + rq->nr_sectors; + + if ((rq->cmd == READ || rq->cmd == WRITE) && + (drive->media == ide_disk || drive->media == ide_floppy)) { + if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { + printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, + (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); + goto kill_rq; + } + block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + } + /* Yecch - this will shift the entire interval, + possibly killing some innocent following sector */ + if (block == 0 && drive->remap_0_to_1 == 1) + block = 1; /* redirect MBR access to EZ-Drive partn table */ + +#if (DISK_RECOVERY_TIME > 0) + while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME); +#endif + + SELECT_DRIVE(hwif, drive); + if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + printk("%s: drive not ready for command\n", drive->name); + return startstop; + } + if (!drive->special.all) { + switch(rq->cmd) { + case IDE_DRIVE_CMD: + case IDE_DRIVE_TASK: + case IDE_DRIVE_TASKFILE: + return execute_drive_cmd(drive, rq); + default: + break; + } + if (drive->driver != NULL) { + return (DRIVER(drive)->do_request(drive, rq, block)); + } + printk("%s: media type %d not supported\n", drive->name, drive->media); + goto kill_rq; + } + return do_special(drive); +kill_rq: + if (drive->driver != NULL) + DRIVER(drive)->end_request(0, HWGROUP(drive)); + else + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; +} + +ide_startstop_t restart_request (ide_drive_t *drive) +{ + ide_hwgroup_t *hwgroup = HWGROUP(drive); + unsigned long flags; + struct request *rq; + + spin_lock_irqsave(&io_request_lock, flags); + hwgroup->handler = NULL; + del_timer(&hwgroup->timer); + rq = hwgroup->rq; + spin_unlock_irqrestore(&io_request_lock, flags); + + return start_request(drive, rq); +} + +/* + * ide_stall_queue() can be used by a drive to give excess bandwidth back + * to the hwgroup by sleeping for timeout jiffies. + */ +void ide_stall_queue (ide_drive_t *drive, unsigned long timeout) +{ + if (timeout > WAIT_WORSTCASE) + timeout = WAIT_WORSTCASE; + drive->sleep = timeout + jiffies; +} + +#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time) + +/* + * choose_drive() selects the next drive which will be serviced. 
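+ * Roughly: preference goes to any drive whose sleep deadline has
+ * already passed, then to the drive with the earliest WAKEUP()
+ * estimate; a drive with nice1 set may additionally be stalled via
+ * ide_stall_queue() so a hungrier neighbour can go first.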
+ */ +static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup) +{ + ide_drive_t *drive, *best; + +repeat: + best = NULL; + drive = hwgroup->drive; + do { + if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) { + if (!best + || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) + || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) + { + if( !drive->queue.plugged ) + best = drive; + } + } + } while ((drive = drive->next) != hwgroup->drive); + if (best && best->nice1 && !best->sleep && best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) { + long t = (signed long)(WAKEUP(best) - jiffies); + if (t >= WAIT_MIN_SLEEP) { + /* + * We *may* have some time to spare, but first let's see if + * someone can potentially benefit from our nice mood today.. + */ + drive = best->next; + do { + if (!drive->sleep + && 0 < (signed long)(WAKEUP(drive) - (jiffies - best->service_time)) + && 0 < (signed long)((jiffies + t) - WAKEUP(drive))) + { + ide_stall_queue(best, IDE_MIN(t, 10 * WAIT_MIN_SLEEP)); + goto repeat; + } + } while ((drive = drive->next) != best); + } + } + return best; +} + +/* + * Issue a new request to a drive from hwgroup + * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * + * A hwgroup is a serialized group of IDE interfaces. Usually there is + * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) + * may have both interfaces in a single hwgroup to "serialize" access. + * Or possibly multiple ISA interfaces can share a common IRQ by being grouped + * together into one hwgroup for serialized access. + * + * Note also that several hwgroups can end up sharing a single IRQ, + * possibly along with many other devices. This is especially common in + * PCI-based systems with off-board IDE controller cards. + * + * The IDE driver uses the single global io_request_lock spinlock to protect + * access to the request queues, and to protect the hwgroup->busy flag. + * + * The first thread into the driver for a particular hwgroup sets the + * hwgroup->busy flag to indicate that this hwgroup is now active, + * and then initiates processing of the top request from the request queue. + * + * Other threads attempting entry notice the busy setting, and will simply + * queue their new requests and exit immediately. Note that hwgroup->busy + * remains set even when the driver is merely awaiting the next interrupt. + * Thus, the meaning is "this hwgroup is busy processing a request". + * + * When processing of a request completes, the completing thread or IRQ-handler + * will start the next request from the queue. If no more work remains, + * the driver will clear the hwgroup->busy flag and exit. + * + * The io_request_lock (spinlock) is used to protect all access to the + * hwgroup->busy flag, but is otherwise not needed for most processing in + * the driver. This makes the driver much more friendlier to shared IRQs + * than previous designs, while remaining 100% (?) SMP safe and capable. + */ +/* --BenH: made non-static as ide-pmac.c uses it to kick the hwgroup back + * into life on wakeup from machine sleep. + */ +void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) +{ + ide_drive_t *drive; + ide_hwif_t *hwif; + struct request *rq; + ide_startstop_t startstop; + + ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) 
*/ + + __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ + + while (!hwgroup->busy) { + hwgroup->busy = 1; + drive = choose_drive(hwgroup); + if (drive == NULL) { + unsigned long sleep = 0; + hwgroup->rq = NULL; + drive = hwgroup->drive; + do { + if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep))) + sleep = drive->sleep; + } while ((drive = drive->next) != hwgroup->drive); + if (sleep) { + /* + * Take a short snooze, and then wake up this hwgroup again. + * This gives other hwgroups on the same a chance to + * play fairly with us, just in case there are big differences + * in relative throughputs.. don't want to hog the cpu too much. + */ + if (0 < (signed long)(jiffies + WAIT_MIN_SLEEP - sleep)) + sleep = jiffies + WAIT_MIN_SLEEP; +#if 1 + if (timer_pending(&hwgroup->timer)) + printk("ide_set_handler: timer already active\n"); +#endif + hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + mod_timer(&hwgroup->timer, sleep); + /* we purposely leave hwgroup->busy==1 while sleeping */ + } else { + /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */ + ide_release_lock(&ide_lock); /* for atari only */ + hwgroup->busy = 0; + } + return; /* no more work for this hwgroup (for now) */ + } + hwif = HWIF(drive); + if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif && hwif->io_ports[IDE_CONTROL_OFFSET]) { + /* set nIEN for previous hwif */ + SELECT_INTERRUPT(hwif, drive); + } + hwgroup->hwif = hwif; + hwgroup->drive = drive; + drive->sleep = 0; + drive->service_start = jiffies; + + if ( drive->queue.plugged ) /* paranoia */ + printk("%s: Huh? nuking plugged queue\n", drive->name); + + rq = hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + /* + * Some systems have trouble with IDE IRQs arriving while + * the driver is still setting things up. So, here we disable + * the IRQ used by this interface while the request is being started. + * This may look bad at first, but pretty much the same thing + * happens anyway when any interrupt comes in, IDE or otherwise + * -- the kernel masks the IRQ while it is being handled. + */ + if (masked_irq && hwif->irq != masked_irq) + disable_irq_nosync(hwif->irq); + spin_unlock(&io_request_lock); + ide__sti(); /* allow other IRQs while we start this request */ + startstop = start_request(drive, rq); + spin_lock_irq(&io_request_lock); + if (masked_irq && hwif->irq != masked_irq) + enable_irq(hwif->irq); + if (startstop == ide_stopped) + hwgroup->busy = 0; + } +} + +/* + * ide_get_queue() returns the queue which corresponds to a given device. + */ +request_queue_t *ide_get_queue (kdev_t dev) +{ + ide_hwif_t *hwif = (ide_hwif_t *)blk_dev[MAJOR(dev)].data; + + return &hwif->drives[DEVICE_NR(dev) & 1].queue; +} + +/* + * Passes the stuff to ide_do_request + */ +void do_ide_request(request_queue_t *q) +{ + ide_do_request(q->queuedata, 0); +} + +/* + * un-busy the hwgroup etc, and clear any pending DMA status. 
we want to + * retry the current request in pio mode instead of risking tossing it + * all away + */ +void ide_dma_timeout_retry(ide_drive_t *drive) +{ + ide_hwif_t *hwif = HWIF(drive); + struct request *rq; + + /* + * end current dma transaction + */ + (void) hwif->dmaproc(ide_dma_end, drive); + + /* + * complain a little, later we might remove some of this verbosity + */ + printk("%s: timeout waiting for DMA\n", drive->name); + (void) hwif->dmaproc(ide_dma_timeout, drive); + + /* + * disable dma for now, but remember that we did so because of + * a timeout -- we'll reenable after we finish this next request + * (or rather the first chunk of it) in pio. + */ + drive->retry_pio++; + drive->state = DMA_PIO_RETRY; + (void) hwif->dmaproc(ide_dma_off_quietly, drive); + + /* + * un-busy drive etc (hwgroup->busy is cleared on return) and + * make sure request is sane + */ + rq = HWGROUP(drive)->rq; + HWGROUP(drive)->rq = NULL; + + rq->errors = 0; + rq->sector = rq->bh->b_rsector; + rq->current_nr_sectors = rq->bh->b_size >> 9; + rq->buffer = rq->bh->b_data; +} + +/* + * ide_timer_expiry() is our timeout function for all drive operations. + * But note that it can also be invoked as a result of a "sleep" operation + * triggered by the mod_timer() call in ide_do_request. + */ +void ide_timer_expiry (unsigned long data) +{ + ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; + ide_handler_t *handler; + ide_expiry_t *expiry; + unsigned long flags; + unsigned long wait; + + spin_lock_irqsave(&io_request_lock, flags); + del_timer(&hwgroup->timer); + + if ((handler = hwgroup->handler) == NULL) { + /* + * Either a marginal timeout occurred + * (got the interrupt just as timer expired), + * or we were "sleeping" to give other devices a chance. + * Either way, we don't really want to complain about anything. + */ + if (hwgroup->sleeping) { + hwgroup->sleeping = 0; + hwgroup->busy = 0; + } + } else { + ide_drive_t *drive = hwgroup->drive; + if (!drive) { + printk("ide_timer_expiry: hwgroup->drive was NULL\n"); + hwgroup->handler = NULL; + } else { + ide_hwif_t *hwif; + ide_startstop_t startstop; + if (!hwgroup->busy) { + hwgroup->busy = 1; /* paranoia */ + printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); + } + if ((expiry = hwgroup->expiry) != NULL) { + /* continue */ + if ((wait = expiry(drive)) != 0) { + /* reset timer */ + hwgroup->timer.expires = jiffies + wait; + add_timer(&hwgroup->timer); + spin_unlock_irqrestore(&io_request_lock, flags); + return; + } + } + hwgroup->handler = NULL; + /* + * We need to simulate a real interrupt when invoking + * the handler() function, which means we need to globally + * mask the specific IRQ: + */ + spin_unlock(&io_request_lock); + hwif = HWIF(drive); +#if DISABLE_IRQ_NOSYNC + disable_irq_nosync(hwif->irq); +#else + disable_irq(hwif->irq); /* disable_irq_nosync ?? 
*/ +#endif /* DISABLE_IRQ_NOSYNC */ + __cli(); /* local CPU only, as if we were handling an interrupt */ + if (hwgroup->poll_timeout != 0) { + startstop = handler(drive); + } else if (drive_is_ready(drive)) { + if (drive->waiting_for_dma) + (void) hwgroup->hwif->dmaproc(ide_dma_lostirq, drive); + (void)ide_ack_intr(hwif); + printk("%s: lost interrupt\n", drive->name); + startstop = handler(drive); + } else { + if (drive->waiting_for_dma) { + startstop = ide_stopped; + ide_dma_timeout_retry(drive); + } else + startstop = ide_error(drive, "irq timeout", GET_STAT()); + } + set_recovery_timer(hwif); + drive->service_time = jiffies - drive->service_start; + enable_irq(hwif->irq); + spin_lock_irq(&io_request_lock); + if (startstop == ide_stopped) + hwgroup->busy = 0; + } + } + ide_do_request(hwgroup, 0); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * There's nothing really useful we can do with an unexpected interrupt, + * other than reading the status register (to clear it), and logging it. + * There should be no way that an irq can happen before we're ready for it, + * so we needn't worry much about losing an "important" interrupt here. + * + * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the + * drive enters "idle", "standby", or "sleep" mode, so if the status looks + * "good", we just ignore the interrupt completely. + * + * This routine assumes __cli() is in effect when called. + * + * If an unexpected interrupt happens on irq15 while we are handling irq14 + * and if the two interfaces are "serialized" (CMD640), then it looks like + * we could screw up by interfering with a new request being set up for irq15. + * + * In reality, this is a non-issue. The new command is not sent unless the + * drive is ready to accept one, in which case we know the drive is not + * trying to interrupt us. And ide_set_handler() is always invoked before + * completing the issuance of any new drive command, so we will not be + * accidentally invoked as a result of any valid command completion interrupt. + * + */ +static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup) +{ + byte stat; + ide_hwif_t *hwif = hwgroup->hwif; + + /* + * handle the unexpected interrupt + */ + do { + if (hwif->irq == irq) { + stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); + if (!OK_STAT(stat, READY_STAT, BAD_STAT)) { + /* Try to not flood the console with msgs */ + static unsigned long last_msgtime, count; + ++count; + if (0 < (signed long)(jiffies - (last_msgtime + HZ))) { + last_msgtime = jiffies; + printk("%s%s: unexpected interrupt, status=0x%02x, count=%ld\n", + hwif->name, (hwif->next == hwgroup->hwif) ? "" : "(?)", stat, count); + } + } + } + } while ((hwif = hwif->next) != hwgroup->hwif); +} + +/* + * entry point for all interrupts, caller does __cli() for us + */ +void ide_intr (int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long flags; + ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id; + ide_hwif_t *hwif; + ide_drive_t *drive; + ide_handler_t *handler; + ide_startstop_t startstop; + + spin_lock_irqsave(&io_request_lock, flags); + hwif = hwgroup->hwif; + + if (!ide_ack_intr(hwif)) { + spin_unlock_irqrestore(&io_request_lock, flags); + return; + } + + if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) { + /* + * Not expecting an interrupt from this drive. + * That means this could be: + * (1) an interrupt from another PCI device + * sharing the same PCI INT# as us. 
+ * or (2) a drive just entered sleep or standby mode, + * and is interrupting to let us know. + * or (3) a spurious interrupt of unknown origin. + * + * For PCI, we cannot tell the difference, + * so in that case we just ignore it and hope it goes away. + */ +#ifdef CONFIG_BLK_DEV_IDEPCI + if (IDE_PCI_DEVID_EQ(hwif->pci_devid, IDE_PCI_DEVID_NULL)) +#endif /* CONFIG_BLK_DEV_IDEPCI */ + { + /* + * Probably not a shared PCI interrupt, + * so we can safely try to do something about it: + */ + unexpected_intr(irq, hwgroup); +#ifdef CONFIG_BLK_DEV_IDEPCI + } else { + /* + * Whack the status register, just in case we have a leftover pending IRQ. + */ + (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); +#endif /* CONFIG_BLK_DEV_IDEPCI */ + } + spin_unlock_irqrestore(&io_request_lock, flags); + return; + } + drive = hwgroup->drive; + if (!drive) { + /* + * This should NEVER happen, and there isn't much we could do about it here. + */ + spin_unlock_irqrestore(&io_request_lock, flags); + return; + } + if (!drive_is_ready(drive)) { + /* + * This happens regularly when we share a PCI IRQ with another device. + * Unfortunately, it can also happen with some buggy drives that trigger + * the IRQ before their status register is up to date. Hopefully we have + * enough advance overhead that the latter isn't a problem. + */ + spin_unlock_irqrestore(&io_request_lock, flags); + return; + } + if (!hwgroup->busy) { + hwgroup->busy = 1; /* paranoia */ + printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); + } + hwgroup->handler = NULL; + del_timer(&hwgroup->timer); + spin_unlock(&io_request_lock); + + if (drive->unmask) + ide__sti(); /* local CPU only */ + startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ + spin_lock_irq(&io_request_lock); + + /* + * Note that handler() may have set things up for another + * interrupt to occur soon, but it cannot happen until + * we exit from this routine, because it will be the + * same irq as is currently being serviced here, and Linux + * won't allow another of the same (on any CPU) until we return. + */ + set_recovery_timer(HWIF(drive)); + drive->service_time = jiffies - drive->service_start; + if (startstop == ide_stopped) { + if (hwgroup->handler == NULL) { /* paranoia */ + hwgroup->busy = 0; + ide_do_request(hwgroup, hwif->irq); + } else { + printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); + } + } + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * get_info_ptr() returns the (ide_drive_t *) for a given device number. + * It returns NULL if the given device number does not match any present drives. + */ +ide_drive_t *get_info_ptr (kdev_t i_rdev) +{ + int major = MAJOR(i_rdev); +#if 0 + int minor = MINOR(i_rdev) & PARTN_MASK; +#endif + unsigned int h; + + for (h = 0; h < MAX_HWIFS; ++h) { + ide_hwif_t *hwif = &ide_hwifs[h]; + if (hwif->present && major == hwif->major) { + unsigned unit = DEVICE_NR(i_rdev); + if (unit < MAX_DRIVES) { + ide_drive_t *drive = &hwif->drives[unit]; +#if 0 + if ((drive->present) && (drive->part[minor].nr_sects)) +#else + if (drive->present) +#endif + return drive; + } + break; + } + } + return NULL; +} + +/* + * This function is intended to be used prior to invoking ide_do_drive_cmd(). + */ +void ide_init_drive_cmd (struct request *rq) +{ + memset(rq, 0, sizeof(*rq)); + rq->cmd = IDE_DRIVE_CMD; +} + +/* + * This function issues a special IDE device request + * onto the request queue. 
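+ * (Callers are expected to prepare rq with ide_init_drive_cmd()
+ * above before filling in rq->buffer and invoking this routine.)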
+ *
+ * If action is ide_wait, then the rq is queued at the end of the
+ * request queue, and the function sleeps until it has been processed.
+ * This is for use when invoked from an ioctl handler.
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed-request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * this is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action)
+{
+	unsigned long flags;
+	ide_hwgroup_t *hwgroup = HWGROUP(drive);
+	unsigned int major = HWIF(drive)->major;
+	struct list_head *queue_head = &drive->queue.queue_head;
+	/*DECLARE_COMPLETION(wait);*/
+
+#ifdef CONFIG_BLK_DEV_PDC4030
+	if (HWIF(drive)->chipset == ide_pdc4030 && rq->buffer != NULL)
+		return -ENOSYS;  /* special drive cmds not supported */
+#endif
+	rq->errors = 0;
+	rq->rq_status = RQ_ACTIVE;
+	rq->rq_dev = MKDEV(major,(drive->select.b.unit)<<PARTN_BITS);
+	spin_lock_irqsave(&io_request_lock, flags);
+	if (action == ide_preempt) {
+		hwgroup->rq = NULL;
+	} else {
+		if (action == ide_wait || action == ide_end) {
+			queue_head = queue_head->prev;
+		} else
+			queue_head = queue_head->next;
+	}
+	list_add(&rq->queue, queue_head);
+	ide_do_request(hwgroup, 0);
+	spin_unlock_irqrestore(&io_request_lock, flags);
+	return 0;
+
+}
+
+/*
+ * This routine is called to flush all partitions and partition tables
+ * for a changed disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0. If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
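+ * Hence the check below refuses to revalidate while drive->usage
+ * exceeds 1 or the drive is marked busy, since another opener could
+ * still be doing I/O against the stale partition table.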
+ */
+int ide_revalidate_disk (kdev_t i_rdev)
+{
+	ide_drive_t *drive;
+	ide_hwgroup_t *hwgroup;
+	unsigned int p, major, minor;
+	unsigned long flags;
+
+	if ((drive = get_info_ptr(i_rdev)) == NULL)
+		return -ENODEV;
+	major = MAJOR(i_rdev);
+	minor = drive->select.b.unit << PARTN_BITS;
+	hwgroup = HWGROUP(drive);
+	spin_lock_irqsave(&io_request_lock, flags);
+	if (drive->busy || (drive->usage > 1)) {
+		spin_unlock_irqrestore(&io_request_lock, flags);
+		return -EBUSY;
+	};
+	drive->busy = 1;
+	MOD_INC_USE_COUNT;
+	spin_unlock_irqrestore(&io_request_lock, flags);
+
+	for (p = 0; p < (1<<PARTN_BITS); ++p) {
+		if (drive->part[p].nr_sects > 0) {
+			kdev_t devp = MKDEV(major, minor+p);
+			invalidate_device(devp, 1);
+		}
+		drive->part[p].start_sect = 0;
+		drive->part[p].nr_sects = 0;
+	};
+
+	if (DRIVER(drive)->revalidate)
+		DRIVER(drive)->revalidate(drive);
+
+	drive->busy = 0;
+	/*wake_up(&drive->wqueue);*/
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+static void revalidate_drives (void)
+{
+	ide_hwif_t *hwif;
+	ide_drive_t *drive;
+	int index, unit;
+
+	for (index = 0; index < MAX_HWIFS; ++index) {
+		hwif = &ide_hwifs[index];
+		for (unit = 0; unit < MAX_DRIVES; ++unit) {
+			drive = &ide_hwifs[index].drives[unit];
+			if (drive->revalidate) {
+				drive->revalidate = 0;
+				if (!initializing)
+					(void) ide_revalidate_disk(MKDEV(hwif->major, unit<<PARTN_BITS));
+			}
+		}
+	}
+}
+
+static void ide_probe_module (void)
+{
+	if (!ide_probe) {
+#if defined(CONFIG_KMOD) && defined(CONFIG_BLK_DEV_IDE_MODULE)
+		(void) request_module("ide-probe-mod");
+#endif /* (CONFIG_KMOD) && (CONFIG_BLK_DEV_IDE_MODULE) */
+	} else {
+		(void) ide_probe->init();
+	}
+	revalidate_drives();
+}
+
+static void ide_driver_module (void)
+{
+	int index;
+	ide_module_t *module = ide_modules;
+
+	for (index = 0; index < MAX_HWIFS; ++index)
+		if (ide_hwifs[index].present)
+			goto search;
+	ide_probe_module();
+search:
+	while (module) {
+		(void) module->init();
+		module = module->next;
+	}
+	revalidate_drives();
+}
+
+static int ide_open (struct inode * inode, struct file * filp)
+{
+	ide_drive_t *drive;
+
+	if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+		return -ENXIO;
+	if (drive->driver == NULL)
+		ide_driver_module();
+#ifdef CONFIG_KMOD
+	if (drive->driver == NULL) {
+		if (drive->media == ide_disk)
+			(void) request_module("ide-disk");
+		if (drive->media == ide_cdrom)
+			(void) request_module("ide-cd");
+		if (drive->media == ide_tape)
+			(void) request_module("ide-tape");
+		if (drive->media == ide_floppy)
+			(void) request_module("ide-floppy");
+#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI)
+		if (drive->media == ide_scsi)
+			(void) request_module("ide-scsi");
+#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */
+	}
+#endif /* CONFIG_KMOD */
+#if 0
+	while (drive->busy)
+		sleep_on(&drive->wqueue);
+#endif
+	drive->usage++;
+	if (drive->driver != NULL)
+		return DRIVER(drive)->open(inode, filp, drive);
+	printk ("%s: driver not present\n", drive->name);
+	drive->usage--;
+	return -ENXIO;
+}
+
+/*
+ * Releasing a block device means we sync() it, so that it can safely
+ * be forgotten about...
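+ * (In this hypervisor build nothing is actually synced: the block
+ * layer helpers near the top of this file are stubs, so release just
+ * drops the usage count and runs the subdriver's release hook.)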
+ */ +static int ide_release (struct inode * inode, struct file * file) +{ + ide_drive_t *drive; + + if ((drive = get_info_ptr(inode->i_rdev)) != NULL) { + drive->usage--; + if (drive->driver != NULL) + DRIVER(drive)->release(inode, file, drive); + } + return 0; +} + +int ide_replace_subdriver (ide_drive_t *drive, const char *driver) +{ + if (!drive->present || drive->busy || drive->usage) + goto abort; + if (drive->driver != NULL && DRIVER(drive)->cleanup(drive)) + goto abort; + strncpy(drive->driver_req, driver, 9); + ide_driver_module(); + drive->driver_req[0] = 0; + ide_driver_module(); + if (DRIVER(drive) && !strcmp(DRIVER(drive)->name, driver)) + return 0; +abort: + return 1; +} + +#ifdef CONFIG_PROC_FS +ide_proc_entry_t generic_subdriver_entries[] = { + { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL }, + { NULL, 0, NULL, NULL } +}; +#endif + +/* + * Note that we only release the standard ports, + * and do not even try to handle any extra ports + * allocated for weird IDE interface chipsets. + */ +void hwif_unregister (ide_hwif_t *hwif) +{ + if (hwif->straight8) { + ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8); + goto jump_eight; + } + if (hwif->io_ports[IDE_DATA_OFFSET]) + ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 1); + if (hwif->io_ports[IDE_ERROR_OFFSET]) + ide_release_region(hwif->io_ports[IDE_ERROR_OFFSET], 1); + if (hwif->io_ports[IDE_NSECTOR_OFFSET]) + ide_release_region(hwif->io_ports[IDE_NSECTOR_OFFSET], 1); + if (hwif->io_ports[IDE_SECTOR_OFFSET]) + ide_release_region(hwif->io_ports[IDE_SECTOR_OFFSET], 1); + if (hwif->io_ports[IDE_LCYL_OFFSET]) + ide_release_region(hwif->io_ports[IDE_LCYL_OFFSET], 1); + if (hwif->io_ports[IDE_HCYL_OFFSET]) + ide_release_region(hwif->io_ports[IDE_HCYL_OFFSET], 1); + if (hwif->io_ports[IDE_SELECT_OFFSET]) + ide_release_region(hwif->io_ports[IDE_SELECT_OFFSET], 1); + if (hwif->io_ports[IDE_STATUS_OFFSET]) + ide_release_region(hwif->io_ports[IDE_STATUS_OFFSET], 1); +jump_eight: + if (hwif->io_ports[IDE_CONTROL_OFFSET]) + ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1); +#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC) + if (hwif->io_ports[IDE_IRQ_OFFSET]) + ide_release_region(hwif->io_ports[IDE_IRQ_OFFSET], 1); +#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */ +} + +void ide_unregister (unsigned int index) +{ + struct gendisk *gd; + ide_drive_t *drive, *d; + ide_hwif_t *hwif, *g; + ide_hwgroup_t *hwgroup; + int irq_count = 0, unit, i; + unsigned long flags; + unsigned int p, minor; + ide_hwif_t old_hwif; + + if (index >= MAX_HWIFS) + return; + save_flags(flags); /* all CPUs */ + cli(); /* all CPUs */ + hwif = &ide_hwifs[index]; + if (!hwif->present) + goto abort; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + drive = &hwif->drives[unit]; + if (!drive->present) + continue; + if (drive->busy || drive->usage) + goto abort; + if (drive->driver != NULL && DRIVER(drive)->cleanup(drive)) + goto abort; + } + hwif->present = 0; + + /* + * All clear? 
Then blow away the buffer cache
+ */
+	sti();
+	for (unit = 0; unit < MAX_DRIVES; ++unit) {
+		drive = &hwif->drives[unit];
+		if (!drive->present)
+			continue;
+		minor = drive->select.b.unit << PARTN_BITS;
+		for (p = 0; p < (1<<PARTN_BITS); ++p) {
+			if (drive->part[p].nr_sects > 0) {
+				kdev_t devp = MKDEV(hwif->major, minor+p);
+				invalidate_device(devp, 0);
+			}
+		}
+#ifdef CONFIG_PROC_FS
+		destroy_proc_ide_drives(hwif);
+#endif
+	}
+	cli();
+	hwgroup = hwif->hwgroup;
+
+	/*
+	 * free the irq if we were the only hwif using it
+	 */
+	g = hwgroup->hwif;
+	do {
+		if (g->irq == hwif->irq)
+			++irq_count;
+		g = g->next;
+	} while (g != hwgroup->hwif);
+	if (irq_count == 1)
+		free_irq(hwif->irq, hwgroup);
+
+	/*
+	 * Note that we only release the standard ports,
+	 * and do not even try to handle any extra ports
+	 * allocated for weird IDE interface chipsets.
+	 */
+	hwif_unregister(hwif);
+
+	/*
+	 * Remove us from the hwgroup, and free
+	 * the hwgroup if we were the only member
+	 */
+	d = hwgroup->drive;
+	for (i = 0; i < MAX_DRIVES; ++i) {
+		drive = &hwif->drives[i];
+#ifdef DEVFS_MUST_DIE
+		if (drive->de) {
+			devfs_unregister (drive->de);
+			drive->de = NULL;
+		}
+#endif
+		if (!drive->present)
+			continue;
+		while (hwgroup->drive->next != drive)
+			hwgroup->drive = hwgroup->drive->next;
+		hwgroup->drive->next = drive->next;
+		if (hwgroup->drive == drive)
+			hwgroup->drive = NULL;
+		if (drive->id != NULL) {
+			kfree(drive->id);
+			drive->id = NULL;
+		}
+		drive->present = 0;
+		blk_cleanup_queue(&drive->queue);
+	}
+	if (d->present)
+		hwgroup->drive = d;
+	while (hwgroup->hwif->next != hwif)
+		hwgroup->hwif = hwgroup->hwif->next;
+	hwgroup->hwif->next = hwif->next;
+	if (hwgroup->hwif == hwif)
+		kfree(hwgroup);
+	else
+		hwgroup->hwif = HWIF(hwgroup->drive);
+
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+	if (hwif->dma_base) {
+		(void) ide_release_dma(hwif);
+		hwif->dma_base = 0;
+	}
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+
+	/*
+	 * Remove us from the kernel's knowledge
+	 */
+	unregister_blkdev(hwif->major, hwif->name);
+	kfree(blksize_size[hwif->major]);
+	kfree(max_sectors[hwif->major]);
+	/*kfree(max_readahead[hwif->major]);*/
+	blk_dev[hwif->major].data = NULL;
+	blk_dev[hwif->major].queue = NULL;
+	blksize_size[hwif->major] = NULL;
+	gd = hwif->gd;
+	if (gd) {
+		del_gendisk(gd);
+		kfree(gd->sizes);
+		kfree(gd->part);
+#ifdef DEVFS_MUST_DIE
+		if (gd->de_arr)
+			kfree (gd->de_arr);
+#endif
+		if (gd->flags)
+			kfree (gd->flags);
+		kfree(gd);
+		hwif->gd = NULL;
+	}
+	old_hwif = *hwif;
+	init_hwif_data (index);	/* restore hwif data to pristine status */
+	hwif->hwgroup = old_hwif.hwgroup;
+	hwif->tuneproc = old_hwif.tuneproc;
+	hwif->speedproc = old_hwif.speedproc;
+	hwif->selectproc = old_hwif.selectproc;
+	hwif->resetproc = old_hwif.resetproc;
+	hwif->intrproc = old_hwif.intrproc;
+	hwif->maskproc = old_hwif.maskproc;
+	hwif->quirkproc = old_hwif.quirkproc;
+	hwif->rwproc = old_hwif.rwproc;
+	hwif->ideproc = old_hwif.ideproc;
+	hwif->dmaproc = old_hwif.dmaproc;
+	hwif->busproc = old_hwif.busproc;
+	hwif->bus_state = old_hwif.bus_state;
+	hwif->dma_base = old_hwif.dma_base;
+	hwif->dma_extra = old_hwif.dma_extra;
+	hwif->config_data = old_hwif.config_data;
+	hwif->select_data = old_hwif.select_data;
+	hwif->proc = old_hwif.proc;
+#ifndef CONFIG_BLK_DEV_IDECS
+	hwif->irq = old_hwif.irq;
+#endif /* CONFIG_BLK_DEV_IDECS */
+	hwif->major = old_hwif.major;
+	hwif->chipset = old_hwif.chipset;
+	hwif->autodma = old_hwif.autodma;
+	hwif->udma_four = old_hwif.udma_four;
+#ifdef CONFIG_BLK_DEV_IDEPCI
+	hwif->pci_dev = old_hwif.pci_dev;
+	hwif->pci_devid = old_hwif.pci_devid;
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+	hwif->straight8 = old_hwif.straight8;
+	hwif->hwif_data = old_hwif.hwif_data;
+abort:
+	restore_flags(flags);	/* all CPUs */
+}
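One detail worth noting in ide_unregister() above: several interfaces in a hwgroup may share one IRQ line, so the code walks the circular hwif ring and only calls free_irq() when exactly one user remains. A standalone user-space sketch of that walk, with a simplified node type (the names here are illustrative, not kernel types):

#include <stdio.h>

struct fake_hwif { int irq; struct fake_hwif *next; };

/* count how many interfaces in the ring use the given IRQ */
static int irq_users(struct fake_hwif *ring, int irq)
{
	struct fake_hwif *g = ring;
	int count = 0;
	do {
		if (g->irq == irq)
			++count;
		g = g->next;
	} while (g != ring);
	return count;
}

int main(void)
{
	struct fake_hwif h0 = { 14, 0 }, h1 = { 15, 0 }, h2 = { 14, 0 };
	h0.next = &h1; h1.next = &h2; h2.next = &h0;	/* close the ring */
	printf("irq 14 users: %d\n", irq_users(&h0, 14));	/* 2: keep it */
	printf("irq 15 users: %d\n", irq_users(&h0, 15));	/* 1: safe to free */
	return 0;
}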
+
+/*
+ * Setup hw_regs_t structure described by parameters.  You
+ * may set up the hw structure yourself OR use this routine to
+ * do it for you.
+ */
+void ide_setup_ports (	hw_regs_t *hw,
+			ide_ioreg_t base, int *offsets,
+			ide_ioreg_t ctrl, ide_ioreg_t intr,
+			ide_ack_intr_t *ack_intr, int irq)
+{
+	int i;
+
+	for (i = 0; i < IDE_NR_PORTS; i++) {
+		if (offsets[i] == -1) {
+			switch(i) {
+				case IDE_CONTROL_OFFSET:
+					hw->io_ports[i] = ctrl;
+					break;
+#if defined(CONFIG_AMIGA) || defined(CONFIG_MAC)
+				case IDE_IRQ_OFFSET:
+					hw->io_ports[i] = intr;
+					break;
+#endif /* (CONFIG_AMIGA) || (CONFIG_MAC) */
+				default:
+					hw->io_ports[i] = 0;
+					break;
+			}
+		} else {
+			hw->io_ports[i] = base + offsets[i];
+		}
+	}
+	hw->irq = irq;
+	hw->dma = NO_DMA;
+	hw->ack_intr = ack_intr;
+}
+
+/*
+ * Register an IDE interface, specifying exactly the registers etc.
+ * Set init=1 iff calling before probes have taken place.
+ */
+int ide_register_hw (hw_regs_t *hw, ide_hwif_t **hwifp)
+{
+	int index, retry = 1;
+	ide_hwif_t *hwif;
+
+	do {
+		for (index = 0; index < MAX_HWIFS; ++index) {
+			hwif = &ide_hwifs[index];
+			if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+				goto found;
+		}
+		for (index = 0; index < MAX_HWIFS; ++index) {
+			hwif = &ide_hwifs[index];
+			if ((!hwif->present && !hwif->mate && !initializing) ||
+			    (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+				goto found;
+		}
+		for (index = 0; index < MAX_HWIFS; index++)
+			ide_unregister(index);
+	} while (retry--);
+	return -1;
+found:
+	if (hwif->present)
+		ide_unregister(index);
+	if (hwif->present)
+		return -1;
+	memcpy(&hwif->hw, hw, sizeof(*hw));
+	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+	hwif->irq = hw->irq;
+	hwif->noprobe = 0;
+	hwif->chipset = hw->chipset;
+
+	if (!initializing) {
+		ide_probe_module();
+#ifdef CONFIG_PROC_FS
+		create_proc_ide_interfaces();
+#endif
+		ide_driver_module();
+	}
+
+	if (hwifp)
+		*hwifp = hwif;
+
+	return (initializing || hwif->present) ? index : -1;
+}
+
+/*
+ * Compatibility function with existing drivers.  If you want
+ * something different, use the function above.
+ */ +int ide_register (int arg1, int arg2, int irq) +{ + hw_regs_t hw; + ide_init_hwif_ports(&hw, (ide_ioreg_t) arg1, (ide_ioreg_t) arg2, NULL); + hw.irq = irq; + return ide_register_hw(&hw, NULL); +} + +void ide_add_setting (ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set) +{ + ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL; + + while ((*p) && strcmp((*p)->name, name) < 0) + p = &((*p)->next); + if ((setting = kmalloc(sizeof(*setting), GFP_KERNEL)) == NULL) + goto abort; + memset(setting, 0, sizeof(*setting)); + if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL) + goto abort; + strcpy(setting->name, name); setting->rw = rw; + setting->read_ioctl = read_ioctl; setting->write_ioctl = write_ioctl; + setting->data_type = data_type; setting->min = min; + setting->max = max; setting->mul_factor = mul_factor; + setting->div_factor = div_factor; setting->data = data; + setting->set = set; setting->next = *p; + if (drive->driver) + setting->auto_remove = 1; + *p = setting; + return; +abort: + if (setting) + kfree(setting); +} + +void ide_remove_setting (ide_drive_t *drive, char *name) +{ + ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting; + + while ((*p) && strcmp((*p)->name, name)) + p = &((*p)->next); + if ((setting = (*p)) == NULL) + return; + (*p) = setting->next; + kfree(setting->name); + kfree(setting); +} + +static ide_settings_t *ide_find_setting_by_ioctl (ide_drive_t *drive, int cmd) +{ + ide_settings_t *setting = drive->settings; + + while (setting) { + if (setting->read_ioctl == cmd || setting->write_ioctl == cmd) + break; + setting = setting->next; + } + return setting; +} + +ide_settings_t *ide_find_setting_by_name (ide_drive_t *drive, char *name) +{ + ide_settings_t *setting = drive->settings; + + while (setting) { + if (strcmp(setting->name, name) == 0) + break; + setting = setting->next; + } + return setting; +} + +static void auto_remove_settings (ide_drive_t *drive) +{ + ide_settings_t *setting; +repeat: + setting = drive->settings; + while (setting) { + if (setting->auto_remove) { + ide_remove_setting(drive, setting->name); + goto repeat; + } + setting = setting->next; + } +} + +int ide_read_setting (ide_drive_t *drive, ide_settings_t *setting) +{ + int val = -EINVAL; + unsigned long flags; + + if ((setting->rw & SETTING_READ)) { + spin_lock_irqsave(&io_request_lock, flags); + switch(setting->data_type) { + case TYPE_BYTE: + val = *((u8 *) setting->data); + break; + case TYPE_SHORT: + val = *((u16 *) setting->data); + break; + case TYPE_INT: + case TYPE_INTA: + val = *((u32 *) setting->data); + break; + } + spin_unlock_irqrestore(&io_request_lock, flags); + } + return val; +} + +int ide_spin_wait_hwgroup (ide_drive_t *drive) +{ + ide_hwgroup_t *hwgroup = HWGROUP(drive); + unsigned long timeout = jiffies + (3 * HZ); + + spin_lock_irq(&io_request_lock); + + while (hwgroup->busy) { + unsigned long lflags; + spin_unlock_irq(&io_request_lock); + __save_flags(lflags); /* local CPU only */ + __sti(); /* local CPU only; needed for jiffies */ + if (0 < (signed long)(jiffies - timeout)) { + __restore_flags(lflags); /* local CPU only */ + printk("%s: channel busy\n", drive->name); + return -EBUSY; + } + __restore_flags(lflags); /* local CPU only */ + spin_lock_irq(&io_request_lock); + } + return 0; +} + +/* + * FIXME: This should be changed to enqueue a special request + * to the driver to change 
settings, and then wait on a sema for completion. + * The current scheme of polling is kludgey, though safe enough. + */ +int ide_write_setting (ide_drive_t *drive, ide_settings_t *setting, int val) +{ + int i; + u32 *p; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(setting->rw & SETTING_WRITE)) + return -EPERM; + if (val < setting->min || val > setting->max) + return -EINVAL; + if (setting->set) + return setting->set(drive, val); + if (ide_spin_wait_hwgroup(drive)) + return -EBUSY; + switch (setting->data_type) { + case TYPE_BYTE: + *((u8 *) setting->data) = val; + break; + case TYPE_SHORT: + *((u16 *) setting->data) = val; + break; + case TYPE_INT: + *((u32 *) setting->data) = val; + break; + case TYPE_INTA: + p = (u32 *) setting->data; + for (i = 0; i < 1 << PARTN_BITS; i++, p++) + *p = val; + break; + } + spin_unlock_irq(&io_request_lock); + return 0; +} + +static int set_io_32bit(ide_drive_t *drive, int arg) +{ + drive->io_32bit = arg; +#ifdef CONFIG_BLK_DEV_DTC2278 + if (HWIF(drive)->chipset == ide_dtc2278) + HWIF(drive)->drives[!drive->select.b.unit].io_32bit = arg; +#endif /* CONFIG_BLK_DEV_DTC2278 */ + return 0; +} + +static int set_using_dma (ide_drive_t *drive, int arg) +{ + if (!drive->driver || !DRIVER(drive)->supports_dma) + return -EPERM; + if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc) + return -EPERM; + if (HWIF(drive)->dmaproc(arg ? ide_dma_on : ide_dma_off, drive)) + return -EIO; + return 0; +} + +static int set_pio_mode (ide_drive_t *drive, int arg) +{ + struct request rq; + + if (!HWIF(drive)->tuneproc) + return -ENOSYS; + if (drive->special.b.set_tune) + return -EBUSY; + ide_init_drive_cmd(&rq); + drive->tune_req = (byte) arg; + drive->special.b.set_tune = 1; + (void) ide_do_drive_cmd (drive, &rq, ide_wait); + return 0; +} + +void ide_add_generic_settings (ide_drive_t *drive) +{ +/* + * drive setting name read/write access read ioctl write ioctl data type min max mul_factor div_factor data pointer set function + */ + ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, HDIO_GET_32BIT, HDIO_SET_32BIT, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit); + ide_add_setting(drive, "keepsettings", SETTING_RW, HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL); + ide_add_setting(drive, "nice1", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL); + ide_add_setting(drive, "pio_mode", SETTING_WRITE, -1, HDIO_SET_PIO_MODE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode); + ide_add_setting(drive, "slow", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->slow, NULL); + ide_add_setting(drive, "unmaskirq", drive->no_unmask ? 
SETTING_READ : SETTING_RW, HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL);
+	ide_add_setting(drive, "using_dma", SETTING_RW, HDIO_GET_DMA, HDIO_SET_DMA, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma);
+	ide_add_setting(drive, "ide_scsi", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->scsi, NULL);
+	ide_add_setting(drive, "init_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->init_speed, NULL);
+	ide_add_setting(drive, "current_speed", SETTING_RW, -1, -1, TYPE_BYTE, 0, 69, 1, 1, &drive->current_speed, NULL);
+	ide_add_setting(drive, "number", SETTING_RW, -1, -1, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL);
+}
+
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf)
+{
+	struct request rq;
+	byte buffer[4];
+
+	if (!buf)
+		buf = buffer;
+	memset(buf, 0, 4 + SECTOR_WORDS * 4 * sectors);
+	ide_init_drive_cmd(&rq);
+	rq.buffer = buf;
+	*buf++ = cmd;
+	*buf++ = nsect;
+	*buf++ = feature;
+	*buf++ = sectors;
+	return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf)
+{
+	struct request rq;
+
+	ide_init_drive_cmd(&rq);
+	rq.cmd = IDE_DRIVE_TASK;
+	rq.buffer = buf;
+	return ide_do_drive_cmd(drive, &rq, ide_wait);
+}
+
+/*
+ * Delay for *at least* 50ms.  As we don't know how much time is left
+ * until the next tick occurs, we wait an extra tick to be safe.
+ * This is used only during the probing/polling for drives at boot time.
+ *
+ * However, it may prove useful in other places as well, thus we export it now.
+ * The future may change this to a millisecond settable delay.
+ */
+void ide_delay_50ms (void)
+{
+#ifndef CONFIG_BLK_DEV_IDECS
+	mdelay(50);
+#else
+	__set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(HZ/20);
+#endif /* CONFIG_BLK_DEV_IDECS */
+}
+
+int system_bus_clock (void)
+{
+	return((int) ((!system_bus_speed) ?
ide_system_bus_speed() : system_bus_speed )); +} + +int ide_reinit_drive (ide_drive_t *drive) +{ + switch (drive->media) { +#ifdef CONFIG_BLK_DEV_IDECD + case ide_cdrom: + { + extern int ide_cdrom_reinit(ide_drive_t *drive); + if (ide_cdrom_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDECD */ +#ifdef CONFIG_BLK_DEV_IDEDISK + case ide_disk: + { + extern int idedisk_reinit(ide_drive_t *drive); + if (idedisk_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDEDISK */ +#ifdef CONFIG_BLK_DEV_IDEFLOPPY + case ide_floppy: + { + extern int idefloppy_reinit(ide_drive_t *drive); + if (idefloppy_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ +#ifdef CONFIG_BLK_DEV_IDETAPE + case ide_tape: + { + extern int idetape_reinit(ide_drive_t *drive); + if (idetape_reinit(drive)) + return 1; + break; + } +#endif /* CONFIG_BLK_DEV_IDETAPE */ +#ifdef CONFIG_BLK_DEV_IDESCSI +/* + * { + * extern int idescsi_reinit(ide_drive_t *drive); + * if (idescsi_reinit(drive)) + * return 1; + * break; + * } + */ +#endif /* CONFIG_BLK_DEV_IDESCSI */ + default: + return 1; + } + return 0; +} + +static int ide_ioctl (struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int err = 0, major, minor; + ide_drive_t *drive; + struct request rq; + kdev_t dev; + ide_settings_t *setting; + + if (!inode || !(dev = inode->i_rdev)) + return -EINVAL; + major = MAJOR(dev); minor = MINOR(dev); + if ((drive = get_info_ptr(inode->i_rdev)) == NULL) + return -ENODEV; + + if ((setting = ide_find_setting_by_ioctl(drive, cmd)) != NULL) { + if (cmd == setting->read_ioctl) { + err = ide_read_setting(drive, setting); + return err >= 0 ? put_user(err, (long *) arg) : err; + } else { + if ((MINOR(inode->i_rdev) & PARTN_MASK)) + return -EINVAL; + return ide_write_setting(drive, setting, arg); + } + } + + ide_init_drive_cmd (&rq); + switch (cmd) { + case HDIO_GETGEO: + { + struct hd_geometry *loc = (struct hd_geometry *) arg; + unsigned short bios_cyl = drive->bios_cyl; /* truncate */ + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; + if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; + if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; + if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; + if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, + (unsigned long *) &loc->start)) return -EFAULT; + return 0; + } + + case HDIO_GETGEO_BIG: + { + struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; + if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; + if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; + if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; + if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, + (unsigned long *) &loc->start)) return -EFAULT; + return 0; + } + + case HDIO_GETGEO_BIG_RAW: + { + struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; + if (put_user(drive->head, (byte *) &loc->heads)) return -EFAULT; + if (put_user(drive->sect, (byte *) &loc->sectors)) return -EFAULT; + if (put_user(drive->cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; + if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, + 
(unsigned long *) &loc->start)) return -EFAULT; + return 0; + } + +#if 0 + case BLKGETSIZE: /* Return device size */ + return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg); + case BLKGETSIZE64: + return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, (u64 *) arg); + + case BLKRRPART: /* Re-read partition tables */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + return ide_revalidate_disk(inode->i_rdev); +#endif + + case HDIO_OBSOLETE_IDENTITY: + case HDIO_GET_IDENTITY: + if (MINOR(inode->i_rdev) & PARTN_MASK) + return -EINVAL; + if (drive->id == NULL) + return -ENOMSG; + if (copy_to_user((char *)arg, (char *)drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142)) + return -EFAULT; + return 0; + + case HDIO_GET_NICE: + return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP | + drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP | + drive->nice0 << IDE_NICE_0 | + drive->nice1 << IDE_NICE_1 | + drive->nice2 << IDE_NICE_2, + (long *) arg); + +#ifdef CONFIG_IDE_TASK_IOCTL + case HDIO_DRIVE_TASKFILE: + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + switch(drive->media) { + case ide_disk: + return ide_taskfile_ioctl(drive, inode, file, cmd, arg); +#ifdef CONFIG_PKT_TASK_IOCTL + case ide_cdrom: + case ide_tape: + case ide_floppy: + return pkt_taskfile_ioctl(drive, inode, file, cmd, arg); +#endif /* CONFIG_PKT_TASK_IOCTL */ + default: + return -ENOMSG; + } +#endif /* CONFIG_IDE_TASK_IOCTL */ + + case HDIO_DRIVE_CMD: + { + byte args[4], *argbuf = args; + byte xfer_rate = 0; + int argsize = 4; + ide_task_t tfargs; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + if (NULL == (void *) arg) + return ide_do_drive_cmd(drive, &rq, ide_wait); + if (copy_from_user(args, (void *)arg, 4)) + return -EFAULT; + + tfargs.tfRegister[IDE_FEATURE_OFFSET] = args[2]; + tfargs.tfRegister[IDE_NSECTOR_OFFSET] = args[3]; + tfargs.tfRegister[IDE_SECTOR_OFFSET] = args[1]; + tfargs.tfRegister[IDE_LCYL_OFFSET] = 0x00; + tfargs.tfRegister[IDE_HCYL_OFFSET] = 0x00; + tfargs.tfRegister[IDE_SELECT_OFFSET] = 0x00; + tfargs.tfRegister[IDE_COMMAND_OFFSET] = args[0]; + + if (args[3]) { + argsize = 4 + (SECTOR_WORDS * 4 * args[3]); + argbuf = kmalloc(argsize, GFP_KERNEL); + if (argbuf == NULL) + return -ENOMEM; + memcpy(argbuf, args, 4); + } + + if (set_transfer(drive, &tfargs)) { + xfer_rate = args[1]; + if (ide_ata66_check(drive, &tfargs)) + goto abort; + } + + err = ide_wait_cmd(drive, args[0], args[1], args[2], args[3], argbuf); + + if (!err && xfer_rate) { + /* active-retuning-calls future */ + if ((HWIF(drive)->speedproc) != NULL) + HWIF(drive)->speedproc(drive, xfer_rate); + ide_driveid_update(drive); + } + abort: + if (copy_to_user((void *)arg, argbuf, argsize)) + err = -EFAULT; + if (argsize > 4) + kfree(argbuf); + return err; + } + case HDIO_DRIVE_TASK: + { + byte args[7], *argbuf = args; + int argsize = 7; + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; + if (copy_from_user(args, (void *)arg, 7)) + return -EFAULT; + err = ide_wait_cmd_task(drive, argbuf); + if (copy_to_user((void *)arg, argbuf, argsize)) + err = -EFAULT; + return err; + } + case HDIO_SCAN_HWIF: + { + int args[3]; + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (copy_from_user(args, (void *)arg, 3 * sizeof(int))) + return -EFAULT; + if (ide_register(args[0], args[1], args[2]) == -1) + return -EIO; + return 0; + } + case HDIO_UNREGISTER_HWIF: + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + /* 
(arg > MAX_HWIFS) checked in function */ + ide_unregister(arg); + return 0; + case HDIO_SET_NICE: + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (drive->driver == NULL) + return -EPERM; + if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1)))) + return -EPERM; + drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1; + if (drive->dsc_overlap && !DRIVER(drive)->supports_dsc_overlap) { + drive->dsc_overlap = 0; + return -EPERM; + } + drive->nice1 = (arg >> IDE_NICE_1) & 1; + return 0; + case HDIO_DRIVE_RESET: + { + unsigned long flags; + ide_hwgroup_t *hwgroup = HWGROUP(drive); + + if (!capable(CAP_SYS_ADMIN)) return -EACCES; +#if 1 + spin_lock_irqsave(&io_request_lock, flags); + if (hwgroup->handler != NULL) { + printk("%s: ide_set_handler: handler not null; %p\n", drive->name, hwgroup->handler); + (void) hwgroup->handler(drive); +// hwgroup->handler = NULL; +// hwgroup->expiry = NULL; + hwgroup->timer.expires = jiffies + 0;; + del_timer(&hwgroup->timer); + } + spin_unlock_irqrestore(&io_request_lock, flags); + +#endif + (void) ide_do_reset(drive); + if (drive->suspend_reset) { +/* + * APM WAKE UP todo !! + * int nogoodpower = 1; + * while(nogoodpower) { + * check_power1() or check_power2() + * nogoodpower = 0; + * } + * HWIF(drive)->multiproc(drive); + */ + return ide_revalidate_disk(inode->i_rdev); + } + return 0; + } +#if 0 + case BLKROSET: + case BLKROGET: + case BLKFLSBUF: + case BLKSSZGET: + case BLKPG: + case BLKELVGET: + case BLKELVSET: + case BLKBSZGET: + case BLKBSZSET: + return blk_ioctl(inode->i_rdev, cmd, arg); +#endif + + case HDIO_GET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (put_user(HWIF(drive)->bus_state, (long *)arg)) + return -EFAULT; + return 0; + + case HDIO_SET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (HWIF(drive)->busproc) + HWIF(drive)->busproc(drive, (int)arg); + return 0; + + default: + if (drive->driver != NULL) + return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg); + return -EPERM; + } +} + +static int ide_check_media_change (kdev_t i_rdev) +{ + ide_drive_t *drive; + + if ((drive = get_info_ptr(i_rdev)) == NULL) + return -ENODEV; + if (drive->driver != NULL) + return DRIVER(drive)->media_change(drive); + return 0; +} + +void ide_fixstring (byte *s, const int bytecount, const int byteswap) +{ + byte *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */ + + if (byteswap) { + /* convert from big-endian to host byte order */ + for (p = end ; p != s;) { + unsigned short *pp = (unsigned short *) (p -= 2); + *pp = ntohs(*pp); + } + } + + /* strip leading blanks */ + while (s != end && *s == ' ') + ++s; + + /* compress internal blanks and strip trailing blanks */ + while (s != end && *s) { + if (*s++ != ' ' || (s != end && *s && *s != ' ')) + *p++ = *(s-1); + } + + /* wipe out trailing garbage */ + while (p != end) + *p++ = '\0'; +} + +/* + * stridx() returns the offset of c within s, + * or -1 if c is '\0' or not found within s. + */ +static int __init stridx (const char *s, char c) +{ + char *i = strchr(s, c); + return (i && c) ? i - s : -1; +} + +/* + * match_parm() does parsing for ide_setup(): + * + * 1. the first char of s must be '='. + * 2. if the remainder matches one of the supplied keywords, + * the index (1 based) of the keyword is negated and returned. + * 3. if the remainder is a series of no more than max_vals numbers + * separated by commas, the numbers are saved in vals[] and a + * count of how many were saved is returned. 
Base10 is assumed,
+ *    and base16 is allowed when prefixed with "0x".
+ * 4. otherwise, zero is returned.
+ */
+static int __init match_parm (char *s, const char *keywords[], int vals[], int max_vals)
+{
+	static const char *decimal = "0123456789";
+	static const char *hex = "0123456789abcdef";
+	int i, n;
+
+	if (*s++ == '=') {
+		/*
+		 * Try matching against the supplied keywords,
+		 * and return -(index+1) if we match one
+		 */
+		if (keywords != NULL) {
+			for (i = 0; *keywords != NULL; ++i) {
+				if (!strcmp(s, *keywords++))
+					return -(i+1);
+			}
+		}
+		/*
+		 * Look for a series of no more than "max_vals"
+		 * numeric values separated by commas, in base10,
+		 * or base16 when prefixed with "0x".
+		 * Return a count of how many were found.
+		 */
+		for (n = 0; (i = stridx(decimal, *s)) >= 0;) {
+			vals[n] = i;
+			while ((i = stridx(decimal, *++s)) >= 0)
+				vals[n] = (vals[n] * 10) + i;
+			if (*s == 'x' && !vals[n]) {
+				while ((i = stridx(hex, *++s)) >= 0)
+					vals[n] = (vals[n] * 0x10) + i;
+			}
+			if (++n == max_vals)
+				break;
+			if (*s == ',' || *s == ';')
+				++s;
+		}
+		if (!*s)
+			return n;
+	}
+	return 0;	/* zero = nothing matched */
+}
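To see what match_parm() returns in practice, here is a small user-space harness; it assumes stridx() and match_parm() from above are compiled in with their static/__init annotations dropped, since those have no meaning outside the kernel:

#include <stdio.h>

int match_parm (char *s, const char *keywords[], int vals[], int max_vals);

int main(void)
{
	const char *kw[] = { "noprobe", "serialize", NULL };
	char opt1[] = "=serialize";
	char opt2[] = "=0x1f0,0x3f6,14";
	int vals[3], n;

	/* keyword match: -(index+1), so the 2nd keyword gives -2 */
	printf("%d\n", match_parm(opt1, kw, vals, 3));

	/* numeric list: returns the count, hex accepted with "0x" */
	n = match_parm(opt2, NULL, vals, 3);
	printf("%d: %x %x %d\n", n, vals[0], vals[1], vals[2]);
	/* prints "3: 1f0 3f6 14" */
	return 0;
}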
+ * + * "idex=noprobe" : do not attempt to access/use this interface + * "idex=base" : probe for an interface at the addr specified, + * where "base" is usually 0x1f0 or 0x170 + * and "ctl" is assumed to be "base"+0x206 + * "idex=base,ctl" : specify both base and ctl + * "idex=base,ctl,irq" : specify base, ctl, and irq number + * "idex=autotune" : driver will attempt to tune interface speed + * to the fastest PIO mode supported, + * for all drives on this interface. + * Not fully supported by all chipset types, + * and quite likely to cause trouble with + * older/odd IDE drives. + * "idex=noautotune" : driver will NOT attempt to tune interface speed + * This is the default for most chipsets, + * except the cmd640. + * "idex=serialize" : do not overlap operations on idex and ide(x^1) + * "idex=four" : four drives on idex and ide(x^1) share same ports + * "idex=reset" : reset interface before first use + * "idex=dma" : enable DMA by default on both drives if possible + * "idex=ata66" : informs the interface that it has an 80c cable + * for chipsets that are ATA-66 capable, but + * the ablity to bit test for detection is + * currently unknown. + * "ide=reverse" : Formerly called to pci sub-system, but now local. + * + * The following are valid ONLY on ide0, (except dc4030) + * and the defaults for the base,ctl ports must not be altered. + * + * "ide0=dtc2278" : probe/support DTC2278 interface + * "ide0=ht6560b" : probe/support HT6560B interface + * "ide0=cmd640_vlb" : *REQUIRED* for VLB cards with the CMD640 chip + * (not for PCI -- automatically detected) + * "ide0=qd65xx" : probe/support qd65xx interface + * "ide0=ali14xx" : probe/support ali14xx chipsets (ALI M1439, M1443, M1445) + * "ide0=umc8672" : probe/support umc8672 chipsets + * "idex=dc4030" : probe/support Promise DC4030VL interface + * "ide=doubler" : probe/support IDE doublers on Amiga + */ +int __init ide_setup (char *s) +{ + int i, vals[3]; + ide_hwif_t *hwif; + ide_drive_t *drive; + unsigned int hw, unit; + const char max_drive = 'a' + ((MAX_HWIFS * MAX_DRIVES) - 1); + const char max_hwif = '0' + (MAX_HWIFS - 1); + + + if (strncmp(s,"hd",2) == 0 && s[2] == '=') /* hd= is for hd.c */ + return 0; /* driver and not us */ + + if (strncmp(s,"ide",3) && + strncmp(s,"idebus",6) && + strncmp(s,"hd",2)) /* hdx= & hdxlun= */ + return 0; + + printk("ide_setup: %s", s); + init_ide_data (); + +#ifdef CONFIG_BLK_DEV_IDEDOUBLER + if (!strcmp(s, "ide=doubler")) { + extern int ide_doubler; + + printk(" : Enabled support for IDE doublers\n"); + ide_doubler = 1; + return 1; + } +#endif /* CONFIG_BLK_DEV_IDEDOUBLER */ + + if (!strcmp(s, "ide=nodma")) { + printk("IDE: Prevented DMA\n"); + noautodma = 1; + return 1; + } + +#ifdef CONFIG_BLK_DEV_IDEPCI + if (!strcmp(s, "ide=reverse")) { + ide_scan_direction = 1; + printk(" : Enabled support for IDE inverse scan order.\n"); + return 1; + } +#endif /* CONFIG_BLK_DEV_IDEPCI */ + + /* + * Look for drive options: "hdx=" + */ + if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) { + const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom", + "serialize", "autotune", "noautotune", + "slow", "swapdata", "bswap", "flash", + "remap", "noremap", "scsi", NULL}; + unit = s[2] - 'a'; + hw = unit / MAX_DRIVES; + unit = unit % MAX_DRIVES; + hwif = &ide_hwifs[hw]; + drive = &hwif->drives[unit]; + if (strncmp(s + 4, "ide-", 4) == 0) { + strncpy(drive->driver_req, s + 4, 9); + goto done; + } + /* + * Look for last lun option: "hdxlun=" + */ + if (s[3] == 'l' && s[4] == 'u' && s[5] == 'n') { + if 
(match_parm(&s[6], NULL, vals, 1) != 1) + goto bad_option; + if (vals[0] >= 0 && vals[0] <= 7) { + drive->last_lun = vals[0]; + drive->forced_lun = 1; + } else + printk(" -- BAD LAST LUN! Expected value from 0 to 7"); + goto done; + } + switch (match_parm(&s[3], hd_words, vals, 3)) { + case -1: /* "none" */ + drive->nobios = 1; /* drop into "noprobe" */ + case -2: /* "noprobe" */ + drive->noprobe = 1; + goto done; + case -3: /* "nowerr" */ + drive->bad_wstat = BAD_R_STAT; + hwif->noprobe = 0; + goto done; + case -4: /* "cdrom" */ + drive->present = 1; + drive->media = ide_cdrom; + hwif->noprobe = 0; + goto done; + case -5: /* "serialize" */ + printk(" -- USE \"ide%d=serialize\" INSTEAD", hw); + goto do_serialize; + case -6: /* "autotune" */ + drive->autotune = 1; + goto done; + case -7: /* "noautotune" */ + drive->autotune = 2; + goto done; + case -8: /* "slow" */ + drive->slow = 1; + goto done; + case -9: /* "swapdata" or "bswap" */ + case -10: + drive->bswap = 1; + goto done; + case -11: /* "flash" */ + drive->ata_flash = 1; + goto done; + case -12: /* "remap" */ + drive->remap_0_to_1 = 1; + goto done; + case -13: /* "noremap" */ + drive->remap_0_to_1 = 2; + goto done; + case -14: /* "scsi" */ +#if defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) + drive->scsi = 1; + goto done; +#else + drive->scsi = 0; + goto bad_option; +#endif /* defined(CONFIG_BLK_DEV_IDESCSI) && defined(CONFIG_SCSI) */ + case 3: /* cyl,head,sect */ + drive->media = ide_disk; + drive->cyl = drive->bios_cyl = vals[0]; + drive->head = drive->bios_head = vals[1]; + drive->sect = drive->bios_sect = vals[2]; + drive->present = 1; + drive->forced_geom = 1; + hwif->noprobe = 0; + goto done; + default: + goto bad_option; + } + } + + if (s[0] != 'i' || s[1] != 'd' || s[2] != 'e') + goto bad_option; + /* + * Look for bus speed option: "idebus=" + */ + if (s[3] == 'b' && s[4] == 'u' && s[5] == 's') { + if (match_parm(&s[6], NULL, vals, 1) != 1) + goto bad_option; + if (vals[0] >= 20 && vals[0] <= 66) { + idebus_parameter = vals[0]; + } else + printk(" -- BAD BUS SPEED! Expected value from 20 to 66"); + goto done; + } + /* + * Look for interface options: "idex=" + */ + if (s[3] >= '0' && s[3] <= max_hwif) { + /* + * Be VERY CAREFUL changing this: note hardcoded indexes below + * -8,-9,-10 : are reserved for future idex calls to ease the hardcoding. + */ + const char *ide_words[] = { + "noprobe", "serialize", "autotune", "noautotune", "reset", "dma", "ata66", + "minus8", "minus9", "minus10", + "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL }; + hw = s[3] - '0'; + hwif = &ide_hwifs[hw]; + i = match_parm(&s[4], ide_words, vals, 3); + + /* + * Cryptic check to ensure chipset not already set for hwif: + */ + if (i > 0 || i <= -11) { /* is parameter a chipset name? 
*/ + if (hwif->chipset != ide_unknown) + goto bad_option; /* chipset already specified */ + if (i <= -11 && i != -18 && hw != 0) + goto bad_hwif; /* chipset drivers are for "ide0=" only */ + if (i <= -11 && i != -18 && ide_hwifs[hw+1].chipset != ide_unknown) + goto bad_option; /* chipset for 2nd port already specified */ + printk("\n"); + } + + switch (i) { +#ifdef CONFIG_BLK_DEV_PDC4030 + case -18: /* "dc4030" */ + { + extern void init_pdc4030(void); + init_pdc4030(); + goto done; + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ +#ifdef CONFIG_BLK_DEV_ALI14XX + case -17: /* "ali14xx" */ + { + extern void init_ali14xx (void); + init_ali14xx(); + goto done; + } +#endif /* CONFIG_BLK_DEV_ALI14XX */ +#ifdef CONFIG_BLK_DEV_UMC8672 + case -16: /* "umc8672" */ + { + extern void init_umc8672 (void); + init_umc8672(); + goto done; + } +#endif /* CONFIG_BLK_DEV_UMC8672 */ +#ifdef CONFIG_BLK_DEV_DTC2278 + case -15: /* "dtc2278" */ + { + extern void init_dtc2278 (void); + init_dtc2278(); + goto done; + } +#endif /* CONFIG_BLK_DEV_DTC2278 */ +#ifdef CONFIG_BLK_DEV_CMD640 + case -14: /* "cmd640_vlb" */ + { + extern int cmd640_vlb; /* flag for cmd640.c */ + cmd640_vlb = 1; + goto done; + } +#endif /* CONFIG_BLK_DEV_CMD640 */ +#ifdef CONFIG_BLK_DEV_HT6560B + case -13: /* "ht6560b" */ + { + extern void init_ht6560b (void); + init_ht6560b(); + goto done; + } +#endif /* CONFIG_BLK_DEV_HT6560B */ +#if CONFIG_BLK_DEV_QD65XX + case -12: /* "qd65xx" */ + { + extern void init_qd65xx (void); + init_qd65xx(); + goto done; + } +#endif /* CONFIG_BLK_DEV_QD65XX */ +#ifdef CONFIG_BLK_DEV_4DRIVES + case -11: /* "four" drives on one set of ports */ + { + ide_hwif_t *mate = &ide_hwifs[hw^1]; + mate->drives[0].select.all ^= 0x20; + mate->drives[1].select.all ^= 0x20; + hwif->chipset = mate->chipset = ide_4drives; + mate->irq = hwif->irq; + memcpy(mate->io_ports, hwif->io_ports, sizeof(hwif->io_ports)); + goto do_serialize; + } +#endif /* CONFIG_BLK_DEV_4DRIVES */ + case -10: /* minus10 */ + case -9: /* minus9 */ + case -8: /* minus8 */ + goto bad_option; + case -7: /* ata66 */ +#ifdef CONFIG_BLK_DEV_IDEPCI + hwif->udma_four = 1; + goto done; +#else /* !CONFIG_BLK_DEV_IDEPCI */ + hwif->udma_four = 0; + goto bad_hwif; +#endif /* CONFIG_BLK_DEV_IDEPCI */ + case -6: /* dma */ + hwif->autodma = 1; + goto done; + case -5: /* "reset" */ + hwif->reset = 1; + goto done; + case -4: /* "noautotune" */ + hwif->drives[0].autotune = 2; + hwif->drives[1].autotune = 2; + goto done; + case -3: /* "autotune" */ + hwif->drives[0].autotune = 1; + hwif->drives[1].autotune = 1; + goto done; + case -2: /* "serialize" */ + do_serialize: + hwif->mate = &ide_hwifs[hw^1]; + hwif->mate->mate = hwif; + hwif->serialized = hwif->mate->serialized = 1; + goto done; + + case -1: /* "noprobe" */ + hwif->noprobe = 1; + goto done; + + case 1: /* base */ + vals[1] = vals[0] + 0x206; /* default ctl */ + case 2: /* base,ctl */ + vals[2] = 0; /* default irq = probe for it */ + case 3: /* base,ctl,irq */ + hwif->hw.irq = vals[2]; + ide_init_hwif_ports(&hwif->hw, (ide_ioreg_t) vals[0], (ide_ioreg_t) vals[1], &hwif->irq); + memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports)); + hwif->irq = vals[2]; + hwif->noprobe = 0; + hwif->chipset = ide_generic; + goto done; + + case 0: goto bad_option; + default: + printk(" -- SUPPORT NOT CONFIGURED IN THIS KERNEL\n"); + return 1; + } + } +bad_option: + printk(" -- BAD OPTION\n"); + return 1; +bad_hwif: + printk("-- NOT SUPPORTED ON ide%d", hw); +done: + printk("\n"); + return 1; +} + +/* + * probe_for_hwifs() 
finds/initializes "known" IDE interfaces + */ +static void __init probe_for_hwifs (void) +{ +#ifdef CONFIG_PCI + if (pci_present()) + { +#ifdef CONFIG_BLK_DEV_IDEPCI + ide_scan_pcibus(ide_scan_direction); +#else +#ifdef CONFIG_BLK_DEV_RZ1000 + { + extern void ide_probe_for_rz100x(void); + ide_probe_for_rz100x(); + } +#endif /* CONFIG_BLK_DEV_RZ1000 */ +#endif /* CONFIG_BLK_DEV_IDEPCI */ + } +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_ETRAX_IDE + { + extern void init_e100_ide(void); + init_e100_ide(); + } +#endif /* CONFIG_ETRAX_IDE */ +#ifdef CONFIG_BLK_DEV_CMD640 + { + extern void ide_probe_for_cmd640x(void); + ide_probe_for_cmd640x(); + } +#endif /* CONFIG_BLK_DEV_CMD640 */ +#ifdef CONFIG_BLK_DEV_PDC4030 + { + extern int ide_probe_for_pdc4030(void); + (void) ide_probe_for_pdc4030(); + } +#endif /* CONFIG_BLK_DEV_PDC4030 */ +#ifdef CONFIG_BLK_DEV_IDE_PMAC + { + extern void pmac_ide_probe(void); + pmac_ide_probe(); + } +#endif /* CONFIG_BLK_DEV_IDE_PMAC */ +#ifdef CONFIG_BLK_DEV_IDE_SWARM + { + extern void swarm_ide_probe(void); + swarm_ide_probe(); + } +#endif /* CONFIG_BLK_DEV_IDE_SWARM */ +#ifdef CONFIG_BLK_DEV_IDE_ICSIDE + { + extern void icside_init(void); + icside_init(); + } +#endif /* CONFIG_BLK_DEV_IDE_ICSIDE */ +#ifdef CONFIG_BLK_DEV_IDE_RAPIDE + { + extern void rapide_init(void); + rapide_init(); + } +#endif /* CONFIG_BLK_DEV_IDE_RAPIDE */ +#ifdef CONFIG_BLK_DEV_GAYLE + { + extern void gayle_init(void); + gayle_init(); + } +#endif /* CONFIG_BLK_DEV_GAYLE */ +#ifdef CONFIG_BLK_DEV_FALCON_IDE + { + extern void falconide_init(void); + falconide_init(); + } +#endif /* CONFIG_BLK_DEV_FALCON_IDE */ +#ifdef CONFIG_BLK_DEV_MAC_IDE + { + extern void macide_init(void); + macide_init(); + } +#endif /* CONFIG_BLK_DEV_MAC_IDE */ +#ifdef CONFIG_BLK_DEV_Q40IDE + { + extern void q40ide_init(void); + q40ide_init(); + } +#endif /* CONFIG_BLK_DEV_Q40IDE */ +#ifdef CONFIG_BLK_DEV_BUDDHA + { + extern void buddha_init(void); + buddha_init(); + } +#endif /* CONFIG_BLK_DEV_BUDDHA */ +#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP) + { + extern void pnpide_init(int enable); + pnpide_init(1); + } +#endif /* CONFIG_BLK_DEV_ISAPNP */ +} + +void __init ide_init_builtin_drivers (void) +{ + /* + * Probe for special PCI and other "known" interface chipsets + */ + probe_for_hwifs (); + +#ifdef CONFIG_BLK_DEV_IDE +#if defined(__mc68000__) || defined(CONFIG_APUS) + if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { + ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */ + disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? 
*/ +// disable_irq_nosync(ide_hwifs[0].irq); + } +#endif /* __mc68000__ || CONFIG_APUS */ + + (void) ideprobe_init(); + +#if defined(__mc68000__) || defined(CONFIG_APUS) + if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { + enable_irq(ide_hwifs[0].irq); + ide_release_lock(&ide_lock); /* for atari only */ + } +#endif /* __mc68000__ || CONFIG_APUS */ +#endif /* CONFIG_BLK_DEV_IDE */ + +#ifdef CONFIG_PROC_FS + proc_ide_create(); +#endif + + /* + * Attempt to match drivers for the available drives + */ +#ifdef CONFIG_BLK_DEV_IDEDISK + (void) idedisk_init(); +#endif /* CONFIG_BLK_DEV_IDEDISK */ +#ifdef CONFIG_BLK_DEV_IDECD + (void) ide_cdrom_init(); +#endif /* CONFIG_BLK_DEV_IDECD */ +#ifdef CONFIG_BLK_DEV_IDETAPE + (void) idetape_init(); +#endif /* CONFIG_BLK_DEV_IDETAPE */ +#ifdef CONFIG_BLK_DEV_IDEFLOPPY + (void) idefloppy_init(); +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ +#ifdef CONFIG_BLK_DEV_IDESCSI + #ifdef CONFIG_SCSI + (void) idescsi_init(); + #else + #warning ide scsi-emulation selected but no SCSI-subsystem in kernel + #endif +#endif /* CONFIG_BLK_DEV_IDESCSI */ +} + +static int default_cleanup (ide_drive_t *drive) +{ + return ide_unregister_subdriver(drive); +} + +static int default_standby (ide_drive_t *drive) +{ + return 0; +} + +static int default_flushcache (ide_drive_t *drive) +{ + return 0; +} + +static ide_startstop_t default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block) +{ + ide_end_request(0, HWGROUP(drive)); + return ide_stopped; +} + +static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + ide_end_request(uptodate, hwgroup); +} + +static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -EIO; +} + +static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ + drive->usage--; + return -EIO; +} + +static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive) +{ +} + +static int default_check_media_change (ide_drive_t *drive) +{ + return 1; +} + +static void default_pre_reset (ide_drive_t *drive) +{ +} + +static unsigned long default_capacity (ide_drive_t *drive) +{ + return 0x7fffffff; +} + +static ide_startstop_t default_special (ide_drive_t *drive) +{ + special_t *s = &drive->special; + + s->all = 0; + drive->mult_req = 0; + return ide_stopped; +} + +static int default_reinit (ide_drive_t *drive) +{ + printk(KERN_ERR "%s: does not support hotswap of device class !\n", drive->name); + + return 0; +} + +static void setup_driver_defaults (ide_drive_t *drive) +{ + ide_driver_t *d = drive->driver; + + if (d->cleanup == NULL) d->cleanup = default_cleanup; + if (d->standby == NULL) d->standby = default_standby; + if (d->flushcache == NULL) d->flushcache = default_flushcache; + if (d->do_request == NULL) d->do_request = default_do_request; + if (d->end_request == NULL) d->end_request = default_end_request; + if (d->ioctl == NULL) d->ioctl = default_ioctl; + if (d->open == NULL) d->open = default_open; + if (d->release == NULL) d->release = default_release; + if (d->media_change == NULL) d->media_change = default_check_media_change; + if (d->pre_reset == NULL) d->pre_reset = default_pre_reset; + if (d->capacity == NULL) d->capacity = default_capacity; + if (d->special == NULL) d->special = default_special; + if (d->reinit == NULL) d->reinit = default_reinit; +} + +ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n) +{ + unsigned int unit, index, i; + + for (index = 0, i = 
0; index < MAX_HWIFS; ++index) { + ide_hwif_t *hwif = &ide_hwifs[index]; + if (!hwif->present) + continue; + for (unit = 0; unit < MAX_DRIVES; ++unit) { + ide_drive_t *drive = &hwif->drives[unit]; + char *req = drive->driver_req; + if (*req && !strstr(name, req)) + continue; + if (drive->present && drive->media == media && drive->driver == driver && ++i > n) + return drive; + } + } + return NULL; +} + +int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version) +{ + unsigned long flags; + + save_flags(flags); /* all CPUs */ + cli(); /* all CPUs */ + if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL || drive->busy || drive->usage) { + restore_flags(flags); /* all CPUs */ + return 1; + } + drive->driver = driver; + setup_driver_defaults(drive); + restore_flags(flags); /* all CPUs */ + if (drive->autotune != 2) { + if (driver->supports_dma && HWIF(drive)->dmaproc != NULL) { + /* + * Force DMAing for the beginning of the check. + * Some chipsets appear to do interesting things, + * if not checked and cleared. + * PARANOIA!!! + */ + (void) (HWIF(drive)->dmaproc(ide_dma_off_quietly, drive)); + (void) (HWIF(drive)->dmaproc(ide_dma_check, drive)); + } + drive->dsc_overlap = (drive->next != drive && driver->supports_dsc_overlap); + drive->nice1 = 1; + } + drive->revalidate = 1; + drive->suspend_reset = 0; +#ifdef CONFIG_PROC_FS + ide_add_proc_entries(drive->proc, generic_subdriver_entries, drive); + ide_add_proc_entries(drive->proc, driver->proc, drive); +#endif + return 0; +} + +int ide_unregister_subdriver (ide_drive_t *drive) +{ + unsigned long flags; + + save_flags(flags); /* all CPUs */ + cli(); /* all CPUs */ + if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) { + restore_flags(flags); /* all CPUs */ + return 1; + } +#if defined(CONFIG_BLK_DEV_ISAPNP) && defined(CONFIG_ISAPNP) && defined(MODULE) + pnpide_init(0); +#endif /* CONFIG_BLK_DEV_ISAPNP */ +#ifdef CONFIG_PROC_FS + ide_remove_proc_entries(drive->proc, DRIVER(drive)->proc); + ide_remove_proc_entries(drive->proc, generic_subdriver_entries); +#endif + auto_remove_settings(drive); + drive->driver = NULL; + restore_flags(flags); /* all CPUs */ + return 0; +} + +int ide_register_module (ide_module_t *module) +{ + ide_module_t *p = ide_modules; + + while (p) { + if (p == module) + return 1; + p = p->next; + } + module->next = ide_modules; + ide_modules = module; + revalidate_drives(); + return 0; +} + +void ide_unregister_module (ide_module_t *module) +{ + ide_module_t **p; + + for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next)); + if (*p) + *p = (*p)->next; +} + +struct block_device_operations ide_fops[] = {{ + open: ide_open, + release: ide_release, + ioctl: ide_ioctl, + check_media_change: ide_check_media_change, + revalidate: ide_revalidate_disk +}}; + +EXPORT_SYMBOL(ide_hwifs); +EXPORT_SYMBOL(ide_register_module); +EXPORT_SYMBOL(ide_unregister_module); +EXPORT_SYMBOL(ide_spin_wait_hwgroup); + +/* + * Probe module + */ +#ifdef DEVFS_MUST_DIE +devfs_handle_t ide_devfs_handle; +#endif + +EXPORT_SYMBOL(ide_probe); +EXPORT_SYMBOL(drive_is_flashcard); +EXPORT_SYMBOL(ide_timer_expiry); +EXPORT_SYMBOL(ide_intr); +EXPORT_SYMBOL(ide_fops); +EXPORT_SYMBOL(ide_get_queue); +EXPORT_SYMBOL(ide_add_generic_settings); +#ifdef DEVFS_MUST_DIE +EXPORT_SYMBOL(ide_devfs_handle); +#endif +EXPORT_SYMBOL(do_ide_request); +/* + * Driver module + */ +EXPORT_SYMBOL(ide_scan_devices); +EXPORT_SYMBOL(ide_register_subdriver); +EXPORT_SYMBOL(ide_unregister_subdriver); 
+EXPORT_SYMBOL(ide_replace_subdriver);
+EXPORT_SYMBOL(ide_input_data);
+EXPORT_SYMBOL(ide_output_data);
+EXPORT_SYMBOL(atapi_input_bytes);
+EXPORT_SYMBOL(atapi_output_bytes);
+EXPORT_SYMBOL(drive_is_ready);
+EXPORT_SYMBOL(ide_set_handler);
+EXPORT_SYMBOL(ide_dump_status);
+EXPORT_SYMBOL(ide_error);
+EXPORT_SYMBOL(ide_fixstring);
+EXPORT_SYMBOL(ide_wait_stat);
+EXPORT_SYMBOL(ide_do_reset);
+EXPORT_SYMBOL(restart_request);
+EXPORT_SYMBOL(ide_init_drive_cmd);
+EXPORT_SYMBOL(ide_do_drive_cmd);
+EXPORT_SYMBOL(ide_end_drive_cmd);
+EXPORT_SYMBOL(ide_end_request);
+EXPORT_SYMBOL(ide_revalidate_disk);
+EXPORT_SYMBOL(ide_cmd);
+EXPORT_SYMBOL(ide_wait_cmd);
+EXPORT_SYMBOL(ide_wait_cmd_task);
+EXPORT_SYMBOL(ide_delay_50ms);
+EXPORT_SYMBOL(ide_stall_queue);
+#ifdef CONFIG_PROC_FS
+EXPORT_SYMBOL(ide_add_proc_entries);
+EXPORT_SYMBOL(ide_remove_proc_entries);
+EXPORT_SYMBOL(proc_ide_read_geometry);
+EXPORT_SYMBOL(create_proc_ide_interfaces);
+EXPORT_SYMBOL(recreate_proc_ide_device);
+EXPORT_SYMBOL(destroy_proc_ide_device);
+#endif
+EXPORT_SYMBOL(ide_add_setting);
+EXPORT_SYMBOL(ide_remove_setting);
+
+EXPORT_SYMBOL(ide_register_hw);
+EXPORT_SYMBOL(ide_register);
+EXPORT_SYMBOL(ide_unregister);
+EXPORT_SYMBOL(ide_setup_ports);
+EXPORT_SYMBOL(hwif_unregister);
+EXPORT_SYMBOL(get_info_ptr);
+EXPORT_SYMBOL(current_capacity);
+
+EXPORT_SYMBOL(system_bus_clock);
+
+EXPORT_SYMBOL(ide_reinit_drive);
+
+#if 0
+static int ide_notify_reboot (struct notifier_block *this, unsigned long event, void *x)
+{
+	ide_hwif_t *hwif;
+	ide_drive_t *drive;
+	int i, unit;
+
+	switch (event) {
+		case SYS_HALT:
+		case SYS_POWER_OFF:
+		case SYS_RESTART:
+			break;
+		default:
+			return NOTIFY_DONE;
+	}
+
+	printk("flushing ide devices: ");
+
+	for (i = 0; i < MAX_HWIFS; i++) {
+		hwif = &ide_hwifs[i];
+		if (!hwif->present)
+			continue;
+		for (unit = 0; unit < MAX_DRIVES; ++unit) {
+			drive = &hwif->drives[unit];
+			if (!drive->present)
+				continue;
+
+			/* set the drive to standby */
+			printk("%s ", drive->name);
+			if (event != SYS_RESTART)
+				if (drive->driver != NULL && DRIVER(drive)->standby(drive))
+					continue;
+
+			if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+				continue;
+		}
+	}
+	printk("\n");
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ide_notifier = {
+	ide_notify_reboot,
+	NULL,
+	5
+};
+#endif
+
+/*
+ * This gets invoked once during initialization, to set *everything* up
+ */
+int __init ide_init (void)
+{
+	static char banner_printed;
+	int i;
+
+	if (!banner_printed) {
+		printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n");
+#ifdef DEVFS_MUST_DIE
+		ide_devfs_handle = devfs_mk_dir (NULL, "ide", NULL);
+#endif
+		system_bus_speed = ide_system_bus_speed();
+		banner_printed = 1;
+	}
+
+	init_ide_data ();
+
+	initializing = 1;
+	ide_init_builtin_drivers();
+	initializing = 0;
+
+	for (i = 0; i < MAX_HWIFS; ++i) {
+		ide_hwif_t *hwif = &ide_hwifs[i];
+		if (hwif->present)
+			ide_geninit(hwif);
+	}
+
+	/*register_reboot_notifier(&ide_notifier);*/
+	return 0;
+}
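The module build below feeds the insmod `options` string through a destructive space-splitting tokenizer before handing each piece to ide_setup(). A user-space sketch of the same in-place tokenization, with a stand-in setup callback (fake_setup is hypothetical, for demonstration only):

#include <stdio.h>
#include <string.h>

static void parse(char *line, int (*setup)(char *))
{
	char *next = line;

	if (line == NULL || !*line)
		return;
	while ((line = next) != NULL) {
		if ((next = strchr(line, ' ')) != NULL)
			*next++ = 0;		/* terminate this token in place */
		if (!setup(line))
			printf("Unknown option '%s'\n", line);
	}
}

static int fake_setup(char *s)
{
	printf("option: %s\n", s);
	return strncmp(s, "ide", 3) == 0;	/* accept ide* only, for demo */
}

int main(void)
{
	char opts[] = "ide0=noprobe hdc=cdrom idebus=33";
	parse(opts, fake_setup);		/* "hdc=cdrom" is reported unknown */
	return 0;
}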
+
+#ifdef MODULE
+char *options = NULL;
+MODULE_PARM(options,"s");
+MODULE_LICENSE("GPL");
+
+static void __init parse_options (char *line)
+{
+	char *next = line;
+
+	if (line == NULL || !*line)
+		return;
+	while ((line = next) != NULL) {
+		if ((next = strchr(line,' ')) != NULL)
+			*next++ = 0;
+		if (!ide_setup(line))
+			printk ("Unknown option '%s'\n", line);
+	}
+}
+
+int init_module (void)
+{
+	parse_options(options);
+	return ide_init();
+}
+
+void cleanup_module (void)
+{
+	int index;
+
+	/*unregister_reboot_notifier(&ide_notifier);*/
+	for (index = 0; index < MAX_HWIFS; ++index) {
+		ide_unregister(index);
+#if defined(CONFIG_BLK_DEV_IDEDMA) && !defined(CONFIG_DMA_NONPCI)
+		if (ide_hwifs[index].dma_base)
+			(void) ide_release_dma(&ide_hwifs[index]);
+#endif /* (CONFIG_BLK_DEV_IDEDMA) && !(CONFIG_DMA_NONPCI) */
+	}
+
+#ifdef CONFIG_PROC_FS
+	proc_ide_destroy();
+#endif
+#ifdef DEVFS_MUST_DIE
+	devfs_unregister (ide_devfs_handle);
+#endif
+}
+
+#else /* !MODULE */
+
+__setup("", ide_setup);
+
+#endif /* MODULE */
diff --git a/xen-2.4.16/drivers/ide/ide_modes.h b/xen-2.4.16/drivers/ide/ide_modes.h
new file mode 100644
index 0000000000..16b8cf123a
--- /dev/null
+++ b/xen-2.4.16/drivers/ide/ide_modes.h
@@ -0,0 +1,236 @@
+/*
+ * linux/drivers/ide/ide_modes.h
+ *
+ * Copyright (C) 1996  Linus Torvalds, Igor Abramov, and Mark Lord
+ */
+
+#ifndef _IDE_MODES_H
+#define _IDE_MODES_H
+
+#include <linux/config.h>
+
+/*
+ * Shared data/functions for determining best PIO mode for an IDE drive.
+ * Most of this stuff originally lived in cmd640.c, and changes to the
+ * ide_pio_blacklist[] table should be made with EXTREME CAUTION to avoid
+ * breaking the fragile cmd640.c support.
+ */
+
+#ifdef CONFIG_BLK_DEV_IDE_MODES
+
+/*
+ * Standard (generic) timings for PIO modes, from ATA2 specification.
+ * These timings are for access to the IDE data port register *only*.
+ * Some drives may specify a mode, while also specifying a different
+ * value for cycle_time (from drive identification data).
+ */
+typedef struct ide_pio_timings_s {
+	int	setup_time;	/* Address setup (ns) minimum */
+	int	active_time;	/* Active pulse (ns) minimum */
+	int	cycle_time;	/* Cycle time (ns) minimum = (setup + active + recovery) */
+} ide_pio_timings_t;
+
+typedef struct ide_pio_data_s {
+	byte pio_mode;
+	byte use_iordy;
+	byte overridden;
+	byte blacklisted;
+	unsigned int cycle_time;
+} ide_pio_data_t;
+
+#ifndef _IDE_C
+
+int ide_scan_pio_blacklist (char *model);
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d);
+extern const ide_pio_timings_t ide_pio_timings[6];
+
+#else /* _IDE_C */
+
+const ide_pio_timings_t ide_pio_timings[6] = {
+	{ 70,	165,	600 },	/* PIO Mode 0 */
+	{ 50,	125,	383 },	/* PIO Mode 1 */
+	{ 30,	100,	240 },	/* PIO Mode 2 */
+	{ 30,	80,	180 },	/* PIO Mode 3 with IORDY */
+	{ 25,	70,	120 },	/* PIO Mode 4 with IORDY */
+	{ 20,	50,	100 }	/* PIO Mode 5 with IORDY (nonstandard) */
+};
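For a feel of how chipset code consumes this table, the minimum cycle times translate into whole bus-clock counts at a given bus speed; a rough user-space sketch (the 33 MHz figure and the round-up rule are illustrative assumptions, not taken from a specific chipset driver):

#include <stdio.h>

struct pio_timing { int setup_time, active_time, cycle_time; };

static const struct pio_timing timings[6] = {
	{ 70, 165, 600 }, { 50, 125, 383 }, { 30, 100, 240 },
	{ 30,  80, 180 }, { 25,  70, 120 }, { 20,  50, 100 },
};

/* nanoseconds -> whole bus clocks, rounding up so we never run faster
 * than the drive's stated minimum cycle time */
static int cycle_clocks(int mode, int bus_mhz)
{
	int ns = timings[mode].cycle_time;
	return (ns * bus_mhz + 999) / 1000;
}

int main(void)
{
	int mode;
	for (mode = 0; mode < 6; mode++)
		printf("PIO %d: %d ns -> %d clocks at 33 MHz\n",
		       mode, timings[mode].cycle_time, cycle_clocks(mode, 33));
	return 0;
}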
+
+/*
+ * Black list. Some drives incorrectly report their maximal PIO mode,
+ * at least in respect to CMD640. Here we keep info on some known drives.
+ */
+static struct ide_pio_info {
+	const char	*name;
+	int		pio;
+} ide_pio_blacklist [] = {
+/*	{ "Conner Peripherals 1275MB - CFS1275A", 4 }, */
+	{ "Conner Peripherals 540MB - CFS540A", 3 },
+
+	{ "WDC AC2700", 3 },
+	{ "WDC AC2540", 3 },
+	{ "WDC AC2420", 3 },
+	{ "WDC AC2340", 3 },
+	{ "WDC AC2250", 0 },
+	{ "WDC AC2200", 0 },
+	{ "WDC AC21200", 4 },
+	{ "WDC AC2120", 0 },
+	{ "WDC AC2850", 3 },
+	{ "WDC AC1270", 3 },
+	{ "WDC AC1170", 1 },
+	{ "WDC AC1210", 1 },
+	{ "WDC AC280", 0 },
+/*	{ "WDC AC21000", 4 }, */
+	{ "WDC AC31000", 3 },
+	{ "WDC AC31200", 3 },
+/*	{ "WDC AC31600", 4 }, */
+
+	{ "Maxtor 7131 AT", 1 },
+	{ "Maxtor 7171 AT", 1 },
+	{ "Maxtor 7213 AT", 1 },
+	{ "Maxtor 7245 AT", 1 },
+	{ "Maxtor 7345 AT", 1 },
+	{ "Maxtor 7546 AT", 3 },
+	{ "Maxtor 7540 AV", 3 },
+
+	{ "SAMSUNG SHD-3121A", 1 },
+	{ "SAMSUNG SHD-3122A", 1 },
+	{ "SAMSUNG SHD-3172A", 1 },
+
+/*	{ "ST51080A", 4 },
+ *	{ "ST51270A", 4 },
+ *	{ "ST31220A", 4 },
+ *	{ "ST31640A", 4 },
+ *	{ "ST32140A", 4 },
+ *	{ "ST3780A", 4 },
+ */
+	{ "ST5660A", 3 },
+	{ "ST3660A", 3 },
+	{ "ST3630A", 3 },
+	{ "ST3655A", 3 },
+	{ "ST3391A", 3 },
+	{ "ST3390A", 1 },
+	{ "ST3600A", 1 },
+	{ "ST3290A", 0 },
+	{ "ST3144A", 0 },
+	{ "ST3491A", 1 },	/* reports 3, should be 1 or 2 (depending on */
+				/* drive) according to Seagate's FIND-ATA program */
+
+	{ "QUANTUM ELS127A", 0 },
+	{ "QUANTUM ELS170A", 0 },
+	{ "QUANTUM LPS240A", 0 },
+	{ "QUANTUM LPS210A", 3 },
+	{ "QUANTUM LPS270A", 3 },
+	{ "QUANTUM LPS365A", 3 },
+	{ "QUANTUM LPS540A", 3 },
+	{ "QUANTUM LIGHTNING 540A", 3 },
+	{ "QUANTUM LIGHTNING 730A", 3 },
+
+	{ "QUANTUM FIREBALL_540", 3 },	/* Older Quantum Fireballs don't work */
+	{ "QUANTUM FIREBALL_640", 3 },
+	{ "QUANTUM FIREBALL_1080", 3 },
+	{ "QUANTUM FIREBALL_1280", 3 },
+	{ NULL, 0 }
+};
+
+/*
+ * This routine searches the ide_pio_blacklist for an entry
+ * matching the start/whole of the supplied model name.
+ *
+ * Returns -1 if no match found.
+ * Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
+ */
+int ide_scan_pio_blacklist (char *model)
+{
+	struct ide_pio_info *p;
+
+	for (p = ide_pio_blacklist; p->name != NULL; p++) {
+		if (strncmp(p->name, model, strlen(p->name)) == 0)
+			return p->pio;
+	}
+	return -1;
+}
+
+/*
+ * This routine returns the recommended PIO settings for a given drive,
+ * based on the drive->id information and the ide_pio_blacklist[].
+ * This is used by most chipset support modules when "auto-tuning".
+ */
+
+/*
+ * Drive PIO mode auto selection
+ */
+byte ide_get_best_pio_mode (ide_drive_t *drive, byte mode_wanted, byte max_mode, ide_pio_data_t *d)
+{
+	int pio_mode;
+	int cycle_time = 0;
+	int use_iordy = 0;
+	struct hd_driveid* id = drive->id;
+	int overridden  = 0;
+	int blacklisted = 0;
+
+	if (mode_wanted != 255) {
+		pio_mode = mode_wanted;
+	} else if (!drive->id) {
+		pio_mode = 0;
+	} else if ((pio_mode = ide_scan_pio_blacklist(id->model)) != -1) {
+		overridden = 1;
+		blacklisted = 1;
+		use_iordy = (pio_mode > 2);
+	} else {
+		pio_mode = id->tPIO;
+		if (pio_mode > 2) {	/* 2 is maximum allowed tPIO value */
+			pio_mode = 2;
+			overridden = 1;
+		}
+		if (id->field_valid & 2) {	/* drive implements ATA2? */
+			if (id->capability & 8) {	/* drive supports use_iordy?
*/ + use_iordy = 1; + cycle_time = id->eide_pio_iordy; + if (id->eide_pio_modes & 7) { + overridden = 0; + if (id->eide_pio_modes & 4) + pio_mode = 5; + else if (id->eide_pio_modes & 2) + pio_mode = 4; + else + pio_mode = 3; + } + } else { + cycle_time = id->eide_pio; + } + } + +#if 0 + if (drive->id->major_rev_num & 0x0004) printk("ATA-2 "); +#endif + + /* + * Conservative "downgrade" for all pre-ATA2 drives + */ + if (pio_mode && pio_mode < 4) { + pio_mode--; + overridden = 1; +#if 0 + use_iordy = (pio_mode > 2); +#endif + if (cycle_time && cycle_time < ide_pio_timings[pio_mode].cycle_time) + cycle_time = 0; /* use standard timing */ + } + } + if (pio_mode > max_mode) { + pio_mode = max_mode; + cycle_time = 0; + } + if (d) { + d->pio_mode = pio_mode; + d->cycle_time = cycle_time ? cycle_time : ide_pio_timings[pio_mode].cycle_time; + d->use_iordy = use_iordy; + d->overridden = overridden; + d->blacklisted = blacklisted; + } + return pio_mode; +} + +#endif /* _IDE_C */ +#endif /* CONFIG_BLK_DEV_IDE_MODES */ +#endif /* _IDE_MODES_H */ diff --git a/xen-2.4.16/drivers/net/3c509.c b/xen-2.4.16/drivers/net/3c509.c new file mode 100644 index 0000000000..8729f39f8b --- /dev/null +++ b/xen-2.4.16/drivers/net/3c509.c @@ -0,0 +1,1068 @@ +/* 3c509.c: A 3c509 EtherLink3 ethernet driver for linux. */ +/* + Written 1993-2000 by Donald Becker. + + Copyright 1994-2000 by Donald Becker. + Copyright 1993 United States Government as represented by the + Director, National Security Agency. This software may be used and + distributed according to the terms of the GNU General Public License, + incorporated herein by reference. + + This driver is for the 3Com EtherLinkIII series. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Known limitations: + Because of the way 3c509 ISA detection works it's difficult to predict + a priori which of several ISA-mode cards will be detected first. + + This driver does not use predictive interrupt mode, resulting in higher + packet latency but lower overhead. If interrupts are disabled for an + unusually long time it could also result in missed packets, but in + practice this rarely happens. + + + FIXES: + Alan Cox: Removed the 'Unexpected interrupt' bug. + Michael Meskes: Upgraded to Donald Becker's version 1.07. + Alan Cox: Increased the eeprom delay. Regardless of + what the docs say some people definitely + get problems with lower (but in card spec) + delays + v1.10 4/21/97 Fixed module code so that multiple cards may be detected, + other cleanups. -djb + Andrea Arcangeli: Upgraded to Donald Becker's version 1.12. + Rick Payne: Fixed SMP race condition + v1.13 9/8/97 Made 'max_interrupt_work' an insmod-settable variable -djb + v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb + v1.15 1/31/98 Faster recovery for Tx errors. -djb + v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb + v1.18 12Mar2001 Andrew Morton + - Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz) + - Reviewed against 1.18 from scyld.com +*/ + +/* A few values that may be tweaked. */ + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (400*HZ/1000) +/* Maximum events (Rx packets, etc.) to handle at each interrupt. 
*/ +static int max_interrupt_work = 10; + +#include +#include + +//#include +//#include +#include +//#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include /* for udelay() */ +#include + +#include +#include +#include + +static char versionA[] __initdata = "3c509.c:1.18 12Mar2001 becker@scyld.com\n"; +static char versionB[] __initdata = "http://www.scyld.com/network/3c509.html\n"; + +#ifdef EL3_DEBUG +static int el3_debug = EL3_DEBUG; +#else +static int el3_debug = 2; +#endif + +/* To minimize the size of the driver source I only define operating + constants if they are used several times. You'll need the manual + anyway if you want to understand driver details. */ +/* Offsets from base I/O address. */ +#define EL3_DATA 0x00 +#define EL3_CMD 0x0e +#define EL3_STATUS 0x0e +#define EEPROM_READ 0x80 + +#define EL3_IO_EXTENT 16 + +#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) + + +/* The top five bits written to EL3_CMD are a command, the lower + 11 bits are the parameter, if applicable. */ +enum c509cmd { + TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, + RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, RxDiscard = 8<<11, + TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, + FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, + SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, + SetTxThreshold = 18<<11, SetTxStart = 19<<11, StatsEnable = 21<<11, + StatsDisable = 22<<11, StopCoax = 23<<11,}; + +enum c509status { + IntLatch = 0x0001, AdapterFailure = 0x0002, TxComplete = 0x0004, + TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, + IntReq = 0x0040, StatsFull = 0x0080, CmdBusy = 0x1000, }; + +/* The SetRxFilter command accepts the following classes: */ +enum RxFilter { + RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; + +/* Register window 1 offsets, the window used in normal operation. */ +#define TX_FIFO 0x00 +#define RX_FIFO 0x00 +#define RX_STATUS 0x08 +#define TX_STATUS 0x0B +#define TX_FREE 0x0C /* Remaining free bytes in Tx buffer. */ + +#define WN0_IRQ 0x08 /* Window 0: Set IRQ line in bits 12-15. */ +#define WN4_MEDIA 0x0A /* Window 4: Various transcvr/media bits. */ +#define MEDIA_TP 0x00C0 /* Enable link beat and jabber for 10baseT. 
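A hedged aside, not in the driver: because the encoding above packs a 5-bit opcode (pre-shifted <<11 in enum c509cmd) with an 11-bit parameter, every command is a single 16-bit write. el3_cmd() is a hypothetical wrapper assuming the EL3_CMD and enum c509cmd definitions above.

	/* Hypothetical wrapper: compose opcode and 11-bit parameter. */
	static inline void el3_cmd(int ioaddr, unsigned short op, unsigned short param)
	{
		outw(op + (param & 0x07ff), ioaddr + EL3_CMD); /* op is already <<11 */
	}
	/* e.g. el3_cmd(ioaddr, SetTxThreshold, 1536) matches the
	 * outw(SetTxThreshold + 1536, ioaddr + EL3_CMD) in el3_start_xmit(). */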
*/ + +/* + * Must be a power of two (we use a binary and in the + * circular queue) + */ +#define SKB_QUEUE_SIZE 64 + +struct el3_private { + struct net_device_stats stats; + struct net_device *next_dev; + spinlock_t lock; + /* skb send-queue */ + int head, size; + struct sk_buff *queue[SKB_QUEUE_SIZE]; + char mca_slot; +}; +static int id_port __initdata = 0x110; /* Start with 0x110 to avoid new sound cards.*/ +static struct net_device *el3_root_dev; + +static ushort id_read_eeprom(int index); +static ushort read_eeprom(int ioaddr, int index); +static int el3_open(struct net_device *dev); +static int el3_start_xmit(struct sk_buff *skb, struct net_device *dev); +static void el3_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static void update_stats(struct net_device *dev); +static struct net_device_stats *el3_get_stats(struct net_device *dev); +static int el3_rx(struct net_device *dev); +static int el3_close(struct net_device *dev); +static void set_multicast_list(struct net_device *dev); +static void el3_tx_timeout (struct net_device *dev); + +#ifdef CONFIG_MCA +struct el3_mca_adapters_struct { + char* name; + int id; +}; + +static struct el3_mca_adapters_struct el3_mca_adapters[] __initdata = { + { "3Com 3c529 EtherLink III (10base2)", 0x627c }, + { "3Com 3c529 EtherLink III (10baseT)", 0x627d }, + { "3Com 3c529 EtherLink III (test mode)", 0x62db }, + { "3Com 3c529 EtherLink III (TP or coax)", 0x62f6 }, + { "3Com 3c529 EtherLink III (TP)", 0x62f7 }, + { NULL, 0 }, +}; +#endif /* CONFIG_MCA */ + +#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +static struct isapnp_device_id el3_isapnp_adapters[] __initdata = { + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5090), + (long) "3Com Etherlink III (TP)" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5091), + (long) "3Com Etherlink III" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5094), + (long) "3Com Etherlink III (combo)" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5095), + (long) "3Com Etherlink III (TPO)" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5098), + (long) "3Com Etherlink III (TPC)" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f7), + (long) "3Com Etherlink III compatible" }, + { ISAPNP_ANY_ID, ISAPNP_ANY_ID, + ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x80f8), + (long) "3Com Etherlink III compatible" }, + { } /* terminate list */ +}; + +MODULE_DEVICE_TABLE(isapnp, el3_isapnp_adapters); +MODULE_LICENSE("GPL"); + + +static u16 el3_isapnp_phys_addr[8][3]; +static int nopnp; +#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ + +int __init el3_probe(struct net_device *dev) +{ + struct el3_private *lp; + short lrs_state = 0xff, i; + int ioaddr, irq, if_port; + u16 phys_addr[3]; + static int current_tag; + int mca_slot = -1; +#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) + static int pnp_cards; +#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ + + if (dev) SET_MODULE_OWNER(dev); + + /* First check all slots of the EISA bus. The next slot address to + probe is kept in 'eisa_addr' to support multiple probe() calls. */ + if (EISA_bus) { + static int eisa_addr = 0x1000; + while (eisa_addr < 0x9000) { + int device_id; + + ioaddr = eisa_addr; + eisa_addr += 0x1000; + + /* Check the standard EISA ID register for an encoded '3Com'. 
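The EISA probe that follows compares the ID register against 0x6d50, the EISA compressed vendor code 'TCM' (3Com) as seen through a little-endian inw(). A standalone host-side sketch, assuming only the standard 5-bits-per-letter EISA packing (not driver code):

	#include <stdio.h>

	/* EISA/ISAPnP vendor IDs pack three letters at five bits each,
	 * value = letter - 'A' + 1. 'T','C','M' encodes to 0x506d; the
	 * byte-swapped image read by inw() is the 0x6d50 tested below. */
	static unsigned short eisa_vendor(char a, char b, char c)
	{
		return ((a - 'A' + 1) << 10) | ((b - 'A' + 1) << 5) | (c - 'A' + 1);
	}

	int main(void)
	{
		unsigned short id = eisa_vendor('T', 'C', 'M');       /* 0x506d */
		unsigned short wire = (id >> 8) | ((id & 0xff) << 8); /* 0x6d50 */
		printf("%04x -> %04x\n", id, wire);
		return 0;
	}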
*/ + if (inw(ioaddr + 0xC80) != 0x6d50) + continue; + + /* Avoid conflict with 3c590, 3c592, 3c597, etc */ + device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83); + if ((device_id & 0xFF00) == 0x5900) { + continue; + } + + /* Change the register set to the configuration window 0. */ + outw(SelectWindow | 0, ioaddr + 0xC80 + EL3_CMD); + + irq = inw(ioaddr + WN0_IRQ) >> 12; + if_port = inw(ioaddr + 6)>>14; + for (i = 0; i < 3; i++) + phys_addr[i] = htons(read_eeprom(ioaddr, i)); + + /* Restore the "Product ID" to the EEPROM read register. */ + read_eeprom(ioaddr, 3); + + /* Was the EISA code an add-on hack? Nahhhhh... */ + goto found; + } + } + +#ifdef CONFIG_MCA + /* Based on Erik Nygren's (nygren@mit.edu) 3c529 patch, heavily + * modified by Chris Beauregard (cpbeaure@csclub.uwaterloo.ca) + * to support standard MCA probing. + * + * redone for multi-card detection by ZP Gu (zpg@castle.net) + * now works as a module + */ + + if( MCA_bus ) { + int slot, j; + u_char pos4, pos5; + + for( j = 0; el3_mca_adapters[j].name != NULL; j ++ ) { + slot = 0; + while( slot != MCA_NOTFOUND ) { + slot = mca_find_unused_adapter( + el3_mca_adapters[j].id, slot ); + if( slot == MCA_NOTFOUND ) break; + + /* if we get this far, an adapter has been + * detected and is enabled + */ + + pos4 = mca_read_stored_pos( slot, 4 ); + pos5 = mca_read_stored_pos( slot, 5 ); + + ioaddr = ((short)((pos4&0xfc)|0x02)) << 8; + irq = pos5 & 0x0f; + + /* probing for a card at a particular IO/IRQ */ + if(dev && ((dev->irq >= 1 && dev->irq != irq) || + (dev->base_addr >= 1 && dev->base_addr != ioaddr))) { + slot++; /* probing next slot */ + continue; + } + + printk("3c509: found %s at slot %d\n", + el3_mca_adapters[j].name, slot + 1 ); + + /* claim the slot */ + mca_set_adapter_name(slot, el3_mca_adapters[j].name); + mca_set_adapter_procfn(slot, NULL, NULL); + mca_mark_as_used(slot); + + if_port = pos4 & 0x03; + if (el3_debug > 2) { + printk("3c529: irq %d ioaddr 0x%x ifport %d\n", irq, ioaddr, if_port); + } + EL3WINDOW(0); + for (i = 0; i < 3; i++) { + phys_addr[i] = htons(read_eeprom(ioaddr, i)); + } + + mca_slot = slot; + + goto found; + } + } + /* if we get here, we didn't find an MCA adapter */ + return -ENODEV; + } +#endif /* CONFIG_MCA */ + +#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) + if (nopnp == 1) + goto no_pnp; + + for (i=0; el3_isapnp_adapters[i].vendor != 0; i++) { + struct pci_dev *idev = NULL; + int j; + while ((idev = isapnp_find_dev(NULL, + el3_isapnp_adapters[i].vendor, + el3_isapnp_adapters[i].function, + idev))) { + idev->prepare(idev); + /* Deactivation is needed if the driver was called + with "nopnp=1" before, does not harm if not. */ + idev->deactivate(idev); + idev->activate(idev); + if (!idev->resource[0].start || check_region(idev->resource[0].start, EL3_IO_EXTENT)) + continue; + ioaddr = idev->resource[0].start; + if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509 PnP")) + return -EBUSY; + irq = idev->irq_resource[0].start; + if (el3_debug > 3) + printk ("ISAPnP reports %s at i/o 0x%x, irq %d\n", + (char*) el3_isapnp_adapters[i].driver_data, ioaddr, irq); + EL3WINDOW(0); + for (j = 0; j < 3; j++) + el3_isapnp_phys_addr[pnp_cards][j] = + phys_addr[j] = + htons(read_eeprom(ioaddr, j)); + if_port = read_eeprom(ioaddr, 8) >> 14; + pnp_cards++; + goto found; + } + } +no_pnp: +#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ + + /* Select an open I/O location at 0x1*0 to do contention select. 
*/ + for ( ; id_port < 0x200; id_port += 0x10) { + if (check_region(id_port, 1)) + continue; + outb(0x00, id_port); + outb(0xff, id_port); + if (inb(id_port) & 0x01) + break; + } + if (id_port >= 0x200) { + /* Rare -- do we really need a warning? */ + printk(" WARNING: No I/O port available for 3c509 activation.\n"); + return -ENODEV; + } + /* Next check for all ISA bus boards by sending the ID sequence to the + ID_PORT. We find cards past the first by setting the 'current_tag' + on cards as they are found. Cards with their tag set will not + respond to subsequent ID sequences. */ + + outb(0x00, id_port); + outb(0x00, id_port); + for(i = 0; i < 255; i++) { + outb(lrs_state, id_port); + lrs_state <<= 1; + lrs_state = lrs_state & 0x100 ? lrs_state ^ 0xcf : lrs_state; + } + + /* For the first probe, clear all board's tag registers. */ + if (current_tag == 0) + outb(0xd0, id_port); + else /* Otherwise kill off already-found boards. */ + outb(0xd8, id_port); + + if (id_read_eeprom(7) != 0x6d50) { + return -ENODEV; + } + + /* Read in EEPROM data, which does contention-select. + Only the lowest address board will stay "on-line". + 3Com got the byte order backwards. */ + for (i = 0; i < 3; i++) { + phys_addr[i] = htons(id_read_eeprom(i)); + } + +#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) + if (nopnp == 0) { + /* The ISA PnP 3c509 cards respond to the ID sequence. + This check is needed in order not to register them twice. */ + for (i = 0; i < pnp_cards; i++) { + if (phys_addr[0] == el3_isapnp_phys_addr[i][0] && + phys_addr[1] == el3_isapnp_phys_addr[i][1] && + phys_addr[2] == el3_isapnp_phys_addr[i][2]) + { + if (el3_debug > 3) + printk("3c509 with address %02x %02x %02x %02x %02x %02x was found by ISAPnP\n", + phys_addr[0] & 0xff, phys_addr[0] >> 8, + phys_addr[1] & 0xff, phys_addr[1] >> 8, + phys_addr[2] & 0xff, phys_addr[2] >> 8); + /* Set the adaptor tag so that the next card can be found. */ + outb(0xd0 + ++current_tag, id_port); + goto no_pnp; + } + } + } +#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ + + { + unsigned int iobase = id_read_eeprom(8); + if_port = iobase >> 14; + ioaddr = 0x200 + ((iobase & 0x1f) << 4); + } + irq = id_read_eeprom(9) >> 12; + + if (dev) { /* Set passed-in IRQ or I/O Addr. */ + if (dev->irq > 1 && dev->irq < 16) + irq = dev->irq; + + if (dev->base_addr) { + if (dev->mem_end == 0x3c509 /* Magic key */ + && dev->base_addr >= 0x200 && dev->base_addr <= 0x3e0) + ioaddr = dev->base_addr & 0x3f0; + else if (dev->base_addr != ioaddr) + return -ENODEV; + } + } + + if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509")) + return -EBUSY; + + /* Set the adaptor tag so that the next card can be found. */ + outb(0xd0 + ++current_tag, id_port); + + /* Activate the adaptor at the EEPROM location. */ + outb((ioaddr >> 4) | 0xe0, id_port); + + EL3WINDOW(0); + if (inw(ioaddr) != 0x6d50) { + release_region(ioaddr, EL3_IO_EXTENT); + return -ENODEV; + } + + /* Free the interrupt so that some other card can use it. */ + outw(0x0f00, ioaddr + WN0_IRQ); + found: + if (dev == NULL) { + dev = init_etherdev(dev, sizeof(struct el3_private)); + if (dev == NULL) { + release_region(ioaddr, EL3_IO_EXTENT); + return -ENOMEM; + } + SET_MODULE_OWNER(dev); + } + memcpy(dev->dev_addr, phys_addr, sizeof(phys_addr)); + dev->base_addr = ioaddr; + dev->irq = irq; + dev->if_port = (dev->mem_start & 0x1f) ? 
dev->mem_start & 3 : if_port; + + { + const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"}; + printk("%s: 3c5x9 at %#3.3lx, %s port, address ", + dev->name, dev->base_addr, if_names[dev->if_port]); + } + + /* Read in the station address. */ + for (i = 0; i < 6; i++) + printk(" %2.2x", dev->dev_addr[i]); + printk(", IRQ %d.\n", dev->irq); + + /* Make up a EL3-specific-data structure. */ + if (dev->priv == NULL) + dev->priv = kmalloc(sizeof(struct el3_private), GFP_KERNEL); + if (dev->priv == NULL) + return -ENOMEM; + memset(dev->priv, 0, sizeof(struct el3_private)); + + lp = dev->priv; + lp->mca_slot = mca_slot; + lp->next_dev = el3_root_dev; + spin_lock_init(&lp->lock); + el3_root_dev = dev; + + if (el3_debug > 0) + printk(KERN_INFO "%s" KERN_INFO "%s", versionA, versionB); + + /* The EL3-specific entries in the device structure. */ + dev->open = &el3_open; + dev->hard_start_xmit = &el3_start_xmit; + dev->stop = &el3_close; + dev->get_stats = &el3_get_stats; + dev->set_multicast_list = &set_multicast_list; + dev->tx_timeout = el3_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; + + /* Fill in the generic fields of the device structure. */ + ether_setup(dev); + return 0; +} + +/* Read a word from the EEPROM using the regular EEPROM access register. + Assume that we are in register window zero. + */ +static ushort __init read_eeprom(int ioaddr, int index) +{ + outw(EEPROM_READ + index, ioaddr + 10); + /* Pause for at least 162 us. for the read to take place. */ + udelay (500); + return inw(ioaddr + 12); +} + +/* Read a word from the EEPROM when in the ISA ID probe state. */ +static ushort __init id_read_eeprom(int index) +{ + int bit, word = 0; + + /* Issue read command, and pause for at least 162 us. for it to complete. + Assume extra-fast 16Mhz bus. */ + outb(EEPROM_READ + index, id_port); + + /* Pause for at least 162 us. for the read to take place. */ + udelay (500); + + for (bit = 15; bit >= 0; bit--) + word = (word << 1) + (inb(id_port) & 0x01); + + if (el3_debug > 3) + printk(" 3c509 EEPROM word %d %#4.4x.\n", index, word); + + return word; +} + + +static int +el3_open(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + int i; + + outw(TxReset, ioaddr + EL3_CMD); + outw(RxReset, ioaddr + EL3_CMD); + outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); + + i = request_irq(dev->irq, &el3_interrupt, 0, dev->name, dev); + if (i) return i; + + EL3WINDOW(0); + if (el3_debug > 3) + printk("%s: Opening, IRQ %d status@%x %4.4x.\n", dev->name, + dev->irq, ioaddr + EL3_STATUS, inw(ioaddr + EL3_STATUS)); + + /* Activate board: this is probably unnecessary. */ + outw(0x0001, ioaddr + 4); + + /* Set the IRQ line. */ + outw((dev->irq << 12) | 0x0f00, ioaddr + WN0_IRQ); + + /* Set the station address in window 2 each time opened. */ + EL3WINDOW(2); + + for (i = 0; i < 6; i++) + outb(dev->dev_addr[i], ioaddr + i); + + if (dev->if_port == 3) + /* Start the thinnet transceiver. We should really wait 50ms...*/ + outw(StartCoax, ioaddr + EL3_CMD); + else if (dev->if_port == 0) { + /* 10baseT interface, enabled link beat and jabber check. */ + EL3WINDOW(4); + outw(inw(ioaddr + WN4_MEDIA) | MEDIA_TP, ioaddr + WN4_MEDIA); + } + + /* Switch to the stats window, and clear all stats by reading. */ + outw(StatsDisable, ioaddr + EL3_CMD); + EL3WINDOW(6); + for (i = 0; i < 9; i++) + inb(ioaddr + i); + inw(ioaddr + 10); + inw(ioaddr + 12); + + /* Switch to register set 1 for normal use. */ + EL3WINDOW(1); + + /* Accept b-case and phys addr only. 
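A hedged aside on the two interrupt-related masks used just below: SetStatusEnb selects which events may appear in EL3_STATUS at all ("allow status bits to be seen"), while SetIntrEnb selects which of those assert the IRQ line. el3_set_masks() is a hypothetical helper assuming the definitions above, not driver code.

	static inline void el3_set_masks(int ioaddr, unsigned short visible,
	                                 unsigned short irq_mask)
	{
		outw(SetStatusEnb | visible, ioaddr + EL3_CMD);
		outw(SetIntrEnb | irq_mask, ioaddr + EL3_CMD);
	}
	/* el3_open() below effectively uses visible = 0xff and
	 * irq_mask = IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull. */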
*/ + outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); + outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ + + netif_start_queue(dev); + + outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ + outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ + /* Allow status bits to be seen. */ + outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD); + /* Ack all pending events, and set active indicator mask. */ + outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, + ioaddr + EL3_CMD); + outw(SetIntrEnb | IntLatch|TxAvailable|TxComplete|RxComplete|StatsFull, + ioaddr + EL3_CMD); + + if (el3_debug > 3) + printk("%s: Opened 3c509 IRQ %d status %4.4x.\n", + dev->name, dev->irq, inw(ioaddr + EL3_STATUS)); + + return 0; +} + +static void +el3_tx_timeout (struct net_device *dev) +{ + struct el3_private *lp = (struct el3_private *)dev->priv; + int ioaddr = dev->base_addr; + + /* Transmitter timeout, serious problems. */ + printk("%s: transmit timed out, Tx_status %2.2x status %4.4x " + "Tx FIFO room %d.\n", + dev->name, inb(ioaddr + TX_STATUS), inw(ioaddr + EL3_STATUS), + inw(ioaddr + TX_FREE)); + lp->stats.tx_errors++; + dev->trans_start = jiffies; + /* Issue TX_RESET and TX_START commands. */ + outw(TxReset, ioaddr + EL3_CMD); + outw(TxEnable, ioaddr + EL3_CMD); + netif_wake_queue(dev); +} + + +static int +el3_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct el3_private *lp = (struct el3_private *)dev->priv; + int ioaddr = dev->base_addr; + unsigned long flags; + + netif_stop_queue (dev); + + lp->stats.tx_bytes += skb->len; + + if (el3_debug > 4) { + printk("%s: el3_start_xmit(length = %u) called, status %4.4x.\n", + dev->name, skb->len, inw(ioaddr + EL3_STATUS)); + } +#if 0 +#ifndef final_version + { /* Error-checking code, delete someday. */ + ushort status = inw(ioaddr + EL3_STATUS); + if (status & 0x0001 /* IRQ line active, missed one. */ + && inw(ioaddr + EL3_STATUS) & 1) { /* Make sure. */ + printk("%s: Missed interrupt, status then %04x now %04x" + " Tx %2.2x Rx %4.4x.\n", dev->name, status, + inw(ioaddr + EL3_STATUS), inb(ioaddr + TX_STATUS), + inw(ioaddr + RX_STATUS)); + /* Fake interrupt trigger by masking, acknowledge interrupts. */ + outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); + outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, + ioaddr + EL3_CMD); + outw(SetStatusEnb | 0xff, ioaddr + EL3_CMD); + } + } +#endif +#endif + /* + * We lock the driver against other processors. Note + * we don't need to lock versus the IRQ as we suspended + * that. This means that we lose the ability to take + * an RX during a TX upload. That sucks a bit with SMP + * on an original 3c509 (2K buffer) + * + * Using disable_irq stops us crapping on other + * time sensitive devices. + */ + + spin_lock_irqsave(&lp->lock, flags); + + /* Put out the doubleword header... */ + outw(skb->len, ioaddr + TX_FIFO); + outw(0x00, ioaddr + TX_FIFO); + /* ... and the packet rounded to a doubleword. */ +#ifdef __powerpc__ + outsl_unswapped(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); +#else + outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); +#endif + + dev->trans_start = jiffies; + if (inw(ioaddr + TX_FREE) > 1536) + netif_start_queue(dev); + else + /* Interrupt us when the FIFO has room for max-sized packet. */ + outw(SetTxThreshold + 1536, ioaddr + EL3_CMD); + + spin_unlock_irqrestore(&lp->lock, flags); + + dev_kfree_skb (skb); + + /* Clear the Tx status stack. 
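A hedged aside, not part of the patch: hypothetical symbolic names for the TX_STATUS bits that the pop loop below tests with raw masks. Read this way, the driver's masks are 0x38 = jabber|underrun|max-collisions (counted as tx_aborted_errors), 0x30 = errors needing a TxReset, and 0x3C = errors needing a TxEnable to restart the transmitter.

	#define TXS_COMPLETE  0x80 /* status-stack entry is valid */
	#define TXS_INTR_REQ  0x40
	#define TXS_JABBER    0x20
	#define TXS_UNDERRUN  0x10
	#define TXS_MAX_COLL  0x08
	#define TXS_OVERFLOW  0x04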
*/ + { + short tx_status; + int i = 4; + + while (--i > 0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { + if (tx_status & 0x38) lp->stats.tx_aborted_errors++; + if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); + if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); + outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + } + } + return 0; +} + +/* The EL3 interrupt handler. */ +static void +el3_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct el3_private *lp; + int ioaddr, status; + int i = max_interrupt_work; + + if (dev == NULL) { + printk ("el3_interrupt(): irq %d for unknown device.\n", irq); + return; + } + + lp = (struct el3_private *)dev->priv; + spin_lock(&lp->lock); + + ioaddr = dev->base_addr; + + if (el3_debug > 4) { + status = inw(ioaddr + EL3_STATUS); + printk("%s: interrupt, status %4.4x.\n", dev->name, status); + } + + while ((status = inw(ioaddr + EL3_STATUS)) & + (IntLatch | RxComplete | StatsFull)) { + + if (status & RxComplete) + el3_rx(dev); + + if (status & TxAvailable) { + if (el3_debug > 5) + printk(" TX room bit was handled.\n"); + /* There's room in the FIFO for a full-sized packet. */ + outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); + netif_wake_queue (dev); + } + if (status & (AdapterFailure | RxEarly | StatsFull | TxComplete)) { + /* Handle all uncommon interrupts. */ + if (status & StatsFull) /* Empty statistics. */ + update_stats(dev); + if (status & RxEarly) { /* Rx early is unused. */ + el3_rx(dev); + outw(AckIntr | RxEarly, ioaddr + EL3_CMD); + } + if (status & TxComplete) { /* Really Tx error. */ + struct el3_private *lp = (struct el3_private *)dev->priv; + short tx_status; + int i = 4; + + while (--i>0 && (tx_status = inb(ioaddr + TX_STATUS)) > 0) { + if (tx_status & 0x38) lp->stats.tx_aborted_errors++; + if (tx_status & 0x30) outw(TxReset, ioaddr + EL3_CMD); + if (tx_status & 0x3C) outw(TxEnable, ioaddr + EL3_CMD); + outb(0x00, ioaddr + TX_STATUS); /* Pop the status stack. */ + } + } + if (status & AdapterFailure) { + /* Adapter failure requires Rx reset and reinit. */ + outw(RxReset, ioaddr + EL3_CMD); + /* Set the Rx filter to the current state. */ + outw(SetRxFilter | RxStation | RxBroadcast + | (dev->flags & IFF_ALLMULTI ? RxMulticast : 0) + | (dev->flags & IFF_PROMISC ? RxProm : 0), + ioaddr + EL3_CMD); + outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ + outw(AckIntr | AdapterFailure, ioaddr + EL3_CMD); + } + } + + if (--i < 0) { + printk("%s: Infinite loop in interrupt, status %4.4x.\n", + dev->name, status); + /* Clear all interrupts. */ + outw(AckIntr | 0xFF, ioaddr + EL3_CMD); + break; + } + /* Acknowledge the IRQ. */ + outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); /* Ack IRQ */ + } + + if (el3_debug > 4) { + printk("%s: exiting interrupt, status %4.4x.\n", dev->name, + inw(ioaddr + EL3_STATUS)); + } + spin_unlock(&lp->lock); + return; +} + + +static struct net_device_stats * +el3_get_stats(struct net_device *dev) +{ + struct el3_private *lp = (struct el3_private *)dev->priv; + unsigned long flags; + + /* + * This is fast enough not to bother with disable IRQ + * stuff. + */ + + spin_lock_irqsave(&lp->lock, flags); + update_stats(dev); + spin_unlock_irqrestore(&lp->lock, flags); + return &lp->stats; +} + +/* Update statistics. We change to register window 6, so this should be run + single-threaded if the device is active. 
This is expected to be a rare + operation, and it's simpler for the rest of the driver to assume that + window 1 is always valid rather than use a special window-state variable. + */ +static void update_stats(struct net_device *dev) +{ + struct el3_private *lp = (struct el3_private *)dev->priv; + int ioaddr = dev->base_addr; + + if (el3_debug > 5) + printk(" Updating the statistics.\n"); + /* Turn off statistics updates while reading. */ + outw(StatsDisable, ioaddr + EL3_CMD); + /* Switch to the stats window, and read everything. */ + EL3WINDOW(6); + lp->stats.tx_carrier_errors += inb(ioaddr + 0); + lp->stats.tx_heartbeat_errors += inb(ioaddr + 1); + /* Multiple collisions. */ inb(ioaddr + 2); + lp->stats.collisions += inb(ioaddr + 3); + lp->stats.tx_window_errors += inb(ioaddr + 4); + lp->stats.rx_fifo_errors += inb(ioaddr + 5); + lp->stats.tx_packets += inb(ioaddr + 6); + /* Rx packets */ inb(ioaddr + 7); + /* Tx deferrals */ inb(ioaddr + 8); + inw(ioaddr + 10); /* Total Rx and Tx octets. */ + inw(ioaddr + 12); + + /* Back to window 1, and turn statistics back on. */ + EL3WINDOW(1); + outw(StatsEnable, ioaddr + EL3_CMD); + return; +} + +static int +el3_rx(struct net_device *dev) +{ + struct el3_private *lp = (struct el3_private *)dev->priv; + int ioaddr = dev->base_addr; + short rx_status; + + if (el3_debug > 5) + printk(" In rx_packet(), status %4.4x, rx_status %4.4x.\n", + inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); + while ((rx_status = inw(ioaddr + RX_STATUS)) > 0) { + if (rx_status & 0x4000) { /* Error, update stats. */ + short error = rx_status & 0x3800; + + outw(RxDiscard, ioaddr + EL3_CMD); + lp->stats.rx_errors++; + switch (error) { + case 0x0000: lp->stats.rx_over_errors++; break; + case 0x0800: lp->stats.rx_length_errors++; break; + case 0x1000: lp->stats.rx_frame_errors++; break; + case 0x1800: lp->stats.rx_length_errors++; break; + case 0x2000: lp->stats.rx_frame_errors++; break; + case 0x2800: lp->stats.rx_crc_errors++; break; + } + } else { + short pkt_len = rx_status & 0x7ff; + struct sk_buff *skb; + + skb = dev_alloc_skb(pkt_len+5); + lp->stats.rx_bytes += pkt_len; + if (el3_debug > 4) + printk("Receiving packet size %d status %4.4x.\n", + pkt_len, rx_status); + if (skb != NULL) { + skb->dev = dev; + skb_reserve(skb, 2); /* Align IP on 16 byte */ + + /* 'skb->data' points to the start of sk_buff data area. */ +#ifdef __powerpc__ + insl_unswapped(ioaddr+RX_FIFO, skb_put(skb,pkt_len), + (pkt_len + 3) >> 2); +#else + insl(ioaddr + RX_FIFO, skb_put(skb,pkt_len), + (pkt_len + 3) >> 2); +#endif + + outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ + skb->protocol = eth_type_trans(skb,dev); + netif_rx(skb); + dev->last_rx = jiffies; + lp->stats.rx_packets++; + continue; + } + outw(RxDiscard, ioaddr + EL3_CMD); + lp->stats.rx_dropped++; + if (el3_debug) + printk("%s: Couldn't allocate a sk_buff of size %d.\n", + dev->name, pkt_len); + } + inw(ioaddr + EL3_STATUS); /* Delay. */ + while (inw(ioaddr + EL3_STATUS) & 0x1000) + printk(KERN_DEBUG " Waiting for 3c509 to discard packet, status %x.\n", + inw(ioaddr + EL3_STATUS) ); + } + + return 0; +} + +/* + * Set or clear the multicast filter for this adaptor. 
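The filter selection in set_multicast_list() below reduces to a small truth table over dev->flags. As a hedged restatement (el3_filter_bits() is hypothetical, assuming the RxFilter enum above):

	static inline int el3_filter_bits(unsigned flags, int mc_count)
	{
		if (flags & IFF_PROMISC)
			return RxStation | RxMulticast | RxBroadcast | RxProm;
		if (mc_count || (flags & IFF_ALLMULTI))
			return RxStation | RxMulticast | RxBroadcast;
		return RxStation | RxBroadcast; /* default: unicast + broadcast */
	}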
+ */ +static void +set_multicast_list(struct net_device *dev) +{ + unsigned long flags; + struct el3_private *lp = (struct el3_private *)dev->priv; + int ioaddr = dev->base_addr; + + if (el3_debug > 1) { + static int old; + if (old != dev->mc_count) { + old = dev->mc_count; + printk("%s: Setting Rx mode to %d addresses.\n", dev->name, dev->mc_count); + } + } + spin_lock_irqsave(&lp->lock, flags); + if (dev->flags&IFF_PROMISC) { + outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm, + ioaddr + EL3_CMD); + } + else if (dev->mc_count || (dev->flags&IFF_ALLMULTI)) { + outw(SetRxFilter | RxStation | RxMulticast | RxBroadcast, ioaddr + EL3_CMD); + } + else + outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); + spin_unlock_irqrestore(&lp->lock, flags); +} + +static int +el3_close(struct net_device *dev) +{ + int ioaddr = dev->base_addr; + + if (el3_debug > 2) + printk("%s: Shutting down ethercard.\n", dev->name); + + netif_stop_queue(dev); + + /* Turn off statistics ASAP. We update lp->stats below. */ + outw(StatsDisable, ioaddr + EL3_CMD); + + /* Disable the receiver and transmitter. */ + outw(RxDisable, ioaddr + EL3_CMD); + outw(TxDisable, ioaddr + EL3_CMD); + + if (dev->if_port == 3) + /* Turn off thinnet power. Green! */ + outw(StopCoax, ioaddr + EL3_CMD); + else if (dev->if_port == 0) { + /* Disable link beat and jabber, if_port may change ere next open(). */ + EL3WINDOW(4); + outw(inw(ioaddr + WN4_MEDIA) & ~MEDIA_TP, ioaddr + WN4_MEDIA); + } + + free_irq(dev->irq, dev); + /* Switching back to window 0 disables the IRQ. */ + EL3WINDOW(0); + /* But we explicitly zero the IRQ line select anyway. */ + outw(0x0f00, ioaddr + WN0_IRQ); + + update_stats(dev); + return 0; +} + +/*#ifdef MODULE*/ +/* Parameters that may be passed into the module. */ +static int debug = -1; +static int irq[] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static int xcvr[] = {-1, -1, -1, -1, -1, -1, -1, -1}; + +MODULE_PARM(debug,"i"); +MODULE_PARM(irq,"1-8i"); +MODULE_PARM(xcvr,"1-8i"); +MODULE_PARM(max_interrupt_work, "i"); +MODULE_PARM_DESC(debug, "EtherLink III debug level (0-6)"); +MODULE_PARM_DESC(irq, "EtherLink III IRQ number(s) (assigned)"); +MODULE_PARM_DESC(xcvr,"EtherLink III tranceiver(s) (0=internal, 1=external)"); +MODULE_PARM_DESC(max_interrupt_work, "EtherLink III maximum events handled per interrupt"); +#ifdef CONFIG_ISAPNP +MODULE_PARM(nopnp, "i"); +MODULE_PARM_DESC(nopnp, "EtherLink III disable ISA PnP support (0-1)"); +#endif /* CONFIG_ISAPNP */ + +int +init_module(void) +{ + int el3_cards = 0; + + if (debug >= 0) + el3_debug = debug; + + el3_root_dev = NULL; + while (el3_probe(0) == 0) { + if (irq[el3_cards] > 1) + el3_root_dev->irq = irq[el3_cards]; + if (xcvr[el3_cards] >= 0) + el3_root_dev->if_port = xcvr[el3_cards]; + el3_cards++; + } + + return el3_cards ? 0 : -ENODEV; +} + +void +cleanup_module(void) +{ + struct net_device *next_dev; + + /* No need to check MOD_IN_USE, as sys_delete_module() checks. 
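A usage note for the MODULE_PARM declarations above: the irq[] and xcvr[] arrays are positional, one comma-separated entry per probed card, so a plausible load line for two ISA cards might be (hypothetical values):

	insmod 3c509.o debug=3 irq=10,11 xcvr=0,1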
*/ + while (el3_root_dev) { + struct el3_private *lp = (struct el3_private *)el3_root_dev->priv; +#ifdef CONFIG_MCA + if(lp->mca_slot!=-1) + mca_mark_as_unused(lp->mca_slot); +#endif + next_dev = lp->next_dev; + unregister_netdev(el3_root_dev); + release_region(el3_root_dev->base_addr, EL3_IO_EXTENT); + kfree(el3_root_dev); + el3_root_dev = next_dev; + } +} +/*#endif*/ + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xen-2.4.16/drivers/net/3c59x.c b/xen-2.4.16/drivers/net/3c59x.c new file mode 100644 index 0000000000..af5d1ad5ca --- /dev/null +++ b/xen-2.4.16/drivers/net/3c59x.c @@ -0,0 +1,2987 @@ +/* EtherLinkXL.c: A 3Com EtherLink PCI III/XL ethernet driver for linux. */ +/* + Written 1996-1999 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + This driver is for the 3Com "Vortex" and "Boomerang" series ethercards. + Members of the series include Fast EtherLink 3c590/3c592/3c595/3c597 + and the EtherLink XL 3c900 and 3c905 cards. + + Problem reports and questions should be directed to + vortex@scyld.com + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Linux Kernel Additions: + + 0.99H+lk0.9 - David S. Miller - softnet, PCI DMA updates + 0.99H+lk1.0 - Jeff Garzik + Remove compatibility defines for kernel versions < 2.2.x. + Update for new 2.3.x module interface + LK1.1.2 (March 19, 2000) + * New PCI interface (jgarzik) + + LK1.1.3 25 April 2000, Andrew Morton + - Merged with 3c575_cb.c + - Don't set RxComplete in boomerang interrupt enable reg + - spinlock in vortex_timer to protect mdio functions + - disable local interrupts around call to vortex_interrupt in + vortex_tx_timeout() (So vortex_interrupt can use spin_lock()) + - Select window 3 in vortex_timer()'s write to Wn3_MAC_Ctrl + - In vortex_start_xmit(), move the lock to _after_ we've altered + vp->cur_tx and vp->tx_full. This defeats the race between + vortex_start_xmit() and vortex_interrupt which was identified + by Bogdan Costescu. + - Merged back support for six new cards from various sources + - Set vortex_have_pci if pci_module_init returns zero (fixes cardbus + insertion oops) + - Tell it that 3c905C has NWAY for 100bT autoneg + - Fix handling of SetStatusEnd in 'Too much work..' code, as + per 2.3.99's 3c575_cb (Dave Hinds). + - Split ISR into two for vortex & boomerang + - Fix MOD_INC/DEC races + - Handle resource allocation failures. + - Fix 3CCFE575CT LED polarity + - Make tx_interrupt_mitigation the default + + LK1.1.4 25 April 2000, Andrew Morton + - Add extra TxReset to vortex_up() to fix 575_cb hotplug initialisation probs. + - Put vortex_info_tbl into __devinitdata + - In the vortex_error StatsFull HACK, disable stats in vp->intr_enable as well + as in the hardware. + - Increased the loop counter in issue_and_wait from 2,000 to 4,000. + + LK1.1.5 28 April 2000, andrewm + - Added powerpc defines (John Daniel said these work...) + - Some extra diagnostics + - In vortex_error(), reset the Tx on maxCollisions. Otherwise most + chips usually get a Tx timeout. + - Added extra_reset module parm + - Replaced some inline timer manip with mod_timer + (Franois romieu ) + - In vortex_up(), don't make Wn3_config initialisation dependent upon has_nway + (this came across from 3c575_cb). + + LK1.1.6 06 Jun 2000, andrewm + - Backed out the PPC defines. + - Use del_timer_sync(), mod_timer(). 
+ - Fix wrapped ulong comparison in boomerang_rx() + - Add IS_TORNADO, use it to suppress 3c905C checksum error msg + (Donald Becker, I Lee Hetherington ) + - Replace union wn3_config with BFINS/BFEXT manipulation for + sparc64 (Pete Zaitcev, Peter Jones) + - In vortex_error, do_tx_reset and vortex_tx_timeout(Vortex): + do a netif_wake_queue() to better recover from errors. (Anders Pedersen, + Donald Becker) + - Print a warning on out-of-memory (rate limited to 1 per 10 secs) + - Added two more Cardbus 575 NICs: 5b57 and 6564 (Paul Wagland) + + LK1.1.7 2 Jul 2000 andrewm + - Better handling of shared IRQs + - Reset the transmitter on a Tx reclaim error + - Fixed crash under OOM during vortex_open() (Mark Hemment) + - Fix Rx cessation problem during OOM (help from Mark Hemment) + - The spinlocks around the mdio access were blocking interrupts for 300uS. + Fix all this to use spin_lock_bh() within mdio_read/write + - Only write to TxFreeThreshold if it's a boomerang - other NICs don't + have one. + - Added 802.3x MAC-layer flow control support + + LK1.1.8 13 Aug 2000 andrewm + - Ignore request_region() return value - already reserved if Cardbus. + - Merged some additional Cardbus flags from Don's 0.99Qk + - Some fixes for 3c556 (Fred Maciel) + - Fix for EISA initialisation (Jan Rekorajski) + - Renamed MII_XCVR_PWR and EEPROM_230 to align with 3c575_cb and D. Becker's drivers + - Fixed MII_XCVR_PWR for 3CCFE575CT + - Added INVERT_LED_PWR, used it. + - Backed out the extra_reset stuff + + LK1.1.9 12 Sep 2000 andrewm + - Backed out the tx_reset_resume flags. It was a no-op. + - In vortex_error, don't reset the Tx on txReclaim errors + - In vortex_error, don't reset the Tx on maxCollisions errors. + Hence backed out all the DownListPtr logic here. + - In vortex_error, give Tornado cards a partial TxReset on + maxCollisions (David Hinds). Defined MAX_COLLISION_RESET for this. + - Redid some driver flags and device names based on pcmcia_cs-3.1.20. + - Fixed a bug where, if vp->tx_full is set when the interface + is downed, it remains set when the interface is upped. Bad + things happen. + + LK1.1.10 17 Sep 2000 andrewm + - Added EEPROM_8BIT for 3c555 (Fred Maciel) + - Added experimental support for the 3c556B Laptop Hurricane (Louis Gerbarg) + - Add HAS_NWAY to "3c900 Cyclone 10Mbps TPO" + + LK1.1.11 13 Nov 2000 andrewm + - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER + + LK1.1.12 1 Jan 2001 andrewm (2.4.0-pre1) + - Call pci_enable_device before we request our IRQ (Tobias Ringstrom) + - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra) + - Added extended issue_and_wait for the 3c905CX. + - Look for an MII on PHY index 24 first (3c905CX oddity). + - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger) + - Don't free skbs we don't own on oom path in vortex_open(). + + LK1.1.13 27 Jan 2001 + - Added explicit `medialock' flag so we can truly + lock the media type down with `options'. + - "check ioremap return and some tidbits" (Arnaldo Carvalho de Melo ) + - Added and used EEPROM_NORESET for 3c556B PM resumes. + - Fixed leakage of vp->rx_ring. + - Break out separate HAS_HWCKSM device capability flag. + - Kill vp->tx_full (ANK) + - Merge zerocopy fragment handling (ANK?) + + LK1.1.14 15 Feb 2001 + - Enable WOL. Can be turned on with `enable_wol' module option. + - EISA and PCI initialisation fixes (jgarzik, Manfred Spraul) + - If a device's internalconfig register reports it has NWAY, + use it, even if autoselect is enabled. 
+ + LK1.1.15 6 June 2001 akpm + - Prevent double counting of received bytes (Lars Christensen) + - Add ethtool support (jgarzik) + - Add module parm descriptions (Andrzej M. Krzysztofowicz) + - Implemented alloc_etherdev() API + - Special-case the 'Tx error 82' message. + + LK1.1.16 18 July 2001 akpm + - Make NETIF_F_SG dependent upon nr_free_highpages(), not on CONFIG_HIGHMEM + - Lessen verbosity of bootup messages + - Fix WOL - use new PM API functions. + - Use netif_running() instead of vp->open in suspend/resume. + - Don't reset the interface logic on open/close/rmmod. It upsets + autonegotiation, and hence DHCP (from 0.99T). + - Back out EEPROM_NORESET flag because of the above (we do it for all + NICs). + - Correct 3c982 identification string + - Rename wait_for_completion() to issue_and_wait() to avoid completion.h + clash. + + - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details. + - Also see Documentation/networking/vortex.txt +*/ + +/* + * FIXME: This driver _could_ support MTU changing, but doesn't. See Don's hamachi.c implementation + * as well as other drivers + * + * NOTE: If you make 'vortex_debug' a constant (#define vortex_debug 0) the driver shrinks by 2k + * due to dead code elimination. There will be some performance benefits from this due to + * elimination of all the tests and reduced cache footprint. + */ + + +#define DRV_NAME "3c59x" +#define DRV_VERSION "LK1.1.16" +#define DRV_RELDATE "19 July 2001" + + +/* "Knobs" that adjust features and parameters. */ +/* Set the copy breakpoint for the copy-only-tiny-frames scheme. + Setting to > 1512 effectively disables this feature. */ +#ifndef __arm__ +static const int rx_copybreak = 200; +#else +/* ARM systems perform better by disregarding the bus-master + transfer capability of these cards. -- rmk */ +static const int rx_copybreak = 1513; +#endif +/* Allow setting MTU to a larger size, bypassing the normal ethernet setup. */ +static const int mtu = 1500; +/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ +static int max_interrupt_work = 32; +/* Tx timeout interval (millisecs) */ +static int watchdog = 5000; + +/* Allow aggregation of Tx interrupts. Saves CPU load at the cost + * of possible Tx stalls if the system is blocking interrupts + * somewhere else. Undefine this to disable. + */ +#define tx_interrupt_mitigation 1 + +/* Put out somewhat more debugging messages. (0: no msg, 1 minimal .. 6). */ +#define vortex_debug debug +#ifdef VORTEX_DEBUG +static int vortex_debug = VORTEX_DEBUG; +#else +static int vortex_debug = 1; +#endif + +#ifndef __OPTIMIZE__ +#error You must compile this file with the correct options! +#error See the last lines of the source file. +#error You must compile this driver with "-O". +#endif + +#include +#include +#include +//#include +#include +//#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include /* For NR_IRQS only. */ +#include +#include +#include + +/* A few values that may be tweaked. */ +/* Keep the ring sizes a power of two for efficiency. */ +#undef TX_RING_SIZE +#undef RX_RING_SIZE +#define TX_RING_SIZE 16 +#define RX_RING_SIZE 32 +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ + +/* Kernel compatibility defines, some common to David Hinds' PCMCIA package. + This is only in the support-all-kernels source code. 
*/ + +#define RUN_AT(x) (jiffies + (x)) + +#include + + +static char version[] __devinitdata = +DRV_NAME ": Donald Becker and others. www.scyld.com/network/vortex.html\n"; + +MODULE_AUTHOR("Donald Becker "); +MODULE_DESCRIPTION("3Com 3c59x/3c9xx ethernet driver " + DRV_VERSION " " DRV_RELDATE); +MODULE_LICENSE("GPL"); + +MODULE_PARM(debug, "i"); +MODULE_PARM(options, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(hw_checksums, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(flow_ctrl, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(enable_wol, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(rx_copybreak, "i"); +MODULE_PARM(max_interrupt_work, "i"); +MODULE_PARM(compaq_ioaddr, "i"); +MODULE_PARM(compaq_irq, "i"); +MODULE_PARM(compaq_device_id, "i"); +MODULE_PARM(watchdog, "i"); +MODULE_PARM_DESC(debug, "3c59x debug level (0-6)"); +MODULE_PARM_DESC(options, "3c59x: Bits 0-3: media type, bit 4: bus mastering, bit 9: full duplex"); +MODULE_PARM_DESC(full_duplex, "3c59x full duplex setting(s) (1)"); +MODULE_PARM_DESC(hw_checksums, "3c59x Hardware checksum checking by adapter(s) (0-1)"); +MODULE_PARM_DESC(flow_ctrl, "3c59x 802.3x flow control usage (PAUSE only) (0-1)"); +MODULE_PARM_DESC(enable_wol, "3c59x: Turn on Wake-on-LAN for adapter(s) (0-1)"); +MODULE_PARM_DESC(rx_copybreak, "3c59x copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(max_interrupt_work, "3c59x maximum events handled per interrupt"); +MODULE_PARM_DESC(compaq_ioaddr, "3c59x PCI I/O base address (Compaq BIOS problem workaround)"); +MODULE_PARM_DESC(compaq_irq, "3c59x PCI IRQ number (Compaq BIOS problem workaround)"); +MODULE_PARM_DESC(compaq_device_id, "3c59x PCI device ID (Compaq BIOS problem workaround)"); +MODULE_PARM_DESC(watchdog, "3c59x transmit timeout in milliseconds"); + +/* Operational parameter that usually are not changed. */ + +/* The Vortex size is twice that of the original EtherLinkIII series: the + runtime register window, window 1, is now always mapped in. + The Boomerang size is twice as large as the Vortex -- it has additional + bus master control registers. */ +#define VORTEX_TOTAL_SIZE 0x20 +#define BOOMERANG_TOTAL_SIZE 0x40 + +/* Set iff a MII transceiver on any interface requires mdio preamble. + This only set with the original DP83840 on older 3c905 boards, so the extra + code size of a per-interface flag is not worthwhile. */ +static char mii_preamble_required; + +#define PFX DRV_NAME ": " + + + +/* + Theory of Operation + +I. Board Compatibility + +This device driver is designed for the 3Com FastEtherLink and FastEtherLink +XL, 3Com's PCI to 10/100baseT adapters. It also works with the 10Mbs +versions of the FastEtherLink cards. The supported product IDs are + 3c590, 3c592, 3c595, 3c597, 3c900, 3c905 + +The related ISA 3c515 is supported with a separate driver, 3c515.c, included +with the kernel source or available from + cesdis.gsfc.nasa.gov:/pub/linux/drivers/3c515.html + +II. Board-specific settings + +PCI bus devices are configured by the system at boot time, so no jumpers +need to be set on the board. The system BIOS should be set to assign the +PCI INTA signal to an otherwise unused system IRQ line. + +The EEPROM settings for media type and forced-full-duplex are observed. +The EEPROM media type should be left at the default "autoselect" unless using +10base2 or AUI connections which cannot be reliably detected. + +III. Driver operation + +The 3c59x series use an interface that's very similar to the previous 3c5x9 +series. 
The primary interface is two programmed-I/O FIFOs, with an +alternate single-contiguous-region bus-master transfer (see next). + +The 3c900 "Boomerang" series uses a full-bus-master interface with separate +lists of transmit and receive descriptors, similar to the AMD LANCE/PCnet, +DEC Tulip and Intel Speedo3. The first chip version retains a compatible +programmed-I/O interface that has been removed in 'B' and subsequent board +revisions. + +One extension that is advertised in a very large font is that the adapters +are capable of being bus masters. On the Vortex chip this capability was +only for a single contiguous region making it far less useful than the full +bus master capability. There is a significant performance impact of taking +an extra interrupt or polling for the completion of each transfer, as well +as difficulty sharing the single transfer engine between the transmit and +receive threads. Using DMA transfers is a win only with large blocks or +with the flawed versions of the Intel Orion motherboard PCI controller. + +The Boomerang chip's full-bus-master interface is useful, and has the +currently-unused advantages over other similar chips that queued transmit +packets may be reordered and receive buffer groups are associated with a +single frame. + +With full-bus-master support, this driver uses a "RX_COPYBREAK" scheme. +Rather than a fixed intermediate receive buffer, this scheme allocates +full-sized skbuffs as receive buffers. The value RX_COPYBREAK is used as +the copying breakpoint: it is chosen to trade-off the memory wasted by +passing the full-sized skbuff to the queue layer for all frames vs. the +copying cost of copying a frame to a correctly-sized skbuff. + +IIIC. Synchronization +The driver runs as two independent, single-threaded flows of control. One +is the send-packet routine, which enforces single-threaded use by the +dev->tbusy flag. The other thread is the interrupt handler, which is single +threaded by the hardware and other software. + +IV. Notes + +Thanks to Cameron Spitzer and Terry Murphy of 3Com for providing development +3c590, 3c595, and 3c900 boards. +The name "Vortex" is the internal 3Com project name for the PCI ASIC, and +the EISA version is called "Demon". According to Terry these names come +from rides at the local amusement park. + +The new chips support both ethernet (1.5K) and FDDI (4.5K) packet sizes! +This driver only supports ethernet packets because of the skbuff allocation +limit of 4K. +*/ + +/* This table drives the PCI probe routines. It's mostly boilerplate in all + of the drivers, and will likely be provided by some future kernel. 
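A hedged sketch, not part of the patch, of the RX_COPYBREAK trade-off described above: frames below the breakpoint are copied into a right-sized skb so the full-sized buffer can stay on the ring, while larger frames hand the ring skb up directly. rx_copybreak_copy() is hypothetical; the real logic lives in the driver's receive paths.

	static struct sk_buff *rx_copybreak_copy(struct sk_buff *ring_skb, int pkt_len)
	{
		if (pkt_len < rx_copybreak) {
			struct sk_buff *copy = dev_alloc_skb(pkt_len + 2);
			if (copy) {
				skb_reserve(copy, 2); /* align the IP header */
				memcpy(skb_put(copy, pkt_len), ring_skb->data, pkt_len);
				return copy;  /* ring_skb is reused for the next frame */
			}
		}
		return NULL; /* caller unhooks ring_skb and refills the ring slot */
	}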
+*/ +enum pci_flags_bit { + PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, + PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, +}; + +enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8, + EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */ + HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100, + INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800, + EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 }; + +enum vortex_chips { + CH_3C590 = 0, + CH_3C592, + CH_3C597, + CH_3C595_1, + CH_3C595_2, + + CH_3C595_3, + CH_3C900_1, + CH_3C900_2, + CH_3C900_3, + CH_3C900_4, + + CH_3C900_5, + CH_3C900B_FL, + CH_3C905_1, + CH_3C905_2, + CH_3C905B_1, + + CH_3C905B_2, + CH_3C905B_FX, + CH_3C905C, + CH_3C980, + CH_3C9805, + + CH_3CSOHO100_TX, + CH_3C555, + CH_3C556, + CH_3C556B, + CH_3C575, + + CH_3C575_1, + CH_3CCFE575, + CH_3CCFE575CT, + CH_3CCFE656, + CH_3CCFEM656, + + CH_3CCFEM656_1, + CH_3C450, +}; + + +/* note: this array directly indexed by above enums, and MUST + * be kept in sync with both the enums above, and the PCI device + * table below + */ +static struct vortex_chip_info { + const char *name; + int flags; + int drv_flags; + int io_size; +} vortex_info_tbl[] __devinitdata = { +#define EISA_TBL_OFFSET 0 /* Offset of this entry for vortex_eisa_init */ + {"3c590 Vortex 10Mbps", + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + {"3c592 EISA 10Mbps Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + {"3c597 EISA Fast Demon/Vortex", /* AKPM: from Don's 3c59x_cb.c 0.49H */ + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + {"3c595 Vortex 100baseTx", + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + {"3c595 Vortex 100baseT4", + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + + {"3c595 Vortex 100base-MII", + PCI_USES_IO|PCI_USES_MASTER, IS_VORTEX, 32, }, + {"3c900 Boomerang 10baseT", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, + {"3c900 Boomerang 10Mbps Combo", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG, 64, }, + {"3c900 Cyclone 10Mbps TPO", /* AKPM: from Don's 0.99M */ + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, + {"3c900 Cyclone 10Mbps Combo", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + + {"3c900 Cyclone 10Mbps TPC", /* AKPM: from Don's 0.99M */ + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + {"3c900B-FL Cyclone 10base-FL", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + {"3c905 Boomerang 100baseTx", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, + {"3c905 Boomerang 100baseT4", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII, 64, }, + {"3c905B Cyclone 100baseTx", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, + + {"3c905B Cyclone 10/100/BNC", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, + {"3c905B-FX Cyclone 100baseFx", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + {"3c905C Tornado", + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, + {"3c980 Cyclone", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + {"3c982 Dual Port Server Cyclone", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, }, + + {"3cSOHO100-TX Hurricane", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, }, + {"3c555 Laptop Hurricane", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|EEPROM_8BIT|HAS_HWCKSM, 128, }, + {"3c556 Laptop Tornado", + PCI_USES_IO|PCI_USES_MASTER, 
IS_TORNADO|HAS_NWAY|EEPROM_8BIT|HAS_CB_FNS|INVERT_MII_PWR| + HAS_HWCKSM, 128, }, + {"3c556B Laptop Hurricane", + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR| + HAS_HWCKSM, 128, }, + {"3c575 [Megahertz] 10/100 LAN CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, + + {"3c575 Boomerang CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, }, + {"3CCFE575BT Cyclone CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT| + INVERT_LED_PWR|HAS_HWCKSM, 128, }, + {"3CCFE575CT Tornado CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| + MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, + {"3CCFE656 Cyclone CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| + INVERT_LED_PWR|HAS_HWCKSM, 128, }, + {"3CCFEM656B Cyclone+Winmodem CardBus", + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| + INVERT_LED_PWR|HAS_HWCKSM, 128, }, + + {"3CXFEM656C Tornado+Winmodem CardBus", /* From pcmcia-cs-3.1.5 */ + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_CB_FNS|EEPROM_8BIT|INVERT_MII_PWR| + MAX_COLLISION_RESET|HAS_HWCKSM, 128, }, + {"3c450 HomePNA Tornado", /* AKPM: from Don's 0.99Q */ + PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|HAS_HWCKSM, 128, }, + {0,}, /* 0 terminated list. */ +}; + + +static struct pci_device_id vortex_pci_tbl[] __devinitdata = { + { 0x10B7, 0x5900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C590 }, + { 0x10B7, 0x5920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C592 }, + { 0x10B7, 0x5970, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C597 }, + { 0x10B7, 0x5950, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_1 }, + { 0x10B7, 0x5951, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_2 }, + + { 0x10B7, 0x5952, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C595_3 }, + { 0x10B7, 0x9000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_1 }, + { 0x10B7, 0x9001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_2 }, + { 0x10B7, 0x9004, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_3 }, + { 0x10B7, 0x9005, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_4 }, + + { 0x10B7, 0x9006, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900_5 }, + { 0x10B7, 0x900A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C900B_FL }, + { 0x10B7, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_1 }, + { 0x10B7, 0x9051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905_2 }, + { 0x10B7, 0x9055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_1 }, + + { 0x10B7, 0x9058, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_2 }, + { 0x10B7, 0x905A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905B_FX }, + { 0x10B7, 0x9200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C905C }, + { 0x10B7, 0x9800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C980 }, + { 0x10B7, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C9805 }, + + { 0x10B7, 0x7646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CSOHO100_TX }, + { 0x10B7, 0x5055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C555 }, + { 0x10B7, 0x6055, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556 }, + { 0x10B7, 0x6056, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C556B }, + { 0x10B7, 0x5b57, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575 }, + + { 0x10B7, 0x5057, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3C575_1 }, + { 0x10B7, 0x5157, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575 }, + { 0x10B7, 0x5257, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE575CT }, + { 0x10B7, 0x6560, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFE656 }, + { 0x10B7, 0x6562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656 }, + + { 0x10B7, 0x6564, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_3CCFEM656_1 }, + { 0x10B7, 0x4500, PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, CH_3C450 }, + {0,} /* 0 terminated list. */ +}; +MODULE_DEVICE_TABLE(pci, vortex_pci_tbl); + + +/* Operational definitions. + These are not used by other compilation units and thus are not + exported in a ".h" file. + + First the windows. There are eight register windows, with the command + and status registers available in each. + */ +#define EL3WINDOW(win_num) outw(SelectWindow + (win_num), ioaddr + EL3_CMD) +#define EL3_CMD 0x0e +#define EL3_STATUS 0x0e + +/* The top five bits written to EL3_CMD are a command, the lower + 11 bits are the parameter, if applicable. + Note that 11 parameters bits was fine for ethernet, but the new chip + can handle FDDI length frames (~4500 octets) and now parameters count + 32-bit 'Dwords' rather than octets. */ + +enum vortex_cmd { + TotalReset = 0<<11, SelectWindow = 1<<11, StartCoax = 2<<11, + RxDisable = 3<<11, RxEnable = 4<<11, RxReset = 5<<11, + UpStall = 6<<11, UpUnstall = (6<<11)+1, + DownStall = (6<<11)+2, DownUnstall = (6<<11)+3, + RxDiscard = 8<<11, TxEnable = 9<<11, TxDisable = 10<<11, TxReset = 11<<11, + FakeIntr = 12<<11, AckIntr = 13<<11, SetIntrEnb = 14<<11, + SetStatusEnb = 15<<11, SetRxFilter = 16<<11, SetRxThreshold = 17<<11, + SetTxThreshold = 18<<11, SetTxStart = 19<<11, + StartDMAUp = 20<<11, StartDMADown = (20<<11)+1, StatsEnable = 21<<11, + StatsDisable = 22<<11, StopCoax = 23<<11, SetFilterBit = 25<<11,}; + +/* The SetRxFilter command accepts the following classes: */ +enum RxFilter { + RxStation = 1, RxMulticast = 2, RxBroadcast = 4, RxProm = 8 }; + +/* Bits in the general status register. */ +enum vortex_status { + IntLatch = 0x0001, HostError = 0x0002, TxComplete = 0x0004, + TxAvailable = 0x0008, RxComplete = 0x0010, RxEarly = 0x0020, + IntReq = 0x0040, StatsFull = 0x0080, + DMADone = 1<<8, DownComplete = 1<<9, UpComplete = 1<<10, + DMAInProgress = 1<<11, /* DMA controller is still busy.*/ + CmdInProgress = 1<<12, /* EL3_CMD is still busy.*/ +}; + +/* Register window 1 offsets, the window used in normal operation. + On the Vortex this window is always mapped at offsets 0x10-0x1f. */ +enum Window1 { + TX_FIFO = 0x10, RX_FIFO = 0x10, RxErrors = 0x14, + RxStatus = 0x18, Timer=0x1A, TxStatus = 0x1B, + TxFree = 0x1C, /* Remaining free bytes in Tx buffer. */ +}; +enum Window0 { + Wn0EepromCmd = 10, /* Window 0: EEPROM command register. */ + Wn0EepromData = 12, /* Window 0: EEPROM results register. */ + IntrStatus=0x0E, /* Valid in all windows. */ +}; +enum Win0_EEPROM_bits { + EEPROM_Read = 0x80, EEPROM_WRITE = 0x40, EEPROM_ERASE = 0xC0, + EEPROM_EWENB = 0x30, /* Enable erasing/writing for 10 msec. */ + EEPROM_EWDIS = 0x00, /* Disable EWENB before 10 msec timeout. */ +}; +/* EEPROM locations. */ +enum eeprom_offset { + PhysAddr01=0, PhysAddr23=1, PhysAddr45=2, ModelID=3, + EtherLink3ID=7, IFXcvrIO=8, IRQLine=9, + NodeAddr01=10, NodeAddr23=11, NodeAddr45=12, + DriverTune=13, Checksum=15}; + +enum Window2 { /* Window 2. */ + Wn2_ResetOptions=12, +}; +enum Window3 { /* Window 3: MAC/config bits. 
*/ + Wn3_Config=0, Wn3_MAC_Ctrl=6, Wn3_Options=8, +}; + +#define BFEXT(value, offset, bitcount) \ + ((((unsigned long)(value)) >> (offset)) & ((1 << (bitcount)) - 1)) + +#define BFINS(lhs, rhs, offset, bitcount) \ + (((lhs) & ~((((1 << (bitcount)) - 1)) << (offset))) | \ + (((rhs) & ((1 << (bitcount)) - 1)) << (offset))) + +#define RAM_SIZE(v) BFEXT(v, 0, 3) +#define RAM_WIDTH(v) BFEXT(v, 3, 1) +#define RAM_SPEED(v) BFEXT(v, 4, 2) +#define ROM_SIZE(v) BFEXT(v, 6, 2) +#define RAM_SPLIT(v) BFEXT(v, 16, 2) +#define XCVR(v) BFEXT(v, 20, 4) +#define AUTOSELECT(v) BFEXT(v, 24, 1) + +enum Window4 { /* Window 4: Xcvr/media bits. */ + Wn4_FIFODiag = 4, Wn4_NetDiag = 6, Wn4_PhysicalMgmt=8, Wn4_Media = 10, +}; +enum Win4_Media_bits { + Media_SQE = 0x0008, /* Enable SQE error counting for AUI. */ + Media_10TP = 0x00C0, /* Enable link beat and jabber for 10baseT. */ + Media_Lnk = 0x0080, /* Enable just link beat for 100TX/100FX. */ + Media_LnkBeat = 0x0800, +}; +enum Window7 { /* Window 7: Bus Master control. */ + Wn7_MasterAddr = 0, Wn7_MasterLen = 6, Wn7_MasterStatus = 12, +}; +/* Boomerang bus master control registers. */ +enum MasterCtrl { + PktStatus = 0x20, DownListPtr = 0x24, FragAddr = 0x28, FragLen = 0x2c, + TxFreeThreshold = 0x2f, UpPktStatus = 0x30, UpListPtr = 0x38, +}; + +/* The Rx and Tx descriptor lists. + Caution Alpha hackers: these types are 32 bits! Note also the 8 byte + alignment constraint on tx_ring[] and rx_ring[]. */ +#define LAST_FRAG 0x80000000 /* Last Addr/Len pair in descriptor. */ +#define DN_COMPLETE 0x00010000 /* This packet has been downloaded */ +struct boom_rx_desc { + u32 next; /* Last entry points to 0. */ + s32 status; + u32 addr; /* Up to 63 addr/len pairs possible. */ + s32 length; /* Set LAST_FRAG to indicate last pair. */ +}; +/* Values for the Rx status entry. */ +enum rx_desc_status { + RxDComplete=0x00008000, RxDError=0x4000, + /* See boomerang_rx() for actual error bits */ + IPChksumErr=1<<25, TCPChksumErr=1<<26, UDPChksumErr=1<<27, + IPChksumValid=1<<29, TCPChksumValid=1<<30, UDPChksumValid=1<<31, +}; + +#ifdef MAX_SKB_FRAGS +#define DO_ZEROCOPY 1 +#else +#define DO_ZEROCOPY 0 +#endif + +struct boom_tx_desc { + u32 next; /* Last entry points to 0. */ + s32 status; /* bits 0:12 length, others see below. */ +#if DO_ZEROCOPY + struct { + u32 addr; + s32 length; + } frag[1+MAX_SKB_FRAGS]; +#else + u32 addr; + s32 length; +#endif +}; + +/* Values for the Tx status entry. */ +enum tx_desc_status { + CRCDisable=0x2000, TxDComplete=0x8000, + AddIPChksum=0x02000000, AddTCPChksum=0x04000000, AddUDPChksum=0x08000000, + TxIntrUploaded=0x80000000, /* IRQ when in FIFO, but maybe not sent. */ +}; + +/* Chip features we care about in vp->capabilities, read from the EEPROM. */ +enum ChipCaps { CapBusMaster=0x20, CapPwrMgmt=0x2000 }; + +struct vortex_private { + /* The Rx and Tx rings should be quad-word-aligned. */ + struct boom_rx_desc* rx_ring; + struct boom_tx_desc* tx_ring; + dma_addr_t rx_ring_dma; + dma_addr_t tx_ring_dma; + /* The addresses of transmit- and receive-in-place skbuffs. */ + struct sk_buff* rx_skbuff[RX_RING_SIZE]; + struct sk_buff* tx_skbuff[TX_RING_SIZE]; + struct net_device *next_module; /* NULL if PCI device */ + unsigned int cur_rx, cur_tx; /* The next free ring entry */ + unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + struct net_device_stats stats; + struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl. */ + dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA.
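+ Only one packet is outstanding in this mode at a time: vortex_start_xmit() maps it into tx_skb/tx_skb_dma, and the DMADone branch of vortex_interrupt() unmaps and frees it.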
*/ + + /* PCI configuration space information. */ + struct pci_dev *pdev; + char *cb_fn_base; /* CardBus function status addr space. */ + + /* Some values here only for performance evaluation and path-coverage */ + int rx_nocopy, rx_copy, queued_packet, rx_csumhits; + int card_idx; + + /* The remainder are related to chip state, mostly media selection. */ + struct timer_list timer; /* Media selection timer. */ + struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ + int options; /* User-settable misc. driver options. */ + unsigned int media_override:4, /* Passed-in media type. */ + default_media:4, /* Read from the EEPROM/Wn3_Config. */ + full_duplex:1, force_fd:1, autoselect:1, + bus_master:1, /* Vortex can only do a fragment bus-m. */ + full_bus_master_tx:1, full_bus_master_rx:2, /* Boomerang */ + flow_ctrl:1, /* Use 802.3x flow control (PAUSE only) */ + partner_flow_ctrl:1, /* Partner supports flow control */ + has_nway:1, + enable_wol:1, /* Wake-on-LAN is enabled */ + pm_state_valid:1, /* power_state[] has sane contents */ + open:1, + medialock:1, + must_free_region:1; /* Flag: if zero, Cardbus owns the I/O region */ + int drv_flags; + u16 status_enable; + u16 intr_enable; + u16 available_media; /* From Wn3_Options. */ + u16 capabilities, info1, info2; /* Various, from EEPROM. */ + u16 advertising; /* NWay media advertisement */ + unsigned char phys[2]; /* MII device addresses. */ + u16 deferred; /* Resend these interrupts when we + * bale from the ISR */ + u16 io_size; /* Size of PCI region (for release_region) */ + spinlock_t lock; /* Serialise access to device & its vortex_private */ + spinlock_t mdio_lock; /* Serialise access to mdio hardware */ + u32 power_state[16]; +}; + +/* The action to take with a media selection timer tick. + Note that we deviate from the 3Com order by checking 10base2 before AUI. + */ +enum xcvr_types { + XCVR_10baseT=0, XCVR_AUI, XCVR_10baseTOnly, XCVR_10base2, XCVR_100baseTx, + XCVR_100baseFx, XCVR_MII=6, XCVR_NWAY=8, XCVR_ExtMII=9, XCVR_Default=10, +}; + +static struct media_table { + char *name; + unsigned int media_bits:16, /* Bits to set in Wn4_Media register. */ + mask:8, /* The transceiver-present bit in Wn3_Config.*/ + next:8; /* The media type to try next. */ + int wait; /* Time before we check media status. 
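+ In jiffies: e.g. (14*HZ)/10 in media_tbl below is 1.4 seconds, and 3*HZ gives the MII entries three seconds.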
*/ +} media_tbl[] = { + { "10baseT", Media_10TP,0x08, XCVR_10base2, (14*HZ)/10}, + { "10Mbs AUI", Media_SQE, 0x20, XCVR_Default, (1*HZ)/10}, + { "undefined", 0, 0x80, XCVR_10baseT, 10000}, + { "10base2", 0, 0x10, XCVR_AUI, (1*HZ)/10}, + { "100baseTX", Media_Lnk, 0x02, XCVR_100baseFx, (14*HZ)/10}, + { "100baseFX", Media_Lnk, 0x04, XCVR_MII, (14*HZ)/10}, + { "MII", 0, 0x41, XCVR_10baseT, 3*HZ }, + { "undefined", 0, 0x01, XCVR_10baseT, 10000}, + { "Autonegotiate", 0, 0x41, XCVR_10baseT, 3*HZ}, + { "MII-External", 0, 0x41, XCVR_10baseT, 3*HZ }, + { "Default", 0, 0xFF, XCVR_10baseT, 10000}, +}; + +static int vortex_probe1(struct pci_dev *pdev, long ioaddr, int irq, + int chip_idx, int card_idx); +static void vortex_up(struct net_device *dev); +static void vortex_down(struct net_device *dev); +static int vortex_open(struct net_device *dev); +static void mdio_sync(long ioaddr, int bits); +static int mdio_read(struct net_device *dev, int phy_id, int location); +static void mdio_write(struct net_device *vp, int phy_id, int location, int value); +static void vortex_timer(unsigned long arg); +static void rx_oom_timer(unsigned long arg); +static int vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int vortex_rx(struct net_device *dev); +static int boomerang_rx(struct net_device *dev); +static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs); +static int vortex_close(struct net_device *dev); +static void dump_tx_ring(struct net_device *dev); +static void update_stats(long ioaddr, struct net_device *dev); +static struct net_device_stats *vortex_get_stats(struct net_device *dev); +static void set_rx_mode(struct net_device *dev); +static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static void vortex_tx_timeout(struct net_device *dev); +static void acpi_set_WOL(struct net_device *dev); + +/* This driver uses 'options' to pass the media type, full-duplex flag, etc. */ +/* Option count limit only -- unlimited interfaces are supported. */ +#define MAX_UNITS 8 +static int options[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1,}; +static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static int hw_checksums[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static int flow_ctrl[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static int enable_wol[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; + +/* #define dev_alloc_skb dev_alloc_skb_debug */ + +/* A list of all installed Vortex EISA devices, for removing the driver module. */ +static struct net_device *root_vortex_eisa_dev; + +/* Variables to work-around the Compaq PCI BIOS32 problem. 
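+ vortex_eisa_init() below feeds these straight to vortex_probe1() with a NULL pdev, the same path taken for boards found by the EISA slot scan.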
*/ +static int compaq_ioaddr, compaq_irq, compaq_device_id = 0x5900; + +static int vortex_cards_found; + +#ifdef CONFIG_PM + +static int vortex_suspend (struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev && dev->priv) { + if (netif_running(dev)) { + netif_device_detach(dev); + vortex_down(dev); + } + } + return 0; +} + +static int vortex_resume (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev && dev->priv) { + if (netif_running(dev)) { + vortex_up(dev); + netif_device_attach(dev); + } + } + return 0; +} + +#endif /* CONFIG_PM */ + +/* returns count found (>= 0), or negative on error */ +static int __init vortex_eisa_init (void) +{ + long ioaddr; + int rc; + int orig_cards_found = vortex_cards_found; + + /* Now check all slots of the EISA bus. */ + if (!EISA_bus) + return 0; + + for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) { + int device_id; + + if (request_region(ioaddr, VORTEX_TOTAL_SIZE, DRV_NAME) == NULL) + continue; + + /* Check the standard EISA ID register for an encoded '3Com'. */ + if (inw(ioaddr + 0xC80) != 0x6d50) { + release_region (ioaddr, VORTEX_TOTAL_SIZE); + continue; + } + + /* Check for a product that we support, 3c59{2,7} any rev. */ + device_id = (inb(ioaddr + 0xC82)<<8) + inb(ioaddr + 0xC83); + if ((device_id & 0xFF00) != 0x5900) { + release_region (ioaddr, VORTEX_TOTAL_SIZE); + continue; + } + + rc = vortex_probe1(NULL, ioaddr, inw(ioaddr + 0xC88) >> 12, + EISA_TBL_OFFSET, vortex_cards_found); + if (rc == 0) + vortex_cards_found++; + else + release_region (ioaddr, VORTEX_TOTAL_SIZE); + } + + /* Special code to work-around the Compaq PCI BIOS32 problem. */ + if (compaq_ioaddr) { + vortex_probe1(NULL, compaq_ioaddr, compaq_irq, + compaq_device_id, vortex_cards_found++); + } + + return vortex_cards_found - orig_cards_found; +} + +/* returns count (>= 0), or negative on error */ +static int __devinit vortex_init_one (struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + /* wake up and enable device */ + if (pci_enable_device (pdev)) { + rc = -EIO; + } else { + rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq, + ent->driver_data, vortex_cards_found); + if (rc == 0) + vortex_cards_found++; + } + return rc; +} + +/* + * Start up the PCI device which is described by *pdev. + * Return 0 on success. + * + * NOTE: pdev can be NULL, for the case of an EISA driver + */ +static int __devinit vortex_probe1(struct pci_dev *pdev, + long ioaddr, int irq, + int chip_idx, int card_idx) +{ + struct vortex_private *vp; + int option; + unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ + int i, step; + struct net_device *dev; + static int printed_version; + int retval, print_info; + struct vortex_chip_info * const vci = &vortex_info_tbl[chip_idx]; + char *print_name; + + if (!printed_version) { + printk (version); + printed_version = 1; + } + + print_name = pdev ? pdev->slot_name : "3c59x"; + + dev = alloc_etherdev(sizeof(*vp)); + retval = -ENOMEM; + if (!dev) { + printk (KERN_ERR PFX "unable to allocate etherdev, aborting\n"); + goto out; + } + SET_MODULE_OWNER(dev); + vp = dev->priv; + + /* The lower four bits are the media type. 
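+ The other option bits, per the parsing below: 0x200 full duplex, 0x10 Vortex bus mastering, 0x400 enable WOL, 0x8000/0x4000 debug levels. E.g. option 0x204 forces 100baseTx (media type 4) at full duplex.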
*/ + if (dev->mem_start) { + /* + * The 'options' param is passed in as the third arg to the + * LILO 'ether=' argument for non-modular use + */ + option = dev->mem_start; + } + else if (card_idx < MAX_UNITS) + option = options[card_idx]; + else + option = -1; + + if (option > 0) { + if (option & 0x8000) + vortex_debug = 7; + if (option & 0x4000) + vortex_debug = 2; + if (option & 0x0400) + vp->enable_wol = 1; + } + + print_info = (vortex_debug > 1); + if (print_info) + printk (KERN_INFO "See Documentation/networking/vortex.txt\n"); + + printk(KERN_INFO "%s: 3Com %s %s at 0x%lx. Vers " DRV_VERSION "\n", + print_name, + pdev ? "PCI" : "EISA", + vci->name, + ioaddr); + + dev->base_addr = ioaddr; + dev->irq = irq; + dev->mtu = mtu; + vp->drv_flags = vci->drv_flags; + vp->has_nway = (vci->drv_flags & HAS_NWAY) ? 1 : 0; + vp->io_size = vci->io_size; + vp->card_idx = card_idx; + + /* module list only for EISA devices */ + if (pdev == NULL) { + vp->next_module = root_vortex_eisa_dev; + root_vortex_eisa_dev = dev; + } + + /* PCI-only startup logic */ + if (pdev) { + /* EISA resources already marked, so only PCI needs to do this here */ + /* Ignore return value, because Cardbus drivers already allocate for us */ + if (request_region(ioaddr, vci->io_size, print_name) != NULL) + vp->must_free_region = 1; + + /* enable bus-mastering if necessary */ + if (vci->flags & PCI_USES_MASTER) + pci_set_master (pdev); + + if (vci->drv_flags & IS_VORTEX) { + u8 pci_latency; + u8 new_latency = 248; + + /* Check the PCI latency value. On the 3c590 series the latency timer + must be set to the maximum value to avoid data corruption that occurs + when the timer expires during a transfer. This bug exists in the Vortex + chip only. */ + pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency); + if (pci_latency < new_latency) { + printk(KERN_INFO "%s: Overriding PCI latency" + " timer (CFLT) setting of %d, new value is %d.\n", + print_name, pci_latency, new_latency); + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency); + } + } + } + + spin_lock_init(&vp->lock); + spin_lock_init(&vp->mdio_lock); + vp->pdev = pdev; + + /* Makes sure rings are at least 16 byte aligned. */ + vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + &vp->rx_ring_dma); + retval = -ENOMEM; + if (vp->rx_ring == 0) + goto free_region; + + vp->tx_ring = (struct boom_tx_desc *)(vp->rx_ring + RX_RING_SIZE); + vp->tx_ring_dma = vp->rx_ring_dma + sizeof(struct boom_rx_desc) * RX_RING_SIZE; + + /* if we are a PCI driver, we store info in pdev->driver_data + * instead of a module list */ + if (pdev) + pci_set_drvdata(pdev, dev); + + vp->media_override = 7; + if (option >= 0) { + vp->media_override = ((option & 7) == 2) ? 0 : option & 15; + if (vp->media_override != 7) + vp->medialock = 1; + vp->full_duplex = (option & 0x200) ? 1 : 0; + vp->bus_master = (option & 16) ? 1 : 0; + } + + if (card_idx < MAX_UNITS) { + if (full_duplex[card_idx] > 0) + vp->full_duplex = 1; + if (flow_ctrl[card_idx] > 0) + vp->flow_ctrl = 1; + if (enable_wol[card_idx] > 0) + vp->enable_wol = 1; + } + + vp->force_fd = vp->full_duplex; + vp->options = option; + /* Read the station address from the EEPROM.
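+ Each word is fetched by writing the read command plus word offset to Wn0EepromCmd and polling busy bit 0x8000; the station address itself sits in words 10-12 (NodeAddr01/23/45) and is byte-swapped by the htons() below.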
*/ + EL3WINDOW(0); + { + int base; + + if (vci->drv_flags & EEPROM_8BIT) + base = 0x230; + else if (vci->drv_flags & EEPROM_OFFSET) + base = EEPROM_Read + 0x30; + else + base = EEPROM_Read; + + for (i = 0; i < 0x40; i++) { + int timer; + outw(base + i, ioaddr + Wn0EepromCmd); + /* Pause for at least 162 us. for the read to take place. */ + for (timer = 10; timer >= 0; timer--) { + udelay(162); + if ((inw(ioaddr + Wn0EepromCmd) & 0x8000) == 0) + break; + } + eeprom[i] = inw(ioaddr + Wn0EepromData); + } + } + for (i = 0; i < 0x18; i++) + checksum ^= eeprom[i]; + checksum = (checksum ^ (checksum >> 8)) & 0xff; + if (checksum != 0x00) { /* Grrr, needless incompatible change 3Com. */ + while (i < 0x21) + checksum ^= eeprom[i++]; + checksum = (checksum ^ (checksum >> 8)) & 0xff; + } + if ((checksum != 0x00) && !(vci->drv_flags & IS_TORNADO)) + printk(" ***INVALID CHECKSUM %4.4x*** ", checksum); + for (i = 0; i < 3; i++) + ((u16 *)dev->dev_addr)[i] = htons(eeprom[i + 10]); + if (print_info) { + for (i = 0; i < 6; i++) + printk("%c%2.2x", i ? ':' : ' ', dev->dev_addr[i]); + } + EL3WINDOW(2); + for (i = 0; i < 6; i++) + outb(dev->dev_addr[i], ioaddr + i); + +#ifdef __sparc__ + if (print_info) + printk(", IRQ %s\n", __irq_itoa(dev->irq)); +#else + if (print_info) + printk(", IRQ %d\n", dev->irq); + /* Tell them about an invalid IRQ. */ + if (dev->irq <= 0 || dev->irq >= NR_IRQS) + printk(KERN_WARNING " *** Warning: IRQ %d is unlikely to work! ***\n", + dev->irq); +#endif + + EL3WINDOW(4); + step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1; + if (print_info) { + printk(KERN_INFO " product code %02x%02x rev %02x.%d date %02d-" + "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14], + step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9); + } + + + if (pdev && vci->drv_flags & HAS_CB_FNS) { + unsigned long fn_st_addr; /* Cardbus function status space */ + unsigned short n; + + fn_st_addr = pci_resource_start (pdev, 2); + if (fn_st_addr) { + vp->cb_fn_base = ioremap(fn_st_addr, 128); + retval = -ENOMEM; + if (!vp->cb_fn_base) + goto free_ring; + } + if (print_info) { + printk(KERN_INFO "%s: CardBus functions mapped %8.8lx->%p\n", + print_name, fn_st_addr, vp->cb_fn_base); + } + EL3WINDOW(2); + + n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; + if (vp->drv_flags & INVERT_LED_PWR) + n |= 0x10; + if (vp->drv_flags & INVERT_MII_PWR) + n |= 0x4000; + outw(n, ioaddr + Wn2_ResetOptions); + } + + /* Extract our information from the EEPROM data. */ + vp->info1 = eeprom[13]; + vp->info2 = eeprom[15]; + vp->capabilities = eeprom[16]; + + if (vp->info1 & 0x8000) { + vp->full_duplex = 1; + if (print_info) + printk(KERN_INFO "Full duplex capable\n"); + } + + { + static const char * ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; + unsigned int config; + EL3WINDOW(3); + vp->available_media = inw(ioaddr + Wn3_Options); + if ((vp->available_media & 0xff) == 0) /* Broken 3c916 */ + vp->available_media = 0x40; + config = inl(ioaddr + Wn3_Config); + if (print_info) { + printk(KERN_DEBUG " Internal config register is %4.4x, " + "transceivers %#x.\n", config, inw(ioaddr + Wn3_Options)); + printk(KERN_INFO " %dK %s-wide RAM %s Rx:Tx split, %s%s interface.\n", + 8 << RAM_SIZE(config), + RAM_WIDTH(config) ? "word" : "byte", + ram_split[RAM_SPLIT(config)], + AUTOSELECT(config) ? "autoselect/" : "", + XCVR(config) > XCVR_ExtMII ? 
"" : + media_tbl[XCVR(config)].name); + } + vp->default_media = XCVR(config); + if (vp->default_media == XCVR_NWAY) + vp->has_nway = 1; + vp->autoselect = AUTOSELECT(config); + } + + if (vp->media_override != 7) { + printk(KERN_INFO "%s: Media override to transceiver type %d (%s).\n", + print_name, vp->media_override, + media_tbl[vp->media_override].name); + dev->if_port = vp->media_override; + } else + dev->if_port = vp->default_media; + + if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) { + int phy, phy_idx = 0; + EL3WINDOW(4); + mii_preamble_required++; + mii_preamble_required++; + mdio_read(dev, 24, 1); + for (phy = 0; phy < 32 && phy_idx < 1; phy++) { + int mii_status, phyx; + + /* + * For the 3c905CX we look at index 24 first, because it bogusly + * reports an external PHY at all indices + */ + if (phy == 0) + phyx = 24; + else if (phy <= 24) + phyx = phy - 1; + else + phyx = phy; + mii_status = mdio_read(dev, phyx, 1); + if (mii_status && mii_status != 0xffff) { + vp->phys[phy_idx++] = phyx; + if (print_info) { + printk(KERN_INFO " MII transceiver found at address %d," + " status %4x.\n", phyx, mii_status); + } + if ((mii_status & 0x0040) == 0) + mii_preamble_required++; + } + } + mii_preamble_required--; + if (phy_idx == 0) { + printk(KERN_WARNING" ***WARNING*** No MII transceivers found!\n"); + vp->phys[0] = 24; + } else { + vp->advertising = mdio_read(dev, vp->phys[0], 4); + if (vp->full_duplex) { + /* Only advertise the FD media types. */ + vp->advertising &= ~0x02A0; + mdio_write(dev, vp->phys[0], 4, vp->advertising); + } + } + } + + if (vp->capabilities & CapBusMaster) { + vp->full_bus_master_tx = 1; + if (print_info) { + printk(KERN_INFO " Enabling bus-master transmits and %s receives.\n", + (vp->info2 & 1) ? "early" : "whole-frame" ); + } + vp->full_bus_master_rx = (vp->info2 & 1) ? 1 : 2; + vp->bus_master = 0; /* AKPM: vortex only */ + } + + /* The 3c59x-specific entries in the device structure. */ + dev->open = vortex_open; + if (vp->full_bus_master_tx) { + dev->hard_start_xmit = boomerang_start_xmit; + /* Actually, it still should work with iommu. */ + dev->features |= NETIF_F_SG; + if (((hw_checksums[card_idx] == -1) && (vp->drv_flags & HAS_HWCKSM)) || + (hw_checksums[card_idx] == 1)) { + dev->features |= NETIF_F_IP_CSUM; + } + } else { + dev->hard_start_xmit = vortex_start_xmit; + } + + if (print_info) { + printk(KERN_INFO "%s: scatter/gather %sabled. h/w checksums %sabled\n", + print_name, + (dev->features & NETIF_F_SG) ? "en":"dis", + (dev->features & NETIF_F_IP_CSUM) ? "en":"dis"); + } + + dev->stop = vortex_close; + dev->get_stats = vortex_get_stats; + dev->do_ioctl = vortex_ioctl; + dev->set_multicast_list = set_rx_mode; + dev->tx_timeout = vortex_tx_timeout; + dev->watchdog_timeo = (watchdog * HZ) / 1000; + if (pdev && vp->enable_wol) { + vp->pm_state_valid = 1; + pci_save_state(vp->pdev, vp->power_state); + acpi_set_WOL(dev); + } + retval = register_netdev(dev); + if (retval == 0) + return 0; + +free_ring: + pci_free_consistent(pdev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, + vp->rx_ring_dma); +free_region: + if (vp->must_free_region) + release_region(ioaddr, vci->io_size); + kfree (dev); + printk(KERN_ERR PFX "vortex_probe1 fails. 
Returns %d\n", retval); +out: + return retval; +} + +static void +issue_and_wait(struct net_device *dev, int cmd) +{ + int i; + + outw(cmd, dev->base_addr + EL3_CMD); + for (i = 0; i < 2000; i++) { + if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) + return; + } + + /* OK, that didn't work. Do it the slow way. One second */ + for (i = 0; i < 100000; i++) { + if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) { + if (vortex_debug > 1) + printk(KERN_INFO "%s: command 0x%04x took %d usecs\n", + dev->name, cmd, i * 10); + return; + } + udelay(10); + } + printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n", + dev->name, cmd, inw(dev->base_addr + EL3_STATUS)); +} + +static void +vortex_up(struct net_device *dev) +{ + long ioaddr = dev->base_addr; + struct vortex_private *vp = (struct vortex_private *)dev->priv; + unsigned int config; + int i; + + if (vp->pdev && vp->enable_wol) { + pci_set_power_state(vp->pdev, 0); /* Go active */ + pci_restore_state(vp->pdev, vp->power_state); + } + + /* Before initializing select the active media port. */ + EL3WINDOW(3); + config = inl(ioaddr + Wn3_Config); + + if (vp->media_override != 7) { + printk(KERN_INFO "%s: Media override to transceiver %d (%s).\n", + dev->name, vp->media_override, + media_tbl[vp->media_override].name); + dev->if_port = vp->media_override; + } else if (vp->autoselect) { + if (vp->has_nway) { + if (vortex_debug > 1) + printk(KERN_INFO "%s: using NWAY device table, not %d\n", + dev->name, dev->if_port); + dev->if_port = XCVR_NWAY; + } else { + /* Find first available media type, starting with 100baseTx. */ + dev->if_port = XCVR_100baseTx; + while (! (vp->available_media & media_tbl[dev->if_port].mask)) + dev->if_port = media_tbl[dev->if_port].next; + if (vortex_debug > 1) + printk(KERN_INFO "%s: first available media type: %s\n", + dev->name, media_tbl[dev->if_port].name); + } + } else { + dev->if_port = vp->default_media; + if (vortex_debug > 1) + printk(KERN_INFO "%s: using default media %s\n", + dev->name, media_tbl[dev->if_port].name); + } + + init_timer(&vp->timer); + vp->timer.expires = RUN_AT(media_tbl[dev->if_port].wait); + vp->timer.data = (unsigned long)dev; + vp->timer.function = vortex_timer; /* timer handler */ + add_timer(&vp->timer); + + init_timer(&vp->rx_oom_timer); + vp->rx_oom_timer.data = (unsigned long)dev; + vp->rx_oom_timer.function = rx_oom_timer; + + if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Initial media type %s.\n", + dev->name, media_tbl[dev->if_port].name); + + vp->full_duplex = vp->force_fd; + config = BFINS(config, dev->if_port, 20, 4); + if (vortex_debug > 6) + printk(KERN_DEBUG "vortex_up(): writing 0x%x to InternalConfig\n", config); + outl(config, ioaddr + Wn3_Config); + + if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) { + int mii_reg1, mii_reg5; + EL3WINDOW(4); + /* Read BMSR (reg1) only to clear old status. */ + mii_reg1 = mdio_read(dev, vp->phys[0], 1); + mii_reg5 = mdio_read(dev, vp->phys[0], 5); + if (mii_reg5 == 0xffff || mii_reg5 == 0x0000) + ; /* No MII device or no link partner report */ + else if ((mii_reg5 & 0x0100) != 0 /* 100baseTx-FD */ + || (mii_reg5 & 0x00C0) == 0x0040) /* 10T-FD, but not 100-HD */ + vp->full_duplex = 1; + vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0); + if (vortex_debug > 1) + printk(KERN_INFO "%s: MII #%d status %4.4x, link partner capability %4.4x," + " info1 %04x, setting %s-duplex.\n", + dev->name, vp->phys[0], + mii_reg1, mii_reg5, + vp->info1, ((vp->info1 & 0x8000) || vp->full_duplex) ? 
"full" : "half"); + EL3WINDOW(3); + } + + /* Set the full-duplex bit. */ + outw( ((vp->info1 & 0x8000) || vp->full_duplex ? 0x20 : 0) | + (dev->mtu > 1500 ? 0x40 : 0) | + ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0), + ioaddr + Wn3_MAC_Ctrl); + + if (vortex_debug > 1) { + printk(KERN_DEBUG "%s: vortex_up() InternalConfig %8.8x.\n", + dev->name, config); + } + + issue_and_wait(dev, TxReset); + /* + * Don't reset the PHY - that upsets autonegotiation during DHCP operations. + */ + issue_and_wait(dev, RxReset|0x04); + + outw(SetStatusEnb | 0x00, ioaddr + EL3_CMD); + + if (vortex_debug > 1) { + EL3WINDOW(4); + printk(KERN_DEBUG "%s: vortex_up() irq %d media status %4.4x.\n", + dev->name, dev->irq, inw(ioaddr + Wn4_Media)); + } + + /* Set the station address and mask in window 2 each time opened. */ + EL3WINDOW(2); + for (i = 0; i < 6; i++) + outb(dev->dev_addr[i], ioaddr + i); + for (; i < 12; i+=2) + outw(0, ioaddr + i); + + if (vp->cb_fn_base) { + unsigned short n = inw(ioaddr + Wn2_ResetOptions) & ~0x4010; + if (vp->drv_flags & INVERT_LED_PWR) + n |= 0x10; + if (vp->drv_flags & INVERT_MII_PWR) + n |= 0x4000; + outw(n, ioaddr + Wn2_ResetOptions); + } + + if (dev->if_port == XCVR_10base2) + /* Start the thinnet transceiver. We should really wait 50ms...*/ + outw(StartCoax, ioaddr + EL3_CMD); + if (dev->if_port != XCVR_NWAY) { + EL3WINDOW(4); + outw((inw(ioaddr + Wn4_Media) & ~(Media_10TP|Media_SQE)) | + media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media); + } + + /* Switch to the stats window, and clear all stats by reading. */ + outw(StatsDisable, ioaddr + EL3_CMD); + EL3WINDOW(6); + for (i = 0; i < 10; i++) + inb(ioaddr + i); + inw(ioaddr + 10); + inw(ioaddr + 12); + /* New: On the Vortex we must also clear the BadSSD counter. */ + EL3WINDOW(4); + inb(ioaddr + 12); + /* ..and on the Boomerang we enable the extra statistics bits. */ + outw(0x0040, ioaddr + Wn4_NetDiag); + + /* Switch to register set 7 for normal use. */ + EL3WINDOW(7); + + if (vp->full_bus_master_rx) { /* Boomerang bus master. */ + vp->cur_rx = vp->dirty_rx = 0; + /* Initialize the RxEarly register as recommended. */ + outw(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); + outl(0x0020, ioaddr + PktStatus); + outl(vp->rx_ring_dma, ioaddr + UpListPtr); + } + if (vp->full_bus_master_tx) { /* Boomerang bus master Tx. */ + vp->cur_tx = vp->dirty_tx = 0; + if (vp->drv_flags & IS_BOOMERANG) + outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); /* Room for a packet. */ + /* Clear the Rx, Tx rings. */ + for (i = 0; i < RX_RING_SIZE; i++) /* AKPM: this is done in vortex_open, too */ + vp->rx_ring[i].status = 0; + for (i = 0; i < TX_RING_SIZE; i++) + vp->tx_skbuff[i] = 0; + outl(0, ioaddr + DownListPtr); + } + /* Set receiver mode: presumably accept b-case and phys addr only. */ + set_rx_mode(dev); + outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ + +// issue_and_wait(dev, SetTxStart|0x07ff); + outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ + outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ + /* Allow status bits to be seen. */ + vp->status_enable = SetStatusEnb | HostError|IntReq|StatsFull|TxComplete| + (vp->full_bus_master_tx ? DownComplete : TxAvailable) | + (vp->full_bus_master_rx ? UpComplete : RxComplete) | + (vp->bus_master ? DMADone : 0); + vp->intr_enable = SetIntrEnb | IntLatch | TxAvailable | + (vp->full_bus_master_rx ? 0 : RxComplete) | + StatsFull | HostError | TxComplete | IntReq + | (vp->bus_master ? 
DMADone : 0) | UpComplete | DownComplete; + outw(vp->status_enable, ioaddr + EL3_CMD); + /* Ack all pending events, and set active indicator mask. */ + outw(AckIntr | IntLatch | TxAvailable | RxEarly | IntReq, + ioaddr + EL3_CMD); + outw(vp->intr_enable, ioaddr + EL3_CMD); + if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ + writel(0x8000, vp->cb_fn_base + 4); + netif_start_queue (dev); +} + +static int +vortex_open(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + int i; + int retval; + + /* Use the now-standard shared IRQ implementation. */ + if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? + &boomerang_interrupt : &vortex_interrupt, SA_SHIRQ, dev->name, dev))) { + printk(KERN_ERR "%s: Could not reserve IRQ %d\n", dev->name, dev->irq); + goto out; + } + + if (vp->full_bus_master_rx) { /* Boomerang bus master. */ + if (vortex_debug > 2) + printk(KERN_DEBUG "%s: Filling in the Rx ring.\n", dev->name); + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb; + vp->rx_ring[i].next = cpu_to_le32(vp->rx_ring_dma + sizeof(struct boom_rx_desc) * (i+1)); + vp->rx_ring[i].status = 0; /* Clear complete bit. */ + vp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ | LAST_FRAG); + skb = dev_alloc_skb(PKT_BUF_SZ); + vp->rx_skbuff[i] = skb; + if (skb == NULL) + break; /* Bad news! */ + skb->dev = dev; /* Mark as being used by this device. */ + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + } + if (i != RX_RING_SIZE) { + int j; + printk(KERN_EMERG "%s: no memory for rx ring\n", dev->name); + for (j = 0; j < i; j++) { + if (vp->rx_skbuff[j]) { + dev_kfree_skb(vp->rx_skbuff[j]); + vp->rx_skbuff[j] = 0; + } + } + retval = -ENOMEM; + goto out_free_irq; + } + /* Wrap the ring. */ + vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma); + } + + vortex_up(dev); + return 0; + +out_free_irq: + free_irq(dev->irq, dev); +out: + if (vortex_debug > 1) + printk(KERN_ERR "%s: vortex_open() fails: returning %d\n", dev->name, retval); + return retval; +} + +static void +vortex_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 60*HZ; + int ok = 0; + int media_status, mii_status, old_window; + + if (vortex_debug > 2) { + printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n", + dev->name, media_tbl[dev->if_port].name); + printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); + } + + if (vp->medialock) + goto leave_media_alone; + disable_irq(dev->irq); + old_window = inw(ioaddr + EL3_CMD) >> 13; + EL3WINDOW(4); + media_status = inw(ioaddr + Wn4_Media); + switch (dev->if_port) { + case XCVR_10baseT: case XCVR_100baseTx: case XCVR_100baseFx: + if (media_status & Media_LnkBeat) { + ok = 1; + if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Media %s has link beat, %x.\n", + dev->name, media_tbl[dev->if_port].name, media_status); + } else if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Media %s has no link beat, %x.\n", + dev->name, media_tbl[dev->if_port].name, media_status); + break; + case XCVR_MII: case XCVR_NWAY: + { + mii_status = mdio_read(dev, vp->phys[0], 1); + ok = 1; + if (vortex_debug > 2) + printk(KERN_DEBUG "%s: MII transceiver has status %4.4x.\n", + dev->name, mii_status); + if (mii_status & 0x0004) { + int mii_reg5 = mdio_read(dev, vp->phys[0], 5); + if (! 
vp->force_fd && mii_reg5 != 0xffff) { + int duplex = (mii_reg5&0x0100) || + (mii_reg5 & 0x01C0) == 0x0040; + if (vp->full_duplex != duplex) { + vp->full_duplex = duplex; + printk(KERN_INFO "%s: Setting %s-duplex based on MII " + "#%d link partner capability of %4.4x.\n", + dev->name, vp->full_duplex ? "full" : "half", + vp->phys[0], mii_reg5); + /* Set the full-duplex bit. */ + EL3WINDOW(3); + outw( (vp->full_duplex ? 0x20 : 0) | + (dev->mtu > 1500 ? 0x40 : 0) | + ((vp->full_duplex && vp->flow_ctrl && vp->partner_flow_ctrl) ? 0x100 : 0), + ioaddr + Wn3_MAC_Ctrl); + if (vortex_debug > 1) + printk(KERN_DEBUG "Setting duplex in Wn3_MAC_Ctrl\n"); + /* AKPM: bug: should reset Tx and Rx after setting Duplex. Page 180 */ + } + } + } + } + break; + default: /* Other media types handled by Tx timeouts. */ + if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Media %s has no indication, %x.\n", + dev->name, media_tbl[dev->if_port].name, media_status); + ok = 1; + } + if ( ! ok) { + unsigned int config; + + do { + dev->if_port = media_tbl[dev->if_port].next; + } while ( ! (vp->available_media & media_tbl[dev->if_port].mask)); + if (dev->if_port == XCVR_Default) { /* Go back to default. */ + dev->if_port = vp->default_media; + if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Media selection failing, using default " + "%s port.\n", + dev->name, media_tbl[dev->if_port].name); + } else { + if (vortex_debug > 1) + printk(KERN_DEBUG "%s: Media selection failed, now trying " + "%s port.\n", + dev->name, media_tbl[dev->if_port].name); + next_tick = media_tbl[dev->if_port].wait; + } + outw((media_status & ~(Media_10TP|Media_SQE)) | + media_tbl[dev->if_port].media_bits, ioaddr + Wn4_Media); + + EL3WINDOW(3); + config = inl(ioaddr + Wn3_Config); + config = BFINS(config, dev->if_port, 20, 4); + outl(config, ioaddr + Wn3_Config); + + outw(dev->if_port == XCVR_10base2 ? StartCoax : StopCoax, + ioaddr + EL3_CMD); + if (vortex_debug > 1) + printk(KERN_DEBUG "wrote 0x%08x to Wn3_Config\n", config); + /* AKPM: FIXME: Should reset Rx & Tx here. P60 of 3c90xc.pdf */ + } + EL3WINDOW(old_window); + enable_irq(dev->irq); + +leave_media_alone: + if (vortex_debug > 2) + printk(KERN_DEBUG "%s: Media selection timer finished, %s.\n", + dev->name, media_tbl[dev->if_port].name); + + mod_timer(&vp->timer, RUN_AT(next_tick)); + if (vp->deferred) + outw(FakeIntr, ioaddr + EL3_CMD); + return; +} + +static void vortex_tx_timeout(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + + printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n", + dev->name, inb(ioaddr + TxStatus), + inw(ioaddr + EL3_STATUS)); + EL3WINDOW(4); + printk(KERN_ERR " diagnostics: net %04x media %04x dma %8.8x.\n", + inw(ioaddr + Wn4_NetDiag), inw(ioaddr + Wn4_Media), + inl(ioaddr + PktStatus)); + /* Slight code bloat to be user friendly. */ + if ((inb(ioaddr + TxStatus) & 0x88) == 0x88) + printk(KERN_ERR "%s: Transmitter encountered 16 collisions --" + " network cable problem?\n", dev->name); + if (inw(ioaddr + EL3_STATUS) & IntLatch) { + printk(KERN_ERR "%s: Interrupt posted but not delivered --" + " IRQ blocked by another device?\n", dev->name); + /* Bad idea here.. but we might as well handle a few events. 
*/ + { + /* + * Block interrupts because vortex_interrupt does a bare spin_lock() + */ + unsigned long flags; + local_irq_save(flags); + if (vp->full_bus_master_tx) + boomerang_interrupt(dev->irq, dev, 0); + else + vortex_interrupt(dev->irq, dev, 0); + local_irq_restore(flags); + } + } + + if (vortex_debug > 0) + dump_tx_ring(dev); + + issue_and_wait(dev, TxReset); + + vp->stats.tx_errors++; + if (vp->full_bus_master_tx) { + printk(KERN_DEBUG "%s: Resetting the Tx ring pointer.\n", dev->name); + if (vp->cur_tx - vp->dirty_tx > 0 && inl(ioaddr + DownListPtr) == 0) + outl(vp->tx_ring_dma + (vp->dirty_tx % TX_RING_SIZE) * sizeof(struct boom_tx_desc), + ioaddr + DownListPtr); + if (vp->cur_tx - vp->dirty_tx < TX_RING_SIZE) + netif_wake_queue (dev); + if (vp->drv_flags & IS_BOOMERANG) + outb(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold); + outw(DownUnstall, ioaddr + EL3_CMD); + } else { + vp->stats.tx_dropped++; + netif_wake_queue(dev); + } + + /* Issue Tx Enable */ + outw(TxEnable, ioaddr + EL3_CMD); + dev->trans_start = jiffies; + + /* Switch to register set 7 for normal use. */ + EL3WINDOW(7); +} + +/* + * Handle uncommon interrupt sources. This is a separate routine to minimize + * the cache impact. + */ +static void +vortex_error(struct net_device *dev, int status) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + int do_tx_reset = 0, reset_mask = 0; + unsigned char tx_status = 0; + + if (vortex_debug > 2) { + printk(KERN_ERR "%s: vortex_error(), status=0x%x\n", dev->name, status); + } + + if (status & TxComplete) { /* Really "TxError" for us. */ + tx_status = inb(ioaddr + TxStatus); + /* Presumably a tx-timeout. We must merely re-enable. */ + if (vortex_debug > 2 + || (tx_status != 0x88 && vortex_debug > 0)) { + printk(KERN_ERR "%s: Transmit error, Tx status register %2.2x.\n", + dev->name, tx_status); + if (tx_status == 0x82) { + printk(KERN_ERR "Probably a duplex mismatch. See " + "Documentation/networking/vortex.txt\n"); + } + dump_tx_ring(dev); + } + if (tx_status & 0x14) vp->stats.tx_fifo_errors++; + if (tx_status & 0x38) vp->stats.tx_aborted_errors++; + outb(0, ioaddr + TxStatus); + if (tx_status & 0x30) { /* txJabber or txUnderrun */ + do_tx_reset = 1; + } else if ((tx_status & 0x08) && (vp->drv_flags & MAX_COLLISION_RESET)) { /* maxCollisions */ + do_tx_reset = 1; + reset_mask = 0x0108; /* Reset interface logic, but not download logic */ + } else { /* Merely re-enable the transmitter. */ + outw(TxEnable, ioaddr + EL3_CMD); + } + } + + if (status & RxEarly) { /* Rx early is unused. */ + vortex_rx(dev); + outw(AckIntr | RxEarly, ioaddr + EL3_CMD); + } + if (status & StatsFull) { /* Empty statistics. */ + static int DoneDidThat; + if (vortex_debug > 4) + printk(KERN_DEBUG "%s: Updating stats.\n", dev->name); + update_stats(ioaddr, dev); + /* HACK: Disable statistics as an interrupt source. */ + /* This occurs when we have the wrong media type! */ + if (DoneDidThat == 0 && + inw(ioaddr + EL3_STATUS) & StatsFull) { + printk(KERN_WARNING "%s: Updating statistics failed, disabling " + "stats as an interrupt source.\n", dev->name); + EL3WINDOW(5); + outw(SetIntrEnb | (inw(ioaddr + 10) & ~StatsFull), ioaddr + EL3_CMD); + vp->intr_enable &= ~StatsFull; + EL3WINDOW(7); + DoneDidThat++; + } + } + if (status & IntReq) { /* Restore all interrupt sources. 
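+ This pairs with vortex_timer(): when interrupts have been masked by the too-much-work path, the timer issues FakeIntr, which shows up here as IntReq so the saved enable masks can be rewritten.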
*/ + outw(vp->status_enable, ioaddr + EL3_CMD); + outw(vp->intr_enable, ioaddr + EL3_CMD); + } + if (status & HostError) { + u16 fifo_diag; + EL3WINDOW(4); + fifo_diag = inw(ioaddr + Wn4_FIFODiag); + printk(KERN_ERR "%s: Host error, FIFO diagnostic register %4.4x.\n", + dev->name, fifo_diag); + /* Adapter failure requires Tx/Rx reset and reinit. */ + if (vp->full_bus_master_tx) { + int bus_status = inl(ioaddr + PktStatus); + /* 0x80000000 PCI master abort. */ + /* 0x40000000 PCI target abort. */ + if (vortex_debug) + printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", dev->name, bus_status); + + /* In this case, blow the card away */ + vortex_down(dev); + issue_and_wait(dev, TotalReset | 0xff); + vortex_up(dev); /* AKPM: bug. vortex_up() assumes that the rx ring is full. It may not be. */ + } else if (fifo_diag & 0x0400) + do_tx_reset = 1; + if (fifo_diag & 0x3000) { + /* Reset Rx fifo and upload logic */ + issue_and_wait(dev, RxReset|0x07); + /* Set the Rx filter to the current state. */ + set_rx_mode(dev); + outw(RxEnable, ioaddr + EL3_CMD); /* Re-enable the receiver. */ + outw(AckIntr | HostError, ioaddr + EL3_CMD); + } + } + + if (do_tx_reset) { + issue_and_wait(dev, TxReset|reset_mask); + outw(TxEnable, ioaddr + EL3_CMD); + if (!vp->full_bus_master_tx) + netif_wake_queue(dev); + } +} + +static int +vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + + /* Put out the doubleword header... */ + outl(skb->len, ioaddr + TX_FIFO); + if (vp->bus_master) { + /* Set the bus-master controller to transfer the packet. */ + int len = (skb->len + 3) & ~3; + outl( vp->tx_skb_dma = pci_map_single(vp->pdev, skb->data, len, PCI_DMA_TODEVICE), + ioaddr + Wn7_MasterAddr); + outw(len, ioaddr + Wn7_MasterLen); + vp->tx_skb = skb; + outw(StartDMADown, ioaddr + EL3_CMD); + /* netif_wake_queue() will be called at the DMADone interrupt. */ + } else { + /* ... and the packet rounded to a doubleword. */ + outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >> 2); + dev_kfree_skb (skb); + if (inw(ioaddr + TxFree) > 1536) { + netif_start_queue (dev); /* AKPM: redundant? */ + } else { + /* Interrupt us when the FIFO has room for max-sized packet. */ + netif_stop_queue(dev); + outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); + } + } + + dev->trans_start = jiffies; + + /* Clear the Tx status stack. */ + { + int tx_status; + int i = 32; + + while (--i > 0 && (tx_status = inb(ioaddr + TxStatus)) > 0) { + if (tx_status & 0x3C) { /* A Tx-disabling error occurred. */ + if (vortex_debug > 2) + printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n", + dev->name, tx_status); + if (tx_status & 0x04) vp->stats.tx_fifo_errors++; + if (tx_status & 0x38) vp->stats.tx_aborted_errors++; + if (tx_status & 0x30) { + issue_and_wait(dev, TxReset); + } + outw(TxEnable, ioaddr + EL3_CMD); + } + outb(0x00, ioaddr + TxStatus); /* Pop the status stack. */ + } + } + return 0; +} + +static int +boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + /* Calculate the next Tx descriptor entry. 
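+ cur_tx and dirty_tx count monotonically; cur_tx % TX_RING_SIZE picks the slot and cur_tx - dirty_tx is the in-flight count, so the ring is full when that difference reaches TX_RING_SIZE.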
*/ + int entry = vp->cur_tx % TX_RING_SIZE; + struct boom_tx_desc *prev_entry = &vp->tx_ring[(vp->cur_tx-1) % TX_RING_SIZE]; + unsigned long flags; + + if (vortex_debug > 6) { + printk(KERN_DEBUG "boomerang_start_xmit()\n"); + if (vortex_debug > 3) + printk(KERN_DEBUG "%s: Trying to send a packet, Tx index %d.\n", + dev->name, vp->cur_tx); + } + + if (vp->cur_tx - vp->dirty_tx >= TX_RING_SIZE) { + if (vortex_debug > 0) + printk(KERN_WARNING "%s: BUG! Tx Ring full, refusing to send buffer.\n", + dev->name); + netif_stop_queue(dev); + return 1; + } + + vp->tx_skbuff[entry] = skb; + + vp->tx_ring[entry].next = 0; +#if DO_ZEROCOPY + if (skb->ip_summed != CHECKSUM_HW) + vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); + else + vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum); + + if (!skb_shinfo(skb)->nr_frags) { + vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, + skb->len, PCI_DMA_TODEVICE)); + vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len | LAST_FRAG); + } else { + int i; + + vp->tx_ring[entry].frag[0].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, + skb->len-skb->data_len, PCI_DMA_TODEVICE)); + vp->tx_ring[entry].frag[0].length = cpu_to_le32(skb->len-skb->data_len); + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + vp->tx_ring[entry].frag[i+1].addr = + cpu_to_le32(pci_map_single(vp->pdev, + (void*)page_address(frag->page) + frag->page_offset, + frag->size, PCI_DMA_TODEVICE)); + + if (i == skb_shinfo(skb)->nr_frags-1) + vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG); + else + vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size); + } + } +#else + vp->tx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE)); + vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); + vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); +#endif + + spin_lock_irqsave(&vp->lock, flags); + /* Wait for the stall to complete. */ + issue_and_wait(dev, DownStall); + prev_entry->next = cpu_to_le32(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)); + if (inl(ioaddr + DownListPtr) == 0) { + outl(vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc), ioaddr + DownListPtr); + vp->queued_packet++; + } + + vp->cur_tx++; + if (vp->cur_tx - vp->dirty_tx > TX_RING_SIZE - 1) { + netif_stop_queue (dev); + } else { /* Clear previous interrupt enable. */ +#if defined(tx_interrupt_mitigation) + /* Dubious. If in boomerang_interrupt "faster" cyclone ifdef + * were selected, this would corrupt DN_COMPLETE. No? + */ + prev_entry->status &= cpu_to_le32(~TxIntrUploaded); +#endif + } + outw(DownUnstall, ioaddr + EL3_CMD); + spin_unlock_irqrestore(&vp->lock, flags); + dev->trans_start = jiffies; + return 0; +} + +/* The interrupt handler does all of the Rx thread work and cleans up + after the Tx thread. */ + +/* + * This is the ISR for the vortex series chips. + * full_bus_master_tx == 0 && full_bus_master_rx == 0 + */ + +static void vortex_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr; + int status; + int work_done = max_interrupt_work; + + ioaddr = dev->base_addr; + spin_lock(&vp->lock); + + status = inw(ioaddr + EL3_STATUS); + + if (vortex_debug > 6) + printk("vortex_interrupt().
status=0x%4x\n", status); + + if ((status & IntLatch) == 0) + goto handler_exit; /* No interrupt: shared IRQs cause this */ + + if (status & IntReq) { + status |= vp->deferred; + vp->deferred = 0; + } + + if (status == 0xffff) /* h/w no longer present (hotplug)? */ + goto handler_exit; + + if (vortex_debug > 4) + printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", + dev->name, status, inb(ioaddr + Timer)); + + do { + if (vortex_debug > 5) + printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", + dev->name, status); + if (status & RxComplete) + vortex_rx(dev); + + if (status & TxAvailable) { + if (vortex_debug > 5) + printk(KERN_DEBUG " TX room bit was handled.\n"); + /* There's room in the FIFO for a full-sized packet. */ + outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); + netif_wake_queue (dev); + } + + if (status & DMADone) { + if (inw(ioaddr + Wn7_MasterStatus) & 0x1000) { + outw(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ + pci_unmap_single(vp->pdev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ + if (inw(ioaddr + TxFree) > 1536) { + /* + * AKPM: FIXME: I don't think we need this. If the queue was stopped due to + * insufficient FIFO room, the TxAvailable test will succeed and call + * netif_wake_queue() + */ + netif_wake_queue(dev); + } else { /* Interrupt when FIFO has room for max-sized packet. */ + outw(SetTxThreshold + (1536>>2), ioaddr + EL3_CMD); + netif_stop_queue(dev); + } + } + } + /* Check for all uncommon interrupts at once. */ + if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) { + if (status == 0xffff) + break; + vortex_error(dev, status); + } + + if (--work_done < 0) { + printk(KERN_WARNING "%s: Too much work in interrupt, status " + "%4.4x.\n", dev->name, status); + /* Disable all pending interrupts. */ + do { + vp->deferred |= status; + outw(SetStatusEnb | (~vp->deferred & vp->status_enable), + ioaddr + EL3_CMD); + outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); + } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); + /* The timer will reenable interrupts. */ + mod_timer(&vp->timer, jiffies + 1*HZ); + break; + } + /* Acknowledge the IRQ. */ + outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); + } while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete)); + + if (vortex_debug > 4) + printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", + dev->name, status); +handler_exit: + spin_unlock(&vp->lock); +} + +/* + * This is the ISR for the boomerang series chips. + * full_bus_master_tx == 1 && full_bus_master_rx == 1 + */ + +static void boomerang_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr; + int status; + int work_done = max_interrupt_work; + + ioaddr = dev->base_addr; + + /* + * It seems dopey to put the spinlock this early, but we could race against vortex_tx_timeout + * and boomerang_start_xmit + */ + spin_lock(&vp->lock); + + status = inw(ioaddr + EL3_STATUS); + + if (vortex_debug > 6) + printk(KERN_DEBUG "boomerang_interrupt. status=0x%4x\n", status); + + if ((status & IntLatch) == 0) + goto handler_exit; /* No interrupt: shared IRQs can cause this */ + + if (status == 0xffff) { /* h/w no longer present (hotplug)? 
*/ + if (vortex_debug > 1) + printk(KERN_DEBUG "boomerang_interrupt(1): status = 0xffff\n"); + goto handler_exit; + } + + if (status & IntReq) { + status |= vp->deferred; + vp->deferred = 0; + } + + if (vortex_debug > 4) + printk(KERN_DEBUG "%s: interrupt, status %4.4x, latency %d ticks.\n", + dev->name, status, inb(ioaddr + Timer)); + do { + if (vortex_debug > 5) + printk(KERN_DEBUG "%s: In interrupt loop, status %4.4x.\n", + dev->name, status); + if (status & UpComplete) { + outw(AckIntr | UpComplete, ioaddr + EL3_CMD); + if (vortex_debug > 5) + printk(KERN_DEBUG "boomerang_interrupt->boomerang_rx\n"); + boomerang_rx(dev); + } + + if (status & DownComplete) { + unsigned int dirty_tx = vp->dirty_tx; + + outw(AckIntr | DownComplete, ioaddr + EL3_CMD); + while (vp->cur_tx - dirty_tx > 0) { + int entry = dirty_tx % TX_RING_SIZE; +#if 1 /* AKPM: the latter is faster, but cyclone-only */ + if (inl(ioaddr + DownListPtr) == + vp->tx_ring_dma + entry * sizeof(struct boom_tx_desc)) + break; /* It still hasn't been processed. */ +#else + if ((vp->tx_ring[entry].status & DN_COMPLETE) == 0) + break; /* It still hasn't been processed. */ +#endif + + if (vp->tx_skbuff[entry]) { + struct sk_buff *skb = vp->tx_skbuff[entry]; +#if DO_ZEROCOPY + int i; + for (i=0; i<=skb_shinfo(skb)->nr_frags; i++) + pci_unmap_single(vp->pdev, + le32_to_cpu(vp->tx_ring[entry].frag[i].addr), + le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, + PCI_DMA_TODEVICE); +#else + pci_unmap_single(vp->pdev, + le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); +#endif + dev_kfree_skb_irq(skb); + vp->tx_skbuff[entry] = 0; + } else { + printk(KERN_DEBUG "boomerang_interrupt: no skb!\n"); + } + /* vp->stats.tx_packets++; Counted below. */ + dirty_tx++; + } + vp->dirty_tx = dirty_tx; + if (vp->cur_tx - dirty_tx <= TX_RING_SIZE - 1) { + if (vortex_debug > 6) + printk(KERN_DEBUG "boomerang_interrupt: wake queue\n"); + netif_wake_queue (dev); + } + } + + /* Check for all uncommon interrupts at once. */ + if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) + vortex_error(dev, status); + + if (--work_done < 0) { + printk(KERN_WARNING "%s: Too much work in interrupt, status " + "%4.4x.\n", dev->name, status); + /* Disable all pending interrupts. */ + do { + vp->deferred |= status; + outw(SetStatusEnb | (~vp->deferred & vp->status_enable), + ioaddr + EL3_CMD); + outw(AckIntr | (vp->deferred & 0x7ff), ioaddr + EL3_CMD); + } while ((status = inw(ioaddr + EL3_CMD)) & IntLatch); + /* The timer will reenable interrupts. */ + mod_timer(&vp->timer, jiffies + 1*HZ); + break; + } + /* Acknowledge the IRQ. */ + outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); + if (vp->cb_fn_base) /* The PCMCIA people are idiots. */ + writel(0x8000, vp->cb_fn_base + 4); + + } while ((status = inw(ioaddr + EL3_STATUS)) & IntLatch); + + if (vortex_debug > 4) + printk(KERN_DEBUG "%s: exiting interrupt, status %4.4x.\n", + dev->name, status); +handler_exit: + spin_unlock(&vp->lock); +} + +static int vortex_rx(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + int i; + short rx_status; + + if (vortex_debug > 5) + printk(KERN_DEBUG "vortex_rx(): status %4.4x, rx_status %4.4x.\n", + inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); + while ((rx_status = inw(ioaddr + RxStatus)) > 0) { + if (rx_status & 0x4000) { /* Error, update stats. 
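+ The RxErrors bits map onto the counters below: 0x01 overrun, 0x02 and 0x10 length, 0x04 frame, 0x08 CRC.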
*/ + unsigned char rx_error = inb(ioaddr + RxErrors); + if (vortex_debug > 2) + printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); + vp->stats.rx_errors++; + if (rx_error & 0x01) vp->stats.rx_over_errors++; + if (rx_error & 0x02) vp->stats.rx_length_errors++; + if (rx_error & 0x04) vp->stats.rx_frame_errors++; + if (rx_error & 0x08) vp->stats.rx_crc_errors++; + if (rx_error & 0x10) vp->stats.rx_length_errors++; + } else { + /* The packet length: up to 4.5K!. */ + int pkt_len = rx_status & 0x1fff; + struct sk_buff *skb; + + skb = dev_alloc_skb(pkt_len + 5); + if (vortex_debug > 4) + printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", + pkt_len, rx_status); + if (skb != NULL) { + skb->dev = dev; + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + /* 'skb_put()' points to the start of sk_buff data area. */ + if (vp->bus_master && + ! (inw(ioaddr + Wn7_MasterStatus) & 0x8000)) { + dma_addr_t dma = pci_map_single(vp->pdev, skb_put(skb, pkt_len), + pkt_len, PCI_DMA_FROMDEVICE); + outl(dma, ioaddr + Wn7_MasterAddr); + outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); + outw(StartDMAUp, ioaddr + EL3_CMD); + while (inw(ioaddr + Wn7_MasterStatus) & 0x8000) + ; + pci_unmap_single(vp->pdev, dma, pkt_len, PCI_DMA_FROMDEVICE); + } else { + insl(ioaddr + RX_FIFO, skb_put(skb, pkt_len), + (pkt_len + 3) >> 2); + } + outw(RxDiscard, ioaddr + EL3_CMD); /* Pop top Rx packet. */ + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + dev->last_rx = jiffies; + vp->stats.rx_packets++; + /* Wait a limited time to go to next packet. */ + for (i = 200; i >= 0; i--) + if ( ! (inw(ioaddr + EL3_STATUS) & CmdInProgress)) + break; + continue; + } else if (vortex_debug > 0) + printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of " + "size %d.\n", dev->name, pkt_len); + } + vp->stats.rx_dropped++; + issue_and_wait(dev, RxDiscard); + } + + return 0; +} + +static int +boomerang_rx(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + int entry = vp->cur_rx % RX_RING_SIZE; + long ioaddr = dev->base_addr; + int rx_status; + int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; + + if (vortex_debug > 5) + printk(KERN_DEBUG "boomerang_rx(): status %4.4x\n", inw(ioaddr+EL3_STATUS)); + + while ((rx_status = le32_to_cpu(vp->rx_ring[entry].status)) & RxDComplete){ + if (--rx_work_limit < 0) + break; + if (rx_status & RxDError) { /* Error, update stats. */ + unsigned char rx_error = rx_status >> 16; + if (vortex_debug > 2) + printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error); + vp->stats.rx_errors++; + if (rx_error & 0x01) vp->stats.rx_over_errors++; + if (rx_error & 0x02) vp->stats.rx_length_errors++; + if (rx_error & 0x04) vp->stats.rx_frame_errors++; + if (rx_error & 0x08) vp->stats.rx_crc_errors++; + if (rx_error & 0x10) vp->stats.rx_length_errors++; + } else { + /* The packet length: up to 4.5K!. */ + int pkt_len = rx_status & 0x1fff; + struct sk_buff *skb; + dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); + + if (vortex_debug > 4) + printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", + pkt_len, rx_status); + + /* Check if the packet is long enough to just accept without + copying to a properly sized skbuff. */ + if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) { + skb->dev = dev; + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + pci_dma_sync_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + /* 'skb_put()' points to the start of sk_buff data area. 
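+ This is the copybreak path: packets shorter than rx_copybreak are copied into a fresh skb so the PKT_BUF_SZ ring buffer can be reused (rx_copy); larger packets are passed up in place and the slot refilled later (rx_nocopy).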
*/ + memcpy(skb_put(skb, pkt_len), + vp->rx_skbuff[entry]->tail, + pkt_len); + vp->rx_copy++; + } else { + /* Pass up the skbuff already on the Rx ring. */ + skb = vp->rx_skbuff[entry]; + vp->rx_skbuff[entry] = NULL; + skb_put(skb, pkt_len); + pci_unmap_single(vp->pdev, dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + vp->rx_nocopy++; + } + skb->protocol = eth_type_trans(skb, dev); + { /* Use hardware checksum info. */ + int csum_bits = rx_status & 0xee000000; + if (csum_bits && + (csum_bits == (IPChksumValid | TCPChksumValid) || + csum_bits == (IPChksumValid | UDPChksumValid))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + vp->rx_csumhits++; + } + } + netif_rx(skb); + dev->last_rx = jiffies; + vp->stats.rx_packets++; + } + entry = (++vp->cur_rx) % RX_RING_SIZE; + } + /* Refill the Rx ring buffers. */ + for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { + struct sk_buff *skb; + entry = vp->dirty_rx % RX_RING_SIZE; + if (vp->rx_skbuff[entry] == NULL) { + skb = dev_alloc_skb(PKT_BUF_SZ); + if (skb == NULL) { + static unsigned long last_jif; + if ((jiffies - last_jif) > 10 * HZ) { + printk(KERN_WARNING "%s: memory shortage\n", dev->name); + last_jif = jiffies; + } + if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) + mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); + break; /* Bad news! */ + } + skb->dev = dev; /* Mark as being used by this device. */ + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(vp->pdev, skb->tail, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + vp->rx_skbuff[entry] = skb; + } + vp->rx_ring[entry].status = 0; /* Clear complete bit. */ + outw(UpUnstall, ioaddr + EL3_CMD); + } + return 0; +} + +/* + * If we've hit a total OOM refilling the Rx ring we poll once a second + * for some memory. Otherwise there is no way to restart the rx process. + */ +static void +rx_oom_timer(unsigned long arg) +{ + struct net_device *dev = (struct net_device *)arg; + struct vortex_private *vp = (struct vortex_private *)dev->priv; + + spin_lock_irq(&vp->lock); + if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ + boomerang_rx(dev); + if (vortex_debug > 1) { + printk(KERN_DEBUG "%s: rx_oom_timer %s\n", dev->name, + ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying"); + } + spin_unlock_irq(&vp->lock); +} + +static void +vortex_down(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + + netif_stop_queue (dev); + + del_timer_sync(&vp->rx_oom_timer); + del_timer_sync(&vp->timer); + + /* Turn off statistics ASAP. We update vp->stats below. */ + outw(StatsDisable, ioaddr + EL3_CMD); + + /* Disable the receiver and transmitter. */ + outw(RxDisable, ioaddr + EL3_CMD); + outw(TxDisable, ioaddr + EL3_CMD); + + if (dev->if_port == XCVR_10base2) + /* Turn off thinnet power. Green! 
*/ + outw(StopCoax, ioaddr + EL3_CMD); + + outw(SetIntrEnb | 0x0000, ioaddr + EL3_CMD); + + update_stats(ioaddr, dev); + if (vp->full_bus_master_rx) + outl(0, ioaddr + UpListPtr); + if (vp->full_bus_master_tx) + outl(0, ioaddr + DownListPtr); + + if (vp->pdev && vp->enable_wol) { + pci_save_state(vp->pdev, vp->power_state); + acpi_set_WOL(dev); + } +} + +static int +vortex_close(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + int i; + + if (netif_device_present(dev)) + vortex_down(dev); + + if (vortex_debug > 1) { + printk(KERN_DEBUG"%s: vortex_close() status %4.4x, Tx status %2.2x.\n", + dev->name, inw(ioaddr + EL3_STATUS), inb(ioaddr + TxStatus)); + printk(KERN_DEBUG "%s: vortex close stats: rx_nocopy %d rx_copy %d" + " tx_queued %d Rx pre-checksummed %d.\n", + dev->name, vp->rx_nocopy, vp->rx_copy, vp->queued_packet, vp->rx_csumhits); + } + +#if DO_ZEROCOPY + if ( vp->rx_csumhits && + ((vp->drv_flags & HAS_HWCKSM) == 0) && + (hw_checksums[vp->card_idx] == -1)) { + printk(KERN_WARNING "%s supports hardware checksums, and we're not using them!\n", dev->name); + printk(KERN_WARNING "Please see http://www.uow.edu.au/~andrewm/zerocopy.html\n"); + } +#endif + + free_irq(dev->irq, dev); + + if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */ + for (i = 0; i < RX_RING_SIZE; i++) + if (vp->rx_skbuff[i]) { + pci_unmap_single( vp->pdev, le32_to_cpu(vp->rx_ring[i].addr), + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dev_kfree_skb(vp->rx_skbuff[i]); + vp->rx_skbuff[i] = 0; + } + } + if (vp->full_bus_master_tx) { /* Free Boomerang bus master Tx buffers. */ + for (i = 0; i < TX_RING_SIZE; i++) { + if (vp->tx_skbuff[i]) { + struct sk_buff *skb = vp->tx_skbuff[i]; +#if DO_ZEROCOPY + int k; + + for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) + pci_unmap_single(vp->pdev, + le32_to_cpu(vp->tx_ring[i].frag[k].addr), + le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, + PCI_DMA_TODEVICE); +#else + pci_unmap_single(vp->pdev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); +#endif + dev_kfree_skb(skb); + vp->tx_skbuff[i] = 0; + } + } + } + + return 0; +} + +static void +dump_tx_ring(struct net_device *dev) +{ + if (vortex_debug > 0) { + struct vortex_private *vp = (struct vortex_private *)dev->priv; + long ioaddr = dev->base_addr; + + if (vp->full_bus_master_tx) { + int i; + int stalled = inl(ioaddr + PktStatus) & 0x04; /* Possible racy. But it's only debug stuff */ + + printk(KERN_ERR " Flags; bus-master %d, dirty %d(%d) current %d(%d)\n", + vp->full_bus_master_tx, + vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE, + vp->cur_tx, vp->cur_tx % TX_RING_SIZE); + printk(KERN_ERR " Transmit list %8.8x vs. 
%p.\n", + inl(ioaddr + DownListPtr), + &vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]); + issue_and_wait(dev, DownStall); + for (i = 0; i < TX_RING_SIZE; i++) { + printk(KERN_ERR " %d: @%p length %8.8x status %8.8x\n", i, + &vp->tx_ring[i], +#if DO_ZEROCOPY + le32_to_cpu(vp->tx_ring[i].frag[0].length), +#else + le32_to_cpu(vp->tx_ring[i].length), +#endif + le32_to_cpu(vp->tx_ring[i].status)); + } + if (!stalled) + outw(DownUnstall, ioaddr + EL3_CMD); + } + } +} + +static struct net_device_stats *vortex_get_stats(struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + unsigned long flags; + + if (netif_device_present(dev)) { /* AKPM: Used to be netif_running */ + spin_lock_irqsave (&vp->lock, flags); + update_stats(dev->base_addr, dev); + spin_unlock_irqrestore (&vp->lock, flags); + } + return &vp->stats; +} + +/* Update statistics. + Unlike with the EL3 we need not worry about interrupts changing + the window setting from underneath us, but we must still guard + against a race condition with a StatsUpdate interrupt updating the + table. This is done by checking that the ASM (!) code generated uses + atomic updates with '+='. + */ +static void update_stats(long ioaddr, struct net_device *dev) +{ + struct vortex_private *vp = (struct vortex_private *)dev->priv; + int old_window = inw(ioaddr + EL3_CMD); + + if (old_window == 0xffff) /* Chip suspended or ejected. */ + return; + /* Unlike the 3c5x9 we need not turn off stats updates while reading. */ + /* Switch to the stats window, and read everything. */ + EL3WINDOW(6); + vp->stats.tx_carrier_errors += inb(ioaddr + 0); + vp->stats.tx_heartbeat_errors += inb(ioaddr + 1); + /* Multiple collisions. */ inb(ioaddr + 2); + vp->stats.collisions += inb(ioaddr + 3); + vp->stats.tx_window_errors += inb(ioaddr + 4); + vp->stats.rx_fifo_errors += inb(ioaddr + 5); + vp->stats.tx_packets += inb(ioaddr + 6); + vp->stats.tx_packets += (inb(ioaddr + 9)&0x30) << 4; + /* Rx packets */ inb(ioaddr + 7); /* Must read to clear */ + /* Tx deferrals */ inb(ioaddr + 8); + /* Don't bother with register 9, an extension of registers 6&7. + If we do use the 6&7 values the atomic update assumption above + is invalid. */ + vp->stats.rx_bytes += inw(ioaddr + 10); + vp->stats.tx_bytes += inw(ioaddr + 12); + /* New: On the Vortex we must also clear the BadSSD counter. 
*/
+ EL3WINDOW(4);
+ inb(ioaddr + 12);
+
+ {
+ u8 up = inb(ioaddr + 13);
+ vp->stats.rx_bytes += (up & 0x0f) << 16;
+ vp->stats.tx_bytes += (up & 0xf0) << 12;
+ }
+
+ EL3WINDOW(old_window >> 13);
+ return;
+}
+
+
+static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr)
+{
+ struct vortex_private *vp = dev->priv;
+ u32 ethcmd;
+
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
+ return -EFAULT;
+
+ switch (ethcmd) {
+ case ETHTOOL_GDRVINFO: {
+ struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO};
+ strcpy(info.driver, DRV_NAME);
+ strcpy(info.version, DRV_VERSION);
+ if (vp->pdev)
+ strcpy(info.bus_info, vp->pdev->slot_name);
+ else
+ sprintf(info.bus_info, "EISA 0x%lx %d",
+ dev->base_addr, dev->irq);
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data;
+ int phy = vp->phys[0] & 0x1f;
+ int retval;
+
+ switch(cmd) {
+ case SIOCETHTOOL:
+ return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data);
+
+ case SIOCGMIIPHY: /* Get address of MII PHY in use. */
+ case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */
+ data->phy_id = phy;
+
+ case SIOCGMIIREG: /* Read MII PHY register. */
+ case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */
+ EL3WINDOW(4);
+ data->val_out = mdio_read(dev, data->phy_id & 0x1f, data->reg_num & 0x1f);
+ retval = 0;
+ break;
+
+ case SIOCSMIIREG: /* Write MII PHY register. */
+ case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */
+ if (!capable(CAP_NET_ADMIN)) {
+ retval = -EPERM;
+ } else {
+ EL3WINDOW(4);
+ mdio_write(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in);
+ retval = 0;
+ }
+ break;
+ default:
+ retval = -EOPNOTSUPP;
+ break;
+ }
+
+ return retval;
+}
+
+/* Pre-Cyclone chips have no documented multicast filter, so the only
+ multicast setting is to receive all multicast frames. At least
+ the chip has a very clean way to set the mode, unlike many others. */
+static void set_rx_mode(struct net_device *dev)
+{
+ long ioaddr = dev->base_addr;
+ int new_mode;
+
+ if (dev->flags & IFF_PROMISC) {
+ if (vortex_debug > 0)
+ printk(KERN_NOTICE "%s: Setting promiscuous mode.\n", dev->name);
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast|RxProm;
+ } else if ((dev->mc_list) || (dev->flags & IFF_ALLMULTI)) {
+ new_mode = SetRxFilter|RxStation|RxMulticast|RxBroadcast;
+ } else
+ new_mode = SetRxFilter | RxStation | RxBroadcast;
+
+ outw(new_mode, ioaddr + EL3_CMD);
+}
+
+/* MII transceiver control section.
+ Read and write the MII registers using software-generated serial
+ MDIO protocol. See the MII specifications or DP83840A data sheet
+ for details. */
+
+/* The maximum data clock rate is 2.5 MHz. The minimum timing is usually
+ met by back-to-back PCI I/O cycles, but we insert a delay to avoid
+ "overclocking" issues. */
+#define mdio_delay() inl(mdio_addr)
+
+#define MDIO_SHIFT_CLK 0x01
+#define MDIO_DIR_WRITE 0x04
+#define MDIO_DATA_WRITE0 (0x00 | MDIO_DIR_WRITE)
+#define MDIO_DATA_WRITE1 (0x02 | MDIO_DIR_WRITE)
+#define MDIO_DATA_READ 0x02
+#define MDIO_ENB_IN 0x00
+
+/* Generate the preamble required for initial synchronization and
+ a few older transceivers. 
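+
+ For reference, the management frame assembled by mdio_read() below is
+ read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+ of which bits 14..0 are clocked out MSB-first: a sync 1, the start
+ code 01, the read opcode 10, the 5-bit PHY address and the 5-bit
+ register address. The 19-cycle input loop then clocks in the two
+ turnaround bits, 16 data bits and one wire-idle bit; the final
+ "retval >> 1" drops the idle bit, and a set bit 17 (0x20000) marks a
+ failed turnaround, i.e. no PHY answered.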
*/
+static void mdio_sync(long ioaddr, int bits)
+{
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ /* Establish sync by sending at least 32 logic ones. */
+ while (-- bits >= 0) {
+ outw(MDIO_DATA_WRITE1, mdio_addr);
+ mdio_delay();
+ outw(MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+}
+
+static int mdio_read(struct net_device *dev, int phy_id, int location)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ int i;
+ long ioaddr = dev->base_addr;
+ int read_cmd = (0xf6 << 10) | (phy_id << 5) | location;
+ unsigned int retval = 0;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the read command bits out. */
+ for (i = 14; i >= 0; i--) {
+ int dataval = (read_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Read the two transition, 16 data, and wire-idle bits. */
+ for (i = 19; i > 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ retval = (retval << 1) | ((inw(mdio_addr) & MDIO_DATA_READ) ? 1 : 0);
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return retval & 0x20000 ? 0xffff : retval>>1 & 0xffff;
+}
+
+static void mdio_write(struct net_device *dev, int phy_id, int location, int value)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int write_cmd = 0x50020000 | (phy_id << 23) | (location << 18) | value;
+ long mdio_addr = ioaddr + Wn4_PhysicalMgmt;
+ int i;
+
+ spin_lock_bh(&vp->mdio_lock);
+
+ if (mii_preamble_required)
+ mdio_sync(ioaddr, 32);
+
+ /* Shift the command bits out. */
+ for (i = 31; i >= 0; i--) {
+ int dataval = (write_cmd&(1<<i)) ? MDIO_DATA_WRITE1 : MDIO_DATA_WRITE0;
+ outw(dataval, mdio_addr);
+ mdio_delay();
+ outw(dataval | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ /* Leave the interface idle. */
+ for (i = 1; i >= 0; i--) {
+ outw(MDIO_ENB_IN, mdio_addr);
+ mdio_delay();
+ outw(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr);
+ mdio_delay();
+ }
+ spin_unlock_bh(&vp->mdio_lock);
+ return;
+}
+
+/* ACPI: Advanced Configuration and Power Interface. */
+/* Set Wake-On-LAN mode and put the board into D3 (power-down) state. */
+static void acpi_set_WOL(struct net_device *dev)
+{
+ struct vortex_private *vp = (struct vortex_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+
+ /* Power up on: 1==Downloaded Filter, 2==Magic Packets, 4==Link Status. */
+ EL3WINDOW(7);
+ outw(2, ioaddr + 0x0c);
+ /* The RxFilter must accept the WOL frames. */
+ outw(SetRxFilter|RxStation|RxMulticast|RxBroadcast, ioaddr + EL3_CMD);
+ outw(RxEnable, ioaddr + EL3_CMD);
+
+ /* Change the power state to D3; RxEnable doesn't take effect. 
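+ (The "2" written to window 7, offset 0x0c above arms Magic Packet
+ wake-up only, per the power-up event mask quoted in the comment
+ there. In D3 the chip runs from auxiliary power and asserts PME#
+ when a matching frame arrives, which is why the Rx filter and
+ RxEnable must be set up before the transition.)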
*/ + pci_enable_wake(vp->pdev, 0, 1); + pci_set_power_state(vp->pdev, 3); +} + + +static void __devexit vortex_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct vortex_private *vp; + + if (!dev) { + printk("vortex_remove_one called for EISA device!\n"); + BUG(); + } + + vp = dev->priv; + + /* AKPM: FIXME: we should have + * if (vp->cb_fn_base) iounmap(vp->cb_fn_base); + * here + */ + unregister_netdev(dev); + /* Should really use issue_and_wait() here */ + outw(TotalReset|0x14, dev->base_addr + EL3_CMD); + + if (vp->pdev && vp->enable_wol) { + pci_set_power_state(vp->pdev, 0); /* Go active */ + if (vp->pm_state_valid) + pci_restore_state(vp->pdev, vp->power_state); + } + + pci_free_consistent(pdev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, + vp->rx_ring_dma); + if (vp->must_free_region) + release_region(dev->base_addr, vp->io_size); + kfree(dev); +} + + +static struct pci_driver vortex_driver = { + name: "3c59x", + probe: vortex_init_one, + remove: __devexit_p(vortex_remove_one), + id_table: vortex_pci_tbl, +#ifdef CONFIG_PM + suspend: vortex_suspend, + resume: vortex_resume, +#endif +}; + + +static int vortex_have_pci; +static int vortex_have_eisa; + + +static int __init vortex_init (void) +{ + int pci_rc, eisa_rc; + + pci_rc = pci_module_init(&vortex_driver); + eisa_rc = vortex_eisa_init(); + + if (pci_rc == 0) + vortex_have_pci = 1; + if (eisa_rc > 0) + vortex_have_eisa = 1; + + return (vortex_have_pci + vortex_have_eisa) ? 0 : -ENODEV; +} + + +static void __exit vortex_eisa_cleanup (void) +{ + struct net_device *dev, *tmp; + struct vortex_private *vp; + long ioaddr; + + dev = root_vortex_eisa_dev; + + while (dev) { + vp = dev->priv; + ioaddr = dev->base_addr; + + unregister_netdev (dev); + outw (TotalReset, ioaddr + EL3_CMD); + release_region (ioaddr, VORTEX_TOTAL_SIZE); + + tmp = dev; + dev = vp->next_module; + + kfree (tmp); + } +} + + +static void __exit vortex_cleanup (void) +{ + if (vortex_have_pci) + pci_unregister_driver (&vortex_driver); + if (vortex_have_eisa) + vortex_eisa_cleanup (); +} + + +module_init(vortex_init); +module_exit(vortex_cleanup); diff --git a/xen-2.4.16/drivers/net/8139cp.c b/xen-2.4.16/drivers/net/8139cp.c new file mode 100644 index 0000000000..c1e2f19d8a --- /dev/null +++ b/xen-2.4.16/drivers/net/8139cp.c @@ -0,0 +1,1334 @@ +/* 8139cp.c: A Linux PCI Ethernet driver for the RealTek 8139C+ chips. */ +/* + Copyright 2001 Jeff Garzik + + Copyright (C) 2000, 2001 David S. Miller (davem@redhat.com) [sungem.c] + Copyright 2001 Manfred Spraul [natsemi.c] + Copyright 1999-2001 by Donald Becker. [natsemi.c] + Written 1997-2001 by Donald Becker. [8139too.c] + Copyright 1998-2001 by Jes Sorensen, . [acenic.c] + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + See the file COPYING in this distribution for more information. + + TODO, in rough priority order: + * dev->tx_timeout + * LinkChg interrupt + * ETHTOOL_[GS]SET + * Support forcing media type with a module parameter, + like dl2k.c/sundance.c + * Implement PCI suspend/resume + * Constants (module parms?) 
for Rx work limit
+ * support 64-bit PCI DMA
+ * Complete reset on PciErr
+ * Consider Rx interrupt mitigation using TimerIntr
+ * Implement 8139C+ statistics dump; maybe not...
+ h/w stats can be reset only by software reset
+ * Rx checksumming
+ * Tx checksumming
+ * ETHTOOL_GREGS, ETHTOOL_[GS]WOL,
+ ETHTOOL_[GS]MSGLVL, ETHTOOL_NWAY_RST
+ * Jumbo frames / dev->change_mtu
+ * Investigate using skb->priority with h/w VLAN priority
+ * Investigate using High Priority Tx Queue with skb->priority
+ * Adjust Rx FIFO threshold and Max Rx DMA burst on Rx FIFO error
+ * Adjust Tx FIFO threshold and Max Tx DMA burst on Tx FIFO error
+
+ */
+
+#define DRV_NAME "8139cp"
+#define DRV_VERSION "0.0.5"
+#define DRV_RELDATE "Oct 19, 2001"
+
+
+#include
+#include
+//#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* These identify the driver base version and may not be removed. */
+static char version[] __devinitdata =
+KERN_INFO DRV_NAME " 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n";
+
+MODULE_AUTHOR("Jeff Garzik ");
+MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver");
+MODULE_LICENSE("GPL");
+
+static int debug = -1;
+MODULE_PARM (debug, "i");
+MODULE_PARM_DESC (debug, "8139cp bitmapped message enable number");
+
+/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
+ The RTL chips use a 64 element hash table based on the Ethernet CRC. */
+static int multicast_filter_limit = 32;
+MODULE_PARM (multicast_filter_limit, "i");
+MODULE_PARM_DESC (multicast_filter_limit, "8139cp maximum number of filtered multicast addresses");
+
+/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */
+#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \
+ || defined(__sparc__) || defined(__ia64__) \
+ || defined(__sh__) || defined(__mips__)
+static int rx_copybreak = 1518;
+#else
+static int rx_copybreak = 100;
+#endif
+MODULE_PARM (rx_copybreak, "i");
+MODULE_PARM_DESC (rx_copybreak, "8139cp Breakpoint at which Rx packets are copied");
+
+#define PFX DRV_NAME ": "
+
+#define CP_DEF_MSG_ENABLE (NETIF_MSG_DRV | \
+ NETIF_MSG_PROBE | \
+ NETIF_MSG_LINK)
+#define CP_REGS_SIZE (0xff + 1)
+#define CP_RX_RING_SIZE 64
+#define CP_TX_RING_SIZE 64
+#define CP_RING_BYTES \
+ ((sizeof(struct cp_desc) * CP_RX_RING_SIZE) + \
+ (sizeof(struct cp_desc) * CP_TX_RING_SIZE))
+#define NEXT_TX(N) (((N) + 1) & (CP_TX_RING_SIZE - 1))
+#define NEXT_RX(N) (((N) + 1) & (CP_RX_RING_SIZE - 1))
+#define TX_BUFFS_AVAIL(CP) \
+ (((CP)->tx_tail <= (CP)->tx_head) ? \
+ (CP)->tx_tail + (CP_TX_RING_SIZE - 1) - (CP)->tx_head : \
+ (CP)->tx_tail - (CP)->tx_head - 1)
+#define CP_CHIP_VERSION 0x76
+
+#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/
+#define RX_OFFSET 2
+
+/* The following settings are log_2(bytes)-4: 0 == 16 bytes .. 6==1024, 7==end of packet. */
+#define RX_FIFO_THRESH 5 /* Rx buffer level before first PCI xfer. */
+#define RX_DMA_BURST 4 /* Maximum PCI burst, '4' is 256 */
+#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
+#define TX_EARLY_THRESH 256 /* Early Tx threshold, in bytes */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT (6*HZ)
+
+
+enum {
+ /* NIC register offsets */
+ MAC0 = 0x00, /* Ethernet hardware address. */
+ MAR0 = 0x08, /* Multicast filter. 
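+ Eight bytes at MAR0..MAR7 form a 64-bin hash filter: a multicast
+ address is mapped to a bin by the top 6 bits of its Ethernet CRC,
+ as __cp_set_rx_mode() does further down:
+
+ int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+ mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
+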
*/ + TxRingAddr = 0x20, /* 64-bit start addr of Tx ring */ + HiTxRingAddr = 0x28, /* 64-bit start addr of high priority Tx ring */ + Cmd = 0x37, /* Command register */ + IntrMask = 0x3C, /* Interrupt mask */ + IntrStatus = 0x3E, /* Interrupt status */ + TxConfig = 0x40, /* Tx configuration */ + ChipVersion = 0x43, /* 8-bit chip version, inside TxConfig */ + RxConfig = 0x44, /* Rx configuration */ + Cfg9346 = 0x50, /* EEPROM select/control; Cfg reg [un]lock */ + Config1 = 0x52, /* Config1 */ + Config3 = 0x59, /* Config3 */ + Config4 = 0x5A, /* Config4 */ + MultiIntr = 0x5C, /* Multiple interrupt select */ + Config5 = 0xD8, /* Config5 */ + TxPoll = 0xD9, /* Tell chip to check Tx descriptors for work */ + CpCmd = 0xE0, /* C+ Command register (C+ mode only) */ + RxRingAddr = 0xE4, /* 64-bit start addr of Rx ring */ + TxThresh = 0xEC, /* Early Tx threshold */ + OldRxBufAddr = 0x30, /* DMA address of Rx ring buffer (C mode) */ + OldTSD0 = 0x10, /* DMA address of first Tx desc (C mode) */ + + /* Tx and Rx status descriptors */ + DescOwn = (1 << 31), /* Descriptor is owned by NIC */ + RingEnd = (1 << 30), /* End of descriptor ring */ + FirstFrag = (1 << 29), /* First segment of a packet */ + LastFrag = (1 << 28), /* Final segment of a packet */ + TxError = (1 << 23), /* Tx error summary */ + RxError = (1 << 20), /* Rx error summary */ + IPCS = (1 << 18), /* Calculate IP checksum */ + UDPCS = (1 << 17), /* Calculate UDP/IP checksum */ + TCPCS = (1 << 16), /* Calculate TCP/IP checksum */ + IPFail = (1 << 15), /* IP checksum failed */ + UDPFail = (1 << 14), /* UDP/IP checksum failed */ + TCPFail = (1 << 13), /* TCP/IP checksum failed */ + NormalTxPoll = (1 << 6), /* One or more normal Tx packets to send */ + PID1 = (1 << 17), /* 2 protocol id bits: 0==non-IP, */ + PID0 = (1 << 16), /* 1==UDP/IP, 2==TCP/IP, 3==IP */ + TxFIFOUnder = (1 << 25), /* Tx FIFO underrun */ + TxOWC = (1 << 22), /* Tx Out-of-window collision */ + TxLinkFail = (1 << 21), /* Link failed during Tx of packet */ + TxMaxCol = (1 << 20), /* Tx aborted due to excessive collisions */ + TxColCntShift = 16, /* Shift, to get 4-bit Tx collision cnt */ + TxColCntMask = 0x01 | 0x02 | 0x04 | 0x08, /* 4-bit collision count */ + RxErrFrame = (1 << 27), /* Rx frame alignment error */ + RxMcast = (1 << 26), /* Rx multicast packet rcv'd */ + RxErrCRC = (1 << 18), /* Rx CRC error */ + RxErrRunt = (1 << 19), /* Rx error, packet < 64 bytes */ + RxErrLong = (1 << 21), /* Rx error, packet > 4096 bytes */ + RxErrFIFO = (1 << 22), /* Rx error, FIFO overflowed, pkt bad */ + + /* RxConfig register */ + RxCfgFIFOShift = 13, /* Shift, to get Rx FIFO thresh value */ + RxCfgDMAShift = 8, /* Shift, to get Rx Max DMA value */ + AcceptErr = 0x20, /* Accept packets with CRC errors */ + AcceptRunt = 0x10, /* Accept runt (<64 bytes) packets */ + AcceptBroadcast = 0x08, /* Accept broadcast packets */ + AcceptMulticast = 0x04, /* Accept multicast packets */ + AcceptMyPhys = 0x02, /* Accept pkts with our MAC as dest */ + AcceptAllPhys = 0x01, /* Accept all pkts w/ physical dest */ + + /* IntrMask / IntrStatus registers */ + PciErr = (1 << 15), /* System error on the PCI bus */ + TimerIntr = (1 << 14), /* Asserted when TCTR reaches TimerInt value */ + LenChg = (1 << 13), /* Cable length change */ + SWInt = (1 << 8), /* Software-requested interrupt */ + TxEmpty = (1 << 7), /* No Tx descriptors available */ + RxFIFOOvr = (1 << 6), /* Rx FIFO Overflow */ + LinkChg = (1 << 5), /* Packet underrun, or link change */ + RxEmpty = (1 << 4), /* No Rx descriptors available */ + 
TxErr = (1 << 3), /* Tx error */ + TxOK = (1 << 2), /* Tx packet sent */ + RxErr = (1 << 1), /* Rx error */ + RxOK = (1 << 0), /* Rx packet received */ + IntrResvd = (1 << 10), /* reserved, according to RealTek engineers, + but hardware likes to raise it */ + + IntrAll = PciErr | TimerIntr | LenChg | SWInt | TxEmpty | + RxFIFOOvr | LinkChg | RxEmpty | TxErr | TxOK | + RxErr | RxOK | IntrResvd, + + /* C mode command register */ + CmdReset = (1 << 4), /* Enable to reset; self-clearing */ + RxOn = (1 << 3), /* Rx mode enable */ + TxOn = (1 << 2), /* Tx mode enable */ + + /* C+ mode command register */ + RxChkSum = (1 << 5), /* Rx checksum offload enable */ + PCIMulRW = (1 << 3), /* Enable PCI read/write multiple */ + CpRxOn = (1 << 1), /* Rx mode enable */ + CpTxOn = (1 << 0), /* Tx mode enable */ + + /* Cfg9436 EEPROM control register */ + Cfg9346_Lock = 0x00, /* Lock ConfigX/MII register access */ + Cfg9346_Unlock = 0xC0, /* Unlock ConfigX/MII register access */ + + /* TxConfig register */ + IFG = (1 << 25) | (1 << 24), /* standard IEEE interframe gap */ + TxDMAShift = 8, /* DMA burst value (0-7) is shift this many bits */ + + /* Early Tx Threshold register */ + TxThreshMask = 0x3f, /* Mask bits 5-0 */ + TxThreshMax = 2048, /* Max early Tx threshold */ + + /* Config1 register */ + DriverLoaded = (1 << 5), /* Software marker, driver is loaded */ + PMEnable = (1 << 0), /* Enable various PM features of chip */ + + /* Config3 register */ + PARMEnable = (1 << 6), /* Enable auto-loading of PHY parms */ + + /* Config5 register */ + PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */ +}; + +static const unsigned int cp_intr_mask = + PciErr | LinkChg | + RxOK | RxErr | RxEmpty | RxFIFOOvr | + TxOK | TxErr | TxEmpty; + +static const unsigned int cp_rx_config = + (RX_FIFO_THRESH << RxCfgFIFOShift) | + (RX_DMA_BURST << RxCfgDMAShift); + +struct cp_desc { + u32 opts1; + u32 opts2; + u32 addr_lo; + u32 addr_hi; +}; + +struct ring_info { + struct sk_buff *skb; + dma_addr_t mapping; + unsigned frag; +}; + +struct cp_extra_stats { + unsigned long rx_frags; +}; + +struct cp_private { + unsigned tx_head; + unsigned tx_tail; + unsigned rx_tail; + + void *regs; + struct net_device *dev; + spinlock_t lock; + + struct cp_desc *rx_ring; + struct cp_desc *tx_ring; + struct ring_info tx_skb[CP_TX_RING_SIZE]; + struct ring_info rx_skb[CP_RX_RING_SIZE]; + unsigned rx_buf_sz; + dma_addr_t ring_dma; + + u32 msg_enable; + + struct net_device_stats net_stats; + struct cp_extra_stats cp_stats; + + struct pci_dev *pdev; + u32 rx_config; + + struct sk_buff *frag_skb; + unsigned dropping_frag : 1; +}; + +#define cpr8(reg) readb(cp->regs + (reg)) +#define cpr16(reg) readw(cp->regs + (reg)) +#define cpr32(reg) readl(cp->regs + (reg)) +#define cpw8(reg,val) writeb((val), cp->regs + (reg)) +#define cpw16(reg,val) writew((val), cp->regs + (reg)) +#define cpw32(reg,val) writel((val), cp->regs + (reg)) +#define cpw8_f(reg,val) do { \ + writeb((val), cp->regs + (reg)); \ + readb(cp->regs + (reg)); \ + } while (0) +#define cpw16_f(reg,val) do { \ + writew((val), cp->regs + (reg)); \ + readw(cp->regs + (reg)); \ + } while (0) +#define cpw32_f(reg,val) do { \ + writel((val), cp->regs + (reg)); \ + readl(cp->regs + (reg)); \ + } while (0) + + +static void __cp_set_rx_mode (struct net_device *dev); +static void cp_tx (struct cp_private *cp); +static void cp_clean_rings (struct cp_private *cp); + + +static struct pci_device_id cp_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139, + PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, 0 }, + { }, +}; +MODULE_DEVICE_TABLE(pci, cp_pci_tbl); + +static inline void cp_rx_skb (struct cp_private *cp, struct sk_buff *skb) +{ + skb->protocol = eth_type_trans (skb, cp->dev); + + cp->net_stats.rx_packets++; + cp->net_stats.rx_bytes += skb->len; + cp->dev->last_rx = jiffies; + netif_rx (skb); +} + +static inline void cp_rx_err_acct (struct cp_private *cp, unsigned rx_tail, + u32 status, u32 len) +{ + if (netif_msg_rx_err (cp)) + printk (KERN_DEBUG + "%s: rx err, slot %d status 0x%x len %d\n", + cp->dev->name, rx_tail, status, len); + cp->net_stats.rx_errors++; + if (status & RxErrFrame) + cp->net_stats.rx_frame_errors++; + if (status & RxErrCRC) + cp->net_stats.rx_crc_errors++; + if (status & RxErrRunt) + cp->net_stats.rx_length_errors++; + if (status & RxErrLong) + cp->net_stats.rx_length_errors++; + if (status & RxErrFIFO) + cp->net_stats.rx_fifo_errors++; +} + +static void cp_rx_frag (struct cp_private *cp, unsigned rx_tail, + struct sk_buff *skb, u32 status, u32 len) +{ + struct sk_buff *copy_skb, *frag_skb = cp->frag_skb; + unsigned orig_len = frag_skb ? frag_skb->len : 0; + unsigned target_len = orig_len + len; + unsigned first_frag = status & FirstFrag; + unsigned last_frag = status & LastFrag; + + if (netif_msg_rx_status (cp)) + printk (KERN_DEBUG "%s: rx %s%sfrag, slot %d status 0x%x len %d\n", + cp->dev->name, + cp->dropping_frag ? "dropping " : "", + first_frag ? "first " : + last_frag ? "last " : "", + rx_tail, status, len); + + cp->cp_stats.rx_frags++; + + if (!frag_skb && !first_frag) + cp->dropping_frag = 1; + if (cp->dropping_frag) + goto drop_frag; + + copy_skb = dev_alloc_skb (target_len + RX_OFFSET); + if (!copy_skb) { + printk(KERN_WARNING "%s: rx slot %d alloc failed\n", + cp->dev->name, rx_tail); + + cp->dropping_frag = 1; +drop_frag: + if (frag_skb) { + dev_kfree_skb_irq(frag_skb); + cp->frag_skb = NULL; + } + if (last_frag) { + cp->net_stats.rx_dropped++; + cp->dropping_frag = 0; + } + return; + } + + copy_skb->dev = cp->dev; + skb_reserve(copy_skb, RX_OFFSET); + skb_put(copy_skb, target_len); + if (frag_skb) { + memcpy(copy_skb->data, frag_skb->data, orig_len); + dev_kfree_skb_irq(frag_skb); + } + pci_dma_sync_single(cp->pdev, cp->rx_skb[rx_tail].mapping, + len, PCI_DMA_FROMDEVICE); + memcpy(copy_skb->data + orig_len, skb->data, len); + + copy_skb->ip_summed = CHECKSUM_NONE; + + if (last_frag) { + if (status & (RxError | RxErrFIFO)) { + cp_rx_err_acct(cp, rx_tail, status, len); + dev_kfree_skb_irq(copy_skb); + } else + cp_rx_skb(cp, copy_skb); + cp->frag_skb = NULL; + } else { + cp->frag_skb = copy_skb; + } +} + +static void cp_rx (struct cp_private *cp) +{ + unsigned rx_tail = cp->rx_tail; + unsigned rx_work = 100; + + while (rx_work--) { + u32 status, len; + dma_addr_t mapping; + struct sk_buff *skb, *copy_skb; + unsigned copying_skb, buflen; + + skb = cp->rx_skb[rx_tail].skb; + if (!skb) + BUG(); + rmb(); + status = le32_to_cpu(cp->rx_ring[rx_tail].opts1); + if (status & DescOwn) + break; + + len = (status & 0x1fff) - 4; + mapping = cp->rx_skb[rx_tail].mapping; + + if ((status & (FirstFrag | LastFrag)) != (FirstFrag | LastFrag)) { + cp_rx_frag(cp, rx_tail, skb, status, len); + goto rx_next; + } + + if (status & (RxError | RxErrFIFO)) { + cp_rx_err_acct(cp, rx_tail, status, len); + goto rx_next; + } + + copying_skb = (len <= rx_copybreak); + + if (netif_msg_rx_status(cp)) + printk(KERN_DEBUG "%s: rx slot %d status 0x%x len %d copying? %d\n", + cp->dev->name, rx_tail, status, len, + copying_skb); + + buflen = copying_skb ? 
len : cp->rx_buf_sz; + copy_skb = dev_alloc_skb (buflen + RX_OFFSET); + if (!copy_skb) { + cp->net_stats.rx_dropped++; + goto rx_next; + } + + skb_reserve(copy_skb, RX_OFFSET); + copy_skb->dev = cp->dev; + + if (!copying_skb) { + pci_unmap_single(cp->pdev, mapping, + buflen, PCI_DMA_FROMDEVICE); + skb->ip_summed = CHECKSUM_NONE; + skb_trim(skb, len); + + mapping = + cp->rx_skb[rx_tail].mapping = + pci_map_single(cp->pdev, copy_skb->data, + buflen, PCI_DMA_FROMDEVICE); + cp->rx_skb[rx_tail].skb = copy_skb; + skb_put(copy_skb, buflen); + } else { + skb_put(copy_skb, len); + pci_dma_sync_single(cp->pdev, mapping, len, PCI_DMA_FROMDEVICE); + memcpy(copy_skb->data, skb->data, len); + + /* We'll reuse the original ring buffer. */ + skb = copy_skb; + } + + cp_rx_skb(cp, skb); + +rx_next: + if (rx_tail == (CP_RX_RING_SIZE - 1)) + cp->rx_ring[rx_tail].opts1 = + cpu_to_le32(DescOwn | RingEnd | cp->rx_buf_sz); + else + cp->rx_ring[rx_tail].opts1 = + cpu_to_le32(DescOwn | cp->rx_buf_sz); + cp->rx_ring[rx_tail].opts2 = 0; + cp->rx_ring[rx_tail].addr_lo = cpu_to_le32(mapping); + rx_tail = NEXT_RX(rx_tail); + } + + if (!rx_work) + printk(KERN_WARNING "%s: rx work limit reached\n", cp->dev->name); + + cp->rx_tail = rx_tail; +} + +static void cp_interrupt (int irq, void *dev_instance, struct pt_regs *regs) +{ + struct net_device *dev = dev_instance; + struct cp_private *cp = dev->priv; + u16 status; + + status = cpr16(IntrStatus); + if (!status || (status == 0xFFFF)) + return; + + if (netif_msg_intr(cp)) + printk(KERN_DEBUG "%s: intr, status %04x cmd %02x cpcmd %04x\n", + dev->name, status, cpr8(Cmd), cpr16(CpCmd)); + + spin_lock(&cp->lock); + + if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr)) + cp_rx(cp); + if (status & (TxOK | TxErr | TxEmpty | SWInt)) + cp_tx(cp); + + cpw16_f(IntrStatus, status); + + if (status & PciErr) { + u16 pci_status; + + pci_read_config_word(cp->pdev, PCI_STATUS, &pci_status); + pci_write_config_word(cp->pdev, PCI_STATUS, pci_status); + printk(KERN_ERR "%s: PCI bus error, status=%04x, PCI status=%04x\n", + dev->name, status, pci_status); + } + + spin_unlock(&cp->lock); +} + +static void cp_tx (struct cp_private *cp) +{ + unsigned tx_head = cp->tx_head; + unsigned tx_tail = cp->tx_tail; + + while (tx_tail != tx_head) { + struct sk_buff *skb; + u32 status; + + rmb(); + status = le32_to_cpu(cp->tx_ring[tx_tail].opts1); + if (status & DescOwn) + break; + + skb = cp->tx_skb[tx_tail].skb; + if (!skb) + BUG(); + + pci_unmap_single(cp->pdev, cp->tx_skb[tx_tail].mapping, + skb->len, PCI_DMA_TODEVICE); + + if (status & LastFrag) { + if (status & (TxError | TxFIFOUnder)) { + if (netif_msg_tx_err(cp)) + printk(KERN_DEBUG "%s: tx err, status 0x%x\n", + cp->dev->name, status); + cp->net_stats.tx_errors++; + if (status & TxOWC) + cp->net_stats.tx_window_errors++; + if (status & TxMaxCol) + cp->net_stats.tx_aborted_errors++; + if (status & TxLinkFail) + cp->net_stats.tx_carrier_errors++; + if (status & TxFIFOUnder) + cp->net_stats.tx_fifo_errors++; + } else { + cp->net_stats.collisions += + ((status >> TxColCntShift) & TxColCntMask); + cp->net_stats.tx_packets++; + cp->net_stats.tx_bytes += skb->len; + if (netif_msg_tx_done(cp)) + printk(KERN_DEBUG "%s: tx done, slot %d\n", cp->dev->name, tx_tail); + } + dev_kfree_skb_irq(skb); + } + + cp->tx_skb[tx_tail].skb = NULL; + + tx_tail = NEXT_TX(tx_tail); + } + + cp->tx_tail = tx_tail; + + if (netif_queue_stopped(cp->dev) && (TX_BUFFS_AVAIL(cp) > 1)) + netif_wake_queue(cp->dev); +} + +static int cp_start_xmit (struct sk_buff *skb, struct net_device 
*dev) +{ + struct cp_private *cp = dev->priv; + unsigned entry; + u32 eor; + + spin_lock_irq(&cp->lock); + + if (TX_BUFFS_AVAIL(cp) <= (skb_shinfo(skb)->nr_frags + 1)) { + netif_stop_queue(dev); + spin_unlock_irq(&cp->lock); + return 1; + } + + entry = cp->tx_head; + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; + if (skb_shinfo(skb)->nr_frags == 0) { + struct cp_desc *txd = &cp->tx_ring[entry]; + u32 mapping, len; + + len = skb->len; + mapping = pci_map_single(cp->pdev, skb->data, len, PCI_DMA_TODEVICE); + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; + txd->opts2 = 0; + txd->addr_lo = cpu_to_le32(mapping); + wmb(); + +#ifdef CP_TX_CHECKSUM + txd->opts1 = cpu_to_le32(eor | len | DescOwn | FirstFrag | + LastFrag | IPCS | UDPCS | TCPCS); +#else + txd->opts1 = cpu_to_le32(eor | len | DescOwn | FirstFrag | + LastFrag); +#endif + wmb(); + + cp->tx_skb[entry].skb = skb; + cp->tx_skb[entry].mapping = mapping; + cp->tx_skb[entry].frag = 0; + entry = NEXT_TX(entry); + } else { + struct cp_desc *txd; + u32 first_len, first_mapping; + int frag, first_entry = entry; + + /* We must give this initial chunk to the device last. + * Otherwise we could race with the device. + */ + first_len = skb->len - skb->data_len; + first_mapping = pci_map_single(cp->pdev, skb->data, + first_len, PCI_DMA_TODEVICE); + cp->tx_skb[entry].skb = skb; + cp->tx_skb[entry].mapping = first_mapping; + cp->tx_skb[entry].frag = 1; + entry = NEXT_TX(entry); + + for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { + skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; + u32 len, mapping; + u32 ctrl; + + len = this_frag->size; + mapping = pci_map_single(cp->pdev, + ((void *) page_address(this_frag->page) + + this_frag->page_offset), + len, PCI_DMA_TODEVICE); + eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; +#ifdef CP_TX_CHECKSUM + ctrl = eor | len | DescOwn | IPCS | UDPCS | TCPCS; +#else + ctrl = eor | len | DescOwn; +#endif + if (frag == skb_shinfo(skb)->nr_frags - 1) + ctrl |= LastFrag; + + txd = &cp->tx_ring[entry]; + txd->opts2 = 0; + txd->addr_lo = cpu_to_le32(mapping); + wmb(); + + txd->opts1 = cpu_to_le32(ctrl); + wmb(); + + cp->tx_skb[entry].skb = skb; + cp->tx_skb[entry].mapping = mapping; + cp->tx_skb[entry].frag = frag + 2; + entry = NEXT_TX(entry); + } + + txd = &cp->tx_ring[first_entry]; + txd->opts2 = 0; + txd->addr_lo = cpu_to_le32(first_mapping); + wmb(); + +#ifdef CP_TX_CHECKSUM + txd->opts1 = cpu_to_le32(first_len | FirstFrag | DescOwn | IPCS | UDPCS | TCPCS); +#else + txd->opts1 = cpu_to_le32(first_len | FirstFrag | DescOwn); +#endif + wmb(); + } + cp->tx_head = entry; + if (netif_msg_tx_queued(cp)) + printk(KERN_DEBUG "%s: tx queued, slot %d, skblen %d\n", + dev->name, entry, skb->len); + if (TX_BUFFS_AVAIL(cp) < 0) + BUG(); + if (TX_BUFFS_AVAIL(cp) == 0) + netif_stop_queue(dev); + + spin_unlock_irq(&cp->lock); + + cpw8(TxPoll, NormalTxPoll); + dev->trans_start = jiffies; + + return 0; +} + +/* Set or clear the multicast filter for this adaptor. + This routine is not state sensitive and need not be SMP locked. */ + +static unsigned const ethernet_polynomial = 0x04c11db7U; +static inline u32 ether_crc (int length, unsigned char *data) +{ + int crc = -1; + + while (--length >= 0) { + unsigned char current_octet = *data++; + int bit; + for (bit = 0; bit < 8; bit++, current_octet >>= 1) + crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ? 
+ ethernet_polynomial : 0); + } + + return crc; +} + +static void __cp_set_rx_mode (struct net_device *dev) +{ + struct cp_private *cp = dev->priv; + u32 mc_filter[2]; /* Multicast hash filter */ + int i, rx_mode; + u32 tmp; + + /* Note: do not reorder, GCC is clever about common statements. */ + if (dev->flags & IFF_PROMISC) { + /* Unconditionally log net taps. */ + printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", + dev->name); + rx_mode = + AcceptBroadcast | AcceptMulticast | AcceptMyPhys | + AcceptAllPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else if ((dev->mc_count > multicast_filter_limit) + || (dev->flags & IFF_ALLMULTI)) { + /* Too many to filter perfectly -- accept all multicasts. */ + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else { + struct dev_mc_list *mclist; + rx_mode = AcceptBroadcast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0; + for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; + i++, mclist = mclist->next) { + int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + + mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31)); + rx_mode |= AcceptMulticast; + } + } + + /* We can safely update without stopping the chip. */ + tmp = cp_rx_config | rx_mode; + if (cp->rx_config != tmp) { + cpw32_f (RxConfig, tmp); + cp->rx_config = tmp; + } + cpw32_f (MAR0 + 0, mc_filter[0]); + cpw32_f (MAR0 + 4, mc_filter[1]); +} + +static void cp_set_rx_mode (struct net_device *dev) +{ + unsigned long flags; + struct cp_private *cp = dev->priv; + + spin_lock_irqsave (&cp->lock, flags); + __cp_set_rx_mode(dev); + spin_unlock_irqrestore (&cp->lock, flags); +} + +static void __cp_get_stats(struct cp_private *cp) +{ + /* XXX implement */ +} + +static struct net_device_stats *cp_get_stats(struct net_device *dev) +{ + struct cp_private *cp = dev->priv; + + /* The chip only need report frame silently dropped. */ + spin_lock_irq(&cp->lock); + if (netif_running(dev) && netif_device_present(dev)) + __cp_get_stats(cp); + spin_unlock_irq(&cp->lock); + + return &cp->net_stats; +} + +static void cp_stop_hw (struct cp_private *cp) +{ + cpw16(IntrMask, 0); + cpr16(IntrMask); + cpw8(Cmd, 0); + cpw16(CpCmd, 0); + cpr16(CpCmd); + cpw16(IntrStatus, ~(cpr16(IntrStatus))); + synchronize_irq(); + udelay(10); + + cp->rx_tail = 0; + cp->tx_head = cp->tx_tail = 0; +} + +static void cp_reset_hw (struct cp_private *cp) +{ + unsigned work = 1000; + + cpw8(Cmd, CmdReset); + + while (work--) { + if (!(cpr8(Cmd) & CmdReset)) + return; + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + printk(KERN_ERR "%s: hardware reset timeout\n", cp->dev->name); +} + +static void cp_init_hw (struct cp_private *cp) +{ + struct net_device *dev = cp->dev; + + cp_reset_hw(cp); + + cpw8_f (Cfg9346, Cfg9346_Unlock); + + /* Restore our idea of the MAC address. 
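+ The 6-byte station address is written as two 32-bit stores while the
+ Cfg9346 unlock above makes the MAC0 registers writable; the upper
+ two bytes of the second store fall on offsets 6-7, which (an
+ assumption from the usual RTL register layout) are don't-care
+ padding before MAR0 at 0x08.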
*/ + cpw32_f (MAC0 + 0, cpu_to_le32 (*(u32 *) (dev->dev_addr + 0))); + cpw32_f (MAC0 + 4, cpu_to_le32 (*(u32 *) (dev->dev_addr + 4))); + + cpw8(Cmd, RxOn | TxOn); + cpw16(CpCmd, PCIMulRW | CpRxOn | CpTxOn); + cpw8(TxThresh, 0x06); /* XXX convert magic num to a constant */ + + __cp_set_rx_mode(dev); + cpw32_f (TxConfig, IFG | (TX_DMA_BURST << TxDMAShift)); + + cpw8(Config1, cpr8(Config1) | DriverLoaded | PMEnable); + cpw8(Config3, PARMEnable); /* disables magic packet and WOL */ + cpw8(Config5, cpr8(Config5) & PMEStatus); /* disables more WOL stuff */ + + cpw32_f(HiTxRingAddr, 0); + cpw32_f(HiTxRingAddr + 4, 0); + cpw32_f(OldRxBufAddr, 0); + cpw32_f(OldTSD0, 0); + cpw32_f(OldTSD0 + 4, 0); + cpw32_f(OldTSD0 + 8, 0); + cpw32_f(OldTSD0 + 12, 0); + + cpw32_f(RxRingAddr, cp->ring_dma); + cpw32_f(RxRingAddr + 4, 0); + cpw32_f(TxRingAddr, cp->ring_dma + (sizeof(struct cp_desc) * CP_RX_RING_SIZE)); + cpw32_f(TxRingAddr + 4, 0); + + cpw16(MultiIntr, 0); + + cpw16(IntrMask, cp_intr_mask); + + cpw8_f (Cfg9346, Cfg9346_Lock); +} + +static int cp_refill_rx (struct cp_private *cp) +{ + unsigned i; + + for (i = 0; i < CP_RX_RING_SIZE; i++) { + struct sk_buff *skb; + + skb = dev_alloc_skb(cp->rx_buf_sz + RX_OFFSET); + if (!skb) + goto err_out; + + skb->dev = cp->dev; + skb_reserve(skb, RX_OFFSET); + skb_put(skb, cp->rx_buf_sz); + + cp->rx_skb[i].mapping = pci_map_single(cp->pdev, + skb->data, cp->rx_buf_sz, PCI_DMA_FROMDEVICE); + cp->rx_skb[i].skb = skb; + cp->rx_skb[i].frag = 0; + + if (i == (CP_RX_RING_SIZE - 1)) + cp->rx_ring[i].opts1 = + cpu_to_le32(DescOwn | RingEnd | cp->rx_buf_sz); + else + cp->rx_ring[i].opts1 = + cpu_to_le32(DescOwn | cp->rx_buf_sz); + cp->rx_ring[i].opts2 = 0; + cp->rx_ring[i].addr_lo = cpu_to_le32(cp->rx_skb[i].mapping); + cp->rx_ring[i].addr_hi = 0; + } + + return 0; + +err_out: + cp_clean_rings(cp); + return -ENOMEM; +} + +static int cp_init_rings (struct cp_private *cp) +{ + memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE); + cp->tx_ring[CP_TX_RING_SIZE - 1].opts1 = cpu_to_le32(RingEnd); + + cp->rx_tail = 0; + cp->tx_head = cp->tx_tail = 0; + + return cp_refill_rx (cp); +} + +static int cp_alloc_rings (struct cp_private *cp) +{ + cp->rx_ring = pci_alloc_consistent(cp->pdev, CP_RING_BYTES, &cp->ring_dma); + if (!cp->rx_ring) + return -ENOMEM; + cp->tx_ring = &cp->rx_ring[CP_RX_RING_SIZE]; + return cp_init_rings(cp); +} + +static void cp_clean_rings (struct cp_private *cp) +{ + unsigned i; + + memset(cp->rx_ring, 0, sizeof(struct cp_desc) * CP_RX_RING_SIZE); + memset(cp->tx_ring, 0, sizeof(struct cp_desc) * CP_TX_RING_SIZE); + + for (i = 0; i < CP_RX_RING_SIZE; i++) { + if (cp->rx_skb[i].skb) { + pci_unmap_single(cp->pdev, cp->rx_skb[i].mapping, + cp->rx_buf_sz, PCI_DMA_FROMDEVICE); + dev_kfree_skb(cp->rx_skb[i].skb); + } + } + + for (i = 0; i < CP_TX_RING_SIZE; i++) { + if (cp->tx_skb[i].skb) { + struct sk_buff *skb = cp->tx_skb[i].skb; + pci_unmap_single(cp->pdev, cp->tx_skb[i].mapping, + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); + cp->net_stats.tx_dropped++; + } + } + + memset(&cp->rx_skb, 0, sizeof(struct ring_info) * CP_RX_RING_SIZE); + memset(&cp->tx_skb, 0, sizeof(struct ring_info) * CP_TX_RING_SIZE); +} + +static void cp_free_rings (struct cp_private *cp) +{ + cp_clean_rings(cp); + pci_free_consistent(cp->pdev, CP_RING_BYTES, cp->rx_ring, cp->ring_dma); + cp->rx_ring = NULL; + cp->tx_ring = NULL; +} + +static int cp_open (struct net_device *dev) +{ + struct cp_private *cp = dev->priv; + int rc; + + if (netif_msg_ifup(cp)) + printk(KERN_DEBUG 
"%s: enabling interface\n", dev->name); + + cp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); + + rc = cp_alloc_rings(cp); + if (rc) + return rc; + + cp_init_hw(cp); + + rc = request_irq(dev->irq, cp_interrupt, SA_SHIRQ, dev->name, dev); + if (rc) + goto err_out_hw; + + netif_start_queue(dev); + + return 0; + +err_out_hw: + cp_stop_hw(cp); + cp_free_rings(cp); + return rc; +} + +static int cp_close (struct net_device *dev) +{ + struct cp_private *cp = dev->priv; + + if (netif_msg_ifdown(cp)) + printk(KERN_DEBUG "%s: disabling interface\n", dev->name); + + netif_stop_queue(dev); + cp_stop_hw(cp); + free_irq(dev->irq, dev); + cp_free_rings(cp); + return 0; +} + +static int cp_ethtool_ioctl (struct cp_private *cp, void *useraddr) +{ + u32 ethcmd; + + /* dev_ioctl() in ../../net/core/dev.c has already checked + capable(CAP_NET_ADMIN), so don't bother with that here. */ + + if (copy_from_user (ðcmd, useraddr, sizeof (ethcmd))) + return -EFAULT; + + switch (ethcmd) { + + case ETHTOOL_GDRVINFO: + { + struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; + strcpy (info.driver, DRV_NAME); + strcpy (info.version, DRV_VERSION); + strcpy (info.bus_info, cp->pdev->slot_name); + if (copy_to_user (useraddr, &info, sizeof (info))) + return -EFAULT; + return 0; + } + + default: + break; + } + + return -EOPNOTSUPP; +} + + +static int cp_ioctl (struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct cp_private *cp = dev->priv; + int rc = 0; + + switch (cmd) { + case SIOCETHTOOL: + return cp_ethtool_ioctl(cp, (void *) rq->ifr_data); + + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + + + +/* Serial EEPROM section. */ + +/* EEPROM_Ctrl bits. */ +#define EE_SHIFT_CLK 0x04 /* EEPROM shift clock. */ +#define EE_CS 0x08 /* EEPROM chip select. */ +#define EE_DATA_WRITE 0x02 /* EEPROM chip data in. */ +#define EE_WRITE_0 0x00 +#define EE_WRITE_1 0x02 +#define EE_DATA_READ 0x01 /* EEPROM chip data out. */ +#define EE_ENB (0x80 | EE_CS) + +/* Delay between EEPROM clock transitions. + No extra delay is needed with 33Mhz PCI, but 66Mhz may change this. + */ + +#define eeprom_delay() readl(ee_addr) + +/* The EEPROM commands include the alway-set leading bit. */ +#define EE_WRITE_CMD (5) +#define EE_READ_CMD (6) +#define EE_ERASE_CMD (7) + +static int __devinit read_eeprom (void *ioaddr, int location, int addr_len) +{ + int i; + unsigned retval = 0; + void *ee_addr = ioaddr + Cfg9346; + int read_cmd = location | (EE_READ_CMD << addr_len); + + writeb (EE_ENB & ~EE_CS, ee_addr); + writeb (EE_ENB, ee_addr); + eeprom_delay (); + + /* Shift the read command bits out. */ + for (i = 4 + addr_len; i >= 0; i--) { + int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; + writeb (EE_ENB | dataval, ee_addr); + eeprom_delay (); + writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); + eeprom_delay (); + } + writeb (EE_ENB, ee_addr); + eeprom_delay (); + + for (i = 16; i > 0; i--) { + writeb (EE_ENB | EE_SHIFT_CLK, ee_addr); + eeprom_delay (); + retval = + (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 : + 0); + writeb (EE_ENB, ee_addr); + eeprom_delay (); + } + + /* Terminate the EEPROM access. 
*/ + writeb (~EE_CS, ee_addr); + eeprom_delay (); + + return retval; +} + +static int __devinit cp_init_one (struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev; + struct cp_private *cp; + int rc; + void *regs; + long pciaddr; + unsigned addr_len, i; + u8 pci_rev, cache_size; + u16 pci_command; + +#ifndef MODULE + static int version_printed; + if (version_printed++ == 0) + printk("%s", version); +#endif + + pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev); + + if (pdev->vendor == PCI_VENDOR_ID_REALTEK && + pdev->device == PCI_DEVICE_ID_REALTEK_8139 && pci_rev < 0x20) { + printk(KERN_ERR PFX "pci dev %s (id %04x:%04x rev %02x) is not an 8139C+ compatible chip\n", + pdev->slot_name, pdev->vendor, pdev->device, pci_rev); + printk(KERN_ERR PFX "Ensure the \"8139too\" driver is installed!\n"); + return -ENODEV; + } + + dev = alloc_etherdev(sizeof(struct cp_private)); + if (!dev) + return -ENOMEM; + SET_MODULE_OWNER(dev); + cp = dev->priv; + cp->pdev = pdev; + cp->dev = dev; + cp->msg_enable = (debug < 0 ? CP_DEF_MSG_ENABLE : debug); + spin_lock_init (&cp->lock); + + rc = pci_enable_device(pdev); + if (rc) + goto err_out_free; + + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) + goto err_out_disable; + + if (pdev->irq < 2) { + rc = -EIO; + printk(KERN_ERR PFX "invalid irq (%d) for pci dev %s\n", + pdev->irq, pdev->slot_name); + goto err_out_res; + } + pciaddr = pci_resource_start(pdev, 1); + if (!pciaddr) { + rc = -EIO; + printk(KERN_ERR PFX "no MMIO resource for pci dev %s\n", + pdev->slot_name); + goto err_out_res; + } + if (pci_resource_len(pdev, 1) < CP_REGS_SIZE) { + rc = -EIO; + printk(KERN_ERR PFX "MMIO resource (%lx) too small on pci dev %s\n", + pci_resource_len(pdev, 1), pdev->slot_name); + goto err_out_res; + } + + regs = ioremap_nocache(pciaddr, CP_REGS_SIZE); + if (!regs) { + rc = -EIO; + printk(KERN_ERR PFX "Cannot map PCI MMIO (%lx@%lx) on pci dev %s\n", + pci_resource_len(pdev, 1), pciaddr, pdev->slot_name); + goto err_out_res; + } + dev->base_addr = (unsigned long) regs; + cp->regs = regs; + + cp_stop_hw(cp); + + /* read MAC address from EEPROM */ + addr_len = read_eeprom (regs, 0, 8) == 0x8129 ? 8 : 6; + for (i = 0; i < 3; i++) + ((u16 *) (dev->dev_addr))[i] = + le16_to_cpu (read_eeprom (regs, i + 7, addr_len)); + + dev->open = cp_open; + dev->stop = cp_close; + dev->set_multicast_list = cp_set_rx_mode; + dev->hard_start_xmit = cp_start_xmit; + dev->get_stats = cp_get_stats; + dev->do_ioctl = cp_ioctl; +#if 0 + dev->tx_timeout = cp_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; +#endif +#ifdef CP_TX_CHECKSUM + dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; +#endif + + dev->irq = pdev->irq; + + rc = register_netdev(dev); + if (rc) + goto err_out_iomap; + + printk (KERN_INFO "%s: %s at 0x%lx, " + "%02x:%02x:%02x:%02x:%02x:%02x, " + "IRQ %d\n", + dev->name, + "RTL-8139C+", + dev->base_addr, + dev->dev_addr[0], dev->dev_addr[1], + dev->dev_addr[2], dev->dev_addr[3], + dev->dev_addr[4], dev->dev_addr[5], + dev->irq); + + pci_set_drvdata(pdev, dev); + + /* + * Looks like this is necessary to deal with on all architectures, + * even this %$#%$# N440BX Intel based thing doesn't get it right. + * Ie. having two NICs in the machine, one will have the cache + * line set at boot time, the other will not. 
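+ * The PCI cache line size register counts 32-bit dwords, not bytes,
+ * hence the "cache_size <<= 2" below before comparing against
+ * SMP_CACHE_BYTES, and the ">> 2" when writing the corrected value
+ * back (e.g. a 32-byte line is stored as 8).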
+ */ + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_size); + cache_size <<= 2; + if (cache_size != SMP_CACHE_BYTES) { + printk(KERN_INFO "%s: PCI cache line size set incorrectly " + "(%i bytes) by BIOS/FW, ", dev->name, cache_size); + if (cache_size > SMP_CACHE_BYTES) + printk("expecting %i\n", SMP_CACHE_BYTES); + else { + printk("correcting to %i\n", SMP_CACHE_BYTES); + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, + SMP_CACHE_BYTES >> 2); + } + } + + /* enable busmastering and memory-write-invalidate */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + if (!(pci_command & PCI_COMMAND_INVALIDATE)) { + pci_command |= PCI_COMMAND_INVALIDATE; + pci_write_config_word(pdev, PCI_COMMAND, pci_command); + } + pci_set_master(pdev); + + return 0; + +err_out_iomap: + iounmap(regs); +err_out_res: + pci_release_regions(pdev); +err_out_disable: + pci_disable_device(pdev); +err_out_free: + kfree(dev); + return rc; +} + +static void __devexit cp_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct cp_private *cp = dev->priv; + + if (!dev) + BUG(); + unregister_netdev(dev); + iounmap(cp->regs); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + kfree(dev); +} + +static struct pci_driver cp_driver = { + name: DRV_NAME, + id_table: cp_pci_tbl, + probe: cp_init_one, + remove: __devexit_p(cp_remove_one), +}; + +static int __init cp_init (void) +{ +#ifdef MODULE + printk("%s", version); +#endif + return pci_module_init (&cp_driver); +} + +static void __exit cp_exit (void) +{ + pci_unregister_driver (&cp_driver); +} + +module_init(cp_init); +module_exit(cp_exit); diff --git a/xen-2.4.16/drivers/net/8139too.c b/xen-2.4.16/drivers/net/8139too.c new file mode 100644 index 0000000000..d1f0a04623 --- /dev/null +++ b/xen-2.4.16/drivers/net/8139too.c @@ -0,0 +1,2324 @@ + +#define CONFIG_8139TOO_8129 + +/* + + 8139too.c: A RealTek RTL-8139 Fast Ethernet driver for Linux. + + Maintained by Jeff Garzik + Copyright 2000,2001 Jeff Garzik + + Much code comes from Donald Becker's rtl8139.c driver, + versions 1.13 and older. This driver was originally based + on rtl8139.c version 1.07. Header of rtl8139.c version 1.13: + + ---------- + + Written 1997-2001 by Donald Becker. + This software may be used and distributed according to the + terms of the GNU General Public License (GPL), incorporated + herein by reference. Drivers based on or derived from this + code fall under the GPL and must retain the authorship, + copyright and license notice. This file is not a complete + program and may only be used when the entire operating + system is licensed under the GPL. + + This driver is for boards based on the RTL8129 and RTL8139 + PCI ethernet chips. + + The author may be reached as becker@scyld.com, or C/O Scyld + Computing Corporation 410 Severn Ave., Suite 210 Annapolis + MD 21403 + + Support and updates available at + http://www.scyld.com/network/rtl8139.html + + Twister-tuning table provided by Kinston + . + + ---------- + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Contributors: + + Donald Becker - he wrote the original driver, kudos to him! + (but please don't e-mail him for support, this isn't his driver) + + Tigran Aivazian - bug fixes, skbuff free cleanup + + Martin Mares - suggestions for PCI cleanup + + David S. 
Miller - PCI DMA and softnet updates + + Ernst Gill - fixes ported from BSD driver + + Daniel Kobras - identified specific locations of + posted MMIO write bugginess + + Gerard Sharp - bug fix, testing and feedback + + David Ford - Rx ring wrap fix + + Dan DeMaggio - swapped RTL8139 cards with me, and allowed me + to find and fix a crucial bug on older chipsets. + + Donald Becker/Chris Butterworth/Marcus Westergren - + Noticed various Rx packet size-related buglets. + + Santiago Garcia Mantinan - testing and feedback + + Jens David - 2.2.x kernel backports + + Martin Dennett - incredibly helpful insight on undocumented + features of the 8139 chips + + Jean-Jacques Michel - bug fix + + Tobias Ringström - Rx interrupt status checking suggestion + + Andrew Morton - Clear blocked signals, avoid + buffer overrun setting current->comm. + + Kalle Olavi Niemitalo - Wake-on-LAN ioctls + + Robert Kuebel - Save kernel thread from dying on any signal. + + Submitting bug reports: + + "rtl8139-diag -mmmaaavvveefN" output + enable RTL8139_DEBUG below, and look at 'dmesg' or kernel log + + See 8139too.txt for more details. + +*/ + +#define DRV_NAME "8139too" +#define DRV_VERSION "0.9.22" + + +#include +#include +//#include +//#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +//#include +#include +#include + +#define RTL8139_DRIVER_NAME DRV_NAME " Fast Ethernet driver " DRV_VERSION +#define PFX DRV_NAME ": " + + +/* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */ +#ifdef CONFIG_8139TOO_PIO +#define USE_IO_OPS 1 +#endif + +/* define to 1 to enable copious debugging info */ +#undef RTL8139_DEBUG + +/* define to 1 to disable lightweight runtime debugging checks */ +#undef RTL8139_NDEBUG + + +#ifdef RTL8139_DEBUG +/* note: prints function name for you */ +# define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args) +#else +# define DPRINTK(fmt, args...) +#endif + +#ifdef RTL8139_NDEBUG +# define assert(expr) do {} while (0) +#else +# define assert(expr) \ + if(!(expr)) { \ + printk( "Assertion failed! %s,%s,%s,line=%d\n", \ + #expr,__FILE__,__FUNCTION__,__LINE__); \ + } +#endif + + +/* A few user-configurable values. */ +/* media options */ +#define MAX_UNITS 8 +static int media[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; + +/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ +static int max_interrupt_work = 20; + +/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast). + The RTL chips use a 64 element hash table based on the Ethernet CRC. */ +static int multicast_filter_limit = 32; + +/* Size of the in-memory receive ring. */ +#define RX_BUF_LEN_IDX 2 /* 0==8K, 1==16K, 2==32K, 3==64K */ +#define RX_BUF_LEN (8192 << RX_BUF_LEN_IDX) +#define RX_BUF_PAD 16 +#define RX_BUF_WRAP_PAD 2048 /* spare padding to handle lack of packet wrap */ +#define RX_BUF_TOT_LEN (RX_BUF_LEN + RX_BUF_PAD + RX_BUF_WRAP_PAD) + +/* Number of Tx descriptor registers. */ +#define NUM_TX_DESC 4 + +/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/ +#define MAX_ETH_FRAME_SIZE 1536 + +/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */ +#define TX_BUF_SIZE MAX_ETH_FRAME_SIZE +#define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC) + +/* PCI Tuning Parameters + Threshold is bytes transferred to chip before transmission starts. */ +#define TX_FIFO_THRESH 256 /* In bytes, rounded down to 32 byte units. 
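+ A lower threshold starts transmission sooner (less latency) at the
+ risk of a Tx FIFO underrun if the PCI bus cannot refill the FIFO
+ fast enough; drivers of this family typically react to a TxUnderrun
+ status by raising the threshold.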
*/ + +/* The following settings are log_2(bytes)-4: 0 == 16 bytes .. 6==1024, 7==end of packet. */ +#define RX_FIFO_THRESH 7 /* Rx buffer level before first PCI xfer. */ +#define RX_DMA_BURST 7 /* Maximum PCI burst, '6' is 1024 */ +#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ +#define TX_RETRY 8 /* 0-15. retries = 16 + (TX_RETRY * 16) */ + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (6*HZ) + + +enum { + HAS_MII_XCVR = 0x010000, + HAS_CHIP_XCVR = 0x020000, + HAS_LNK_CHNG = 0x040000, +}; + +#define RTL_MIN_IO_SIZE 0x80 +#define RTL8139B_IO_SIZE 256 + +#define RTL8129_CAPS HAS_MII_XCVR +#define RTL8139_CAPS HAS_CHIP_XCVR|HAS_LNK_CHNG + +typedef enum { + RTL8139 = 0, + RTL8139_CB, + SMC1211TX, + /*MPX5030,*/ + DELTA8139, + ADDTRON8139, + DFE538TX, + DFE690TXD, + RTL8129, +} board_t; + + +/* indexed by board_t, above */ +static struct { + const char *name; + u32 hw_flags; +} board_info[] __devinitdata = { + { "RealTek RTL8139 Fast Ethernet", RTL8139_CAPS }, + { "RealTek RTL8139B PCI/CardBus", RTL8139_CAPS }, + { "SMC1211TX EZCard 10/100 (RealTek RTL8139)", RTL8139_CAPS }, +/* { MPX5030, "Accton MPX5030 (RealTek RTL8139)", RTL8139_CAPS },*/ + { "Delta Electronics 8139 10/100BaseTX", RTL8139_CAPS }, + { "Addtron Technolgy 8139 10/100BaseTX", RTL8139_CAPS }, + { "D-Link DFE-538TX (RealTek RTL8139)", RTL8139_CAPS }, + { "D-Link DFE-690TXD (RealTek RTL8139)", RTL8139_CAPS }, + { "RealTek RTL8129", RTL8129_CAPS }, +}; + + +static struct pci_device_id rtl8139_pci_tbl[] __devinitdata = { + {0x10ec, 0x8139, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139 }, + {0x10ec, 0x8138, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8139_CB }, + {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, SMC1211TX }, +/* {0x1113, 0x1211, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MPX5030 },*/ + {0x1500, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DELTA8139 }, + {0x4033, 0x1360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ADDTRON8139 }, + {0x1186, 0x1300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE538TX }, + {0x1186, 0x1340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DFE690TXD }, + +#ifdef CONFIG_8139TOO_8129 + {0x10ec, 0x8129, PCI_ANY_ID, PCI_ANY_ID, 0, 0, RTL8129 }, +#endif + + /* some crazy cards report invalid vendor ids like + * 0x0001 here. The other ids are valid and constant, + * so we simply don't match on the main vendor id. + */ + {PCI_ANY_ID, 0x8139, 0x10ec, 0x8139, 0, 0, RTL8139 }, + {PCI_ANY_ID, 0x8139, 0x1186, 0x1300, 0, 0, DFE538TX }, + + {0,} +}; +MODULE_DEVICE_TABLE (pci, rtl8139_pci_tbl); + + +/* The rest of these values should never change. */ + +/* Symbolic offsets to registers. */ +enum RTL8139_registers { + MAC0 = 0, /* Ethernet hardware address. */ + MAR0 = 8, /* Multicast filter. */ + TxStatus0 = 0x10, /* Transmit status (Four 32bit registers). */ + TxAddr0 = 0x20, /* Tx descriptors (also four 32bit). */ + RxBuf = 0x30, + ChipCmd = 0x37, + RxBufPtr = 0x38, + RxBufAddr = 0x3A, + IntrMask = 0x3C, + IntrStatus = 0x3E, + TxConfig = 0x40, + ChipVersion = 0x43, + RxConfig = 0x44, + Timer = 0x48, /* A general-purpose counter. */ + RxMissed = 0x4C, /* 24 bits valid, write clears. */ + Cfg9346 = 0x50, + Config0 = 0x51, + Config1 = 0x52, + FlashReg = 0x54, + MediaStatus = 0x58, + Config3 = 0x59, + Config4 = 0x5A, /* absent on RTL-8139A */ + HltClk = 0x5B, + MultiIntr = 0x5C, + TxSummary = 0x60, + BasicModeCtrl = 0x62, + BasicModeStatus = 0x64, + NWayAdvert = 0x66, + NWayLPAR = 0x68, + NWayExpansion = 0x6A, + /* Undocumented registers, but required for proper operation. 
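+ FIFOTMS/CSCR and the PARA78/PARA7c pair are driven by the
+ twister-tuning sequence (the RealTek-supplied table credited in the
+ header above) to compensate for cable length on older 8139 parts;
+ the values written there are vendor magic numbers.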
*/ + FIFOTMS = 0x70, /* FIFO Control and test. */ + CSCR = 0x74, /* Chip Status and Configuration Register. */ + PARA78 = 0x78, + PARA7c = 0x7c, /* Magic transceiver parameter register. */ + Config5 = 0xD8, /* absent on RTL-8139A */ +}; + +enum ClearBitMasks { + MultiIntrClear = 0xF000, + ChipCmdClear = 0xE2, + Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1), +}; + +enum ChipCmdBits { + CmdReset = 0x10, + CmdRxEnb = 0x08, + CmdTxEnb = 0x04, + RxBufEmpty = 0x01, +}; + +/* Interrupt register bits, using my own meaningful names. */ +enum IntrStatusBits { + PCIErr = 0x8000, + PCSTimeout = 0x4000, + RxFIFOOver = 0x40, + RxUnderrun = 0x20, + RxOverflow = 0x10, + TxErr = 0x08, + TxOK = 0x04, + RxErr = 0x02, + RxOK = 0x01, + + RxAckBits = RxFIFOOver | RxOverflow | RxOK, +}; + +enum TxStatusBits { + TxHostOwns = 0x2000, + TxUnderrun = 0x4000, + TxStatOK = 0x8000, + TxOutOfWindow = 0x20000000, + TxAborted = 0x40000000, + TxCarrierLost = 0x80000000, +}; +enum RxStatusBits { + RxMulticast = 0x8000, + RxPhysical = 0x4000, + RxBroadcast = 0x2000, + RxBadSymbol = 0x0020, + RxRunt = 0x0010, + RxTooLong = 0x0008, + RxCRCErr = 0x0004, + RxBadAlign = 0x0002, + RxStatusOK = 0x0001, +}; + +/* Bits in RxConfig. */ +enum rx_mode_bits { + AcceptErr = 0x20, + AcceptRunt = 0x10, + AcceptBroadcast = 0x08, + AcceptMulticast = 0x04, + AcceptMyPhys = 0x02, + AcceptAllPhys = 0x01, +}; + +/* Bits in TxConfig. */ +enum tx_config_bits { + TxIFG1 = (1 << 25), /* Interframe Gap Time */ + TxIFG0 = (1 << 24), /* Enabling these bits violates IEEE 802.3 */ + TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */ + TxCRC = (1 << 16), /* DISABLE appending CRC to end of Tx packets */ + TxClearAbt = (1 << 0), /* Clear abort (WO) */ + TxDMAShift = 8, /* DMA burst value (0-7) is shifted this many bits */ + TxRetryShift = 4, /* TXRR value (0-15) is shifted this many bits */ + + TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */ +}; + +/* Bits in Config1 */ +enum Config1Bits { + Cfg1_PM_Enable = 0x01, + Cfg1_VPD_Enable = 0x02, + Cfg1_PIO = 0x04, + Cfg1_MMIO = 0x08, + LWAKE = 0x10, /* not on 8139, 8139A */ + Cfg1_Driver_Load = 0x20, + Cfg1_LED0 = 0x40, + Cfg1_LED1 = 0x80, + SLEEP = (1 << 1), /* only on 8139, 8139A */ + PWRDN = (1 << 0), /* only on 8139, 8139A */ +}; + +/* Bits in Config3 */ +enum Config3Bits { + Cfg3_FBtBEn = (1 << 0), /* 1 = Fast Back to Back */ + Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */ + Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */ + Cfg3_CardB_En = (1 << 3), /* 1 = enable CardBus registers */ + Cfg3_LinkUp = (1 << 4), /* 1 = wake up on link up */ + Cfg3_Magic = (1 << 5), /* 1 = wake up on Magic Packet (tm) */ + Cfg3_PARM_En = (1 << 6), /* 0 = software can set twister parameters */ + Cfg3_GNTSel = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */ +}; + +/* Bits in Config4 */ +enum Config4Bits { + LWPTN = (1 << 2), /* not on 8139, 8139A */ +}; + +/* Bits in Config5 */ +enum Config5Bits { + Cfg5_PME_STS = (1 << 0), /* 1 = PCI reset resets PME_Status */ + Cfg5_LANWake = (1 << 1), /* 1 = enable LANWake signal */ + Cfg5_LDPS = (1 << 2), /* 0 = save power when link is down */ + Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */ + Cfg5_UWF = (1 << 4), /* 1 = accept unicast wakeup frame */ + Cfg5_MWF = (1 << 5), /* 1 = accept multicast wakeup frame */ + Cfg5_BWF = (1 << 6), /* 1 = accept broadcast wakeup frame */ +}; + +enum RxConfigBits { + /* rx fifo threshold */ + RxCfgFIFOShift = 13, + RxCfgFIFONone = (7 << RxCfgFIFOShift), + + /* Max DMA 
burst */ + RxCfgDMAShift = 8, + RxCfgDMAUnlimited = (7 << RxCfgDMAShift), + + /* rx ring buffer length */ + RxCfgRcv8K = 0, + RxCfgRcv16K = (1 << 11), + RxCfgRcv32K = (1 << 12), + RxCfgRcv64K = (1 << 11) | (1 << 12), + + /* Disable packet wrap at end of Rx buffer */ + RxNoWrap = (1 << 7), +}; + + +/* Twister tuning parameters from RealTek. + Completely undocumented, but required to tune bad links. */ +enum CSCRBits { + CSCR_LinkOKBit = 0x0400, + CSCR_LinkChangeBit = 0x0800, + CSCR_LinkStatusBits = 0x0f000, + CSCR_LinkDownOffCmd = 0x003c0, + CSCR_LinkDownCmd = 0x0f3c0, +}; + + +enum Cfg9346Bits { + Cfg9346_Lock = 0x00, + Cfg9346_Unlock = 0xC0, +}; + + +#define PARA78_default 0x78fa8388 +#define PARA7c_default 0xcb38de43 /* param[0][3] */ +#define PARA7c_xxx 0xcb38de43 +static const unsigned long param[4][4] = { + {0xcb39de43, 0xcb39ce43, 0xfb38de03, 0xcb38de43}, + {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, + {0xcb39de43, 0xcb39ce43, 0xcb39ce83, 0xcb39ce83}, + {0xbb39de43, 0xbb39ce43, 0xbb39ce83, 0xbb39ce83} +}; + +typedef enum { + CH_8139 = 0, + CH_8139_K, + CH_8139A, + CH_8139B, + CH_8130, + CH_8139C, +} chip_t; + +enum chip_flags { + HasHltClk = (1 << 0), + HasLWake = (1 << 1), +}; + + +/* directly indexed by chip_t, above */ +const static struct { + const char *name; + u8 version; /* from RTL8139C docs */ + u32 RxConfigMask; /* should clear the bits supported by this chip */ + u32 flags; +} rtl_chip_info[] = { + { "RTL-8139", + 0x40, + 0xf0fe0040, /* XXX copied from RTL8139A, verify */ + HasHltClk, + }, + + { "RTL-8139 rev K", + 0x60, + 0xf0fe0040, + HasHltClk, + }, + + { "RTL-8139A", + 0x70, + 0xf0fe0040, + HasHltClk, /* XXX undocumented? */ + }, + + { "RTL-8139B", + 0x78, + 0xf0fc0040, + HasLWake, + }, + + { "RTL-8130", + 0x7C, + 0xf0fe0040, /* XXX copied from RTL8139A, verify */ + HasLWake, + }, + + { "RTL-8139C", + 0x74, + 0xf0fc0040, /* XXX copied from RTL8139B, verify */ + HasLWake, + }, + +}; + +struct rtl_extra_stats { + unsigned long early_rx; + unsigned long tx_buf_mapped; + unsigned long tx_timeouts; +}; + +struct rtl8139_private { + void *mmio_addr; + int drv_flags; + struct pci_dev *pci_dev; + struct net_device_stats stats; + unsigned char *rx_ring; + unsigned int cur_rx; /* Index into the Rx buffer of next Rx pkt. */ + unsigned int tx_flag; + unsigned long cur_tx; + unsigned long dirty_tx; + unsigned char *tx_buf[NUM_TX_DESC]; /* Tx bounce buffers */ + unsigned char *tx_bufs; /* Tx bounce buffer region. */ + dma_addr_t rx_ring_dma; + dma_addr_t tx_bufs_dma; + signed char phys[4]; /* MII device addresses. */ + char twistie, twist_row, twist_col; /* Twister tune state. */ + unsigned int full_duplex:1; /* Full-duplex operation requested. */ + unsigned int duplex_lock:1; + unsigned int default_port:4; /* Last dev->if_port value. */ + unsigned int media2:4; /* Secondary monitored media port. */ + unsigned int medialock:1; /* Don't sense media type. */ + unsigned int mediasense:1; /* Media sensing in progress. 
*/ + spinlock_t lock; + chip_t chipset; + u32 rx_config; + struct rtl_extra_stats xstats; + int time_to_die; +}; + +MODULE_AUTHOR ("Jeff Garzik "); +MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver"); +MODULE_LICENSE("GPL"); + +MODULE_PARM (multicast_filter_limit, "i"); +MODULE_PARM (max_interrupt_work, "i"); +MODULE_PARM (media, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM (full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM_DESC (multicast_filter_limit, "8139too maximum number of filtered multicast addresses"); +MODULE_PARM_DESC (max_interrupt_work, "8139too maximum events handled per interrupt"); +MODULE_PARM_DESC (media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps"); +MODULE_PARM_DESC (full_duplex, "8139too: Force full duplex for board(s) (1)"); + +static int read_eeprom (void *ioaddr, int location, int addr_len); +static int rtl8139_open (struct net_device *dev); +static int mdio_read (struct net_device *dev, int phy_id, int location); +static void mdio_write (struct net_device *dev, int phy_id, int location, + int val); +static void rtl8139_tx_timeout (struct net_device *dev); +static void rtl8139_init_ring (struct net_device *dev); +static int rtl8139_start_xmit (struct sk_buff *skb, + struct net_device *dev); +static void rtl8139_interrupt (int irq, void *dev_instance, + struct pt_regs *regs); +static int rtl8139_close (struct net_device *dev); +static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd); +static struct net_device_stats *rtl8139_get_stats (struct net_device *dev); +static inline u32 ether_crc (int length, unsigned char *data); +static void rtl8139_set_rx_mode (struct net_device *dev); +static void __set_rx_mode (struct net_device *dev); +static void rtl8139_hw_start (struct net_device *dev); + +#ifdef USE_IO_OPS + +#define RTL_R8(reg) inb (((unsigned long)ioaddr) + (reg)) +#define RTL_R16(reg) inw (((unsigned long)ioaddr) + (reg)) +#define RTL_R32(reg) ((unsigned long) inl (((unsigned long)ioaddr) + (reg))) +#define RTL_W8(reg, val8) outb ((val8), ((unsigned long)ioaddr) + (reg)) +#define RTL_W16(reg, val16) outw ((val16), ((unsigned long)ioaddr) + (reg)) +#define RTL_W32(reg, val32) outl ((val32), ((unsigned long)ioaddr) + (reg)) +#define RTL_W8_F RTL_W8 +#define RTL_W16_F RTL_W16 +#define RTL_W32_F RTL_W32 +#undef readb +#undef readw +#undef readl +#undef writeb +#undef writew +#undef writel +#define readb(addr) inb((unsigned long)(addr)) +#define readw(addr) inw((unsigned long)(addr)) +#define readl(addr) inl((unsigned long)(addr)) +#define writeb(val,addr) outb((val),(unsigned long)(addr)) +#define writew(val,addr) outw((val),(unsigned long)(addr)) +#define writel(val,addr) outl((val),(unsigned long)(addr)) + +#else + +/* write MMIO register, with flush */ +/* Flush avoids rtl8139 bug w/ posted MMIO writes */ +#define RTL_W8_F(reg, val8) do { writeb ((val8), ioaddr + (reg)); readb (ioaddr + (reg)); } while (0) +#define RTL_W16_F(reg, val16) do { writew ((val16), ioaddr + (reg)); readw (ioaddr + (reg)); } while (0) +#define RTL_W32_F(reg, val32) do { writel ((val32), ioaddr + (reg)); readl (ioaddr + (reg)); } while (0) + + +#define MMIO_FLUSH_AUDIT_COMPLETE 1 +#if MMIO_FLUSH_AUDIT_COMPLETE + +/* write MMIO register */ +#define RTL_W8(reg, val8) writeb ((val8), ioaddr + (reg)) +#define RTL_W16(reg, val16) writew ((val16), ioaddr + (reg)) +#define RTL_W32(reg, val32) writel ((val32), ioaddr + (reg)) + +#else + +/* write MMIO register, then flush */ +#define RTL_W8 RTL_W8_F +#define RTL_W16 RTL_W16_F +#define 
RTL_W32 RTL_W32_F + +#endif /* MMIO_FLUSH_AUDIT_COMPLETE */ + +/* read MMIO register */ +#define RTL_R8(reg) readb (ioaddr + (reg)) +#define RTL_R16(reg) readw (ioaddr + (reg)) +#define RTL_R32(reg) ((unsigned long) readl (ioaddr + (reg))) + +#endif /* USE_IO_OPS */ + + +static const u16 rtl8139_intr_mask = + PCIErr | PCSTimeout | RxUnderrun | RxOverflow | RxFIFOOver | + TxErr | TxOK | RxErr | RxOK; + +static const unsigned int rtl8139_rx_config = + RxCfgRcv32K | RxNoWrap | + (RX_FIFO_THRESH << RxCfgFIFOShift) | + (RX_DMA_BURST << RxCfgDMAShift); + +static const unsigned int rtl8139_tx_config = + (TX_DMA_BURST << TxDMAShift) | (TX_RETRY << TxRetryShift); + +static void __rtl8139_cleanup_dev (struct net_device *dev) +{ + struct rtl8139_private *tp; + struct pci_dev *pdev; + + assert (dev != NULL); + assert (dev->priv != NULL); + + tp = dev->priv; + assert (tp->pci_dev != NULL); + pdev = tp->pci_dev; + +#ifndef USE_IO_OPS + if (tp->mmio_addr) + iounmap (tp->mmio_addr); +#endif /* !USE_IO_OPS */ + + /* it's ok to call this even if we have no regions to free */ + pci_release_regions (pdev); + +#ifndef RTL8139_NDEBUG + /* poison memory before freeing */ + memset (dev, 0xBC, + sizeof (struct net_device) + + sizeof (struct rtl8139_private)); +#endif /* RTL8139_NDEBUG */ + + kfree (dev); + + pci_set_drvdata (pdev, NULL); +} + + +static void rtl8139_chip_reset (void *ioaddr) +{ + int i; + + /* Soft reset the chip. */ + RTL_W8 (ChipCmd, CmdReset); + + /* Check that the chip has finished the reset. */ + for (i = 1000; i > 0; i--) { + barrier(); + if ((RTL_R8 (ChipCmd) & CmdReset) == 0) + break; + udelay (10); + } +} + + +static int __devinit rtl8139_init_board (struct pci_dev *pdev, + struct net_device **dev_out) +{ + void *ioaddr; + struct net_device *dev; + struct rtl8139_private *tp; + u8 tmp8; + int rc; + unsigned int i; + u32 pio_start, pio_end, pio_flags, pio_len; + unsigned long mmio_start, mmio_end, mmio_flags, mmio_len; + u32 tmp; + + assert (pdev != NULL); + + *dev_out = NULL; + + /* dev and dev->priv zeroed in alloc_etherdev */ + dev = alloc_etherdev (sizeof (*tp)); + if (dev == NULL) { + printk (KERN_ERR PFX "%s: Unable to alloc new net device\n", pdev->slot_name); + return -ENOMEM; + } + SET_MODULE_OWNER(dev); + tp = dev->priv; + tp->pci_dev = pdev; + + /* enable device (incl. 
PCI PM wakeup and hotplug setup) */ + rc = pci_enable_device (pdev); + if (rc) + goto err_out; + + pio_start = pci_resource_start (pdev, 0); + pio_end = pci_resource_end (pdev, 0); + pio_flags = pci_resource_flags (pdev, 0); + pio_len = pci_resource_len (pdev, 0); + + mmio_start = pci_resource_start (pdev, 1); + mmio_end = pci_resource_end (pdev, 1); + mmio_flags = pci_resource_flags (pdev, 1); + mmio_len = pci_resource_len (pdev, 1); + + /* set this immediately, we need to know before + * we talk to the chip directly */ + DPRINTK("PIO region size == 0x%02X\n", pio_len); + DPRINTK("MMIO region size == 0x%02lX\n", mmio_len); + +#ifdef USE_IO_OPS + /* make sure PCI base addr 0 is PIO */ + if (!(pio_flags & IORESOURCE_IO)) { + printk (KERN_ERR PFX "%s: region #0 not a PIO resource, aborting\n", pdev->slot_name); + rc = -ENODEV; + goto err_out; + } + /* check for weird/broken PCI region reporting */ + if (pio_len < RTL_MIN_IO_SIZE) { + printk (KERN_ERR PFX "%s: Invalid PCI I/O region size(s), aborting\n", pdev->slot_name); + rc = -ENODEV; + goto err_out; + } +#else + /* make sure PCI base addr 1 is MMIO */ + if (!(mmio_flags & IORESOURCE_MEM)) { + printk (KERN_ERR PFX "%s: region #1 not an MMIO resource, aborting\n", pdev->slot_name); + rc = -ENODEV; + goto err_out; + } + if (mmio_len < RTL_MIN_IO_SIZE) { + printk (KERN_ERR PFX "%s: Invalid PCI mem region size(s), aborting\n", pdev->slot_name); + rc = -ENODEV; + goto err_out; + } +#endif + + rc = pci_request_regions (pdev, "8139too"); + if (rc) + goto err_out; + + /* enable PCI bus-mastering */ + pci_set_master (pdev); + +#ifdef USE_IO_OPS + ioaddr = (void *) pio_start; + dev->base_addr = pio_start; + tp->mmio_addr = ioaddr; +#else + /* ioremap MMIO region */ + ioaddr = ioremap (mmio_start, mmio_len); + if (ioaddr == NULL) { + printk (KERN_ERR PFX "%s: cannot remap MMIO, aborting\n", pdev->slot_name); + rc = -EIO; + goto err_out; + } + dev->base_addr = (long) ioaddr; + tp->mmio_addr = ioaddr; +#endif /* USE_IO_OPS */ + + /* Bring old chips out of low-power mode. 
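(A note on the odd-looking character writes that follow: HltClk takes ASCII + command bytes, 'R' to let the internal clock run and 'H' to halt it for power + saving, as the later comment "'R' would leave the clock running" indicates; the + letters are the register's protocol, not a bit mask.) 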
*/ + RTL_W8 (HltClk, 'R'); + + /* check for missing/broken hardware */ + if (RTL_R32 (TxConfig) == 0xFFFFFFFF) { + printk (KERN_ERR PFX "%s: Chip not responding, ignoring board\n", + pdev->slot_name); + rc = -EIO; + goto err_out; + } + + /* identify chip attached to board */ + tmp = RTL_R8 (ChipVersion); + for (i = 0; i < ARRAY_SIZE (rtl_chip_info); i++) + if (tmp == rtl_chip_info[i].version) { + tp->chipset = i; + goto match; + } + + /* if unknown chip, assume array element #0, original RTL-8139 in this case */ + printk (KERN_DEBUG PFX "%s: unknown chip version, assuming RTL-8139\n", + pdev->slot_name); + printk (KERN_DEBUG PFX "%s: TxConfig = 0x%lx\n", pdev->slot_name, RTL_R32 (TxConfig)); + tp->chipset = 0; + +match: + DPRINTK ("chipset id (%d) == index %d, '%s'\n", + tmp, + tp->chipset, + rtl_chip_info[tp->chipset].name); + + if (tp->chipset >= CH_8139B) { + u8 new_tmp8 = tmp8 = RTL_R8 (Config1); + DPRINTK("PCI PM wakeup\n"); + if ((rtl_chip_info[tp->chipset].flags & HasLWake) && + (tmp8 & LWAKE)) + new_tmp8 &= ~LWAKE; + new_tmp8 |= Cfg1_PM_Enable; + if (new_tmp8 != tmp8) { + RTL_W8 (Cfg9346, Cfg9346_Unlock); + RTL_W8 (Config1, new_tmp8); + RTL_W8 (Cfg9346, Cfg9346_Lock); + } + if (rtl_chip_info[tp->chipset].flags & HasLWake) { + tmp8 = RTL_R8 (Config4); + if (tmp8 & LWPTN) + RTL_W8 (Config4, tmp8 & ~LWPTN); + } + } else { + DPRINTK("Old chip wakeup\n"); + tmp8 = RTL_R8 (Config1); + tmp8 &= ~(SLEEP | PWRDN); + RTL_W8 (Config1, tmp8); + } + + rtl8139_chip_reset (ioaddr); + + *dev_out = dev; + return 0; + +err_out: + __rtl8139_cleanup_dev (dev); + return rc; +} + + +static int __devinit rtl8139_init_one (struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev = NULL; + struct rtl8139_private *tp; + int i, addr_len, option; + void *ioaddr; + static int board_idx = -1; + u8 pci_rev; + + assert (pdev != NULL); + assert (ent != NULL); + + board_idx++; + + /* when we're built into the kernel, the driver version message + * is only printed if at least one 8139 board has been found + */ +#ifndef MODULE + { + static int printed_version; + if (!printed_version++) + printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); + } +#endif + + pci_read_config_byte(pdev, PCI_REVISION_ID, &pci_rev); + + if (pdev->vendor == PCI_VENDOR_ID_REALTEK && + pdev->device == PCI_DEVICE_ID_REALTEK_8139 && pci_rev >= 0x20) { + printk(KERN_INFO PFX "pci dev %s (id %04x:%04x rev %02x) is an enhanced 8139C+ chip\n", + pdev->slot_name, pdev->vendor, pdev->device, pci_rev); + printk(KERN_INFO PFX "Ensure the \"8139cp\" driver is installed!\n"); + return -ENODEV; /* force use of better driver */ + } + + i = rtl8139_init_board (pdev, &dev); + if (i < 0) + return i; + + tp = dev->priv; + ioaddr = tp->mmio_addr; + + assert (ioaddr != NULL); + assert (dev != NULL); + assert (tp != NULL); + + addr_len = read_eeprom (ioaddr, 0, 8) == 0x8129 ? 8 : 6; + for (i = 0; i < 3; i++) + ((u16 *) (dev->dev_addr))[i] = + le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len)); + + /* The RTL8139-specific entries in the device structure. 
*/ + dev->open = rtl8139_open; + dev->hard_start_xmit = rtl8139_start_xmit; + dev->stop = rtl8139_close; + dev->get_stats = rtl8139_get_stats; + dev->set_multicast_list = rtl8139_set_rx_mode; + dev->do_ioctl = netdev_ioctl; + dev->tx_timeout = rtl8139_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; + dev->features |= NETIF_F_SG; + + dev->irq = pdev->irq; + + /* dev->priv/tp zeroed and aligned in init_etherdev */ + tp = dev->priv; + + /* note: tp->chipset set in rtl8139_init_board */ + tp->drv_flags = board_info[ent->driver_data].hw_flags; + tp->mmio_addr = ioaddr; + spin_lock_init (&tp->lock); + + /* dev is fully set up and ready to use now */ + DPRINTK("about to register device named %s (%p)...\n", dev->name, dev); + i = register_netdev (dev); + if (i) goto err_out; + + pci_set_drvdata (pdev, dev); + + printk (KERN_INFO "%s: %s at 0x%lx, " + "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x, " + "IRQ %d\n", + dev->name, + board_info[ent->driver_data].name, + dev->base_addr, + dev->dev_addr[0], dev->dev_addr[1], + dev->dev_addr[2], dev->dev_addr[3], + dev->dev_addr[4], dev->dev_addr[5], + dev->irq); + + printk (KERN_DEBUG "%s: Identified 8139 chip type '%s'\n", + dev->name, rtl_chip_info[tp->chipset].name); + + /* Find the connected MII xcvrs. + Doing this in open() would allow detecting external xcvrs later, but + takes too much time. */ +#ifdef CONFIG_8139TOO_8129 + if (tp->drv_flags & HAS_MII_XCVR) { + int phy, phy_idx = 0; + for (phy = 0; phy < 32 && phy_idx < sizeof(tp->phys); phy++) { + int mii_status = mdio_read(dev, phy, 1); + if (mii_status != 0xffff && mii_status != 0x0000) { + u16 advertising = mdio_read(dev, phy, 4); + tp->phys[phy_idx++] = phy; + printk(KERN_INFO "%s: MII transceiver %d status 0x%4.4x " + "advertising %4.4x.\n", + dev->name, phy, mii_status, advertising); + } + } + if (phy_idx == 0) { + printk(KERN_INFO "%s: No MII transceivers found! Assuming SYM " + "transceiver.\n", + dev->name); + tp->phys[0] = 32; + } + } else +#endif + tp->phys[0] = 32; + + /* The lower four bits are the media type. */ + option = (board_idx >= MAX_UNITS) ? 0 : media[board_idx]; + if (option > 0) { + tp->full_duplex = (option & 0x210) ? 1 : 0; + tp->default_port = option & 0xFF; + if (tp->default_port) + tp->medialock = 1; + } + if (board_idx < MAX_UNITS && full_duplex[board_idx] > 0) + tp->full_duplex = full_duplex[board_idx]; + if (tp->full_duplex) { + printk(KERN_INFO "%s: Media type forced to Full Duplex.\n", dev->name); + /* Changing the MII-advertised media might prevent + re-connection. */ + tp->duplex_lock = 1; + } + if (tp->default_port) { + printk(KERN_INFO " Forcing %dMbps %s-duplex operation.\n", + (option & 0x20 ? 100 : 10), + (option & 0x10 ? "full" : "half")); + mdio_write(dev, tp->phys[0], 0, + ((option & 0x20) ? 0x2000 : 0) | /* 100Mbps? */ + ((option & 0x10) ? 0x0100 : 0)); /* Full duplex? */ + } + + /* Put the chip into low-power mode. */ + if (rtl_chip_info[tp->chipset].flags & HasHltClk) + RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ + + return 0; + +err_out: + __rtl8139_cleanup_dev (dev); + return i; +} + + +static void __devexit rtl8139_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct rtl8139_private *np; + + assert (dev != NULL); + np = dev->priv; + assert (np != NULL); + + unregister_netdev (dev); + + __rtl8139_cleanup_dev (dev); +} + + +/* Serial EEPROM section. */ + +/* EEPROM_Ctrl bits. */ +#define EE_SHIFT_CLK 0x04 /* EEPROM shift clock. */ +#define EE_CS 0x08 /* EEPROM chip select. 
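(These EE_* bits bit-bang a 93C46-style serial EEPROM through the Cfg9346 + register: read_eeprom() below shifts EE_READ_CMD plus the word address out on + EE_DATA_WRITE while toggling EE_SHIFT_CLK, then clocks the 16 result bits back + in on EE_DATA_READ -- a summary of the code that follows, not extra hardware + documentation.) 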
*/ +#define EE_DATA_WRITE 0x02 /* EEPROM chip data in. */ +#define EE_WRITE_0 0x00 +#define EE_WRITE_1 0x02 +#define EE_DATA_READ 0x01 /* EEPROM chip data out. */ +#define EE_ENB (0x80 | EE_CS) + +/* Delay between EEPROM clock transitions. + No extra delay is needed with 33MHz PCI, but 66MHz may change this. + */ + +#define eeprom_delay() readl(ee_addr) + +/* The EEPROM commands include the always-set leading bit. */ +#define EE_WRITE_CMD (5) +#define EE_READ_CMD (6) +#define EE_ERASE_CMD (7) + +static int __devinit read_eeprom (void *ioaddr, int location, int addr_len) +{ + int i; + unsigned retval = 0; + void *ee_addr = ioaddr + Cfg9346; + int read_cmd = location | (EE_READ_CMD << addr_len); + + writeb (EE_ENB & ~EE_CS, ee_addr); + writeb (EE_ENB, ee_addr); + eeprom_delay (); + + /* Shift the read command bits out. */ + for (i = 4 + addr_len; i >= 0; i--) { + int dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; + writeb (EE_ENB | dataval, ee_addr); + eeprom_delay (); + writeb (EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); + eeprom_delay (); + } + writeb (EE_ENB, ee_addr); + eeprom_delay (); + + for (i = 16; i > 0; i--) { + writeb (EE_ENB | EE_SHIFT_CLK, ee_addr); + eeprom_delay (); + retval = + (retval << 1) | ((readb (ee_addr) & EE_DATA_READ) ? 1 : + 0); + writeb (EE_ENB, ee_addr); + eeprom_delay (); + } + + /* Terminate the EEPROM access. */ + writeb (~EE_CS, ee_addr); + eeprom_delay (); + + return retval; +} + +/* MII serial management: mostly bogus for now. */ +/* Read and write the MII management registers using software-generated + serial MDIO protocol. + The maximum data clock rate is 2.5 MHz. The minimum timing is usually + met by back-to-back PCI I/O cycles, but we insert a delay to avoid + "overclocking" issues. */ +#define MDIO_DIR 0x80 +#define MDIO_DATA_OUT 0x04 +#define MDIO_DATA_IN 0x02 +#define MDIO_CLK 0x01 +#define MDIO_WRITE0 (MDIO_DIR) +#define MDIO_WRITE1 (MDIO_DIR | MDIO_DATA_OUT) + +#define mdio_delay(mdio_addr) readb(mdio_addr) + + +static char mii_2_8139_map[8] = { + BasicModeCtrl, + BasicModeStatus, + 0, + 0, + NWayAdvert, + NWayLPAR, + NWayExpansion, + 0 +}; + + +#ifdef CONFIG_8139TOO_8129 +/* Synchronize the MII management interface by shifting 32 one bits out. */ +static void mdio_sync (void *mdio_addr) +{ + int i; + + for (i = 32; i >= 0; i--) { + writeb (MDIO_WRITE1, mdio_addr); + mdio_delay (mdio_addr); + writeb (MDIO_WRITE1 | MDIO_CLK, mdio_addr); + mdio_delay (mdio_addr); + } +} +#endif + +static int mdio_read (struct net_device *dev, int phy_id, int location) +{ + struct rtl8139_private *tp = dev->priv; + int retval = 0; +#ifdef CONFIG_8139TOO_8129 + void *mdio_addr = tp->mmio_addr + Config4; + int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; + int i; +#endif + + if (phy_id > 31) { /* Really a 8139. Use internal registers. */ + return location < 8 && mii_2_8139_map[location] ? + readw (tp->mmio_addr + mii_2_8139_map[location]) : 0; + } + +#ifdef CONFIG_8139TOO_8129 + mdio_sync (mdio_addr); + /* Shift the read command bits out. */ + for (i = 15; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_DATA_OUT : 0; + + writeb (MDIO_DIR | dataval, mdio_addr); + mdio_delay (mdio_addr); + writeb (MDIO_DIR | dataval | MDIO_CLK, mdio_addr); + mdio_delay (mdio_addr); + } + + /* Read the two transition, 16 data, and wire-idle bits. */ + for (i = 19; i > 0; i--) { + writeb (0, mdio_addr); + mdio_delay (mdio_addr); + retval = (retval << 1) | ((readb (mdio_addr) & MDIO_DATA_IN) ? 
1 : 0); + writeb (MDIO_CLK, mdio_addr); + mdio_delay (mdio_addr); + } +#endif + + return (retval >> 1) & 0xffff; +} + + +static void mdio_write (struct net_device *dev, int phy_id, int location, + int value) +{ + struct rtl8139_private *tp = dev->priv; +#ifdef CONFIG_8139TOO_8129 + void *mdio_addr = tp->mmio_addr + Config4; + int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location << 18) | value; + int i; +#endif + + if (phy_id > 31) { /* Really a 8139. Use internal registers. */ + void *ioaddr = tp->mmio_addr; + if (location == 0) { + RTL_W8 (Cfg9346, Cfg9346_Unlock); + RTL_W16 (BasicModeCtrl, value); + RTL_W8 (Cfg9346, Cfg9346_Lock); + } else if (location < 8 && mii_2_8139_map[location]) + RTL_W16 (mii_2_8139_map[location], value); + return; + } + +#ifdef CONFIG_8139TOO_8129 + mdio_sync (mdio_addr); + + /* Shift the command bits out. */ + for (i = 31; i >= 0; i--) { + int dataval = + (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + writeb (dataval, mdio_addr); + mdio_delay (mdio_addr); + writeb (dataval | MDIO_CLK, mdio_addr); + mdio_delay (mdio_addr); + } + /* Clear out extra bits. */ + for (i = 2; i > 0; i--) { + writeb (0, mdio_addr); + mdio_delay (mdio_addr); + writeb (MDIO_CLK, mdio_addr); + mdio_delay (mdio_addr); + } +#endif +} + + +static int rtl8139_open (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + int retval; +#ifdef RTL8139_DEBUG + void *ioaddr = tp->mmio_addr; +#endif + + retval = request_irq (dev->irq, rtl8139_interrupt, SA_SHIRQ, dev->name, dev); + if (retval) + return retval; + + tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN, + &tp->tx_bufs_dma); + tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN, + &tp->rx_ring_dma); + if (tp->tx_bufs == NULL || tp->rx_ring == NULL) { + free_irq(dev->irq, dev); + + if (tp->tx_bufs) + pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, + tp->tx_bufs, tp->tx_bufs_dma); + if (tp->rx_ring) + pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, + tp->rx_ring, tp->rx_ring_dma); + + return -ENOMEM; + + } + + tp->full_duplex = tp->duplex_lock; + tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000; + tp->twistie = 1; + tp->time_to_die = 0; + + rtl8139_init_ring (dev); + rtl8139_hw_start (dev); + + DPRINTK ("%s: rtl8139_open() ioaddr %#lx IRQ %d" + " GP Pins %2.2x %s-duplex.\n", + dev->name, pci_resource_start (tp->pci_dev, 1), + dev->irq, RTL_R8 (MediaStatus), + tp->full_duplex ? "full" : "half"); + + return 0; +} + + +static void rtl_check_media (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + + if (tp->phys[0] >= 0) { + u16 mii_reg5 = mdio_read(dev, tp->phys[0], 5); + if (mii_reg5 == 0xffff) + ; /* Not there */ + else if ((mii_reg5 & 0x0100) == 0x0100 + || (mii_reg5 & 0x00C0) == 0x0040) + tp->full_duplex = 1; + + printk (KERN_INFO"%s: Setting %s%s-duplex based on" + " auto-negotiated partner ability %4.4x.\n", + dev->name, mii_reg5 == 0 ? "" : + (mii_reg5 & 0x0180) ? "100mbps " : "10mbps ", + tp->full_duplex ? "full" : "half", mii_reg5); + } +} + +/* Start the hardware at open or resume. */ +static void rtl8139_hw_start (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + u32 i; + u8 tmp; + + /* Bring old chips out of low-power mode. */ + if (rtl_chip_info[tp->chipset].flags & HasHltClk) + RTL_W8 (HltClk, 'R'); + + rtl8139_chip_reset (ioaddr); + + /* unlock Config[01234] and BMCR register writes */ + RTL_W8_F (Cfg9346, Cfg9346_Unlock); + /* Restore our idea of the MAC address. 
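(The two writes below build the 6-byte MAC as two little-endian 32-bit words + from dev->dev_addr; the flushing _F variants are used so that, given the + posted-MMIO-write bug these macros guard against, the address is definitely in + the chip before Rx/Tx are enabled a few lines later.) 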
*/ + RTL_W32_F (MAC0 + 0, cpu_to_le32 (*(u32 *) (dev->dev_addr + 0))); + RTL_W32_F (MAC0 + 4, cpu_to_le32 (*(u32 *) (dev->dev_addr + 4))); + + /* Must enable Tx/Rx before setting transfer thresholds! */ + RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); + + tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys; + RTL_W32 (RxConfig, tp->rx_config); + + /* Check this value: the documentation for IFG contradicts itself. */ + RTL_W32 (TxConfig, rtl8139_tx_config); + + tp->cur_rx = 0; + + rtl_check_media (dev); + + if (tp->chipset >= CH_8139B) { + /* Disable magic packet scanning, which is enabled + * when PM is enabled in Config1. It can be reenabled + * via ETHTOOL_SWOL if desired. */ + RTL_W8 (Config3, RTL_R8 (Config3) & ~Cfg3_Magic); + } + + DPRINTK("init buffer addresses\n"); + + /* Lock Config[01234] and BMCR register writes */ + RTL_W8 (Cfg9346, Cfg9346_Lock); + + /* init Rx ring buffer DMA address */ + RTL_W32_F (RxBuf, tp->rx_ring_dma); + + /* init Tx buffer DMA addresses */ + for (i = 0; i < NUM_TX_DESC; i++) + RTL_W32_F (TxAddr0 + (i * 4), tp->tx_bufs_dma + (tp->tx_buf[i] - tp->tx_bufs)); + + RTL_W32 (RxMissed, 0); + + rtl8139_set_rx_mode (dev); + + /* no early-rx interrupts */ + RTL_W16 (MultiIntr, RTL_R16 (MultiIntr) & MultiIntrClear); + + /* make sure RxTx has started */ + tmp = RTL_R8 (ChipCmd); + if ((!(tmp & CmdRxEnb)) || (!(tmp & CmdTxEnb))) + RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); + + /* Enable all known interrupts by setting the interrupt mask. */ + RTL_W16 (IntrMask, rtl8139_intr_mask); + + netif_start_queue (dev); +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void rtl8139_init_ring (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + int i; + + tp->cur_rx = 0; + tp->cur_tx = 0; + tp->dirty_tx = 0; + + for (i = 0; i < NUM_TX_DESC; i++) + tp->tx_buf[i] = &tp->tx_bufs[i * TX_BUF_SIZE]; +} + + +static void rtl8139_tx_clear (struct rtl8139_private *tp) +{ + tp->cur_tx = 0; + tp->dirty_tx = 0; + + /* XXX account for unsent Tx packets in tp->stats.tx_dropped */ +} + + +static void rtl8139_tx_timeout (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + int i; + u8 tmp8; + unsigned long flags; + + DPRINTK ("%s: Transmit timeout, status %2.2x %4.4x " + "media %2.2x.\n", dev->name, + RTL_R8 (ChipCmd), + RTL_R16 (IntrStatus), + RTL_R8 (MediaStatus)); + + tp->xstats.tx_timeouts++; + + /* disable Tx ASAP, if not already */ + tmp8 = RTL_R8 (ChipCmd); + if (tmp8 & CmdTxEnb) + RTL_W8 (ChipCmd, CmdRxEnb); + + /* Disable interrupts by clearing the interrupt mask. */ + RTL_W16 (IntrMask, 0x0000); + + /* Emit info to figure out what went wrong. */ + printk (KERN_DEBUG "%s: Tx queue start entry %ld dirty entry %ld.\n", + dev->name, tp->cur_tx, tp->dirty_tx); + for (i = 0; i < NUM_TX_DESC; i++) + printk (KERN_DEBUG "%s: Tx descriptor %d is %8.8lx.%s\n", + dev->name, i, RTL_R32 (TxStatus0 + (i * 4)), + i == tp->dirty_tx % NUM_TX_DESC ? + " (queue head)" : ""); + + /* Stop a shared interrupt from scavenging while we are. */ + spin_lock_irqsave (&tp->lock, flags); + rtl8139_tx_clear (tp); + spin_unlock_irqrestore (&tp->lock, flags); + + /* ...and finally, reset everything */ + rtl8139_hw_start (dev); + + netif_wake_queue (dev); +} + + +static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + unsigned int entry; + unsigned int len; + + /* Calculate the next Tx descriptor entry. 
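(A worked example, for illustration only: cur_tx and dirty_tx increase + monotonically, so with NUM_TX_DESC == 4 the entry index cycles 0,1,2,3,0,...; + the queue is stopped further down exactly when cur_tx - dirty_tx reaches + NUM_TX_DESC, i.e. when all four hardware descriptors hold frames not yet + reaped by rtl8139_tx_interrupt().) 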
*/ + entry = tp->cur_tx % NUM_TX_DESC; + len = skb->len; + + if (likely(len < TX_BUF_SIZE)) { + skb_copy_bits(skb, 0, tp->tx_buf[entry], len); + dev_kfree_skb(skb); + } else { + dev_kfree_skb(skb); + tp->stats.tx_dropped++; + return 0; + } + + /* Note: the chip doesn't have auto-pad! The skb is already freed at this + point, so its length was saved in 'len' above. */ + spin_lock_irq(&tp->lock); + RTL_W32_F (TxStatus0 + (entry * sizeof (u32)), + tp->tx_flag | (len >= ETH_ZLEN ? len : ETH_ZLEN)); + + dev->trans_start = jiffies; + + tp->cur_tx++; + wmb(); + + if ((tp->cur_tx - NUM_TX_DESC) == tp->dirty_tx) + netif_stop_queue (dev); + spin_unlock_irq(&tp->lock); + + DPRINTK ("%s: Queued Tx packet at %p size %u to slot %d.\n", + dev->name, tp->tx_buf[entry], len, entry); + + return 0; +} + + +static void rtl8139_tx_interrupt (struct net_device *dev, + struct rtl8139_private *tp, + void *ioaddr) +{ + unsigned long dirty_tx, tx_left; + + assert (dev != NULL); + assert (tp != NULL); + assert (ioaddr != NULL); + + dirty_tx = tp->dirty_tx; + tx_left = tp->cur_tx - dirty_tx; + while (tx_left > 0) { + int entry = dirty_tx % NUM_TX_DESC; + int txstatus; + + txstatus = RTL_R32 (TxStatus0 + (entry * sizeof (u32))); + + if (!(txstatus & (TxStatOK | TxUnderrun | TxAborted))) + break; /* It still hasn't been Txed */ + + /* Note: TxCarrierLost is always asserted at 100mbps. */ + if (txstatus & (TxOutOfWindow | TxAborted)) { + /* There was a major error, log it. */ + DPRINTK ("%s: Transmit error, Tx status %8.8x.\n", + dev->name, txstatus); + tp->stats.tx_errors++; + if (txstatus & TxAborted) { + tp->stats.tx_aborted_errors++; + RTL_W32 (TxConfig, TxClearAbt); + RTL_W16 (IntrStatus, TxErr); + wmb(); + } + if (txstatus & TxCarrierLost) + tp->stats.tx_carrier_errors++; + if (txstatus & TxOutOfWindow) + tp->stats.tx_window_errors++; +#ifdef ETHER_STATS + if ((txstatus & 0x0f000000) == 0x0f000000) + tp->stats.collisions16++; +#endif + } else { + if (txstatus & TxUnderrun) { + /* Add 64 to the Tx FIFO threshold. */ + if (tp->tx_flag < 0x00300000) + tp->tx_flag += 0x00020000; + tp->stats.tx_fifo_errors++; + } + tp->stats.collisions += (txstatus >> 24) & 15; + tp->stats.tx_bytes += txstatus & 0x7ff; + tp->stats.tx_packets++; + } + + dirty_tx++; + tx_left--; + } + +#ifndef RTL8139_NDEBUG + if (tp->cur_tx - dirty_tx > NUM_TX_DESC) { + printk (KERN_ERR "%s: Out-of-sync dirty pointer, %ld vs. %ld.\n", + dev->name, dirty_tx, tp->cur_tx); + dirty_tx += NUM_TX_DESC; + } +#endif /* RTL8139_NDEBUG */ + + /* only wake the queue if we did work, and the queue is stopped */ + if (tp->dirty_tx != dirty_tx) { + tp->dirty_tx = dirty_tx; + mb(); + if (netif_queue_stopped (dev)) + netif_wake_queue (dev); + } +} + + +/* TODO: clean this up! Rx reset need not be this intensive */ +static void rtl8139_rx_err (u32 rx_status, struct net_device *dev, + struct rtl8139_private *tp, void *ioaddr) +{ + u8 tmp8; + int tmp_work; + + DPRINTK ("%s: Ethernet frame had errors, status %8.8x.\n", + dev->name, rx_status); + if (rx_status & RxTooLong) { + DPRINTK ("%s: Oversized Ethernet frame, status %4.4x!\n", + dev->name, rx_status); + /* A.C.: The chip hangs here. */ + } + tp->stats.rx_errors++; + if (rx_status & (RxBadSymbol | RxBadAlign)) + tp->stats.rx_frame_errors++; + if (rx_status & (RxRunt | RxTooLong)) + tp->stats.rx_length_errors++; + if (rx_status & RxCRCErr) + tp->stats.rx_crc_errors++; + + /* Reset the receiver, based on RealTek recommendation. (Bug?) 
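(In outline: the code below clears CmdRxEnb and polls ChipCmd until the + receiver has really stopped, re-enables Tx/Rx, then rebuilds every piece of Rx + state -- RxConfig, the ring DMA address and the multicast filter -- since the + chip's Rx pointers cannot be trusted after such an error.) 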
*/ + + /* disable receive */ + RTL_W8_F (ChipCmd, CmdTxEnb); + tmp_work = 200; + while (--tmp_work > 0) { + udelay(1); + tmp8 = RTL_R8 (ChipCmd); + if (!(tmp8 & CmdRxEnb)) + break; + } + if (tmp_work <= 0) + printk (KERN_WARNING PFX "rx stop wait too long\n"); + /* restart receive */ + tmp_work = 200; + while (--tmp_work > 0) { + RTL_W8_F (ChipCmd, CmdRxEnb | CmdTxEnb); + udelay(1); + tmp8 = RTL_R8 (ChipCmd); + if ((tmp8 & CmdRxEnb) && (tmp8 & CmdTxEnb)) + break; + } + if (tmp_work <= 0) + printk (KERN_WARNING PFX "tx/rx enable wait too long\n"); + + /* and reinitialize all rx related registers */ + RTL_W8_F (Cfg9346, Cfg9346_Unlock); + /* Must enable Tx/Rx before setting transfer thresholds! */ + RTL_W8 (ChipCmd, CmdRxEnb | CmdTxEnb); + + tp->rx_config = rtl8139_rx_config | AcceptBroadcast | AcceptMyPhys; + RTL_W32 (RxConfig, tp->rx_config); + tp->cur_rx = 0; + + DPRINTK("init buffer addresses\n"); + + /* Lock Config[01234] and BMCR register writes */ + RTL_W8 (Cfg9346, Cfg9346_Lock); + + /* init Rx ring buffer DMA address */ + RTL_W32_F (RxBuf, tp->rx_ring_dma); + + /* A.C.: Reset the multicast list. */ + __set_rx_mode (dev); +} + +static void rtl8139_rx_interrupt (struct net_device *dev, + struct rtl8139_private *tp, void *ioaddr) +{ + unsigned char *rx_ring; + u16 cur_rx; + + assert (dev != NULL); + assert (tp != NULL); + assert (ioaddr != NULL); + + rx_ring = tp->rx_ring; + cur_rx = tp->cur_rx; + + DPRINTK ("%s: In rtl8139_rx(), current %4.4x BufAddr %4.4x," + " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx, + RTL_R16 (RxBufAddr), + RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd)); + + while ((RTL_R8 (ChipCmd) & RxBufEmpty) == 0) { + int ring_offset = cur_rx % RX_BUF_LEN; + u32 rx_status; + unsigned int rx_size; + unsigned int pkt_size; + struct sk_buff *skb; + + rmb(); + + /* read size+status of next frame from DMA ring buffer */ + rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); + rx_size = rx_status >> 16; + pkt_size = rx_size - 4; + + DPRINTK ("%s: rtl8139_rx() status %4.4x, size %4.4x," + " cur %4.4x.\n", dev->name, rx_status, + rx_size, cur_rx); +#if RTL8139_DEBUG > 2 + { + int i; + DPRINTK ("%s: Frame contents ", dev->name); + for (i = 0; i < 70; i++) + printk (" %2.2x", + rx_ring[ring_offset + i]); + printk (".\n"); + } +#endif + + /* Packet copy from FIFO still in progress. + * Theoretically, this should never happen + * since EarlyRx is disabled. + */ + if (rx_size == 0xfff0) { + tp->xstats.early_rx++; + break; + } + + /* If Rx err or invalid rx_size/rx_status received + * (which happens if we get lost in the ring), + * Rx process gets reset, so we abort any further + * Rx processing. + */ + if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || + (rx_size < 8) || + (!(rx_status & RxStatusOK))) { + rtl8139_rx_err (rx_status, dev, tp, ioaddr); + return; + } + + /* Malloc up new buffer, compatible with net-2e. */ + /* Omit the four octet CRC from the length. */ + + /* TODO: consider allocating skb's outside of + * interrupt context, both to speed interrupt processing, + * and also to reduce the chances of having to + * drop packets here under memory pressure. + */ + + skb = dev_alloc_skb (pkt_size + 2); + if (skb) { + skb->dev = dev; + skb_reserve (skb, 2); /* 16 byte align the IP fields. 
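(Reserving 2 bytes makes the 14-byte Ethernet header end on a 16-byte boundary, + 2 + 14 == 16, so the IP header copied in just below starts on an aligned + address -- simple arithmetic spelled out here for clarity.) 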
*/ + + eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); + skb_put (skb, pkt_size); + + skb->protocol = eth_type_trans (skb, dev); + netif_rx (skb); + dev->last_rx = jiffies; + tp->stats.rx_bytes += pkt_size; + tp->stats.rx_packets++; + } else { + printk (KERN_WARNING + "%s: Memory squeeze, dropping packet.\n", + dev->name); + tp->stats.rx_dropped++; + } + + cur_rx = (cur_rx + rx_size + 4 + 3) & ~3; + RTL_W16 (RxBufPtr, cur_rx - 16); + + if (RTL_R16 (IntrStatus) & RxAckBits) + RTL_W16_F (IntrStatus, RxAckBits); + } + + DPRINTK ("%s: Done rtl8139_rx(), current %4.4x BufAddr %4.4x," + " free to %4.4x, Cmd %2.2x.\n", dev->name, cur_rx, + RTL_R16 (RxBufAddr), + RTL_R16 (RxBufPtr), RTL_R8 (ChipCmd)); + + tp->cur_rx = cur_rx; +} + + +static void rtl8139_weird_interrupt (struct net_device *dev, + struct rtl8139_private *tp, + void *ioaddr, + int status, int link_changed) +{ + DPRINTK ("%s: Abnormal interrupt, status %8.8x.\n", + dev->name, status); + + assert (dev != NULL); + assert (tp != NULL); + assert (ioaddr != NULL); + + /* Update the error count. */ + tp->stats.rx_missed_errors += RTL_R32 (RxMissed); + RTL_W32 (RxMissed, 0); + + if ((status & RxUnderrun) && link_changed && + (tp->drv_flags & HAS_LNK_CHNG)) { + /* Really link-change on new chips. */ + int lpar = RTL_R16 (NWayLPAR); + int duplex = (lpar & 0x0100) || (lpar & 0x01C0) == 0x0040 + || tp->duplex_lock; + if (tp->full_duplex != duplex) { + tp->full_duplex = duplex; +#if 0 + RTL_W8 (Cfg9346, Cfg9346_Unlock); + RTL_W8 (Config1, tp->full_duplex ? 0x60 : 0x20); + RTL_W8 (Cfg9346, Cfg9346_Lock); +#endif + } + status &= ~RxUnderrun; + } + + /* XXX along with rtl8139_rx_err, are we double-counting errors? */ + if (status & + (RxUnderrun | RxOverflow | RxErr | RxFIFOOver)) + tp->stats.rx_errors++; + + if (status & PCSTimeout) + tp->stats.rx_length_errors++; + if (status & (RxUnderrun | RxFIFOOver)) + tp->stats.rx_fifo_errors++; + if (status & PCIErr) { + u16 pci_cmd_status; + pci_read_config_word (tp->pci_dev, PCI_STATUS, &pci_cmd_status); + pci_write_config_word (tp->pci_dev, PCI_STATUS, pci_cmd_status); + + printk (KERN_ERR "%s: PCI Bus error %4.4x.\n", + dev->name, pci_cmd_status); + } +} + + +/* The interrupt handler does all of the Rx thread work and cleans up + after the Tx thread. */ +static void rtl8139_interrupt (int irq, void *dev_instance, + struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *) dev_instance; + struct rtl8139_private *tp = dev->priv; + int boguscnt = max_interrupt_work; + void *ioaddr = tp->mmio_addr; + int ackstat, status; + int link_changed = 0; /* avoid bogus "uninit" warning */ + + spin_lock (&tp->lock); + + do { + status = RTL_R16 (IntrStatus); + + /* h/w no longer present (hotplug?) or major error, bail */ + if (status == 0xFFFF) + break; + + if ((status & + (PCIErr | PCSTimeout | RxUnderrun | RxOverflow | + RxFIFOOver | TxErr | TxOK | RxErr | RxOK)) == 0) + break; + + /* Acknowledge all of the current interrupt sources ASAP, but + first get an additional status bit from CSCR. 
*/ + if (status & RxUnderrun) + link_changed = RTL_R16 (CSCR) & CSCR_LinkChangeBit; + + /* The chip takes special action when we clear RxAckBits, + * so we clear them later in rtl8139_rx_interrupt + */ + ackstat = status & ~(RxAckBits | TxErr); + RTL_W16 (IntrStatus, ackstat); + + DPRINTK ("%s: interrupt status=%#4.4x ackstat=%#4.4x new intstat=%#4.4x.\n", + dev->name, ackstat, status, RTL_R16 (IntrStatus)); + + if (netif_running (dev) && (status & RxAckBits)) + rtl8139_rx_interrupt (dev, tp, ioaddr); + + /* Check uncommon events with one test. */ + if (status & (PCIErr | PCSTimeout | RxUnderrun | RxOverflow | + RxFIFOOver | RxErr)) + rtl8139_weird_interrupt (dev, tp, ioaddr, + status, link_changed); + + if (netif_running (dev) && (status & (TxOK | TxErr))) { + rtl8139_tx_interrupt (dev, tp, ioaddr); + if (status & TxErr) + RTL_W16 (IntrStatus, TxErr); + } + + boguscnt--; + } while (boguscnt > 0); + + if (boguscnt <= 0) { + printk (KERN_WARNING "%s: Too much work at interrupt, " + "IntrStatus=0x%4.4x.\n", dev->name, status); + + /* Clear all interrupt sources. */ + RTL_W16 (IntrStatus, 0xffff); + } + + spin_unlock (&tp->lock); + + DPRINTK ("%s: exiting interrupt, intr_status=%#4.4x.\n", + dev->name, RTL_R16 (IntrStatus)); +} + + +static int rtl8139_close (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + unsigned long flags; + + netif_stop_queue (dev); + + DPRINTK ("%s: Shutting down ethercard, status was 0x%4.4x.\n", + dev->name, RTL_R16 (IntrStatus)); + + spin_lock_irqsave (&tp->lock, flags); + + /* Stop the chip's Tx and Rx DMA processes. */ + RTL_W8 (ChipCmd, 0); + + /* Disable interrupts by clearing the interrupt mask. */ + RTL_W16 (IntrMask, 0); + + /* Update the error counts. */ + tp->stats.rx_missed_errors += RTL_R32 (RxMissed); + RTL_W32 (RxMissed, 0); + + spin_unlock_irqrestore (&tp->lock, flags); + + synchronize_irq (); + free_irq (dev->irq, dev); + + rtl8139_tx_clear (tp); + + pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN, + tp->rx_ring, tp->rx_ring_dma); + pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN, + tp->tx_bufs, tp->tx_bufs_dma); + tp->rx_ring = NULL; + tp->tx_bufs = NULL; + + /* Green! Put the chip in low-power mode. */ + RTL_W8 (Cfg9346, Cfg9346_Unlock); + + if (rtl_chip_info[tp->chipset].flags & HasHltClk) + RTL_W8 (HltClk, 'H'); /* 'R' would leave the clock running. */ + + return 0; +} + + +/* Get the ethtool settings. Assumes that eset points to kernel + memory, *eset has been initialized as {ETHTOOL_GSET}, and other + threads or interrupts aren't messing with the 8139. */ +static void netdev_get_eset (struct net_device *dev, struct ethtool_cmd *eset) +{ + struct rtl8139_private *np = dev->priv; + void *ioaddr = np->mmio_addr; + u16 advert; + + eset->supported = SUPPORTED_10baseT_Half + | SUPPORTED_10baseT_Full + | SUPPORTED_100baseT_Half + | SUPPORTED_100baseT_Full + | SUPPORTED_Autoneg + | SUPPORTED_TP; + + eset->advertising = ADVERTISED_TP | ADVERTISED_Autoneg; + advert = mdio_read (dev, np->phys[0], 4); + if (advert & 0x0020) + eset->advertising |= ADVERTISED_10baseT_Half; + if (advert & 0x0040) + eset->advertising |= ADVERTISED_10baseT_Full; + if (advert & 0x0080) + eset->advertising |= ADVERTISED_100baseT_Half; + if (advert & 0x0100) + eset->advertising |= ADVERTISED_100baseT_Full; + + eset->speed = (RTL_R8 (MediaStatus) & 0x08) ? 
10 : 100; + /* (KON)FIXME: np->full_duplex is set or reset by the thread, + which means this always shows half duplex if the interface + isn't up yet, even if it has already autonegotiated. */ + eset->duplex = np->full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + eset->port = PORT_TP; + /* (KON)FIXME: Is np->phys[0] correct? starfire.c uses that. */ + eset->phy_address = np->phys[0]; + eset->transceiver = XCVR_INTERNAL; + eset->autoneg = (mdio_read (dev, np->phys[0], 0) & 0x1000) != 0; + eset->maxtxpkt = 1; + eset->maxrxpkt = 1; +} + + +/* Get the ethtool Wake-on-LAN settings. Assumes that wol points to + kernel memory, *wol has been initialized as {ETHTOOL_GWOL}, and + other threads or interrupts aren't messing with the 8139. */ +static void netdev_get_wol (struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct rtl8139_private *np = dev->priv; + void *ioaddr = np->mmio_addr; + + if (rtl_chip_info[np->chipset].flags & HasLWake) { + u8 cfg3 = RTL_R8 (Config3); + u8 cfg5 = RTL_R8 (Config5); + + wol->supported = WAKE_PHY | WAKE_MAGIC + | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST; + + wol->wolopts = 0; + if (cfg3 & Cfg3_LinkUp) + wol->wolopts |= WAKE_PHY; + if (cfg3 & Cfg3_Magic) + wol->wolopts |= WAKE_MAGIC; + /* (KON)FIXME: See how netdev_set_wol() handles the + following constants. */ + if (cfg5 & Cfg5_UWF) + wol->wolopts |= WAKE_UCAST; + if (cfg5 & Cfg5_MWF) + wol->wolopts |= WAKE_MCAST; + if (cfg5 & Cfg5_BWF) + wol->wolopts |= WAKE_BCAST; + } +} + + +/* Set the ethtool Wake-on-LAN settings. Return 0 or -errno. Assumes + that wol points to kernel memory and other threads or interrupts + aren't messing with the 8139. */ +static int netdev_set_wol (struct net_device *dev, + const struct ethtool_wolinfo *wol) +{ + struct rtl8139_private *np = dev->priv; + void *ioaddr = np->mmio_addr; + u32 support; + u8 cfg3, cfg5; + + support = ((rtl_chip_info[np->chipset].flags & HasLWake) + ? (WAKE_PHY | WAKE_MAGIC + | WAKE_UCAST | WAKE_MCAST | WAKE_BCAST) + : 0); + if (wol->wolopts & ~support) + return -EINVAL; + + cfg3 = RTL_R8 (Config3) & ~(Cfg3_LinkUp | Cfg3_Magic); + if (wol->wolopts & WAKE_PHY) + cfg3 |= Cfg3_LinkUp; + if (wol->wolopts & WAKE_MAGIC) + cfg3 |= Cfg3_Magic; + RTL_W8 (Cfg9346, Cfg9346_Unlock); + RTL_W8 (Config3, cfg3); + RTL_W8 (Cfg9346, Cfg9346_Lock); + + cfg5 = RTL_R8 (Config5) & ~(Cfg5_UWF | Cfg5_MWF | Cfg5_BWF); + /* (KON)FIXME: These are untested. We may have to set the + CRC0, Wakeup0 and LSBCRC0 registers too, but I have no + documentation. */ + if (wol->wolopts & WAKE_UCAST) + cfg5 |= Cfg5_UWF; + if (wol->wolopts & WAKE_MCAST) + cfg5 |= Cfg5_MWF; + if (wol->wolopts & WAKE_BCAST) + cfg5 |= Cfg5_BWF; + RTL_W8 (Config5, cfg5); /* need not unlock via Cfg9346 */ + + return 0; +} + + +static int netdev_ethtool_ioctl (struct net_device *dev, void *useraddr) +{ + struct rtl8139_private *np = dev->priv; + u32 ethcmd; + + /* dev_ioctl() in ../../net/core/dev.c has already checked + capable(CAP_NET_ADMIN), so don't bother with that here. 
*/ + + if (copy_from_user (&ethcmd, useraddr, sizeof (ethcmd))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_GSET: + { + struct ethtool_cmd eset = { ETHTOOL_GSET }; + spin_lock_irq (&np->lock); + netdev_get_eset (dev, &eset); + spin_unlock_irq (&np->lock); + if (copy_to_user (useraddr, &eset, sizeof (eset))) + return -EFAULT; + return 0; + } + + /* TODO: ETHTOOL_SSET */ + + case ETHTOOL_GDRVINFO: + { + struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; + strcpy (info.driver, DRV_NAME); + strcpy (info.version, DRV_VERSION); + strcpy (info.bus_info, np->pci_dev->slot_name); + if (copy_to_user (useraddr, &info, sizeof (info))) + return -EFAULT; + return 0; + } + + case ETHTOOL_GWOL: + { + struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; + spin_lock_irq (&np->lock); + netdev_get_wol (dev, &wol); + spin_unlock_irq (&np->lock); + if (copy_to_user (useraddr, &wol, sizeof (wol))) + return -EFAULT; + return 0; + } + + case ETHTOOL_SWOL: + { + struct ethtool_wolinfo wol; + int rc; + if (copy_from_user (&wol, useraddr, sizeof (wol))) + return -EFAULT; + spin_lock_irq (&np->lock); + rc = netdev_set_wol (dev, &wol); + spin_unlock_irq (&np->lock); + return rc; + } + + default: + break; + } + + return -EOPNOTSUPP; +} + + +static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct rtl8139_private *tp = dev->priv; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; + int rc = 0; + int phy = tp->phys[0] & 0x3f; + + if (cmd != SIOCETHTOOL) { + /* With SIOCETHTOOL, this would corrupt the pointer. */ + data->phy_id &= 0x1f; + data->reg_num &= 0x1f; + } + + switch (cmd) { + case SIOCETHTOOL: + return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data); + + case SIOCGMIIPHY: /* Get the address of the PHY in use. */ + case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ + data->phy_id = phy; + /* Fall Through */ + + case SIOCGMIIREG: /* Read the specified MII register. */ + case SIOCDEVPRIVATE+1: /* binary compat, remove in 2.5 */ + data->val_out = mdio_read (dev, data->phy_id, data->reg_num); + break; + + case SIOCSMIIREG: /* Write the specified MII register */ + case SIOCDEVPRIVATE+2: /* binary compat, remove in 2.5 */ + if (!capable (CAP_NET_ADMIN)) { + rc = -EPERM; + break; + } + + if (data->phy_id == phy) { + u16 value = data->val_in; + switch (data->reg_num) { + case 0: + /* Check for autonegotiation on or reset. */ + tp->medialock = (value & 0x9000) ? 0 : 1; + if (tp->medialock) + tp->full_duplex = (value & 0x0100) ? 1 : 0; + break; + case 4: /* tp->advertising = value; */ break; + } + } + mdio_write(dev, data->phy_id, data->reg_num, data->val_in); + break; + + default: + rc = -EOPNOTSUPP; + break; + } + + return rc; +} + + +static struct net_device_stats *rtl8139_get_stats (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + unsigned long flags; + + if (netif_running(dev)) { + spin_lock_irqsave (&tp->lock, flags); + tp->stats.rx_missed_errors += RTL_R32 (RxMissed); + RTL_W32 (RxMissed, 0); + spin_unlock_irqrestore (&tp->lock, flags); + } + + return &tp->stats; +} + +/* Set or clear the multicast filter for this adaptor. + This routine is not state sensitive and need not be SMP locked. 
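(A worked example of the hash used by __set_rx_mode() below, for illustration: + ether_crc() produces a 32-bit CRC of the 6-byte address, bit_nr = crc >> 26 + keeps its top 6 bits (0..63), bit_nr >> 5 selects mc_filter[0] or mc_filter[1] + (registers MAR0-3 or MAR4-7), and 1 << (bit_nr & 31) picks the bit inside that + 32-bit half of the chip's 64-bin filter.) 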
*/ + +static unsigned const ethernet_polynomial = 0x04c11db7U; +static inline u32 ether_crc (int length, unsigned char *data) +{ + int crc = -1; + + while (--length >= 0) { + unsigned char current_octet = *data++; + int bit; + for (bit = 0; bit < 8; bit++, current_octet >>= 1) + crc = (crc << 1) ^ ((crc < 0) ^ (current_octet & 1) ? + ethernet_polynomial : 0); + } + + return crc; +} + + +static void __set_rx_mode (struct net_device *dev) +{ + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + u32 mc_filter[2]; /* Multicast hash filter */ + int i, rx_mode; + u32 tmp; + + DPRINTK ("%s: rtl8139_set_rx_mode(%4.4x) done -- Rx config %8.8lx.\n", + dev->name, dev->flags, RTL_R32 (RxConfig)); + + /* Note: do not reorder, GCC is clever about common statements. */ + if (dev->flags & IFF_PROMISC) { + /* Unconditionally log net taps. */ + printk (KERN_NOTICE "%s: Promiscuous mode enabled.\n", + dev->name); + rx_mode = + AcceptBroadcast | AcceptMulticast | AcceptMyPhys | + AcceptAllPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else if ((dev->mc_count > multicast_filter_limit) + || (dev->flags & IFF_ALLMULTI)) { + /* Too many to filter perfectly -- accept all multicasts. */ + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0xffffffff; + } else { + struct dev_mc_list *mclist; + rx_mode = AcceptBroadcast | AcceptMyPhys; + mc_filter[1] = mc_filter[0] = 0; + for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; + i++, mclist = mclist->next) { + int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + + mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31)); + rx_mode |= AcceptMulticast; + } + } + + /* We can safely update without stopping the chip. */ + tmp = rtl8139_rx_config | rx_mode; + if (tp->rx_config != tmp) { + RTL_W32_F (RxConfig, tmp); + tp->rx_config = tmp; + } + RTL_W32_F (MAR0 + 0, mc_filter[0]); + RTL_W32_F (MAR0 + 4, mc_filter[1]); +} + +static void rtl8139_set_rx_mode (struct net_device *dev) +{ + unsigned long flags; + struct rtl8139_private *tp = dev->priv; + + spin_lock_irqsave (&tp->lock, flags); + __set_rx_mode(dev); + spin_unlock_irqrestore (&tp->lock, flags); +} + +#ifdef CONFIG_PM + +static int rtl8139_suspend (struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct rtl8139_private *tp = dev->priv; + void *ioaddr = tp->mmio_addr; + unsigned long flags; + + if (!netif_running (dev)) + return 0; + + netif_device_detach (dev); + + spin_lock_irqsave (&tp->lock, flags); + + /* Disable interrupts, stop Tx and Rx. */ + RTL_W16 (IntrMask, 0); + RTL_W8 (ChipCmd, 0); + + /* Update the error counts. */ + tp->stats.rx_missed_errors += RTL_R32 (RxMissed); + RTL_W32 (RxMissed, 0); + + spin_unlock_irqrestore (&tp->lock, flags); + return 0; +} + + +static int rtl8139_resume (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + + if (!netif_running (dev)) + return 0; + netif_device_attach (dev); + rtl8139_hw_start (dev); + return 0; +} + +#endif /* CONFIG_PM */ + + +static struct pci_driver rtl8139_pci_driver = { + name: DRV_NAME, + id_table: rtl8139_pci_tbl, + probe: rtl8139_init_one, + remove: __devexit_p(rtl8139_remove_one), +#ifdef CONFIG_PM + suspend: rtl8139_suspend, + resume: rtl8139_resume, +#endif /* CONFIG_PM */ +}; + + +static int __init rtl8139_init_module (void) +{ + /* when we're a module, we always print a version message, + * even if no 8139 board is found. 
+ */ +#ifdef MODULE + printk (KERN_INFO RTL8139_DRIVER_NAME "\n"); +#endif + + return pci_module_init (&rtl8139_pci_driver); +} + + +static void __exit rtl8139_cleanup_module (void) +{ + pci_unregister_driver (&rtl8139_pci_driver); +} + + +module_init(rtl8139_init_module); +module_exit(rtl8139_cleanup_module); diff --git a/xen-2.4.16/drivers/net/Makefile b/xen-2.4.16/drivers/net/Makefile new file mode 100644 index 0000000000..69b624e792 --- /dev/null +++ b/xen-2.4.16/drivers/net/Makefile @@ -0,0 +1,10 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(MAKE) -C tulip + $(LD) -r -o driver.o tulip/tulip.o $(OBJS) + +clean: + $(MAKE) -C tulip clean + rm -f *.o *~ core diff --git a/xen-2.4.16/drivers/net/Space.c b/xen-2.4.16/drivers/net/Space.c new file mode 100644 index 0000000000..5724837106 --- /dev/null +++ b/xen-2.4.16/drivers/net/Space.c @@ -0,0 +1,44 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Holds initial configuration information for devices. + * + * Version: @(#)Space.c 1.0.7 08/12/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * Donald J. Becker, + */ +#include <linux/config.h> +#include <linux/netdevice.h> + +/* + * KAF (23/7/02): All the probe shit is gone from here -- each network + * driver should probe as part of its setup, and dynamically append + * to dev_base when it finds a NIC. + */ + +/* + * The @dev_base list is protected by @dev_base_lock and the rtnl + * semaphore. + * + * Pure readers hold dev_base_lock for reading. + * + * Writers must hold the rtnl semaphore while they loop through the + * dev_base list, and hold dev_base_lock for writing when they do the + * actual updates. This allows pure readers to access the list even + * while a writer is preparing to update it. + * + * To put it another way, dev_base_lock is held for writing only to + * protect against pure readers; the rtnl semaphore provides the + * protection against other writers. + * + * See, for example usages, register_netdevice() and + * unregister_netdevice(), which must be called with the rtnl + * semaphore held. + */ +struct net_device *dev_base = NULL; +rwlock_t dev_base_lock = RW_LOCK_UNLOCKED; + diff --git a/xen-2.4.16/drivers/net/eepro100.c b/xen-2.4.16/drivers/net/eepro100.c new file mode 100644 index 0000000000..bce354fac1 --- /dev/null +++ b/xen-2.4.16/drivers/net/eepro100.c @@ -0,0 +1,2309 @@ +/* drivers/net/eepro100.c: An Intel i82557-559 Ethernet driver for Linux. */ +/* + NOTICE: For use with late 2.3 kernels only. + May not compile for kernels 2.3.43-47. + Written 1996-1999 by Donald Becker. + + The driver also contains updates by different kernel developers + (see incomplete list below). + Current maintainer is Andrey V. Savochkin <saw@saw.sw.com.sg>. + Please use this email address and linux-kernel mailing list for bug reports. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + This driver is for the Intel EtherExpress Pro100 (Speedo3) design. + It should work with all i82557/558/559 boards. + + Version history: + 1998 Apr - 2000 Feb Andrey V. Savochkin + Serious fixes for multicast filter list setting, TX timeout routine; + RX ring refilling logic; other stuff + 2000 Feb Jeff Garzik + Convert to new PCI driver interface + 2000 Mar 24 Dragan Stancevic + Disabled FC and ER, to avoid lockups when we get FCP interrupts. 
+   2000 Jul 17  Goutham Rao
+   PCI DMA API fixes, adding pci_dma_sync_single calls where necessary
+*/
+
+static const char *version =
+"eepro100.c:v1.09j-t 9/29/99 Donald Becker http://cesdis.gsfc.nasa.gov/linux/drivers/eepro100.html\n"
+"eepro100.c: $Revision: 1.36 $ 2000/11/17 Modified by Andrey V. Savochkin and others\n";
+
+/* A few user-configurable values that apply to all boards.
+   First set is undocumented and spelled per Intel recommendations. */
+
+static int congenb /* = 0 */;	/* Enable congestion control in the DP83840. */
+static int txfifo = 8;		/* Tx FIFO threshold in 4 byte units, 0-15 */
+static int rxfifo = 8;		/* Rx FIFO threshold, default 32 bytes. */
+/* Tx/Rx DMA burst length, 0-127, 0 == no preemption, tx==128 -> disabled. */
+static int txdmacount = 128;
+static int rxdmacount /* = 0 */;
+
+/* Set the copy breakpoint for the copy-only-tiny-buffer Rx method.
+   Lower values use more memory, but are faster. */
+#if defined(__alpha__) || defined(__sparc__) || defined(__mips__) || \
+	defined(__arm__)
+static int rx_copybreak = 1518;
+#else
+static int rx_copybreak = 200;
+#endif
+
+/* Maximum events (Rx packets, etc.) to handle at each interrupt. */
+static int max_interrupt_work = 20;
+
+/* Maximum number of multicast addresses to filter (vs. rx-all-multicast) */
+static int multicast_filter_limit = 64;
+
+/* 'options' is used to pass a transceiver override or full-duplex flag
+   e.g. "options=16" for FD, "options=32" for 100mbps-only. */
+static int full_duplex[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int options[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static int debug = -1;			/* The debug level */
+
+#if !defined(__OPTIMIZE__) || !defined(__KERNEL__)
+#warning  You must compile this file with the correct options!
+#warning  See the last lines of the source file.
+#error You must compile this driver with "-O".
+#endif
+
+#include
+#include
+//#include
+#include
+
+//#include
+//#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/* A few values that may be tweaked. */
+
+/* The ring sizes should be a power of two for efficiency. */
+#undef RX_RING_SIZE
+#undef TX_RING_SIZE
+#define TX_RING_SIZE	32
+#define RX_RING_SIZE	32
+
+/* How many slots the multicast filter setup may take.
+   Do not decrease without changing set_rx_mode() implementation. */
+#define TX_MULTICAST_SIZE   2
+#define TX_MULTICAST_RESERV (TX_MULTICAST_SIZE*2)
+/* Actual number of TX packets queued, must be
+   <= TX_RING_SIZE-TX_MULTICAST_RESERV. */
+#define TX_QUEUE_LIMIT  (TX_RING_SIZE-TX_MULTICAST_RESERV)
+/* Hysteresis marking queue as no longer full. */
+#define TX_QUEUE_UNFULL (TX_QUEUE_LIMIT-4)
+
+/* Operational parameters that usually are not changed. */
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT		(2*HZ)
+/* Size of a pre-allocated Rx buffer: <Ethernet MTU> + slack. */
+#define PKT_BUF_SZ		1536
+
+MODULE_AUTHOR("Maintainer: Andrey V.
Savochkin "); +MODULE_DESCRIPTION("Intel i82557/i82558/i82559 PCI EtherExpressPro driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM(debug, "i"); +MODULE_PARM(options, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(full_duplex, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(congenb, "i"); +MODULE_PARM(txfifo, "i"); +MODULE_PARM(rxfifo, "i"); +MODULE_PARM(txdmacount, "i"); +MODULE_PARM(rxdmacount, "i"); +MODULE_PARM(rx_copybreak, "i"); +MODULE_PARM(max_interrupt_work, "i"); +MODULE_PARM(multicast_filter_limit, "i"); +MODULE_PARM_DESC(debug, "eepro100 debug level (0-6)"); +MODULE_PARM_DESC(options, "eepro100: Bits 0-3: tranceiver type, bit 4: full duplex, bit 5: 100Mbps"); +MODULE_PARM_DESC(full_duplex, "eepro100 full duplex setting(s) (1)"); +MODULE_PARM_DESC(congenb, "eepro100 Enable congestion control (1)"); +MODULE_PARM_DESC(txfifo, "eepro100 Tx FIFO threshold in 4 byte units, (0-15)"); +MODULE_PARM_DESC(rxfifo, "eepro100 Rx FIFO threshold in 4 byte units, (0-15)"); +MODULE_PARM_DESC(txdmaccount, "eepro100 Tx DMA burst length; 128 - disable (0-128)"); +MODULE_PARM_DESC(rxdmaccount, "eepro100 Rx DMA burst length; 128 - disable (0-128)"); +MODULE_PARM_DESC(rx_copybreak, "eepro100 copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(max_interrupt_work, "eepro100 maximum events handled per interrupt"); +MODULE_PARM_DESC(multicast_filter_limit, "eepro100 maximum number of filtered multicast addresses"); + +#define RUN_AT(x) (jiffies + (x)) + +/* ACPI power states don't universally work (yet) */ +#ifndef CONFIG_PM +#define SET_POWER_STATE(_dev, _state) pci_set_power_state(_dev, 0) +#else +#define SET_POWER_STATE(_dev, _state) pci_set_power_state(_dev, _state) +#endif /* CONFIG_PM */ + +#define netdevice_start(dev) +#define netdevice_stop(dev) +#define netif_set_tx_timeout(dev, tf, tm) \ + do { \ + (dev)->tx_timeout = (tf); \ + (dev)->watchdog_timeo = (tm); \ + } while(0) + +#ifndef PCI_DEVICE_ID_INTEL_ID1029 +#define PCI_DEVICE_ID_INTEL_ID1029 0x1029 +#endif +#ifndef PCI_DEVICE_ID_INTEL_ID1030 +#define PCI_DEVICE_ID_INTEL_ID1030 0x1030 +#endif + + +static int speedo_debug = 1; + +/* + Theory of Operation + +I. Board Compatibility + +This device driver is designed for the Intel i82557 "Speedo3" chip, Intel's +single-chip fast Ethernet controller for PCI, as used on the Intel +EtherExpress Pro 100 adapter. + +II. Board-specific settings + +PCI bus devices are configured by the system at boot time, so no jumpers +need to be set on the board. The system BIOS should be set to assign the +PCI INTA signal to an otherwise unused system IRQ line. While it's +possible to share PCI interrupt lines, it negatively impacts performance and +only recent kernels support it. + +III. Driver operation + +IIIA. General +The Speedo3 is very similar to other Intel network chips, that is to say +"apparently designed on a different planet". This chips retains the complex +Rx and Tx descriptors and multiple buffers pointers as previous chips, but +also has simplified Tx and Rx buffer modes. This driver uses the "flexible" +Tx mode, but in a simplified lower-overhead manner: it associates only a +single buffer descriptor with each frame descriptor. + +Despite the extra space overhead in each receive skbuff, the driver must use +the simplified Rx buffer mode to assure that only a single data buffer is +associated with each RxFD. The driver implements this by reserving space +for the Rx descriptor at the head of each Rx skbuff. 
+
+The Speedo-3 has receive and command unit base addresses that are added to
+almost all descriptor pointers.  The driver sets these to zero, so that all
+pointer fields are absolute addresses.
+
+The System Control Block (SCB) of some previous Intel chips exists on the
+chip in both PCI I/O and memory space.  This driver uses the I/O space
+registers, but might switch to memory mapped mode to better support non-x86
+processors.
+
+IIIB. Transmit structure
+
+The driver must use the complex Tx command+descriptor mode in order to
+have an indirect pointer to the skbuff data section.  Each Tx command block
+(TxCB) is associated with two immediately appended Tx Buffer Descriptors
+(TxBDs).  A fixed ring of these TxCB+TxBD pairs is kept as part of the
+speedo_private data structure for each adapter instance.
+
+The newer i82558 explicitly supports this structure, and can read the two
+TxBDs in the same PCI burst as the TxCB.
+
+This ring structure is used for all normal transmit packets, but the
+transmit packet descriptors aren't long enough for most non-Tx commands such
+as CmdConfigure.  This is complicated by the possibility that the chip has
+already loaded the link address in the previous descriptor.  So for these
+commands we convert the next free descriptor on the ring to a NoOp, and point
+that descriptor's link to the complex command.
+
+An additional complexity of these non-transmit commands is that they may be
+added asynchronously to the normal transmit queue, so we disable interrupts
+whenever the Tx descriptor ring is manipulated.
+
+A notable aspect of these special configure commands is that they do
+work with the normal Tx ring entry scavenge method.  The Tx ring scavenge
+is done at interrupt time using the 'dirty_tx' index, and checking for the
+command-complete bit.  The setup frames may have the NoOp command on the
+Tx ring marked as complete before the setup command itself has completed,
+but this is not a problem.  The tx_ring entry can still be safely reused, as
+the tx_skbuff[] entry is always empty for config_cmd and mc_setup frames.
+
+Commands may have bits set, e.g. CmdSuspend, in the command word to either
+suspend or stop the transmit/command unit.  This driver always flags the last
+command with CmdSuspend, erases the CmdSuspend in the previous command, and
+then issues a CU_RESUME.
+Note: Watch out for the potential race condition here: imagine
+	erasing the previous suspend
+		the chip processes the previous command
+		the chip processes the final command, and suspends
+	doing the CU_RESUME
+		the chip processes the next-yet-valid post-final-command.
+So blindly sending a CU_RESUME is only safe if we do it immediately
+after erasing the previous CmdSuspend, without the possibility of an
+intervening delay.  Thus the resume command is always within the
+interrupts-disabled region.  This is a timing dependence, but handling this
+condition in a timing-independent way would considerably complicate the code.
+
+Note: In previous generation Intel chips, restarting the command unit was a
+notoriously slow process.  This is presumably no longer true.
+
+IIIC. Receive structure
+
+Because of the bus-master support on the Speedo3 this driver uses the new
+SKBUFF_RX_COPYBREAK scheme, rather than a fixed intermediate receive buffer.
+This scheme allocates full-sized skbuffs as receive buffers.  The value
+SKBUFF_RX_COPYBREAK is used as the copying breakpoint: it is chosen to
+trade-off the memory wasted by passing the full-sized skbuff to the queue
+layer for all frames vs.
the copying cost of copying a frame to a +correctly-sized skbuff. + +For small frames the copying cost is negligible (esp. considering that we +are pre-loading the cache with immediately useful header information), so we +allocate a new, minimally-sized skbuff. For large frames the copying cost +is non-trivial, and the larger copy might flush the cache of useful data, so +we pass up the skbuff the packet was received into. + +IV. Notes + +Thanks to Steve Williams of Intel for arranging the non-disclosure agreement +that stated that I could disclose the information. But I still resent +having to sign an Intel NDA when I'm helping Intel sell their own product! + +*/ + +static int speedo_found1(struct pci_dev *pdev, long ioaddr, int fnd_cnt, int acpi_idle_state); + +enum pci_flags_bit { + PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, + PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, +}; + +static inline unsigned int io_inw(unsigned long port) +{ + return inw(port); +} +static inline void io_outw(unsigned int val, unsigned long port) +{ + outw(val, port); +} + +#ifndef USE_IO +/* Currently alpha headers define in/out macros. + Undefine them. 2000/03/30 SAW */ +#undef inb +#undef inw +#undef inl +#undef outb +#undef outw +#undef outl +#define inb readb +#define inw readw +#define inl readl +#define outb writeb +#define outw writew +#define outl writel +#endif + +/* How to wait for the command unit to accept a command. + Typically this takes 0 ticks. */ +static inline void wait_for_cmd_done(long cmd_ioaddr) +{ + int wait = 1000; + do udelay(1) ; + while(inb(cmd_ioaddr) && --wait >= 0); +#ifndef final_version + if (wait < 0) + printk(KERN_ALERT "eepro100: wait_for_cmd_done timeout!\n"); +#endif +} + +/* Offsets to the various registers. + All accesses need not be longword aligned. */ +enum speedo_offsets { + SCBStatus = 0, SCBCmd = 2, /* Rx/Command Unit command and status. */ + SCBPointer = 4, /* General purpose pointer. */ + SCBPort = 8, /* Misc. commands and operands. */ + SCBflash = 12, SCBeeprom = 14, /* EEPROM and flash memory control. */ + SCBCtrlMDI = 16, /* MDI interface control. */ + SCBEarlyRx = 20, /* Early receive byte count. */ +}; +/* Commands that can be put in a command list entry. */ +enum commands { + CmdNOp = 0, CmdIASetup = 0x10000, CmdConfigure = 0x20000, + CmdMulticastList = 0x30000, CmdTx = 0x40000, CmdTDR = 0x50000, + CmdDump = 0x60000, CmdDiagnose = 0x70000, + CmdSuspend = 0x40000000, /* Suspend after completion. */ + CmdIntr = 0x20000000, /* Interrupt after completion. */ + CmdTxFlex = 0x00080000, /* Use "Flexible mode" for CmdTx command. */ +}; +/* Clear CmdSuspend (1<<30) avoiding interference with the card access to the + status bits. Previous driver versions used separate 16 bit fields for + commands and statuses. --SAW + */ +#if defined(__alpha__) +# define clear_suspend(cmd) clear_bit(30, &(cmd)->cmd_status); +#else +# if defined(__LITTLE_ENDIAN) +# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x4000 +# elif defined(__BIG_ENDIAN) +# define clear_suspend(cmd) ((__u16 *)&(cmd)->cmd_status)[1] &= ~0x0040 +# else +# error Unsupported byteorder +# endif +#endif + +enum SCBCmdBits { + SCBMaskCmdDone=0x8000, SCBMaskRxDone=0x4000, SCBMaskCmdIdle=0x2000, + SCBMaskRxSuspend=0x1000, SCBMaskEarlyRx=0x0800, SCBMaskFlowCtl=0x0400, + SCBTriggerIntr=0x0200, SCBMaskAll=0x0100, + /* The rest are Rx and Tx commands. 
*/
+	CUStart=0x0010, CUResume=0x0020, CUStatsAddr=0x0040, CUShowStats=0x0050,
+	CUCmdBase=0x0060,	/* CU Base address (set to zero). */
+	CUDumpStats=0x0070,	/* Dump then reset stats counters. */
+	RxStart=0x0001, RxResume=0x0002, RxAbort=0x0004, RxAddrLoad=0x0006,
+	RxResumeNoResources=0x0007,
+};
+
+enum SCBPort_cmds {
+	PortReset=0, PortSelfTest=1, PortPartialReset=2, PortDump=3,
+};
+
+/* The Speedo3 Rx and Tx frame/buffer descriptors. */
+struct descriptor {			/* A generic descriptor. */
+	s32 cmd_status;			/* All command and status fields. */
+	u32 link;			/* struct descriptor * */
+	unsigned char params[0];
+};
+
+/* The Speedo3 Rx and Tx buffer descriptors. */
+struct RxFD {				/* Receive frame descriptor. */
+	s32 status;
+	u32 link;			/* struct RxFD * */
+	u32 rx_buf_addr;		/* void * */
+	u32 count;
+};
+
+/* Selected elements of the Tx/RxFD.status word. */
+enum RxFD_bits {
+	RxComplete=0x8000, RxOK=0x2000,
+	RxErrCRC=0x0800, RxErrAlign=0x0400, RxErrTooBig=0x0200, RxErrSymbol=0x0010,
+	RxEth2Type=0x0020, RxNoMatch=0x0004, RxNoIAMatch=0x0002,
+	TxUnderrun=0x1000, StatusComplete=0x8000,
+};
+
+#define CONFIG_DATA_SIZE 22
+struct TxFD {				/* Transmit frame descriptor set. */
+	s32 status;
+	u32 link;			/* void * */
+	u32 tx_desc_addr;		/* Always points to the tx_buf_addr element. */
+	s32 count;			/* # of TBD (=1), Tx start thresh., etc. */
+	/* This constitutes two "TBD" entries -- we only use one. */
+#define TX_DESCR_BUF_OFFSET 16
+	u32 tx_buf_addr0;		/* void *, frame to be transmitted. */
+	s32 tx_buf_size0;		/* Length of Tx frame. */
+	u32 tx_buf_addr1;		/* void *, frame to be transmitted. */
+	s32 tx_buf_size1;		/* Length of Tx frame. */
+	/* the structure must have space for at least CONFIG_DATA_SIZE starting
+	 * from tx_desc_addr field */
+};
+
+/* Multicast filter setting block. --SAW */
+struct speedo_mc_block {
+	struct speedo_mc_block *next;
+	unsigned int tx;
+	dma_addr_t frame_dma;
+	unsigned int len;
+	struct descriptor frame __attribute__ ((__aligned__(16)));
+};
+
+/* Elements of the dump_statistics block. This block must be longword aligned. */
+struct speedo_stats {
+	u32 tx_good_frames;
+	u32 tx_coll16_errs;
+	u32 tx_late_colls;
+	u32 tx_underruns;
+	u32 tx_lost_carrier;
+	u32 tx_deferred;
+	u32 tx_one_colls;
+	u32 tx_multi_colls;
+	u32 tx_total_colls;
+	u32 rx_good_frames;
+	u32 rx_crc_errs;
+	u32 rx_align_errs;
+	u32 rx_resource_errs;
+	u32 rx_overrun_errs;
+	u32 rx_colls_errs;
+	u32 rx_runt_errs;
+	u32 done_marker;
+};
+
+enum Rx_ring_state_bits {
+	RrNoMem=1, RrPostponed=2, RrNoResources=4, RrOOMReported=8,
+};
+
+/* Do not change the position (alignment) of the first few elements!
+   The later elements are grouped for cache locality.
+
+   Unfortunately, all the positions have been shifted since then.
+   A new re-alignment is required.  2000/03/06  SAW */
+struct speedo_private {
+	struct TxFD *tx_ring;		/* Commands (usually CmdTxPacket). */
+	struct RxFD *rx_ringp[RX_RING_SIZE];	/* Rx descriptor, used as ring. */
+	/* The addresses of a Tx/Rx-in-place packets/buffers. */
+	struct sk_buff *tx_skbuff[TX_RING_SIZE];
+	struct sk_buff *rx_skbuff[RX_RING_SIZE];
+	/* Mapped addresses of the rings. */
+	dma_addr_t tx_ring_dma;
+#define TX_RING_ELEM_DMA(sp, n) ((sp)->tx_ring_dma + (n)*sizeof(struct TxFD))
+	dma_addr_t rx_ring_dma[RX_RING_SIZE];
+	struct descriptor *last_cmd;	/* Last command sent. */
+	unsigned int cur_tx, dirty_tx;	/* The ring entries to be free()ed. */
+	spinlock_t lock;		/* Group with Tx control cache line. */
+	u32 tx_threshold;		/* The value for txdesc.count.
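+					   (Starts as 0x01208000 in speedo_resume();
+					   speedo_tx_buffer_gc() raises it by
+					   0x00040000 after a Tx underrun.)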
*/ + struct RxFD *last_rxf; /* Last filled RX buffer. */ + dma_addr_t last_rxf_dma; + unsigned int cur_rx, dirty_rx; /* The next free ring entry */ + long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ + struct net_device_stats stats; + struct speedo_stats *lstats; + dma_addr_t lstats_dma; + int chip_id; + struct pci_dev *pdev; + struct timer_list timer; /* Media selection timer. */ + struct speedo_mc_block *mc_setup_head;/* Multicast setup frame list head. */ + struct speedo_mc_block *mc_setup_tail;/* Multicast setup frame list tail. */ + long in_interrupt; /* Word-aligned dev->interrupt */ + unsigned char acpi_pwr; + signed char rx_mode; /* Current PROMISC/ALLMULTI setting. */ + unsigned int tx_full:1; /* The Tx queue is full. */ + unsigned int full_duplex:1; /* Full-duplex operation requested. */ + unsigned int flow_ctrl:1; /* Use 802.3x flow control. */ + unsigned int rx_bug:1; /* Work around receiver hang errata. */ + unsigned char default_port:8; /* Last dev->if_port value. */ + unsigned char rx_ring_state; /* RX ring status flags. */ + unsigned short phy[2]; /* PHY media interfaces available. */ + unsigned short advertising; /* Current PHY advertised caps. */ + unsigned short partner; /* Link partner caps. */ +#ifdef CONFIG_PM + u32 pm_state[16]; +#endif +}; + +/* The parameters for a CmdConfigure operation. + There are so many options that it would be difficult to document each bit. + We mostly use the default or recommended settings. */ +static const char i82557_config_cmd[CONFIG_DATA_SIZE] = { + 22, 0x08, 0, 0, 0, 0, 0x32, 0x03, 1, /* 1=Use MII 0=Use AUI */ + 0, 0x2E, 0, 0x60, 0, + 0xf2, 0x48, 0, 0x40, 0xf2, 0x80, /* 0x40=Force full-duplex */ + 0x3f, 0x05, }; +static const char i82558_config_cmd[CONFIG_DATA_SIZE] = { + 22, 0x08, 0, 1, 0, 0, 0x22, 0x03, 1, /* 1=Use MII 0=Use AUI */ + 0, 0x2E, 0, 0x60, 0x08, 0x88, + 0x68, 0, 0x40, 0xf2, 0x84, /* Disable FC */ + 0x31, 0x05, }; + +/* PHY media interface chips. 
*/ +static const char *phys[] = { + "None", "i82553-A/B", "i82553-C", "i82503", + "DP83840", "80c240", "80c24", "i82555", + "unknown-8", "unknown-9", "DP83840A", "unknown-11", + "unknown-12", "unknown-13", "unknown-14", "unknown-15", }; +enum phy_chips { NonSuchPhy=0, I82553AB, I82553C, I82503, DP83840, S80C240, + S80C24, I82555, DP83840A=10, }; +static const char is_mii[] = { 0, 1, 1, 0, 1, 1, 0, 1 }; +#define EE_READ_CMD (6) + +static int eepro100_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent); +static void eepro100_remove_one (struct pci_dev *pdev); +#ifdef CONFIG_PM +static int eepro100_suspend (struct pci_dev *pdev, u32 state); +static int eepro100_resume (struct pci_dev *pdev); +#endif + +static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len); +static int mdio_read(long ioaddr, int phy_id, int location); +static int mdio_write(long ioaddr, int phy_id, int location, int value); +static int speedo_open(struct net_device *dev); +static void speedo_resume(struct net_device *dev); +static void speedo_timer(unsigned long data); +static void speedo_init_rx_ring(struct net_device *dev); +static void speedo_tx_timeout(struct net_device *dev); +static int speedo_start_xmit(struct sk_buff *skb, struct net_device *dev); +static void speedo_refill_rx_buffers(struct net_device *dev, int force); +static int speedo_rx(struct net_device *dev); +static void speedo_tx_buffer_gc(struct net_device *dev); +static void speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs); +static int speedo_close(struct net_device *dev); +static struct net_device_stats *speedo_get_stats(struct net_device *dev); +static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static void set_rx_mode(struct net_device *dev); +static void speedo_show_state(struct net_device *dev); + + + +#ifdef honor_default_port +/* Optional driver feature to allow forcing the transceiver setting. + Not recommended. */ +static int mii_ctrl[8] = { 0x3300, 0x3100, 0x0000, 0x0100, + 0x2000, 0x2100, 0x0400, 0x3100}; +#endif + +static int __devinit eepro100_init_one (struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned long ioaddr; + int irq; + int acpi_idle_state = 0, pm; + static int cards_found /* = 0 */; + + static int did_version /* = 0 */; /* Already printed version info. 
*/
+	if (speedo_debug > 0 && did_version++ == 0)
+		printk(version);
+
+	if (!request_region(pci_resource_start(pdev, 1),
+			pci_resource_len(pdev, 1), "eepro100")) {
+		printk (KERN_ERR "eepro100: cannot reserve I/O ports\n");
+		goto err_out_none;
+	}
+	if (!request_mem_region(pci_resource_start(pdev, 0),
+			pci_resource_len(pdev, 0), "eepro100")) {
+		printk (KERN_ERR "eepro100: cannot reserve MMIO region\n");
+		goto err_out_free_pio_region;
+	}
+
+	irq = pdev->irq;
+#ifdef USE_IO
+	ioaddr = pci_resource_start(pdev, 1);
+	if (speedo_debug > 2)
+		printk("Found Intel i82557 PCI Speedo at I/O %#lx, IRQ %d.\n",
+			   ioaddr, irq);
+#else
+	ioaddr = (unsigned long)ioremap(pci_resource_start(pdev, 0),
+					pci_resource_len(pdev, 0));
+	if (!ioaddr) {
+		printk (KERN_ERR "eepro100: cannot remap MMIO region %lx @ %lx\n",
+				pci_resource_len(pdev, 0), pci_resource_start(pdev, 0));
+		goto err_out_free_mmio_region;
+	}
+	if (speedo_debug > 2)
+		printk("Found Intel i82557 PCI Speedo, MMIO at %#lx, IRQ %d.\n",
+			   pci_resource_start(pdev, 0), irq);
+#endif
+
+	/* save the power state before pci_enable_device() overwrites it */
+	pm = pci_find_capability(pdev, PCI_CAP_ID_PM);
+	if (pm) {
+		u16 pwr_command;
+		pci_read_config_word(pdev, pm + PCI_PM_CTRL, &pwr_command);
+		acpi_idle_state = pwr_command & PCI_PM_CTRL_STATE_MASK;
+	}
+
+	if (pci_enable_device(pdev))
+		goto err_out_free_mmio_region;
+
+	pci_set_master(pdev);
+
+	if (speedo_found1(pdev, ioaddr, cards_found, acpi_idle_state) == 0)
+		cards_found++;
+	else
+		goto err_out_iounmap;
+
+	return 0;
+
+err_out_iounmap: ;
+#ifndef USE_IO
+	iounmap ((void *)ioaddr);
+#endif
+err_out_free_mmio_region:
+	release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+err_out_free_pio_region:
+	release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1));
+err_out_none:
+	return -ENODEV;
+}
+
+static int speedo_found1(struct pci_dev *pdev,
+		long ioaddr, int card_idx, int acpi_idle_state)
+{
+	struct net_device *dev;
+	struct speedo_private *sp;
+	const char *product;
+	int i, option;
+	u16 eeprom[0x100];
+	int size;
+	void *tx_ring_space;
+	dma_addr_t tx_ring_dma;
+
+	size = TX_RING_SIZE * sizeof(struct TxFD) + sizeof(struct speedo_stats);
+	tx_ring_space = pci_alloc_consistent(pdev, size, &tx_ring_dma);
+	if (tx_ring_space == NULL)
+		return -1;
+
+	dev = init_etherdev(NULL, sizeof(struct speedo_private));
+	if (dev == NULL) {
+		printk(KERN_ERR "eepro100: Could not allocate ethernet device.\n");
+		pci_free_consistent(pdev, size, tx_ring_space, tx_ring_dma);
+		return -1;
+	}
+
+	if (dev->mem_start > 0)
+		option = dev->mem_start;
+	else if (card_idx >= 0 && options[card_idx] >= 0)
+		option = options[card_idx];
+	else
+		option = 0;
+
+	/* Read the station address EEPROM before doing the reset.
+	   Nominally this should even be done before accepting the device, but
+	   then we wouldn't have a device name with which to report the error.
+	   The size test is for 6 bit vs. 8 bit address serial EEPROMs.
+	*/
+	{
+		unsigned long iobase;
+		int read_cmd, ee_size;
+		u16 sum;
+		int j;
+
+		/* Use IO only to avoid postponed writes and satisfy EEPROM timing
+		   requirements.
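+		   (MMIO writes may be posted by the bridge, so back-to-back
+		   toggles of the EEPROM clock lines could be collapsed or
+		   delayed; port I/O is non-posted and keeps the bit-banged
+		   timing honest.)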
*/
+		iobase = pci_resource_start(pdev, 1);
+		if ((do_eeprom_cmd(iobase, EE_READ_CMD << 24, 27) & 0xffe0000)
+			== 0xffe0000) {
+			ee_size = 0x100;
+			read_cmd = EE_READ_CMD << 24;
+		} else {
+			ee_size = 0x40;
+			read_cmd = EE_READ_CMD << 22;
+		}
+
+		for (j = 0, i = 0, sum = 0; i < ee_size; i++) {
+			u16 value = do_eeprom_cmd(iobase, read_cmd | (i << 16), 27);
+			eeprom[i] = value;
+			sum += value;
+			if (i < 3) {
+				dev->dev_addr[j++] = value;
+				dev->dev_addr[j++] = value >> 8;
+			}
+		}
+		if (sum != 0xBABA)
+			printk(KERN_WARNING "%s: Invalid EEPROM checksum %#4.4x, "
+				   "check settings before activating this device!\n",
+				   dev->name, sum);
+		/* Don't unregister_netdev(dev); as the EEPro may actually be
+		   usable, especially if the MAC address is set later.
+		   On the other hand, it may be unusable if MDI data is corrupted. */
+	}
+
+	/* Reset the chip: stop Tx and Rx processes and clear counters.
+	   This takes less than 10usec and will easily finish before the next
+	   action. */
+	outl(PortReset, ioaddr + SCBPort);
+	inl(ioaddr + SCBPort);
+	udelay(10);
+
+	if (eeprom[3] & 0x0100)
+		product = "OEM i82557/i82558 10/100 Ethernet";
+	else
+		product = pdev->name;
+
+	printk(KERN_INFO "%s: %s, ", dev->name, product);
+
+	for (i = 0; i < 5; i++)
+		printk("%2.2X:", dev->dev_addr[i]);
+	printk("%2.2X, ", dev->dev_addr[i]);
+#ifdef USE_IO
+	printk("I/O at %#3lx, ", ioaddr);
+#endif
+	printk("IRQ %d.\n", pdev->irq);
+
+#if 1 || defined(kernel_bloat)
+	/* OK, this is pure kernel bloat.  I don't like it when other drivers
+	   waste non-pageable kernel space to emit similar messages, but I need
+	   them for bug reports. */
+	{
+		const char *connectors[] = {" RJ45", " BNC", " AUI", " MII"};
+		/* The self-test results must be paragraph aligned. */
+		volatile s32 *self_test_results;
+		int boguscnt = 16000;	/* Timeout for self-test. */
+		if ((eeprom[3] & 0x03) != 0x03)
+			printk(KERN_INFO "  Receiver lock-up bug exists -- enabling"
+				   " work-around.\n");
+		printk(KERN_INFO "  Board assembly %4.4x%2.2x-%3.3d, Physical"
+			   " connectors present:",
+			   eeprom[8], eeprom[9]>>8, eeprom[9] & 0xff);
+		for (i = 0; i < 4; i++)
+			if (eeprom[5] & (1<<i))
+				printk(connectors[i]);
+		printk("\n" KERN_INFO "  Primary interface chip %s PHY #%d.\n",
+			   phys[(eeprom[6]>>8)&15], eeprom[6] & 0x1f);
+		if (eeprom[7] & 0x0700)
+			printk(KERN_INFO "    Secondary interface chip %s.\n",
+				   phys[(eeprom[7]>>8)&7]);
+		if (((eeprom[6]>>8) & 0x3f) == DP83840
+			|| ((eeprom[6]>>8) & 0x3f) == DP83840A) {
+			int mdi_reg23 = mdio_read(ioaddr, eeprom[6] & 0x1f, 23) | 0x0422;
+			if (congenb)
+				mdi_reg23 |= 0x0100;
+			printk(KERN_INFO "  DP83840 specific setup, setting register 23 to %4.4x.\n",
+				   mdi_reg23);
+			mdio_write(ioaddr, eeprom[6] & 0x1f, 23, mdi_reg23);
+		}
+		if ((option >= 0) && (option & 0x70)) {
+			printk(KERN_INFO "  Forcing %dMbs %s-duplex operation.\n",
+				   (option & 0x20 ? 100 : 10),
+				   (option & 0x10 ? "full" : "half"));
+			mdio_write(ioaddr, eeprom[6] & 0x1f, 0,
+					   ((option & 0x20) ? 0x2000 : 0) |	/* 100mbps? */
+					   ((option & 0x10) ? 0x0100 : 0));	/* Full duplex? */
+		}
+
+		/* Perform a system self-test. */
+		self_test_results = (s32*) ((((long) tx_ring_space) + 15) & ~0xf);
+		self_test_results[0] = 0;
+		self_test_results[1] = -1;
+		outl(tx_ring_dma | PortSelfTest, ioaddr + SCBPort);
+		do {
+			udelay(10);
+		} while (self_test_results[1] == -1 && --boguscnt >= 0);
+
+		if (boguscnt < 0) {		/* Test optimized out.
*/
+			printk(KERN_ERR "Self test failed, status %8.8x:\n"
+				   KERN_ERR " Failure to initialize the i82557.\n"
+				   KERN_ERR " Verify that the card is in a bus-master"
+				   " capable slot.\n",
+				   self_test_results[1]);
+		} else
+			printk(KERN_INFO "  General self-test: %s.\n"
+				   KERN_INFO "  Serial sub-system self-test: %s.\n"
+				   KERN_INFO "  Internal registers self-test: %s.\n"
+				   KERN_INFO "  ROM checksum self-test: %s (%#8.8x).\n",
+				   self_test_results[1] & 0x1000 ? "failed" : "passed",
+				   self_test_results[1] & 0x0020 ? "failed" : "passed",
+				   self_test_results[1] & 0x0008 ? "failed" : "passed",
+				   self_test_results[1] & 0x0004 ? "failed" : "passed",
+				   self_test_results[0]);
+	}
+#endif  /* kernel_bloat */
+
+	outl(PortReset, ioaddr + SCBPort);
+	inl(ioaddr + SCBPort);
+	udelay(10);
+
+	/* Return the chip to its original power state. */
+	SET_POWER_STATE(pdev, acpi_idle_state);
+
+	pci_set_drvdata (pdev, dev);
+
+	dev->base_addr = ioaddr;
+	dev->irq = pdev->irq;
+
+	sp = dev->priv;
+	sp->pdev = pdev;
+	sp->acpi_pwr = acpi_idle_state;
+	sp->tx_ring = tx_ring_space;
+	sp->tx_ring_dma = tx_ring_dma;
+	sp->lstats = (struct speedo_stats *)(sp->tx_ring + TX_RING_SIZE);
+	sp->lstats_dma = TX_RING_ELEM_DMA(sp, TX_RING_SIZE);
+	init_timer(&sp->timer); /* used in ioctl() */
+
+	sp->full_duplex = option >= 0 && (option & 0x10) ? 1 : 0;
+	if (card_idx >= 0) {
+		if (full_duplex[card_idx] >= 0)
+			sp->full_duplex = full_duplex[card_idx];
+	}
+	sp->default_port = option >= 0 ? (option & 0x0f) : 0;
+
+	sp->phy[0] = eeprom[6];
+	sp->phy[1] = eeprom[7];
+	sp->rx_bug = (eeprom[3] & 0x03) == 3 ? 0 : 1;
+
+	if (sp->rx_bug)
+		printk(KERN_INFO "  Receiver lock-up workaround activated.\n");
+
+	/* The Speedo-specific entries in the device structure. */
+	dev->open = &speedo_open;
+	dev->hard_start_xmit = &speedo_start_xmit;
+	netif_set_tx_timeout(dev, &speedo_tx_timeout, TX_TIMEOUT);
+	dev->stop = &speedo_close;
+	dev->get_stats = &speedo_get_stats;
+	dev->set_multicast_list = &set_rx_mode;
+	dev->do_ioctl = &speedo_ioctl;
+
+	return 0;
+}
+
+/* Serial EEPROM section.
+   A "bit" grungy, but we work our way through bit-by-bit :->. */
+/*  EEPROM_Ctrl bits. */
+#define EE_SHIFT_CLK	0x01	/* EEPROM shift clock. */
+#define EE_CS			0x02	/* EEPROM chip select. */
+#define EE_DATA_WRITE	0x04	/* EEPROM chip data in. */
+#define EE_DATA_READ	0x08	/* EEPROM chip data out. */
+#define EE_ENB (0x4800 | EE_CS)
+#define EE_WRITE_0		0x4802
+#define EE_WRITE_1		0x4806
+#define EE_OFFSET		SCBeeprom
+
+/* The fixes for the code were kindly provided by Dragan Stancevic
+   to strictly follow Intel specifications of EEPROM
+   access timing.
+   The publicly available sheet 64486302 (sec. 3.1) specifies 1us access
+   interval for serial EEPROM.  However, it looks like there is an
+   additional requirement dictating larger udelay's in the code below.
+   2000/05/24  SAW */
+static int do_eeprom_cmd(long ioaddr, int cmd, int cmd_len)
+{
+	unsigned retval = 0;
+	long ee_addr = ioaddr + SCBeeprom;
+
+	io_outw(EE_ENB, ee_addr); udelay(2);
+	io_outw(EE_ENB | EE_SHIFT_CLK, ee_addr); udelay(2);
+
+	/* Shift the command bits out. */
+	do {
+		short dataval = (cmd & (1 << cmd_len)) ? EE_WRITE_1 : EE_WRITE_0;
+		io_outw(dataval, ee_addr); udelay(2);
+		io_outw(dataval | EE_SHIFT_CLK, ee_addr); udelay(2);
+		retval = (retval << 1) | ((io_inw(ee_addr) & EE_DATA_READ) ? 1 : 0);
+	} while (--cmd_len >= 0);
+	io_outw(EE_ENB, ee_addr); udelay(2);
+
+	/* Terminate the EEPROM access.
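+	   A typical use is the EEPROM scan in speedo_found1() above, which
+	   reads word n of a small-address part as
+	       do_eeprom_cmd(iobase, (EE_READ_CMD << 22) | (n << 16), 27);
+	   with the 16 data bits arriving in the low bits of the return value.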
*/ + io_outw(EE_ENB & ~EE_CS, ee_addr); + return retval; +} + +static int mdio_read(long ioaddr, int phy_id, int location) +{ + int val, boguscnt = 64*10; /* <64 usec. to complete, typ 27 ticks */ + outl(0x08000000 | (location<<16) | (phy_id<<21), ioaddr + SCBCtrlMDI); + do { + val = inl(ioaddr + SCBCtrlMDI); + if (--boguscnt < 0) { + printk(KERN_ERR " mdio_read() timed out with val = %8.8x.\n", val); + break; + } + } while (! (val & 0x10000000)); + return val & 0xffff; +} + +static int mdio_write(long ioaddr, int phy_id, int location, int value) +{ + int val, boguscnt = 64*10; /* <64 usec. to complete, typ 27 ticks */ + outl(0x04000000 | (location<<16) | (phy_id<<21) | value, + ioaddr + SCBCtrlMDI); + do { + val = inl(ioaddr + SCBCtrlMDI); + if (--boguscnt < 0) { + printk(KERN_ERR" mdio_write() timed out with val = %8.8x.\n", val); + break; + } + } while (! (val & 0x10000000)); + return val & 0xffff; +} + + +static int +speedo_open(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + int retval; + + if (speedo_debug > 1) + printk(KERN_DEBUG "%s: speedo_open() irq %d.\n", dev->name, dev->irq); + + MOD_INC_USE_COUNT; + + SET_POWER_STATE(sp->pdev, 0); + + /* Set up the Tx queue early.. */ + sp->cur_tx = 0; + sp->dirty_tx = 0; + sp->last_cmd = 0; + sp->tx_full = 0; + spin_lock_init(&sp->lock); + sp->in_interrupt = 0; + + /* .. we can safely take handler calls during init. */ + retval = request_irq(dev->irq, &speedo_interrupt, SA_SHIRQ, dev->name, dev); + if (retval) { + MOD_DEC_USE_COUNT; + return retval; + } + + dev->if_port = sp->default_port; + +#ifdef oh_no_you_dont_unless_you_honour_the_options_passed_in_to_us + /* Retrigger negotiation to reset previous errors. */ + if ((sp->phy[0] & 0x8000) == 0) { + int phy_addr = sp->phy[0] & 0x1f ; + /* Use 0x3300 for restarting NWay, other values to force xcvr: + 0x0000 10-HD + 0x0100 10-FD + 0x2000 100-HD + 0x2100 100-FD + */ +#ifdef honor_default_port + mdio_write(ioaddr, phy_addr, 0, mii_ctrl[dev->default_port & 7]); +#else + mdio_write(ioaddr, phy_addr, 0, 0x3300); +#endif + } +#endif + + speedo_init_rx_ring(dev); + + /* Fire up the hardware. */ + outw(SCBMaskAll, ioaddr + SCBCmd); + speedo_resume(dev); + + netdevice_start(dev); + netif_start_queue(dev); + + /* Setup the chip and configure the multicast list. */ + sp->mc_setup_head = NULL; + sp->mc_setup_tail = NULL; + sp->flow_ctrl = sp->partner = 0; + sp->rx_mode = -1; /* Invalid -> always reset the mode. */ + set_rx_mode(dev); + if ((sp->phy[0] & 0x8000) == 0) + sp->advertising = mdio_read(ioaddr, sp->phy[0] & 0x1f, 4); + + if (speedo_debug > 2) { + printk(KERN_DEBUG "%s: Done speedo_open(), status %8.8x.\n", + dev->name, inw(ioaddr + SCBStatus)); + } + + /* Set the timer. The timer serves a dual purpose: + 1) to monitor the media interface (e.g. link beat) and perhaps switch + to an alternate media type + 2) to monitor Rx activity, and restart the Rx process if the receiver + hangs. */ + sp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ + sp->timer.data = (unsigned long)dev; + sp->timer.function = &speedo_timer; /* timer handler */ + add_timer(&sp->timer); + + /* No need to wait for the command unit to accept here. */ + if ((sp->phy[0] & 0x8000) == 0) + mdio_read(ioaddr, sp->phy[0] & 0x1f, 0); + + return 0; +} + +/* Start the chip hardware after a full reset. 
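+   The order below matters: zero the RU and CU base registers so that all
+   descriptor pointers are absolute, load the statistics block address,
+   restart the RU on the current RxFD, queue a CmdIASetup frame carrying
+   the station address, and finally CUStart the Tx ring (FCP and ER
+   interrupts still masked -- see the note at the end of the function).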
*/ +static void speedo_resume(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + + /* Start with a Tx threshold of 256 (0x..20.... 8 byte units). */ + sp->tx_threshold = 0x01208000; + + /* Set the segment registers to '0'. */ + wait_for_cmd_done(ioaddr + SCBCmd); + outl(0, ioaddr + SCBPointer); + /* impose a delay to avoid a bug */ + inl(ioaddr + SCBPointer); + udelay(10); + outb(RxAddrLoad, ioaddr + SCBCmd); + wait_for_cmd_done(ioaddr + SCBCmd); + outb(CUCmdBase, ioaddr + SCBCmd); + + /* Load the statistics block and rx ring addresses. */ + wait_for_cmd_done(ioaddr + SCBCmd); + outl(sp->lstats_dma, ioaddr + SCBPointer); + outb(CUStatsAddr, ioaddr + SCBCmd); + sp->lstats->done_marker = 0; + + if (sp->rx_ringp[sp->cur_rx % RX_RING_SIZE] == NULL) { + if (speedo_debug > 2) + printk(KERN_DEBUG "%s: NULL cur_rx in speedo_resume().\n", + dev->name); + } else { + wait_for_cmd_done(ioaddr + SCBCmd); + outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], + ioaddr + SCBPointer); + outb(RxStart, ioaddr + SCBCmd); + } + + wait_for_cmd_done(ioaddr + SCBCmd); + outb(CUDumpStats, ioaddr + SCBCmd); + udelay(30); + + /* Fill the first command with our physical address. */ + { + struct descriptor *ias_cmd; + + ias_cmd = + (struct descriptor *)&sp->tx_ring[sp->cur_tx++ % TX_RING_SIZE]; + /* Avoid a bug(?!) here by marking the command already completed. */ + ias_cmd->cmd_status = cpu_to_le32((CmdSuspend | CmdIASetup) | 0xa000); + ias_cmd->link = + cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE)); + memcpy(ias_cmd->params, dev->dev_addr, 6); + sp->last_cmd = ias_cmd; + } + + /* Start the chip's Tx process and unmask interrupts. */ + wait_for_cmd_done(ioaddr + SCBCmd); + outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE), + ioaddr + SCBPointer); + /* We are not ACK-ing FCP and ER in the interrupt handler yet so they should + remain masked --Dragan */ + outw(CUStart | SCBMaskEarlyRx | SCBMaskFlowCtl, ioaddr + SCBCmd); +} + +/* Media monitoring and control. */ +static void speedo_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + int phy_num = sp->phy[0] & 0x1f; + + /* We have MII and lost link beat. */ + if ((sp->phy[0] & 0x8000) == 0) { + int partner = mdio_read(ioaddr, phy_num, 5); + if (partner != sp->partner) { + int flow_ctrl = sp->advertising & partner & 0x0400 ? 1 : 0; + if (speedo_debug > 2) { + printk(KERN_DEBUG "%s: Link status change.\n", dev->name); + printk(KERN_DEBUG "%s: Old partner %x, new %x, adv %x.\n", + dev->name, sp->partner, partner, sp->advertising); + } + sp->partner = partner; + if (flow_ctrl != sp->flow_ctrl) { + sp->flow_ctrl = flow_ctrl; + sp->rx_mode = -1; /* Trigger a reload. */ + } + /* Clear sticky bit. */ + mdio_read(ioaddr, phy_num, 1); + /* If link beat has returned... */ + if (mdio_read(ioaddr, phy_num, 1) & 0x0004) + dev->flags |= IFF_RUNNING; + else + dev->flags &= ~IFF_RUNNING; + } + } + if (speedo_debug > 3) { + printk(KERN_DEBUG "%s: Media control tick, status %4.4x.\n", + dev->name, inw(ioaddr + SCBStatus)); + } + if (sp->rx_mode < 0 || + (sp->rx_bug && jiffies - sp->last_rx_time > 2*HZ)) { + /* We haven't received a packet in a Long Time. We might have been + bitten by the receiver hang bug. This can be cleared by sending + a set multicast list command. 
*/ + if (speedo_debug > 3) + printk(KERN_DEBUG "%s: Sending a multicast list set command" + " from a timer routine," + " m=%d, j=%ld, l=%ld.\n", + dev->name, sp->rx_mode, jiffies, sp->last_rx_time); + set_rx_mode(dev); + } + /* We must continue to monitor the media. */ + sp->timer.expires = RUN_AT(2*HZ); /* 2.0 sec. */ + add_timer(&sp->timer); +#if defined(timer_exit) + timer_exit(&sp->timer); +#endif +} + +static void speedo_show_state(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + int i; + + /* Print a few items for debugging. */ + if (speedo_debug > 0) { + int i; + printk(KERN_DEBUG "%s: Tx ring dump, Tx queue %u / %u:\n", dev->name, + sp->cur_tx, sp->dirty_tx); + for (i = 0; i < TX_RING_SIZE; i++) + printk(KERN_DEBUG "%s: %c%c%2d %8.8x.\n", dev->name, + i == sp->dirty_tx % TX_RING_SIZE ? '*' : ' ', + i == sp->cur_tx % TX_RING_SIZE ? '=' : ' ', + i, sp->tx_ring[i].status); + } + printk(KERN_DEBUG "%s: Printing Rx ring" + " (next to receive into %u, dirty index %u).\n", + dev->name, sp->cur_rx, sp->dirty_rx); + + for (i = 0; i < RX_RING_SIZE; i++) + printk(KERN_DEBUG "%s: %c%c%c%2d %8.8x.\n", dev->name, + sp->rx_ringp[i] == sp->last_rxf ? 'l' : ' ', + i == sp->dirty_rx % RX_RING_SIZE ? '*' : ' ', + i == sp->cur_rx % RX_RING_SIZE ? '=' : ' ', + i, (sp->rx_ringp[i] != NULL) ? + (unsigned)sp->rx_ringp[i]->status : 0); + +#if 0 + { + long ioaddr = dev->base_addr; + int phy_num = sp->phy[0] & 0x1f; + for (i = 0; i < 16; i++) { + /* FIXME: what does it mean? --SAW */ + if (i == 6) i = 21; + printk(KERN_DEBUG "%s: PHY index %d register %d is %4.4x.\n", + dev->name, phy_num, i, mdio_read(ioaddr, phy_num, i)); + } + } +#endif + +} + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void +speedo_init_rx_ring(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + struct RxFD *rxf, *last_rxf = NULL; + dma_addr_t last_rxf_dma = 0 /* to shut up the compiler */; + int i; + + sp->cur_rx = 0; + + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb; + skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD)); + sp->rx_skbuff[i] = skb; + if (skb == NULL) + break; /* OK. Just initially short of Rx bufs. */ + skb->dev = dev; /* Mark as being used by this device. */ + rxf = (struct RxFD *)skb->tail; + sp->rx_ringp[i] = rxf; + sp->rx_ring_dma[i] = + pci_map_single(sp->pdev, rxf, + PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_BIDIRECTIONAL); + skb_reserve(skb, sizeof(struct RxFD)); + if (last_rxf) { + last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]); + pci_dma_sync_single(sp->pdev, last_rxf_dma, + sizeof(struct RxFD), PCI_DMA_TODEVICE); + } + last_rxf = rxf; + last_rxf_dma = sp->rx_ring_dma[i]; + rxf->status = cpu_to_le32(0x00000001); /* '1' is flag value only. */ + rxf->link = 0; /* None yet. */ + /* This field unused by i82557. */ + rxf->rx_buf_addr = 0xffffffff; + rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i], + sizeof(struct RxFD), PCI_DMA_TODEVICE); + } + sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + /* Mark the last entry as end-of-list. */ + last_rxf->status = cpu_to_le32(0xC0000002); /* '2' is flag value only. 
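+									   (The 0xC0000000 part holds the
+									   end-of-list/suspend bits that
+									   stop the RU here; speedo_rx_link()
+									   clears them once a newer tail is
+									   linked in.)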
*/
+	pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1],
+						sizeof(struct RxFD), PCI_DMA_TODEVICE);
+	sp->last_rxf = last_rxf;
+	sp->last_rxf_dma = last_rxf_dma;
+}
+
+static void speedo_purge_tx(struct net_device *dev)
+{
+	struct speedo_private *sp = (struct speedo_private *)dev->priv;
+	int entry;
+
+	while ((int)(sp->cur_tx - sp->dirty_tx) > 0) {
+		entry = sp->dirty_tx % TX_RING_SIZE;
+		if (sp->tx_skbuff[entry]) {
+			sp->stats.tx_errors++;
+			pci_unmap_single(sp->pdev,
+					le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0),
+					sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE);
+			dev_kfree_skb_irq(sp->tx_skbuff[entry]);
+			sp->tx_skbuff[entry] = 0;
+		}
+		sp->dirty_tx++;
+	}
+	while (sp->mc_setup_head != NULL) {
+		struct speedo_mc_block *t;
+		if (speedo_debug > 1)
+			printk(KERN_DEBUG "%s: freeing mc frame.\n", dev->name);
+		pci_unmap_single(sp->pdev, sp->mc_setup_head->frame_dma,
+				sp->mc_setup_head->len, PCI_DMA_TODEVICE);
+		t = sp->mc_setup_head->next;
+		kfree(sp->mc_setup_head);
+		sp->mc_setup_head = t;
+	}
+	sp->mc_setup_tail = NULL;
+	sp->tx_full = 0;
+	netif_wake_queue(dev);
+}
+
+static void reset_mii(struct net_device *dev)
+{
+	struct speedo_private *sp = (struct speedo_private *)dev->priv;
+	long ioaddr = dev->base_addr;
+	/* Reset the MII transceiver, suggested by Fred Young @ scalable.com. */
+	if ((sp->phy[0] & 0x8000) == 0) {
+		int phy_addr = sp->phy[0] & 0x1f;
+		int advertising = mdio_read(ioaddr, phy_addr, 4);
+		int mii_bmcr = mdio_read(ioaddr, phy_addr, 0);
+		mdio_write(ioaddr, phy_addr, 0, 0x0400);
+		mdio_write(ioaddr, phy_addr, 1, 0x0000);
+		mdio_write(ioaddr, phy_addr, 4, 0x0000);
+		mdio_write(ioaddr, phy_addr, 0, 0x8000);
+#ifdef honor_default_port
+		mdio_write(ioaddr, phy_addr, 0, mii_ctrl[dev->default_port & 7]);
+#else
+		mdio_read(ioaddr, phy_addr, 0);
+		mdio_write(ioaddr, phy_addr, 0, mii_bmcr);
+		mdio_write(ioaddr, phy_addr, 4, advertising);
+#endif
+	}
+}
+
+static void speedo_tx_timeout(struct net_device *dev)
+{
+	struct speedo_private *sp = (struct speedo_private *)dev->priv;
+	long ioaddr = dev->base_addr;
+	int status = inw(ioaddr + SCBStatus);
+	unsigned long flags;
+
+	printk(KERN_WARNING "%s: Transmit timed out: status %4.4x "
+		   " %4.4x at %d/%d command %8.8x.\n",
+		   dev->name, status, inw(ioaddr + SCBCmd),
+		   sp->dirty_tx, sp->cur_tx,
+		   sp->tx_ring[sp->dirty_tx % TX_RING_SIZE].status);
+
+	speedo_show_state(dev);
+#if 0
+	if ((status & 0x00C0) != 0x0080
+		&&  (status & 0x003C) == 0x0010) {
+		/* Only the command unit has stopped. */
+		printk(KERN_WARNING "%s: Trying to restart the transmitter...\n",
+			   dev->name);
+		outl(TX_RING_ELEM_DMA(sp, sp->dirty_tx % TX_RING_SIZE),
+			 ioaddr + SCBPointer);
+		outw(CUStart, ioaddr + SCBCmd);
+		reset_mii(dev);
+	} else {
+#else
+	{
+#endif
+		del_timer_sync(&sp->timer);
+		/* Reset the Tx and Rx units. */
+		outl(PortReset, ioaddr + SCBPort);
+		/* We may get spurious interrupts here.  But I don't think they can
+		   do much harm.  1999/12/09 SAW */
+		udelay(10);
+		/* Disable interrupts. */
+		outw(SCBMaskAll, ioaddr + SCBCmd);
+		synchronize_irq();
+		speedo_tx_buffer_gc(dev);
+		/* Free as much as possible.
+		   It helps recovery from a hang caused by out-of-memory.
+		   It also simplifies speedo_resume() in case TX ring is full or
+		   close to full.
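+		   The recovery order below is: purge the Tx ring, refill the
+		   Rx ring, restart the chip under the lock, reload the Rx
+		   mode, reset the MII transceiver, and only then re-arm the
+		   media timer.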
*/
+		speedo_purge_tx(dev);
+		speedo_refill_rx_buffers(dev, 1);
+		spin_lock_irqsave(&sp->lock, flags);
+		speedo_resume(dev);
+		sp->rx_mode = -1;
+		dev->trans_start = jiffies;
+		spin_unlock_irqrestore(&sp->lock, flags);
+		set_rx_mode(dev); /* it takes the spinlock itself --SAW */
+		/* Reset MII transceiver.  Do it before starting the timer to serialize
+		   mdio_xxx operations.  Yes, it's a paranoia :-)  2000/05/09 SAW */
+		reset_mii(dev);
+		sp->timer.expires = RUN_AT(2*HZ);
+		add_timer(&sp->timer);
+	}
+	return;
+}
+
+static int
+speedo_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct speedo_private *sp = (struct speedo_private *)dev->priv;
+	long ioaddr = dev->base_addr;
+	int entry;
+
+	/* Prevent interrupts from changing the Tx ring from underneath us. */
+	unsigned long flags;
+
+	spin_lock_irqsave(&sp->lock, flags);
+
+	/* Check if there is enough space. */
+	if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+		printk(KERN_ERR "%s: incorrect tbusy state, fixed.\n", dev->name);
+		netif_stop_queue(dev);
+		sp->tx_full = 1;
+		spin_unlock_irqrestore(&sp->lock, flags);
+		return 1;
+	}
+
+	/* Calculate the Tx descriptor entry. */
+	entry = sp->cur_tx++ % TX_RING_SIZE;
+
+	sp->tx_skbuff[entry] = skb;
+	sp->tx_ring[entry].status =
+		cpu_to_le32(CmdSuspend | CmdTx | CmdTxFlex);
+	if (!(entry & ((TX_RING_SIZE>>2)-1)))
+		sp->tx_ring[entry].status |= cpu_to_le32(CmdIntr);
+	sp->tx_ring[entry].link =
+		cpu_to_le32(TX_RING_ELEM_DMA(sp, sp->cur_tx % TX_RING_SIZE));
+	sp->tx_ring[entry].tx_desc_addr =
+		cpu_to_le32(TX_RING_ELEM_DMA(sp, entry) + TX_DESCR_BUF_OFFSET);
+	/* The data region is always in one buffer descriptor. */
+	sp->tx_ring[entry].count = cpu_to_le32(sp->tx_threshold);
+	sp->tx_ring[entry].tx_buf_addr0 =
+		cpu_to_le32(pci_map_single(sp->pdev, skb->data,
+					   skb->len, PCI_DMA_TODEVICE));
+	sp->tx_ring[entry].tx_buf_size0 = cpu_to_le32(skb->len);
+
+	/* workaround for hardware bug on 10 mbit half duplex */
+
+	if ((sp->partner==0) && (sp->chip_id==1)) {
+		wait_for_cmd_done(ioaddr + SCBCmd);
+		outb(0, ioaddr + SCBCmd);
+	}
+
+	/* Trigger the command unit resume. */
+	wait_for_cmd_done(ioaddr + SCBCmd);
+	clear_suspend(sp->last_cmd);
+	/* We want the time window between clearing suspend flag on the previous
+	   command and resuming CU to be as small as possible.
+	   Interrupts in between are very undesired.  --SAW */
+	outb(CUResume, ioaddr + SCBCmd);
+	sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry];
+
+	/* Leave room for set_rx_mode().  If there is no more space than that
+	   reserved for the multicast filter, mark the ring as full. */
+	if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) {
+		netif_stop_queue(dev);
+		sp->tx_full = 1;
+	}
+
+	spin_unlock_irqrestore(&sp->lock, flags);
+
+	dev->trans_start = jiffies;
+
+	return 0;
+}
+
+static void speedo_tx_buffer_gc(struct net_device *dev)
+{
+	unsigned int dirty_tx;
+	struct speedo_private *sp = (struct speedo_private *)dev->priv;
+
+	dirty_tx = sp->dirty_tx;
+	while ((int)(sp->cur_tx - dirty_tx) > 0) {
+		int entry = dirty_tx % TX_RING_SIZE;
+		int status = le32_to_cpu(sp->tx_ring[entry].status);
+
+		if (speedo_debug > 5)
+			printk(KERN_DEBUG " scavenge candidate %d status %4.4x.\n",
+				   entry, status);
+		if ((status & StatusComplete) == 0)
+			break;			/* It still hasn't been processed. */
+		if (status & TxUnderrun)
+			if (sp->tx_threshold < 0x01e08000) {
+				if (speedo_debug > 2)
+					printk(KERN_DEBUG "%s: TX underrun, threshold adjusted.\n",
+						   dev->name);
+				sp->tx_threshold += 0x00040000;
+			}
+		/* Free the original skb.
*/ + if (sp->tx_skbuff[entry]) { + sp->stats.tx_packets++; /* Count only user packets. */ + sp->stats.tx_bytes += sp->tx_skbuff[entry]->len; + pci_unmap_single(sp->pdev, + le32_to_cpu(sp->tx_ring[entry].tx_buf_addr0), + sp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(sp->tx_skbuff[entry]); + sp->tx_skbuff[entry] = 0; + } + dirty_tx++; + } + + if (speedo_debug && (int)(sp->cur_tx - dirty_tx) > TX_RING_SIZE) { + printk(KERN_ERR "out-of-sync dirty pointer, %d vs. %d," + " full=%d.\n", + dirty_tx, sp->cur_tx, sp->tx_full); + dirty_tx += TX_RING_SIZE; + } + + while (sp->mc_setup_head != NULL + && (int)(dirty_tx - sp->mc_setup_head->tx - 1) > 0) { + struct speedo_mc_block *t; + if (speedo_debug > 1) + printk(KERN_DEBUG "%s: freeing mc frame.\n", dev->name); + pci_unmap_single(sp->pdev, sp->mc_setup_head->frame_dma, + sp->mc_setup_head->len, PCI_DMA_TODEVICE); + t = sp->mc_setup_head->next; + kfree(sp->mc_setup_head); + sp->mc_setup_head = t; + } + if (sp->mc_setup_head == NULL) + sp->mc_setup_tail = NULL; + + sp->dirty_tx = dirty_tx; +} + +/* The interrupt handler does all of the Rx thread work and cleans up + after the Tx thread. */ +static void speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *)dev_instance; + struct speedo_private *sp; + long ioaddr, boguscnt = max_interrupt_work; + unsigned short status; + +#ifndef final_version + if (dev == NULL) { + printk(KERN_ERR "speedo_interrupt(): irq %d for unknown device.\n", irq); + return; + } +#endif + + ioaddr = dev->base_addr; + sp = (struct speedo_private *)dev->priv; + +#ifndef final_version + /* A lock to prevent simultaneous entry on SMP machines. */ + if (test_and_set_bit(0, (void*)&sp->in_interrupt)) { + printk(KERN_ERR"%s: SMP simultaneous entry of an interrupt handler.\n", + dev->name); + sp->in_interrupt = 0; /* Avoid halting machine. */ + return; + } +#endif + + do { + status = inw(ioaddr + SCBStatus); + /* Acknowledge all of the current interrupt sources ASAP. */ + /* Will change from 0xfc00 to 0xff00 when we start handling + FCP and ER interrupts --Dragan */ + outw(status & 0xfc00, ioaddr + SCBStatus); + + if (speedo_debug > 4) + printk(KERN_DEBUG "%s: interrupt status=%#4.4x.\n", + dev->name, status); + + if ((status & 0xfc00) == 0) + break; + + /* Always check if all rx buffers are allocated. --SAW */ + speedo_refill_rx_buffers(dev, 0); + + if ((status & 0x5000) || /* Packet received, or Rx error. */ + (sp->rx_ring_state&(RrNoMem|RrPostponed)) == RrPostponed) + /* Need to gather the postponed packet. */ + speedo_rx(dev); + + if (status & 0x1000) { + spin_lock(&sp->lock); + if ((status & 0x003c) == 0x0028) { /* No more Rx buffers. */ + struct RxFD *rxf; + printk(KERN_WARNING "%s: card reports no RX buffers.\n", + dev->name); + rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; + if (rxf == NULL) { + if (speedo_debug > 2) + printk(KERN_DEBUG + "%s: NULL cur_rx in speedo_interrupt().\n", + dev->name); + sp->rx_ring_state |= RrNoMem|RrNoResources; + } else if (rxf == sp->last_rxf) { + if (speedo_debug > 2) + printk(KERN_DEBUG + "%s: cur_rx is last in speedo_interrupt().\n", + dev->name); + sp->rx_ring_state |= RrNoMem|RrNoResources; + } else + outb(RxResumeNoResources, ioaddr + SCBCmd); + } else if ((status & 0x003c) == 0x0008) { /* No resources. 
*/ + struct RxFD *rxf; + printk(KERN_WARNING "%s: card reports no resources.\n", + dev->name); + rxf = sp->rx_ringp[sp->cur_rx % RX_RING_SIZE]; + if (rxf == NULL) { + if (speedo_debug > 2) + printk(KERN_DEBUG + "%s: NULL cur_rx in speedo_interrupt().\n", + dev->name); + sp->rx_ring_state |= RrNoMem|RrNoResources; + } else if (rxf == sp->last_rxf) { + if (speedo_debug > 2) + printk(KERN_DEBUG + "%s: cur_rx is last in speedo_interrupt().\n", + dev->name); + sp->rx_ring_state |= RrNoMem|RrNoResources; + } else { + /* Restart the receiver. */ + outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], + ioaddr + SCBPointer); + outb(RxStart, ioaddr + SCBCmd); + } + } + sp->stats.rx_errors++; + spin_unlock(&sp->lock); + } + + if ((sp->rx_ring_state&(RrNoMem|RrNoResources)) == RrNoResources) { + printk(KERN_WARNING + "%s: restart the receiver after a possible hang.\n", + dev->name); + spin_lock(&sp->lock); + /* Restart the receiver. + I'm not sure if it's always right to restart the receiver + here but I don't know another way to prevent receiver hangs. + 1999/12/25 SAW */ + outl(sp->rx_ring_dma[sp->cur_rx % RX_RING_SIZE], + ioaddr + SCBPointer); + outb(RxStart, ioaddr + SCBCmd); + sp->rx_ring_state &= ~RrNoResources; + spin_unlock(&sp->lock); + } + + /* User interrupt, Command/Tx unit interrupt or CU not active. */ + if (status & 0xA400) { + spin_lock(&sp->lock); + speedo_tx_buffer_gc(dev); + if (sp->tx_full + && (int)(sp->cur_tx - sp->dirty_tx) < TX_QUEUE_UNFULL) { + /* The ring is no longer full. */ + sp->tx_full = 0; + netif_wake_queue(dev); /* Attention: under a spinlock. --SAW */ + } + spin_unlock(&sp->lock); + } + + if (--boguscnt < 0) { + printk(KERN_ERR "%s: Too much work at interrupt, status=0x%4.4x.\n", + dev->name, status); + /* Clear all interrupt sources. */ + /* Will change from 0xfc00 to 0xff00 when we start handling + FCP and ER interrupts --Dragan */ + outw(0xfc00, ioaddr + SCBStatus); + break; + } + } while (1); + + if (speedo_debug > 3) + printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", + dev->name, inw(ioaddr + SCBStatus)); + + clear_bit(0, (void*)&sp->in_interrupt); + return; +} + +static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + struct RxFD *rxf; + struct sk_buff *skb; + /* Get a fresh skbuff to replace the consumed one. */ + skb = dev_alloc_skb(PKT_BUF_SZ + sizeof(struct RxFD)); + sp->rx_skbuff[entry] = skb; + if (skb == NULL) { + sp->rx_ringp[entry] = NULL; + return NULL; + } + rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail; + sp->rx_ring_dma[entry] = + pci_map_single(sp->pdev, rxf, + PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + skb->dev = dev; + skb_reserve(skb, sizeof(struct RxFD)); + rxf->rx_buf_addr = 0xffffffff; + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], + sizeof(struct RxFD), PCI_DMA_TODEVICE); + return rxf; +} + +static inline void speedo_rx_link(struct net_device *dev, int entry, + struct RxFD *rxf, dma_addr_t rxf_dma) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + rxf->status = cpu_to_le32(0xC0000001); /* '1' for driver use only. */ + rxf->link = 0; /* None yet. 
*/ + rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); + sp->last_rxf->link = cpu_to_le32(rxf_dma); + sp->last_rxf->status &= cpu_to_le32(~0xC0000000); + pci_dma_sync_single(sp->pdev, sp->last_rxf_dma, + sizeof(struct RxFD), PCI_DMA_TODEVICE); + sp->last_rxf = rxf; + sp->last_rxf_dma = rxf_dma; +} + +static int speedo_refill_rx_buf(struct net_device *dev, int force) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + int entry; + struct RxFD *rxf; + + entry = sp->dirty_rx % RX_RING_SIZE; + if (sp->rx_skbuff[entry] == NULL) { + rxf = speedo_rx_alloc(dev, entry); + if (rxf == NULL) { + unsigned int forw; + int forw_entry; + if (speedo_debug > 2 || !(sp->rx_ring_state & RrOOMReported)) { + printk(KERN_WARNING "%s: can't fill rx buffer (force %d)!\n", + dev->name, force); + speedo_show_state(dev); + sp->rx_ring_state |= RrOOMReported; + } + if (!force) + return -1; /* Better luck next time! */ + /* Borrow an skb from one of next entries. */ + for (forw = sp->dirty_rx + 1; forw != sp->cur_rx; forw++) + if (sp->rx_skbuff[forw % RX_RING_SIZE] != NULL) + break; + if (forw == sp->cur_rx) + return -1; + forw_entry = forw % RX_RING_SIZE; + sp->rx_skbuff[entry] = sp->rx_skbuff[forw_entry]; + sp->rx_skbuff[forw_entry] = NULL; + rxf = sp->rx_ringp[forw_entry]; + sp->rx_ringp[forw_entry] = NULL; + sp->rx_ringp[entry] = rxf; + } + } else { + rxf = sp->rx_ringp[entry]; + } + speedo_rx_link(dev, entry, rxf, sp->rx_ring_dma[entry]); + sp->dirty_rx++; + sp->rx_ring_state &= ~(RrNoMem|RrOOMReported); /* Mark the progress. */ + return 0; +} + +static void speedo_refill_rx_buffers(struct net_device *dev, int force) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + + /* Refill the RX ring. */ + while ((int)(sp->cur_rx - sp->dirty_rx) > 0 && + speedo_refill_rx_buf(dev, force) != -1); +} + +static int +speedo_rx(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + int entry = sp->cur_rx % RX_RING_SIZE; + int rx_work_limit = sp->dirty_rx + RX_RING_SIZE - sp->cur_rx; + int alloc_ok = 1; + + if (speedo_debug > 4) + printk(KERN_DEBUG " In speedo_rx().\n"); + /* If we own the next entry, it's a new packet. Send it up. */ + while (sp->rx_ringp[entry] != NULL) { + int status; + int pkt_len; + + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + status = le32_to_cpu(sp->rx_ringp[entry]->status); + pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; + + if (!(status & RxComplete)) + break; + + if (--rx_work_limit < 0) + break; + + /* Check for a rare out-of-memory case: the current buffer is + the last buffer allocated in the RX ring. --SAW */ + if (sp->last_rxf == sp->rx_ringp[entry]) { + /* Postpone the packet. It'll be reaped at an interrupt when this + packet is no longer the last packet in the ring. */ + if (speedo_debug > 2) + printk(KERN_DEBUG "%s: RX packet postponed!\n", + dev->name); + sp->rx_ring_state |= RrPostponed; + break; + } + + if (speedo_debug > 4) + printk(KERN_DEBUG " speedo_rx() status %8.8x len %d.\n", status, + pkt_len); + if ((status & (RxErrTooBig|RxOK|0x0f90)) != RxOK) { + if (status & RxErrTooBig) + printk(KERN_ERR "%s: Ethernet frame overran the Rx buffer, " + "status %8.8x!\n", dev->name, status); + else if (! (status & RxOK)) { + /* There was a fatal error. This *should* be impossible. 
*/ + sp->stats.rx_errors++; + printk(KERN_ERR "%s: Anomalous event in speedo_rx(), " + "status %8.8x.\n", + dev->name, status); + } + } else { + struct sk_buff *skb; + + /* Check if the packet is long enough to just accept without + copying to a properly sized skbuff. */ + if (pkt_len < rx_copybreak + && (skb = dev_alloc_skb(pkt_len + 2)) != 0) { + skb->dev = dev; + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + /* 'skb_put()' points to the start of sk_buff data area. */ + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], + sizeof(struct RxFD) + pkt_len, PCI_DMA_FROMDEVICE); + +#if 1 || USE_IP_CSUM + /* Packet is in one chunk -- we can copy + cksum. */ + eth_copy_and_sum(skb, sp->rx_skbuff[entry]->tail, pkt_len, 0); + skb_put(skb, pkt_len); +#else + memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->tail, + pkt_len); +#endif + } else { + /* Pass up the already-filled skbuff. */ + skb = sp->rx_skbuff[entry]; + if (skb == NULL) { + printk(KERN_ERR "%s: Inconsistent Rx descriptor chain.\n", + dev->name); + break; + } + sp->rx_skbuff[entry] = NULL; + skb_put(skb, pkt_len); + sp->rx_ringp[entry] = NULL; + pci_unmap_single(sp->pdev, sp->rx_ring_dma[entry], + PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + } + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + sp->stats.rx_packets++; + sp->stats.rx_bytes += pkt_len; + } + entry = (++sp->cur_rx) % RX_RING_SIZE; + sp->rx_ring_state &= ~RrPostponed; + /* Refill the recently taken buffers. + Do it one-by-one to handle traffic bursts better. */ + if (alloc_ok && speedo_refill_rx_buf(dev, 0) == -1) + alloc_ok = 0; + } + + /* Try hard to refill the recently taken buffers. */ + speedo_refill_rx_buffers(dev, 1); + + sp->last_rx_time = jiffies; + + return 0; +} + +static int +speedo_close(struct net_device *dev) +{ + long ioaddr = dev->base_addr; + struct speedo_private *sp = (struct speedo_private *)dev->priv; + int i; + + netdevice_stop(dev); + netif_stop_queue(dev); + + if (speedo_debug > 1) + printk(KERN_DEBUG "%s: Shutting down ethercard, status was %4.4x.\n", + dev->name, inw(ioaddr + SCBStatus)); + + /* Shut off the media monitoring timer. */ + del_timer_sync(&sp->timer); + + /* Shutting down the chip nicely fails to disable flow control. So.. */ + outl(PortPartialReset, ioaddr + SCBPort); + + free_irq(dev->irq, dev); + + /* Print a few items for debugging. */ + if (speedo_debug > 3) + speedo_show_state(dev); + + /* Free all the skbuffs in the Rx and Tx queues. */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = sp->rx_skbuff[i]; + sp->rx_skbuff[i] = 0; + /* Clear the Rx descriptors. */ + if (skb) { + pci_unmap_single(sp->pdev, + sp->rx_ring_dma[i], + PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + } + } + + for (i = 0; i < TX_RING_SIZE; i++) { + struct sk_buff *skb = sp->tx_skbuff[i]; + sp->tx_skbuff[i] = 0; + /* Clear the Tx descriptors. */ + if (skb) { + pci_unmap_single(sp->pdev, + le32_to_cpu(sp->tx_ring[i].tx_buf_addr0), + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); + } + } + + /* Free multicast setting blocks. 
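+ mc_setup_head is the singly-linked list of setup frames queued by set_rx_mode(); the chip was reset above, so the blocks can simply be walked and kfree()d. 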
*/ + for (i = 0; sp->mc_setup_head != NULL; i++) { + struct speedo_mc_block *t; + t = sp->mc_setup_head->next; + kfree(sp->mc_setup_head); + sp->mc_setup_head = t; + } + sp->mc_setup_tail = NULL; + if (speedo_debug > 0) + printk(KERN_DEBUG "%s: %d multicast blocks dropped.\n", dev->name, i); + + SET_POWER_STATE(sp->pdev, 2); + + MOD_DEC_USE_COUNT; + + return 0; +} + +/* The Speedo-3 has an especially awkward and unusable method of getting + statistics out of the chip. It takes an unpredictable length of time + for the dump-stats command to complete. To avoid a busy-wait loop we + update the stats with the previous dump results, and then trigger a + new dump. + + Oh, and incoming frames are dropped while executing dump-stats! + */ +static struct net_device_stats * +speedo_get_stats(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + + /* Update only if the previous dump finished. */ + if (sp->lstats->done_marker == le32_to_cpu(0xA007)) { + sp->stats.tx_aborted_errors += le32_to_cpu(sp->lstats->tx_coll16_errs); + sp->stats.tx_window_errors += le32_to_cpu(sp->lstats->tx_late_colls); + sp->stats.tx_fifo_errors += le32_to_cpu(sp->lstats->tx_underruns); + sp->stats.tx_fifo_errors += le32_to_cpu(sp->lstats->tx_lost_carrier); + /*sp->stats.tx_deferred += le32_to_cpu(sp->lstats->tx_deferred);*/ + sp->stats.collisions += le32_to_cpu(sp->lstats->tx_total_colls); + sp->stats.rx_crc_errors += le32_to_cpu(sp->lstats->rx_crc_errs); + sp->stats.rx_frame_errors += le32_to_cpu(sp->lstats->rx_align_errs); + sp->stats.rx_over_errors += le32_to_cpu(sp->lstats->rx_resource_errs); + sp->stats.rx_fifo_errors += le32_to_cpu(sp->lstats->rx_overrun_errs); + sp->stats.rx_length_errors += le32_to_cpu(sp->lstats->rx_runt_errs); + sp->lstats->done_marker = 0x0000; + if (netif_running(dev)) { + unsigned long flags; + /* Take a spinlock to make wait_for_cmd_done and sending the + command atomic. --SAW */ + spin_lock_irqsave(&sp->lock, flags); + wait_for_cmd_done(ioaddr + SCBCmd); + outb(CUDumpStats, ioaddr + SCBCmd); + spin_unlock_irqrestore(&sp->lock, flags); + } + } + return &sp->stats; +} + +static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr) +{ + u32 ethcmd; + struct speedo_private *sp = dev->priv; + + if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_GDRVINFO: { + struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO}; + strncpy(info.driver, "eepro100", sizeof(info.driver)-1); + strncpy(info.version, version, sizeof(info.version)-1); + if (sp && sp->pdev) + strcpy(info.bus_info, sp->pdev->slot_name); + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; + } + + } + + return -EOPNOTSUPP; +} + + + + + +static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; + int phy = sp->phy[0] & 0x1f; + int saved_acpi; + int t; + + switch(cmd) { + case SIOCGMIIPHY: /* Get address of MII PHY in use. */ + case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ + data->phy_id = phy; + + case SIOCGMIIREG: /* Read MII PHY register. */ + case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */ + /* FIXME: these operations need to be serialized with MDIO + access from the timeout handler. + They are currently serialized only with MDIO access from the + timer routine. 
2000/05/09 SAW */ + saved_acpi = SET_POWER_STATE(sp->pdev, 0); + t = del_timer_sync(&sp->timer); + data->val_out = mdio_read(ioaddr, data->phy_id & 0x1f, data->reg_num & 0x1f); + if (t) + add_timer(&sp->timer); /* may be set to the past --SAW */ + SET_POWER_STATE(sp->pdev, saved_acpi); + return 0; + + case SIOCSMIIREG: /* Write MII PHY register. */ + case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + saved_acpi = SET_POWER_STATE(sp->pdev, 0); + t = del_timer_sync(&sp->timer); + mdio_write(ioaddr, data->phy_id, data->reg_num, data->val_in); + if (t) + add_timer(&sp->timer); /* may be set to the past --SAW */ + SET_POWER_STATE(sp->pdev, saved_acpi); + return 0; + case SIOCETHTOOL: + return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data); + default: + return -EOPNOTSUPP; + } +} + +/* Set or clear the multicast filter for this adaptor. + This is very ugly with Intel chips -- we usually have to execute an + entire configuration command, plus process a multicast command. + This is complicated. We must put a large configuration command and + an arbitrarily-sized multicast command in the transmit list. + To minimize the disruption -- the previous command might have already + loaded the link -- we convert the current command block, normally a Tx + command, into a no-op and link it to the new command. +*/ +static void set_rx_mode(struct net_device *dev) +{ + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + struct descriptor *last_cmd; + char new_rx_mode; + unsigned long flags; + int entry, i; + + if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ + new_rx_mode = 3; + } else if ((dev->flags & IFF_ALLMULTI) || + dev->mc_count > multicast_filter_limit) { + new_rx_mode = 1; + } else + new_rx_mode = 0; + + if (speedo_debug > 3) + printk(KERN_DEBUG "%s: set_rx_mode %d -> %d\n", dev->name, + sp->rx_mode, new_rx_mode); + + if ((int)(sp->cur_tx - sp->dirty_tx) > TX_RING_SIZE - TX_MULTICAST_SIZE) { + /* The Tx ring is full -- don't add anything! Hope the mode will be + * set again later. */ + sp->rx_mode = -1; + return; + } + + if (new_rx_mode != sp->rx_mode) { + u8 *config_cmd_data; + + spin_lock_irqsave(&sp->lock, flags); + entry = sp->cur_tx++ % TX_RING_SIZE; + last_cmd = sp->last_cmd; + sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; + + sp->tx_skbuff[entry] = 0; /* Redundant. */ + sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdConfigure); + sp->tx_ring[entry].link = + cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); + config_cmd_data = (void *)&sp->tx_ring[entry].tx_desc_addr; + /* Construct a full CmdConfig frame. */ + memcpy(config_cmd_data, i82558_config_cmd, CONFIG_DATA_SIZE); + config_cmd_data[1] = (txfifo << 4) | rxfifo; + config_cmd_data[4] = rxdmacount; + config_cmd_data[5] = txdmacount + 0x80; + config_cmd_data[15] |= (new_rx_mode & 2) ? 1 : 0; + /* 0x80 doesn't disable FC 0x84 does. + Disable Flow control since we are not ACK-ing any FC interrupts + for now. --Dragan */ + config_cmd_data[19] = 0x84; + config_cmd_data[19] |= sp->full_duplex ? 0x40 : 0; + config_cmd_data[21] = (new_rx_mode & 1) ? 0x0D : 0x05; + if (sp->phy[0] & 0x8000) { /* Use the AUI port instead. */ + config_cmd_data[15] |= 0x80; + config_cmd_data[8] = 0; + } + /* Trigger the command unit resume. 
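+ clear_suspend() below drops the previous command's suspend bit so the chain no longer stops there; CUResume then makes the command unit re-fetch the link and walk into the new descriptor. 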
*/ + wait_for_cmd_done(ioaddr + SCBCmd); + clear_suspend(last_cmd); + outb(CUResume, ioaddr + SCBCmd); + if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { + netif_stop_queue(dev); + sp->tx_full = 1; + } + spin_unlock_irqrestore(&sp->lock, flags); + } + + if (new_rx_mode == 0 && dev->mc_count < 4) { + /* The simple case of 0-3 multicast list entries occurs often, and + fits within one tx_ring[] entry. */ + struct dev_mc_list *mclist; + u16 *setup_params, *eaddrs; + + spin_lock_irqsave(&sp->lock, flags); + entry = sp->cur_tx++ % TX_RING_SIZE; + last_cmd = sp->last_cmd; + sp->last_cmd = (struct descriptor *)&sp->tx_ring[entry]; + + sp->tx_skbuff[entry] = 0; + sp->tx_ring[entry].status = cpu_to_le32(CmdSuspend | CmdMulticastList); + sp->tx_ring[entry].link = + cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); + sp->tx_ring[entry].tx_desc_addr = 0; /* Really MC list count. */ + setup_params = (u16 *)&sp->tx_ring[entry].tx_desc_addr; + *setup_params++ = cpu_to_le16(dev->mc_count*6); + /* Fill in the multicast addresses. */ + for (i = 0, mclist = dev->mc_list; i < dev->mc_count; + i++, mclist = mclist->next) { + eaddrs = (u16 *)mclist->dmi_addr; + *setup_params++ = *eaddrs++; + *setup_params++ = *eaddrs++; + *setup_params++ = *eaddrs++; + } + + wait_for_cmd_done(ioaddr + SCBCmd); + clear_suspend(last_cmd); + /* Immediately trigger the command unit resume. */ + outb(CUResume, ioaddr + SCBCmd); + + if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { + netif_stop_queue(dev); + sp->tx_full = 1; + } + spin_unlock_irqrestore(&sp->lock, flags); + } else if (new_rx_mode == 0) { + struct dev_mc_list *mclist; + u16 *setup_params, *eaddrs; + struct speedo_mc_block *mc_blk; + struct descriptor *mc_setup_frm; + int i; + + mc_blk = kmalloc(sizeof(*mc_blk) + 2 + multicast_filter_limit*6, + GFP_ATOMIC); + if (mc_blk == NULL) { + printk(KERN_ERR "%s: Failed to allocate a setup frame.\n", + dev->name); + sp->rx_mode = -1; /* We failed, try again. */ + return; + } + mc_blk->next = NULL; + mc_blk->len = 2 + multicast_filter_limit*6; + mc_blk->frame_dma = + pci_map_single(sp->pdev, &mc_blk->frame, mc_blk->len, + PCI_DMA_TODEVICE); + mc_setup_frm = &mc_blk->frame; + + /* Fill the setup frame. */ + if (speedo_debug > 1) + printk(KERN_DEBUG "%s: Constructing a setup frame at %p.\n", + dev->name, mc_setup_frm); + mc_setup_frm->cmd_status = + cpu_to_le32(CmdSuspend | CmdIntr | CmdMulticastList); + /* Link set below. */ + setup_params = (u16 *)&mc_setup_frm->params; + *setup_params++ = cpu_to_le16(dev->mc_count*6); + /* Fill in the multicast addresses. */ + for (i = 0, mclist = dev->mc_list; i < dev->mc_count; + i++, mclist = mclist->next) { + eaddrs = (u16 *)mclist->dmi_addr; + *setup_params++ = *eaddrs++; + *setup_params++ = *eaddrs++; + *setup_params++ = *eaddrs++; + } + + /* Disable interrupts while playing with the Tx Cmd list. */ + spin_lock_irqsave(&sp->lock, flags); + + if (sp->mc_setup_tail) + sp->mc_setup_tail->next = mc_blk; + else + sp->mc_setup_head = mc_blk; + sp->mc_setup_tail = mc_blk; + mc_blk->tx = sp->cur_tx; + + entry = sp->cur_tx++ % TX_RING_SIZE; + last_cmd = sp->last_cmd; + sp->last_cmd = mc_setup_frm; + + /* Change the command to a NoOp, pointing to the CmdMulti command. */ + sp->tx_skbuff[entry] = 0; + sp->tx_ring[entry].status = cpu_to_le32(CmdNOp); + sp->tx_ring[entry].link = cpu_to_le32(mc_blk->frame_dma); + + /* Set the link in the setup frame. 
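+ The ring slot above was turned into a NoOp whose link points out of the ring at mc_blk->frame; linking the frame back to the next ring slot splices the oversized multicast command into the normal Tx chain. 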
*/ + mc_setup_frm->link = + cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); + + pci_dma_sync_single(sp->pdev, mc_blk->frame_dma, + mc_blk->len, PCI_DMA_TODEVICE); + + wait_for_cmd_done(ioaddr + SCBCmd); + clear_suspend(last_cmd); + /* Immediately trigger the command unit resume. */ + outb(CUResume, ioaddr + SCBCmd); + + if ((int)(sp->cur_tx - sp->dirty_tx) >= TX_QUEUE_LIMIT) { + netif_stop_queue(dev); + sp->tx_full = 1; + } + spin_unlock_irqrestore(&sp->lock, flags); + + if (speedo_debug > 5) + printk(" CmdMCSetup frame length %d in entry %d.\n", + dev->mc_count, entry); + } + + sp->rx_mode = new_rx_mode; +} + +#ifdef CONFIG_PM +static int eepro100_suspend(struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + + pci_save_state(pdev, sp->pm_state); + + if (!netif_running(dev)) + return 0; + + netif_device_detach(dev); + outl(PortPartialReset, ioaddr + SCBPort); + + /* XXX call SET_POWER_STATE ()? */ + return 0; +} + +static int eepro100_resume(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct speedo_private *sp = (struct speedo_private *)dev->priv; + long ioaddr = dev->base_addr; + + pci_restore_state(pdev, sp->pm_state); + + if (!netif_running(dev)) + return 0; + + /* I'm absolutely uncertain if this part of code may work. + The problems are: + - correct hardware reinitialization; + - correct driver behavior between different steps of the + reinitialization; + - serialization with other driver calls. + 2000/03/08 SAW */ + outw(SCBMaskAll, ioaddr + SCBCmd); + speedo_resume(dev); + netif_device_attach(dev); + sp->rx_mode = -1; + sp->flow_ctrl = sp->partner = 0; + set_rx_mode(dev); + return 0; +} +#endif /* CONFIG_PM */ + +static void __devexit eepro100_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct speedo_private *sp = (struct speedo_private *)dev->priv; + + unregister_netdev(dev); + + release_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); + release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); + +#ifndef USE_IO + iounmap((char *)dev->base_addr); +#endif + + pci_free_consistent(pdev, TX_RING_SIZE * sizeof(struct TxFD) + + sizeof(struct speedo_stats), + sp->tx_ring, sp->tx_ring_dma); + pci_disable_device(pdev); + kfree(dev); +} + +static struct pci_device_id eepro100_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82557, + PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82562ET, + PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82559ER, + PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ID1029, + PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ID1030, + PCI_ANY_ID, PCI_ANY_ID, }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_7, + PCI_ANY_ID, PCI_ANY_ID, }, + { 0,} +}; +MODULE_DEVICE_TABLE(pci, eepro100_pci_tbl); + +static struct pci_driver eepro100_driver = { + name: "eepro100", + id_table: eepro100_pci_tbl, + probe: eepro100_init_one, + remove: __devexit_p(eepro100_remove_one), +#ifdef CONFIG_PM + suspend: eepro100_suspend, + resume: eepro100_resume, +#endif /* CONFIG_PM */ +}; + +static int __init eepro100_init_module(void) +{ + if (debug >= 0 && speedo_debug != debug) + printk(KERN_INFO "eepro100.c: Debug level is %d.\n", debug); + if (debug >= 0) + speedo_debug = debug; + + return 
pci_module_init(&eepro100_driver); +} + +static void __exit eepro100_cleanup_module(void) +{ + pci_unregister_driver(&eepro100_driver); +} + +module_init(eepro100_init_module); +module_exit(eepro100_cleanup_module); diff --git a/xen-2.4.16/drivers/net/net_init.c b/xen-2.4.16/drivers/net/net_init.c new file mode 100644 index 0000000000..a0c1f01002 --- /dev/null +++ b/xen-2.4.16/drivers/net/net_init.c @@ -0,0 +1,738 @@ +/* net_init.c: Initialization for network devices. */ +/* + Written 1993,1994,1995 by Donald Becker. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + This file contains the initialization for the "pl14+" style ethernet + drivers. It should eventually replace most of drivers/net/Space.c. + Its primary advantage is that it's able to allocate low-memory buffers. + A secondary advantage is that the dangerous NE*000 netcards can reserve + their I/O port region before the SCSI probes start. + + Modifications/additions by Bjorn Ekwall : + ethdev_index[MAX_ETH_CARDS] + register_netdev() / unregister_netdev() + + Modifications by Wolfgang Walter + Use dev_close cleanly so we always shut things down tidily. + + Changed 29/10/95, Alan Cox to pass sockaddr's around for mac addresses. + + 14/06/96 - Paul Gortmaker: Add generic eth_change_mtu() function. + 24/09/96 - Paul Norton: Add token-ring variants of the netdev functions. + + 08/11/99 - Alan Cox: Got fed up of the mess in this file and cleaned it + up. We now share common code and have regularised name + allocation setups. Abolished the 16 card limits. + 03/19/2000 - jgarzik and Urban Widmark: init_etherdev 32-byte align + 03/21/2001 - jgarzik: alloc_etherdev and friends + +*/ + +#include +#include +#include +//#include +#include +#include +//#include +#include +#include +#include +#include +#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include + +#define rtnl_lock() ((void)0) +#define rtnl_unlock() ((void)0) + +/* The network devices currently exist only in the socket namespace, so these + entries are unused. The only ones that make sense are + open start the ethercard + close stop the ethercard + ioctl To get statistics, perhaps set the interface port (AUI, BNC, etc.) + One can also imagine getting raw packets using + read & write + but this is probably better handled by a raw packet socket. + + Given that almost all of these functions are handled in the current + socket-based scheme, putting ethercard devices in /dev/ seems pointless. + + [Removed all support for /dev network devices. 
When someone adds + streams then by magic we get them, but otherwise they are un-needed + and a space waste] +*/ + + +static struct net_device *alloc_netdev(int sizeof_priv, const char *mask, + void (*setup)(struct net_device *)) +{ + struct net_device *dev; + int alloc_size; + + /* ensure 32-byte alignment of the private area */ + alloc_size = sizeof (*dev) + sizeof_priv + 31; + + dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL); + if (dev == NULL) + { + printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n"); + return NULL; + } + + memset(dev, 0, alloc_size); + + if (sizeof_priv) + dev->priv = (void *) (((long)(dev + 1) + 31) & ~31); + + setup(dev); + strcpy(dev->name, mask); + + return dev; +} + +static struct net_device *init_alloc_dev(int sizeof_priv) +{ + struct net_device *dev; + int alloc_size; + + /* ensure 32-byte alignment of the private area */ + alloc_size = sizeof (*dev) + sizeof_priv + 31; + + dev = (struct net_device *) kmalloc (alloc_size, GFP_KERNEL); + if (dev == NULL) + { + printk(KERN_ERR "alloc_dev: Unable to allocate device memory.\n"); + return NULL; + } + + memset(dev, 0, alloc_size); + + if (sizeof_priv) + dev->priv = (void *) (((long)(dev + 1) + 31) & ~31); + + return dev; +} + +/* + * Create and name a device from a prototype, then perform any needed + * setup. + */ + +static struct net_device *init_netdev(struct net_device *dev, int sizeof_priv, + char *mask, void (*setup)(struct net_device *)) +{ + int new_device = 0; + + /* + * Allocate a device if one is not provided. + */ + + if (dev == NULL) { + dev=init_alloc_dev(sizeof_priv); + if(dev==NULL) + return NULL; + new_device = 1; + } + + /* + * Allocate a name + */ + + if (dev->name[0] == '\0' || dev->name[0] == ' ') { + strcpy(dev->name, mask); + if (dev_alloc_name(dev, mask)<0) { + if (new_device) + kfree(dev); + return NULL; + } + } + + //netdev_boot_setup_check(dev); + + /* + * Configure via the caller provided setup function then + * register if needed. + */ + + setup(dev); + + if (new_device) { + int err; + + rtnl_lock(); + err = register_netdevice(dev); + rtnl_unlock(); + + if (err < 0) { + kfree(dev); + dev = NULL; + } + } + return dev; +} + +#if defined(CONFIG_HIPPI) || defined(CONFIG_TR) || defined(CONFIG_NET_FC) +static int __register_netdev(struct net_device *dev) +{ + if (dev->init && dev->init(dev) != 0) { + unregister_netdev(dev); + return -EIO; + } + return 0; +} +#endif + +/** + * init_etherdev - Register ethernet device + * @dev: An ethernet device structure to be filled in, or %NULL if a new + * struct should be allocated. + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with ethernet-generic values. + * + * If no device structure is passed, a new one is constructed, complete with + * a private data area of size @sizeof_priv. A 32-byte (not bit) + * alignment is enforced for this private data area. + * + * If an empty string area is passed as dev->name, or a new structure is made, + * a new name string is constructed. + */ + +struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv) +{ + return init_netdev(dev, sizeof_priv, "eth%d", ether_setup); +} + +/** + * alloc_etherdev - Allocates and sets up an ethernet device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with ethernet-generic + * values. 
Basically does everything except registering the device. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. + */ + +struct net_device *alloc_etherdev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "eth%d", ether_setup); +} + +EXPORT_SYMBOL(init_etherdev); +EXPORT_SYMBOL(alloc_etherdev); + +static int eth_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr=p; + if (netif_running(dev)) + return -EBUSY; + memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); + return 0; +} + +static int eth_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < 68) || (new_mtu > 1500)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +#ifdef CONFIG_FDDI + +/** + * init_fddidev - Register FDDI device + * @dev: A FDDI device structure to be filled in, or %NULL if a new + * struct should be allocated. + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with FDDI-generic values. + * + * If no device structure is passed, a new one is constructed, complete with + * a private data area of size @sizeof_priv. A 32-byte (not bit) + * alignment is enforced for this private data area. + * + * If an empty string area is passed as dev->name, or a new structure is made, + * a new name string is constructed. + */ + +struct net_device *init_fddidev(struct net_device *dev, int sizeof_priv) +{ + return init_netdev(dev, sizeof_priv, "fddi%d", fddi_setup); +} + +/** + * alloc_fddidev - Register FDDI device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this FDDI device + * + * Fill in the fields of the device structure with FDDI-generic values. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. + */ + +struct net_device *alloc_fddidev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup); +} + +EXPORT_SYMBOL(init_fddidev); +EXPORT_SYMBOL(alloc_fddidev); + +static int fddi_change_mtu(struct net_device *dev, int new_mtu) +{ + if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) + return(-EINVAL); + dev->mtu = new_mtu; + return(0); +} + +#endif /* CONFIG_FDDI */ + +#ifdef CONFIG_HIPPI + +static int hippi_change_mtu(struct net_device *dev, int new_mtu) +{ + /* + * HIPPI's got these nice large MTUs. + */ + if ((new_mtu < 68) || (new_mtu > 65280)) + return -EINVAL; + dev->mtu = new_mtu; + return(0); +} + + +/* + * For HIPPI we will actually use the lower 4 bytes of the hardware + * address as the I-FIELD rather than the actual hardware address. + */ +static int hippi_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + if (netif_running(dev)) + return -EBUSY; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return 0; +} + + +/** + * init_hippi_dev - Register HIPPI device + * @dev: A HIPPI device structure to be filled in, or %NULL if a new + * struct should be allocated. + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with HIPPI-generic values. + * + * If no device structure is passed, a new one is constructed, complete with + * a private data area of size @sizeof_priv. 
A 32-byte (not bit) + * alignment is enforced for this private data area. + * + * If an empty string area is passed as dev->name, or a new structure is made, + * a new name string is constructed. + */ + +struct net_device *init_hippi_dev(struct net_device *dev, int sizeof_priv) +{ + return init_netdev(dev, sizeof_priv, "hip%d", hippi_setup); +} + +/** + * alloc_hippi_dev - Register HIPPI device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this HIPPI device + * + * Fill in the fields of the device structure with HIPPI-generic values. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. + */ + +struct net_device *alloc_hippi_dev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "hip%d", hippi_setup); +} + +int register_hipdev(struct net_device *dev) +{ + return __register_netdev(dev); +} + +void unregister_hipdev(struct net_device *dev) +{ + unregister_netdev(dev); +} + +EXPORT_SYMBOL(init_hippi_dev); +EXPORT_SYMBOL(alloc_hippi_dev); +EXPORT_SYMBOL(register_hipdev); +EXPORT_SYMBOL(unregister_hipdev); + +static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) +{ + /* Never send broadcast/multicast ARP messages */ + p->mcast_probes = 0; + + /* In IPv6 unicast probes are valid even on NBMA, + * because they are encapsulated in normal IPv6 protocol. + * Should be a generic flag. + */ + if (p->tbl->family != AF_INET6) + p->ucast_probes = 0; + return 0; +} + +#endif /* CONFIG_HIPPI */ + +void ether_setup(struct net_device *dev) +{ + /* Fill in the fields of the device structure with ethernet-generic values. + This should be in a common file instead of per-driver. */ + + dev->change_mtu = eth_change_mtu; + dev->hard_header = eth_header; + dev->rebuild_header = eth_rebuild_header; + dev->set_mac_address = eth_mac_addr; + dev->hard_header_cache = eth_header_cache; + dev->header_cache_update= eth_header_cache_update; + dev->hard_header_parse = eth_header_parse; + + dev->type = 0; //ARPHRD_ETHER; + dev->hard_header_len = ETH_HLEN; + dev->mtu = 1500; /* eth_mtu */ + dev->addr_len = ETH_ALEN; + dev->tx_queue_len = 100; /* Ethernet wants good queues */ + + memset(dev->broadcast,0xFF, ETH_ALEN); + + /* New-style flags. */ + dev->flags = IFF_BROADCAST|IFF_MULTICAST; +} +EXPORT_SYMBOL(ether_setup); + +#ifdef CONFIG_FDDI + +void fddi_setup(struct net_device *dev) +{ + /* + * Fill in the fields of the device structure with FDDI-generic values. + * This should be in a common file instead of per-driver. 
+ */ + + dev->change_mtu = fddi_change_mtu; + dev->hard_header = fddi_header; + dev->rebuild_header = fddi_rebuild_header; + + dev->type = ARPHRD_FDDI; + dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ + dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ + dev->addr_len = FDDI_K_ALEN; + dev->tx_queue_len = 100; /* Long queues on FDDI */ + + memset(dev->broadcast, 0xFF, FDDI_K_ALEN); + + /* New-style flags */ + dev->flags = IFF_BROADCAST | IFF_MULTICAST; +} +EXPORT_SYMBOL(fddi_setup); + +#endif /* CONFIG_FDDI */ + +#ifdef CONFIG_HIPPI +void hippi_setup(struct net_device *dev) +{ + dev->set_multicast_list = NULL; + dev->change_mtu = hippi_change_mtu; + dev->hard_header = hippi_header; + dev->rebuild_header = hippi_rebuild_header; + dev->set_mac_address = hippi_mac_addr; + dev->hard_header_parse = NULL; + dev->hard_header_cache = NULL; + dev->header_cache_update = NULL; + dev->neigh_setup = hippi_neigh_setup_dev; + + /* + * We don't support HIPPI `ARP' for the time being, and probably + * never will unless someone else implements it. However we + * still need a fake ARPHRD to make ifconfig and friends play ball. + */ + dev->type = ARPHRD_HIPPI; + dev->hard_header_len = HIPPI_HLEN; + dev->mtu = 65280; + dev->addr_len = HIPPI_ALEN; + dev->tx_queue_len = 25 /* 5 */; + memset(dev->broadcast, 0xFF, HIPPI_ALEN); + + + /* + * HIPPI doesn't support broadcast+multicast and we only use + * static ARP tables. ARP is disabled by hippi_neigh_setup_dev. + */ + dev->flags = 0; +} +EXPORT_SYMBOL(hippi_setup); +#endif /* CONFIG_HIPPI */ + +#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE) + +static int ltalk_change_mtu(struct net_device *dev, int mtu) +{ + return -EINVAL; +} + +static int ltalk_mac_addr(struct net_device *dev, void *addr) +{ + return -EINVAL; +} + + +void ltalk_setup(struct net_device *dev) +{ + /* Fill in the fields of the device structure with localtalk-generic values. */ + + dev->change_mtu = ltalk_change_mtu; + dev->hard_header = NULL; + dev->rebuild_header = NULL; + dev->set_mac_address = ltalk_mac_addr; + dev->hard_header_cache = NULL; + dev->header_cache_update= NULL; + + dev->type = ARPHRD_LOCALTLK; + dev->hard_header_len = LTALK_HLEN; + dev->mtu = LTALK_MTU; + dev->addr_len = LTALK_ALEN; + dev->tx_queue_len = 10; + + dev->broadcast[0] = 0xFF; + + dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP; +} +EXPORT_SYMBOL(ltalk_setup); + +#endif /* CONFIG_ATALK || CONFIG_ATALK_MODULE */ + +int register_netdev(struct net_device *dev) +{ + int err; + + rtnl_lock(); + + /* + * If the name is a format string the caller wants us to + * do a name allocation + */ + + if (strchr(dev->name, '%')) + { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto out; + } + + /* + * Back compatibility hook. 
Kill this one in 2.5 + */ + + if (dev->name[0]==0 || dev->name[0]==' ') + { + err = dev_alloc_name(dev, "eth%d"); + if (err < 0) + goto out; + } + + err = register_netdevice(dev); + +out: + rtnl_unlock(); + return err; +} + +void unregister_netdev(struct net_device *dev) +{ + rtnl_lock(); + unregister_netdevice(dev); + rtnl_unlock(); +} + +EXPORT_SYMBOL(register_netdev); +EXPORT_SYMBOL(unregister_netdev); + +#ifdef CONFIG_TR + +void tr_setup(struct net_device *dev) +{ + /* + * Configure and register + */ + + dev->hard_header = tr_header; + dev->rebuild_header = tr_rebuild_header; + + dev->type = ARPHRD_IEEE802_TR; + dev->hard_header_len = TR_HLEN; + dev->mtu = 2000; + dev->addr_len = TR_ALEN; + dev->tx_queue_len = 100; /* Long queues on tr */ + + memset(dev->broadcast,0xFF, TR_ALEN); + + /* New-style flags. */ + dev->flags = IFF_BROADCAST | IFF_MULTICAST ; +} + +/** + * init_trdev - Register token ring device + * @dev: A token ring device structure to be filled in, or %NULL if a new + * struct should be allocated. + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with token ring-generic values. + * + * If no device structure is passed, a new one is constructed, complete with + * a private data area of size @sizeof_priv. A 32-byte (not bit) + * alignment is enforced for this private data area. + * + * If an empty string area is passed as dev->name, or a new structure is made, + * a new name string is constructed. + */ + +struct net_device *init_trdev(struct net_device *dev, int sizeof_priv) +{ + return init_netdev(dev, sizeof_priv, "tr%d", tr_setup); +} + +/** + * alloc_trdev - Register token ring device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this token ring device + * + * Fill in the fields of the device structure with token ring-generic values. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. + */ + +struct net_device *alloc_trdev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "tr%d", tr_setup); +} + +int register_trdev(struct net_device *dev) +{ + return __register_netdev(dev); +} + +void unregister_trdev(struct net_device *dev) +{ + unregister_netdev(dev); +} + +EXPORT_SYMBOL(tr_setup); +EXPORT_SYMBOL(init_trdev); +EXPORT_SYMBOL(alloc_trdev); +EXPORT_SYMBOL(register_trdev); +EXPORT_SYMBOL(unregister_trdev); + +#endif /* CONFIG_TR */ + + +#ifdef CONFIG_NET_FC + +void fc_setup(struct net_device *dev) +{ + dev->hard_header = fc_header; + dev->rebuild_header = fc_rebuild_header; + + dev->type = ARPHRD_IEEE802; + dev->hard_header_len = FC_HLEN; + dev->mtu = 2024; + dev->addr_len = FC_ALEN; + dev->tx_queue_len = 100; /* Long queues on fc */ + + memset(dev->broadcast,0xFF, FC_ALEN); + + /* New-style flags. */ + dev->flags = IFF_BROADCAST; +} + +/** + * init_fcdev - Register fibre channel device + * @dev: A fibre channel device structure to be filled in, or %NULL if a new + * struct should be allocated. + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this ethernet device + * + * Fill in the fields of the device structure with fibre channel-generic values. + * + * If no device structure is passed, a new one is constructed, complete with + * a private data area of size @sizeof_priv. A 32-byte (not bit) + * alignment is enforced for this private data area. 
+ * + * If an empty string area is passed as dev->name, or a new structure is made, + * a new name string is constructed. + */ + +struct net_device *init_fcdev(struct net_device *dev, int sizeof_priv) +{ + return init_netdev(dev, sizeof_priv, "fc%d", fc_setup); +} + +/** + * alloc_fcdev - Register fibre channel device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this fibre channel device + * + * Fill in the fields of the device structure with fibre channel-generic values. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. + */ + +struct net_device *alloc_fcdev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "fc%d", fc_setup); +} + +int register_fcdev(struct net_device *dev) +{ + return __register_netdev(dev); +} + +void unregister_fcdev(struct net_device *dev) +{ + unregister_netdev(dev); +} + +EXPORT_SYMBOL(fc_setup); +EXPORT_SYMBOL(init_fcdev); +EXPORT_SYMBOL(alloc_fcdev); +EXPORT_SYMBOL(register_fcdev); +EXPORT_SYMBOL(unregister_fcdev); + +#endif /* CONFIG_NET_FC */ + diff --git a/xen-2.4.16/drivers/net/pcnet32.c b/xen-2.4.16/drivers/net/pcnet32.c new file mode 100644 index 0000000000..08ace71a04 --- /dev/null +++ b/xen-2.4.16/drivers/net/pcnet32.c @@ -0,0 +1,1614 @@ +/* pcnet32.c: An AMD PCnet32 ethernet driver for linux. */ +/* + * Copyright 1996-1999 Thomas Bogendoerfer + * + * Derived from the lance driver written 1993,1994,1995 by Donald Becker. + * + * Copyright 1993 United States Government as represented by the + * Director, National Security Agency. + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + * This driver is for PCnet32 and PCnetPCI based ethercards + */ +/************************************************************************** + * 23 Oct, 2000. + * Fixed a few bugs, related to running the controller in 32bit mode. + * + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. 
+ * + *************************************************************************/ + +static const char *version = "pcnet32.c:v1.25kf 26.9.1999 tsbogend@alpha.franken.de\n"; + +#include +#include +#include + +#include + +//#include +#include +//#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include + +#include +#include +#include +#include + +static unsigned int pcnet32_portlist[] __initdata = {0x300, 0x320, 0x340, 0x360, 0}; + +/* + * PCI device identifiers for "new style" Linux PCI Device Drivers + */ +static struct pci_device_id pcnet32_pci_tbl[] __devinitdata = { + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { 0, } +}; + +static int pcnet32_debug = 1; +static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */ + +static struct net_device *pcnet32_dev; + +static const int max_interrupt_work = 80; +static const int rx_copybreak = 200; + +#define PORT_AUI 0x00 +#define PORT_10BT 0x01 +#define PORT_GPSI 0x02 +#define PORT_MII 0x03 + +#define PORT_PORTSEL 0x03 +#define PORT_ASEL 0x04 +#define PORT_100 0x40 +#define PORT_FD 0x80 + +#define PCNET32_DMA_MASK 0xffffffff + +/* + * table to translate option values from tulip + * to internal options + */ +static unsigned char options_mapping[] = { + PORT_ASEL, /* 0 Auto-select */ + PORT_AUI, /* 1 BNC/AUI */ + PORT_AUI, /* 2 AUI/BNC */ + PORT_ASEL, /* 3 not supported */ + PORT_10BT | PORT_FD, /* 4 10baseT-FD */ + PORT_ASEL, /* 5 not supported */ + PORT_ASEL, /* 6 not supported */ + PORT_ASEL, /* 7 not supported */ + PORT_ASEL, /* 8 not supported */ + PORT_MII, /* 9 MII 10baseT */ + PORT_MII | PORT_FD, /* 10 MII 10baseT-FD */ + PORT_MII, /* 11 MII (autosel) */ + PORT_10BT, /* 12 10BaseT */ + PORT_MII | PORT_100, /* 13 MII 100BaseTx */ + PORT_MII | PORT_100 | PORT_FD, /* 14 MII 100BaseTx-FD */ + PORT_ASEL /* 15 not supported */ +}; + +#define MAX_UNITS 8 +static int options[MAX_UNITS]; +static int full_duplex[MAX_UNITS]; + +/* + * Theory of Operation + * + * This driver uses the same software structure as the normal lance + * driver. So look for a verbose description in lance.c. The differences + * to the normal lance driver is the use of the 32bit mode of PCnet32 + * and PCnetPCI chips. Because these chips are 32bit chips, there is no + * 16MB limitation and we don't need bounce buffers. 
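+ * Ring descriptors and packet buffers carry full 32-bit bus addresses, + * so received frames are DMA'd directly into skbuffs anywhere in host + * memory.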
+ */ + +/* + * History: + * v0.01: Initial version + * only tested on Alpha Noname Board + * v0.02: changed IRQ handling for new interrupt scheme (dev_id) + * tested on an ASUS SP3G + * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL + * looks like the 974 doesn't like stopping and restarting in a + * short period of time; now we do a reinit of the lance; the + * bug was triggered by doing ifconfig eth0 broadcast + * and hangs the machine (thanks to Klaus Liedl for debugging) + * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32, + * made it standalone (no need for lance.c) + * v0.13: added additional PCI detecting for special PCI devices (Compaq) + * v0.14: stripped down additional PCI probe (thanks to David C Niemi + * and sveneric@xs4all.nl for testing this on their Compaq boxes) + * v0.15: added 79C965 (VLB) probe + * added interrupt sharing for PCI chips + * v0.16: fixed set_multicast_list on Alpha machines + * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c + * v0.19: changed setting of autoselect bit + * v0.20: removed additional Compaq PCI probe; there is now a working one + * in arch/i386/bios32.c + * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu + * v0.22: added printing of status to ring dump + * v0.23: changed enet_statistics to net_device_stats + * v0.90: added multicast filter + * added module support + * changed irq probe to new style + * added PCnetFast chip id + * added fix for receive stalls with Intel saturn chipsets + * added in-place rx skbs like in the tulip driver + * minor cleanups + * v0.91: added PCnetFast+ chip id + * back port to 2.0.x + * v1.00: added some stuff from Donald Becker's 2.0.34 version + * added support for byte counters in net_dev_stats + * v1.01: do ring dumps, only when debugging the driver + * increased the transmit timeout + * v1.02: fixed memory leak in pcnet32_init_ring() + * v1.10: workaround for stopped transmitter + * added port selection for modules + * detect special T1/E1 WAN card and setup port selection + * v1.11: fixed wrong checking of Tx errors + * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu) + * added save original kmalloc addr for freeing (mcr@solidum.com) + * added support for PCnetHome chip (joe@MIT.EDU) + * rewritten PCI card detection + * added dwio mode to get driver working on some PPC machines + * v1.21: added mii selection and mii ioctl + * v1.22: changed pci scanning code to make PPC people happy + * fixed switching to 32bit mode in pcnet32_open() (thanks + * to Michael Richard for noticing this one) + * added sub vendor/device id matching (thanks again to + * Michael Richard ) + * added chip id for 79c973/975 (thanks to Zach Brown ) + * v1.23 fixed small bug, when manually selecting MII speed/duplex + * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO + * underflows. Added tx_start_pt module parameter. Increased + * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO + * for FAST[+] chipsets. + * v1.24ac Added SMP spinlocking - Alan Cox + * v1.25kf Added No Interrupt on successful Tx for some Tx's + * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France + * + * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker + */ + + +/* + * Set the number of Tx and Rx buffers, using Log_2(# buffers). + * Reasonable default values are 4 Tx buffers, and 16 Rx buffers. + * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4). 
+ */ +#ifndef PCNET32_LOG_TX_BUFFERS +#define PCNET32_LOG_TX_BUFFERS 4 +#define PCNET32_LOG_RX_BUFFERS 5 +#endif + +#undef RX_RING_SIZE +#undef TX_RING_SIZE + +#define TX_RING_SIZE (1 << (PCNET32_LOG_TX_BUFFERS)) +#define TX_RING_MOD_MASK (TX_RING_SIZE - 1) +#define TX_RING_LEN_BITS ((PCNET32_LOG_TX_BUFFERS) << 12) + +#define RX_RING_SIZE (1 << (PCNET32_LOG_RX_BUFFERS)) +#define RX_RING_MOD_MASK (RX_RING_SIZE - 1) +#define RX_RING_LEN_BITS ((PCNET32_LOG_RX_BUFFERS) << 4) + +#define PKT_BUF_SZ 1544 + +/* Offsets from base I/O address. */ +#define PCNET32_WIO_RDP 0x10 +#define PCNET32_WIO_RAP 0x12 +#define PCNET32_WIO_RESET 0x14 +#define PCNET32_WIO_BDP 0x16 + +#define PCNET32_DWIO_RDP 0x10 +#define PCNET32_DWIO_RAP 0x14 +#define PCNET32_DWIO_RESET 0x18 +#define PCNET32_DWIO_BDP 0x1C + +#define PCNET32_TOTAL_SIZE 0x20 + +#define CRC_POLYNOMIAL_LE 0xedb88320UL /* Ethernet CRC, little endian */ + +/* The PCNET32 Rx and Tx ring descriptors. */ +struct pcnet32_rx_head { + u32 base; + s16 buf_length; + s16 status; + u32 msg_length; + u32 reserved; +}; + +struct pcnet32_tx_head { + u32 base; + s16 length; + s16 status; + u32 misc; + u32 reserved; +}; + +/* The PCNET32 32-Bit initialization block, described in databook. */ +struct pcnet32_init_block { + u16 mode; + u16 tlen_rlen; + u8 phys_addr[6]; + u16 reserved; + u32 filter[2]; + /* Receive and transmit ring base, along with extra bits. */ + u32 rx_ring; + u32 tx_ring; +}; + +/* PCnet32 access functions */ +struct pcnet32_access { + u16 (*read_csr)(unsigned long, int); + void (*write_csr)(unsigned long, int, u16); + u16 (*read_bcr)(unsigned long, int); + void (*write_bcr)(unsigned long, int, u16); + u16 (*read_rap)(unsigned long); + void (*write_rap)(unsigned long, u16); + void (*reset)(unsigned long); +}; + +/* + * The first three fields of pcnet32_private are read by the ethernet device, + * so the structure should be allocated by pci_alloc_consistent(). + */ +struct pcnet32_private { + /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ + struct pcnet32_rx_head rx_ring[RX_RING_SIZE]; + struct pcnet32_tx_head tx_ring[TX_RING_SIZE]; + struct pcnet32_init_block init_block; + dma_addr_t dma_addr; /* DMA address of beginning of this object, returned by pci_alloc_consistent */ + struct pci_dev *pci_dev; /* Pointer to the associated pci device structure */ + const char *name; + /* The saved address of a sent-in-place packet/buffer, for skfree(). */ + struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct sk_buff *rx_skbuff[RX_RING_SIZE]; + dma_addr_t tx_dma_addr[TX_RING_SIZE]; + dma_addr_t rx_dma_addr[RX_RING_SIZE]; + struct pcnet32_access a; + spinlock_t lock; /* Guard lock */ + unsigned int cur_rx, cur_tx; /* The next free ring entry */ + unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. 
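+ cur_* and dirty_* are free-running counters: an index modulo the ring size selects the slot, and (cur - dirty) is the number of entries still outstanding. 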
*/ + struct net_device_stats stats; + char tx_full; + int options; + int shared_irq:1, /* shared irq possible */ + ltint:1, +#ifdef DO_DXSUFLO + dxsuflo:1, /* disable transmit stop on uflo */ +#endif + full_duplex:1, /* full duplex possible */ + mii:1; /* mii port available */ + struct net_device *next; +}; + +static int pcnet32_probe_vlbus(int cards_found); +static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); +static int pcnet32_probe1(unsigned long, unsigned char, int, int, struct pci_dev *); +static int pcnet32_open(struct net_device *); +static int pcnet32_init_ring(struct net_device *); +static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); +static int pcnet32_rx(struct net_device *); +static void pcnet32_tx_timeout (struct net_device *dev); +static void pcnet32_interrupt(int, void *, struct pt_regs *); +static int pcnet32_close(struct net_device *); +static struct net_device_stats *pcnet32_get_stats(struct net_device *); +static void pcnet32_set_multicast_list(struct net_device *); +#ifdef HAVE_PRIVATE_IOCTL +static int pcnet32_mii_ioctl(struct net_device *, struct ifreq *, int); +#endif + +enum pci_flags_bit { + PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4, + PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, +}; + +struct pcnet32_pci_id_info { + const char *name; + u16 vendor_id, device_id, svid, sdid, flags; + int io_size; + int (*probe1) (unsigned long, unsigned char, int, int, struct pci_dev *); +}; + + +MODULE_DEVICE_TABLE (pci, pcnet32_pci_tbl); + +static u16 pcnet32_wio_read_csr (unsigned long addr, int index) +{ + outw (index, addr+PCNET32_WIO_RAP); + return inw (addr+PCNET32_WIO_RDP); +} + +static void pcnet32_wio_write_csr (unsigned long addr, int index, u16 val) +{ + outw (index, addr+PCNET32_WIO_RAP); + outw (val, addr+PCNET32_WIO_RDP); +} + +static u16 pcnet32_wio_read_bcr (unsigned long addr, int index) +{ + outw (index, addr+PCNET32_WIO_RAP); + return inw (addr+PCNET32_WIO_BDP); +} + +static void pcnet32_wio_write_bcr (unsigned long addr, int index, u16 val) +{ + outw (index, addr+PCNET32_WIO_RAP); + outw (val, addr+PCNET32_WIO_BDP); +} + +static u16 pcnet32_wio_read_rap (unsigned long addr) +{ + return inw (addr+PCNET32_WIO_RAP); +} + +static void pcnet32_wio_write_rap (unsigned long addr, u16 val) +{ + outw (val, addr+PCNET32_WIO_RAP); +} + +static void pcnet32_wio_reset (unsigned long addr) +{ + inw (addr+PCNET32_WIO_RESET); +} + +static int pcnet32_wio_check (unsigned long addr) +{ + outw (88, addr+PCNET32_WIO_RAP); + return (inw (addr+PCNET32_WIO_RAP) == 88); +} + +static struct pcnet32_access pcnet32_wio = { + pcnet32_wio_read_csr, + pcnet32_wio_write_csr, + pcnet32_wio_read_bcr, + pcnet32_wio_write_bcr, + pcnet32_wio_read_rap, + pcnet32_wio_write_rap, + pcnet32_wio_reset +}; + +static u16 pcnet32_dwio_read_csr (unsigned long addr, int index) +{ + outl (index, addr+PCNET32_DWIO_RAP); + return (inl (addr+PCNET32_DWIO_RDP) & 0xffff); +} + +static void pcnet32_dwio_write_csr (unsigned long addr, int index, u16 val) +{ + outl (index, addr+PCNET32_DWIO_RAP); + outl (val, addr+PCNET32_DWIO_RDP); +} + +static u16 pcnet32_dwio_read_bcr (unsigned long addr, int index) +{ + outl (index, addr+PCNET32_DWIO_RAP); + return (inl (addr+PCNET32_DWIO_BDP) & 0xffff); +} + +static void pcnet32_dwio_write_bcr (unsigned long addr, int index, u16 val) +{ + outl (index, addr+PCNET32_DWIO_RAP); + outl (val, addr+PCNET32_DWIO_BDP); +} + +static u16 pcnet32_dwio_read_rap (unsigned long addr) +{ + return (inl 
(addr+PCNET32_DWIO_RAP) & 0xffff); +} + +static void pcnet32_dwio_write_rap (unsigned long addr, u16 val) +{ + outl (val, addr+PCNET32_DWIO_RAP); +} + +static void pcnet32_dwio_reset (unsigned long addr) +{ + inl (addr+PCNET32_DWIO_RESET); +} + +static int pcnet32_dwio_check (unsigned long addr) +{ + outl (88, addr+PCNET32_DWIO_RAP); + return ((inl (addr+PCNET32_DWIO_RAP) & 0xffff) == 88); +} + +static struct pcnet32_access pcnet32_dwio = { + pcnet32_dwio_read_csr, + pcnet32_dwio_write_csr, + pcnet32_dwio_read_bcr, + pcnet32_dwio_write_bcr, + pcnet32_dwio_read_rap, + pcnet32_dwio_write_rap, + pcnet32_dwio_reset + +}; + + + +/* only probes for non-PCI devices, the rest are handled by pci_register_driver via pcnet32_probe_pci*/ +static int __init pcnet32_probe_vlbus(int cards_found) +{ + unsigned long ioaddr = 0; // FIXME dev ? dev->base_addr: 0; + unsigned int irq_line = 0; // FIXME dev ? dev->irq : 0; + int *port; + + printk(KERN_INFO "pcnet32_probe_vlbus: cards_found=%d\n", cards_found); +#ifndef __powerpc__ + if (ioaddr > 0x1ff) { + if (check_region(ioaddr, PCNET32_TOTAL_SIZE) == 0) + return pcnet32_probe1(ioaddr, irq_line, 0, 0, NULL); + else + return -ENODEV; + } else +#endif + if (ioaddr != 0) + return -ENXIO; + + /* now look for PCnet32 VLB cards */ + for (port = pcnet32_portlist; *port; port++) { + unsigned long ioaddr = *port; + + if ( check_region(ioaddr, PCNET32_TOTAL_SIZE) == 0) { + /* check if there is really a pcnet chip on that ioaddr */ + if ((inb(ioaddr + 14) == 0x57) && + (inb(ioaddr + 15) == 0x57) && + (pcnet32_probe1(ioaddr, 0, 0, 0, NULL) == 0)) + cards_found++; + } + } + return cards_found ? 0: -ENODEV; +} + + + +static int __devinit +pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + static int card_idx; + long ioaddr; + int err = 0; + + printk(KERN_INFO "pcnet32_probe_pci: found device %#08x.%#08x\n", ent->vendor, ent->device); + + if ((err = pci_enable_device(pdev)) < 0) { + printk(KERN_ERR "pcnet32.c: failed to enable device -- err=%d\n", err); + return err; + } + pci_set_master(pdev); + + ioaddr = pci_resource_start (pdev, 0); + printk(KERN_INFO " ioaddr=%#08lx resource_flags=%#08lx\n", ioaddr, pci_resource_flags (pdev, 0)); + if (!ioaddr) { + printk (KERN_ERR "no PCI IO resources, aborting\n"); + return -ENODEV; + } + + if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { + printk(KERN_ERR "pcnet32.c: architecture does not support 32bit PCI busmaster DMA\n"); + return -ENODEV; + } + + return pcnet32_probe1(ioaddr, pdev->irq, 1, card_idx, pdev); +} + + +/* pcnet32_probe1 + * Called from both pcnet32_probe_vlbus and pcnet_probe_pci. + * pdev will be NULL when called from pcnet32_probe_vlbus. 
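+ * The chip is reset in both access modes and then probed with word I/O + * before dword I/O: older PCnet parts decode only 16-bit accesses, while + * newer ones answer the DWIO check.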
+ */ +static int __devinit +pcnet32_probe1(unsigned long ioaddr, unsigned char irq_line, int shared, int card_idx, struct pci_dev *pdev) +{ + struct pcnet32_private *lp; + struct resource *res; + dma_addr_t lp_dma_addr; + int i,media,fdx = 0, mii = 0, fset = 0; +#ifdef DO_DXSUFLO + int dxsuflo = 0; +#endif + int ltint = 0; + int chip_version; + char *chipname; + struct net_device *dev; + struct pcnet32_access *a = NULL; + + /* reset the chip */ + pcnet32_dwio_reset(ioaddr); + pcnet32_wio_reset(ioaddr); + + /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ + if (pcnet32_wio_read_csr (ioaddr, 0) == 4 && pcnet32_wio_check (ioaddr)) { + a = &pcnet32_wio; + } else { + if (pcnet32_dwio_read_csr (ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { + a = &pcnet32_dwio; + } else + return -ENODEV; + } + + chip_version = a->read_csr (ioaddr, 88) | (a->read_csr (ioaddr,89) << 16); + if (pcnet32_debug > 2) + printk(KERN_INFO " PCnet chip version is %#x.\n", chip_version); + if ((chip_version & 0xfff) != 0x003) + return -ENODEV; + chip_version = (chip_version >> 12) & 0xffff; + switch (chip_version) { + case 0x2420: + chipname = "PCnet/PCI 79C970"; /* PCI */ + break; + case 0x2430: + if (shared) + chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */ + else + chipname = "PCnet/32 79C965"; /* 486/VL bus */ + break; + case 0x2621: + chipname = "PCnet/PCI II 79C970A"; /* PCI */ + fdx = 1; + break; + case 0x2623: + chipname = "PCnet/FAST 79C971"; /* PCI */ + fdx = 1; mii = 1; fset = 1; + ltint = 1; + break; + case 0x2624: + chipname = "PCnet/FAST+ 79C972"; /* PCI */ + fdx = 1; mii = 1; fset = 1; + break; + case 0x2625: + chipname = "PCnet/FAST III 79C973"; /* PCI */ + fdx = 1; mii = 1; + break; + case 0x2626: + chipname = "PCnet/Home 79C978"; /* PCI */ + fdx = 1; + /* + * This is based on specs published at www.amd.com. This section + * assumes that a card with a 79C978 wants to go into 1Mb HomePNA + * mode. The 79C978 can also go into standard ethernet, and there + * probably should be some sort of module option to select the + * mode by which the card should operate + */ + /* switch to home wiring mode */ + media = a->read_bcr (ioaddr, 49); +#if 0 + if (pcnet32_debug > 2) + printk(KERN_DEBUG "pcnet32: pcnet32 media value %#x.\n", media); + media &= ~3; + media |= 1; +#endif + if (pcnet32_debug > 2) + printk(KERN_DEBUG "pcnet32: pcnet32 media reset to %#x.\n", media); + a->write_bcr (ioaddr, 49, media); + break; + case 0x2627: + chipname = "PCnet/FAST III 79C975"; /* PCI */ + fdx = 1; mii = 1; + break; + default: + printk(KERN_INFO "pcnet32: PCnet version %#x, no PCnet32 chip.\n",chip_version); + return -ENODEV; + } + + /* + * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit + * starting until the packet is loaded. Strike one for reliability, lose + * one for latency - although on PCI this isnt a big loss. Older chips + * have FIFO's smaller than a packet, so you can't do this. + */ + + if(fset) + { + a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); + a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); +#ifdef DO_DXSUFLO + dxsuflo = 1; +#endif + ltint = 1; + } + + dev = init_etherdev(NULL, 0); + if(dev==NULL) + return -ENOMEM; + + printk(KERN_INFO "%s: %s at %#3lx,", dev->name, chipname, ioaddr); + + /* In most chips, after a chip reset, the ethernet address is read from the + * station address PROM at the base address and programmed into the + * "Physical Address Registers" CSR12-14. 
+ * As a precautionary measure, we read the PROM values and complain if + * they disagree with the CSRs. Either way, we use the CSR values, and + * double check that they are valid. + */ + for (i = 0; i < 3; i++) { + unsigned int val; + val = a->read_csr(ioaddr, i+12) & 0x0ffff; + /* There may be endianness issues here. */ + dev->dev_addr[2*i] = val & 0x0ff; + dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff; + } + { + u8 promaddr[6]; + for (i = 0; i < 6; i++) { + promaddr[i] = inb(ioaddr + i); + } + if( memcmp( promaddr, dev->dev_addr, 6) ) + { + printk(" warning PROM address does not match CSR address\n"); +#if defined(__i386__) + printk(KERN_WARNING "%s: Probably a Compaq, using the PROM address of", dev->name); + memcpy(dev->dev_addr, promaddr, 6); +#endif + } + } + /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ + if( !is_valid_ether_addr(dev->dev_addr) ) + for (i = 0; i < 6; i++) + dev->dev_addr[i]=0; + + for (i = 0; i < 6; i++) + printk(" %2.2x", dev->dev_addr[i] ); + + if (((chip_version + 1) & 0xfffe) == 0x2624) { /* Version 0x2623 or 0x2624 */ + i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */ + printk("\n" KERN_INFO " tx_start_pt(0x%04x):",i); + switch(i>>10) { + case 0: printk(" 20 bytes,"); break; + case 1: printk(" 64 bytes,"); break; + case 2: printk(" 128 bytes,"); break; + case 3: printk("~220 bytes,"); break; + } + i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */ + printk(" BCR18(%x):",i&0xffff); + if (i & (1<<5)) printk("BurstWrEn "); + if (i & (1<<6)) printk("BurstRdEn "); + if (i & (1<<7)) printk("DWordIO "); + if (i & (1<<11)) printk("NoUFlow "); + i = a->read_bcr(ioaddr, 25); + printk("\n" KERN_INFO " SRAMSIZE=0x%04x,",i<<8); + i = a->read_bcr(ioaddr, 26); + printk(" SRAM_BND=0x%04x,",i<<8); + i = a->read_bcr(ioaddr, 27); + if (i & (1<<14)) printk("LowLatRx"); + } + + dev->base_addr = ioaddr; + res = request_region(ioaddr, PCNET32_TOTAL_SIZE, chipname); + if (res == NULL) + return -EBUSY; + + /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */ + if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) { + release_resource(res); + return -ENOMEM; + } + + memset(lp, 0, sizeof(*lp)); + lp->dma_addr = lp_dma_addr; + lp->pci_dev = pdev; + printk("\n" KERN_INFO "pcnet32: pcnet32_private lp=%p lp_dma_addr=%#08x", lp, lp_dma_addr); + + spin_lock_init(&lp->lock); + + dev->priv = lp; + lp->name = chipname; + lp->shared_irq = shared; + lp->full_duplex = fdx; +#ifdef DO_DXSUFLO + lp->dxsuflo = dxsuflo; +#endif + lp->ltint = ltint; + lp->mii = mii; + if (options[card_idx] > sizeof (options_mapping)) + lp->options = PORT_ASEL; + else + lp->options = options_mapping[options[card_idx]]; + + if (fdx && !(lp->options & PORT_ASEL) && full_duplex[card_idx]) + lp->options |= PORT_FD; + + if (a == NULL) { + printk(KERN_ERR "pcnet32: No access methods\n"); + pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); + release_resource(res); + return -ENODEV; + } + lp->a = *a; + + /* detect special T1/E1 WAN card by checking for MAC address */ + if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0 && dev->dev_addr[2] == 0x75) + lp->options = PORT_FD | PORT_GPSI; + + lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. 
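+ Mode bits 0 and 1 are DRX and DTX in the AMD databook; both stay set so the MAC is idle until pcnet32_open() programs the real operating mode. 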
*/ + lp->init_block.tlen_rlen = le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); + for (i = 0; i < 6; i++) + lp->init_block.phys_addr[i] = dev->dev_addr[i]; + lp->init_block.filter[0] = 0x00000000; + lp->init_block.filter[1] = 0x00000000; + lp->init_block.rx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); + lp->init_block.tx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); + + /* switch pcnet32 to 32bit mode */ + a->write_bcr (ioaddr, 20, 2); + + a->write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) & 0xffff); + a->write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16); + + if (irq_line) { + dev->irq = irq_line; + } + + if (dev->irq >= 2) + printk(" assigned IRQ %d.\n", dev->irq); + else { + unsigned long irq_mask = probe_irq_on(); + + /* + * To auto-IRQ we enable the initialization-done and DMA error + * interrupts. For ISA boards we get a DMA error, but VLB and PCI + * boards will work. + */ + /* Trigger an initialization just for the interrupt. */ + a->write_csr (ioaddr, 0, 0x41); + mdelay (1); + + dev->irq = probe_irq_off (irq_mask); + if (dev->irq) + printk(", probed IRQ %d.\n", dev->irq); + else { + printk(", failed to detect IRQ line.\n"); + pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); + release_resource(res); + return -ENODEV; + } + } + + if (pcnet32_debug > 0) + printk(KERN_INFO "%s", version); + + /* The PCNET32-specific entries in the device structure. */ + dev->open = &pcnet32_open; + dev->hard_start_xmit = &pcnet32_start_xmit; + dev->stop = &pcnet32_close; + dev->get_stats = &pcnet32_get_stats; + dev->set_multicast_list = &pcnet32_set_multicast_list; +#ifdef HAVE_PRIVATE_IOCTL + dev->do_ioctl = &pcnet32_mii_ioctl; +#endif + dev->tx_timeout = pcnet32_tx_timeout; + dev->watchdog_timeo = (HZ >> 1); + + lp->next = pcnet32_dev; + pcnet32_dev = dev; + + /* Fill in the generic fields of the device structure. */ + ether_setup(dev); + return 0; +} + + +static int +pcnet32_open(struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + unsigned long ioaddr = dev->base_addr; + u16 val; + int i; + + if (dev->irq == 0 || + request_irq(dev->irq, &pcnet32_interrupt, + lp->shared_irq ? 
SA_SHIRQ : 0, lp->name, (void *)dev)) { + return -EAGAIN; + } + + /* Check for a valid station address */ + if( !is_valid_ether_addr(dev->dev_addr) ) + return -EINVAL; + + /* Reset the PCNET32 */ + lp->a.reset (ioaddr); + + /* switch pcnet32 to 32bit mode */ + lp->a.write_bcr (ioaddr, 20, 2); + + if (pcnet32_debug > 1) + printk(KERN_DEBUG "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n", + dev->name, dev->irq, + (u32) (lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)), + (u32) (lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)), + (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block))); + + /* set/reset autoselect bit */ + val = lp->a.read_bcr (ioaddr, 2) & ~2; + if (lp->options & PORT_ASEL) + val |= 2; + lp->a.write_bcr (ioaddr, 2, val); + + /* handle full duplex setting */ + if (lp->full_duplex) { + val = lp->a.read_bcr (ioaddr, 9) & ~3; + if (lp->options & PORT_FD) { + val |= 1; + if (lp->options == (PORT_FD | PORT_AUI)) + val |= 2; + } + lp->a.write_bcr (ioaddr, 9, val); + } + + /* set/reset GPSI bit in test register */ + val = lp->a.read_csr (ioaddr, 124) & ~0x10; + if ((lp->options & PORT_PORTSEL) == PORT_GPSI) + val |= 0x10; + lp->a.write_csr (ioaddr, 124, val); + + if (lp->mii && !(lp->options & PORT_ASEL)) { + val = lp->a.read_bcr (ioaddr, 32) & ~0x38; /* disable Auto Negotiation, set 10Mpbs, HD */ + if (lp->options & PORT_FD) + val |= 0x10; + if (lp->options & PORT_100) + val |= 0x08; + lp->a.write_bcr (ioaddr, 32, val); + } else { + if (lp->options & PORT_ASEL) { /* enable auto negotiate, setup, disable fd */ + val = lp->a.read_bcr(ioaddr, 32) & ~0x98; + val |= 0x20; + lp->a.write_bcr(ioaddr, 32, val); + } + } + +#ifdef DO_DXSUFLO + if (lp->dxsuflo) { /* Disable transmit stop on underflow */ + val = lp->a.read_csr (ioaddr, 3); + val |= 0x40; + lp->a.write_csr (ioaddr, 3, val); + } +#endif + if (lp->ltint) { /* Enable TxDone-intr inhibitor */ + val = lp->a.read_csr (ioaddr, 5); + val |= (1<<14); + lp->a.write_csr (ioaddr, 5, val); + } + + lp->init_block.mode = le16_to_cpu((lp->options & PORT_PORTSEL) << 7); + lp->init_block.filter[0] = 0x00000000; + lp->init_block.filter[1] = 0x00000000; + if (pcnet32_init_ring(dev)) + return -ENOMEM; + + /* Re-initialize the PCNET32, and start it when done. */ + lp->a.write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) &0xffff); + lp->a.write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16); + + lp->a.write_csr (ioaddr, 4, 0x0915); + lp->a.write_csr (ioaddr, 0, 0x0001); + + netif_start_queue(dev); + + i = 0; + while (i++ < 100) + if (lp->a.read_csr (ioaddr, 0) & 0x0100) + break; + /* + * We used to clear the InitDone bit, 0x0100, here but Mark Stockton + * reports that doing so triggers a bug in the '974. + */ + lp->a.write_csr (ioaddr, 0, 0x0042); + + if (pcnet32_debug > 2) + printk(KERN_DEBUG "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n", + dev->name, i, (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block)), + lp->a.read_csr (ioaddr, 0)); + + + MOD_INC_USE_COUNT; + + return 0; /* Always succeed */ +} + +/* + * The LANCE has been halted for one reason or another (busmaster memory + * arbitration error, Tx FIFO underflow, driver stopped it to reconfigure, + * etc.). Modern LANCE variants always reload their ring-buffer + * configuration when restarted, so we must reinitialize our ring + * context before restarting. 
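+ * (pcnet32_restart() below does exactly that: purge the Tx ring,
+ * rebuild both rings, write the INIT bit and poll for IDON.)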
As part of this reinitialization, + * find all packets still on the Tx ring and pretend that they had been + * sent (in effect, drop the packets on the floor) - the higher-level + * protocols will time out and retransmit. It'd be better to shuffle + * these skbs to a temp list and then actually re-Tx them after + * restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com + */ + +static void +pcnet32_purge_tx_ring(struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + int i; + + for (i = 0; i < TX_RING_SIZE; i++) { + if (lp->tx_skbuff[i]) { + pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); + dev_kfree_skb(lp->tx_skbuff[i]); + lp->tx_skbuff[i] = NULL; + lp->tx_dma_addr[i] = 0; + } + } +} + + +/* Initialize the PCNET32 Rx and Tx rings. */ +static int +pcnet32_init_ring(struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + int i; + + lp->tx_full = 0; + lp->cur_rx = lp->cur_tx = 0; + lp->dirty_rx = lp->dirty_tx = 0; + + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *rx_skbuff = lp->rx_skbuff[i]; + if (rx_skbuff == NULL) { + if (!(rx_skbuff = lp->rx_skbuff[i] = dev_alloc_skb (PKT_BUF_SZ))) { + /* there is not much, we can do at this point */ + printk(KERN_ERR "%s: pcnet32_init_ring dev_alloc_skb failed.\n",dev->name); + return -1; + } + skb_reserve (rx_skbuff, 2); + } + lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE); + lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); + lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); + lp->rx_ring[i].status = le16_to_cpu(0x8000); + } + /* The Tx buffer address is filled in as needed, but we do need to clear + the upper ownership bit. */ + for (i = 0; i < TX_RING_SIZE; i++) { + lp->tx_ring[i].base = 0; + lp->tx_ring[i].status = 0; + lp->tx_dma_addr[i] = 0; + } + + lp->init_block.tlen_rlen = le16_to_cpu(TX_RING_LEN_BITS | RX_RING_LEN_BITS); + for (i = 0; i < 6; i++) + lp->init_block.phys_addr[i] = dev->dev_addr[i]; + lp->init_block.rx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, rx_ring)); + lp->init_block.tx_ring = (u32)le32_to_cpu(lp->dma_addr + offsetof(struct pcnet32_private, tx_ring)); + return 0; +} + +static void +pcnet32_restart(struct net_device *dev, unsigned int csr0_bits) +{ + struct pcnet32_private *lp = dev->priv; + unsigned long ioaddr = dev->base_addr; + int i; + + pcnet32_purge_tx_ring(dev); + if (pcnet32_init_ring(dev)) + return; + + /* ReInit Ring */ + lp->a.write_csr (ioaddr, 0, 1); + i = 0; + while (i++ < 100) + if (lp->a.read_csr (ioaddr, 0) & 0x0100) + break; + + lp->a.write_csr (ioaddr, 0, csr0_bits); +} + + +static void +pcnet32_tx_timeout (struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + unsigned int ioaddr = dev->base_addr; + + /* Transmitter timeout, serious problems. */ + printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n", + dev->name, lp->a.read_csr (ioaddr, 0)); + lp->a.write_csr (ioaddr, 0, 0x0004); + lp->stats.tx_errors++; + if (pcnet32_debug > 2) { + int i; + printk(KERN_DEBUG " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.", + lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "", + lp->cur_rx); + for (i = 0 ; i < RX_RING_SIZE; i++) + printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ", + lp->rx_ring[i].base, -lp->rx_ring[i].buf_length, + lp->rx_ring[i].msg_length, (unsigned)lp->rx_ring[i].status); + for (i = 0 ; i < TX_RING_SIZE; i++) + printk("%s %08x %04x %08x %04x", i & 1 ? 
"" : "\n ", + lp->tx_ring[i].base, -lp->tx_ring[i].length, + lp->tx_ring[i].misc, (unsigned)lp->tx_ring[i].status); + printk("\n"); + } + pcnet32_restart(dev, 0x0042); + + dev->trans_start = jiffies; + netif_start_queue(dev); +} + + +static int +pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + unsigned int ioaddr = dev->base_addr; + u16 status; + int entry; + unsigned long flags; + + if (pcnet32_debug > 3) { + printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", + dev->name, lp->a.read_csr (ioaddr, 0)); + } + + spin_lock_irqsave(&lp->lock, flags); + + /* Default status -- will not enable Successful-TxDone + * interrupt when that option is available to us. + */ + status = 0x8300; + if ((lp->ltint) && + ((lp->cur_tx - lp->dirty_tx == TX_RING_SIZE/2) || + (lp->cur_tx - lp->dirty_tx >= TX_RING_SIZE-2))) + { + /* Enable Successful-TxDone interrupt if we have + * 1/2 of, or nearly all of, our ring buffer Tx'd + * but not yet cleaned up. Thus, most of the time, + * we will not enable Successful-TxDone interrupts. + */ + status = 0x9300; + } + + /* Fill in a Tx ring entry */ + + /* Mask to ring buffer boundary. */ + entry = lp->cur_tx & TX_RING_MOD_MASK; + + /* Caution: the write order is important here, set the base address + with the "ownership" bits last. */ + + lp->tx_ring[entry].length = le16_to_cpu(-skb->len); + + lp->tx_ring[entry].misc = 0x00000000; + + lp->tx_skbuff[entry] = skb; + lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE); + lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]); + lp->tx_ring[entry].status = le16_to_cpu(status); + + lp->cur_tx++; + lp->stats.tx_bytes += skb->len; + + /* Trigger an immediate send poll. */ + lp->a.write_csr (ioaddr, 0, 0x0048); + + dev->trans_start = jiffies; + + if (lp->tx_ring[(entry+1) & TX_RING_MOD_MASK].base == 0) + netif_start_queue(dev); + else { + lp->tx_full = 1; + netif_stop_queue(dev); + } + spin_unlock_irqrestore(&lp->lock, flags); + return 0; +} + +/* The PCNET32 interrupt handler. */ +static void +pcnet32_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct net_device *dev = dev_id; + struct pcnet32_private *lp; + unsigned long ioaddr; + u16 csr0,rap; + int boguscnt = max_interrupt_work; + int must_restart; + + if (dev == NULL) { + printk (KERN_DEBUG "pcnet32_interrupt(): irq %d for unknown device.\n", irq); + return; + } + + ioaddr = dev->base_addr; + lp = dev->priv; + + spin_lock(&lp->lock); + + rap = lp->a.read_rap(ioaddr); + while ((csr0 = lp->a.read_csr (ioaddr, 0)) & 0x8600 && --boguscnt >= 0) { + /* Acknowledge all of the current interrupt sources ASAP. */ + lp->a.write_csr (ioaddr, 0, csr0 & ~0x004f); + + must_restart = 0; + + if (pcnet32_debug > 5) + printk(KERN_DEBUG "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n", + dev->name, csr0, lp->a.read_csr (ioaddr, 0)); + + if (csr0 & 0x0400) /* Rx interrupt */ + pcnet32_rx(dev); + + if (csr0 & 0x0200) { /* Tx-done interrupt */ + unsigned int dirty_tx = lp->dirty_tx; + + while (dirty_tx < lp->cur_tx) { + int entry = dirty_tx & TX_RING_MOD_MASK; + int status = (short)le16_to_cpu(lp->tx_ring[entry].status); + + if (status < 0) + break; /* It still hasn't been Txed */ + + lp->tx_ring[entry].base = 0; + + if (status & 0x4000) { + /* There was an major error, log it. 
*/ + int err_status = le32_to_cpu(lp->tx_ring[entry].misc); + lp->stats.tx_errors++; + if (err_status & 0x04000000) lp->stats.tx_aborted_errors++; + if (err_status & 0x08000000) lp->stats.tx_carrier_errors++; + if (err_status & 0x10000000) lp->stats.tx_window_errors++; +#ifndef DO_DXSUFLO + if (err_status & 0x40000000) { + lp->stats.tx_fifo_errors++; + /* Ackk! On FIFO errors the Tx unit is turned off! */ + /* Remove this verbosity later! */ + printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", + dev->name, csr0); + must_restart = 1; + } +#else + if (err_status & 0x40000000) { + lp->stats.tx_fifo_errors++; + if (! lp->dxsuflo) { /* If controller doesn't recover ... */ + /* Ackk! On FIFO errors the Tx unit is turned off! */ + /* Remove this verbosity later! */ + printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n", + dev->name, csr0); + must_restart = 1; + } + } +#endif + } else { + if (status & 0x1800) + lp->stats.collisions++; + lp->stats.tx_packets++; + } + + /* We must free the original skb */ + if (lp->tx_skbuff[entry]) { + pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[entry], lp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(lp->tx_skbuff[entry]); + lp->tx_skbuff[entry] = 0; + lp->tx_dma_addr[entry] = 0; + } + dirty_tx++; + } + +#ifndef final_version + if (lp->cur_tx - dirty_tx >= TX_RING_SIZE) { + printk(KERN_ERR "out-of-sync dirty pointer, %d vs. %d, full=%d.\n", + dirty_tx, lp->cur_tx, lp->tx_full); + dirty_tx += TX_RING_SIZE; + } +#endif + if (lp->tx_full && + netif_queue_stopped(dev) && + dirty_tx > lp->cur_tx - TX_RING_SIZE + 2) { + /* The ring is no longer full, clear tbusy. */ + lp->tx_full = 0; + netif_wake_queue (dev); + } + lp->dirty_tx = dirty_tx; + } + + /* Log misc errors. */ + if (csr0 & 0x4000) lp->stats.tx_errors++; /* Tx babble. */ + if (csr0 & 0x1000) { + /* + * this happens when our receive ring is full. This shouldn't + * be a problem as we will see normal rx interrupts for the frames + * in the receive ring. But there are some PCI chipsets (I can reproduce + * this on SP3G with Intel saturn chipset) which have sometimes problems + * and will fill up the receive ring with error descriptors. In this + * situation we don't get a rx interrupt, but a missed frame interrupt sooner + * or later. So we try to clean up our receive ring here. + */ + pcnet32_rx(dev); + lp->stats.rx_errors++; /* Missed a Rx frame. */ + } + if (csr0 & 0x0800) { + printk(KERN_ERR "%s: Bus master arbitration failure, status %4.4x.\n", + dev->name, csr0); + /* unlike for the lance, there is no restart needed */ + } + + if (must_restart) { + /* stop the chip to clear the error condition, then restart */ + lp->a.write_csr (ioaddr, 0, 0x0004); + pcnet32_restart(dev, 0x0002); + } + } + + /* Clear any other interrupt, and set interrupt enable. */ + lp->a.write_csr (ioaddr, 0, 0x7940); + lp->a.write_rap(ioaddr,rap); + + if (pcnet32_debug > 4) + printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n", + dev->name, lp->a.read_csr (ioaddr, 0)); + + spin_unlock(&lp->lock); +} + +static int +pcnet32_rx(struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + int entry = lp->cur_rx & RX_RING_MOD_MASK; + + /* If we own the next entry, it's a new packet. Send it up. */ + while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) { + int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8; + + if (status != 0x03) { /* There was an error. 
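+ * status is the high byte of the descriptor status word; 0x03
+ * (STP|ENP) means a clean, single-buffer frame. Bits tested below:
+ * 0x20 framing, 0x10 overflow, 0x08 CRC, 0x04 buffer/FIFO, with
+ * 0x01 (ENP) gating the general error count.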
*/ + /* + * There is a tricky error noted by John Murphy, + * to Russ Nelson: Even with full-sized + * buffers it's possible for a jabber packet to use two + * buffers, with only the last correctly noting the error. + */ + if (status & 0x01) /* Only count a general error at the */ + lp->stats.rx_errors++; /* end of a packet.*/ + if (status & 0x20) lp->stats.rx_frame_errors++; + if (status & 0x10) lp->stats.rx_over_errors++; + if (status & 0x08) lp->stats.rx_crc_errors++; + if (status & 0x04) lp->stats.rx_fifo_errors++; + lp->rx_ring[entry].status &= le16_to_cpu(0x03ff); + } else { + /* Malloc up new buffer, compatible with net-2e. */ + short pkt_len = (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)-4; + struct sk_buff *skb; + + if(pkt_len < 60) { + printk(KERN_ERR "%s: Runt packet!\n",dev->name); + lp->stats.rx_errors++; + } else { + int rx_in_place = 0; + + if (pkt_len > rx_copybreak) { + struct sk_buff *newskb; + + if ((newskb = dev_alloc_skb (PKT_BUF_SZ))) { + skb_reserve (newskb, 2); + skb = lp->rx_skbuff[entry]; + skb_put (skb, pkt_len); + lp->rx_skbuff[entry] = newskb; + newskb->dev = dev; + lp->rx_dma_addr[entry] = pci_map_single(lp->pci_dev, newskb->tail, newskb->len, PCI_DMA_FROMDEVICE); + lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]); + rx_in_place = 1; + } else + skb = NULL; + } else { + skb = dev_alloc_skb(pkt_len+2); + } + + if (skb == NULL) { + int i; + printk(KERN_ERR "%s: Memory squeeze, deferring packet.\n", dev->name); + for (i = 0; i < RX_RING_SIZE; i++) + if ((short)le16_to_cpu(lp->rx_ring[(entry+i) & RX_RING_MOD_MASK].status) < 0) + break; + + if (i > RX_RING_SIZE -2) { + lp->stats.rx_dropped++; + lp->rx_ring[entry].status |= le16_to_cpu(0x8000); + lp->cur_rx++; + } + break; + } + skb->dev = dev; + if (!rx_in_place) { + skb_reserve(skb,2); /* 16 byte align */ + skb_put(skb,pkt_len); /* Make room */ + eth_copy_and_sum(skb, + (unsigned char *)(lp->rx_skbuff[entry]->tail), + pkt_len,0); + } + lp->stats.rx_bytes += skb->len; + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + lp->stats.rx_packets++; + } + } + /* + * The docs say that the buffer length isn't touched, but Andrew Boyd + * of QNX reports that some revs of the 79C965 clear it. + */ + lp->rx_ring[entry].buf_length = le16_to_cpu(-PKT_BUF_SZ); + lp->rx_ring[entry].status |= le16_to_cpu(0x8000); + entry = (++lp->cur_rx) & RX_RING_MOD_MASK; + } + + return 0; +} + +static int +pcnet32_close(struct net_device *dev) +{ + unsigned long ioaddr = dev->base_addr; + struct pcnet32_private *lp = dev->priv; + int i; + + netif_stop_queue(dev); + + lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); + + if (pcnet32_debug > 1) + printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", + dev->name, lp->a.read_csr (ioaddr, 0)); + + /* We stop the PCNET32 here -- it occasionally polls memory if we don't. 
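+ * Writing 0x0004 to CSR0 sets the STOP bit; the same value is used
+ * wherever the chip must be halted before reprogramming (restart,
+ * Tx timeout, multicast reload).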
*/ + lp->a.write_csr (ioaddr, 0, 0x0004); + + /* + * Switch back to 16bit mode to avoid problems with dumb + * DOS packet driver after a warm reboot + */ + lp->a.write_bcr (ioaddr, 20, 4); + + free_irq(dev->irq, dev); + + /* free all allocated skbuffs */ + for (i = 0; i < RX_RING_SIZE; i++) { + lp->rx_ring[i].status = 0; + if (lp->rx_skbuff[i]) { + pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], lp->rx_skbuff[i]->len, PCI_DMA_FROMDEVICE); + dev_kfree_skb(lp->rx_skbuff[i]); + } + lp->rx_skbuff[i] = NULL; + lp->rx_dma_addr[i] = 0; + } + + for (i = 0; i < TX_RING_SIZE; i++) { + if (lp->tx_skbuff[i]) { + pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE); + dev_kfree_skb(lp->tx_skbuff[i]); + } + lp->tx_skbuff[i] = NULL; + lp->tx_dma_addr[i] = 0; + } + + MOD_DEC_USE_COUNT; + + return 0; +} + +static struct net_device_stats * +pcnet32_get_stats(struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + unsigned long ioaddr = dev->base_addr; + u16 saved_addr; + unsigned long flags; + + spin_lock_irqsave(&lp->lock, flags); + saved_addr = lp->a.read_rap(ioaddr); + lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112); + lp->a.write_rap(ioaddr, saved_addr); + spin_unlock_irqrestore(&lp->lock, flags); + + return &lp->stats; +} + +/* taken from the sunlance driver, which it took from the depca driver */ +static void pcnet32_load_multicast (struct net_device *dev) +{ + struct pcnet32_private *lp = dev->priv; + volatile struct pcnet32_init_block *ib = &lp->init_block; + volatile u16 *mcast_table = (u16 *)&ib->filter; + struct dev_mc_list *dmi=dev->mc_list; + char *addrs; + int i, j, bit, byte; + u32 crc, poly = CRC_POLYNOMIAL_LE; + + /* set all multicast bits */ + if (dev->flags & IFF_ALLMULTI){ + ib->filter [0] = 0xffffffff; + ib->filter [1] = 0xffffffff; + return; + } + /* clear the multicast filter */ + ib->filter [0] = 0; + ib->filter [1] = 0; + + /* Add addresses */ + for (i = 0; i < dev->mc_count; i++){ + addrs = dmi->dmi_addr; + dmi = dmi->next; + + /* multicast address? */ + if (!(*addrs & 1)) + continue; + + crc = 0xffffffff; + for (byte = 0; byte < 6; byte++) + for (bit = *addrs++, j = 0; j < 8; j++, bit >>= 1) { + int test; + + test = ((bit ^ crc) & 0x01); + crc >>= 1; + + if (test) { + crc = crc ^ poly; + } + } + + crc = crc >> 26; + mcast_table [crc >> 4] |= 1 << (crc & 0xf); + } + return; +} + + +/* + * Set or clear the multicast filter for this adaptor. + */ +static void pcnet32_set_multicast_list(struct net_device *dev) +{ + unsigned long ioaddr = dev->base_addr; + struct pcnet32_private *lp = dev->priv; + + if (dev->flags&IFF_PROMISC) { + /* Log any net taps. */ + printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name); + lp->init_block.mode = le16_to_cpu(0x8000 | (lp->options & PORT_PORTSEL) << 7); + } else { + lp->init_block.mode = le16_to_cpu((lp->options & PORT_PORTSEL) << 7); + pcnet32_load_multicast (dev); + } + + lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */ + + pcnet32_restart(dev, 0x0042); /* Resume normal operation */ +} + +#ifdef HAVE_PRIVATE_IOCTL +static int pcnet32_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + unsigned long ioaddr = dev->base_addr; + struct pcnet32_private *lp = dev->priv; + u16 *data = (u16 *)&rq->ifr_data; + int phyaddr = lp->a.read_bcr (ioaddr, 33); + + if (lp->mii) { + switch(cmd) { + case SIOCDEVPRIVATE: /* Get the address of the PHY in use. 
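+ * MII access is indirect: BCR33 selects (phyaddr << 5) | regnum,
+ * BCR34 is the data window, as the cases below show. This case
+ * falls through so a "get PHY" request also returns the register
+ * contents.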
*/ + data[0] = (phyaddr >> 5) & 0x1f; + /* Fall Through */ + case SIOCDEVPRIVATE+1: /* Read the specified MII register. */ + lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f)); + data[3] = lp->a.read_bcr (ioaddr, 34); + lp->a.write_bcr (ioaddr, 33, phyaddr); + return 0; + case SIOCDEVPRIVATE+2: /* Write the specified MII register */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + lp->a.write_bcr (ioaddr, 33, ((data[0] & 0x1f) << 5) | (data[1] & 0x1f)); + lp->a.write_bcr (ioaddr, 34, data[2]); + lp->a.write_bcr (ioaddr, 33, phyaddr); + return 0; + default: + return -EOPNOTSUPP; + } + } + return -EOPNOTSUPP; +} +#endif /* HAVE_PRIVATE_IOCTL */ + +static struct pci_driver pcnet32_driver = { + name: "pcnet32", + probe: pcnet32_probe_pci, + remove: NULL, + id_table: pcnet32_pci_tbl, +}; + +MODULE_PARM(debug, "i"); +MODULE_PARM(max_interrupt_work, "i"); +MODULE_PARM(rx_copybreak, "i"); +MODULE_PARM(tx_start_pt, "i"); +MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_AUTHOR("Thomas Bogendoerfer"); +MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards"); +MODULE_LICENSE("GPL"); + +/* An additional parameter that may be passed in... */ +static int debug = -1; +static int tx_start_pt = -1; + +static int __init pcnet32_init_module(void) +{ + int cards_found = 0; + int err; + + if (debug > 0) + pcnet32_debug = debug; + if ((tx_start_pt >= 0) && (tx_start_pt <= 3)) + tx_start = tx_start_pt; + + pcnet32_dev = NULL; + /* find the PCI devices */ +#define USE_PCI_REGISTER_DRIVER +#ifdef USE_PCI_REGISTER_DRIVER + if ((err = pci_module_init(&pcnet32_driver)) < 0 ) + return err; +#else + { + struct pci_device_id *devid = pcnet32_pci_tbl; + for (devid = pcnet32_pci_tbl; devid != NULL && devid->vendor != 0; devid++) { + struct pci_dev *pdev = pci_find_subsys(devid->vendor, devid->device, devid->subvendor, devid->subdevice, NULL); + if (pdev != NULL) { + if (pcnet32_probe_pci(pdev, devid) >= 0) { + cards_found++; + } + } + } + } +#endif + return 0; + /* find any remaining VLbus devices */ + return pcnet32_probe_vlbus(cards_found); +} + +static void __exit pcnet32_cleanup_module(void) +{ + struct net_device *next_dev; + + /* No need to check MOD_IN_USE, as sys_delete_module() checks. 
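+ * Teardown walks the list built up in pcnet32_probe1(): unregister
+ * the netdev, release the PCNET32_TOTAL_SIZE I/O region, free the
+ * DMA-coherent private block, then follow lp->next.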
*/ + while (pcnet32_dev) { + struct pcnet32_private *lp = pcnet32_dev->priv; + next_dev = lp->next; + unregister_netdev(pcnet32_dev); + release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE); + if (lp->pci_dev != NULL) + pci_unregister_driver(&pcnet32_driver); + pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr); + kfree(pcnet32_dev); + pcnet32_dev = next_dev; + } +} + +module_init(pcnet32_init_module); +module_exit(pcnet32_cleanup_module); + +/* + * Local variables: + * compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/net/inet -Wall -Wstrict-prototypes -O6 -m486 -c pcnet32.c" + * c-indent-level: 4 + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/drivers/net/setup.c b/xen-2.4.16/drivers/net/setup.c new file mode 100644 index 0000000000..1352a1cb4b --- /dev/null +++ b/xen-2.4.16/drivers/net/setup.c @@ -0,0 +1,173 @@ + +/* + * New style setup code for the network devices + */ + +#include +#include +#include +#include +//#include + +extern int slip_init_ctrl_dev(void); +extern int x25_asy_init_ctrl_dev(void); + +extern int dmascc_init(void); + +extern int awc4500_pci_probe(void); +extern int awc4500_isa_probe(void); +extern int awc4500_pnp_probe(void); +extern int awc4500_365_probe(void); +extern int arcnet_init(void); +extern int scc_enet_init(void); +extern int fec_enet_init(void); +extern int dlci_setup(void); +extern int sdla_setup(void); +extern int sdla_c_setup(void); +extern int comx_init(void); +extern int lmc_setup(void); + +extern int madgemc_probe(void); +extern int uml_net_probe(void); + +/* Pad device name to IFNAMSIZ=16. F.e. __PAD6 is string of 9 zeros. */ +#define __PAD6 "\0\0\0\0\0\0\0\0\0" +#define __PAD5 __PAD6 "\0" +#define __PAD4 __PAD5 "\0" +#define __PAD3 __PAD4 "\0" +#define __PAD2 __PAD3 "\0" + + +/* + * Devices in this list must do new style probing. That is they must + * allocate their own device objects and do their own bus scans. + */ + +struct net_probe +{ + int (*probe)(void); + int status; /* non-zero if autoprobe has failed */ +}; + +static struct net_probe pci_probes[] __initdata = { + /* + * Early setup devices + */ + +#if defined(CONFIG_DMASCC) + {dmascc_init, 0}, +#endif +#if defined(CONFIG_DLCI) + {dlci_setup, 0}, +#endif +#if defined(CONFIG_SDLA) + {sdla_c_setup, 0}, +#endif +#if defined(CONFIG_ARCNET) + {arcnet_init, 0}, +#endif +#if defined(CONFIG_SCC_ENET) + {scc_enet_init, 0}, +#endif +#if defined(CONFIG_FEC_ENET) + {fec_enet_init, 0}, +#endif +#if defined(CONFIG_COMX) + {comx_init, 0}, +#endif + +#if defined(CONFIG_LANMEDIA) + {lmc_setup, 0}, +#endif + +/* +* +* Wireless non-HAM +* +*/ +#ifdef CONFIG_AIRONET4500_NONCS + +#ifdef CONFIG_AIRONET4500_PCI + {awc4500_pci_probe,0}, +#endif + +#ifdef CONFIG_AIRONET4500_PNP + {awc4500_pnp_probe,0}, +#endif + +#endif + +/* + * Token Ring Drivers + */ +#ifdef CONFIG_MADGEMC + {madgemc_probe, 0}, +#endif +#ifdef CONFIG_UML_NET + {uml_net_probe, 0}, +#endif + + {NULL, 0}, +}; + + +/* + * Run the updated device probes. These do not need a device passed + * into them. 
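+ * Entries are tried in order until the NULL sentinel; each probe's
+ * return value is recorded in ->status instead of aborting the
+ * scan, so one failed driver cannot block the rest.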
+ */ + +static void __init network_probe(void) +{ + struct net_probe *p = pci_probes; + + while (p->probe != NULL) + { + p->status = p->probe(); + p++; + } +} + + +/* + * Initialise the line discipline drivers + */ + +static void __init network_ldisc_init(void) +{ +#if defined(CONFIG_SLIP) + slip_init_ctrl_dev(); +#endif +#if defined(CONFIG_X25_ASY) + x25_asy_init_ctrl_dev(); +#endif +} + + +static void __init special_device_init(void) +{ +#ifdef CONFIG_NET_SB1000 + { + extern int sb1000_probe(struct net_device *dev); + static struct net_device sb1000_dev = + { + "cm0" __PAD3, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, sb1000_probe + }; + register_netdev(&sb1000_dev); + } +#endif +} + +/* + * Initialise network devices + */ + +void __init net_device_init(void) +{ + /* Devices supporting the new probing API */ + network_probe(); + /* Line disciplines */ + network_ldisc_init(); + /* Special devices */ + special_device_init(); + /* That kicks off the legacy init functions */ +} diff --git a/xen-2.4.16/drivers/net/tulip/.depend b/xen-2.4.16/drivers/net/tulip/.depend new file mode 100644 index 0000000000..d2cbd43c1b --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/.depend @@ -0,0 +1,58 @@ +21142.o: 21142.c \ + tulip.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h +eeprom.o: eeprom.c \ + tulip.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/unaligned.h +interrupt.o: interrupt.c \ + tulip.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/etherdevice.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h) +media.o: media.c \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/mii.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \ + tulip.h +pnic2.o: pnic2.c \ + tulip.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h +pnic.o: pnic.c \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \ + tulip.h +timer.o: timer.c \ + tulip.h +tulip_core.o: tulip_core.c \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/module.h \ + tulip.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/pci.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/init.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/etherdevice.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/mii.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/ethtool.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/unaligned.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/uaccess.h \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h) \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/tulip/mwi.h) \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/ddb5476.h) \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/ddb5477.h) \ + $(wildcard 
/home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/pm.h) +tulip.h: \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/kernel.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/types.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/spinlock.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/netdevice.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/timer.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/linux/delay.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/io.h \ + /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/asm/irq.h \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/tulip/mmio.h) \ + $(wildcard /home/kaf24/xeno/build/linux-2.4.16-kdb-orig/include/config/net/hw/flowcontrol.h) + @touch tulip.h +.PRECIOUS: tulip.h \ + diff --git a/xen-2.4.16/drivers/net/tulip/21142.c b/xen-2.4.16/drivers/net/tulip/21142.c new file mode 100644 index 0000000000..3a88c44120 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/21142.c @@ -0,0 +1,239 @@ +/* + drivers/net/tulip/21142.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#include "tulip.h" +#include +#include + + +static u16 t21142_csr13[] = { 0x0001, 0x0009, 0x0009, 0x0000, 0x0001, }; +u16 t21142_csr14[] = { 0xFFFF, 0x0705, 0x0705, 0x0000, 0x7F3D, }; +static u16 t21142_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; + + +/* Handle the 21143 uniquely: do autoselect with NWay, not the EEPROM list + of available transceivers. */ +void t21142_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr12 = inl(ioaddr + CSR12); + int next_tick = 60*HZ; + int new_csr6 = 0; + + if (tulip_debug > 2) + printk(KERN_INFO"%s: 21143 negotiation status %8.8x, %s.\n", + dev->name, csr12, medianame[dev->if_port]); + if (tulip_media_cap[dev->if_port] & MediaIsMII) { + tulip_check_duplex(dev); + next_tick = 60*HZ; + } else if (tp->nwayset) { + /* Don't screw up a negotiated session! */ + if (tulip_debug > 1) + printk(KERN_INFO"%s: Using NWay-set %s media, csr12 %8.8x.\n", + dev->name, medianame[dev->if_port], csr12); + } else if (tp->medialock) { + ; + } else if (dev->if_port == 3) { + if (csr12 & 2) { /* No 100mbps link beat, revert to 10mbps. */ + if (tulip_debug > 1) + printk(KERN_INFO"%s: No 21143 100baseTx link beat, %8.8x, " + "trying NWay.\n", dev->name, csr12); + t21142_start_nway(dev); + next_tick = 3*HZ; + } + } else if ((csr12 & 0x7000) != 0x5000) { + /* Negotiation failed. Search media types. */ + if (tulip_debug > 1) + printk(KERN_INFO"%s: 21143 negotiation failed, status %8.8x.\n", + dev->name, csr12); + if (!(csr12 & 4)) { /* 10mbps link beat good. */ + new_csr6 = 0x82420000; + dev->if_port = 0; + outl(0, ioaddr + CSR13); + outl(0x0003FFFF, ioaddr + CSR14); + outw(t21142_csr15[dev->if_port], ioaddr + CSR15); + outl(t21142_csr13[dev->if_port], ioaddr + CSR13); + } else { + /* Select 100mbps port to check for link beat. 
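+ * Mirrors the 10mbps branch above, but with hand-coded SIA values
+ * (CSR14 = 0x0003FF7F, CSR15 = 8) rather than the t21142_csr*
+ * tables; CSR13 is cleared first so the SIA resets before the new
+ * port is selected.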
*/ + new_csr6 = 0x83860000; + dev->if_port = 3; + outl(0, ioaddr + CSR13); + outl(0x0003FF7F, ioaddr + CSR14); + outw(8, ioaddr + CSR15); + outl(1, ioaddr + CSR13); + } + if (tulip_debug > 1) + printk(KERN_INFO"%s: Testing new 21143 media %s.\n", + dev->name, medianame[dev->if_port]); + if (new_csr6 != (tp->csr6 & ~0x00D5)) { + tp->csr6 &= 0x00D5; + tp->csr6 |= new_csr6; + outl(0x0301, ioaddr + CSR12); + tulip_restart_rxtx(tp); + } + next_tick = 3*HZ; + } + + /* mod_timer synchronizes us with potential add_timer calls + * from interrupts. + */ + mod_timer(&tp->timer, RUN_AT(next_tick)); +} + + +void t21142_start_nway(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr14 = ((tp->sym_advertise & 0x0780) << 9) | + ((tp->sym_advertise & 0x0020) << 1) | 0xffbf; + + dev->if_port = 0; + tp->nway = tp->mediasense = 1; + tp->nwayset = tp->lpar = 0; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Restarting 21143 autonegotiation, csr14=%8.8x.\n", + dev->name, csr14); + outl(0x0001, ioaddr + CSR13); + udelay(100); + outl(csr14, ioaddr + CSR14); + tp->csr6 = 0x82420000 | (tp->sym_advertise & 0x0040 ? FullDuplex : 0); + outl(tp->csr6, ioaddr + CSR6); + if (tp->mtable && tp->mtable->csr15dir) { + outl(tp->mtable->csr15dir, ioaddr + CSR15); + outl(tp->mtable->csr15val, ioaddr + CSR15); + } else + outw(0x0008, ioaddr + CSR15); + outl(0x1301, ioaddr + CSR12); /* Trigger NWAY. */ +} + + + +void t21142_lnk_change(struct net_device *dev, int csr5) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr12 = inl(ioaddr + CSR12); + + if (tulip_debug > 1) + printk(KERN_INFO"%s: 21143 link status interrupt %8.8x, CSR5 %x, " + "%8.8x.\n", dev->name, csr12, csr5, inl(ioaddr + CSR14)); + + /* If NWay finished and we have a negotiated partner capability. */ + if (tp->nway && !tp->nwayset && (csr12 & 0x7000) == 0x5000) { + int setup_done = 0; + int negotiated = tp->sym_advertise & (csr12 >> 16); + tp->lpar = csr12 >> 16; + tp->nwayset = 1; + if (negotiated & 0x0100) dev->if_port = 5; + else if (negotiated & 0x0080) dev->if_port = 3; + else if (negotiated & 0x0040) dev->if_port = 4; + else if (negotiated & 0x0020) dev->if_port = 0; + else { + tp->nwayset = 0; + if ((csr12 & 2) == 0 && (tp->sym_advertise & 0x0180)) + dev->if_port = 3; + } + tp->full_duplex = (tulip_media_cap[dev->if_port] & MediaAlwaysFD) ? 1:0; + + if (tulip_debug > 1) { + if (tp->nwayset) + printk(KERN_INFO "%s: Switching to %s based on link " + "negotiation %4.4x & %4.4x = %4.4x.\n", + dev->name, medianame[dev->if_port], tp->sym_advertise, + tp->lpar, negotiated); + else + printk(KERN_INFO "%s: Autonegotiation failed, using %s," + " link beat status %4.4x.\n", + dev->name, medianame[dev->if_port], csr12); + } + + if (tp->mtable) { + int i; + for (i = 0; i < tp->mtable->leafcount; i++) + if (tp->mtable->mleaf[i].media == dev->if_port) { + tp->cur_index = i; + tulip_select_media(dev, 1); + setup_done = 1; + break; + } + } + if ( ! setup_done) { + tp->csr6 = (dev->if_port & 1 ? 0x838E0000 : 0x82420000) | (tp->csr6 & 0x20ff); + if (tp->full_duplex) + tp->csr6 |= 0x0200; + outl(1, ioaddr + CSR13); + } +#if 0 /* Restart shouldn't be needed. 
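+ * (The disabled block would force the receiver on via CSR6; the
+ * live path relies on tulip_start_rxtx() just below.)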
*/ + outl(tp->csr6 | RxOn, ioaddr + CSR6); + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Restarting Tx and Rx, CSR5 is %8.8x.\n", + dev->name, inl(ioaddr + CSR5)); +#endif + tulip_start_rxtx(tp); + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Setting CSR6 %8.8x/%x CSR12 %8.8x.\n", + dev->name, tp->csr6, inl(ioaddr + CSR6), + inl(ioaddr + CSR12)); + } else if ((tp->nwayset && (csr5 & 0x08000000) + && (dev->if_port == 3 || dev->if_port == 5) + && (csr12 & 2) == 2) || + (tp->nway && (csr5 & (TPLnkFail)))) { + /* Link blew? Maybe restart NWay. */ + del_timer_sync(&tp->timer); + t21142_start_nway(dev); + tp->timer.expires = RUN_AT(3*HZ); + add_timer(&tp->timer); + } else if (dev->if_port == 3 || dev->if_port == 5) { + if (tulip_debug > 1) + printk(KERN_INFO"%s: 21143 %s link beat %s.\n", + dev->name, medianame[dev->if_port], + (csr12 & 2) ? "failed" : "good"); + if ((csr12 & 2) && ! tp->medialock) { + del_timer_sync(&tp->timer); + t21142_start_nway(dev); + tp->timer.expires = RUN_AT(3*HZ); + add_timer(&tp->timer); + } else if (dev->if_port == 5) + outl(inl(ioaddr + CSR14) & ~0x080, ioaddr + CSR14); + } else if (dev->if_port == 0 || dev->if_port == 4) { + if ((csr12 & 4) == 0) + printk(KERN_INFO"%s: 21143 10baseT link beat good.\n", + dev->name); + } else if (!(csr12 & 4)) { /* 10mbps link beat good. */ + if (tulip_debug) + printk(KERN_INFO"%s: 21143 10mbps sensed media.\n", + dev->name); + dev->if_port = 0; + } else if (tp->nwayset) { + if (tulip_debug) + printk(KERN_INFO"%s: 21143 using NWay-set %s, csr6 %8.8x.\n", + dev->name, medianame[dev->if_port], tp->csr6); + } else { /* 100mbps link beat good. */ + if (tulip_debug) + printk(KERN_INFO"%s: 21143 100baseTx sensed media.\n", + dev->name); + dev->if_port = 3; + tp->csr6 = 0x838E0000 | (tp->csr6 & 0x20ff); + outl(0x0003FF7F, ioaddr + CSR14); + outl(0x0301, ioaddr + CSR12); + tulip_restart_rxtx(tp); + } +} + + diff --git a/xen-2.4.16/drivers/net/tulip/ChangeLog b/xen-2.4.16/drivers/net/tulip/ChangeLog new file mode 100644 index 0000000000..a515efcfd3 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/ChangeLog @@ -0,0 +1,520 @@ +2001-11-13 David S. Miller + + * tulip_core.c (tulip_mwi_config): Kill unused label early_out. + +2001-11-06 Richard Mortimer + + * tulip_core.c: Correct set of values to mask out of csr0, + for DM9102A chips. Limit burst/alignment of DM9102A chips + on Sparcs. + +2001-11-06 Jun Sun + + * tulip_core.c: Support finding MAC address on + two MIPS boards, DDB5476 and DDB5477. + +2001-11-06 Kevin B. Hendricks + + * Makefile, tulip.h, tulip_core.c, pnic2.c, 21142.c: + Fixes for PNIC II support. + +2001-11-06 David S. Miller + + * tulip_core.c: Support reading MAC address from + Sparc OBP property local-mac-address. + +2001-07-17 Erik A. Hendriks + + * 21142.c: Merge fix from tulip.c 0.92w which prevents the + overwriting of csr6 bits we want to preserve. + +2001-07-10 Jeff Golds + + * tulip_core.c: Fix two comments + +2001-07-06 Stephen Degler + + * media.c: + The media selection process at the end of NWAY is busted + because for the case of MII/SYM it needs to be: + + csr13 <- 0 + csr14 <- 0 + csr6 <- the value calculated is okay. + + In the other media cases csr14 is computed by + t21142_csr14val[dev->if_port], which seems ok. 
The value of + zero as opposed to 3FFFFF comes straight from appendix D of the + 21143 data book, and it makes logical sense because you're + bypassing all the SIA interface when you usa MII or SYM (see + figure 1-1 in the data book if your're visually oriented) + +2001-07-03 Jeff Golds + + * tulip_core.c (tulip_clean_tx_ring): + Clear status for in-progress Tx's, and count + Tx errors for all packets being released. + +2001-06-16 Jeff Garzik + + * tulip.h, tulip_core.c: + Integrate MMIO support from devel branch, but default + it to off for stable kernel and driver series. + +2001-06-16 Jeff Garzik + + * tulip_core.c (tulip_init_one): + Free descriptor rings on error. + +2001-06-16 Jeff Garzik + + * tulip_core.c (tulip_mwi_config, tulip_init_one): + Large update to csr0 bus configuration code. This is not stable + yet, so it is only conditionally enabled, via CONFIG_TULIP_MWI. + +2001-06-16 Jeff Garzik + + * tulip_core.c: + Initialize timer in tulip_init_one and tulip_down, + not in tulip_up. + +2001-06-14 Jeff Garzik + + * tulip_core.c: + - Update tulip_suspend, tulip_resume for new PCI PM API. + - Surround suspend/resume code with CONFIG_PM. + +2001-06-12 Jeff Golds + + * tulip_core.c: + - Reset sw ring ptrs in tulip_up. Fixes PM resume case. + - Clean rx and tx rings on device down. + +2001-06-05 David Miller + + * tulip_core (set_rx_mode): Do not use set_bit + on an integer variable. Also fix endianness issue. + +2001-06-04 Jeff Garzik + + * interrupt.c: + Simplify rx processing when CONFIG_NET_HW_FLOWCONTROL is + active, and in the process fix a bug where flow control + and low load caused rx not to be acknowledged properly. + +2001-06-01 Jeff Garzik + + * tulip.h: + - Remove tulip_outl_csr helper, redundant. + - Add tulip_start_rxtx inline helper. + - tulip_stop_rxtx helper: Add synchronization. Always use current + csr6 value, instead of tp->csr6 value or value passed as arg. + - tulip_restart_rxtx helper: Add synchronization. Always + use tp->csr6 for desired mode, not value passed as arg. + - New RxOn, TxOn, RxTx constants for csr6 modes. + - Remove now-redundant constants csr6_st, csr6_sr. + + * 21142.c, interrupt.c, media.c, pnic.c, tulip_core.c: + Update for above rxtx helper changes. + + * interrupt.c: + - whitespace cleanup around #ifdef CONFIG_NET_HW_FLOWCONTROL, + convert tabs to spaces. + - Move tp->stats.rx_missed_errors update outside the ifdef. + +2001-05-18 Jeff Garzik + + * tulip_core.c: Added ethtool support. + ETHTOOL_GDRVINFO ioctl only, for now. + +2001-05-14 Robert Olsson + + * Restored HW_FLOWCONTROL from Linux 2.1 series tulip (ANK) + plus Jamal's NETIF_RX_* feedback control. + +2001-05-14 Robert Olsson + + * Added support for 21143's Interrupt Mitigation. + Jamal original instigator. + +2001-05-14 Robert Olsson + + * tulip_refill_rx prototype added to tulip.h + +2001-05-13 Jeff Garzik + + * tulip_core.c: Remove HAS_PCI_MWI flag from Comet, untested. + +2001-05-12 Jeff Garzik + + * tulip_core.c, tulip.h: Remove Conexant PCI id, no chip + docs are available to fix problems with support. + +2001-05-12 Jeff Garzik + + * tulip_core.c (tulip_init_one): Do not call + unregister_netdev in error cleanup. Remnant of old + usage of init_etherdev. + +2001-05-12 Jeff Garzik + + * media.c (tulip_find_mii): Simple write the updated BMCR + twice, as it seems the best thing to do for both broken and + sane chips. + If the mii_advert value, as read from MII_ADVERTISE, is zero, + then generate a value we should advertise from the capability + bits in BMSR. 
+ Fill in tp->advertising for all cases. + Just to be safe, clear all unwanted bits. + +2001-05-12 Jeff Garzik + + * tulip_core.c (private_ioctl): Fill in tp->advertising + when advertising value is changed by the user. + +2001-05-12 Jeff Garzik + + * tulip_core.c: Mark Comet chips as needed the updated MWI + csr0 configuration. + +2001-05-12 Jeff Garzik + + * media.c, tulip_core.c: Move MII scan into + from inlined inside tulip_init_one to new function + tulip_find_mii in media.c. + +2001-05-12 Jeff Garzik + + * media.c (tulip_check_duplex): + Only restart Rx/Tx engines if they are active + (and csr6 changes) + +2001-05-12 Jeff Garzik + + * tulip_core.c (tulip_mwi_config): + Clamp values read from PCI cache line size register to + values acceptable to tulip chip. Done for safety and + -almost- certainly unneeded. + +2001-05-11 Jeff Garzik + + * tulip_core.c (tulip_init_one): + Instead of unconditionally enabling autonegotiation, disable + autonegotiation if not using the default port. Further, + flip the nway bit immediately, and then update the + speed/duplex in a separate MII transaction. We do this + because some boards require that nway be disabled separately, + before media selection is forced. + + TODO: Investigate if we can simply write the same value + to BMCR twice, to avoid setting unnecessarily changing + phy settings. + +2001-05-11 Jeff Garzik + + * tulip.h, tulip_core.c: If HAS_PCI_MWI is set for a + given chip, adjust the csr0 values not according to + provided values but according to system cache line size. + Currently cache alignment is matched as closely to cache + line size as possible. Currently programmable burst limit + is set (ie. never unlimited), and always equal to cache + alignment and system cache size. Currently MWI bit is set + only if the MWI bit is present in the PCI command register. + +2001-05-11 Jeff Garzik + + * media.c (tulip_select_media): + For media types 1 and 3, only use the provided eeprom + advertising value if it is non-zero. + (tulip_check_duplex): + Do not exit ASAP if full_duplex_lock is set. This + ensures that the csr6 value is written if an update + is needed. + +2001-05-10 Jeff Garzik + + Merge PNIC-II-specific stuff from Becker's tulip.c: + + * tulip.h, 21142.c (pnic2_lnk_change): new function + * tulip_core.c (tulip_init_one): use it + + * tulip_core.c (tulip_tx_timeout): Add specific + debugging for PNIC2. + +2001-05-10 Jeff Garzik + + * tulip_core.c (tulip_init_one): Print out + tulip%d instead of PCI device number, for + consistency. + +2001-05-10 Jeff Garzik + + * Merge changes from Becker's tulip.c: + Fix bugs in ioctl. + Fix several bugs by distinguishing between MII + and SYM advertising values. + Set CSR14 autonegotiation bit for media types 2 and 4, + where the SIA CSR setup values are not provided. + +2001-05-10 Jeff Garzik + + * media.c (tulip_select_media): Only update MII + advertising value if startup arg < 2. + + * tulip.h: Do not enable CSR13/14/15 autoconfiguration + for 21041. + + * tulip_core.c: + 21041: add specific code for reset, and do not set CAC bit + When resetting media, for media table type 11 media, pass + value 2 as 'startup' arg to select_media, to avoid updating + MII advertising value. + +2001-05-10 Jeff Garzik + + * pnic.c (pnic_check_duplex): remove + pnic.c (pnic_lnk_change, pnic_timer): use + tulip_check_duplex not pnic_check_duplex. + + * media.c (tulip_check_duplex): + Clean up to use symbolic names instead of numeric constants. + Set TxThreshold mode as necessary as well as clearing it. 
+ Update csr6 if csr6 changes, not simply if duplex changes. + + (found by Manfred Spraul) + +2001-05-10 Jeff Garzik + + * 21142.c, eeprom.c, tulip.h, tulip_core.c: + Remove DPRINTK as another, better method of + debug message printing is available. + +2001-05-09 Jeff Garzik + + * 21142.c (t21142_lnk_change): Pass arg startup==1 + to tulip_select_media, in order to force csr13 to be + zeroed out prior to going to full duplex mode. Fixes + autonegotiation on a quad-port Znyx card. + (from Stephen Dengler) + +2001-05-09 Russell King + + * interrupt.c: Better PCI bus error reporting. + +2001-04-03 Jeff Garzik + + * tulip_core.c: Now that dev->name is only available late + in the probe, insert a hack to replace a not-evaluated + "eth%d" string with an evaluated "tulip%d" string. + Also, remove obvious comment and an indentation cleanup. + +2001-04-03 Jeff Garzik + + * tulip_core.c: If we are a module, always print out the + version string. If we are built into the kernel, only print + the version string if at least one tulip is detected. + +2001-04-03 Jeff Garzik + + Merged from Becker's tulip.c 0.92t: + + * tulip_core.c: Add support for Conexant LANfinity. + +2001-04-03 Jeff Garzik + + * tulip_core.c: Only suspend/resume if the interface + is up and running. Use alloc_etherdev and pci_request_regions. + Spelling fix. + +2001-04-03 Jeff Garzik + + * tulip_core.c: Remove code that existed when one or more of + the following defines existed. These defines were never used + by normal users in practice: TULIP_FULL_DUPLEX, + TULIP_DEFAULT_MEDIA, and TULIP_NO_MEDIA_SWITCH. + + * tulip.h, eeprom.c: Move EE_* constants from tulip.h to eeprom.c. + * tulip.h, media.c: Move MDIO_* constants from tulip.h to media.c. + + * media.c: Add barrier() to mdio_read/write's PNIC status check + loops. + +2001-04-03 Jeff Garzik + + Merged from Becker's tulip.c 0.92t: + + * tulip.h: Add MEDIA_MASK constant for bounding medianame[] + array lookups. + * eeprom.c, media.c, timer.c, tulip_core.c: Use it. + + * media.c, tulip_core.c: mdio_{read,write} cleanup. Since this + is called [pretty much] directly from ioctl, we mask + read/write arguments to limit the values passed. + Added mii_lock. Added comet_miireg2offset and better + Comet-specific mdio_read/write code. Pay closer attention + to the bits we set in ioctl. Remove spinlocks from ioctl, + they are in mdio_read/write now. Use mask to limit + phy number in tulip_init_one's MII scan. + +2001-04-03 Jeff Garzik + + Merged from Becker's tulip.c 0.92t: + + * 21142.c, tulip_core.c: PNIC2 MAC address and NWay fixes. + * tulip.h: Add FullDuplex constant, used in above change. + +2001-04-03 Jeff Garzik + + * timer.c: Do not call netif_carrier_{on,off}, it is not used in + the main tree. Leave code in, disabled, as markers for future + carrier notification. + +2001-04-03 Jeff Garzik + + Merged from Becker's tulip.c 0.92t, except for the tulip.h + whitespace cleanup: + + * interrupt.c: If Rx stops, make sure to update the + multicast filter before restarting. + * tulip.h: Add COMET_MAC_ADDR feature flag, clean up flags. + Add Accept* Rx mode bit constants. + Add mc_filter[] to driver private struct. + * tulip_core.c: Add new Comet PCI id 0x1113:0x9511. + Add COMET_MAC_ADDR feature flag to comet entry in board info array. + Prefer to test COMET_MAC_ADDR flag to testing chip_id for COMET, + when dealing with the Comet's MAC address. + Enable Tx underrun recovery for Comet chips. + Use new Accept* constants in set_rx_mode. 
+ Prefer COMET_MAC_ADDR flag test to chip_id test in set_rx_mode. + Store built mc_filter for later use in intr handler by Comets. + +2001-04-03 Jeff Garzik + + * tulip_core.c: Use tp->cur_tx when building the + setup frame, instead of assuming that the setup + frame is always built in slot zero. This case is + hit during PM resume. + +2001-04-03 Jeff Garzik + + * *.c: Update file headers (copyright, urls, etc.) + * Makefile: re-order to that chip-specific modules on own line + * eeprom.c: BSS/zero-init cleanup (Andrey Panin) + * tulip_core.c: merge medianame[] update from tulip.c. + Additional arch-specific rx_copybreak, csr0 values. (various) + +2001-02-20 Jeff Garzik + + * media.c (tulip_select_media): No need to initialize + new_csr6, all cases initialize it properly. + +2001-02-18 Manfred Spraul + + * interrupt.c (tulip_refill_rx): Make public. + If PNIC chip stops due to lack of Rx buffers, restart it. + (tulip_interrupt): PNIC doesn't have a h/w timer, emulate + with software timers. + * pnic.c (pnic_check_duplex): New function, PNIC-specific + version of tulip_check_duplex. + (pnic_lnk_change): Call pnic_check_duplex. If we use an + external MII, then we mustn't use the internal negotiation. + (pnic_timer): Support Rx refilling on work overflow in + interrupt handler, as PNIC doesn't support a h/w timer. + * tulip_core.c (tulip_tbl[]): Modify default csr6 + +2001-02-11 Jeff Garzik + + * tulip_core.c (tulip_init_one): Call pci_enable_device + to ensure wakeup/resource assignment before checking those + values. + (tulip_init_one): Replace PCI ids with constants from pci_id.h. + (tulip_suspend, tulip_resume, tulip_remove_one): Call + pci_power_on/off (commented out for now). + +2001-02-10 Jeff Garzik + + * tulip.h: Add CFDD_xxx bits for Tulip power management + * tulip_core.c (tulip_set_power_state): New function, + manipulating Tulip chip power state where supported. + (tulip_up, tulip_down, tulip_init_one): Use it. + +2001-02-10 Jeff Garzik + + * tulip_core.c (tulip_tx_timeout): Call netif_wake_queue + to ensure the next Tx is always sent to us. + +2001-01-27 Jeff Garzik + + * tulip_core.c (tulip_remove_one): Fix mem leak by freeing + tp->media_tbl. Add check for !dev, reformat code appropriately. + +2001-01-27 Jeff Garzik + + * tulip_tbl[]: Comment all entries to make order and chip_id + relationship more clear. + * tulip_pci_tbl[]: Add new Accton PCI id (COMET chipset). + +2001-01-16 Jeff Garzik + + * tulip_core.c: static vars no longer explicitly + initialized to zero. + * eeprom.c (tulip_read_eeprom): Make sure to delay between + EE_ENB and EE_ENB|EE_SHIFT_CLK. Merged from becker tulip.c. + +2001-01-05 Peter De Schrijver + + * eeprom.c (tulip_parse_eeprom): Interpret a bit more of 21142 + extended format type 3 info blocks in a tulip SROM. + +2001-01-03 Matti Aarnio + + * media.c (tulip_select_media): Support media types 5 and 6 + +2001-??-?? ?? + + * tulip_core.c: Add comment about LanMedia needing + a different driver. + Enable workarounds for early PCI chipsets. + Add IA64 csr0 support, update HPPA csr0 support. + +2000-12-17 Alan Cox + + * eeprom.c, timer.c, tulip.h, tulip_core.c: Merge support + for the Davicom's quirks into the main tulip. + Patch by Tobias Ringstrom + +2000-11-08 Jim Studt + + * eeprom.c (tulip_parse_eeprom): Check array bounds for + medianame[] and block_name[] arrays to avoid oops due + to bad values returned from hardware. 
+ +2000-11-02 Jeff Garzik + + * tulip_core.c (set_rx_mode): This is synchronized via + dev->xmit_lock, so only the queueing of the setup frame needs to + be locked, against tulip_interrupt. + +2000-11-02 Alexey Kuznetov + + * timer.c (tulip_timer): Call netif_carrier_{on,off} to report + link state to the rest of the kernel, and userspace. + * interrupt.c (tulip_interrupt): Remove tx_full. + * tulip.h: Likewise. + * tulip_core.c (tulip_init_ring, tulip_start_xmit, set_rx_mode): + Likewise. + +2000-10-18 Jeff Garzik + + * tulip_core.c: (tulip_init_one) Print out ethernet interface + on error. Print out a message when pci_enable_device fails. + Handle DMA alloc failure. + +2000-10-18 Jeff Garzik + + * Makefile: New file. + * tulip_core.c (tulip_init_one): Correct error messages + on PIO/MMIO region reserve failure. + (tulip_init_one) Add new check to ensure that PIO region is + sufficient for our needs. + diff --git a/xen-2.4.16/drivers/net/tulip/Makefile b/xen-2.4.16/drivers/net/tulip/Makefile new file mode 100644 index 0000000000..a96b5b9835 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o tulip.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/drivers/net/tulip/eeprom.c b/xen-2.4.16/drivers/net/tulip/eeprom.c new file mode 100644 index 0000000000..beb1430cc4 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/eeprom.c @@ -0,0 +1,320 @@ +/* + drivers/net/tulip/eeprom.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#include "tulip.h" +#include +#include + + + +/* Serial EEPROM section. */ +/* The main routine to parse the very complicated SROM structure. + Search www.digital.com for "21X4 SROM" to get details. + This code is very complex, and will require changes to support + additional cards, so I'll be verbose about what is going on. + */ + +/* Known cards that have old-style EEPROMs. 
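+ * Each fixup is keyed on the first three octets of the station
+ * address (the vendor prefix) and supplies a substitute media table
+ * that tulip_parse_eeprom() copies over ee_data + 26 once it
+ * detects the old SA-only layout.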
*/ +static struct eeprom_fixup eeprom_fixups[] __devinitdata = { + {"Asante", 0, 0, 0x94, {0x1e00, 0x0000, 0x0800, 0x0100, 0x018c, + 0x0000, 0x0000, 0xe078, 0x0001, 0x0050, 0x0018 }}, + {"SMC9332DST", 0, 0, 0xC0, { 0x1e00, 0x0000, 0x0800, 0x041f, + 0x0000, 0x009E, /* 10baseT */ + 0x0004, 0x009E, /* 10baseT-FD */ + 0x0903, 0x006D, /* 100baseTx */ + 0x0905, 0x006D, /* 100baseTx-FD */ }}, + {"Cogent EM100", 0, 0, 0x92, { 0x1e00, 0x0000, 0x0800, 0x063f, + 0x0107, 0x8021, /* 100baseFx */ + 0x0108, 0x8021, /* 100baseFx-FD */ + 0x0100, 0x009E, /* 10baseT */ + 0x0104, 0x009E, /* 10baseT-FD */ + 0x0103, 0x006D, /* 100baseTx */ + 0x0105, 0x006D, /* 100baseTx-FD */ }}, + {"Maxtech NX-110", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x0513, + 0x1001, 0x009E, /* 10base2, CSR12 0x10*/ + 0x0000, 0x009E, /* 10baseT */ + 0x0004, 0x009E, /* 10baseT-FD */ + 0x0303, 0x006D, /* 100baseTx, CSR12 0x03 */ + 0x0305, 0x006D, /* 100baseTx-FD CSR12 0x03 */}}, + {"Accton EN1207", 0, 0, 0xE8, { 0x1e00, 0x0000, 0x0800, 0x051F, + 0x1B01, 0x0000, /* 10base2, CSR12 0x1B */ + 0x0B00, 0x009E, /* 10baseT, CSR12 0x0B */ + 0x0B04, 0x009E, /* 10baseT-FD,CSR12 0x0B */ + 0x1B03, 0x006D, /* 100baseTx, CSR12 0x1B */ + 0x1B05, 0x006D, /* 100baseTx-FD CSR12 0x1B */ + }}, + {"NetWinder", 0x00, 0x10, 0x57, + /* Default media = MII + * MII block, reset sequence (3) = 0x0821 0x0000 0x0001, capabilities 0x01e1 + */ + { 0x1e00, 0x0000, 0x000b, 0x8f01, 0x0103, 0x0300, 0x0821, 0x000, 0x0001, 0x0000, 0x01e1 } + }, + {0, 0, 0, 0, {}}}; + + +static const char *block_name[] __devinitdata = { + "21140 non-MII", + "21140 MII PHY", + "21142 Serial PHY", + "21142 MII PHY", + "21143 SYM PHY", + "21143 reset method" +}; + + +void __devinit tulip_parse_eeprom(struct net_device *dev) +{ + /* The last media info list parsed, for multiport boards. */ + static struct mediatable *last_mediatable; + static unsigned char *last_ee_data; + static int controller_index; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + unsigned char *ee_data = tp->eeprom; + int i; + + tp->mtable = 0; + /* Detect an old-style (SA only) EEPROM layout: + memcmp(eedata, eedata+16, 8). */ + for (i = 0; i < 8; i ++) + if (ee_data[i] != ee_data[16+i]) + break; + if (i >= 8) { + if (ee_data[0] == 0xff) { + if (last_mediatable) { + controller_index++; + printk(KERN_INFO "%s: Controller %d of multiport board.\n", + dev->name, controller_index); + tp->mtable = last_mediatable; + ee_data = last_ee_data; + goto subsequent_board; + } else + printk(KERN_INFO "%s: Missing EEPROM, this interface may " + "not work correctly!\n", + dev->name); + return; + } + /* Do a fix-up based on the vendor half of the station address prefix. */ + for (i = 0; eeprom_fixups[i].name; i++) { + if (dev->dev_addr[0] == eeprom_fixups[i].addr0 + && dev->dev_addr[1] == eeprom_fixups[i].addr1 + && dev->dev_addr[2] == eeprom_fixups[i].addr2) { + if (dev->dev_addr[2] == 0xE8 && ee_data[0x1a] == 0x55) + i++; /* An Accton EN1207, not an outlaw Maxtech. */ + memcpy(ee_data + 26, eeprom_fixups[i].newtable, + sizeof(eeprom_fixups[i].newtable)); + printk(KERN_INFO "%s: Old format EEPROM on '%s' board. Using" + " substitute media control info.\n", + dev->name, eeprom_fixups[i].name); + break; + } + } + if (eeprom_fixups[i].name == NULL) { /* No fixup found. */ + printk(KERN_INFO "%s: Old style EEPROM with no media selection " + "information.\n", + dev->name); + return; + } + } + + controller_index = 0; + if (ee_data[19] > 1) { /* Multiport board. 
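The fixup selection above keys on the first three bytes of the station address, the vendor OUI, with one extra EEPROM byte disambiguating the Accton/Maxtech clash. A standalone sketch of the basic lookup, using a shortened illustrative table (the struct layout and names here are mine, not the driver's):

    #include <stdio.h>

    struct fixup { const char *name; unsigned char a0, a1, a2; };

    static const struct fixup fixups[] = {
        { "SMC9332DST", 0x00, 0x00, 0xC0 },
        { "NetWinder",  0x00, 0x10, 0x57 },
        { NULL, 0, 0, 0 }
    };

    static const char *match_fixup(const unsigned char *dev_addr)
    {
        int i;
        for (i = 0; fixups[i].name; i++)
            if (dev_addr[0] == fixups[i].a0 &&
                dev_addr[1] == fixups[i].a1 &&
                dev_addr[2] == fixups[i].a2)
                return fixups[i].name;
        return NULL;  /* no fixup known for this OUI */
    }

    int main(void)
    {
        const unsigned char mac[6] = { 0x00, 0x10, 0x57, 0xaa, 0xbb, 0xcc };
        const char *hit = match_fixup(mac);
        printf("%s\n", hit ? hit : "none");  /* NetWinder */
        return 0;
    }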
*/ + last_ee_data = ee_data; + } +subsequent_board: + + if (ee_data[27] == 0) { /* No valid media table. */ + } else if (tp->chip_id == DC21041) { + unsigned char *p = (void *)ee_data + ee_data[27 + controller_index*3]; + int media = get_u16(p); + int count = p[2]; + p += 3; + + printk(KERN_INFO "%s: 21041 Media table, default media %4.4x (%s).\n", + dev->name, media, + media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); + for (i = 0; i < count; i++) { + unsigned char media_block = *p++; + int media_code = media_block & MEDIA_MASK; + if (media_block & 0x40) + p += 6; + printk(KERN_INFO "%s: 21041 media #%d, %s.\n", + dev->name, media_code, medianame[media_code]); + } + } else { + unsigned char *p = (void *)ee_data + ee_data[27]; + unsigned char csr12dir = 0; + int count, new_advertise = 0; + struct mediatable *mtable; + u16 media = get_u16(p); + + p += 2; + if (tp->flags & CSR12_IN_SROM) + csr12dir = *p++; + count = *p++; + + /* there is no phy information, don't even try to build mtable */ + if (count == 0) { + if (tulip_debug > 0) + printk(KERN_WARNING "%s: no phy info, aborting mtable build\n", dev->name); + return; + } + + mtable = (struct mediatable *) + kmalloc(sizeof(struct mediatable) + count*sizeof(struct medialeaf), + GFP_KERNEL); + if (mtable == NULL) + return; /* Horrible, impossible failure. */ + last_mediatable = tp->mtable = mtable; + mtable->defaultmedia = media; + mtable->leafcount = count; + mtable->csr12dir = csr12dir; + mtable->has_nonmii = mtable->has_mii = mtable->has_reset = 0; + mtable->csr15dir = mtable->csr15val = 0; + + printk(KERN_INFO "%s: EEPROM default media type %s.\n", dev->name, + media & 0x0800 ? "Autosense" : medianame[media & MEDIA_MASK]); + for (i = 0; i < count; i++) { + struct medialeaf *leaf = &mtable->mleaf[i]; + + if ((p[0] & 0x80) == 0) { /* 21140 Compact block. */ + leaf->type = 0; + leaf->media = p[0] & 0x3f; + leaf->leafdata = p; + if ((p[2] & 0x61) == 0x01) /* Bogus, but Znyx boards do it. 
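The walk above handles two leaf encodings, and getting the advance right is the whole game: a compact 21140 block (high bit of the first byte clear) is a fixed 4 bytes, while an extended block carries its own length in the low 6 bits of its first byte. A standalone sketch of just that stepping rule (next_leaf() is an illustrative helper):

    #include <stdio.h>

    /* Advance past one SROM info leaf, per the encodings parsed above. */
    static const unsigned char *next_leaf(const unsigned char *p)
    {
        if ((p[0] & 0x80) == 0)            /* compact 21140 block */
            return p + 4;
        return p + (p[0] & 0x3f) + 1;      /* extended, length-prefixed */
    }

    int main(void)
    {
        const unsigned char leaves[] = { 0x00, 0x08, 0x00, 0x00,  /* compact */
                                         0x85, 1, 2, 3, 4, 5 };   /* extended */
        const unsigned char *p = next_leaf(leaves);  /* skips 4 bytes */
        p = next_leaf(p);                  /* skips (0x85 & 0x3f) + 1 = 6 */
        printf("%d\n", (int)(p - leaves)); /* 10 */
        return 0;
    }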
*/ + mtable->has_mii = 1; + p += 4; + } else { + leaf->type = p[1]; + if (p[1] == 0x05) { + mtable->has_reset = i; + leaf->media = p[2] & 0x0f; + } else if (tp->chip_id == DM910X && p[1] == 0x80) { + /* Hack to ignore Davicom delay period block */ + mtable->leafcount--; + count--; + i--; + leaf->leafdata = p + 2; + p += (p[0] & 0x3f) + 1; + continue; + } else if (p[1] & 1) { + int gpr_len, reset_len; + + mtable->has_mii = 1; + leaf->media = 11; + gpr_len=p[3]*2; + reset_len=p[4+gpr_len]*2; + new_advertise |= get_u16(&p[7+gpr_len+reset_len]); + } else { + mtable->has_nonmii = 1; + leaf->media = p[2] & MEDIA_MASK; + /* Davicom's media number for 100BaseTX is strange */ + if (tp->chip_id == DM910X && leaf->media == 1) + leaf->media = 3; + switch (leaf->media) { + case 0: new_advertise |= 0x0020; break; + case 4: new_advertise |= 0x0040; break; + case 3: new_advertise |= 0x0080; break; + case 5: new_advertise |= 0x0100; break; + case 6: new_advertise |= 0x0200; break; + } + if (p[1] == 2 && leaf->media == 0) { + if (p[2] & 0x40) { + u32 base15 = get_unaligned((u16*)&p[7]); + mtable->csr15dir = + (get_unaligned((u16*)&p[9])<<16) + base15; + mtable->csr15val = + (get_unaligned((u16*)&p[11])<<16) + base15; + } else { + mtable->csr15dir = get_unaligned((u16*)&p[3])<<16; + mtable->csr15val = get_unaligned((u16*)&p[5])<<16; + } + } + } + leaf->leafdata = p + 2; + p += (p[0] & 0x3f) + 1; + } + if (tulip_debug > 1 && leaf->media == 11) { + unsigned char *bp = leaf->leafdata; + printk(KERN_INFO "%s: MII interface PHY %d, setup/reset " + "sequences %d/%d long, capabilities %2.2x %2.2x.\n", + dev->name, bp[0], bp[1], bp[2 + bp[1]*2], + bp[5 + bp[2 + bp[1]*2]*2], bp[4 + bp[2 + bp[1]*2]*2]); + } + printk(KERN_INFO "%s: Index #%d - Media %s (#%d) described " + "by a %s (%d) block.\n", + dev->name, i, medianame[leaf->media & 15], leaf->media, + leaf->type < ARRAY_SIZE(block_name) ? block_name[leaf->type] : "", + leaf->type); + } + if (new_advertise) + tp->sym_advertise = new_advertise; + } +} +/* Reading a serial EEPROM is a "bit" grungy, but we work our way through:->.*/ + +/* EEPROM_Ctrl bits. */ +#define EE_SHIFT_CLK 0x02 /* EEPROM shift clock. */ +#define EE_CS 0x01 /* EEPROM chip select. */ +#define EE_DATA_WRITE 0x04 /* Data from the Tulip to EEPROM. */ +#define EE_WRITE_0 0x01 +#define EE_WRITE_1 0x05 +#define EE_DATA_READ 0x08 /* Data from the EEPROM chip. */ +#define EE_ENB (0x4800 | EE_CS) + +/* Delay between EEPROM clock transitions. + Even at 33Mhz current PCI implementations don't overrun the EEPROM clock. + We add a bus turn-around to insure that this remains true. */ +#define eeprom_delay() inl(ee_addr) + +/* The EEPROM commands include the alway-set leading bit. */ +#define EE_READ_CMD (6) + +/* Note: this routine returns extra data bits for size detection. */ +int __devinit tulip_read_eeprom(long ioaddr, int location, int addr_len) +{ + int i; + unsigned retval = 0; + long ee_addr = ioaddr + CSR9; + int read_cmd = location | (EE_READ_CMD << addr_len); + + outl(EE_ENB & ~EE_CS, ee_addr); + outl(EE_ENB, ee_addr); + + /* Shift the read command bits out. */ + for (i = 4 + addr_len; i >= 0; i--) { + short dataval = (read_cmd & (1 << i)) ? EE_DATA_WRITE : 0; + outl(EE_ENB | dataval, ee_addr); + eeprom_delay(); + outl(EE_ENB | dataval | EE_SHIFT_CLK, ee_addr); + eeprom_delay(); + retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 
1 : 0); + } + outl(EE_ENB, ee_addr); + eeprom_delay(); + + for (i = 16; i > 0; i--) { + outl(EE_ENB | EE_SHIFT_CLK, ee_addr); + eeprom_delay(); + retval = (retval << 1) | ((inl(ee_addr) & EE_DATA_READ) ? 1 : 0); + outl(EE_ENB, ee_addr); + eeprom_delay(); + } + + /* Terminate the EEPROM access. */ + outl(EE_ENB & ~EE_CS, ee_addr); + return retval; +} + diff --git a/xen-2.4.16/drivers/net/tulip/interrupt.c b/xen-2.4.16/drivers/net/tulip/interrupt.c new file mode 100644 index 0000000000..c92b12ea92 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c @@ -0,0 +1,559 @@ +/* + drivers/net/tulip/interrupt.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#include "tulip.h" +#include +#include +#include + + +int tulip_rx_copybreak; +unsigned int tulip_max_interrupt_work; + +#ifdef CONFIG_NET_HW_FLOWCONTROL + +#define MIT_SIZE 15 +unsigned int mit_table[MIT_SIZE+1] = +{ + /* CRS11 21143 hardware Mitigation Control Interrupt + We use only RX mitigation we other techniques for + TX intr. mitigation. + + 31 Cycle Size (timer control) + 30:27 TX timer in 16 * Cycle size + 26:24 TX No pkts before Int. + 23:20 RX timer in Cycle size + 19:17 RX No pkts before Int. + 16 Continues Mode (CM) + */ + + 0x0, /* IM disabled */ + 0x80150000, /* RX time = 1, RX pkts = 2, CM = 1 */ + 0x80150000, + 0x80270000, + 0x80370000, + 0x80490000, + 0x80590000, + 0x80690000, + 0x807B0000, + 0x808B0000, + 0x809D0000, + 0x80AD0000, + 0x80BD0000, + 0x80CF0000, + 0x80DF0000, +// 0x80FF0000 /* RX time = 16, RX pkts = 7, CM = 1 */ + 0x80F10000 /* RX time = 16, RX pkts = 0, CM = 1 */ +}; +#endif + + +int tulip_refill_rx(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry; + int refilled = 0; + + /* Refill the Rx ring buffers. */ + for (; tp->cur_rx - tp->dirty_rx > 0; tp->dirty_rx++) { + entry = tp->dirty_rx % RX_RING_SIZE; + if (tp->rx_buffers[entry].skb == NULL) { + struct sk_buff *skb; + dma_addr_t mapping; + + skb = tp->rx_buffers[entry].skb = dev_alloc_skb(PKT_BUF_SZ); + if (skb == NULL) + break; + + mapping = pci_map_single(tp->pdev, skb->tail, PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); + tp->rx_buffers[entry].mapping = mapping; + + skb->dev = dev; /* Mark as being used by this device. 
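The mit_table[] near the top of this file encodes ready-made CSR11 words whose bit layout the table's comment spells out. A standalone sketch of how such a word packs together, handy for decoding the table entries by eye (csr11_mit() is an illustrative helper; field positions follow that comment and apply to the 21143 only):

    #include <stdio.h>

    /* Pack a 21143 CSR11 mitigation word: bit 31 cycle size, bits 30:27
       TX timer, 26:24 TX packet count, 23:20 RX timer, 19:17 RX packet
       count, bit 16 continuous mode. */
    static unsigned int csr11_mit(unsigned int cs, unsigned int tx_timer,
                                  unsigned int tx_pkts, unsigned int rx_timer,
                                  unsigned int rx_pkts, unsigned int cm)
    {
        return (cs << 31) | (tx_timer << 27) | (tx_pkts << 24)
             | (rx_timer << 20) | (rx_pkts << 17) | (cm << 16);
    }

    int main(void)
    {
        /* RX time = 1, RX pkts = 2, CM = 1: matches mit_table[1]. */
        printf("%#x\n", csr11_mit(1, 0, 0, 1, 2, 1));  /* 0x80150000 */
        return 0;
    }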
*/ + tp->rx_ring[entry].buffer1 = cpu_to_le32(mapping); + refilled++; + } + tp->rx_ring[entry].status = cpu_to_le32(DescOwned); + } + if(tp->chip_id == LC82C168) { + if(((inl(dev->base_addr + CSR5)>>17)&0x07) == 4) { + /* Rx stopped due to out of buffers, + * restart it + */ + outl(0x01, dev->base_addr + CSR2); + } + } + return refilled; +} + + +static int tulip_rx(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry = tp->cur_rx % RX_RING_SIZE; + int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; + int received = 0; + +#ifdef CONFIG_NET_HW_FLOWCONTROL + int drop = 0, mit_sel = 0; + +/* that one buffer is needed for mit activation; or might be a + bug in the ring buffer code; check later -- JHS*/ + + if (rx_work_limit >=RX_RING_SIZE) rx_work_limit--; +#endif + + if (tulip_debug > 4) + printk(KERN_DEBUG " In tulip_rx(), entry %d %8.8x.\n", entry, + tp->rx_ring[entry].status); + /* If we own the next entry, it is a new packet. Send it up. */ + while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) { + s32 status = le32_to_cpu(tp->rx_ring[entry].status); + + if (tulip_debug > 5) + printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n", + dev->name, entry, status); + if (--rx_work_limit < 0) + break; + if ((status & 0x38008300) != 0x0300) { + if ((status & 0x38000300) != 0x0300) { + /* Ingore earlier buffers. */ + if ((status & 0xffff) != 0x7fff) { + if (tulip_debug > 1) + printk(KERN_WARNING "%s: Oversized Ethernet frame " + "spanned multiple buffers, status %8.8x!\n", + dev->name, status); + tp->stats.rx_length_errors++; + } + } else if (status & RxDescFatalErr) { + /* There was a fatal error. */ + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Receive error, Rx status %8.8x.\n", + dev->name, status); + tp->stats.rx_errors++; /* end of a packet.*/ + if (status & 0x0890) tp->stats.rx_length_errors++; + if (status & 0x0004) tp->stats.rx_frame_errors++; + if (status & 0x0002) tp->stats.rx_crc_errors++; + if (status & 0x0001) tp->stats.rx_fifo_errors++; + } + } else { + /* Omit the four octet CRC from the length. */ + short pkt_len = ((status >> 16) & 0x7ff) - 4; + struct sk_buff *skb; + +#ifndef final_version + if (pkt_len > 1518) { + printk(KERN_WARNING "%s: Bogus packet size of %d (%#x).\n", + dev->name, pkt_len, pkt_len); + pkt_len = 1518; + tp->stats.rx_length_errors++; + } +#endif + +#ifdef CONFIG_NET_HW_FLOWCONTROL + drop = atomic_read(&netdev_dropping); + if (drop) + goto throttle; +#endif + /* Check if the packet is long enough to accept without copying + to a minimally-sized skbuff. */ + if (pkt_len < tulip_rx_copybreak + && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { + skb->dev = dev; + skb_reserve(skb, 2); /* 16 byte align the IP header */ + pci_dma_sync_single(tp->pdev, + tp->rx_buffers[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); +#if ! defined(__alpha__) + eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail, + pkt_len, 0); + skb_put(skb, pkt_len); +#else + memcpy(skb_put(skb, pkt_len), + tp->rx_buffers[entry].skb->tail, + pkt_len); +#endif + } else { /* Pass up the skb already on the Rx ring. */ + char *temp = skb_put(skb = tp->rx_buffers[entry].skb, + pkt_len); + +#ifndef final_version + if (tp->rx_buffers[entry].mapping != + le32_to_cpu(tp->rx_ring[entry].buffer1)) { + printk(KERN_ERR "%s: Internal fault: The skbuff addresses " + "do not match in tulip_rx: %08x vs. 
%08x %p / %p.\n", + dev->name, + le32_to_cpu(tp->rx_ring[entry].buffer1), + tp->rx_buffers[entry].mapping, + skb->head, temp); + } +#endif + + pci_unmap_single(tp->pdev, tp->rx_buffers[entry].mapping, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + + tp->rx_buffers[entry].skb = NULL; + tp->rx_buffers[entry].mapping = 0; + } + skb->protocol = eth_type_trans(skb, dev); +#ifdef CONFIG_NET_HW_FLOWCONTROL + mit_sel = +#endif + netif_rx(skb); + +#ifdef CONFIG_NET_HW_FLOWCONTROL + switch (mit_sel) { + case NET_RX_SUCCESS: + case NET_RX_CN_LOW: + case NET_RX_CN_MOD: + break; + + case NET_RX_CN_HIGH: + rx_work_limit -= NET_RX_CN_HIGH; /* additional*/ + break; + case NET_RX_DROP: + rx_work_limit = -1; + break; + default: + printk("unknown feedback return code %d\n", mit_sel); + break; + } + + drop = atomic_read(&netdev_dropping); + if (drop) { +throttle: + rx_work_limit = -1; + mit_sel = NET_RX_DROP; + + if (tp->fc_bit) { + long ioaddr = dev->base_addr; + + /* disable Rx & RxNoBuf ints. */ + outl(tulip_tbl[tp->chip_id].valid_intrs&RX_A_NBF_STOP, ioaddr + CSR7); + set_bit(tp->fc_bit, &netdev_fc_xoff); + } + } +#endif + dev->last_rx = jiffies; + tp->stats.rx_packets++; + tp->stats.rx_bytes += pkt_len; + } + received++; + entry = (++tp->cur_rx) % RX_RING_SIZE; + } +#ifdef CONFIG_NET_HW_FLOWCONTROL + + /* We use this simplistic scheme for IM. It's proven by + real life installations. We can have IM enabled + continuesly but this would cause unnecessary latency. + Unfortunely we can't use all the NET_RX_* feedback here. + This would turn on IM for devices that is not contributing + to backlog congestion with unnecessary latency. + + We monitor the the device RX-ring and have: + + HW Interrupt Mitigation either ON or OFF. + + ON: More then 1 pkt received (per intr.) OR we are dropping + OFF: Only 1 pkt received + + Note. We only use min and max (0, 15) settings from mit_table */ + + + if( tp->flags & HAS_INTR_MITIGATION) { + if((received > 1 || mit_sel == NET_RX_DROP) + && tp->mit_sel != 15 ) { + tp->mit_sel = 15; + tp->mit_change = 1; /* Force IM change */ + } + if((received <= 1 && mit_sel != NET_RX_DROP) && tp->mit_sel != 0 ) { + tp->mit_sel = 0; + tp->mit_change = 1; /* Force IM change */ + } + } + + return RX_RING_SIZE+1; /* maxrx+1 */ +#else + return received; +#endif +} + + +/* The interrupt handler does all of the Rx thread work and cleans up + after the Tx thread. */ +void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs) +{ + struct net_device *dev = (struct net_device *)dev_instance; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr5; + int entry; + int missed; + int rx = 0; + int tx = 0; + int oi = 0; + int maxrx = RX_RING_SIZE; + int maxtx = TX_RING_SIZE; + int maxoi = TX_RING_SIZE; + unsigned int work_count = tulip_max_interrupt_work; + + /* Let's see whether the interrupt really is for us */ + csr5 = inl(ioaddr + CSR5); + + if ((csr5 & (NormalIntr|AbnormalIntr)) == 0) + return; + + tp->nir++; + + do { + /* Acknowledge all of the current interrupt sources ASAP. 
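The receive loop above applies the classic rx_copybreak trade-off: frames shorter than the threshold are copied into a freshly allocated, right-sized buffer so the full-size ring buffer stays mapped and in place, while longer frames hand the ring buffer itself up the stack, leaving the slot to be refilled later. A standalone sketch of the decision, with plain malloc/memcpy standing in for skb handling (all names here are illustrative):

    #include <stdlib.h>
    #include <string.h>

    #define COPYBREAK 100  /* illustrative threshold */

    /* Returns the buffer to pass up the stack; *ring_reusable tells the
       caller whether the ring buffer is still available for reuse. */
    static unsigned char *rx_deliver(unsigned char *ring_buf, int pkt_len,
                                     int *ring_reusable)
    {
        if (pkt_len < COPYBREAK) {
            unsigned char *copy = malloc(pkt_len);
            if (copy) {
                memcpy(copy, ring_buf, pkt_len);
                *ring_reusable = 1;   /* ring buffer untouched */
                return copy;
            }
        }
        *ring_reusable = 0;           /* ring slot must be refilled */
        return ring_buf;
    }

    int main(void)
    {
        static unsigned char ring[1536] = "hello";
        int reusable;
        unsigned char *buf = rx_deliver(ring, 6, &reusable);
        if (reusable)
            free(buf);  /* buf was a private copy */
        return 0;
    }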
*/ + outl(csr5 & 0x0001ffff, ioaddr + CSR5); + + if (tulip_debug > 4) + printk(KERN_DEBUG "%s: interrupt csr5=%#8.8x new csr5=%#8.8x.\n", + dev->name, csr5, inl(dev->base_addr + CSR5)); + + if (csr5 & (RxIntr | RxNoBuf)) { +#ifdef CONFIG_NET_HW_FLOWCONTROL + if ((!tp->fc_bit) || + (!test_bit(tp->fc_bit, &netdev_fc_xoff))) +#endif + rx += tulip_rx(dev); + tulip_refill_rx(dev); + } + + if (csr5 & (TxNoBuf | TxDied | TxIntr | TimerInt)) { + unsigned int dirty_tx; + + spin_lock(&tp->lock); + + for (dirty_tx = tp->dirty_tx; tp->cur_tx - dirty_tx > 0; + dirty_tx++) { + int entry = dirty_tx % TX_RING_SIZE; + int status = le32_to_cpu(tp->tx_ring[entry].status); + + if (status < 0) + break; /* It still has not been Txed */ + + /* Check for Rx filter setup frames. */ + if (tp->tx_buffers[entry].skb == NULL) { + /* test because dummy frames not mapped */ + if (tp->tx_buffers[entry].mapping) + pci_unmap_single(tp->pdev, + tp->tx_buffers[entry].mapping, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); + continue; + } + + if (status & 0x8000) { + /* There was an major error, log it. */ +#ifndef final_version + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", + dev->name, status); +#endif + tp->stats.tx_errors++; + if (status & 0x4104) tp->stats.tx_aborted_errors++; + if (status & 0x0C00) tp->stats.tx_carrier_errors++; + if (status & 0x0200) tp->stats.tx_window_errors++; + if (status & 0x0002) tp->stats.tx_fifo_errors++; + if ((status & 0x0080) && tp->full_duplex == 0) + tp->stats.tx_heartbeat_errors++; + } else { + tp->stats.tx_bytes += + tp->tx_buffers[entry].skb->len; + tp->stats.collisions += (status >> 3) & 15; + tp->stats.tx_packets++; + } + + pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, + tp->tx_buffers[entry].skb->len, + PCI_DMA_TODEVICE); + + /* Free the original skb. */ + dev_kfree_skb_irq(tp->tx_buffers[entry].skb); + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; + tx++; + } + +#ifndef final_version + if (tp->cur_tx - dirty_tx > TX_RING_SIZE) { + printk(KERN_ERR "%s: Out-of-sync dirty pointer, %d vs. %d.\n", + dev->name, dirty_tx, tp->cur_tx); + dirty_tx += TX_RING_SIZE; + } +#endif + + if (tp->cur_tx - dirty_tx < TX_RING_SIZE - 2) + netif_wake_queue(dev); + + tp->dirty_tx = dirty_tx; + if (csr5 & TxDied) { + if (tulip_debug > 2) + printk(KERN_WARNING "%s: The transmitter stopped." + " CSR5 is %x, CSR6 %x, new CSR6 %x.\n", + dev->name, csr5, inl(ioaddr + CSR6), tp->csr6); + tulip_restart_rxtx(tp); + } + spin_unlock(&tp->lock); + } + + /* Log errors. */ + if (csr5 & AbnormalIntr) { /* Abnormal error summary bit. */ + if (csr5 == 0xffffffff) + break; + if (csr5 & TxJabber) tp->stats.tx_errors++; + if (csr5 & TxFIFOUnderflow) { + if ((tp->csr6 & 0xC000) != 0xC000) + tp->csr6 += 0x4000; /* Bump up the Tx threshold */ + else + tp->csr6 |= 0x00200000; /* Store-n-forward. */ + /* Restart the transmit process. */ + tulip_restart_rxtx(tp); + outl(0, ioaddr + CSR1); + } + if (csr5 & (RxDied | RxNoBuf)) { + if (tp->flags & COMET_MAC_ADDR) { + outl(tp->mc_filter[0], ioaddr + 0xAC); + outl(tp->mc_filter[1], ioaddr + 0xB0); + } + } + if (csr5 & RxDied) { /* Missed a Rx frame. 
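One recovery path above is easy to miss: on a transmit FIFO underflow the driver raises the CSR6 transmit threshold one step at a time and, once the two threshold bits saturate, gives up on cut-through and sets store-and-forward. A standalone sketch of that escalation (escalate_tx_threshold() is an illustrative name; the masks come from the code above):

    #include <stdio.h>

    /* Escalate CSR6 after a Tx FIFO underflow, as the handler above does. */
    static unsigned int escalate_tx_threshold(unsigned int csr6)
    {
        if ((csr6 & 0xC000) != 0xC000)
            return csr6 + 0x4000;      /* bump threshold one step */
        return csr6 | 0x00200000;      /* saturated: store-and-forward */
    }

    int main(void)
    {
        unsigned int csr6 = 0;
        int i;
        for (i = 0; i < 4; i++) {
            csr6 = escalate_tx_threshold(csr6);
            printf("%#x\n", csr6);     /* 0x4000, 0x8000, 0xc000, 0x20c000 */
        }
        return 0;
    }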
*/ + tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff; +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (tp->fc_bit && !test_bit(tp->fc_bit, &netdev_fc_xoff)) { + tp->stats.rx_errors++; + tulip_start_rxtx(tp); + } +#else + tp->stats.rx_errors++; + tulip_start_rxtx(tp); +#endif + } + /* + * NB: t21142_lnk_change() does a del_timer_sync(), so be careful if this + * call is ever done under the spinlock + */ + if (csr5 & (TPLnkPass | TPLnkFail | 0x08000000)) { + if (tp->link_change) + (tp->link_change)(dev, csr5); + } + if (csr5 & SytemError) { + int error = (csr5 >> 23) & 7; + /* oops, we hit a PCI error. The code produced corresponds + * to the reason: + * 0 - parity error + * 1 - master abort + * 2 - target abort + * Note that on parity error, we should do a software reset + * of the chip to get it back into a sane state (according + * to the 21142/3 docs that is). + * -- rmk + */ + printk(KERN_ERR "%s: (%lu) System Error occured (%d)\n", + dev->name, tp->nir, error); + } + /* Clear all error sources, included undocumented ones! */ + outl(0x0800f7ba, ioaddr + CSR5); + oi++; + } + if (csr5 & TimerInt) { + + if (tulip_debug > 2) + printk(KERN_ERR "%s: Re-enabling interrupts, %8.8x.\n", + dev->name, csr5); +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (tp->fc_bit && (test_bit(tp->fc_bit, &netdev_fc_xoff))) + if (net_ratelimit()) printk("BUG!! enabling interupt when FC off (timerintr.) \n"); +#endif + outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); + tp->ttimer = 0; + oi++; + } + if (tx > maxtx || rx > maxrx || oi > maxoi) { + if (tulip_debug > 1) + printk(KERN_WARNING "%s: Too much work during an interrupt, " + "csr5=0x%8.8x. (%lu) (%d,%d,%d)\n", dev->name, csr5, tp->nir, tx, rx, oi); + + /* Acknowledge all interrupt sources. */ + outl(0x8001ffff, ioaddr + CSR5); + if (tp->flags & HAS_INTR_MITIGATION) { +#ifdef CONFIG_NET_HW_FLOWCONTROL + if(tp->mit_change) { + outl(mit_table[tp->mit_sel], ioaddr + CSR11); + tp->mit_change = 0; + } +#else + /* Josip Loncaric at ICASE did extensive experimentation + to develop a good interrupt mitigation setting.*/ + outl(0x8b240000, ioaddr + CSR11); +#endif + } else if (tp->chip_id == LC82C168) { + /* the LC82C168 doesn't have a hw timer.*/ + outl(0x00, ioaddr + CSR7); + mod_timer(&tp->timer, RUN_AT(HZ/50)); + } else { + /* Mask all interrupting sources, set timer to + re-enable. */ +#ifndef CONFIG_NET_HW_FLOWCONTROL + outl(((~csr5) & 0x0001ebef) | AbnormalIntr | TimerInt, ioaddr + CSR7); + outl(0x0012, ioaddr + CSR11); +#endif + } + break; + } + + work_count--; + if (work_count == 0) + break; + + csr5 = inl(ioaddr + CSR5); + } while ((csr5 & (NormalIntr|AbnormalIntr)) != 0); + + tulip_refill_rx(dev); + + /* check if the card is in suspend mode */ + entry = tp->dirty_rx % RX_RING_SIZE; + if (tp->rx_buffers[entry].skb == NULL) { + if (tulip_debug > 1) + printk(KERN_WARNING "%s: in rx suspend mode: (%lu) (tp->cur_rx = %u, ttimer = %d, rx = %d) go/stay in suspend mode\n", dev->name, tp->nir, tp->cur_rx, tp->ttimer, rx); + if (tp->chip_id == LC82C168) { + outl(0x00, ioaddr + CSR7); + mod_timer(&tp->timer, RUN_AT(HZ/50)); + } else { + if (tp->ttimer == 0 || (inl(ioaddr + CSR11) & 0xffff) == 0) { + if (tulip_debug > 1) + printk(KERN_WARNING "%s: in rx suspend mode: (%lu) set timer\n", dev->name, tp->nir); + outl(tulip_tbl[tp->chip_id].valid_intrs | TimerInt, + ioaddr + CSR7); + outl(TimerInt, ioaddr + CSR5); + outl(12, ioaddr + CSR11); + tp->ttimer = 1; + } + } + } + + if ((missed = inl(ioaddr + CSR8) & 0x1ffff)) { + tp->stats.rx_dropped += missed & 0x10000 ? 
0x10000 : missed; + } + + if (tulip_debug > 4) + printk(KERN_DEBUG "%s: exiting interrupt, csr5=%#4.4x.\n", + dev->name, inl(ioaddr + CSR5)); + +} diff --git a/xen-2.4.16/drivers/net/tulip/media.c b/xen-2.4.16/drivers/net/tulip/media.c new file mode 100644 index 0000000000..e0bb197dc5 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/media.c @@ -0,0 +1,563 @@ +/* + drivers/net/tulip/media.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +//#include +#include +#include +#include +#include "tulip.h" + + +/* This is a mysterious value that can be written to CSR11 in the 21040 (only) + to support a pre-NWay full-duplex signaling mechanism using short frames. + No one knows what it should be, but if left at its default value some + 10base2(!) packets trigger a full-duplex-request interrupt. */ +#define FULL_DUPLEX_MAGIC 0x6969 + +/* The maximum data clock rate is 2.5 Mhz. The minimum timing is usually + met by back-to-back PCI I/O cycles, but we insert a delay to avoid + "overclocking" issues or future 66Mhz PCI. */ +#define mdio_delay() inl(mdio_addr) + +/* Read and write the MII registers using software-generated serial + MDIO protocol. It is just different enough from the EEPROM protocol + to not share code. The maxium data clock rate is 2.5 Mhz. */ +#define MDIO_SHIFT_CLK 0x10000 +#define MDIO_DATA_WRITE0 0x00000 +#define MDIO_DATA_WRITE1 0x20000 +#define MDIO_ENB 0x00000 /* Ignore the 0x02000 databook setting. */ +#define MDIO_ENB_IN 0x40000 +#define MDIO_DATA_READ 0x80000 + +static const unsigned char comet_miireg2offset[32] = { + 0xB4, 0xB8, 0xBC, 0xC0, 0xC4, 0xC8, 0xCC, 0, 0,0,0,0, 0,0,0,0, + 0,0xD0,0,0, 0,0,0,0, 0,0,0,0, 0, 0xD4, 0xD8, 0xDC, }; + + +/* MII transceiver control section. + Read and write the MII registers using software-generated serial + MDIO protocol. See the MII specifications or DP83840A data sheet + for details. */ + +int tulip_mdio_read(struct net_device *dev, int phy_id, int location) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int i; + int read_cmd = (0xf6 << 10) | ((phy_id & 0x1f) << 5) | location; + int retval = 0; + long ioaddr = dev->base_addr; + long mdio_addr = ioaddr + CSR9; + unsigned long flags; + + if (location & ~0x1f) + return 0xffff; + + if (tp->chip_id == COMET && phy_id == 30) { + if (comet_miireg2offset[location]) + return inl(ioaddr + comet_miireg2offset[location]); + return 0xffff; + } + + spin_lock_irqsave(&tp->mii_lock, flags); + if (tp->chip_id == LC82C168) { + int i = 1000; + outl(0x60020000 + (phy_id<<23) + (location<<18), ioaddr + 0xA0); + inl(ioaddr + 0xA0); + inl(ioaddr + 0xA0); + while (--i > 0) { + barrier(); + if ( ! ((retval = inl(ioaddr + 0xA0)) & 0x80000000)) + break; + } + spin_unlock_irqrestore(&tp->mii_lock, flags); + return retval & 0xffff; + } + + /* Establish sync by sending at least 32 logic ones. */ + for (i = 32; i >= 0; i--) { + outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); + mdio_delay(); + outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + /* Shift the read command bits out. */ + for (i = 15; i >= 0; i--) { + int dataval = (read_cmd & (1 << i)) ? 
MDIO_DATA_WRITE1 : 0; + + outl(MDIO_ENB | dataval, mdio_addr); + mdio_delay(); + outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + /* Read the two transition, 16 data, and wire-idle bits. */ + for (i = 19; i > 0; i--) { + outl(MDIO_ENB_IN, mdio_addr); + mdio_delay(); + retval = (retval << 1) | ((inl(mdio_addr) & MDIO_DATA_READ) ? 1 : 0); + outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + + spin_unlock_irqrestore(&tp->mii_lock, flags); + return (retval>>1) & 0xffff; +} + +void tulip_mdio_write(struct net_device *dev, int phy_id, int location, int val) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int i; + int cmd = (0x5002 << 16) | ((phy_id & 0x1f) << 23) | (location<<18) | (val & 0xffff); + long ioaddr = dev->base_addr; + long mdio_addr = ioaddr + CSR9; + unsigned long flags; + + if (location & ~0x1f) + return; + + if (tp->chip_id == COMET && phy_id == 30) { + if (comet_miireg2offset[location]) + outl(val, ioaddr + comet_miireg2offset[location]); + return; + } + + spin_lock_irqsave(&tp->mii_lock, flags); + if (tp->chip_id == LC82C168) { + int i = 1000; + outl(cmd, ioaddr + 0xA0); + do { + barrier(); + if ( ! (inl(ioaddr + 0xA0) & 0x80000000)) + break; + } while (--i > 0); + spin_unlock_irqrestore(&tp->mii_lock, flags); + return; + } + + /* Establish sync by sending 32 logic ones. */ + for (i = 32; i >= 0; i--) { + outl(MDIO_ENB | MDIO_DATA_WRITE1, mdio_addr); + mdio_delay(); + outl(MDIO_ENB | MDIO_DATA_WRITE1 | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + /* Shift the command bits out. */ + for (i = 31; i >= 0; i--) { + int dataval = (cmd & (1 << i)) ? MDIO_DATA_WRITE1 : 0; + outl(MDIO_ENB | dataval, mdio_addr); + mdio_delay(); + outl(MDIO_ENB | dataval | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + /* Clear out extra bits. */ + for (i = 2; i > 0; i--) { + outl(MDIO_ENB_IN, mdio_addr); + mdio_delay(); + outl(MDIO_ENB_IN | MDIO_SHIFT_CLK, mdio_addr); + mdio_delay(); + } + + spin_unlock_irqrestore(&tp->mii_lock, flags); +} + + +/* Set up the transceiver control registers for the selected media type. */ +void tulip_select_media(struct net_device *dev, int startup) +{ + long ioaddr = dev->base_addr; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + struct mediatable *mtable = tp->mtable; + u32 new_csr6; + int i; + + if (mtable) { + struct medialeaf *mleaf = &mtable->mleaf[tp->cur_index]; + unsigned char *p = mleaf->leafdata; + switch (mleaf->type) { + case 0: /* 21140 non-MII xcvr. 
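The bit-bang loops above just serialize a precomputed command word, so the MII framing lives entirely in two integer expressions. A standalone sketch of those frames, with the constants lifted from tulip_mdio_read() and tulip_mdio_write() above (helper names are mine):

    #include <stdio.h>

    /* MII read frame: start/opcode bits (0xf6), then PHY and register. */
    static unsigned int mdio_read_cmd(unsigned int phy, unsigned int reg)
    {
        return (0xf6 << 10) | ((phy & 0x1f) << 5) | (reg & 0x1f);
    }

    /* MII write frame: start/opcode/turnaround (0x5002), PHY, register, data. */
    static unsigned int mdio_write_cmd(unsigned int phy, unsigned int reg,
                                       unsigned int val)
    {
        return (0x5002 << 16) | ((phy & 0x1f) << 23)
             | ((reg & 0x1f) << 18) | (val & 0xffff);
    }

    int main(void)
    {
        printf("%#x\n", mdio_read_cmd(1, 1));          /* read BMSR, PHY 1 */
        printf("%#x\n", mdio_write_cmd(1, 0, 0x1200)); /* write BMCR, PHY 1 */
        return 0;
    }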
*/ + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Using a 21140 non-MII transceiver" + " with control setting %2.2x.\n", + dev->name, p[1]); + dev->if_port = p[0]; + if (startup) + outl(mtable->csr12dir | 0x100, ioaddr + CSR12); + outl(p[1], ioaddr + CSR12); + new_csr6 = 0x02000000 | ((p[2] & 0x71) << 18); + break; + case 2: case 4: { + u16 setup[5]; + u32 csr13val, csr14val, csr15dir, csr15val; + for (i = 0; i < 5; i++) + setup[i] = get_u16(&p[i*2 + 1]); + + dev->if_port = p[0] & MEDIA_MASK; + if (tulip_media_cap[dev->if_port] & MediaAlwaysFD) + tp->full_duplex = 1; + + if (startup && mtable->has_reset) { + struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; + unsigned char *rst = rleaf->leafdata; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Resetting the transceiver.\n", + dev->name); + for (i = 0; i < rst[0]; i++) + outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); + } + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: 21143 non-MII %s transceiver control " + "%4.4x/%4.4x.\n", + dev->name, medianame[dev->if_port], setup[0], setup[1]); + if (p[0] & 0x40) { /* SIA (CSR13-15) setup values are provided. */ + csr13val = setup[0]; + csr14val = setup[1]; + csr15dir = (setup[3]<<16) | setup[2]; + csr15val = (setup[4]<<16) | setup[2]; + outl(0, ioaddr + CSR13); + outl(csr14val, ioaddr + CSR14); + outl(csr15dir, ioaddr + CSR15); /* Direction */ + outl(csr15val, ioaddr + CSR15); /* Data */ + outl(csr13val, ioaddr + CSR13); + } else { + csr13val = 1; + csr14val = 0; + csr15dir = (setup[0]<<16) | 0x0008; + csr15val = (setup[1]<<16) | 0x0008; + if (dev->if_port <= 4) + csr14val = t21142_csr14[dev->if_port]; + if (startup) { + outl(0, ioaddr + CSR13); + outl(csr14val, ioaddr + CSR14); + } + outl(csr15dir, ioaddr + CSR15); /* Direction */ + outl(csr15val, ioaddr + CSR15); /* Data */ + if (startup) outl(csr13val, ioaddr + CSR13); + } + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Setting CSR15 to %8.8x/%8.8x.\n", + dev->name, csr15dir, csr15val); + if (mleaf->type == 4) + new_csr6 = 0x82020000 | ((setup[2] & 0x71) << 18); + else + new_csr6 = 0x82420000; + break; + } + case 1: case 3: { + int phy_num = p[0]; + int init_length = p[1]; + u16 *misc_info, tmp_info; + + dev->if_port = 11; + new_csr6 = 0x020E0000; + if (mleaf->type == 3) { /* 21142 */ + u16 *init_sequence = (u16*)(p+2); + u16 *reset_sequence = &((u16*)(p+3))[init_length]; + int reset_length = p[2 + init_length*2]; + misc_info = reset_sequence + reset_length; + if (startup) + for (i = 0; i < reset_length; i++) + outl(get_u16(&reset_sequence[i]) << 16, ioaddr + CSR15); + for (i = 0; i < init_length; i++) + outl(get_u16(&init_sequence[i]) << 16, ioaddr + CSR15); + } else { + u8 *init_sequence = p + 2; + u8 *reset_sequence = p + 3 + init_length; + int reset_length = p[2 + init_length]; + misc_info = (u16*)(reset_sequence + reset_length); + if (startup) { + outl(mtable->csr12dir | 0x100, ioaddr + CSR12); + for (i = 0; i < reset_length; i++) + outl(reset_sequence[i], ioaddr + CSR12); + } + for (i = 0; i < init_length; i++) + outl(init_sequence[i], ioaddr + CSR12); + } + tmp_info = get_u16(&misc_info[1]); + if (tmp_info) + tp->advertising[phy_num] = tmp_info | 1; + if (tmp_info && startup < 2) { + if (tp->mii_advertise == 0) + tp->mii_advertise = tp->advertising[phy_num]; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Advertising %4.4x on MII %d.\n", + dev->name, tp->mii_advertise, tp->phys[phy_num]); + tulip_mdio_write(dev, tp->phys[phy_num], 4, tp->mii_advertise); + } + break; + } + case 5: case 6: { + u16 setup[5]; + + new_csr6 = 
0; /* FIXME */ + + for (i = 0; i < 5; i++) + setup[i] = get_u16(&p[i*2 + 1]); + + if (startup && mtable->has_reset) { + struct medialeaf *rleaf = &mtable->mleaf[mtable->has_reset]; + unsigned char *rst = rleaf->leafdata; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Resetting the transceiver.\n", + dev->name); + for (i = 0; i < rst[0]; i++) + outl(get_u16(rst + 1 + (i<<1)) << 16, ioaddr + CSR15); + } + + break; + } + default: + printk(KERN_DEBUG "%s: Invalid media table selection %d.\n", + dev->name, mleaf->type); + new_csr6 = 0x020E0000; + } + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Using media type %s, CSR12 is %2.2x.\n", + dev->name, medianame[dev->if_port], + inl(ioaddr + CSR12) & 0xff); + } else if (tp->chip_id == DC21041) { + int port = dev->if_port <= 4 ? dev->if_port : 0; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: 21041 using media %s, CSR12 is %4.4x.\n", + dev->name, medianame[port == 3 ? 12: port], + inl(ioaddr + CSR12)); + outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ + outl(t21041_csr14[port], ioaddr + CSR14); + outl(t21041_csr15[port], ioaddr + CSR15); + outl(t21041_csr13[port], ioaddr + CSR13); + new_csr6 = 0x80020000; + } else if (tp->chip_id == LC82C168) { + if (startup && ! tp->medialock) + dev->if_port = tp->mii_cnt ? 11 : 0; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: PNIC PHY status is %3.3x, media %s.\n", + dev->name, inl(ioaddr + 0xB8), medianame[dev->if_port]); + if (tp->mii_cnt) { + new_csr6 = 0x810C0000; + outl(0x0001, ioaddr + CSR15); + outl(0x0201B07A, ioaddr + 0xB8); + } else if (startup) { + /* Start with 10mbps to do autonegotiation. */ + outl(0x32, ioaddr + CSR12); + new_csr6 = 0x00420000; + outl(0x0001B078, ioaddr + 0xB8); + outl(0x0201B078, ioaddr + 0xB8); + } else if (dev->if_port == 3 || dev->if_port == 5) { + outl(0x33, ioaddr + CSR12); + new_csr6 = 0x01860000; + /* Trigger autonegotiation. */ + outl(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); + } else { + outl(0x32, ioaddr + CSR12); + new_csr6 = 0x00420000; + outl(0x1F078, ioaddr + 0xB8); + } + } else if (tp->chip_id == DC21040) { /* 21040 */ + /* Turn on the xcvr interface. */ + int csr12 = inl(ioaddr + CSR12); + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: 21040 media type is %s, CSR12 is %2.2x.\n", + dev->name, medianame[dev->if_port], csr12); + if (tulip_media_cap[dev->if_port] & MediaAlwaysFD) + tp->full_duplex = 1; + new_csr6 = 0x20000; + /* Set the full duplux match frame. */ + outl(FULL_DUPLEX_MAGIC, ioaddr + CSR11); + outl(0x00000000, ioaddr + CSR13); /* Reset the serial interface */ + if (t21040_csr13[dev->if_port] & 8) { + outl(0x0705, ioaddr + CSR14); + outl(0x0006, ioaddr + CSR15); + } else { + outl(0xffff, ioaddr + CSR14); + outl(0x0000, ioaddr + CSR15); + } + outl(0x8f01 | t21040_csr13[dev->if_port], ioaddr + CSR13); + } else { /* Unknown chip type with no media table. */ + if (tp->default_port == 0) + dev->if_port = tp->mii_cnt ? 11 : 3; + if (tulip_media_cap[dev->if_port] & MediaIsMII) { + new_csr6 = 0x020E0000; + } else if (tulip_media_cap[dev->if_port] & MediaIsFx) { + new_csr6 = 0x028600000; + } else + new_csr6 = 0x038600000; + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: No media description table, assuming " + "%s transceiver, CSR12 %2.2x.\n", + dev->name, medianame[dev->if_port], + inl(ioaddr + CSR12)); + } + + tp->csr6 = new_csr6 | (tp->csr6 & 0xfdff) | (tp->full_duplex ? 0x0200 : 0); + return; +} + +/* + Check the MII negotiated duplex and change the CSR6 setting if + required. + Return 0 if everything is OK. 
+ Return < 0 if the transceiver is missing or has no link beat. + */ +int tulip_check_duplex(struct net_device *dev) +{ + struct tulip_private *tp = dev->priv; + unsigned int bmsr, lpa, negotiated, new_csr6; + + bmsr = tulip_mdio_read(dev, tp->phys[0], MII_BMSR); + lpa = tulip_mdio_read(dev, tp->phys[0], MII_LPA); + if (tulip_debug > 1) + printk(KERN_INFO "%s: MII status %4.4x, Link partner report " + "%4.4x.\n", dev->name, bmsr, lpa); + if (bmsr == 0xffff) + return -2; + if ((bmsr & BMSR_LSTATUS) == 0) { + int new_bmsr = tulip_mdio_read(dev, tp->phys[0], MII_BMSR); + if ((new_bmsr & BMSR_LSTATUS) == 0) { + if (tulip_debug > 1) + printk(KERN_INFO "%s: No link beat on the MII interface," + " status %4.4x.\n", dev->name, new_bmsr); + return -1; + } + } + negotiated = lpa & tp->advertising[0]; + tp->full_duplex = mii_duplex(tp->full_duplex_lock, negotiated); + + new_csr6 = tp->csr6; + + if (negotiated & LPA_100) new_csr6 &= ~TxThreshold; + else new_csr6 |= TxThreshold; + if (tp->full_duplex) new_csr6 |= FullDuplex; + else new_csr6 &= ~FullDuplex; + + if (new_csr6 != tp->csr6) { + tp->csr6 = new_csr6; + tulip_restart_rxtx(tp); + + if (tulip_debug > 0) + printk(KERN_INFO "%s: Setting %s-duplex based on MII" + "#%d link partner capability of %4.4x.\n", + dev->name, tp->full_duplex ? "full" : "half", + tp->phys[0], lpa); + return 1; + } + + return 0; +} + +void __devinit tulip_find_mii (struct net_device *dev, int board_idx) +{ + struct tulip_private *tp = dev->priv; + int phyn, phy_idx = 0; + int mii_reg0; + int mii_advert; + unsigned int to_advert, new_bmcr, ane_switch; + + /* Find the connected MII xcvrs. + Doing this in open() would allow detecting external xcvrs later, + but takes much time. */ + for (phyn = 1; phyn <= 32 && phy_idx < sizeof (tp->phys); phyn++) { + int phy = phyn & 0x1f; + int mii_status = tulip_mdio_read (dev, phy, MII_BMSR); + if ((mii_status & 0x8301) == 0x8001 || + ((mii_status & BMSR_100BASE4) == 0 + && (mii_status & 0x7800) != 0)) { + /* preserve Becker logic, gain indentation level */ + } else { + continue; + } + + mii_reg0 = tulip_mdio_read (dev, phy, MII_BMCR); + mii_advert = tulip_mdio_read (dev, phy, MII_ADVERTISE); + ane_switch = 0; + + /* if not advertising at all, gen an + * advertising value from the capability + * bits in BMSR + */ + if ((mii_advert & ADVERTISE_ALL) == 0) { + unsigned int tmpadv = tulip_mdio_read (dev, phy, MII_BMSR); + mii_advert = ((tmpadv >> 6) & 0x3e0) | 1; + } + + if (tp->mii_advertise) { + tp->advertising[phy_idx] = + to_advert = tp->mii_advertise; + } else if (tp->advertising[phy_idx]) { + to_advert = tp->advertising[phy_idx]; + } else { + tp->advertising[phy_idx] = + tp->mii_advertise = + to_advert = mii_advert; + } + + tp->phys[phy_idx++] = phy; + + printk (KERN_INFO "tulip%d: MII transceiver #%d " + "config %4.4x status %4.4x advertising %4.4x.\n", + board_idx, phy, mii_reg0, mii_status, mii_advert); + + /* Fixup for DLink with miswired PHY. */ + if (mii_advert != to_advert) { + printk (KERN_DEBUG "tulip%d: Advertising %4.4x on PHY %d," + " previously advertising %4.4x.\n", + board_idx, to_advert, phy, mii_advert); + tulip_mdio_write (dev, phy, 4, to_advert); + } + + /* Enable autonegotiation: some boards default to off. 
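tulip_check_duplex() above resolves the link by ANDing the local advertisement against the partner's LPA word and taking the best common mode, 100 before 10 and full before half; the driver itself routes this through its mii_duplex() helper so a user duplex lock is honored. A standalone sketch of the priority walk using the standard MII advertisement bit values (resolve_media() is an illustrative helper):

    #include <stdio.h>

    /* Standard MII advertisement bits. */
    #define ADV_100FULL 0x0100
    #define ADV_100HALF 0x0080
    #define ADV_10FULL  0x0040
    #define ADV_10HALF  0x0020

    static const char *resolve_media(unsigned int advertising,
                                     unsigned int lpa, int *full_duplex)
    {
        unsigned int neg = advertising & lpa;  /* common capabilities */
        if (neg & ADV_100FULL) { *full_duplex = 1; return "100baseTx-FD"; }
        if (neg & ADV_100HALF) { *full_duplex = 0; return "100baseTx"; }
        if (neg & ADV_10FULL)  { *full_duplex = 1; return "10baseT-FD"; }
        *full_duplex = 0;
        return "10baseT";
    }

    int main(void)
    {
        int fd;
        const char *m = resolve_media(0x01e1, 0x0181, &fd);
        printf("%s, full duplex = %d\n", m, fd);  /* 100baseTx-FD, 1 */
        return 0;
    }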
*/ + if (tp->default_port == 0) { + new_bmcr = mii_reg0 | BMCR_ANENABLE; + if (new_bmcr != mii_reg0) { + new_bmcr |= BMCR_ANRESTART; + ane_switch = 1; + } + } + /* ...or disable nway, if forcing media */ + else { + new_bmcr = mii_reg0 & ~BMCR_ANENABLE; + if (new_bmcr != mii_reg0) + ane_switch = 1; + } + + /* clear out bits we never want at this point */ + new_bmcr &= ~(BMCR_CTST | BMCR_FULLDPLX | BMCR_ISOLATE | + BMCR_PDOWN | BMCR_SPEED100 | BMCR_LOOPBACK | + BMCR_RESET); + + if (tp->full_duplex) + new_bmcr |= BMCR_FULLDPLX; + if (tulip_media_cap[tp->default_port] & MediaIs100) + new_bmcr |= BMCR_SPEED100; + + if (new_bmcr != mii_reg0) { + /* some phys need the ANE switch to + * happen before forced media settings + * will "take." However, we write the + * same value twice in order not to + * confuse the sane phys. + */ + if (ane_switch) { + tulip_mdio_write (dev, phy, MII_BMCR, new_bmcr); + udelay (10); + } + tulip_mdio_write (dev, phy, MII_BMCR, new_bmcr); + } + } + tp->mii_cnt = phy_idx; + if (tp->mtable && tp->mtable->has_mii && phy_idx == 0) { + printk (KERN_INFO "tulip%d: ***WARNING***: No MII transceiver found!\n", + board_idx); + tp->phys[0] = 1; + } +} diff --git a/xen-2.4.16/drivers/net/tulip/pnic.c b/xen-2.4.16/drivers/net/tulip/pnic.c new file mode 100644 index 0000000000..6739dd30fc --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/pnic.c @@ -0,0 +1,171 @@ +/* + drivers/net/tulip/pnic.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +//#include +#include "tulip.h" + + +void pnic_do_nway(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + u32 phy_reg = inl(ioaddr + 0xB8); + u32 new_csr6 = tp->csr6 & ~0x40C40200; + + if (phy_reg & 0x78000000) { /* Ignore baseT4 */ + if (phy_reg & 0x20000000) dev->if_port = 5; + else if (phy_reg & 0x40000000) dev->if_port = 3; + else if (phy_reg & 0x10000000) dev->if_port = 4; + else if (phy_reg & 0x08000000) dev->if_port = 0; + tp->nwayset = 1; + new_csr6 = (dev->if_port & 1) ? 0x01860000 : 0x00420000; + outl(0x32 | (dev->if_port & 1), ioaddr + CSR12); + if (dev->if_port & 1) + outl(0x1F868, ioaddr + 0xB8); + if (phy_reg & 0x30000000) { + tp->full_duplex = 1; + new_csr6 |= 0x00000200; + } + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: PNIC autonegotiated status %8.8x, %s.\n", + dev->name, phy_reg, medianame[dev->if_port]); + if (tp->csr6 != new_csr6) { + tp->csr6 = new_csr6; + /* Restart Tx */ + tulip_restart_rxtx(tp); + dev->trans_start = jiffies; + } + } +} + +void pnic_lnk_change(struct net_device *dev, int csr5) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int phy_reg = inl(ioaddr + 0xB8); + + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: PNIC link changed state %8.8x, CSR5 %8.8x.\n", + dev->name, phy_reg, csr5); + if (inl(ioaddr + CSR5) & TPLnkFail) { + outl((inl(ioaddr + CSR7) & ~TPLnkFail) | TPLnkPass, ioaddr + CSR7); + /* If we use an external MII, then we mustn't use the + * internal negotiation. + */ + if (tulip_media_cap[dev->if_port] & MediaIsMII) + return; + if (! 
tp->nwayset || jiffies - dev->trans_start > 1*HZ) { + tp->csr6 = 0x00420000 | (tp->csr6 & 0x0000fdff); + outl(tp->csr6, ioaddr + CSR6); + outl(0x30, ioaddr + CSR12); + outl(0x0201F078, ioaddr + 0xB8); /* Turn on autonegotiation. */ + dev->trans_start = jiffies; + } + } else if (inl(ioaddr + CSR5) & TPLnkPass) { + if (tulip_media_cap[dev->if_port] & MediaIsMII) { + spin_lock(&tp->lock); + tulip_check_duplex(dev); + spin_unlock(&tp->lock); + } else { + pnic_do_nway(dev); + } + outl((inl(ioaddr + CSR7) & ~TPLnkPass) | TPLnkFail, ioaddr + CSR7); + } +} + +void pnic_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 60*HZ; + + if(!inl(ioaddr + CSR7)) { + /* the timer was called due to a work overflow + * in the interrupt handler. Skip the connection + * checks, the nic is definitively speaking with + * his link partner. + */ + goto too_good_connection; + } + + if (tulip_media_cap[dev->if_port] & MediaIsMII) { + spin_lock_irq(&tp->lock); + if (tulip_check_duplex(dev) > 0) + next_tick = 3*HZ; + spin_unlock_irq(&tp->lock); + } else { + int csr12 = inl(ioaddr + CSR12); + int new_csr6 = tp->csr6 & ~0x40C40200; + int phy_reg = inl(ioaddr + 0xB8); + int csr5 = inl(ioaddr + CSR5); + + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: PNIC timer PHY status %8.8x, %s " + "CSR5 %8.8x.\n", + dev->name, phy_reg, medianame[dev->if_port], csr5); + if (phy_reg & 0x04000000) { /* Remote link fault */ + outl(0x0201F078, ioaddr + 0xB8); + next_tick = 1*HZ; + tp->nwayset = 0; + } else if (phy_reg & 0x78000000) { /* Ignore baseT4 */ + pnic_do_nway(dev); + next_tick = 60*HZ; + } else if (csr5 & TPLnkFail) { /* 100baseTx link beat */ + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: %s link beat failed, CSR12 %4.4x, " + "CSR5 %8.8x, PHY %3.3x.\n", + dev->name, medianame[dev->if_port], csr12, + inl(ioaddr + CSR5), inl(ioaddr + 0xB8)); + next_tick = 3*HZ; + if (tp->medialock) { + } else if (tp->nwayset && (dev->if_port & 1)) { + next_tick = 1*HZ; + } else if (dev->if_port == 0) { + dev->if_port = 3; + outl(0x33, ioaddr + CSR12); + new_csr6 = 0x01860000; + outl(0x1F868, ioaddr + 0xB8); + } else { + dev->if_port = 0; + outl(0x32, ioaddr + CSR12); + new_csr6 = 0x00420000; + outl(0x1F078, ioaddr + 0xB8); + } + if (tp->csr6 != new_csr6) { + tp->csr6 = new_csr6; + /* Restart Tx */ + tulip_restart_rxtx(tp); + dev->trans_start = jiffies; + if (tulip_debug > 1) + printk(KERN_INFO "%s: Changing PNIC configuration to %s " + "%s-duplex, CSR6 %8.8x.\n", + dev->name, medianame[dev->if_port], + tp->full_duplex ? "full" : "half", new_csr6); + } + } + } +too_good_connection: + mod_timer(&tp->timer, RUN_AT(next_tick)); + if(!inl(ioaddr + CSR7)) { + if (tulip_debug > 1) + printk(KERN_INFO "%s: sw timer wakeup.\n", dev->name); + disable_irq(dev->irq); + tulip_refill_rx(dev); + enable_irq(dev->irq); + outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); + } +} diff --git a/xen-2.4.16/drivers/net/tulip/pnic2.c b/xen-2.4.16/drivers/net/tulip/pnic2.c new file mode 100644 index 0000000000..9b209d2c84 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/pnic2.c @@ -0,0 +1,407 @@ +/* + drivers/net/tulip/pnic2.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + Modified to hep support PNIC_II by Kevin B. 
Hendricks + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + + +/* Understanding the PNIC_II - everything is this file is based + * on the PNIC_II_PDF datasheet which is sorely lacking in detail + * + * As I understand things, here are the registers and bits that + * explain the masks and constants used in this file that are + * either different from the 21142/3 or important for basic operation. + * + * + * CSR 6 (mask = 0xfe3bd1fd of bits not to change) + * ----- + * Bit 24 - SCR + * Bit 23 - PCS + * Bit 22 - TTM (Trasmit Threshold Mode) + * Bit 18 - Port Select + * Bit 13 - Start - 1, Stop - 0 Transmissions + * Bit 11:10 - Loop Back Operation Mode + * Bit 9 - Full Duplex mode (Advertise 10BaseT-FD is CSR14<7> is set) + * Bit 1 - Start - 1, Stop - 0 Receive + * + * + * CSR 14 (mask = 0xfff0ee39 of bits not to change) + * ------ + * Bit 19 - PAUSE-Pause + * Bit 18 - Advertise T4 + * Bit 17 - Advertise 100baseTx-FD + * Bit 16 - Advertise 100baseTx-HD + * Bit 12 - LTE - Link Test Enable + * Bit 7 - ANE - Auto Negotiate Enable + * Bit 6 - HDE - Advertise 10baseT-HD + * Bit 2 - Reset to Power down - kept as 1 for normal operation + * Bit 1 - Loop Back enable for 10baseT MCC + * + * + * CSR 12 + * ------ + * Bit 25 - Partner can do T4 + * Bit 24 - Partner can do 100baseTx-FD + * Bit 23 - Partner can do 100baseTx-HD + * Bit 22 - Partner can do 10baseT-FD + * Bit 21 - Partner can do 10baseT-HD + * Bit 15 - LPN is 1 if all above bits are valid other wise 0 + * Bit 14:12 - autonegotiation state (write 001 to start autonegotiate) + * Bit 3 - Autopolarity state + * Bit 2 - LS10B - link state of 10baseT 0 - good, 1 - failed + * Bit 1 - LS100B - link state of 100baseT 0 - good, 1- faild + * + * + * Data Port Selection Info + *------------------------- + * + * CSR14<7> CSR6<18> CSR6<22> CSR6<23> CSR6<24> MODE/PORT + * 1 0 0 (X) 0 (X) 1 NWAY + * 0 0 1 0 (X) 0 10baseT + * 0 1 0 1 1 (X) 100baseT + * + * + */ + + + +#include "tulip.h" +#include +#include + + +void pnic2_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 60*HZ; + + if (tulip_debug > 3) + printk(KERN_INFO"%s: PNIC2 negotiation status %8.8x.\n", + dev->name,inl(ioaddr + CSR12)); + + if (next_tick) { + mod_timer(&tp->timer, RUN_AT(next_tick)); + } +} + + +void pnic2_start_nway(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr14; + int csr12; + + /* set up what to advertise during the negotiation */ + + /* load in csr14 and mask off bits not to touch + * comment at top of file explains mask value + */ + csr14 = (inl(ioaddr + CSR14) & 0xfff0ee39); + + /* bit 17 - advetise 100baseTx-FD */ + if (tp->sym_advertise & 0x0100) csr14 |= 0x00020000; + + /* bit 16 - advertise 100baseTx-HD */ + if (tp->sym_advertise & 0x0080) csr14 |= 0x00010000; + + /* bit 6 - advertise 10baseT-HD */ + if (tp->sym_advertise & 0x0020) csr14 |= 0x00000040; + + /* Now set bit 12 Link Test Enable, Bit 7 Autonegotiation Enable + * and bit 0 Don't PowerDown 10baseT + */ + csr14 |= 0x00001184; + + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Restarting PNIC2 autonegotiation, " + 
"csr14=%8.8x.\n", dev->name, csr14); + + /* tell pnic2_lnk_change we are doing an nway negotiation */ + dev->if_port = 0; + tp->nway = tp->mediasense = 1; + tp->nwayset = tp->lpar = 0; + + /* now we have to set up csr6 for NWAY state */ + + tp->csr6 = inl(ioaddr + CSR6); + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: On Entry to Nway, " + "csr6=%8.8x.\n", dev->name, tp->csr6); + + /* mask off any bits not to touch + * comment at top of file explains mask value + */ + tp->csr6 = tp->csr6 & 0xfe3bd1fd; + + /* don't forget that bit 9 is also used for advertising */ + /* advertise 10baseT-FD for the negotiation (bit 9) */ + if (tp->sym_advertise & 0x0040) tp->csr6 |= 0x00000200; + + /* set bit 24 for nway negotiation mode ... + * see Data Port Selection comment at top of file + * and "Stop" - reset both Transmit (bit 13) and Receive (bit 1) + */ + tp->csr6 |= 0x01000000; + outl(csr14, ioaddr + CSR14); + outl(tp->csr6, ioaddr + CSR6); + udelay(100); + + /* all set up so now force the negotiation to begin */ + + /* read in current values and mask off all but the + * Autonegotiation bits 14:12. Writing a 001 to those bits + * should start the autonegotiation + */ + csr12 = (inl(ioaddr + CSR12) & 0xffff8fff); + csr12 |= 0x1000; + outl(csr12, ioaddr + CSR12); +} + + + +void pnic2_lnk_change(struct net_device *dev, int csr5) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int csr14; + + /* read the staus register to find out what is up */ + int csr12 = inl(ioaddr + CSR12); + + if (tulip_debug > 1) + printk(KERN_INFO"%s: PNIC2 link status interrupt %8.8x, " + " CSR5 %x, %8.8x.\n", dev->name, csr12, + csr5, inl(ioaddr + CSR14)); + + /* If NWay finished and we have a negotiated partner capability. + * check bits 14:12 for bit pattern 101 - all is good + */ + if (tp->nway && !tp->nwayset) { + + /* we did an auto negotiation */ + + if ((csr12 & 0x7000) == 0x5000) { + + /* negotiation ended successfully */ + + /* get the link partners reply and mask out all but + * bits 24-21 which show the partners capabilites + * and match those to what we advertised + * + * then begin to interpret the results of the negotiation. + * Always go in this order : (we are ignoring T4 for now) + * 100baseTx-FD, 100baseTx-HD, 10baseT-FD, 10baseT-HD + */ + + int negotiated = ((csr12 >> 16) & 0x01E0) & tp->sym_advertise; + tp->lpar = (csr12 >> 16); + tp->nwayset = 1; + + if (negotiated & 0x0100) dev->if_port = 5; + else if (negotiated & 0x0080) dev->if_port = 3; + else if (negotiated & 0x0040) dev->if_port = 4; + else if (negotiated & 0x0020) dev->if_port = 0; + else { + if (tulip_debug > 1) + printk(KERN_INFO "%s: funny autonegotiate result " + "csr12 %8.8x advertising %4.4x\n", + dev->name, csr12, tp->sym_advertise); + tp->nwayset = 0; + /* so check if 100baseTx link state is okay */ + if ((csr12 & 2) == 0 && (tp->sym_advertise & 0x0180)) + dev->if_port = 3; + } + + /* now record the duplex that was negotiated */ + tp->full_duplex = 0; + if ((dev->if_port == 4) || (dev->if_port == 5)) + tp->full_duplex = 1; + + if (tulip_debug > 1) { + if (tp->nwayset) + printk(KERN_INFO "%s: Switching to %s based on link " + "negotiation %4.4x & %4.4x = %4.4x.\n", + dev->name, medianame[dev->if_port], + tp->sym_advertise, tp->lpar, negotiated); + } + + /* remember to turn off bit 7 - autonegotiate + * enable so we can properly end nway mode and + * set duplex (ie. 
use csr6<9> again) + */ + csr14 = (inl(ioaddr + CSR14) & 0xffffff7f); + outl(csr14,ioaddr + CSR14); + + + /* now set the data port and operating mode + * (see the Data Port Selection comments at + * the top of the file + */ + + /* get current csr6 and mask off bits not to touch */ + /* see comment at top of file */ + + tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd); + + /* so if using if_port 3 or 5 then select the 100baseT + * port else select the 10baseT port. + * See the Data Port Selection table at the top + * of the file which was taken from the PNIC_II.PDF + * datasheet + */ + if (dev->if_port & 1) tp->csr6 |= 0x01840000; + else tp->csr6 |= 0x00400000; + + /* now set the full duplex bit appropriately */ + if (tp->full_duplex) tp->csr6 |= 0x00000200; + + outl(1, ioaddr + CSR13); + + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Setting CSR6 %8.8x/%x CSR12 " + "%8.8x.\n", dev->name, tp->csr6, + inl(ioaddr + CSR6), inl(ioaddr + CSR12)); + + /* now the following actually writes out the + * new csr6 values + */ + tulip_start_rxtx(tp); + + return; + + } else { + printk(KERN_INFO "%s: Autonegotiation failed, " + "using %s, link beat status %4.4x.\n", + dev->name, medianame[dev->if_port], csr12); + + /* remember to turn off bit 7 - autonegotiate + * enable so we don't forget + */ + csr14 = (inl(ioaddr + CSR14) & 0xffffff7f); + outl(csr14,ioaddr + CSR14); + + /* what should we do when autonegotiate fails? + * should we try again or default to baseline + * case. I just don't know. + * + * for now default to some baseline case + */ + + dev->if_port = 0; + tp->nway = 0; + tp->nwayset = 1; + + /* set to 10baseTx-HD - see Data Port Selection + * comment given at the top of the file + */ + tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd); + tp->csr6 |= 0x00400000; + + tulip_restart_rxtx(tp); + + return; + + } + } + + if ((tp->nwayset && (csr5 & 0x08000000) + && (dev->if_port == 3 || dev->if_port == 5) + && (csr12 & 2) == 2) || (tp->nway && (csr5 & (TPLnkFail)))) { + + /* Link blew? Maybe restart NWay. */ + + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Ugh! Link blew?\n", dev->name); + + del_timer_sync(&tp->timer); + pnic2_start_nway(dev); + tp->timer.expires = RUN_AT(3*HZ); + add_timer(&tp->timer); + + return; + } + + + if (dev->if_port == 3 || dev->if_port == 5) { + + /* we are at 100mb and a potential link change occurred */ + + if (tulip_debug > 1) + printk(KERN_INFO"%s: PNIC2 %s link beat %s.\n", + dev->name, medianame[dev->if_port], + (csr12 & 2) ? "failed" : "good"); + + /* check 100 link beat */ + + tp->nway = 0; + tp->nwayset = 1; + + /* if failed then try doing an nway to get in sync */ + if ((csr12 & 2) && ! tp->medialock) { + del_timer_sync(&tp->timer); + pnic2_start_nway(dev); + tp->timer.expires = RUN_AT(3*HZ); + add_timer(&tp->timer); + } + + return; + } + + if (dev->if_port == 0 || dev->if_port == 4) { + + /* we are at 10mb and a potential link change occurred */ + + if (tulip_debug > 1) + printk(KERN_INFO"%s: PNIC2 %s link beat %s.\n", + dev->name, medianame[dev->if_port], + (csr12 & 4) ? "failed" : "good"); + + + tp->nway = 0; + tp->nwayset = 1; + + /* if failed, try doing an nway to get in sync */ + if ((csr12 & 4) && ! 
tp->medialock) { + del_timer_sync(&tp->timer); + pnic2_start_nway(dev); + tp->timer.expires = RUN_AT(3*HZ); + add_timer(&tp->timer); + } + + return; + } + + + if (tulip_debug > 1) + printk(KERN_INFO"%s: PNIC2 Link Change Default?\n",dev->name); + + /* if all else fails default to trying 10baseT-HD */ + dev->if_port = 0; + + /* make sure autonegotiate enable is off */ + csr14 = (inl(ioaddr + CSR14) & 0xffffff7f); + outl(csr14,ioaddr + CSR14); + + /* set to 10baseTx-HD - see Data Port Selection + * comment given at the top of the file + */ + tp->csr6 = (inl(ioaddr + CSR6) & 0xfe3bd1fd); + tp->csr6 |= 0x00400000; + + tulip_restart_rxtx(tp); +} + diff --git a/xen-2.4.16/drivers/net/tulip/timer.c b/xen-2.4.16/drivers/net/tulip/timer.c new file mode 100644 index 0000000000..4079772ae9 --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/timer.c @@ -0,0 +1,220 @@ +/* + drivers/net/tulip/timer.c + + Maintained by Jeff Garzik + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#include "tulip.h" + + +void tulip_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + u32 csr12 = inl(ioaddr + CSR12); + int next_tick = 2*HZ; + + if (tulip_debug > 2) { + printk(KERN_DEBUG "%s: Media selection tick, %s, status %8.8x mode" + " %8.8x SIA %8.8x %8.8x %8.8x %8.8x.\n", + dev->name, medianame[dev->if_port], inl(ioaddr + CSR5), + inl(ioaddr + CSR6), csr12, inl(ioaddr + CSR13), + inl(ioaddr + CSR14), inl(ioaddr + CSR15)); + } + switch (tp->chip_id) { + case DC21040: + if (!tp->medialock && csr12 & 0x0002) { /* Network error */ + printk(KERN_INFO "%s: No link beat found.\n", + dev->name); + dev->if_port = (dev->if_port == 2 ? 0 : 2); + tulip_select_media(dev, 0); + dev->trans_start = jiffies; + } + break; + case DC21041: + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: 21041 media tick CSR12 %8.8x.\n", + dev->name, csr12); + if (tp->medialock) break; + switch (dev->if_port) { + case 0: case 3: case 4: + if (csr12 & 0x0004) { /*LnkFail */ + /* 10baseT is dead. Check for activity on alternate port. */ + tp->mediasense = 1; + if (csr12 & 0x0200) + dev->if_port = 2; + else + dev->if_port = 1; + printk(KERN_INFO "%s: No 21041 10baseT link beat, Media switched to %s.\n", + dev->name, medianame[dev->if_port]); + outl(0, ioaddr + CSR13); /* Reset */ + outl(t21041_csr14[dev->if_port], ioaddr + CSR14); + outl(t21041_csr15[dev->if_port], ioaddr + CSR15); + outl(t21041_csr13[dev->if_port], ioaddr + CSR13); + next_tick = 10*HZ; /* 2.4 sec. */ + } else + next_tick = 30*HZ; + break; + case 1: /* 10base2 */ + case 2: /* AUI */ + if (csr12 & 0x0100) { + next_tick = (30*HZ); /* 30 sec. */ + tp->mediasense = 0; + } else if ((csr12 & 0x0004) == 0) { + printk(KERN_INFO "%s: 21041 media switched to 10baseT.\n", + dev->name); + dev->if_port = 0; + tulip_select_media(dev, 0); + next_tick = (24*HZ)/10; /* 2.4 sec. */ + } else if (tp->mediasense || (csr12 & 0x0002)) { + dev->if_port = 3 - dev->if_port; /* Swap ports. 
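+ * (Since if_port is restricted to 1 (10base2) or 2 (AUI) on
+ * this path, 3 - if_port just toggles between the two.)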
*/ + tulip_select_media(dev, 0); + next_tick = 20*HZ; + } else { + next_tick = 20*HZ; + } + break; + } + break; + case DC21140: + case DC21142: + case MX98713: + case COMPEX9881: + case DM910X: + default: { + struct medialeaf *mleaf; + unsigned char *p; + if (tp->mtable == NULL) { /* No EEPROM info, use generic code. */ + /* Not much that can be done. + Assume this a generic MII or SYM transceiver. */ + next_tick = 60*HZ; + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: network media monitor CSR6 %8.8x " + "CSR12 0x%2.2x.\n", + dev->name, inl(ioaddr + CSR6), csr12 & 0xff); + break; + } + mleaf = &tp->mtable->mleaf[tp->cur_index]; + p = mleaf->leafdata; + switch (mleaf->type) { + case 0: case 4: { + /* Type 0 serial or 4 SYM transceiver. Check the link beat bit. */ + int offset = mleaf->type == 4 ? 5 : 2; + s8 bitnum = p[offset]; + if (p[offset+1] & 0x80) { + if (tulip_debug > 1) + printk(KERN_DEBUG"%s: Transceiver monitor tick " + "CSR12=%#2.2x, no media sense.\n", + dev->name, csr12); + if (mleaf->type == 4) { + if (mleaf->media == 3 && (csr12 & 0x02)) + goto select_next_media; + } + break; + } + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Transceiver monitor tick: CSR12=%#2.2x" + " bit %d is %d, expecting %d.\n", + dev->name, csr12, (bitnum >> 1) & 7, + (csr12 & (1 << ((bitnum >> 1) & 7))) != 0, + (bitnum >= 0)); + /* Check that the specified bit has the proper value. */ + if ((bitnum < 0) != + ((csr12 & (1 << ((bitnum >> 1) & 7))) != 0)) { + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: Link beat detected for %s.\n", dev->name, + medianame[mleaf->media & MEDIA_MASK]); + if ((p[2] & 0x61) == 0x01) /* Bogus Znyx board. */ + goto actually_mii; + /* netif_carrier_on(dev); */ + break; + } + /* netif_carrier_off(dev); */ + if (tp->medialock) + break; + select_next_media: + if (--tp->cur_index < 0) { + /* We start again, but should instead look for default. */ + tp->cur_index = tp->mtable->leafcount - 1; + } + dev->if_port = tp->mtable->mleaf[tp->cur_index].media; + if (tulip_media_cap[dev->if_port] & MediaIsFD) + goto select_next_media; /* Skip FD entries. */ + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: No link beat on media %s," + " trying transceiver type %s.\n", + dev->name, medianame[mleaf->media & MEDIA_MASK], + medianame[tp->mtable->mleaf[tp->cur_index].media]); + tulip_select_media(dev, 0); + /* Restart the transmit process. */ + tulip_restart_rxtx(tp); + next_tick = (24*HZ)/10; + break; + } + case 1: case 3: /* 21140, 21142 MII */ + actually_mii: + if (tulip_check_duplex(dev) < 0) + { /* netif_carrier_off(dev); */ } + else + { /* netif_carrier_on(dev); */ } + next_tick = 60*HZ; + break; + case 2: /* 21142 serial block has no link beat. */ + default: + break; + } + } + break; + } + /* mod_timer synchronizes us with potential add_timer calls + * from interrupts. 
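+ * (mod_timer() re-arms the already-pending timer atomically,
+ * so a racing add_timer() from the interrupt path cannot queue
+ * the timer twice -- presumably why a del_timer()/add_timer()
+ * pair is avoided here.)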
+ */ + mod_timer(&tp->timer, RUN_AT(next_tick)); +} + + +void mxic_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 60*HZ; + + if (tulip_debug > 3) { + printk(KERN_INFO"%s: MXIC negotiation status %8.8x.\n", dev->name, + inl(ioaddr + CSR12)); + } + if (next_tick) { + mod_timer(&tp->timer, RUN_AT(next_tick)); + } +} + + +void comet_timer(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 60*HZ; + + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: Comet link status %4.4x partner capability " + "%4.4x.\n", + dev->name, inl(ioaddr + 0xB8), inl(ioaddr + 0xC8)); + /* mod_timer synchronizes us with potential add_timer calls + * from interrupts. + */ + mod_timer(&tp->timer, RUN_AT(next_tick)); +} + diff --git a/xen-2.4.16/drivers/net/tulip/tulip.h b/xen-2.4.16/drivers/net/tulip/tulip.h new file mode 100644 index 0000000000..716bc9294a --- /dev/null +++ b/xen-2.4.16/drivers/net/tulip/tulip.h @@ -0,0 +1,499 @@ +/* + drivers/net/tulip/tulip.h + + Copyright 2000,2001 The Linux Kernel Team + Written/copyright 1994-2001 by Donald Becker. + + This software may be used and distributed according to the terms + of the GNU General Public License, incorporated herein by reference. + + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#ifndef __NET_TULIP_H__ +#define __NET_TULIP_H__ + +#include +//#include +#include +#include +#include +#include +#include +#include +#include + + + +/* undefine, or define to various debugging levels (>4 == obscene levels) */ +#define TULIP_DEBUG 1 + +/* undefine USE_IO_OPS for MMIO, define for PIO */ +#ifdef CONFIG_TULIP_MMIO +# undef USE_IO_OPS +#else +# define USE_IO_OPS 1 +#endif + + + +struct tulip_chip_table { + char *chip_name; + int io_size; + int valid_intrs; /* CSR7 interrupt enable settings */ + int flags; + void (*media_timer) (unsigned long data); +}; + + +enum tbl_flag { + HAS_MII = 0x0001, + HAS_MEDIA_TABLE = 0x0002, + CSR12_IN_SROM = 0x0004, + ALWAYS_CHECK_MII = 0x0008, + HAS_ACPI = 0x0010, + MC_HASH_ONLY = 0x0020, /* Hash-only multicast filter. */ + HAS_PNICNWAY = 0x0080, + HAS_NWAY = 0x0040, /* Uses internal NWay xcvr. */ + HAS_INTR_MITIGATION = 0x0100, + IS_ASIX = 0x0200, + HAS_8023X = 0x0400, + COMET_MAC_ADDR = 0x0800, + HAS_PCI_MWI = 0x1000, +}; + + +/* chip types. careful! order is VERY IMPORTANT here, as these + * are used throughout the driver as indices into arrays */ +/* Note 21142 == 21143. */ +enum chips { + DC21040 = 0, + DC21041 = 1, + DC21140 = 2, + DC21142 = 3, DC21143 = 3, + LC82C168, + MX98713, + MX98715, + MX98725, + AX88140, + PNIC2, + COMET, + COMPEX9881, + I21145, + DM910X, +}; + + +enum MediaIs { + MediaIsFD = 1, + MediaAlwaysFD = 2, + MediaIsMII = 4, + MediaIsFx = 8, + MediaIs100 = 16 +}; + + +/* Offsets to the Command and Status Registers, "CSRs". All accesses + must be longword instructions and quadword aligned. 
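+ Each CSR therefore sits 8 bytes apart: the byte offset of CSRn
+ is n * 8, e.g. CSR12 at 0x60, matching the enum below.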
*/ +enum tulip_offsets { + CSR0 = 0, + CSR1 = 0x08, + CSR2 = 0x10, + CSR3 = 0x18, + CSR4 = 0x20, + CSR5 = 0x28, + CSR6 = 0x30, + CSR7 = 0x38, + CSR8 = 0x40, + CSR9 = 0x48, + CSR10 = 0x50, + CSR11 = 0x58, + CSR12 = 0x60, + CSR13 = 0x68, + CSR14 = 0x70, + CSR15 = 0x78, +}; + +/* register offset and bits for CFDD PCI config reg */ +enum pci_cfg_driver_reg { + CFDD = 0x40, + CFDD_Sleep = (1 << 31), + CFDD_Snooze = (1 << 30), +}; + + +/* The bits in the CSR5 status registers, mostly interrupt sources. */ +enum status_bits { + TimerInt = 0x800, + SytemError = 0x2000, + TPLnkFail = 0x1000, + TPLnkPass = 0x10, + NormalIntr = 0x10000, + AbnormalIntr = 0x8000, + RxJabber = 0x200, + RxDied = 0x100, + RxNoBuf = 0x80, + RxIntr = 0x40, + TxFIFOUnderflow = 0x20, + TxJabber = 0x08, + TxNoBuf = 0x04, + TxDied = 0x02, + TxIntr = 0x01, +}; + + +enum tulip_mode_bits { + TxThreshold = (1 << 22), + FullDuplex = (1 << 9), + TxOn = 0x2000, + AcceptBroadcast = 0x0100, + AcceptAllMulticast = 0x0080, + AcceptAllPhys = 0x0040, + AcceptRunt = 0x0008, + RxOn = 0x0002, + RxTx = (TxOn | RxOn), +}; + + +enum tulip_busconfig_bits { + MWI = (1 << 24), + MRL = (1 << 23), + MRM = (1 << 21), + CALShift = 14, + BurstLenShift = 8, +}; + + +/* The Tulip Rx and Tx buffer descriptors. */ +struct tulip_rx_desc { + s32 status; + s32 length; + u32 buffer1; + u32 buffer2; +}; + + +struct tulip_tx_desc { + s32 status; + s32 length; + u32 buffer1; + u32 buffer2; /* We use only buffer 1. */ +}; + + +enum desc_status_bits { + DescOwned = 0x80000000, + RxDescFatalErr = 0x8000, + RxWholePkt = 0x0300, +}; + + +enum t21041_csr13_bits { + csr13_eng = (0xEF0<<4), /* for eng. purposes only, hardcode at EF0h */ + csr13_aui = (1<<3), /* clear to force 10bT, set to force AUI/BNC */ + csr13_cac = (1<<2), /* CSR13/14/15 autoconfiguration */ + csr13_srl = (1<<0), /* When reset, resets all SIA functions, machines */ + + csr13_mask_auibnc = (csr13_eng | csr13_aui | csr13_srl), + csr13_mask_10bt = (csr13_eng | csr13_srl), +}; + +enum t21143_csr6_bits { + csr6_sc = (1<<31), + csr6_ra = (1<<30), + csr6_ign_dest_msb = (1<<26), + csr6_mbo = (1<<25), + csr6_scr = (1<<24), /* scramble mode flag: can't be set */ + csr6_pcs = (1<<23), /* Enables PCS functions (symbol mode requires csr6_ps be set) default is set */ + csr6_ttm = (1<<22), /* Transmit Threshold Mode, set for 10baseT, 0 for 100BaseTX */ + csr6_sf = (1<<21), /* Store and forward. If set ignores TR bits */ + csr6_hbd = (1<<19), /* Heart beat disable. Disables SQE function in 10baseT */ + csr6_ps = (1<<18), /* Port Select. 0 (defualt) = 10baseT, 1 = 100baseTX: can't be set */ + csr6_ca = (1<<17), /* Collision Offset Enable. If set uses special algorithm in low collision situations */ + csr6_trh = (1<<15), /* Transmit Threshold high bit */ + csr6_trl = (1<<14), /* Transmit Threshold low bit */ + + /*************************************************************** + * This table shows transmit threshold values based on media * + * and these two registers (from PNIC1 & 2 docs) Note: this is * + * all meaningless if sf is set. 
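+ * ("sf" being store-and-forward, csr6_sf above: the chip then
+ * waits for a complete frame before transmitting, so the
+ * thresholds tabulated below never apply.)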
+ *
+ ***************************************************************/
+
+ /***********************************
+ * (trh,trl) * 100BaseTX * 10BaseT *
+ ***********************************
+ * (0,0) * 128 * 72 *
+ * (0,1) * 256 * 96 *
+ * (1,0) * 512 * 128 *
+ * (1,1) * 1024 * 160 *
+ ***********************************/
+
+ csr6_fc = (1<<12), /* Forces a collision in next transmission (for testing in loopback mode) */
+ csr6_om_int_loop = (1<<10), /* internal (FIFO) loopback flag */
+ csr6_om_ext_loop = (1<<11), /* external (PMD) loopback flag */
+ /* set both and you get (PHY) loopback */
+ csr6_fd = (1<<9), /* Full duplex mode, disables heartbeat, no loopback */
+ csr6_pm = (1<<7), /* Pass All Multicast */
+ csr6_pr = (1<<6), /* Promiscuous mode */
+ csr6_sb = (1<<5), /* Start(1)/Stop(0) backoff counter */
+ csr6_if = (1<<4), /* Inverse Filtering, rejects only addresses in address table: can't be set */
+ csr6_pb = (1<<3), /* Pass Bad Frames, (1) causes even bad frames to be passed on */
+ csr6_ho = (1<<2), /* Hash-only filtering mode: can't be set */
+ csr6_hp = (1<<0), /* Hash/Perfect Receive Filtering Mode: can't be set */
+
+ csr6_mask_capture = (csr6_sc | csr6_ca),
+ csr6_mask_defstate = (csr6_mask_capture | csr6_mbo),
+ csr6_mask_hdcap = (csr6_mask_defstate | csr6_hbd | csr6_ps),
+ csr6_mask_hdcaptt = (csr6_mask_hdcap | csr6_trh | csr6_trl),
+ csr6_mask_fullcap = (csr6_mask_hdcaptt | csr6_fd),
+ csr6_mask_fullpromisc = (csr6_pr | csr6_pm),
+ csr6_mask_filters = (csr6_hp | csr6_ho | csr6_if),
+ csr6_mask_100bt = (csr6_scr | csr6_pcs | csr6_hbd),
+};
+
+
+/* Keep the ring sizes a power of two for efficiency.
+ Making the Tx ring too large decreases the effectiveness of channel
+ bonding and packet priority.
+ There are no ill effects from too-large receive rings. */
+#undef TX_RING_SIZE
+#undef RX_RING_SIZE
+#define TX_RING_SIZE 16
+#define RX_RING_SIZE 32
+
+#define MEDIA_MASK 31
+
+#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer. */
+
+#define TULIP_MIN_CACHE_LINE 8 /* in units of 32-bit words */
+
+#if defined(__sparc__) || defined(__hppa__)
+/* The UltraSparc PCI controllers will disconnect at every 64-byte
+ * crossing anyways so it makes no sense to tell Tulip to burst
+ * any more than that.
+ */
+#define TULIP_MAX_CACHE_LINE 16 /* in units of 32-bit words */
+#else
+#define TULIP_MAX_CACHE_LINE 32 /* in units of 32-bit words */
+#endif
+
+
+/* Ring-wrap flag in length field, use for last ring entry.
+ 0x01000000 means chain on buffer2 address,
+ 0x02000000 means use the ring start address in CSR2/3.
+ Note: Some work-alike chips do not function correctly in chained mode.
+ The ASIX chip works only in chained mode.
+ Thus we indicate ring mode, but always write the 'next' field for
+ chained mode as well.
+*/
+#define DESC_RING_WRAP 0x02000000
+
+
+#define EEPROM_SIZE 128 /* 2 << EEPROM_ADDRLEN */
+
+
+#define RUN_AT(x) (jiffies + (x))
+
+#if defined(__i386__) /* AKA get_unaligned() */
+#define get_u16(ptr) (*(u16 *)(ptr))
+#else
+#define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8))
+#endif
+
+struct medialeaf {
+ u8 type;
+ u8 media;
+ unsigned char *leafdata;
+};
+
+
+struct mediatable {
+ u16 defaultmedia;
+ u8 leafcount;
+ u8 csr12dir; /* General purpose pin directions. */
+ unsigned has_mii:1;
+ unsigned has_nonmii:1;
+ unsigned has_reset:6;
+ u32 csr15dir;
+ u32 csr15val; /* 21143 NWay setting.
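+ (The zero-length mleaf[0] just below is the old C idiom for a
+ variable-length trailing array; leafcount gives the number of
+ entries actually allocated behind the struct.)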
+ */
+ struct medialeaf mleaf[0];
+};
+
+
+struct mediainfo {
+ struct mediainfo *next;
+ int info_type;
+ int index;
+ unsigned char *info;
+};
+
+struct ring_info {
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+};
+
+
+struct tulip_private {
+ const char *product_name;
+ struct net_device *next_module;
+ struct tulip_rx_desc *rx_ring;
+ struct tulip_tx_desc *tx_ring;
+ dma_addr_t rx_ring_dma;
+ dma_addr_t tx_ring_dma;
+ /* The saved address of a sent-in-place packet/buffer, for later dev_kfree_skb(). */
+ struct ring_info tx_buffers[TX_RING_SIZE];
+ /* The addresses of receive-in-place skbuffs. */
+ struct ring_info rx_buffers[RX_RING_SIZE];
+ u16 setup_frame[96]; /* Pseudo-Tx frame to init address table. */
+ int chip_id;
+ int revision;
+ int flags;
+ struct net_device_stats stats;
+ struct timer_list timer; /* Media selection timer. */
+ u32 mc_filter[2];
+ spinlock_t lock;
+ spinlock_t mii_lock;
+ unsigned int cur_rx, cur_tx; /* The next free ring entry */
+ unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+#define RX_A_NBF_STOP 0xffffff3f /* To disable RX and RX-NOBUF ints. */
+ int fc_bit;
+ int mit_sel;
+ int mit_change; /* Signal for Interrupt Mitigation */
+#endif
+ unsigned int full_duplex:1; /* Full-duplex operation requested. */
+ unsigned int full_duplex_lock:1;
+ unsigned int fake_addr:1; /* Multiport board faked address. */
+ unsigned int default_port:4; /* Last dev->if_port value. */
+ unsigned int media2:4; /* Secondary monitored media port. */
+ unsigned int medialock:1; /* Don't sense media type. */
+ unsigned int mediasense:1; /* Media sensing in progress. */
+ unsigned int nway:1, nwayset:1; /* 21143 internal NWay. */
+ unsigned int csr0; /* CSR0 setting. */
+ unsigned int csr6; /* Current CSR6 control settings. */
+ unsigned char eeprom[EEPROM_SIZE]; /* Serial EEPROM contents. */
+ void (*link_change) (struct net_device * dev, int csr5);
+ u16 sym_advertise, mii_advertise; /* NWay capabilities advertised. */
+ u16 lpar; /* 21143 Link partner ability. */
+ u16 advertising[4];
+ signed char phys[4], mii_cnt; /* MII device addresses. */
+ struct mediatable *mtable;
+ int cur_index; /* Current media index. */
+ int saved_if_port;
+ struct pci_dev *pdev;
+ int ttimer;
+ int susp_rx;
+ unsigned long nir;
+ unsigned long base_addr;
+ int pad0, pad1; /* Used for 8-byte alignment */
+};
+
+
+struct eeprom_fixup {
+ char *name;
+ unsigned char addr0;
+ unsigned char addr1;
+ unsigned char addr2;
+ u16 newtable[32]; /* Max length below.
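+ (i.e. a fixup may supply at most 32 replacement EEPROM words.)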
+ */
+};
+
+
+/* 21142.c */
+extern u16 t21142_csr14[];
+void t21142_timer(unsigned long data);
+void t21142_start_nway(struct net_device *dev);
+void t21142_lnk_change(struct net_device *dev, int csr5);
+
+
+/* PNIC2.c */
+void pnic2_lnk_change(struct net_device *dev, int csr5);
+void pnic2_timer(unsigned long data);
+void pnic2_start_nway(struct net_device *dev);
+
+/* eeprom.c */
+void tulip_parse_eeprom(struct net_device *dev);
+int tulip_read_eeprom(long ioaddr, int location, int addr_len);
+
+/* interrupt.c */
+extern unsigned int tulip_max_interrupt_work;
+extern int tulip_rx_copybreak;
+void tulip_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
+int tulip_refill_rx(struct net_device *dev);
+
+/* media.c */
+int tulip_mdio_read(struct net_device *dev, int phy_id, int location);
+void tulip_mdio_write(struct net_device *dev, int phy_id, int location, int value);
+void tulip_select_media(struct net_device *dev, int startup);
+int tulip_check_duplex(struct net_device *dev);
+void tulip_find_mii (struct net_device *dev, int board_idx);
+
+/* pnic.c */
+void pnic_do_nway(struct net_device *dev);
+void pnic_lnk_change(struct net_device *dev, int csr5);
+void pnic_timer(unsigned long data);
+
+/* timer.c */
+void tulip_timer(unsigned long data);
+void mxic_timer(unsigned long data);
+void comet_timer(unsigned long data);
+
+/* tulip_core.c */
+extern int tulip_debug;
+extern const char * const medianame[];
+extern const char tulip_media_cap[];
+extern struct tulip_chip_table tulip_tbl[];
+extern u8 t21040_csr13[];
+extern u16 t21041_csr13[];
+extern u16 t21041_csr14[];
+extern u16 t21041_csr15[];
+
+#ifndef USE_IO_OPS
+#undef inb
+#undef inw
+#undef inl
+#undef outb
+#undef outw
+#undef outl
+#define inb(addr) readb((void*)(addr))
+#define inw(addr) readw((void*)(addr))
+#define inl(addr) readl((void*)(addr))
+#define outb(val,addr) writeb((val), (void*)(addr))
+#define outw(val,addr) writew((val), (void*)(addr))
+#define outl(val,addr) writel((val), (void*)(addr))
+#endif /* !USE_IO_OPS */
+
+
+static inline void tulip_start_rxtx(struct tulip_private *tp)
+{
+ long ioaddr = tp->base_addr;
+ outl(tp->csr6 | RxTx, ioaddr + CSR6);
+ barrier();
+ (void) inl(ioaddr + CSR6); /* mmio sync */
+}
+
+static inline void tulip_stop_rxtx(struct tulip_private *tp)
+{
+ long ioaddr = tp->base_addr;
+ u32 csr6 = inl(ioaddr + CSR6);
+
+ if (csr6 & RxTx) {
+ outl(csr6 & ~RxTx, ioaddr + CSR6);
+ barrier();
+ (void) inl(ioaddr + CSR6); /* mmio sync */
+ }
+}
+
+static inline void tulip_restart_rxtx(struct tulip_private *tp)
+{
+ tulip_stop_rxtx(tp);
+ udelay(5);
+ tulip_start_rxtx(tp);
+}
+
+#endif /* __NET_TULIP_H__ */
diff --git a/xen-2.4.16/drivers/net/tulip/tulip_core.c b/xen-2.4.16/drivers/net/tulip/tulip_core.c
new file mode 100644
index 0000000000..601046c8ea
--- /dev/null
+++ b/xen-2.4.16/drivers/net/tulip/tulip_core.c
@@ -0,0 +1,1922 @@
+/* tulip_core.c: A DEC 21x4x-family ethernet driver for Linux. */
+
+/*
+ Maintained by Jeff Garzik
+ Copyright 2000,2001 The Linux Kernel Team
+ Written/copyright 1994-2001 by Donald Becker.
+
+ This software may be used and distributed according to the terms
+ of the GNU General Public License, incorporated herein by reference.
+ + Please refer to Documentation/DocBook/tulip.{pdf,ps,html} + for more information on this driver, or visit the project + Web page at http://sourceforge.net/projects/tulip/ + +*/ + +#define DRV_NAME "tulip" +#define DRV_VERSION "0.9.15-pre9" +#define DRV_RELDATE "Nov 6, 2001" + +#include +#include +#include "tulip.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __sparc__ +#include +#endif + +static char version[] __devinitdata = + "Linux Tulip driver version " DRV_VERSION " (" DRV_RELDATE ")\n"; + + +/* A few user-configurable values. */ + +/* Maximum events (Rx packets, etc.) to handle at each interrupt. */ +static unsigned int max_interrupt_work = 25; + +#define MAX_UNITS 8 +/* Used to pass the full-duplex flag, etc. */ +static int full_duplex[MAX_UNITS]; +static int options[MAX_UNITS]; +static int mtu[MAX_UNITS]; /* Jumbo MTU for interfaces. */ + +/* The possible media types that can be set in options[] are: */ +const char * const medianame[32] = { + "10baseT", "10base2", "AUI", "100baseTx", + "10baseT-FDX", "100baseTx-FDX", "100baseT4", "100baseFx", + "100baseFx-FDX", "MII 10baseT", "MII 10baseT-FDX", "MII", + "10baseT(forced)", "MII 100baseTx", "MII 100baseTx-FDX", "MII 100baseT4", + "MII 100baseFx-HDX", "MII 100baseFx-FDX", "Home-PNA 1Mbps", "Invalid-19", + "","","","", "","","","", "","","","Transceiver reset", +}; + +/* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. */ +#if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \ + || defined(__sparc_) || defined(__ia64__) \ + || defined(__sh__) || defined(__mips__) +static int rx_copybreak = 1518; +#else +static int rx_copybreak = 100; +#endif + +/* + Set the bus performance register. + Typical: Set 16 longword cache alignment, no burst limit. + Cache alignment bits 15:14 Burst length 13:8 + 0000 No alignment 0x00000000 unlimited 0800 8 longwords + 4000 8 longwords 0100 1 longword 1000 16 longwords + 8000 16 longwords 0200 2 longwords 2000 32 longwords + C000 32 longwords 0400 4 longwords + Warning: many older 486 systems are broken and require setting 0x00A04800 + 8 longword cache alignment, 8 longword burst. + ToDo: Non-Intel setting could be better. +*/ + +#if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) +static int csr0 = 0x01A00000 | 0xE000; +#elif defined(__i386__) || defined(__powerpc__) +static int csr0 = 0x01A00000 | 0x8000; +#elif defined(__sparc__) || defined(__hppa__) +/* The UltraSparc PCI controllers will disconnect at every 64-byte + * crossing anyways so it makes no sense to tell Tulip to burst + * any more than that. + */ +static int csr0 = 0x01A00000 | 0x9000; +#elif defined(__arm__) || defined(__sh__) +static int csr0 = 0x01A00000 | 0x4800; +#else +#warning Processor architecture undefined! +static int csr0 = 0x00A00000 | 0x4800; +#endif + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. 
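+ (A multiple of HZ, so the 4*HZ below is four seconds regardless
+ of the configured tick rate.)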
*/ +#define TX_TIMEOUT (4*HZ) + + +MODULE_AUTHOR("The Linux Kernel Team"); +MODULE_DESCRIPTION("Digital 21*4* Tulip ethernet driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM(tulip_debug, "i"); +MODULE_PARM(max_interrupt_work, "i"); +MODULE_PARM(rx_copybreak, "i"); +MODULE_PARM(csr0, "i"); +MODULE_PARM(options, "1-" __MODULE_STRING(MAX_UNITS) "i"); +MODULE_PARM(full_duplex, "1-" __MODULE_STRING(MAX_UNITS) "i"); + +#define PFX DRV_NAME ": " + +#ifdef TULIP_DEBUG +int tulip_debug = TULIP_DEBUG; +#else +int tulip_debug = 1; +#endif + + + +/* + * This table use during operation for capabilities and media timer. + * + * It is indexed via the values in 'enum chips' + */ + +struct tulip_chip_table tulip_tbl[] = { + /* DC21040 */ + { "Digital DC21040 Tulip", 128, 0x0001ebef, 0, tulip_timer }, + + /* DC21041 */ + { "Digital DC21041 Tulip", 128, 0x0001ebef, + HAS_MEDIA_TABLE | HAS_NWAY, tulip_timer }, + + /* DC21140 */ + { "Digital DS21140 Tulip", 128, 0x0001ebef, + HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_PCI_MWI, tulip_timer }, + + /* DC21142, DC21143 */ + { "Digital DS21143 Tulip", 128, 0x0801fbff, + HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI | HAS_NWAY + | HAS_INTR_MITIGATION | HAS_PCI_MWI, t21142_timer }, + + /* LC82C168 */ + { "Lite-On 82c168 PNIC", 256, 0x0001fbef, + HAS_MII | HAS_PNICNWAY, pnic_timer }, + + /* MX98713 */ + { "Macronix 98713 PMAC", 128, 0x0001ebef, + HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer }, + + /* MX98715 */ + { "Macronix 98715 PMAC", 256, 0x0001ebef, + HAS_MEDIA_TABLE, mxic_timer }, + + /* MX98725 */ + { "Macronix 98725 PMAC", 256, 0x0001ebef, + HAS_MEDIA_TABLE, mxic_timer }, + + /* AX88140 */ + { "ASIX AX88140", 128, 0x0001fbff, + HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | MC_HASH_ONLY + | IS_ASIX, tulip_timer }, + + /* PNIC2 */ + { "Lite-On PNIC-II", 256, 0x0801fbff, + HAS_MII | HAS_NWAY | HAS_8023X | HAS_PCI_MWI, pnic2_timer }, + + /* COMET */ + { "ADMtek Comet", 256, 0x0001abef, + MC_HASH_ONLY | COMET_MAC_ADDR, comet_timer }, + + /* COMPEX9881 */ + { "Compex 9881 PMAC", 128, 0x0001ebef, + HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM, mxic_timer }, + + /* I21145 */ + { "Intel DS21145 Tulip", 128, 0x0801fbff, + HAS_MII | HAS_MEDIA_TABLE | ALWAYS_CHECK_MII | HAS_ACPI + | HAS_NWAY | HAS_PCI_MWI, t21142_timer }, + + /* DM910X */ + { "Davicom DM9102/DM9102A", 128, 0x0001ebef, + HAS_MII | HAS_MEDIA_TABLE | CSR12_IN_SROM | HAS_ACPI, + tulip_timer }, +}; + + +static struct pci_device_id tulip_pci_tbl[] __devinitdata = { + { 0x1011, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21040 }, + { 0x1011, 0x0014, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21041 }, + { 0x1011, 0x0009, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21140 }, + { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DC21143 }, + { 0x11AD, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, LC82C168 }, + { 0x10d9, 0x0512, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98713 }, + { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, +/* { 0x10d9, 0x0531, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98725 },*/ + { 0x125B, 0x1400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AX88140 }, + { 0x11AD, 0xc115, PCI_ANY_ID, PCI_ANY_ID, 0, 0, PNIC2 }, + { 0x1317, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x1317, 0x0985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x1317, 0x1985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x13D1, 0xAB02, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x13D1, 0xAB03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x104A, 0x0981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x104A, 0x2774, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x11F6, 0x9881, 
PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMPEX9881 }, + { 0x8086, 0x0039, PCI_ANY_ID, PCI_ANY_ID, 0, 0, I21145 }, + { 0x1282, 0x9100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, + { 0x1282, 0x9102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, DM910X }, + { 0x1113, 0x1216, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { 0x1113, 0x1217, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MX98715 }, + { 0x1113, 0x9511, PCI_ANY_ID, PCI_ANY_ID, 0, 0, COMET }, + { } /* terminate list */ +}; +MODULE_DEVICE_TABLE(pci, tulip_pci_tbl); + + +/* A full-duplex map for media types. */ +const char tulip_media_cap[32] = +{0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, }; +u8 t21040_csr13[] = {2,0x0C,8,4, 4,0,0,0, 0,0,0,0, 4,0,0,0}; + +/* 21041 transceiver register settings: 10-T, 10-2, AUI, 10-T, 10T-FD*/ +u16 t21041_csr13[] = { + csr13_mask_10bt, /* 10-T */ + csr13_mask_auibnc, /* 10-2 */ + csr13_mask_auibnc, /* AUI */ + csr13_mask_10bt, /* 10-T */ + csr13_mask_10bt, /* 10T-FD */ +}; +u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; +u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; + + +static void tulip_tx_timeout(struct net_device *dev); +static void tulip_init_ring(struct net_device *dev); +static int tulip_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int tulip_open(struct net_device *dev); +static int tulip_close(struct net_device *dev); +static void tulip_up(struct net_device *dev); +static void tulip_down(struct net_device *dev); +static struct net_device_stats *tulip_get_stats(struct net_device *dev); +static int private_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static void set_rx_mode(struct net_device *dev); + + + +static void tulip_set_power_state (struct tulip_private *tp, + int sleep, int snooze) +{ + if (tp->flags & HAS_ACPI) { + u32 tmp, newtmp; + pci_read_config_dword (tp->pdev, CFDD, &tmp); + newtmp = tmp & ~(CFDD_Sleep | CFDD_Snooze); + if (sleep) + newtmp |= CFDD_Sleep; + else if (snooze) + newtmp |= CFDD_Snooze; + if (tmp != newtmp) + pci_write_config_dword (tp->pdev, CFDD, newtmp); + } + +} + + +static void tulip_up(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + int next_tick = 3*HZ; + int i; + + /* Wake the chip from sleep/snooze mode. */ + tulip_set_power_state (tp, 0, 0); + + /* On some chip revs we must set the MII/SYM port before the reset!? */ + if (tp->mii_cnt || (tp->mtable && tp->mtable->has_mii)) + outl(0x00040000, ioaddr + CSR6); + + /* Reset the chip, holding bit 0 set at least 50 PCI cycles. */ + outl(0x00000001, ioaddr + CSR0); + udelay(100); + + /* Deassert reset. + Wait the specified 50 PCI cycles after a reset by initializing + Tx and Rx queues and the address filter list. 
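+ (Rough arithmetic: at a 33 MHz PCI clock, 50 cycles is about
+ 1.5 us, so the udelay(100) below waits with a generous margin.)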
*/ + outl(tp->csr0, ioaddr + CSR0); + udelay(100); + + if (tulip_debug > 1) + printk(KERN_DEBUG "%s: tulip_up(), irq==%d.\n", dev->name, dev->irq); + + outl(tp->rx_ring_dma, ioaddr + CSR3); + outl(tp->tx_ring_dma, ioaddr + CSR4); + tp->cur_rx = tp->cur_tx = 0; + tp->dirty_rx = tp->dirty_tx = 0; + + if (tp->flags & MC_HASH_ONLY) { + u32 addr_low = cpu_to_le32(get_unaligned((u32 *)dev->dev_addr)); + u32 addr_high = cpu_to_le32(get_unaligned((u16 *)(dev->dev_addr+4))); + if (tp->chip_id == AX88140) { + outl(0, ioaddr + CSR13); + outl(addr_low, ioaddr + CSR14); + outl(1, ioaddr + CSR13); + outl(addr_high, ioaddr + CSR14); + } else if (tp->flags & COMET_MAC_ADDR) { + outl(addr_low, ioaddr + 0xA4); + outl(addr_high, ioaddr + 0xA8); + outl(0, ioaddr + 0xAC); + outl(0, ioaddr + 0xB0); + } + } else { + /* This is set_rx_mode(), but without starting the transmitter. */ + u16 *eaddrs = (u16 *)dev->dev_addr; + u16 *setup_frm = &tp->setup_frame[15*6]; + dma_addr_t mapping; + + /* 21140 bug: you must add the broadcast address. */ + memset(tp->setup_frame, 0xff, sizeof(tp->setup_frame)); + /* Fill the final entry of the table with our physical address. */ + *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0]; + *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1]; + *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2]; + + mapping = pci_map_single(tp->pdev, tp->setup_frame, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); + tp->tx_buffers[tp->cur_tx].skb = NULL; + tp->tx_buffers[tp->cur_tx].mapping = mapping; + + /* Put the setup frame on the Tx list. */ + tp->tx_ring[tp->cur_tx].length = cpu_to_le32(0x08000000 | 192); + tp->tx_ring[tp->cur_tx].buffer1 = cpu_to_le32(mapping); + tp->tx_ring[tp->cur_tx].status = cpu_to_le32(DescOwned); + + tp->cur_tx++; + } + + tp->saved_if_port = dev->if_port; + if (dev->if_port == 0) + dev->if_port = tp->default_port; + + /* Allow selecting a default media. */ + i = 0; + if (tp->mtable == NULL) + goto media_picked; + if (dev->if_port) { + int looking_for = tulip_media_cap[dev->if_port] & MediaIsMII ? 11 : + (dev->if_port == 12 ? 0 : dev->if_port); + for (i = 0; i < tp->mtable->leafcount; i++) + if (tp->mtable->mleaf[i].media == looking_for) { + printk(KERN_INFO "%s: Using user-specified media %s.\n", + dev->name, medianame[dev->if_port]); + goto media_picked; + } + } + if ((tp->mtable->defaultmedia & 0x0800) == 0) { + int looking_for = tp->mtable->defaultmedia & MEDIA_MASK; + for (i = 0; i < tp->mtable->leafcount; i++) + if (tp->mtable->mleaf[i].media == looking_for) { + printk(KERN_INFO "%s: Using EEPROM-set media %s.\n", + dev->name, medianame[looking_for]); + goto media_picked; + } + } + /* Start sensing first non-full-duplex media. */ + for (i = tp->mtable->leafcount - 1; + (tulip_media_cap[tp->mtable->mleaf[i].media] & MediaAlwaysFD) && i > 0; i--) + ; +media_picked: + + tp->csr6 = 0; + tp->cur_index = i; + tp->nwayset = 0; + + if (dev->if_port) { + if (tp->chip_id == DC21143 && + (tulip_media_cap[dev->if_port] & MediaIsMII)) { + /* We must reset the media CSRs when we force-select MII mode. */ + outl(0x0000, ioaddr + CSR13); + outl(0x0000, ioaddr + CSR14); + outl(0x0008, ioaddr + CSR15); + } + tulip_select_media(dev, 1); + } else if (tp->chip_id == DC21041) { + dev->if_port = 0; + tp->nway = tp->mediasense = 1; + tp->nwayset = tp->lpar = 0; + outl(0x00000000, ioaddr + CSR13); + outl(0xFFFFFFFF, ioaddr + CSR14); + outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. 
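+ (The 0x0000EF01 then written to CSR13 appears to decode, per the
+ t21041_csr13_bits enum in tulip.h, as csr13_eng | csr13_srl:
+ the fixed engineering bits plus release of the SIA reset.)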
*/ + tp->csr6 = 0x80020000; + if (tp->sym_advertise & 0x0040) + tp->csr6 |= FullDuplex; + outl(tp->csr6, ioaddr + CSR6); + outl(0x0000EF01, ioaddr + CSR13); + + } else if (tp->chip_id == DC21142) { + if (tp->mii_cnt) { + tulip_select_media(dev, 1); + if (tulip_debug > 1) + printk(KERN_INFO "%s: Using MII transceiver %d, status " + "%4.4x.\n", + dev->name, tp->phys[0], tulip_mdio_read(dev, tp->phys[0], 1)); + outl(csr6_mask_defstate, ioaddr + CSR6); + tp->csr6 = csr6_mask_hdcap; + dev->if_port = 11; + outl(0x0000, ioaddr + CSR13); + outl(0x0000, ioaddr + CSR14); + } else + t21142_start_nway(dev); + } else if (tp->chip_id == PNIC2) { + /* for initial startup advertise 10/100 Full and Half */ + tp->sym_advertise = 0x01E0; + /* enable autonegotiate end interrupt */ + outl(inl(ioaddr+CSR5)| 0x00008010, ioaddr + CSR5); + outl(inl(ioaddr+CSR7)| 0x00008010, ioaddr + CSR7); + pnic2_start_nway(dev); + } else if (tp->chip_id == LC82C168 && ! tp->medialock) { + if (tp->mii_cnt) { + dev->if_port = 11; + tp->csr6 = 0x814C0000 | (tp->full_duplex ? 0x0200 : 0); + outl(0x0001, ioaddr + CSR15); + } else if (inl(ioaddr + CSR5) & TPLnkPass) + pnic_do_nway(dev); + else { + /* Start with 10mbps to do autonegotiation. */ + outl(0x32, ioaddr + CSR12); + tp->csr6 = 0x00420000; + outl(0x0001B078, ioaddr + 0xB8); + outl(0x0201B078, ioaddr + 0xB8); + next_tick = 1*HZ; + } + } else if ((tp->chip_id == MX98713 || tp->chip_id == COMPEX9881) + && ! tp->medialock) { + dev->if_port = 0; + tp->csr6 = 0x01880000 | (tp->full_duplex ? 0x0200 : 0); + outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); + } else if (tp->chip_id == MX98715 || tp->chip_id == MX98725) { + /* Provided by BOLO, Macronix - 12/10/1998. */ + dev->if_port = 0; + tp->csr6 = 0x01a80200; + outl(0x0f370000 | inw(ioaddr + 0x80), ioaddr + 0x80); + outl(0x11000 | inw(ioaddr + 0xa0), ioaddr + 0xa0); + } else if (tp->chip_id == COMET) { + /* Enable automatic Tx underrun recovery. */ + outl(inl(ioaddr + 0x88) | 1, ioaddr + 0x88); + dev->if_port = tp->mii_cnt ? 11 : 0; + tp->csr6 = 0x00040000; + } else if (tp->chip_id == AX88140) { + tp->csr6 = tp->mii_cnt ? 0x00040100 : 0x00000100; + } else + tulip_select_media(dev, 1); + + /* Start the chip's Tx to process setup frame. */ + tulip_stop_rxtx(tp); + barrier(); + udelay(5); + outl(tp->csr6 | TxOn, ioaddr + CSR6); + + /* Enable interrupts by setting the interrupt mask. */ + outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR5); + outl(tulip_tbl[tp->chip_id].valid_intrs, ioaddr + CSR7); + tulip_start_rxtx(tp); + outl(0, ioaddr + CSR2); /* Rx poll demand */ + + if (tulip_debug > 2) { + printk(KERN_DEBUG "%s: Done tulip_up(), CSR0 %8.8x, CSR5 %8.8x CSR6 %8.8x.\n", + dev->name, inl(ioaddr + CSR0), inl(ioaddr + CSR5), + inl(ioaddr + CSR6)); + } + + /* Set the timer to switch to check for link beat and perhaps switch + to an alternate media type. 
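+ (The handler invoked is the per-chip media_timer hook from
+ tulip_tbl[], e.g. tulip_timer or t21142_timer; RUN_AT(next_tick)
+ arms it to fire next_tick jiffies from now.)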
*/ + tp->timer.expires = RUN_AT(next_tick); + add_timer(&tp->timer); +} + +#ifdef CONFIG_NET_HW_FLOWCONTROL +/* Enable receiver */ +void tulip_xon(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + + clear_bit(tp->fc_bit, &netdev_fc_xoff); + if (netif_running(dev)){ + + tulip_refill_rx(dev); + outl(tulip_tbl[tp->chip_id].valid_intrs, dev->base_addr+CSR7); + } +} +#endif + +static int +tulip_open(struct net_device *dev) +{ +#ifdef CONFIG_NET_HW_FLOWCONTROL + struct tulip_private *tp = (struct tulip_private *)dev->priv; +#endif + int retval; + MOD_INC_USE_COUNT; + + if ((retval = request_irq(dev->irq, &tulip_interrupt, SA_SHIRQ, dev->name, dev))) { + MOD_DEC_USE_COUNT; + return retval; + } + + tulip_init_ring (dev); + + tulip_up (dev); + +#ifdef CONFIG_NET_HW_FLOWCONTROL + tp->fc_bit = netdev_register_fc(dev, tulip_xon); +#endif + + netif_start_queue (dev); + + return 0; +} + + +static void tulip_tx_timeout(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + unsigned long flags; + + spin_lock_irqsave (&tp->lock, flags); + + if (tulip_media_cap[dev->if_port] & MediaIsMII) { + /* Do nothing -- the media monitor should handle this. */ + if (tulip_debug > 1) + printk(KERN_WARNING "%s: Transmit timeout using MII device.\n", + dev->name); + } else if (tp->chip_id == DC21040) { + if ( !tp->medialock && inl(ioaddr + CSR12) & 0x0002) { + dev->if_port = (dev->if_port == 2 ? 0 : 2); + printk(KERN_INFO "%s: 21040 transmit timed out, switching to " + "%s.\n", + dev->name, medianame[dev->if_port]); + tulip_select_media(dev, 0); + } + goto out; + } else if (tp->chip_id == DC21041) { + int csr12 = inl(ioaddr + CSR12); + + printk(KERN_WARNING "%s: 21041 transmit timed out, status %8.8x, " + "CSR12 %8.8x, CSR13 %8.8x, CSR14 %8.8x, resetting...\n", + dev->name, inl(ioaddr + CSR5), csr12, + inl(ioaddr + CSR13), inl(ioaddr + CSR14)); + tp->mediasense = 1; + if ( ! tp->medialock) { + if (dev->if_port == 1 || dev->if_port == 2) + if (csr12 & 0x0004) { + dev->if_port = 2 - dev->if_port; + } else + dev->if_port = 0; + else + dev->if_port = 1; + tulip_select_media(dev, 0); + } + } else if (tp->chip_id == DC21140 || tp->chip_id == DC21142 + || tp->chip_id == MX98713 || tp->chip_id == COMPEX9881 + || tp->chip_id == DM910X) { + printk(KERN_WARNING "%s: 21140 transmit timed out, status %8.8x, " + "SIA %8.8x %8.8x %8.8x %8.8x, resetting...\n", + dev->name, inl(ioaddr + CSR5), inl(ioaddr + CSR12), + inl(ioaddr + CSR13), inl(ioaddr + CSR14), inl(ioaddr + CSR15)); + if ( ! tp->medialock && tp->mtable) { + do + --tp->cur_index; + while (tp->cur_index >= 0 + && (tulip_media_cap[tp->mtable->mleaf[tp->cur_index].media] + & MediaIsFD)); + if (--tp->cur_index < 0) { + /* We start again, but should instead look for default. 
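+ (cur_index has run off the front of the media table here, so
+ wrap to the last leaf rather than indexing with -1.)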
*/ + tp->cur_index = tp->mtable->leafcount - 1; + } + tulip_select_media(dev, 0); + printk(KERN_WARNING "%s: transmit timed out, switching to %s " + "media.\n", dev->name, medianame[dev->if_port]); + } + } else if (tp->chip_id == PNIC2) { + printk(KERN_WARNING "%s: PNIC2 transmit timed out, status %8.8x, " + "CSR6/7 %8.8x / %8.8x CSR12 %8.8x, resetting...\n", + dev->name, (int)inl(ioaddr + CSR5), (int)inl(ioaddr + CSR6), + (int)inl(ioaddr + CSR7), (int)inl(ioaddr + CSR12)); + } else { + printk(KERN_WARNING "%s: Transmit timed out, status %8.8x, CSR12 " + "%8.8x, resetting...\n", + dev->name, inl(ioaddr + CSR5), inl(ioaddr + CSR12)); + dev->if_port = 0; + } + +#if defined(way_too_many_messages) + if (tulip_debug > 3) { + int i; + for (i = 0; i < RX_RING_SIZE; i++) { + u8 *buf = (u8 *)(tp->rx_ring[i].buffer1); + int j; + printk(KERN_DEBUG "%2d: %8.8x %8.8x %8.8x %8.8x " + "%2.2x %2.2x %2.2x.\n", + i, (unsigned int)tp->rx_ring[i].status, + (unsigned int)tp->rx_ring[i].length, + (unsigned int)tp->rx_ring[i].buffer1, + (unsigned int)tp->rx_ring[i].buffer2, + buf[0], buf[1], buf[2]); + for (j = 0; buf[j] != 0xee && j < 1600; j++) + if (j < 100) printk(" %2.2x", buf[j]); + printk(" j=%d.\n", j); + } + printk(KERN_DEBUG " Rx ring %8.8x: ", (int)tp->rx_ring); + for (i = 0; i < RX_RING_SIZE; i++) + printk(" %8.8x", (unsigned int)tp->rx_ring[i].status); + printk("\n" KERN_DEBUG " Tx ring %8.8x: ", (int)tp->tx_ring); + for (i = 0; i < TX_RING_SIZE; i++) + printk(" %8.8x", (unsigned int)tp->tx_ring[i].status); + printk("\n"); + } +#endif + + /* Stop and restart the chip's Tx processes . */ +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (tp->fc_bit && test_bit(tp->fc_bit,&netdev_fc_xoff)) + printk("BUG tx_timeout restarting rx when fc on\n"); +#endif + tulip_restart_rxtx(tp); + /* Trigger an immediate transmit demand. */ + outl(0, ioaddr + CSR1); + + tp->stats.tx_errors++; + +out: + spin_unlock_irqrestore (&tp->lock, flags); + dev->trans_start = jiffies; + netif_wake_queue (dev); +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void tulip_init_ring(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int i; + + tp->susp_rx = 0; + tp->ttimer = 0; + tp->nir = 0; + + for (i = 0; i < RX_RING_SIZE; i++) { + tp->rx_ring[i].status = 0x00000000; + tp->rx_ring[i].length = cpu_to_le32(PKT_BUF_SZ); + tp->rx_ring[i].buffer2 = cpu_to_le32(tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * (i + 1)); + tp->rx_buffers[i].skb = NULL; + tp->rx_buffers[i].mapping = 0; + } + /* Mark the last entry as wrapping the ring. */ + tp->rx_ring[i-1].length = cpu_to_le32(PKT_BUF_SZ | DESC_RING_WRAP); + tp->rx_ring[i-1].buffer2 = cpu_to_le32(tp->rx_ring_dma); + + for (i = 0; i < RX_RING_SIZE; i++) { + dma_addr_t mapping; + + /* Note the receive buffer must be longword aligned. + dev_alloc_skb() provides 16 byte alignment. But do *not* + use skb_reserve() to align the IP header! */ + struct sk_buff *skb = dev_alloc_skb(PKT_BUF_SZ); + tp->rx_buffers[i].skb = skb; + if (skb == NULL) + break; + mapping = pci_map_single(tp->pdev, skb->tail, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + tp->rx_buffers[i].mapping = mapping; + skb->dev = dev; /* Mark as being used by this device. */ + tp->rx_ring[i].status = cpu_to_le32(DescOwned); /* Owned by Tulip chip */ + tp->rx_ring[i].buffer1 = cpu_to_le32(mapping); + } + tp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + /* The Tx buffer descriptor is filled in as needed, but we + do need to clear the ownership bit. 
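+ (DescOwned, bit 31 of the status word, is what hands a
+ descriptor to the chip; zeroing status here keeps the chip away
+ from descriptors the driver has not filled in yet.)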
*/ + for (i = 0; i < TX_RING_SIZE; i++) { + tp->tx_buffers[i].skb = NULL; + tp->tx_buffers[i].mapping = 0; + tp->tx_ring[i].status = 0x00000000; + tp->tx_ring[i].buffer2 = cpu_to_le32(tp->tx_ring_dma + sizeof(struct tulip_tx_desc) * (i + 1)); + } + tp->tx_ring[i-1].buffer2 = cpu_to_le32(tp->tx_ring_dma); +} + +static int +tulip_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + int entry; + u32 flag; + dma_addr_t mapping; + + spin_lock_irq(&tp->lock); + + /* Calculate the next Tx descriptor entry. */ + entry = tp->cur_tx % TX_RING_SIZE; + + tp->tx_buffers[entry].skb = skb; + mapping = pci_map_single(tp->pdev, skb->data, + skb->len, PCI_DMA_TODEVICE); + tp->tx_buffers[entry].mapping = mapping; + tp->tx_ring[entry].buffer1 = cpu_to_le32(mapping); + + if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE/2) {/* Typical path */ + flag = 0x60000000; /* No interrupt */ + } else if (tp->cur_tx - tp->dirty_tx == TX_RING_SIZE/2) { + flag = 0xe0000000; /* Tx-done intr. */ + } else if (tp->cur_tx - tp->dirty_tx < TX_RING_SIZE - 2) { + flag = 0x60000000; /* No Tx-done intr. */ + } else { /* Leave room for set_rx_mode() to fill entries. */ + flag = 0xe0000000; /* Tx-done intr. */ + netif_stop_queue(dev); + } + if (entry == TX_RING_SIZE-1) + flag = 0xe0000000 | DESC_RING_WRAP; + + tp->tx_ring[entry].length = cpu_to_le32(skb->len | flag); + /* if we were using Transmit Automatic Polling, we would need a + * wmb() here. */ + tp->tx_ring[entry].status = cpu_to_le32(DescOwned); + wmb(); + + tp->cur_tx++; + + /* Trigger an immediate transmit demand. */ + outl(0, dev->base_addr + CSR1); + + spin_unlock_irq(&tp->lock); + + dev->trans_start = jiffies; + + return 0; +} + +static void tulip_clean_tx_ring(struct tulip_private *tp) +{ + unsigned int dirty_tx; + + for (dirty_tx = tp->dirty_tx ; tp->cur_tx - dirty_tx > 0; + dirty_tx++) { + int entry = dirty_tx % TX_RING_SIZE; + int status = le32_to_cpu(tp->tx_ring[entry].status); + + if (status < 0) { + tp->stats.tx_errors++; /* It wasn't Txed */ + tp->tx_ring[entry].status = 0; + } + + /* Check for Tx filter setup frames. */ + if (tp->tx_buffers[entry].skb == NULL) { + /* test because dummy frames not mapped */ + if (tp->tx_buffers[entry].mapping) + pci_unmap_single(tp->pdev, + tp->tx_buffers[entry].mapping, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); + continue; + } + + pci_unmap_single(tp->pdev, tp->tx_buffers[entry].mapping, + tp->tx_buffers[entry].skb->len, + PCI_DMA_TODEVICE); + + /* Free the original skb. */ + dev_kfree_skb_irq(tp->tx_buffers[entry].skb); + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; + } +} + +static void tulip_down (struct net_device *dev) +{ + long ioaddr = dev->base_addr; + struct tulip_private *tp = (struct tulip_private *) dev->priv; + unsigned long flags; + + del_timer_sync (&tp->timer); + + spin_lock_irqsave (&tp->lock, flags); + + /* Disable interrupts by clearing the interrupt mask. */ + outl (0x00000000, ioaddr + CSR7); + + /* Stop the Tx and Rx processes. */ + tulip_stop_rxtx(tp); + + /* prepare receive buffers */ + tulip_refill_rx(dev); + + /* release any unconsumed transmit buffers */ + tulip_clean_tx_ring(tp); + + /* 21040 -- Leave the card in 10baseT state. 
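+ (The CSR8 read below also serves as a final harvest of the
+ missed-frame counter, which clears on read.)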
*/ + if (tp->chip_id == DC21040) + outl (0x00000004, ioaddr + CSR13); + + if (inl (ioaddr + CSR6) != 0xffffffff) + tp->stats.rx_missed_errors += inl (ioaddr + CSR8) & 0xffff; + + spin_unlock_irqrestore (&tp->lock, flags); + + init_timer(&tp->timer); + tp->timer.data = (unsigned long)dev; + tp->timer.function = tulip_tbl[tp->chip_id].media_timer; + + dev->if_port = tp->saved_if_port; + + /* Leave the driver in snooze, not sleep, mode. */ + tulip_set_power_state (tp, 0, 1); +} + + +static int tulip_close (struct net_device *dev) +{ + long ioaddr = dev->base_addr; + struct tulip_private *tp = (struct tulip_private *) dev->priv; + int i; + + netif_stop_queue (dev); + +#ifdef CONFIG_NET_HW_FLOWCONTROL + if (tp->fc_bit) { + int bit = tp->fc_bit; + tp->fc_bit = 0; + netdev_unregister_fc(bit); + } +#endif + tulip_down (dev); + + if (tulip_debug > 1) + printk (KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", + dev->name, inl (ioaddr + CSR5)); + + free_irq (dev->irq, dev); + + /* Free all the skbuffs in the Rx queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + struct sk_buff *skb = tp->rx_buffers[i].skb; + dma_addr_t mapping = tp->rx_buffers[i].mapping; + + tp->rx_buffers[i].skb = NULL; + tp->rx_buffers[i].mapping = 0; + + tp->rx_ring[i].status = 0; /* Not owned by Tulip chip. */ + tp->rx_ring[i].length = 0; + tp->rx_ring[i].buffer1 = 0xBADF00D0; /* An invalid address. */ + if (skb) { + pci_unmap_single(tp->pdev, mapping, PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); + dev_kfree_skb (skb); + } + } + for (i = 0; i < TX_RING_SIZE; i++) { + struct sk_buff *skb = tp->tx_buffers[i].skb; + + if (skb != NULL) { + pci_unmap_single(tp->pdev, tp->tx_buffers[i].mapping, + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb (skb); + } + tp->tx_buffers[i].skb = NULL; + tp->tx_buffers[i].mapping = 0; + } + + MOD_DEC_USE_COUNT; + + return 0; +} + +static struct net_device_stats *tulip_get_stats(struct net_device *dev) +{ + struct tulip_private *tp = (struct tulip_private *)dev->priv; + long ioaddr = dev->base_addr; + + if (netif_running(dev)) { + unsigned long flags; + + spin_lock_irqsave (&tp->lock, flags); + + tp->stats.rx_missed_errors += inl(ioaddr + CSR8) & 0xffff; + + spin_unlock_irqrestore(&tp->lock, flags); + } + + return &tp->stats; +} + + +static int netdev_ethtool_ioctl(struct net_device *dev, void *useraddr) +{ + struct tulip_private *np = dev->priv; + u32 ethcmd; + + if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_GDRVINFO: { + struct ethtool_drvinfo info = {ETHTOOL_GDRVINFO}; + strcpy(info.driver, DRV_NAME); + strcpy(info.version, DRV_VERSION); + strcpy(info.bus_info, np->pdev->slot_name); + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; + } + + } + + return -EOPNOTSUPP; +} + +/* Provide ioctl() calls to examine the MII xcvr state. */ +static int private_ioctl (struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct tulip_private *tp = dev->priv; + long ioaddr = dev->base_addr; + struct mii_ioctl_data *data = (struct mii_ioctl_data *) & rq->ifr_data; + const unsigned int phy_idx = 0; + int phy = tp->phys[phy_idx] & 0x1f; + unsigned int regnum = data->reg_num; + + switch (cmd) { + case SIOCETHTOOL: + return netdev_ethtool_ioctl(dev, (void *) rq->ifr_data); + + case SIOCGMIIPHY: /* Get address of MII PHY in use. 
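+ (Note the fall-through: after phy_id is filled in, control runs
+ on into the SIOCGMIIREG case, so the same call also returns a
+ register read.)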
*/ + case SIOCDEVPRIVATE: /* for binary compat, remove in 2.5 */ + if (tp->mii_cnt) + data->phy_id = phy; + else if (tp->flags & HAS_NWAY) + data->phy_id = 32; + else if (tp->chip_id == COMET) + data->phy_id = 1; + else + return -ENODEV; + + case SIOCGMIIREG: /* Read MII PHY register. */ + case SIOCDEVPRIVATE+1: /* for binary compat, remove in 2.5 */ + if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) { + int csr12 = inl (ioaddr + CSR12); + int csr14 = inl (ioaddr + CSR14); + switch (regnum) { + case 0: + if (((csr14<<5) & 0x1000) || + (dev->if_port == 5 && tp->nwayset)) + data->val_out = 0x1000; + else + data->val_out = (tulip_media_cap[dev->if_port]&MediaIs100 ? 0x2000 : 0) + | (tulip_media_cap[dev->if_port]&MediaIsFD ? 0x0100 : 0); + break; + case 1: + data->val_out = + 0x1848 + + ((csr12&0x7000) == 0x5000 ? 0x20 : 0) + + ((csr12&0x06) == 6 ? 0 : 4); + if (tp->chip_id != DC21041) + data->val_out |= 0x6048; + break; + case 4: + /* Advertised value, bogus 10baseTx-FD value from CSR6. */ + data->val_out = + ((inl(ioaddr + CSR6) >> 3) & 0x0040) + + ((csr14 >> 1) & 0x20) + 1; + if (tp->chip_id != DC21041) + data->val_out |= ((csr14 >> 9) & 0x03C0); + break; + case 5: data->val_out = tp->lpar; break; + default: data->val_out = 0; break; + } + } else { + data->val_out = tulip_mdio_read (dev, data->phy_id & 0x1f, regnum); + } + return 0; + + case SIOCSMIIREG: /* Write MII PHY register. */ + case SIOCDEVPRIVATE+2: /* for binary compat, remove in 2.5 */ + if (!capable (CAP_NET_ADMIN)) + return -EPERM; + if (regnum & ~0x1f) + return -EINVAL; + if (data->phy_id == phy) { + u16 value = data->val_in; + switch (regnum) { + case 0: /* Check for autonegotiation on or reset. */ + tp->full_duplex_lock = (value & 0x9000) ? 0 : 1; + if (tp->full_duplex_lock) + tp->full_duplex = (value & 0x0100) ? 1 : 0; + break; + case 4: + tp->advertising[phy_idx] = + tp->mii_advertise = data->val_in; + break; + } + } + if (data->phy_id == 32 && (tp->flags & HAS_NWAY)) { + u16 value = data->val_in; + if (regnum == 0) { + if ((value & 0x1200) == 0x1200) { + if (tp->chip_id == PNIC2) { + pnic2_start_nway (dev); + } else { + t21142_start_nway (dev); + } + } + } else if (regnum == 4) + tp->sym_advertise = value; + } else { + tulip_mdio_write (dev, data->phy_id & 0x1f, regnum, data->val_in); + } + return 0; + default: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; +} + + +/* Set or clear the multicast filter for this adaptor. + Note that we only use exclusion around actually queueing the + new frame, not around filling tp->setup_frame. This is non-deterministic + when re-entered but still correct. */ + +/* The little-endian AUTODIN32 ethernet CRC calculation. + N.B. Do not use for bulk data, use a table-based routine instead. + This is common code and should be moved to net/core/crc.c */ +static unsigned const ethernet_polynomial_le = 0xedb88320U; +static inline u32 ether_crc_le(int length, unsigned char *data) +{ + u32 crc = 0xffffffff; /* Initial value. 
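+ (Bit-reflected CRC-32: each input bit is XORed into the low end
+ of the register and, on a 1, the register shifts right and XORs
+ with the reversed polynomial 0xedb88320 -- see the loop below.)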
+ */
+ while(--length >= 0) {
+ unsigned char current_octet = *data++;
+ int bit;
+ for (bit = 8; --bit >= 0; current_octet >>= 1) {
+ if ((crc ^ current_octet) & 1) {
+ crc >>= 1;
+ crc ^= ethernet_polynomial_le;
+ } else
+ crc >>= 1;
+ }
+ }
+ return crc;
+}
+static unsigned const ethernet_polynomial = 0x04c11db7U;
+static inline u32 ether_crc(int length, unsigned char *data)
+{
+ int crc = -1;
+
+ while(--length >= 0) {
+ unsigned char current_octet = *data++;
+ int bit;
+ for (bit = 0; bit < 8; bit++, current_octet >>= 1)
+ crc = (crc << 1) ^
+ ((crc < 0) ^ (current_octet & 1) ? ethernet_polynomial : 0);
+ }
+ return crc;
+}
+
+#undef set_bit_le
+#define set_bit_le(i,p) do { ((char *)(p))[(i)/8] |= (1<<((i)%8)); } while(0)
+
+static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
+{
+ struct tulip_private *tp = (struct tulip_private *)dev->priv;
+ u16 hash_table[32];
+ struct dev_mc_list *mclist;
+ int i;
+ u16 *eaddrs;
+
+ memset(hash_table, 0, sizeof(hash_table));
+ set_bit_le(255, hash_table); /* Broadcast entry */
+ /* This should work on big-endian machines as well. */
+ for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count;
+ i++, mclist = mclist->next) {
+ int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+
+ set_bit_le(index, hash_table);
+ }
+
+ /* now copy the hash words into the setup frame, doubling each */
+ for (i = 0; i < 32; i++) {
+ *setup_frm++ = hash_table[i];
+ *setup_frm++ = hash_table[i];
+ }
+ setup_frm = &tp->setup_frame[13*6];
+
+ /* Fill the final entry with our physical address. */
+ eaddrs = (u16 *)dev->dev_addr;
+ *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+ *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+ *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+}
+
+static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
+{
+ struct tulip_private *tp = (struct tulip_private *)dev->priv;
+ struct dev_mc_list *mclist;
+ int i;
+ u16 *eaddrs;
+
+ /* We have <= 14 addresses so we can use the wonderful
+ 16 address perfect filtering of the Tulip. */
+ for (i = 0, mclist = dev->mc_list; i < dev->mc_count;
+ i++, mclist = mclist->next) {
+ eaddrs = (u16 *)mclist->dmi_addr;
+ *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+ *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+ *setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
+ }
+ /* Fill the unused entries with the broadcast address. */
+ memset(setup_frm, 0xff, (15-i)*12);
+ setup_frm = &tp->setup_frame[15*6];
+
+ /* Fill the final entry with our physical address. */
+ eaddrs = (u16 *)dev->dev_addr;
+ *setup_frm++ = eaddrs[0]; *setup_frm++ = eaddrs[0];
+ *setup_frm++ = eaddrs[1]; *setup_frm++ = eaddrs[1];
+ *setup_frm++ = eaddrs[2]; *setup_frm++ = eaddrs[2];
+}
+
+
+static void set_rx_mode(struct net_device *dev)
+{
+ struct tulip_private *tp = (struct tulip_private *)dev->priv;
+ long ioaddr = dev->base_addr;
+ int csr6;
+
+ csr6 = inl(ioaddr + CSR6) & ~0x00D5;
+
+ tp->csr6 &= ~0x00D5;
+ if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */
+ tp->csr6 |= AcceptAllMulticast | AcceptAllPhys;
+ csr6 |= AcceptAllMulticast | AcceptAllPhys;
+ /* Unconditionally log net taps. */
+ printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name);
+ } else if ((dev->mc_count > 1000) || (dev->flags & IFF_ALLMULTI)) {
+ /* Too many to filter well -- accept all multicasts. */
+ tp->csr6 |= AcceptAllMulticast;
+ csr6 |= AcceptAllMulticast;
+ } else if (tp->flags & MC_HASH_ONLY) {
+ /* Some work-alikes have only a 64-entry hash filter table.
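+ (Hence filterbit &= 0x3f below: the address CRC is cut down to
+ a 6-bit index, filterbit >> 5 picking one of the two 32-bit
+ mc_filter words and filterbit & 31 the bit within it.)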
*/ + /* Should verify correctness on big-endian/__powerpc__ */ + struct dev_mc_list *mclist; + int i; + if (dev->mc_count > 64) { /* Arbitrary non-effective limit. */ + tp->csr6 |= AcceptAllMulticast; + csr6 |= AcceptAllMulticast; + } else { + u32 mc_filter[2] = {0, 0}; /* Multicast hash filter */ + int filterbit; + for (i = 0, mclist = dev->mc_list; mclist && i < dev->mc_count; + i++, mclist = mclist->next) { + if (tp->flags & COMET_MAC_ADDR) + filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr); + else + filterbit = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + filterbit &= 0x3f; + mc_filter[filterbit >> 5] |= cpu_to_le32(1 << (filterbit & 31)); + if (tulip_debug > 2) { + printk(KERN_INFO "%s: Added filter for %2.2x:%2.2x:%2.2x:" + "%2.2x:%2.2x:%2.2x %8.8x bit %d.\n", dev->name, + mclist->dmi_addr[0], mclist->dmi_addr[1], + mclist->dmi_addr[2], mclist->dmi_addr[3], + mclist->dmi_addr[4], mclist->dmi_addr[5], + ether_crc(ETH_ALEN, mclist->dmi_addr), filterbit); + } + } + if (mc_filter[0] == tp->mc_filter[0] && + mc_filter[1] == tp->mc_filter[1]) + ; /* No change. */ + else if (tp->flags & IS_ASIX) { + outl(2, ioaddr + CSR13); + outl(mc_filter[0], ioaddr + CSR14); + outl(3, ioaddr + CSR13); + outl(mc_filter[1], ioaddr + CSR14); + } else if (tp->flags & COMET_MAC_ADDR) { + outl(mc_filter[0], ioaddr + 0xAC); + outl(mc_filter[1], ioaddr + 0xB0); + } + tp->mc_filter[0] = mc_filter[0]; + tp->mc_filter[1] = mc_filter[1]; + } + } else { + unsigned long flags; + + /* Note that only the low-address shortword of setup_frame is valid! + The values are doubled for big-endian architectures. */ + if (dev->mc_count > 14) { /* Must use a multicast hash table. */ + build_setup_frame_hash(tp->setup_frame, dev); + } else { + build_setup_frame_perfect(tp->setup_frame, dev); + } + + spin_lock_irqsave(&tp->lock, flags); + + if (tp->cur_tx - tp->dirty_tx > TX_RING_SIZE - 2) { + /* Same setup recently queued, we need not add it. */ + } else { + u32 tx_flags = 0x08000000 | 192; + unsigned int entry; + int dummy = -1; + + /* Now add this frame to the Tx list. */ + + entry = tp->cur_tx++ % TX_RING_SIZE; + + if (entry != 0) { + /* Avoid a chip errata by prefixing a dummy entry. */ + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = 0; + tp->tx_ring[entry].length = + (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0; + tp->tx_ring[entry].buffer1 = 0; + /* Must set DescOwned later to avoid race with chip */ + dummy = entry; + entry = tp->cur_tx++ % TX_RING_SIZE; + } + + tp->tx_buffers[entry].skb = NULL; + tp->tx_buffers[entry].mapping = + pci_map_single(tp->pdev, tp->setup_frame, + sizeof(tp->setup_frame), + PCI_DMA_TODEVICE); + /* Put the setup frame on the Tx list. */ + if (entry == TX_RING_SIZE-1) + tx_flags |= DESC_RING_WRAP; /* Wrap ring. */ + tp->tx_ring[entry].length = cpu_to_le32(tx_flags); + tp->tx_ring[entry].buffer1 = + cpu_to_le32(tp->tx_buffers[entry].mapping); + tp->tx_ring[entry].status = cpu_to_le32(DescOwned); + if (dummy >= 0) + tp->tx_ring[dummy].status = cpu_to_le32(DescOwned); + if (tp->cur_tx - tp->dirty_tx >= TX_RING_SIZE - 2) + netif_stop_queue(dev); + + /* Trigger an immediate transmit demand. 
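+ (A write of any value to CSR1 is a Tx poll demand -- it wakes
+ the transmit engine to rescan the descriptor ring; the value
+ itself is ignored, cf. the CSR2 Rx poll demand in tulip_up().)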
*/ + outl(0, ioaddr + CSR1); + } + + spin_unlock_irqrestore(&tp->lock, flags); + } + + outl(csr6, ioaddr + CSR6); +} + +#ifdef CONFIG_TULIP_MWI +static void __devinit tulip_mwi_config (struct pci_dev *pdev, + struct net_device *dev) +{ + struct tulip_private *tp = dev->priv; + u8 cache; + u16 pci_command, new_command; + u32 csr0; + + if (tulip_debug > 3) + printk(KERN_DEBUG "%s: tulip_mwi_config()\n", pdev->slot_name); + + tp->csr0 = csr0 = 0; + + /* check for sane cache line size. from acenic.c. */ + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache); + if ((cache << 2) != SMP_CACHE_BYTES) { + printk(KERN_WARNING "%s: PCI cache line size set incorrectly " + "(%i bytes) by BIOS/FW, correcting to %i\n", + pdev->slot_name, (cache << 2), SMP_CACHE_BYTES); + pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, + SMP_CACHE_BYTES >> 2); + udelay(5); + } + + /* read cache line size again, hardware may not have accepted + * our cache line size change + */ + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache); + if (!cache) + goto out; + + /* if we have any cache line size at all, we can do MRM */ + csr0 |= MRM; + + /* ...and barring hardware bugs, MWI */ + if (!(tp->chip_id == DC21143 && tp->revision == 65)) + csr0 |= MWI; + + /* set or disable MWI in the standard PCI command bit. + * Check for the case where mwi is desired but not available + */ + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + if (csr0 & MWI) new_command = pci_command | PCI_COMMAND_INVALIDATE; + else new_command = pci_command & ~PCI_COMMAND_INVALIDATE; + if (new_command != pci_command) { + pci_write_config_word(pdev, PCI_COMMAND, new_command); + udelay(5); + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + if ((csr0 & MWI) && (!(pci_command & PCI_COMMAND_INVALIDATE))) + csr0 &= ~MWI; + } + + /* assign per-cacheline-size cache alignment and + * burst length values + */ + switch (cache) { + case 8: + csr0 |= MRL | (1 << CALShift) | (16 << BurstLenShift); + break; + case 16: + csr0 |= MRL | (2 << CALShift) | (16 << BurstLenShift); + break; + case 32: + csr0 |= MRL | (3 << CALShift) | (32 << BurstLenShift); + break; + default: + goto out; + } + + tp->csr0 = csr0; + goto out; + + if (csr0 & MWI) { + pci_command &= ~PCI_COMMAND_INVALIDATE; + pci_write_config_word(pdev, PCI_COMMAND, pci_command); + csr0 &= ~MWI; + } + tp->csr0 = csr0 | (8 << BurstLenShift) | (1 << CALShift); + +out: + if (tulip_debug > 2) + printk(KERN_DEBUG "%s: MWI config cacheline=%d, csr0=%08x\n", + pdev->slot_name, cache, csr0); +} +#endif + +static int __devinit tulip_init_one (struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct tulip_private *tp; + /* See note below on the multiport cards. */ + static unsigned char last_phys_addr[6] = {0x00, 'L', 'i', 'n', 'u', 'x'}; + static int last_irq; + static int multiport_cnt; /* For four-port boards w/one EEPROM */ + u8 chip_rev; + int i, irq; + unsigned short sum; + u8 ee_data[EEPROM_SIZE]; + struct net_device *dev; + long ioaddr; + static int board_idx = -1; + int chip_idx = ent->driver_data; + unsigned int t2104x_mode = 0; + unsigned int eeprom_missing = 0; + unsigned int force_csr0 = 0; + +#ifndef MODULE + static int did_version; /* Already printed version info. */ + if (tulip_debug > 0 && did_version++ == 0) + printk (KERN_INFO "%s", version); +#endif + + board_idx++; + + /* + * Lan media wire a tulip chip to a wan interface. 
Needs a very + * different driver (lmc driver) + */ + + if (pdev->subsystem_vendor == PCI_VENDOR_ID_LMC) { + printk (KERN_ERR PFX "skipping LMC card.\n"); + return -ENODEV; + } + + /* + * Early DM9100's need software CRC and the DMFE driver + */ + + if (pdev->vendor == 0x1282 && pdev->device == 0x9100) + { + u32 dev_rev; + /* Read Chip revision */ + pci_read_config_dword(pdev, PCI_REVISION_ID, &dev_rev); + if(dev_rev < 0x02000030) + { + printk(KERN_ERR PFX "skipping early DM9100 with Crc bug (use dmfe)\n"); + return -ENODEV; + } + } + + /* + * Looks for early PCI chipsets where people report hangs + * without the workarounds being on. + */ + + /* Intel Saturn. Switch to 8 long words burst, 8 long word cache aligned + Aries might need this too. The Saturn errata are not pretty reading but + thankfully its an old 486 chipset. + */ + + if (pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82424, NULL)) { + csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift); + force_csr0 = 1; + } + /* The dreaded SiS496 486 chipset. Same workaround as above. */ + if (pci_find_device(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, NULL)) { + csr0 = MRL | MRM | (8 << BurstLenShift) | (1 << CALShift); + force_csr0 = 1; + } + + /* bugfix: the ASIX must have a burst limit or horrible things happen. */ + if (chip_idx == AX88140) { + if ((csr0 & 0x3f00) == 0) + csr0 |= 0x2000; + } + + /* PNIC doesn't have MWI/MRL/MRM... */ + if (chip_idx == LC82C168) + csr0 &= ~0xfff10000; /* zero reserved bits 31:20, 16 */ + + /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */ + if (pdev->vendor == 0x1282 && pdev->device == 0x9102) + csr0 &= ~0x01f100ff; + +#if defined(__sparc__) + /* DM9102A needs 32-dword alignment/burst length on sparc - chip bug? */ + if (pdev->vendor == 0x1282 && pdev->device == 0x9102) + csr0 = (csr0 & ~0xff00) | 0xe000; +#endif + + /* + * And back to business + */ + + i = pci_enable_device(pdev); + if (i) { + printk (KERN_ERR PFX + "Cannot enable tulip board #%d, aborting\n", + board_idx); + return i; + } + + ioaddr = pci_resource_start (pdev, 0); + irq = pdev->irq; + + /* alloc_etherdev ensures aligned and zeroed private structures */ + dev = alloc_etherdev (sizeof (*tp)); + if (!dev) { + printk (KERN_ERR PFX "ether device alloc failed, aborting\n"); + return -ENOMEM; + } + + if (pci_resource_len (pdev, 0) < tulip_tbl[chip_idx].io_size) { + printk (KERN_ERR PFX "%s: I/O region (0x%lx@0x%lx) too small, " + "aborting\n", pdev->slot_name, + pci_resource_len (pdev, 0), + pci_resource_start (pdev, 0)); + goto err_out_free_netdev; + } + + /* grab all resources from both PIO and MMIO regions, as we + * don't want anyone else messing around with our hardware */ + if (pci_request_regions (pdev, "tulip")) + goto err_out_free_netdev; + +#ifndef USE_IO_OPS + ioaddr = (unsigned long) ioremap (pci_resource_start (pdev, 1), + tulip_tbl[chip_idx].io_size); + if (!ioaddr) + goto err_out_free_res; +#endif + + pci_read_config_byte (pdev, PCI_REVISION_ID, &chip_rev); + + /* + * initialize private data structure 'tp' + * it is zeroed and aligned in alloc_etherdev + */ + tp = dev->priv; + + tp->rx_ring = pci_alloc_consistent(pdev, + sizeof(struct tulip_rx_desc) * RX_RING_SIZE + + sizeof(struct tulip_tx_desc) * TX_RING_SIZE, + &tp->rx_ring_dma); + if (!tp->rx_ring) + goto err_out_mtable; + tp->tx_ring = (struct tulip_tx_desc *)(tp->rx_ring + RX_RING_SIZE); + tp->tx_ring_dma = tp->rx_ring_dma + sizeof(struct tulip_rx_desc) * RX_RING_SIZE; + + tp->chip_id = chip_idx; + tp->flags = 
tulip_tbl[chip_idx].flags; + tp->pdev = pdev; + tp->base_addr = ioaddr; + tp->revision = chip_rev; + tp->csr0 = csr0; + spin_lock_init(&tp->lock); + spin_lock_init(&tp->mii_lock); + init_timer(&tp->timer); + tp->timer.data = (unsigned long)dev; + tp->timer.function = tulip_tbl[tp->chip_id].media_timer; + + dev->base_addr = ioaddr; + dev->irq = irq; + +#ifdef CONFIG_TULIP_MWI + if (!force_csr0 && (tp->flags & HAS_PCI_MWI)) + tulip_mwi_config (pdev, dev); +#else + /* MWI is broken for DC21143 rev 65... */ + if (chip_idx == DC21143 && chip_rev == 65) + tp->csr0 &= ~MWI; +#endif + + /* Stop the chip's Tx and Rx processes. */ + tulip_stop_rxtx(tp); + + pci_set_master(pdev); + + /* Clear the missed-packet counter. */ + inl(ioaddr + CSR8); + + if (chip_idx == DC21041) { + if (inl(ioaddr + CSR9) & 0x8000) { + chip_idx = DC21040; + t2104x_mode = 1; + } else { + t2104x_mode = 2; + } + } + + /* The station address ROM is read byte serially. The register must + be polled, waiting for the value to be read bit serially from the + EEPROM. + */ + sum = 0; + if (chip_idx == DC21040) { + outl(0, ioaddr + CSR9); /* Reset the pointer with a dummy write. */ + for (i = 0; i < 6; i++) { + int value, boguscnt = 100000; + do + value = inl(ioaddr + CSR9); + while (value < 0 && --boguscnt > 0); + dev->dev_addr[i] = value; + sum += value & 0xff; + } + } else if (chip_idx == LC82C168) { + for (i = 0; i < 3; i++) { + int value, boguscnt = 100000; + outl(0x600 | i, ioaddr + 0x98); + do + value = inl(ioaddr + CSR9); + while (value < 0 && --boguscnt > 0); + put_unaligned(le16_to_cpu(value), ((u16*)dev->dev_addr) + i); + sum += value & 0xffff; + } + } else if (chip_idx == COMET) { + /* No need to read the EEPROM. */ + put_unaligned(inl(ioaddr + 0xA4), (u32 *)dev->dev_addr); + put_unaligned(inl(ioaddr + 0xA8), (u16 *)(dev->dev_addr + 4)); + for (i = 0; i < 6; i ++) + sum += dev->dev_addr[i]; + } else { + /* A serial EEPROM interface, we read now and sort it out later. */ + int sa_offset = 0; + int ee_addr_size = tulip_read_eeprom(ioaddr, 0xff, 8) & 0x40000 ? 8 : 6; + + for (i = 0; i < sizeof(ee_data)/2; i++) + ((u16 *)ee_data)[i] = + le16_to_cpu(tulip_read_eeprom(ioaddr, i, ee_addr_size)); + + /* DEC now has a specification (see Notes) but early board makers + just put the address in the first EEPROM locations. */ + /* This does memcmp(eedata, eedata+16, 8) */ + for (i = 0; i < 8; i ++) + if (ee_data[i] != ee_data[16+i]) + sa_offset = 20; + if (ee_data[0] == 0xff && ee_data[1] == 0xff && ee_data[2] == 0) { + sa_offset = 2; /* Grrr, damn Matrox boards. */ + multiport_cnt = 4; + } +#ifdef CONFIG_DDB5476 + if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 6)) { + /* DDB5476 MAC address in first EEPROM locations. */ + sa_offset = 0; + /* No media table either */ + tp->flags &= ~HAS_MEDIA_TABLE; + } +#endif +#ifdef CONFIG_DDB5477 + if ((pdev->bus->number == 0) && (PCI_SLOT(pdev->devfn) == 4)) { + /* DDB5477 MAC address in first EEPROM locations. */ + sa_offset = 0; + /* No media table either */ + tp->flags &= ~HAS_MEDIA_TABLE; + } +#endif + for (i = 0; i < 6; i ++) { + dev->dev_addr[i] = ee_data[i + sa_offset]; + sum += ee_data[i + sa_offset]; + } + } + /* Lite-On boards have the address byte-swapped. */ + if ((dev->dev_addr[0] == 0xA0 || dev->dev_addr[0] == 0xC0) + && dev->dev_addr[1] == 0x00) + for (i = 0; i < 6; i+=2) { + char tmp = dev->dev_addr[i]; + dev->dev_addr[i] = dev->dev_addr[i+1]; + dev->dev_addr[i+1] = tmp; + } + /* On the Zynx 315 Etherarray and other multiport boards only the + first Tulip has an EEPROM. 
+ On Sparc systems the mac address is held in the OBP property + "local-mac-address". + The addresses of the subsequent ports are derived from the first. + Many PCI BIOSes also incorrectly report the IRQ line, so we correct + that here as well. */ + if (sum == 0 || sum == 6*0xff) { +#if defined(__sparc__) + struct pcidev_cookie *pcp = pdev->sysdata; +#endif + eeprom_missing = 1; + for (i = 0; i < 5; i++) + dev->dev_addr[i] = last_phys_addr[i]; + dev->dev_addr[i] = last_phys_addr[i] + 1; +#if defined(__sparc__) + if ((pcp != NULL) && prom_getproplen(pcp->prom_node, + "local-mac-address") == 6) { + prom_getproperty(pcp->prom_node, "local-mac-address", + dev->dev_addr, 6); + } +#endif +#if defined(__i386__) /* Patch up x86 BIOS bug. */ + if (last_irq) + irq = last_irq; +#endif + } + + for (i = 0; i < 6; i++) + last_phys_addr[i] = dev->dev_addr[i]; + last_irq = irq; + + /* The lower four bits are the media type. */ + if (board_idx >= 0 && board_idx < MAX_UNITS) { + if (options[board_idx] & MEDIA_MASK) + tp->default_port = options[board_idx] & MEDIA_MASK; + if ((options[board_idx] & FullDuplex) || full_duplex[board_idx] > 0) + tp->full_duplex = 1; + if (mtu[board_idx] > 0) + dev->mtu = mtu[board_idx]; + } + if (dev->mem_start & MEDIA_MASK) + tp->default_port = dev->mem_start & MEDIA_MASK; + if (tp->default_port) { + printk(KERN_INFO "tulip%d: Transceiver selection forced to %s.\n", + board_idx, medianame[tp->default_port & MEDIA_MASK]); + tp->medialock = 1; + if (tulip_media_cap[tp->default_port] & MediaAlwaysFD) + tp->full_duplex = 1; + } + if (tp->full_duplex) + tp->full_duplex_lock = 1; + + if (tulip_media_cap[tp->default_port] & MediaIsMII) { + u16 media2advert[] = { 0x20, 0x40, 0x03e0, 0x60, 0x80, 0x100, 0x200 }; + tp->mii_advertise = media2advert[tp->default_port - 9]; + tp->mii_advertise |= (tp->flags & HAS_8023X); /* Matching bits! */ + } + + if (tp->flags & HAS_MEDIA_TABLE) { + memcpy(tp->eeprom, ee_data, sizeof(tp->eeprom)); + + sprintf(dev->name, "tulip%d", board_idx); /* hack */ + tulip_parse_eeprom(dev); + strcpy(dev->name, "eth%d"); /* un-hack */ + } + + if ((tp->flags & ALWAYS_CHECK_MII) || + (tp->mtable && tp->mtable->has_mii) || + ( ! tp->mtable && (tp->flags & HAS_MII))) { + if (tp->mtable && tp->mtable->has_mii) { + for (i = 0; i < tp->mtable->leafcount; i++) + if (tp->mtable->mleaf[i].media == 11) { + tp->cur_index = i; + tp->saved_if_port = dev->if_port; + tulip_select_media(dev, 2); + dev->if_port = tp->saved_if_port; + break; + } + } + + /* Find the connected MII xcvrs. + Doing this in open() would allow detecting external xcvrs + later, but takes much time. */ + tulip_find_mii (dev, board_idx); + } + + /* The Tulip-specific entries in the device structure. */ + dev->open = tulip_open; + dev->hard_start_xmit = tulip_start_xmit; + dev->tx_timeout = tulip_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; + dev->stop = tulip_close; + dev->get_stats = tulip_get_stats; + dev->do_ioctl = private_ioctl; + dev->set_multicast_list = set_rx_mode; + + if (register_netdev(dev)) + goto err_out_free_ring; + + printk(KERN_INFO "%s: %s rev %d at %#3lx,", + dev->name, tulip_tbl[chip_idx].chip_name, chip_rev, ioaddr); + pci_set_drvdata(pdev, dev); + + if (t2104x_mode == 1) + printk(" 21040 compatible mode,"); + else if (t2104x_mode == 2) + printk(" 21041 mode,"); + if (eeprom_missing) + printk(" EEPROM not present,"); + for (i = 0; i < 6; i++) + printk("%c%2.2X", i ? 
':' : ' ', dev->dev_addr[i]); + printk(", IRQ %d.\n", irq); + + if (tp->chip_id == PNIC2) + tp->link_change = pnic2_lnk_change; + else if ((tp->flags & HAS_NWAY) || tp->chip_id == DC21041) + tp->link_change = t21142_lnk_change; + else if (tp->flags & HAS_PNICNWAY) + tp->link_change = pnic_lnk_change; + + /* Reset the xcvr interface and turn on heartbeat. */ + switch (chip_idx) { + case DC21041: + if (tp->sym_advertise == 0) + tp->sym_advertise = 0x0061; + outl(0x00000000, ioaddr + CSR13); + outl(0xFFFFFFFF, ioaddr + CSR14); + outl(0x00000008, ioaddr + CSR15); /* Listen on AUI also. */ + outl(inl(ioaddr + CSR6) | csr6_fd, ioaddr + CSR6); + outl(0x0000EF01, ioaddr + CSR13); + break; + case DC21040: + outl(0x00000000, ioaddr + CSR13); + outl(0x00000004, ioaddr + CSR13); + break; + case DC21140: + case DM910X: + default: + if (tp->mtable) + outl(tp->mtable->csr12dir | 0x100, ioaddr + CSR12); + break; + case DC21142: + if (tp->mii_cnt || tulip_media_cap[dev->if_port] & MediaIsMII) { + outl(csr6_mask_defstate, ioaddr + CSR6); + outl(0x0000, ioaddr + CSR13); + outl(0x0000, ioaddr + CSR14); + outl(csr6_mask_hdcap, ioaddr + CSR6); + } else + t21142_start_nway(dev); + break; + case PNIC2: + /* just do a reset for sanity sake */ + outl(0x0000, ioaddr + CSR13); + outl(0x0000, ioaddr + CSR14); + break; + case LC82C168: + if ( ! tp->mii_cnt) { + tp->nway = 1; + tp->nwayset = 0; + outl(csr6_ttm | csr6_ca, ioaddr + CSR6); + outl(0x30, ioaddr + CSR12); + outl(0x0001F078, ioaddr + CSR6); + outl(0x0201F078, ioaddr + CSR6); /* Turn on autonegotiation. */ + } + break; + case MX98713: + case COMPEX9881: + outl(0x00000000, ioaddr + CSR6); + outl(0x000711C0, ioaddr + CSR14); /* Turn on NWay. */ + outl(0x00000001, ioaddr + CSR13); + break; + case MX98715: + case MX98725: + outl(0x01a80000, ioaddr + CSR6); + outl(0xFFFFFFFF, ioaddr + CSR14); + outl(0x00001000, ioaddr + CSR12); + break; + case COMET: + /* No initialization necessary. 
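+ *
+ * (Illustrative aside: the SIA cases above share one pattern -- hold
+ * the SIA in reset through CSR13, program CSR14/CSR15, then write
+ * CSR13 again to bring it back up.  A compiled-out sketch; the
+ * csr1x_val names are hypothetical placeholders:)
+ */
+#if 0    /* example only */
+        outl(0, ioaddr + CSR13);            /* hold SIA in reset */
+        outl(csr14_val, ioaddr + CSR14);    /* autonegotiation/timing bits */
+        outl(csr15_val, ioaddr + CSR15);    /* general-purpose/LED bits */
+        outl(csr13_val, ioaddr + CSR13);    /* release reset, SIA active */
+#endif
+        /*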
*/ + break; + } + + /* put the chip in snooze mode until opened */ + tulip_set_power_state (tp, 0, 1); + + return 0; + +err_out_free_ring: + pci_free_consistent (pdev, + sizeof (struct tulip_rx_desc) * RX_RING_SIZE + + sizeof (struct tulip_tx_desc) * TX_RING_SIZE, + tp->rx_ring, tp->rx_ring_dma); + +err_out_mtable: + if (tp->mtable) + kfree (tp->mtable); +#ifndef USE_IO_OPS + iounmap((void *)ioaddr); + +err_out_free_res: +#endif + pci_release_regions (pdev); + +err_out_free_netdev: + kfree (dev); + return -ENODEV; +} + + +#ifdef CONFIG_PM + +static int tulip_suspend (struct pci_dev *pdev, u32 state) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev && netif_running (dev) && netif_device_present (dev)) { + netif_device_detach (dev); + tulip_down (dev); + /* pci_power_off(pdev, -1); */ + } + return 0; +} + + +static int tulip_resume(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev && netif_running (dev) && !netif_device_present (dev)) { +#if 1 + pci_enable_device (pdev); +#endif + /* pci_power_on(pdev); */ + tulip_up (dev); + netif_device_attach (dev); + } + return 0; +} + +#endif /* CONFIG_PM */ + + +static void __devexit tulip_remove_one (struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata (pdev); + struct tulip_private *tp; + + if (!dev) + return; + + tp = dev->priv; + pci_free_consistent (pdev, + sizeof (struct tulip_rx_desc) * RX_RING_SIZE + + sizeof (struct tulip_tx_desc) * TX_RING_SIZE, + tp->rx_ring, tp->rx_ring_dma); + unregister_netdev (dev); + if (tp->mtable) + kfree (tp->mtable); +#ifndef USE_IO_OPS + iounmap((void *)dev->base_addr); +#endif + kfree (dev); + pci_release_regions (pdev); + pci_set_drvdata (pdev, NULL); + + /* pci_power_off (pdev, -1); */ +} + + +static struct pci_driver tulip_driver = { + name: DRV_NAME, + id_table: tulip_pci_tbl, + probe: tulip_init_one, + remove: __devexit_p(tulip_remove_one), +#ifdef CONFIG_PM + suspend: tulip_suspend, + resume: tulip_resume, +#endif /* CONFIG_PM */ +}; + + +static int __init tulip_init (void) +{ +#ifdef MODULE + printk (KERN_INFO "%s", version); +#endif + + /* copy module parms into globals */ + tulip_rx_copybreak = rx_copybreak; + tulip_max_interrupt_work = max_interrupt_work; + + /* probe for and init boards */ + return pci_module_init (&tulip_driver); +} + + +static void __exit tulip_cleanup (void) +{ + pci_unregister_driver (&tulip_driver); +} + + +module_init(tulip_init); +module_exit(tulip_cleanup); diff --git a/xen-2.4.16/drivers/pci/Makefile b/xen-2.4.16/drivers/pci/Makefile new file mode 100644 index 0000000000..36972aba4d --- /dev/null +++ b/xen-2.4.16/drivers/pci/Makefile @@ -0,0 +1,43 @@ +# +# Makefile for the PCI bus specific drivers. 
+#
+
+include $(BASEDIR)/Rules.mk
+
+OBJS := pci.o quirks.o compat.o names.o setup-res.o
+
+#obj-$(CONFIG_PCI) += pci.o quirks.o compat.o names.o
+#obj-$(CONFIG_PROC_FS) += proc.o
+
+#ifndef CONFIG_SPARC64
+#obj-$(CONFIG_PCI) += setup-res.o
+#endif
+
+#
+# Some architectures use the generic PCI setup functions
+#
+#obj-$(CONFIG_ALPHA) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ARM) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_PARISC) += setup-bus.o
+#obj-$(CONFIG_SUPERH) += setup-bus.o setup-irq.o
+#obj-$(CONFIG_ALL_PPC) += setup-bus.o
+#obj-$(CONFIG_DDB5476) += setup-bus.o
+#obj-$(CONFIG_SGI_IP27) += setup-irq.o
+
+#ifndef CONFIG_X86
+#obj-y += syscall.o
+#endif
+
+default: $(OBJS)
+	$(LD) -r -o driver.o $(OBJS)
+
+clean:
+	rm -f *.o *~ core gen-devlist classlist.h devlist.h
+
+names.o: names.c devlist.h classlist.h
+
+devlist.h classlist.h: pci.ids gen-devlist
+	./gen-devlist <pci.ids
+
+/*
+ * PCI Bus Services -- Function For Backward Compatibility
+ *
+ * Copyright 1998--2000 Martin Mares
+ */
+
+#include 
+//#include 
+#include 
+
+int
+pcibios_present(void)
+{
+    return !list_empty(&pci_devices);
+}
+
+int
+pcibios_find_class(unsigned int class, unsigned short index, unsigned char *bus, unsigned char *devfn)
+{
+    const struct pci_dev *dev = NULL;
+    int cnt = 0;
+
+    while ((dev = pci_find_class(class, dev)))
+        if (index == cnt++) {
+            *bus = dev->bus->number;
+            *devfn = dev->devfn;
+            return PCIBIOS_SUCCESSFUL;
+        }
+    return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+
+int
+pcibios_find_device(unsigned short vendor, unsigned short device, unsigned short index,
+                    unsigned char *bus, unsigned char *devfn)
+{
+    const struct pci_dev *dev = NULL;
+    int cnt = 0;
+
+    while ((dev = pci_find_device(vendor, device, dev)))
+        if (index == cnt++) {
+            *bus = dev->bus->number;
+            *devfn = dev->devfn;
+            return PCIBIOS_SUCCESSFUL;
+        }
+    return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+#define PCI_OP(rw,size,type)                                                 \
+int pcibios_##rw##_config_##size (unsigned char bus, unsigned char dev_fn,   \
+                                  unsigned char where, unsigned type val)    \
+{                                                                            \
+    struct pci_dev *dev = pci_find_slot(bus, dev_fn);                        \
+    if (!dev) return PCIBIOS_DEVICE_NOT_FOUND;                               \
+    return pci_##rw##_config_##size(dev, where, val);                        \
+}
+
+PCI_OP(read, byte, char *)
+PCI_OP(read, word, short *)
+PCI_OP(read, dword, int *)
+PCI_OP(write, byte, char)
+PCI_OP(write, word, short)
+PCI_OP(write, dword, int)
diff --git a/xen-2.4.16/drivers/pci/gen-devlist.c b/xen-2.4.16/drivers/pci/gen-devlist.c
new file mode 100644
index 0000000000..7c718080e2
--- /dev/null
+++ b/xen-2.4.16/drivers/pci/gen-devlist.c
@@ -0,0 +1,125 @@
+/*
+ * Generate devlist.h and classlist.h from the PCI ID file. 
+ *
+ * (c) 1999--2000 Martin Mares
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_NAME_SIZE 79
+
+static void
+pq(FILE *f, const char *c)
+{
+    while (*c) {
+        if (*c == '"')
+            fprintf(f, "\\\"");
+        else
+            fputc(*c, f);
+        c++;
+    }
+}
+
+int
+main(void)
+{
+    char line[1024], *c, *bra, vend[8];
+    int vendors = 0;
+    int mode = 0;
+    int lino = 0;
+    int vendor_len = 0;
+    FILE *devf, *clsf;
+
+    devf = fopen("devlist.h", "w");
+    clsf = fopen("classlist.h", "w");
+    if (!devf || !clsf) {
+        fprintf(stderr, "Cannot create output file!\n");
+        return 1;
+    }
+
+    while (fgets(line, sizeof(line)-1, stdin)) {
+        lino++;
+        if ((c = strchr(line, '\n')))
+            *c = 0;
+        if (!line[0] || line[0] == '#')
+            continue;
+        if (line[1] == ' ') {
+            if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') {
+                vend[0] = line[2];
+                vend[1] = line[3];
+                vend[2] = 0;
+                mode = 2;
+            } else goto err;
+        }
+        else if (line[0] == '\t') {
+            if (line[1] == '\t')
+                continue;
+            switch (mode) {
+            case 1:
+                if (strlen(line) > 5 && line[5] == ' ') {
+                    c = line + 5;
+                    while (*c == ' ')
+                        *c++ = 0;
+                    if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+                        /* Too long, try cutting off long description */
+                        bra = strchr(c, '[');
+                        if (bra && bra > c && bra[-1] == ' ')
+                            bra[-1] = 0;
+                        if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) {
+                            fprintf(stderr, "Line %d: Device name too long\n", lino);
+                            fprintf(stderr, "%s\n", c);
+                            return 1;
+                        }
+                    }
+                    fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1);
+                    pq(devf, c);
+                    fputs("\")\n", devf);
+                } else goto err;
+                break;
+            case 2:
+                if (strlen(line) > 3 && line[3] == ' ') {
+                    c = line + 3;
+                    while (*c == ' ')
+                        *c++ = 0;
+                    fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c);
+                } else goto err;
+                break;
+            default:
+                goto err;
+            }
+        } else if (strlen(line) > 4 && line[4] == ' ') {
+            c = line + 4;
+            while (*c == ' ')
+                *c++ = 0;
+            if (vendors)
+                fputs("ENDVENDOR()\n\n", devf);
+            vendors++;
+            strcpy(vend, line);
+            vendor_len = strlen(c);
+            if (vendor_len + 24 > MAX_NAME_SIZE) {
+                fprintf(stderr, "Line %d: Vendor name too long\n", lino);
+                return 1;
+            }
+            fprintf(devf, "VENDOR(%s,\"", vend);
+            pq(devf, c);
+            fputs("\")\n", devf);
+            mode = 1;
+        } else {
+        err:
+            fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line);
+            return 1;
+        }
+    }
+    fputs("ENDVENDOR()\n\
+\n\
+#undef VENDOR\n\
+#undef DEVICE\n\
+#undef ENDVENDOR\n", devf);
+    fputs("\n#undef CLASS\n", clsf);
+
+    fclose(devf);
+    fclose(clsf);
+
+    return 0;
+}
diff --git a/xen-2.4.16/drivers/pci/names.c b/xen-2.4.16/drivers/pci/names.c
new file mode 100644
index 0000000000..90bb37e382
--- /dev/null
+++ b/xen-2.4.16/drivers/pci/names.c
@@ -0,0 +1,135 @@
+/*
+ * PCI Class and Device Name Tables
+ *
+ * Copyright 1993--1999 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang, Martin Mares
+ */
+
+#include 
+#include 
+//#include 
+#include 
+#include 
+
+#ifdef CONFIG_PCI_NAMES
+
+struct pci_device_info {
+    unsigned short device;
+    unsigned short seen;
+    const char *name;
+};
+
+struct pci_vendor_info {
+    unsigned short vendor;
+    unsigned short nr;
+    const char *name;
+    struct pci_device_info *devices;
+};
+
+/*
+ * This is ridiculous, but we want the strings in
+ * the .init section so that they don't take up
+ * real memory.. Parse the same file multiple times
+ * to get all the info. 
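+ *
+ * (Illustrative: given a devlist.h fragment such as
+ *
+ *	VENDOR(8086,"Intel Corporation")
+ *		DEVICE(8086,1229,"82557 [Ethernet Pro 100]")
+ *	ENDVENDOR()
+ *
+ * the three passes below generate a __vendorstr_8086 string, a
+ * __devicestr_80861229 string, and a matching pci_vendor_list entry.
+ * The id and name are hypothetical samples of gen-devlist output.)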
*/
+#define VENDOR( vendor, name ) static char __vendorstr_##vendor[] __initdata = name;
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name ) static char __devicestr_##vendor##device[] __initdata = name;
+#include "devlist.h"
+
+
+#define VENDOR( vendor, name ) static struct pci_device_info __devices_##vendor[] __initdata = {
+#define ENDVENDOR() };
+#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device },
+#include "devlist.h"
+
+static struct pci_vendor_info __initdata pci_vendor_list[] = {
+#define VENDOR( vendor, name ) { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor },
+#define ENDVENDOR()
+#define DEVICE( vendor, device, name )
+#include "devlist.h"
+};
+
+#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info))
+
+void __devinit pci_name_device(struct pci_dev *dev)
+{
+    const struct pci_vendor_info *vendor_p = pci_vendor_list;
+    int i = VENDORS;
+    char *name = dev->name;
+
+    do {
+        if (vendor_p->vendor == dev->vendor)
+            goto match_vendor;
+        vendor_p++;
+    } while (--i);
+
+    /* Couldn't find either the vendor nor the device */
+    sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device);
+    return;
+
+    match_vendor: {
+        struct pci_device_info *device_p = vendor_p->devices;
+        int i = vendor_p->nr;
+
+        while (i > 0) {
+            if (device_p->device == dev->device)
+                goto match_device;
+            device_p++;
+            i--;
+        }
+
+        /* Ok, found the vendor, but unknown device */
+        sprintf(name, "PCI device %04x:%04x (%s)", dev->vendor, dev->device, vendor_p->name);
+        return;
+
+        /* Full match */
+        match_device: {
+            char *n = name + sprintf(name, "%s %s", vendor_p->name, device_p->name);
+            int nr = device_p->seen + 1;
+            device_p->seen = nr;
+            if (nr > 1)
+                sprintf(n, " (#%d)", nr);
+        }
+    }
+}
+
+/*
+ * Class names. Not in .init section as they are needed in runtime.
+ */
+
+static u16 pci_class_numbers[] = {
+#define CLASS(x,y) 0x##x,
+#include "classlist.h"
+};
+
+static char *pci_class_names[] = {
+#define CLASS(x,y) y,
+#include "classlist.h"
+};
+
+char *
+pci_class_name(u32 class)
+{
+    int i;
+
+    for(i=0; i<sizeof(pci_class_numbers)/sizeof(pci_class_numbers[0]); i++)
+        if (pci_class_numbers[i] == class)
+            return pci_class_names[i];
+    return NULL;
+}
+
+#endif /* CONFIG_PCI_NAMES */
+
+/*
+ * PCI Bus Services, see include/linux/pci.h for further explanation.
+ *
+ * Copyright 1993 -- 1997 Drew Eckhardt, Frederic Potter,
+ * David Mosberger-Tang
+ *
+ * Copyright 1997 -- 2000 Martin Mares
+ */
+
+#include 
+#include 
+#include 
+#include 
+//#include 
+#include 
+//#include 
+#include 
+#include 
+#include 
+#include 
+//#include 
+//#include     /* for hotplug_path */
+//#include 
+#include 
+
+#include 
+//#include     /* isa_dma_bridge_buggy */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+LIST_HEAD(pci_root_buses);
+LIST_HEAD(pci_devices);
+
+/**
+ * pci_find_slot - locate PCI device from a given PCI slot
+ * @bus: number of PCI bus on which desired PCI device resides
+ * @devfn: encodes number of PCI slot in which the desired PCI
+ * device resides and the logical device number within that slot
+ * in case of multi-function devices.
+ *
+ * Given a PCI bus and slot/function number, the desired PCI device
+ * is located in system global list of PCI devices. If the device
+ * is found, a pointer to its data structure is returned. If no
+ * device is found, %NULL is returned. 
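+ *
+ * (Illustrative usage, compiled out; the bus/slot values are
+ * hypothetical and mirror the DDB5476 quirk in the tulip probe above,
+ * which tests bus 0, slot 6:)
+ */
+#if 0
+    struct pci_dev *d = pci_find_slot(0, PCI_DEVFN(6, 0));
+    if (d)
+        printk(KERN_INFO "PCI: found %s\n", d->name);
+#endif
+/**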
+ */ +struct pci_dev * +pci_find_slot(unsigned int bus, unsigned int devfn) +{ + struct pci_dev *dev; + + pci_for_each_dev(dev) { + if (dev->bus->number == bus && dev->devfn == devfn) + return dev; + } + return NULL; +} + +/** + * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id + * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids + * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids + * @ss_vendor: PCI subsystem vendor id to match, or %PCI_ANY_ID to match all vendor ids + * @ss_device: PCI subsystem device id to match, or %PCI_ANY_ID to match all device ids + * @from: Previous PCI device found in search, or %NULL for new search. + * + * Iterates through the list of known PCI devices. If a PCI device is + * found with a matching @vendor, @device, @ss_vendor and @ss_device, a pointer to its + * device structure is returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * Otherwise if @from is not %NULL, searches continue from next device on the global list. + */ +struct pci_dev * +pci_find_subsys(unsigned int vendor, unsigned int device, + unsigned int ss_vendor, unsigned int ss_device, + const struct pci_dev *from) +{ + struct list_head *n = from ? from->global_list.next : pci_devices.next; + + while (n != &pci_devices) { + struct pci_dev *dev = pci_dev_g(n); + if ((vendor == PCI_ANY_ID || dev->vendor == vendor) && + (device == PCI_ANY_ID || dev->device == device) && + (ss_vendor == PCI_ANY_ID || dev->subsystem_vendor == ss_vendor) && + (ss_device == PCI_ANY_ID || dev->subsystem_device == ss_device)) + return dev; + n = n->next; + } + return NULL; +} + + +/** + * pci_find_device - begin or continue searching for a PCI device by vendor/device id + * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids + * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids + * @from: Previous PCI device found in search, or %NULL for new search. + * + * Iterates through the list of known PCI devices. If a PCI device is + * found with a matching @vendor and @device, a pointer to its device structure is + * returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * Otherwise if @from is not %NULL, searches continue from next device on the global list. + */ +struct pci_dev * +pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from) +{ + return pci_find_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from); +} + + +/** + * pci_find_class - begin or continue searching for a PCI device by class + * @class: search for a PCI device with this class designation + * @from: Previous PCI device found in search, or %NULL for new search. + * + * Iterates through the list of known PCI devices. If a PCI device is + * found with a matching @class, a pointer to its device structure is + * returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * Otherwise if @from is not %NULL, searches continue from next device + * on the global list. + */ +struct pci_dev * +pci_find_class(unsigned int class, const struct pci_dev *from) +{ + struct list_head *n = from ? 
from->global_list.next : pci_devices.next; + + while (n != &pci_devices) { + struct pci_dev *dev = pci_dev_g(n); + if (dev->class == class) + return dev; + n = n->next; + } + return NULL; +} + +/** + * pci_find_capability - query for devices' capabilities + * @dev: PCI device to query + * @cap: capability code + * + * Tell if a device supports a given PCI capability. + * Returns the address of the requested capability structure within the + * device's PCI configuration space or 0 in case the device does not + * support it. Possible values for @cap: + * + * %PCI_CAP_ID_PM Power Management + * + * %PCI_CAP_ID_AGP Accelerated Graphics Port + * + * %PCI_CAP_ID_VPD Vital Product Data + * + * %PCI_CAP_ID_SLOTID Slot Identification + * + * %PCI_CAP_ID_MSI Message Signalled Interrupts + * + * %PCI_CAP_ID_CHSWP CompactPCI HotSwap + */ +int +pci_find_capability(struct pci_dev *dev, int cap) +{ + u16 status; + u8 pos, id; + int ttl = 48; + + pci_read_config_word(dev, PCI_STATUS, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + switch (dev->hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + case PCI_HEADER_TYPE_BRIDGE: + pci_read_config_byte(dev, PCI_CAPABILITY_LIST, &pos); + break; + case PCI_HEADER_TYPE_CARDBUS: + pci_read_config_byte(dev, PCI_CB_CAPABILITY_LIST, &pos); + break; + default: + return 0; + } + while (ttl-- && pos >= 0x40) { + pos &= ~3; + pci_read_config_byte(dev, pos + PCI_CAP_LIST_ID, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pci_read_config_byte(dev, pos + PCI_CAP_LIST_NEXT, &pos); + } + return 0; +} + + +/** + * pci_find_parent_resource - return resource region of parent bus of given region + * @dev: PCI device structure contains resources to be searched + * @res: child resource record for which parent is sought + * + * For given resource region of given device, return the resource + * region of parent bus the given region is contained in or where + * it should be allocated from. + */ +struct resource * +pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) +{ + const struct pci_bus *bus = dev->bus; + int i; + struct resource *best = NULL; + + for(i=0; i<4; i++) { + struct resource *r = bus->resource[i]; + if (!r) + continue; + if (res->start && !(res->start >= r->start && res->end <= r->end)) + continue; /* Not contained */ + if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM)) + continue; /* Wrong type */ + if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH)) + return r; /* Exact match */ + if ((res->flags & IORESOURCE_PREFETCH) && !(r->flags & IORESOURCE_PREFETCH)) + best = r; /* Approximating prefetchable by non-prefetchable */ + } + return best; +} + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to be suspended + * @state: Power state we're entering + * + * Transition a device to a new power state, using the Power Management + * Capabilities in the device's config space. + * + * RETURN VALUE: + * -EINVAL if trying to enter a lower state than we're already in. + * 0 if we're already in the requested state. + * -EIO if device does not support PCI PM. + * 0 if we can successfully change the power state. 
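+ *
+ * (Illustrative suspend/resume sketch, compiled out; "pdev" is a
+ * hypothetical struct pci_dev pointer:)
+ */
+#if 0
+    pci_set_power_state(pdev, 3);    /* D3hot while suspended */
+    /* ... later, before touching the hardware again ... */
+    pci_set_power_state(pdev, 0);    /* back to D0, full power */
+#endif
+/*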
+ */ + +int +pci_set_power_state(struct pci_dev *dev, int state) +{ + int pm; + u16 pmcsr; + + /* bound the state we're entering */ + if (state > 3) state = 3; + + /* Validate current state: + * Can enter D0 from any state, but if we can only go deeper + * to sleep if we're already in a low power state + */ + if (state > 0 && dev->current_state > state) + return -EINVAL; + else if (dev->current_state == state) + return 0; /* we're already there */ + + /* find PCI PM capability in list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + + /* abort if the device doesn't support PM capabilities */ + if (!pm) return -EIO; + + /* check if this device supports the desired state */ + if (state == 1 || state == 2) { + u16 pmc; + pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc); + if (state == 1 && !(pmc & PCI_PM_CAP_D1)) return -EIO; + else if (state == 2 && !(pmc & PCI_PM_CAP_D2)) return -EIO; + } + + /* If we're in D3, force entire word to 0. + * This doesn't affect PME_Status, disables PME_En, and + * sets PowerState to 0. + */ + if (dev->current_state >= 3) + pmcsr = 0; + else { + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + pmcsr |= state; + } + + /* enter specified state */ + pci_write_config_word(dev, pm + PCI_PM_CTRL, pmcsr); + + /* Mandatory power management transition delays */ + /* see PCI PM 1.1 5.6.1 table 18 */ + if(state == 3 || dev->current_state == 3) + { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ/100); + } + else if(state == 2 || dev->current_state == 2) + udelay(200); + dev->current_state = state; + + return 0; +} + +/** + * pci_save_state - save the PCI configuration space of a device before suspending + * @dev: - PCI device that we're dealing with + * @buffer: - buffer to hold config space context + * + * @buffer must be large enough to hold the entire PCI 2.2 config space + * (>= 64 bytes). + */ +int +pci_save_state(struct pci_dev *dev, u32 *buffer) +{ + int i; + if (buffer) { + /* XXX: 100% dword access ok here? */ + for (i = 0; i < 16; i++) + pci_read_config_dword(dev, i * 4,&buffer[i]); + } + return 0; +} + +/** + * pci_restore_state - Restore the saved state of a PCI device + * @dev: - PCI device that we're dealing with + * @buffer: - saved PCI config space + * + */ +int +pci_restore_state(struct pci_dev *dev, u32 *buffer) +{ + int i; + + if (buffer) { + for (i = 0; i < 16; i++) + pci_write_config_dword(dev,i * 4, buffer[i]); + } + /* + * otherwise, write the context information we know from bootup. + * This works around a problem where warm-booting from Windows + * combined with a D3(hot)->D0 transition causes PCI config + * header data to be forgotten. + */ + else { + for (i = 0; i < 6; i ++) + pci_write_config_dword(dev, + PCI_BASE_ADDRESS_0 + (i * 4), + dev->resource[i].start); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + return 0; +} + +/** + * pci_enable_device - Initialize device before it's used by a driver. + * @dev: PCI device to be initialized + * + * Initialize device before it's used by a driver. Ask low-level code + * to enable I/O and memory. Wake up the device if it was suspended. + * Beware, this function can fail. + */ +int +pci_enable_device(struct pci_dev *dev) +{ + int err; + + pci_set_power_state(dev, 0); + if ((err = pcibios_enable_device(dev)) < 0) + return err; + return 0; +} + +/** + * pci_disable_device - Disable PCI device after use + * @dev: PCI device to be disabled + * + * Signal to the system that the PCI device is not in use by the system + * anymore. 
This only involves disabling PCI bus-mastering, if active. + */ +void +pci_disable_device(struct pci_dev *dev) +{ + u16 pci_command; + + pci_read_config_word(dev, PCI_COMMAND, &pci_command); + if (pci_command & PCI_COMMAND_MASTER) { + pci_command &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_command); + } +} + +/** + * pci_enable_wake - enable device to generate PME# when suspended + * @dev: - PCI device to operate on + * @state: - Current state of device. + * @enable: - Flag to enable or disable generation + * + * Set the bits in the device's PM Capabilities to generate PME# when + * the system is suspended. + * + * -EIO is returned if device doesn't have PM Capabilities. + * -EINVAL is returned if device supports it, but can't generate wake events. + * 0 if operation is successful. + * + */ +int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) +{ + int pm; + u16 value; + + /* find PCI PM capability in list */ + pm = pci_find_capability(dev, PCI_CAP_ID_PM); + + /* If device doesn't support PM Capabilities, but request is to disable + * wake events, it's a nop; otherwise fail */ + if (!pm) + return enable ? -EIO : 0; + + /* Check device's ability to generate PME# */ + pci_read_config_word(dev,pm+PCI_PM_PMC,&value); + + value &= PCI_PM_CAP_PME_MASK; + value >>= ffs(value); /* First bit of mask */ + + /* Check if it can generate PME# from requested state. */ + if (!value || !(value & (1 << state))) + return enable ? -EINVAL : 0; + + pci_read_config_word(dev, pm + PCI_PM_CTRL, &value); + + /* Clear PME_Status by writing 1 to it and enable PME# */ + value |= PCI_PM_CTRL_PME_STATUS | PCI_PM_CTRL_PME_ENABLE; + + if (!enable) + value &= ~PCI_PM_CTRL_PME_ENABLE; + + pci_write_config_word(dev, pm + PCI_PM_CTRL, value); + + return 0; +} + +int +pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) +{ + u8 pin; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (!pin) + return -1; + pin--; + while (dev->bus->self) { + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + dev = dev->bus->self; + } + *bridge = dev; + return pin; +} + +/** + * pci_release_regions - Release reserved PCI I/O and memory resources + * @pdev: PCI device whose resources were previously reserved by pci_request_regions + * + * Releases all PCI I/O and memory resources previously reserved by a + * successful call to pci_request_regions. Call this function only + * after all use of the PCI regions has ceased. + */ +void pci_release_regions(struct pci_dev *pdev) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + + if (pci_resource_flags(pdev, i) & IORESOURCE_IO) + release_region(pci_resource_start(pdev, i), + pci_resource_len(pdev, i)); + + else if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) + release_mem_region(pci_resource_start(pdev, i), + pci_resource_len(pdev, i)); + } +} + +/** + * pci_request_regions - Reserved PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @res_name: Name to be associated with resource. + * + * Mark all PCI regions associated with PCI device @pdev as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. 
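+ *
+ * (Illustrative probe-path usage, compiled out; the driver name and
+ * label are hypothetical.  The tulip probe above uses the same
+ * request/release pairing.)
+ */
+#if 0
+    if (pci_request_regions(pdev, "mydrv"))
+        goto err_out;        /* some BAR is already claimed */
+    /* ... regions are owned by this driver; on teardown: */
+    pci_release_regions(pdev);
+#endif
+/*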
+ */ +int pci_request_regions(struct pci_dev *pdev, char *res_name) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + + if (pci_resource_flags(pdev, i) & IORESOURCE_IO) { + if (!request_region(pci_resource_start(pdev, i), + pci_resource_len(pdev, i), res_name)) + goto err_out; + } + + else if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) { + if (!request_mem_region(pci_resource_start(pdev, i), + pci_resource_len(pdev, i), res_name)) + goto err_out; + } + } + + return 0; + +err_out: + printk (KERN_WARNING "PCI: Unable to reserve %s region #%d:%lx@%lx for device %s\n", + pci_resource_flags(pdev, i) & IORESOURCE_IO ? "I/O" : "mem", + i + 1, /* PCI BAR # */ + pci_resource_len(pdev, i), pci_resource_start(pdev, i), + pdev->slot_name); + pci_release_regions(pdev); + return -EBUSY; +} + + +/* + * Registration of PCI drivers and handling of hot-pluggable devices. + */ + +static LIST_HEAD(pci_drivers); + +/** + * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure + * @ids: array of PCI device id structures to search in + * @dev: the PCI device structure to match against + * + * Used by a driver to check whether a PCI device present in the + * system is in its list of supported devices.Returns the matching + * pci_device_id structure or %NULL if there is no match. + */ +const struct pci_device_id * +pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) +{ + while (ids->vendor || ids->subvendor || ids->class_mask) { + if ((ids->vendor == PCI_ANY_ID || ids->vendor == dev->vendor) && + (ids->device == PCI_ANY_ID || ids->device == dev->device) && + (ids->subvendor == PCI_ANY_ID || ids->subvendor == dev->subsystem_vendor) && + (ids->subdevice == PCI_ANY_ID || ids->subdevice == dev->subsystem_device) && + !((ids->class ^ dev->class) & ids->class_mask)) + return ids; + ids++; + } + return NULL; +} + +static int +pci_announce_device(struct pci_driver *drv, struct pci_dev *dev) +{ + const struct pci_device_id *id; + int ret = 0; + + if (drv->id_table) { + id = pci_match_device(drv->id_table, dev); + if (!id) { + ret = 0; + goto out; + } + } else + id = NULL; + + dev_probe_lock(); + if (drv->probe(dev, id) >= 0) { + dev->driver = drv; + ret = 1; + } + dev_probe_unlock(); +out: + return ret; +} + +/** + * pci_register_driver - register a new pci driver + * @drv: the driver structure to register + * + * Adds the driver structure to the list of registered drivers + * Returns the number of pci devices which were claimed by the driver + * during registration. The driver remains registered even if the + * return value is zero. + */ +int +pci_register_driver(struct pci_driver *drv) +{ + struct pci_dev *dev; + int count = 0; + + list_add_tail(&drv->node, &pci_drivers); + pci_for_each_dev(dev) { + if (!pci_dev_driver(dev)) + count += pci_announce_device(drv, dev); + } + return count; +} + +/** + * pci_unregister_driver - unregister a pci driver + * @drv: the driver structure to unregister + * + * Deletes the driver structure from the list of registered PCI drivers, + * gives it a chance to clean up by calling its remove() function for + * each device it was responsible for, and marks those devices as + * driverless. 
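+ *
+ * (Illustrative aside for pci_match_device()/pci_register_driver()
+ * above: an id_table ends with an all-zero sentinel, which is what
+ * the vendor/subvendor/class_mask test in the match loop keys on.
+ * The ids below are hypothetical; the block is compiled out.)
+ */
+#if 0
+static struct pci_device_id example_id_tbl[] = {
+    /* vendor, device, subvendor, subdevice, class, class_mask, data */
+    { 0x1011, 0x0019, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+    { 0, }    /* sentinel: terminates the pci_match_device() scan */
+};
+#endif
+/*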
+ */ + +void +pci_unregister_driver(struct pci_driver *drv) +{ + struct pci_dev *dev; + + list_del(&drv->node); + pci_for_each_dev(dev) { + if (dev->driver == drv) { + if (drv->remove) + drv->remove(dev); + dev->driver = NULL; + } + } +} + +#ifdef CONFIG_HOTPLUG + +#ifndef FALSE +#define FALSE (0) +#define TRUE (!FALSE) +#endif + +static void +run_sbin_hotplug(struct pci_dev *pdev, int insert) +{ + int i; + char *argv[3], *envp[8]; + char id[20], sub_id[24], bus_id[24], class_id[20]; + + if (!hotplug_path[0]) + return; + + sprintf(class_id, "PCI_CLASS=%04X", pdev->class); + sprintf(id, "PCI_ID=%04X:%04X", pdev->vendor, pdev->device); + sprintf(sub_id, "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, pdev->subsystem_device); + sprintf(bus_id, "PCI_SLOT_NAME=%s", pdev->slot_name); + + i = 0; + argv[i++] = hotplug_path; + argv[i++] = "pci"; + argv[i] = 0; + + i = 0; + /* minimal command environment */ + envp[i++] = "HOME=/"; + envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + + /* other stuff we want to pass to /sbin/hotplug */ + envp[i++] = class_id; + envp[i++] = id; + envp[i++] = sub_id; + envp[i++] = bus_id; + if (insert) + envp[i++] = "ACTION=add"; + else + envp[i++] = "ACTION=remove"; + envp[i] = 0; + + call_usermodehelper (argv [0], argv, envp); +} + +/** + * pci_announce_device_to_drivers - tell the drivers a new device has appeared + * @dev: the device that has shown up + * + * Notifys the drivers that a new device has appeared, and also notifys + * userspace through /sbin/hotplug. + */ +void +pci_announce_device_to_drivers(struct pci_dev *dev) +{ + struct list_head *ln; + + for(ln=pci_drivers.next; ln != &pci_drivers; ln=ln->next) { + struct pci_driver *drv = list_entry(ln, struct pci_driver, node); + if (drv->remove && pci_announce_device(drv, dev)) + break; + } + + /* notify userspace of new hotplug device */ + run_sbin_hotplug(dev, TRUE); +} + +/** + * pci_insert_device - insert a hotplug device + * @dev: the device to insert + * @bus: where to insert it + * + * Add a new device to the device lists and notify userspace (/sbin/hotplug). + */ +void +pci_insert_device(struct pci_dev *dev, struct pci_bus *bus) +{ + list_add_tail(&dev->bus_list, &bus->devices); + list_add_tail(&dev->global_list, &pci_devices); +#ifdef CONFIG_PROC_FS + pci_proc_attach_device(dev); +#endif + pci_announce_device_to_drivers(dev); +} + +static void +pci_free_resources(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = dev->resource + i; + if (res->parent) + release_resource(res); + } +} + +/** + * pci_remove_device - remove a hotplug device + * @dev: the device to remove + * + * Delete the device structure from the device lists and + * notify userspace (/sbin/hotplug). + */ +void +pci_remove_device(struct pci_dev *dev) +{ + if (dev->driver) { + if (dev->driver->remove) + dev->driver->remove(dev); + dev->driver = NULL; + } + list_del(&dev->bus_list); + list_del(&dev->global_list); + pci_free_resources(dev); +#ifdef CONFIG_PROC_FS + pci_proc_detach_device(dev); +#endif + + /* notify userspace of hotplug device removal */ + run_sbin_hotplug(dev, FALSE); +} + +#endif + +static struct pci_driver pci_compat_driver = { + name: "compat" +}; + +/** + * pci_dev_driver - get the pci_driver of a device + * @dev: the device to query + * + * Returns the appropriate pci_driver structure or %NULL if there is no + * registered driver for the device. 
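+ *
+ * (Illustrative usage, compiled out: checking whether a device already
+ * has an owner, much as pci_register_driver() above does before
+ * probing.)
+ */
+#if 0
+    struct pci_driver *owner = pci_dev_driver(dev);
+    if (owner)
+        printk(KERN_DEBUG "PCI: %s already bound to %s\n",
+               dev->slot_name, owner->name);
+#endif
+/*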
+ */ +struct pci_driver * +pci_dev_driver(const struct pci_dev *dev) +{ + if (dev->driver) + return dev->driver; + else { + int i; + for(i=0; i<=PCI_ROM_RESOURCE; i++) + if (dev->resource[i].flags & IORESOURCE_BUSY) + return &pci_compat_driver; + } + return NULL; +} + + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ + +static spinlock_t pci_lock = SPIN_LOCK_UNLOCKED; + +/* + * Wrappers for all PCI configuration access functions. They just check + * alignment, do locking and call the low-level functions pointed to + * by pci_dev->ops. + */ + +#define PCI_byte_BAD 0 +#define PCI_word_BAD (pos & 1) +#define PCI_dword_BAD (pos & 3) + +#define PCI_OP(rw,size,type) \ +int pci_##rw##_config_##size (struct pci_dev *dev, int pos, type value) \ +{ \ + int res; \ + unsigned long flags; \ + if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \ + spin_lock_irqsave(&pci_lock, flags); \ + res = dev->bus->ops->rw##_##size(dev, pos, value); \ + spin_unlock_irqrestore(&pci_lock, flags); \ + return res; \ +} + +PCI_OP(read, byte, u8 *) +PCI_OP(read, word, u16 *) +PCI_OP(read, dword, u32 *) +PCI_OP(write, byte, u8) +PCI_OP(write, word, u16) +PCI_OP(write, dword, u32) + +/** + * pci_set_master - enables bus-mastering for device dev + * @dev: the PCI device to enable + * + * Enables bus-mastering on the device and calls pcibios_set_master() + * to do the needed arch specific settings. + */ +void +pci_set_master(struct pci_dev *dev) +{ + u16 cmd; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (! (cmd & PCI_COMMAND_MASTER)) { + DBG("PCI: Enabling bus mastering for device %s\n", dev->slot_name); + cmd |= PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + pcibios_set_master(dev); +} + +int +pci_set_dma_mask(struct pci_dev *dev, u64 mask) +{ + if (!pci_dma_supported(dev, mask)) + return -EIO; + + dev->dma_mask = mask; + + return 0; +} + +int +pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) +{ + if (!pci_dac_dma_supported(dev, mask)) + return -EIO; + + dev->dma_mask = mask; + + return 0; +} + +/* + * Translate the low bits of the PCI base + * to the resource type + */ +static inline unsigned int pci_calc_resource_flags(unsigned int flags) +{ + if (flags & PCI_BASE_ADDRESS_SPACE_IO) + return IORESOURCE_IO; + + if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH) + return IORESOURCE_MEM | IORESOURCE_PREFETCH; + + return IORESOURCE_MEM; +} + +/* + * Find the extent of a PCI decode.. 
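+ *
+ * (Worked example with illustrative values: a 32-bit memory BAR that
+ * reads back 0xfffe0000 after the ~0 probe write decodes 128K:
+ *
+ *	size = mask & base          = 0xfffe0000
+ *	size = size & ~(size-1)     = 0x00020000
+ *	returned extent = size - 1  = 0x0001ffff
+ *
+ * which pci_read_bases() below turns into res->end - res->start.)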
*/
+static u32 pci_size(u32 base, unsigned long mask)
+{
+    u32 size = mask & base;        /* Find the significant bits */
+    size = size & ~(size-1);       /* Get the lowest of them to find the decode size */
+    return size-1;                 /* extent = size - 1 */
+}
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
+{
+    unsigned int pos, reg, next;
+    u32 l, sz;
+    struct resource *res;
+
+    for(pos=0; pos<howmany; pos = next) {
+        next = pos+1;
+        res = &dev->resource[pos];
+        res->name = dev->name;
+        reg = PCI_BASE_ADDRESS_0 + (pos << 2);
+        pci_read_config_dword(dev, reg, &l);
+        pci_write_config_dword(dev, reg, ~0);
+        pci_read_config_dword(dev, reg, &sz);
+        pci_write_config_dword(dev, reg, l);
+        if (!sz || sz == 0xffffffff)
+            continue;
+        if (l == 0xffffffff)
+            l = 0;
+        if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
+            res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
+            sz = pci_size(sz, PCI_BASE_ADDRESS_MEM_MASK);
+        } else {
+            res->start = l & PCI_BASE_ADDRESS_IO_MASK;
+            sz = pci_size(sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff);
+        }
+        res->end = res->start + (unsigned long) sz;
+        res->flags |= (l & 0xf) | pci_calc_resource_flags(l);
+        if ((l & (PCI_BASE_ADDRESS_SPACE | PCI_BASE_ADDRESS_MEM_TYPE_MASK))
+            == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)) {
+            pci_read_config_dword(dev, reg+4, &l);
+            next++;
+#if BITS_PER_LONG == 64
+            res->start |= ((unsigned long) l) << 32;
+            res->end = res->start + sz;
+            pci_write_config_dword(dev, reg+4, ~0);
+            pci_read_config_dword(dev, reg+4, &sz);
+            pci_write_config_dword(dev, reg+4, l);
+            if (~sz)
+                res->end = res->start + 0xffffffff +
+                    (((unsigned long) ~sz) << 32);
+#else
+            if (l) {
+                printk(KERN_ERR "PCI: Unable to handle 64-bit address for device %s\n", dev->slot_name);
+                res->start = 0;
+                res->flags = 0;
+                continue;
+            }
+#endif
+        }
+    }
+    if (rom) {
+        dev->rom_base_reg = rom;
+        res = &dev->resource[PCI_ROM_RESOURCE];
+        pci_read_config_dword(dev, rom, &l);
+        pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE);
+        pci_read_config_dword(dev, rom, &sz);
+        pci_write_config_dword(dev, rom, l);
+        if (l == 0xffffffff)
+            l = 0;
+        if (sz && sz != 0xffffffff) {
+            res->flags = (l & PCI_ROM_ADDRESS_ENABLE) |
+                IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+            res->start = l & PCI_ROM_ADDRESS_MASK;
+            sz = pci_size(sz, PCI_ROM_ADDRESS_MASK);
+            res->end = res->start + (unsigned long) sz;
+        }
+        res->name = dev->name;
+    }
+}
+
+void __devinit pci_read_bridge_bases(struct pci_bus *child)
+{
+    struct pci_dev *dev = child->self;
+    u8 io_base_lo, io_limit_lo;
+    u16 mem_base_lo, mem_limit_lo;
+    unsigned long base, limit;
+    struct resource *res;
+    int i;
+
+    if (!dev)        /* It's a host bus, nothing to read */
+        return;
+
+    for(i=0; i<3; i++)
+        child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
+
+    res = child->resource[0];
+    pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
+    pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
+    base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
+    limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
+
+    if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
+        u16 io_base_hi, io_limit_hi;
+        pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
+        pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
+        base |= (io_base_hi << 16);
+        limit |= (io_limit_hi << 16);
+    }
+
+    if (base && base <= limit) {
+        res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
+        res->start = base;
+        res->end = limit + 0xfff;
+        res->name = child->name;
+    } else {
+        /*
+         * Ugh. 
We don't know enough about this bridge. Just assume + * that it's entirely transparent. + */ + printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 0); + child->resource[0] = child->parent->resource[0]; + } + + res = child->resource[1]; + pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo); + pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo); + base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; + limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; + if (base && base <= limit) { + res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; + res->start = base; + res->end = limit + 0xfffff; + res->name = child->name; + } else { + /* See comment above. Same thing */ + printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 1); + child->resource[1] = child->parent->resource[1]; + } + + res = child->resource[2]; + pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); + pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); + base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + + if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { + u32 mem_base_hi, mem_limit_hi; + pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi); + pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi); +#if BITS_PER_LONG == 64 + base |= ((long) mem_base_hi) << 32; + limit |= ((long) mem_limit_hi) << 32; +#else + if (mem_base_hi || mem_limit_hi) { + printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name); + return; + } +#endif + } + if (base && base <= limit) { + res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH; + res->start = base; + res->end = limit + 0xfffff; + res->name = child->name; + } else { + /* See comments above */ + printk(KERN_ERR "Unknown bridge resource %d: assuming transparent\n", 2); + child->resource[2] = child->parent->resource[2]; + } +} + +static struct pci_bus * __devinit pci_alloc_bus(void) +{ + struct pci_bus *b; + + b = kmalloc(sizeof(*b), GFP_KERNEL); + if (b) { + memset(b, 0, sizeof(*b)); + INIT_LIST_HEAD(&b->children); + INIT_LIST_HEAD(&b->devices); + } + return b; +} + +struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr) +{ + struct pci_bus *child; + int i; + + /* + * Allocate a new bus, and inherit stuff from the parent.. + */ + child = pci_alloc_bus(); + + list_add_tail(&child->node, &parent->children); + child->self = dev; + dev->subordinate = child; + child->parent = parent; + child->ops = parent->ops; + child->sysdata = parent->sysdata; + + /* + * Set up the primary, secondary and subordinate + * bus numbers. + */ + child->number = child->secondary = busnr; + child->primary = parent->secondary; + child->subordinate = 0xff; + + /* Set up default resource pointers.. */ + for (i = 0; i < 4; i++) + child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i]; + + return child; +} + +unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus); + +/* + * If it's a bridge, configure it and scan the bus behind it. + * For CardBus bridges, we don't scan behind as the devices will + * be handled by the bridge driver itself. + * + * We need to process bridges in two passes -- first we scan those + * already configured by the BIOS and after we are done with all of + * them, we proceed to assigning numbers to the remaining buses in + * order to avoid overlaps between old and new bus numbers. 
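+ *
+ * (Illustrative, hypothetical topology: for a bridge on bus 0 with
+ * buses 2..4 behind it,
+ *
+ *	child->primary     = 0	(upstream side)
+ *	child->secondary   = 2	(bus directly behind the bridge)
+ *	child->subordinate = 4	(highest bus reachable through it)
+ *
+ * and these are the three bytes packed into PCI_PRIMARY_BUS below.)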
+ */ +static int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass) +{ + unsigned int buses; + unsigned short cr; + struct pci_bus *child; + int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS); + + pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses); + DBG("Scanning behind PCI bridge %s, config %06x, pass %d\n", dev->slot_name, buses & 0xffffff, pass); + if ((buses & 0xffff00) && !pcibios_assign_all_busses()) { + /* + * Bus already configured by firmware, process it in the first + * pass and just note the configuration. + */ + if (pass) + return max; + child = pci_add_new_bus(bus, dev, 0); + child->primary = buses & 0xFF; + child->secondary = (buses >> 8) & 0xFF; + child->subordinate = (buses >> 16) & 0xFF; + child->number = child->secondary; + if (!is_cardbus) { + unsigned int cmax = pci_do_scan_bus(child); + if (cmax > max) max = cmax; + } else { + unsigned int cmax = child->subordinate; + if (cmax > max) max = cmax; + } + } else { + /* + * We need to assign a number to this bus which we always + * do in the second pass. We also keep all address decoders + * on the bridge disabled during scanning. FIXME: Why? + */ + if (!pass) + return max; + pci_read_config_word(dev, PCI_COMMAND, &cr); + pci_write_config_word(dev, PCI_COMMAND, 0x0000); + pci_write_config_word(dev, PCI_STATUS, 0xffff); + + child = pci_add_new_bus(bus, dev, ++max); + buses = (buses & 0xff000000) + | ((unsigned int)(child->primary) << 0) + | ((unsigned int)(child->secondary) << 8) + | ((unsigned int)(child->subordinate) << 16); + /* + * We need to blast all three values with a single write. + */ + pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses); + if (!is_cardbus) { + /* Now we can scan all subordinate buses... */ + max = pci_do_scan_bus(child); + } else { + /* + * For CardBus bridges, we leave 4 bus numbers + * as cards with a PCI-to-PCI bridge can be + * inserted later. + */ + max += 3; + } + /* + * Set the subordinate bus number to its real value. + */ + child->subordinate = max; + pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max); + pci_write_config_word(dev, PCI_COMMAND, cr); + } + sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number); + return max; +} + +/* + * Read interrupt line and base address registers. + * The architecture-dependent code can tweak these, of course. + */ +static void pci_read_irq(struct pci_dev *dev) +{ + unsigned char irq; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq); + if (irq) + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + dev->irq = irq; +} + +/** + * pci_setup_device - fill in class and map information of a device + * @dev: the device structure to fill + * + * Initialize the device structure with information about the device's + * vendor,class,memory and IO-space addresses,IRQ lines etc. + * Called at initialisation of the PCI subsystem and by CardBus services. + * Returns 0 on success and -1 if unknown type of device (not normal, bridge + * or CardBus). 
+ */ +int pci_setup_device(struct pci_dev * dev) +{ + u32 class; + + sprintf(dev->slot_name, "%02x:%02x.%d", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + sprintf(dev->name, "PCI device %04x:%04x", dev->vendor, dev->device); + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); + class >>= 8; /* upper 3 bytes */ + dev->class = class; + class >>= 8; + + DBG("Found %02x:%02x [%04x/%04x] %06x %02x\n", dev->bus->number, dev->devfn, dev->vendor, dev->device, class, dev->hdr_type); + + /* "Unknown power state" */ + dev->current_state = 4; + + switch (dev->hdr_type) { /* header type */ + case PCI_HEADER_TYPE_NORMAL: /* standard header */ + if (class == PCI_CLASS_BRIDGE_PCI) + goto bad; + pci_read_irq(dev); + pci_read_bases(dev, 6, PCI_ROM_ADDRESS); + pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor); + pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device); + break; + + case PCI_HEADER_TYPE_BRIDGE: /* bridge header */ + if (class != PCI_CLASS_BRIDGE_PCI) + goto bad; + pci_read_bases(dev, 2, PCI_ROM_ADDRESS1); + break; + + case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */ + if (class != PCI_CLASS_BRIDGE_CARDBUS) + goto bad; + pci_read_irq(dev); + pci_read_bases(dev, 1, 0); + pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor); + pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device); + break; + + default: /* unknown header */ + printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n", + dev->slot_name, dev->hdr_type); + return -1; + + bad: + printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n", + dev->slot_name, class, dev->hdr_type); + dev->class = PCI_CLASS_NOT_DEFINED; + } + + /* We found a fine healthy device, go go go... */ + return 0; +} + +/* + * Read the config data for a PCI device, sanity-check it + * and fill in the dev structure... + */ +struct pci_dev * __devinit pci_scan_device(struct pci_dev *temp) +{ + struct pci_dev *dev; + u32 l; + + if (pci_read_config_dword(temp, PCI_VENDOR_ID, &l)) + return NULL; + + /* some broken boards return 0 or ~0 if a slot is empty: */ + if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000) + return NULL; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + + memcpy(dev, temp, sizeof(*dev)); + dev->vendor = l & 0xffff; + dev->device = (l >> 16) & 0xffff; + + /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) + set this higher, assuming the system even supports it. */ + dev->dma_mask = 0xffffffff; + if (pci_setup_device(dev) < 0) { + kfree(dev); + dev = NULL; + } + return dev; +} + +struct pci_dev * __devinit pci_scan_slot(struct pci_dev *temp) +{ + struct pci_bus *bus = temp->bus; + struct pci_dev *dev; + struct pci_dev *first_dev = NULL; + int func = 0; + int is_multi = 0; + u8 hdr_type; + + for (func = 0; func < 8; func++, temp->devfn++) { + if (func && !is_multi) /* not a multi-function device */ + continue; + if (pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) + continue; + temp->hdr_type = hdr_type & 0x7f; + + dev = pci_scan_device(temp); + if (!dev) + continue; + pci_name_device(dev); + if (!func) { + is_multi = hdr_type & 0x80; + first_dev = dev; + } + + /* + * Link the device to both the global PCI device chain and + * the per-bus list of devices. 
+ */ + list_add_tail(&dev->global_list, &pci_devices); + list_add_tail(&dev->bus_list, &bus->devices); + + /* Fix up broken headers */ + pci_fixup_device(PCI_FIXUP_HEADER, dev); + } + return first_dev; +} + +unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus) +{ + unsigned int devfn, max, pass; + struct list_head *ln; + struct pci_dev *dev, dev0; + + DBG("Scanning bus %02x\n", bus->number); + max = bus->secondary; + + /* Create a device template */ + memset(&dev0, 0, sizeof(dev0)); + dev0.bus = bus; + dev0.sysdata = bus->sysdata; + + /* Go find them, Rover! */ + for (devfn = 0; devfn < 0x100; devfn += 8) { + dev0.devfn = devfn; + pci_scan_slot(&dev0); + } + + /* + * After performing arch-dependent fixup of the bus, look behind + * all PCI-to-PCI bridges on this bus. + */ + DBG("Fixups for bus %02x\n", bus->number); + pcibios_fixup_bus(bus); + for (pass=0; pass < 2; pass++) + for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) { + dev = pci_dev_b(ln); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + max = pci_scan_bridge(bus, dev, max, pass); + } + + /* + * We've scanned the bus and so we know all about what's on + * the other side of any bridges that may be on this bus plus + * any devices. + * + * Return how far we've got finding sub-buses. + */ + DBG("Bus scan for %02x returning with max=%02x\n", bus->number, max); + return max; +} + +int __devinit pci_bus_exists(const struct list_head *list, int nr) +{ + const struct list_head *l; + + for(l=list->next; l != list; l = l->next) { + const struct pci_bus *b = pci_bus_b(l); + if (b->number == nr || pci_bus_exists(&b->children, nr)) + return 1; + } + return 0; +} + +struct pci_bus * __devinit pci_alloc_primary_bus(int bus) +{ + struct pci_bus *b; + + if (pci_bus_exists(&pci_root_buses, bus)) { + /* If we already got to this bus through a different bridge, ignore it */ + DBG("PCI: Bus %02x already known\n", bus); + return NULL; + } + + b = pci_alloc_bus(); + list_add_tail(&b->node, &pci_root_buses); + + b->number = b->secondary = bus; + b->resource[0] = &ioport_resource; + b->resource[1] = &iomem_resource; + return b; +} + +struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) +{ + struct pci_bus *b = pci_alloc_primary_bus(bus); + if (b) { + b->sysdata = sysdata; + b->ops = ops; + b->subordinate = pci_do_scan_bus(b); + } + return b; +} + +#ifdef CONFIG_PM + +/* + * PCI Power management.. + * + * This needs to be done centralized, so that we power manage PCI + * devices in the right order: we should not shut down PCI bridges + * before we've shut down the devices behind them, and we should + * not wake up devices before we've woken up the bridge to the + * device.. Eh? + * + * We do not touch devices that don't have a driver that exports + * a suspend/resume function. That is just too dangerous. If the default + * PCI suspend/resume functions work for a device, the driver can + * easily implement them (ie just have a suspend function that calls + * the pci_set_power_state() function). 
+ */ + +static int pci_pm_save_state_device(struct pci_dev *dev, u32 state) +{ + int error = 0; + if (dev) { + struct pci_driver *driver = dev->driver; + if (driver && driver->save_state) + error = driver->save_state(dev,state); + } + return error; +} + +static int pci_pm_suspend_device(struct pci_dev *dev, u32 state) +{ + int error = 0; + if (dev) { + struct pci_driver *driver = dev->driver; + if (driver && driver->suspend) + error = driver->suspend(dev,state); + } + return error; +} + +static int pci_pm_resume_device(struct pci_dev *dev) +{ + int error = 0; + if (dev) { + struct pci_driver *driver = dev->driver; + if (driver && driver->resume) + error = driver->resume(dev); + } + return error; +} + +static int pci_pm_save_state_bus(struct pci_bus *bus, u32 state) +{ + struct list_head *list; + int error = 0; + + list_for_each(list, &bus->children) { + error = pci_pm_save_state_bus(pci_bus_b(list),state); + if (error) return error; + } + list_for_each(list, &bus->devices) { + error = pci_pm_save_state_device(pci_dev_b(list),state); + if (error) return error; + } + return 0; +} + +static int pci_pm_suspend_bus(struct pci_bus *bus, u32 state) +{ + struct list_head *list; + + /* Walk the bus children list */ + list_for_each(list, &bus->children) + pci_pm_suspend_bus(pci_bus_b(list),state); + + /* Walk the device children list */ + list_for_each(list, &bus->devices) + pci_pm_suspend_device(pci_dev_b(list),state); + return 0; +} + +static int pci_pm_resume_bus(struct pci_bus *bus) +{ + struct list_head *list; + + /* Walk the device children list */ + list_for_each(list, &bus->devices) + pci_pm_resume_device(pci_dev_b(list)); + + /* And then walk the bus children */ + list_for_each(list, &bus->children) + pci_pm_resume_bus(pci_bus_b(list)); + return 0; +} + +static int pci_pm_save_state(u32 state) +{ + struct list_head *list; + struct pci_bus *bus; + int error = 0; + + list_for_each(list, &pci_root_buses) { + bus = pci_bus_b(list); + error = pci_pm_save_state_bus(bus,state); + if (!error) + error = pci_pm_save_state_device(bus->self,state); + } + return error; +} + +static int pci_pm_suspend(u32 state) +{ + struct list_head *list; + struct pci_bus *bus; + + list_for_each(list, &pci_root_buses) { + bus = pci_bus_b(list); + pci_pm_suspend_bus(bus,state); + pci_pm_suspend_device(bus->self,state); + } + return 0; +} + +static int pci_pm_resume(void) +{ + struct list_head *list; + struct pci_bus *bus; + + list_for_each(list, &pci_root_buses) { + bus = pci_bus_b(list); + pci_pm_resume_device(bus->self); + pci_pm_resume_bus(bus); + } + return 0; +} + +static int +pci_pm_callback(struct pm_dev *pm_device, pm_request_t rqst, void *data) +{ + int error = 0; + + switch (rqst) { + case PM_SAVE_STATE: + error = pci_pm_save_state((u32)data); + break; + case PM_SUSPEND: + error = pci_pm_suspend((u32)data); + break; + case PM_RESUME: + error = pci_pm_resume(); + break; + default: break; + } + return error; +} + +#endif + +#if 0 /* XXX KAF: Only USB uses this stuff -- I think we'll just bin it. */ + +/* + * Pool allocator ... wraps the pci_alloc_consistent page allocator, so + * small blocks are easily used by drivers for bus mastering controllers. + * This should probably be sharing the guts of the slab allocator. 
+ */ + +struct pci_pool { /* the pool */ + struct list_head page_list; + spinlock_t lock; + size_t blocks_per_page; + size_t size; + int flags; + struct pci_dev *dev; + size_t allocation; + char name [32]; + wait_queue_head_t waitq; +}; + +struct pci_page { /* cacheable header for 'allocation' bytes */ + struct list_head page_list; + void *vaddr; + dma_addr_t dma; + unsigned long bitmap [0]; +}; + +#define POOL_TIMEOUT_JIFFIES ((100 /* msec */ * HZ) / 1000) +#define POOL_POISON_BYTE 0xa7 + +// #define CONFIG_PCIPOOL_DEBUG + + +/** + * pci_pool_create - Creates a pool of pci consistent memory blocks, for dma. + * @name: name of pool, for diagnostics + * @pdev: pci device that will be doing the DMA + * @size: size of the blocks in this pool. + * @align: alignment requirement for blocks; must be a power of two + * @allocation: returned blocks won't cross this boundary (or zero) + * @flags: SLAB_* flags (not all are supported). + * + * Returns a pci allocation pool with the requested characteristics, or + * null if one can't be created. Given one of these pools, pci_pool_alloc() + * may be used to allocate memory. Such memory will all have "consistent" + * DMA mappings, accessible by the device and its driver without using + * cache flushing primitives. The actual size of blocks allocated may be + * larger than requested because of alignment. + * + * If allocation is nonzero, objects returned from pci_pool_alloc() won't + * cross that size boundary. This is useful for devices which have + * addressing restrictions on individual DMA transfers, such as not crossing + * boundaries of 4KBytes. + */ +struct pci_pool * +pci_pool_create (const char *name, struct pci_dev *pdev, + size_t size, size_t align, size_t allocation, int flags) +{ + struct pci_pool *retval; + + if (align == 0) + align = 1; + if (size == 0) + return 0; + else if (size < align) + size = align; + else if ((size % align) != 0) { + size += align + 1; + size &= ~(align - 1); + } + + if (allocation == 0) { + if (PAGE_SIZE < size) + allocation = size; + else + allocation = PAGE_SIZE; + // FIXME: round up for less fragmentation + } else if (allocation < size) + return 0; + + if (!(retval = kmalloc (sizeof *retval, flags))) + return retval; + +#ifdef CONFIG_PCIPOOL_DEBUG + flags |= SLAB_POISON; +#endif + + strncpy (retval->name, name, sizeof retval->name); + retval->name [sizeof retval->name - 1] = 0; + + retval->dev = pdev; + INIT_LIST_HEAD (&retval->page_list); + spin_lock_init (&retval->lock); + retval->size = size; + retval->flags = flags; + retval->allocation = allocation; + retval->blocks_per_page = allocation / size; + init_waitqueue_head (&retval->waitq); + +#ifdef CONFIG_PCIPOOL_DEBUG + printk (KERN_DEBUG "pcipool create %s/%s size %d, %d/page (%d alloc)\n", + pdev ? 
pdev->slot_name : NULL, retval->name, size, + retval->blocks_per_page, allocation); +#endif + + return retval; +} + + +static struct pci_page * +pool_alloc_page (struct pci_pool *pool, int mem_flags) +{ + struct pci_page *page; + int mapsize; + + mapsize = pool->blocks_per_page; + mapsize = (mapsize + BITS_PER_LONG - 1) / BITS_PER_LONG; + mapsize *= sizeof (long); + + page = (struct pci_page *) kmalloc (mapsize + sizeof *page, mem_flags); + if (!page) + return 0; + page->vaddr = pci_alloc_consistent (pool->dev, + pool->allocation, + &page->dma); + if (page->vaddr) { + memset (page->bitmap, 0xff, mapsize); // bit set == free + if (pool->flags & SLAB_POISON) + memset (page->vaddr, POOL_POISON_BYTE, pool->allocation); + list_add (&page->page_list, &pool->page_list); + } else { + kfree (page); + page = 0; + } + return page; +} + + +static inline int +is_page_busy (int blocks, unsigned long *bitmap) +{ + while (blocks > 0) { + if (*bitmap++ != ~0UL) + return 1; + blocks -= BITS_PER_LONG; + } + return 0; +} + +static void +pool_free_page (struct pci_pool *pool, struct pci_page *page) +{ + dma_addr_t dma = page->dma; + + if (pool->flags & SLAB_POISON) + memset (page->vaddr, POOL_POISON_BYTE, pool->allocation); + pci_free_consistent (pool->dev, pool->allocation, page->vaddr, dma); + list_del (&page->page_list); + kfree (page); +} + + +/** + * pci_pool_destroy - destroys a pool of pci memory blocks. + * @pool: pci pool that will be destroyed + * + * Caller guarantees that no more memory from the pool is in use, + * and that nothing will try to use the pool after this call. + */ +void +pci_pool_destroy (struct pci_pool *pool) +{ + unsigned long flags; + +#ifdef CONFIG_PCIPOOL_DEBUG + printk (KERN_DEBUG "pcipool destroy %s/%s\n", + pool->dev ? pool->dev->slot_name : NULL, + pool->name); +#endif + + spin_lock_irqsave (&pool->lock, flags); + while (!list_empty (&pool->page_list)) { + struct pci_page *page; + page = list_entry (pool->page_list.next, + struct pci_page, page_list); + if (is_page_busy (pool->blocks_per_page, page->bitmap)) { + printk (KERN_ERR "pci_pool_destroy %s/%s, %p busy\n", + pool->dev ? pool->dev->slot_name : NULL, + pool->name, page->vaddr); + /* leak the still-in-use consistent memory */ + list_del (&page->page_list); + kfree (page); + } else + pool_free_page (pool, page); + } + spin_unlock_irqrestore (&pool->lock, flags); + kfree (pool); +} + + +/** + * pci_pool_alloc - get a block of consistent memory + * @pool: pci pool that will produce the block + * @mem_flags: SLAB_KERNEL or SLAB_ATOMIC + * @handle: pointer to dma address of block + * + * This returns the kernel virtual address of a currently unused block, + * and reports its dma address through the handle. + * If such a memory block can't be allocated, null is returned. + */ +void * +pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle) +{ + unsigned long flags; + struct list_head *entry; + struct pci_page *page; + int map, block; + size_t offset; + void *retval; + +restart: + spin_lock_irqsave (&pool->lock, flags); + list_for_each (entry, &pool->page_list) { + int i; + page = list_entry (entry, struct pci_page, page_list); + /* only cachable accesses here ... 
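+ * (the free map is searched one long at a time: a word of 0 holds no
+ * free blocks, otherwise ffz(~word) yields the first set -- i.e.
+ * free -- bit, which is claimed with clear_bit() before the block's
+ * byte offset is computed)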
*/ + for (map = 0, i = 0; + i < pool->blocks_per_page; + i += BITS_PER_LONG, map++) { + if (page->bitmap [map] == 0) + continue; + block = ffz (~ page->bitmap [map]); + if ((i + block) < pool->blocks_per_page) { + clear_bit (block, &page->bitmap [map]); + offset = (BITS_PER_LONG * map) + block; + offset *= pool->size; + goto ready; + } + } + } + if (!(page = pool_alloc_page (pool, mem_flags))) { + if (mem_flags == SLAB_KERNEL) { + DECLARE_WAITQUEUE (wait, current); + + current->state = TASK_INTERRUPTIBLE; + add_wait_queue (&pool->waitq, &wait); + spin_unlock_irqrestore (&pool->lock, flags); + + schedule_timeout (POOL_TIMEOUT_JIFFIES); + + current->state = TASK_RUNNING; + remove_wait_queue (&pool->waitq, &wait); + goto restart; + } + retval = 0; + goto done; + } + + clear_bit (0, &page->bitmap [0]); + offset = 0; +ready: + retval = offset + page->vaddr; + *handle = offset + page->dma; +done: + spin_unlock_irqrestore (&pool->lock, flags); + return retval; +} + + +static struct pci_page * +pool_find_page (struct pci_pool *pool, dma_addr_t dma) +{ + unsigned long flags; + struct list_head *entry; + struct pci_page *page; + + spin_lock_irqsave (&pool->lock, flags); + list_for_each (entry, &pool->page_list) { + page = list_entry (entry, struct pci_page, page_list); + if (dma < page->dma) + continue; + if (dma < (page->dma + pool->allocation)) + goto done; + } + page = 0; +done: + spin_unlock_irqrestore (&pool->lock, flags); + return page; +} + + +/** + * pci_pool_free - put block back into pci pool + * @pool: the pci pool holding the block + * @vaddr: virtual address of block + * @dma: dma address of block + * + * Caller promises neither device nor driver will again touch this block + * unless it is first re-allocated. + */ +void +pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma) +{ + struct pci_page *page; + unsigned long flags; + int map, block; + + if ((page = pool_find_page (pool, dma)) == 0) { + printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n", + pool->dev ? pool->dev->slot_name : NULL, + pool->name, vaddr, (int) (dma & 0xffffffff)); + return; + } +#ifdef CONFIG_PCIPOOL_DEBUG + if (((dma - page->dma) + (void *)page->vaddr) != vaddr) { + printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n", + pool->dev ? pool->dev->slot_name : NULL, + pool->name, vaddr, (int) (dma & 0xffffffff)); + return; + } +#endif + + block = dma - page->dma; + block /= pool->size; + map = block / BITS_PER_LONG; + block %= BITS_PER_LONG; + +#ifdef CONFIG_PCIPOOL_DEBUG + if (page->bitmap [map] & (1UL << block)) { + printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n", + pool->dev ? pool->dev->slot_name : NULL, + pool->name, dma); + return; + } +#endif + if (pool->flags & SLAB_POISON) + memset (vaddr, POOL_POISON_BYTE, pool->size); + + spin_lock_irqsave (&pool->lock, flags); + set_bit (block, &page->bitmap [map]); + if (waitqueue_active (&pool->waitq)) + wake_up (&pool->waitq); + /* + * Resist a temptation to do + * if (!is_page_busy(bpp, page->bitmap)) pool_free_page(pool, page); + * it is not interrupt safe. Better have empty pages hang around. + */ + spin_unlock_irqrestore (&pool->lock, flags); +} + +#endif /* XXX End of PCI pool allocator stuff. 
*/ + + +void __devinit pci_init(void) +{ + struct pci_dev *dev; + + pcibios_init(); + + pci_for_each_dev(dev) { + pci_fixup_device(PCI_FIXUP_FINAL, dev); + } + +#ifdef CONFIG_PM + pm_register(PM_PCI_DEV, 0, pci_pm_callback); +#endif +} + +static int __devinit pci_setup(char *str) +{ + while (str) { + char *k = strchr(str, ','); + if (k) + *k++ = 0; + if (*str && (str = pcibios_setup(str)) && *str) { + /* PCI layer options should be handled here */ + printk(KERN_ERR "PCI: Unknown option `%s'\n", str); + } + str = k; + } + return 1; +} + +__setup("pci=", pci_setup); + +EXPORT_SYMBOL(pci_read_config_byte); +EXPORT_SYMBOL(pci_read_config_word); +EXPORT_SYMBOL(pci_read_config_dword); +EXPORT_SYMBOL(pci_write_config_byte); +EXPORT_SYMBOL(pci_write_config_word); +EXPORT_SYMBOL(pci_write_config_dword); +EXPORT_SYMBOL(pci_devices); +EXPORT_SYMBOL(pci_root_buses); +EXPORT_SYMBOL(pci_enable_device); +EXPORT_SYMBOL(pci_disable_device); +EXPORT_SYMBOL(pci_find_capability); +EXPORT_SYMBOL(pci_release_regions); +EXPORT_SYMBOL(pci_request_regions); +EXPORT_SYMBOL(pci_find_class); +EXPORT_SYMBOL(pci_find_device); +EXPORT_SYMBOL(pci_find_slot); +EXPORT_SYMBOL(pci_find_subsys); +EXPORT_SYMBOL(pci_set_master); +EXPORT_SYMBOL(pci_set_dma_mask); +EXPORT_SYMBOL(pci_dac_set_dma_mask); +EXPORT_SYMBOL(pci_assign_resource); +EXPORT_SYMBOL(pci_register_driver); +EXPORT_SYMBOL(pci_unregister_driver); +EXPORT_SYMBOL(pci_dev_driver); +EXPORT_SYMBOL(pci_match_device); +EXPORT_SYMBOL(pci_find_parent_resource); + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pci_setup_device); +EXPORT_SYMBOL(pci_insert_device); +EXPORT_SYMBOL(pci_remove_device); +EXPORT_SYMBOL(pci_announce_device_to_drivers); +EXPORT_SYMBOL(pci_add_new_bus); +EXPORT_SYMBOL(pci_do_scan_bus); +EXPORT_SYMBOL(pci_scan_slot); +EXPORT_SYMBOL(pci_proc_attach_device); +EXPORT_SYMBOL(pci_proc_detach_device); +EXPORT_SYMBOL(pci_proc_attach_bus); +EXPORT_SYMBOL(pci_proc_detach_bus); +#endif + +EXPORT_SYMBOL(pci_set_power_state); +EXPORT_SYMBOL(pci_save_state); +EXPORT_SYMBOL(pci_restore_state); +EXPORT_SYMBOL(pci_enable_wake); + +/* Obsolete functions */ + +EXPORT_SYMBOL(pcibios_present); +EXPORT_SYMBOL(pcibios_read_config_byte); +EXPORT_SYMBOL(pcibios_read_config_word); +EXPORT_SYMBOL(pcibios_read_config_dword); +EXPORT_SYMBOL(pcibios_write_config_byte); +EXPORT_SYMBOL(pcibios_write_config_word); +EXPORT_SYMBOL(pcibios_write_config_dword); +EXPORT_SYMBOL(pcibios_find_class); +EXPORT_SYMBOL(pcibios_find_device); + +/* Quirk info */ + +EXPORT_SYMBOL(isa_dma_bridge_buggy); +EXPORT_SYMBOL(pci_pci_problems); + +#if 0 +/* Pool allocator */ + +EXPORT_SYMBOL (pci_pool_create); +EXPORT_SYMBOL (pci_pool_destroy); +EXPORT_SYMBOL (pci_pool_alloc); +EXPORT_SYMBOL (pci_pool_free); + +#endif diff --git a/xen-2.4.16/drivers/pci/pci.ids b/xen-2.4.16/drivers/pci/pci.ids new file mode 100644 index 0000000000..c73f0bd65b --- /dev/null +++ b/xen-2.4.16/drivers/pci/pci.ids @@ -0,0 +1,5445 @@ +# +# List of PCI ID's +# +# Maintained by Martin Mares and other volunteers from the +# Linux PCI ID's Project at http://pciids.sf.net/. New data are always +# welcome (if they are accurate), we're eagerly expecting new entries, +# so if you have anything to contribute, please visit the home page or +# send a diff -u against the most recent pci.ids to pci-ids@ucw.cz. +# +# $Id: pci.ids,v 1.24 2001/10/28 21:55:26 mares Exp $ +# + +# Vendors, devices and subsystems. Please keep sorted. 
+ +# Syntax: +# vendor vendor_name +# device device_name <-- single tab +# subvendor subdevice subsystem_name <-- two tabs + +0000 Gammagraphx, Inc. +001a Ascend Communications, Inc. +0033 Paradyne corp. +003d Lockheed Martin-Marietta Corp +0070 Hauppauge computer works Inc. +0100 Ncipher Corp Ltd +0675 Dynalink + 1700 IS64PH ISDN Adapter + 1702 IS64PH ISDN Adapter +0a89 BREA Technologies Inc +0e11 Compaq Computer Corporation + 0001 PCI to EISA Bridge + 0002 PCI to ISA Bridge + 0049 NC7132 Gigabit Upgrade Module + 004a NC6136 Gigabit Server Adapter + 0508 Netelligent 4/16 Token Ring + 1000 Triflex/Pentium Bridge, Model 1000 + 2000 Triflex/Pentium Bridge, Model 2000 + 3032 QVision 1280/p + 3033 QVision 1280/p + 3034 QVision 1280/p + 4000 4000 [Triflex] + 6010 HotPlug PCI Bridge 6010 + 7020 USB Controller + a0ec Fibre Channel Host Controller + a0f0 Advanced System Management Controller + a0f3 Triflex PCI to ISA Bridge + a0f7 PCI Hotplug Controller + 8086 002a PCI Hotplug Controller A + 8086 002b PCI Hotplug Controller B + a0f8 USB Open Host Controller + a0fc Fibre Channel Host Controller + ae10 Smart-2/P RAID Controller + 0e11 4030 Smart-2/P Array Controller + 0e11 4031 Smart-2SL Array Controller + 0e11 4032 Smart Array Controller + 0e11 4033 Smart 3100ES Array Controller + ae29 MIS-L + ae2a MPC + ae2b MIS-E + ae31 System Management Controller + ae32 Netelligent 10/100 + ae33 Triflex Dual EIDE Controller + ae34 Netelligent 10 + ae35 Integrated NetFlex-3/P + ae40 Netelligent 10/100 Dual + ae43 ProLiant Integrated Netelligent 10/100 + ae69 CETUS-L + ae6c Northstar + ae6d NorthStar CPU to PCI Bridge + b011 Integrated Netelligent 10/100 + b012 Netelligent 10 T/2 + b01e NC3120 Fast Ethernet NIC + b01f NC3122 Fast Ethernet NIC + b02f NC1120 Ethernet NIC + b030 Netelligent WS 5100 + b04a 10/100 TX PCI Intel WOL UTP Controller + b060 Smart Array 5300 Controller + b0c6 NC3161 Fast Ethernet NIC + b0c7 NC3160 Fast Ethernet NIC + b0d7 NC3121 Fast Ethernet NIC + b0dd NC3131 Fast Ethernet NIC + b0de NC3132 Fast Ethernet Module + b0df NC6132 Gigabit Module + b0e0 NC6133 Gigabit Module + b0e1 NC3133 Fast Ethernet Module + b123 NC6134 Gigabit NIC + b134 NC3163 Fast Ethernet NIC + b13c NC3162 Fast Ethernet NIC + b144 NC3123 Fast Ethernet NIC + b163 NC3134 Fast Ethernet NIC + b164 NC3135 Fast Ethernet Upgrade Module + b178 Smart Array 5i/532 + b1a4 NC7131 Gigabit Server Adapter + f130 NetFlex-3/P ThunderLAN 1.0 + f150 NetFlex-3/P ThunderLAN 2.3 +1000 LSI Logic / Symbios Logic (formerly NCR) + 0001 53c810 + 1000 1000 8100S + 0002 53c820 + 0003 53c825 + 0004 53c815 + 0005 53c810AP + 0006 53c860 + 000a 53c1510 + 000b 53c896 + 000c 53c895 + 000d 53c885 + 000f 53c875 + 0e11 7004 Embedded Ultra Wide SCSI Controller + 1092 8760 FirePort 40 Dual SCSI Controller + 1de1 3904 DC390F Ultra Wide SCSI Controller + 0012 53c895a + 0020 53c1010 Ultra3 SCSI Adapter + 0021 53c1010 66MHz Ultra3 SCSI Adapter + 0030 53c1030 + 0040 53c1035 + 008f 53c875J + 1092 8000 FirePort 40 SCSI Controller + 1092 8760 FirePort 40 Dual SCSI Host Adapter + 0621 FC909 + 0622 FC929 + 0623 FC929 LAN + 0624 FC919 + 0625 FC919 LAN + 0701 83C885 + 0702 Yellowfin G-NIC gigabit ethernet + 1318 0000 PEI100X + 0901 61C102 + 1000 63C815 +1001 Initio + 0010 PCI 1616 Measurement card with 32 digital I/O lines + 0011 OPTO-PCI Opto-Isolated digital I/O board + 0012 PCI-AD/DA Analogue I/O board + 0013 PCI-OPTO-RELAIS Digital I/O board with relay outputs + 0014 PCI-Counter/Timer Counter Timer board + 0015 PCI-DAC416 Analogue output board + 0016 PCI-MFB Analogue 
I/O board + 0017 PROTO-3 PCI Prototyping board + 9100 INI-9100/9100W SCSI Host +1002 ATI Technologies Inc + 4158 68800AX [Mach32] + 4354 215CT [Mach64 CT] + 4358 210888CX [Mach64 CX] + 4554 210888ET [Mach64 ET] + 4654 Mach64 VT + 4742 3D Rage Pro AGP 1X/2X + 1028 4082 Optiplex GX1 Onboard Display Adapter + 8086 4152 Rage 3D Pro AGP + 4744 3D Rage Pro AGP 1X + 4747 3D Rage Pro + 4749 3D Rage Pro + 474c Rage XC + 474d Rage XL AGP + 474e Rage XC AGP + 474f Rage XL + 4750 3D Rage Pro 215GP + 4751 3D Rage Pro 215GQ + 4752 Rage XL + 4753 Rage XC + 4754 3D Rage I/II 215GT [Mach64 GT] + 4755 3D Rage II+ 215GTB [Mach64 GTB] + 4756 3D Rage IIC 215IIC [Mach64 GT IIC] + 4757 3D Rage IIC AGP + 1028 0089 Rage 3D IIC + 1028 4082 Rage 3D IIC + 1028 8082 Rage 3D IIC + 1028 c082 Rage 3D IIC + 4758 210888GX [Mach64 GX] + 4759 3D Rage IIC + 475a 3D Rage IIC AGP + 1002 0087 Rage 3D IIC + 4c42 3D Rage LT Pro AGP-133 + 0e11 b0e8 Rage 3D LT Pro + 0e11 b10e 3D Rage LT Pro (Compaq Armada 1750) + 1028 0085 Rage 3D LT Pro + 4c44 3D Rage LT Pro AGP-66 + 4c45 Rage Mobility M3 AGP + 4c46 Rage Mobility M3 AGP 2x + 4c47 3D Rage LT-G 215LG + 4c49 3D Rage LT Pro + 4c4d Rage Mobility P/M AGP 2x + 4c4e Rage Mobility L AGP 2x + 4c50 3D Rage LT Pro + 4c51 3D Rage LT Pro + 4c52 Rage Mobility P/M + 4c53 Rage Mobility L + 4c54 264LT [Mach64 LT] + 4c57 Radeon Mobility M6 LW + 4c59 Radeon Mobility M6 LY + 4c5a Radeon Mobility M6 LZ + 4d46 Rage Mobility M4 AGP + 4d4c Rage Mobility M4 AGP + 5041 Rage 128 PA + 5042 Rage 128 PB + 5043 Rage 128 PC + 5044 Rage 128 PD + 5045 Rage 128 PE + 5046 Rage 128 PF + 1002 2000 Rage Fury MAXX AGP 4x (TMDS) (VGA device) + 1002 2001 Rage Fury MAXX AGP 4x (TMDS) (Extra device?!) + 5047 Rage 128 PG + 5048 Rage 128 PH + 5049 Rage 128 PI + 504a Rage 128 PJ + 504b Rage 128 PK + 504c Rage 128 PL + 504d Rage 128 PM + 504e Rage 128 PN + 504f Rage 128 PO + 5050 Rage 128 PP + 5051 Rage 128 PQ + 5052 Rage 128 PR + 5053 Rage 128 PS + 5054 Rage 128 PT + 5055 Rage 128 PU + 5056 Rage 128 PV + 5057 Rage 128 PW + 5058 Rage 128 PX + 5144 Radeon QD + 5145 Radeon QE + 5146 Radeon QF + 5147 Radeon QG + 5159 Radeon VE QY + 515a Radeon VE QZ + 5245 Rage 128 RE + 5246 Rage 128 RF + 5247 Rage 128 RG + 524b Rage 128 RK + 524c Rage 128 RL + 5345 Rage 128 SE + 5346 Rage 128 SF + 5347 Rage 128 SG + 5348 Rage 128 4x + 534b Rage 128 SK + 534c Rage 128 SL + 534d Rage 128 SM + 534e Rage 128 SN + 5354 Mach 64 VT + 1002 5654 Mach 64 reference + 5446 Rage 128 Pro TF + 544c Rage 128 Pro TL + 5452 Rage 128 Pro TR + 5654 264VT [Mach64 VT] + 1002 5654 Mach64VT Reference + 5655 264VT3 [Mach64 VT3] + 5656 264VT4 [Mach64 VT4] +1003 ULSI Systems + 0201 US201 +1004 VLSI Technology Inc + 0005 82C592-FC1 + 0006 82C593-FC1 + 0007 82C594-AFC2 + 0008 82C596/7 [Wildcat] + 0009 82C597-AFC2 + 000c 82C541 [Lynx] + 000d 82C543 [Lynx] + 0101 82C532 + 0102 82C534 + 0103 82C538 + 0104 82C535 + 0105 82C147 + 0200 82C975 + 0280 82C925 + 0304 QSound ThunderBird PCI Audio + 1004 0304 QSound ThunderBird PCI Audio + 122d 1206 DSP368 Audio + 1483 5020 XWave Thunder 3D Audio + 0305 QSound ThunderBird PCI Audio Gameport + 1004 0305 QSound ThunderBird PCI Audio Gameport + 122d 1207 DSP368 Audio Gameport + 1483 5021 XWave Thunder 3D Audio Gameport + 0306 QSound ThunderBird PCI Audio Support Registers + 1004 0306 QSound ThunderBird PCI Audio Support Registers + 122d 1208 DSP368 Audio Support Registers + 1483 5022 XWave Thunder 3D Audio Support Registers + 0702 VAS96011 [Golden Gate II] +1005 Avance Logic Inc. 
[ALI] + 2064 ALG2032/2064 + 2128 ALG2364A + 2301 ALG2301 + 2302 ALG2302 + 2364 ALG2364 + 2464 ALG2364A + 2501 ALG2564A/25128A +1006 Reply Group +1007 NetFrame Systems Inc +1008 Epson +100a Phoenix Technologies +100b National Semiconductor Corporation + 0001 DP83810 + 0002 87415/87560 IDE + 000e 87560 Legacy I/O + 000f OHCI Compliant FireWire Controller + 0011 National PCI System I/O + 0012 USB Controller + 0020 DP83815 (MacPhyter) Ethernet Controller + 0022 DP83820 10/100/1000 Ethernet Controller + d001 87410 IDE +100c Tseng Labs Inc + 3202 ET4000/W32p rev A + 3205 ET4000/W32p rev B + 3206 ET4000/W32p rev C + 3207 ET4000/W32p rev D + 3208 ET6000 + 4702 ET6300 +100d AST Research Inc +100e Weitek + 9000 P9000 + 9001 P9000 + 9100 P9100 +1010 Video Logic, Ltd. +1011 Digital Equipment Corporation + 0001 DECchip 21050 + 0002 DECchip 21040 [Tulip] + 0004 DECchip 21030 [TGA] + 0007 NVRAM [Zephyr NVRAM] + 0008 KZPSA [KZPSA] + 0009 DECchip 21140 [FasterNet] + 10b8 2001 SMC9332BDT EtherPower 10/100 + 10b8 2002 SMC9332BVT EtherPower T4 10/100 + 10b8 2003 SMC9334BDT EtherPower 10/100 (1-port) + 1109 2400 ANA-6944A/TX Fast Ethernet + 1112 2300 RNS2300 Fast Ethernet + 1112 2320 RNS2320 Fast Ethernet + 1112 2340 RNS2340 Fast Ethernet + 1113 1207 EN-1207-TX Fast Ethernet + 1186 1100 DFE-500TX Fast Ethernet + 1186 1112 DFE-570TX Fast Ethernet + 1186 1140 DFE-660 Cardbus Ethernet 10/100 + 1186 1142 DFE-660 Cardbus Ethernet 10/100 + 1282 9100 AEF-380TXD Fast Ethernet + 1385 1100 FA310TX Fast Ethernet + 2646 0001 KNE100TX Fast Ethernet + 000a 21230 Video Codec + 000d PBXGB [TGA2] + 000f DEFPA + 0014 DECchip 21041 [Tulip Pass 3] + 1186 0100 DE-530+ + 0016 DGLPB [OPPO] + 0019 DECchip 21142/43 + 1011 500b DE500 Fast Ethernet + 1014 0001 10/100 EtherJet Cardbus + 1025 0315 ALN315 Fast Ethernet + 108d 0016 Rapidfire 2327 10/100 Ethernet + 10b8 2005 SMC8032DT Extreme Ethernet 10/100 + 10ef 8169 Cardbus Fast Ethernet + 1109 2a00 ANA-6911A/TX Fast Ethernet + 1109 2b00 ANA-6911A/TXC Fast Ethernet + 1109 3000 ANA-6922/TX Fast Ethernet + 1113 1207 Cheetah Fast Ethernet + 1113 2220 Cardbus Fast Ethernet + 115d 0002 Cardbus Ethernet 10/100 + 1179 0203 Fast Ethernet + 1179 0204 Cardbus Fast Ethernet + 1186 1100 DFE-500TX Fast Ethernet + 1186 1101 DFE-500TX Fast Ethernet + 1186 1102 DFE-500TX Fast Ethernet + 1266 0004 Eagle Fast EtherMAX + 12af 0019 NetFlyer Cardbus Fast Ethernet + 1374 0001 Cardbus Ethernet Card 10/100 + 1395 0001 10/100 Ethernet CardBus PC Card + 8086 0001 EtherExpress PRO/100 Mobile CardBus 32 + 0021 DECchip 21052 + 0022 DECchip 21150 + 0023 DECchip 21150 + 0024 DECchip 21152 + 0025 DECchip 21153 + 0026 DECchip 21154 + 0045 DECchip 21553 + 0046 DECchip 21554 + 103c 10c2 Hewlett-Packard NetRAID-4M + 9005 0365 Adaptec 5400S + 9005 1364 Dell PowerEdge RAID Controller 2 + 9005 1365 Dell PowerEdge RAID Controller 2 + 1065 StrongARM DC21285 + 1069 0020 DAC960P +1012 Micronics Computers Inc +1013 Cirrus Logic + 0038 GD 7548 + 0040 GD 7555 Flat Panel GUI Accelerator + 004c GD 7556 Video/Graphics LCD/CRT Ctrlr + 00a0 GD 5430/40 [Alpine] + 00a2 GD 5432 [Alpine] + 00a4 GD 5434-4 [Alpine] + 00a8 GD 5434-8 [Alpine] + 00ac GD 5436 [Alpine] + 00b0 GD 5440 + 00b8 GD 5446 + 00bc GD 5480 + 1013 00bc CL-GD5480 + 00d0 GD 5462 + 00d2 GD 5462 [Laguna I] + 00d4 GD 5464 [Laguna] + 00d6 GD 5465 [Laguna] + 00e8 GD 5436U + 1100 CL 6729 + 1110 PD 6832 + 1112 PD 6834 PCMCIA/CardBus Ctrlr + 1113 PD 6833 PCMCIA/CardBus Ctrlr + 1200 GD 7542 [Nordic] + 1202 GD 7543 [Viking] + 1204 GD 7541 [Nordic Light] + 4400 CD 4400 + 6001 CS 4610/11 
[CrystalClear SoundFusion Audio Accelerator] + 1014 1010 CS4610 SoundFusion Audio Accelerator + 6003 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator] + 1013 4280 Crystal SoundFusion PCI Audio Accelerator + 6005 Crystal CS4281 PCI Audio + 1013 4281 Crystal CS4281 PCI Audio + 10cf 10a8 Crystal CS4281 PCI Audio + 10cf 10a9 Crystal CS4281 PCI Audio + 10cf 10aa Crystal CS4281 PCI Audio + 10cf 10ab Crystal CS4281 PCI Audio + 10cf 10ac Crystal CS4281 PCI Audio + 10cf 10ad Crystal CS4281 PCI Audio + 10cf 10b4 Crystal CS4281 PCI Audio + 1179 0001 Crystal CS4281 PCI Audio + 14c0 000c Crystal CS4281 PCI Audio +1014 IBM + 0002 PCI to MCA Bridge + 0005 Alta Lite + 0007 Alta MP + 000a Fire Coral + 0017 CPU to PCI Bridge + 0018 TR Auto LANstreamer + 001b GXT-150P + 001d 82G2675 + 0020 MCA + 0022 IBM27-82351 + 002d Python + 002e ServeRAID-3x + 0036 Miami + 003a CPU to PCI Bridge + 003e 16/4 Token ring UTP/STP controller + 1014 003e Token-Ring Adapter + 1014 00cd Token-Ring Adapter + Wake-On-LAN + 1014 00ce 16/4 Token-Ring Adapter 2 + 1014 00cf 16/4 Token-Ring Adapter Special + 1014 00e4 High-Speed 100/16/4 Token-Ring Adapter + 1014 00e5 16/4 Token-Ring Adapter 2 + Wake-On-LAN + 1014 016d iSeries 2744 Card + 0045 SSA Adapter + 0046 MPIC interrupt controller + 0047 PCI to PCI Bridge + 0048 PCI to PCI Bridge + 0049 Warhead SCSI Controller + 004e ATM Controller (14104e00) + 004f ATM Controller (14104f00) + 0050 ATM Controller (14105000) + 0053 25 MBit ATM Controller + 0057 MPEG PCI Bridge + 005c i82557B 10/100 + 007c ATM Controller (14107c00) + 007d 3780IDSP [MWave] + 0090 GXT 3000P + 1014 008e GXT-3000P + 0095 20H2999 PCI Docking Bridge + 0096 Chukar chipset SCSI controller + 1014 0097 iSeries 2778 DASD IOA + 1014 0098 iSeries 2763 DASD IOA + 1014 0099 iSeries 2748 DASD IOA + 00a5 ATM Controller (1410a500) + 00a6 ATM 155MBPS MM Controller (1410a600) + 00b7 256-bit Graphics Rasterizer [Fire GL1] + 1902 00b8 Fire GL1 + 00be ATM 622MBPS Controller (1410be00) + 0142 Yotta Video Compositor Input + 1014 0143 Yotta Input Controller (ytin) + 0144 Yotta Video Compositor Output + 1014 0145 Yotta Output Controller (ytout) + 0156 405GP PLB to PCI Bridge + 01bd Netfinity ServeRAID controller + 01be ServeRAID-4M + 01bf ServeRAID-4L + 022e ServeRAID-4H + ffff MPIC-2 interrupt controller +1015 LSI Logic Corp of Canada +1016 ICL Personal Systems +1017 SPEA Software AG + 5343 SPEA 3D Accelerator +1018 Unisys Systems +1019 Elitegroup Computer Systems +101a AT&T GIS (NCR) + 0005 100VG ethernet +101b Vitesse Semiconductor +101c Western Digital + 0193 33C193A + 0196 33C196A + 0197 33C197A + 0296 33C296A + 3193 7193 + 3197 7197 + 3296 33C296A + 4296 34C296 + 9710 Pipeline 9710 + 9712 Pipeline 9712 + c24a 90C +101e American Megatrends Inc. + 1960 MegaRAID + 1028 0471 PowerEdge RAID Controller 3/QC + 1028 0493 PowerEdge RAID Controller 3/DC + 9010 MegaRAID + 9030 EIDE Controller + 9031 EIDE Controller + 9032 EIDE & SCSI Controller + 9033 SCSI Controller + 9040 Multimedia card + 9060 MegaRAID +101f PictureTel +1020 Hitachi Computer Products +1021 OKI Electric Industry Co. Ltd. 
+1022 Advanced Micro Devices [AMD] + 2000 79c970 [PCnet LANCE] + 103c 104c Ethernet with LAN remote power Adapter + 103c 1064 Ethernet with LAN remote power Adapter + 103c 1065 Ethernet with LAN remote power Adapter + 103c 106c Ethernet with LAN remote power Adapter + 103c 106e Ethernet with LAN remote power Adapter + 103c 10ea Ethernet with LAN remote power Adapter + 1113 1220 EN1220 10/100 Fast Ethernet + 1259 2450 AT-2450 10/100 Fast Ethernet + 1259 2700 AT-2700TX 10/100 Fast Ethernet + 1259 2701 AT-2700FX 100Mb Ethernet + 2001 79c978 [HomePNA] + 2020 53c974 [PCscsi] + 2040 79c974 + 7006 AMD-751 [Irongate] System Controller + 7007 AMD-751 [Irongate] AGP Bridge + 700e AMD-760 [Irongate] System Controller + 700f AMD-760 [Irongate] AGP Bridge + 7400 AMD-755 [Cobra] ISA + 7401 AMD-755 [Cobra] IDE + 7403 AMD-755 [Cobra] ACPI + 7404 AMD-755 [Cobra] USB + 7408 AMD-756 [Viper] ISA + 7409 AMD-756 [Viper] IDE + 740b AMD-756 [Viper] ACPI + 740c AMD-756 [Viper] USB + 7410 AMD-765 [Viper] ISA + 7411 AMD-765 [Viper] IDE + 7413 AMD-765 [Viper] ACPI + 7414 AMD-765 [Viper] USB + 7440 AMD-768 [??] ISA + 7441 AMD-768 [??] IDE + 7443 AMD-768 [??] ACPI + 7448 AMD-768 [??] PCI + 7449 AMD-768 [??] USB +1023 Trident Microsystems + 0194 82C194 + 2000 4DWave DX + 2001 4DWave NX + 8400 CyberBlade/i7 + 1023 8400 CyberBlade i7 AGP + 8420 CyberBlade/i7d + 0e11 b15a CyberBlade i7 AGP + 8500 CyberBlade/i1 + 8520 CyberBlade i1 + 0e11 b16e CyberBlade i1 AGP + 1023 8520 CyberBlade i1 AGP + 9320 TGUI 9320 + 9350 GUI Accelerator + 9360 Flat panel GUI Accelerator + 9382 Cyber 9382 [Reference design] + 9383 Cyber 9383 [Reference design] + 9385 Cyber 9385 [Reference design] + 9386 Cyber 9386 + 9388 Cyber 9388 + 9397 Cyber 9397 + 939a Cyber 9397DVD + 9420 TGUI 9420 + 9430 TGUI 9430 + 9440 TGUI 9440 + 9460 TGUI 9460 + 9470 TGUI 9470 + 9520 Cyber 9520 + 9525 Cyber 9525 + 9540 Cyber 9540 + 9660 TGUI 9660/968x/968x + 9680 TGUI 9680 + 9682 TGUI 9682 + 9683 TGUI 9683 + 9685 ProVIDIA 9685 + 9750 3DIm`age 975 + 1014 9750 3DImage 9750 + 1023 9750 3DImage 9750 + 9753 TGUI 9753 + 9754 TGUI 9754 + 9759 TGUI 975 + 9783 TGUI 9783 + 9785 TGUI 9785 + 9850 3DImage 9850 + 9880 Blade 3D PCI/AGP + 1023 9880 Blade 3D + 9910 CyberBlade/XP + 9930 CyberBlade/XPm +1024 Zenith Data Systems +1025 Acer Incorporated [ALI] + 1435 M1435 + 1445 M1445 + 1449 M1449 + 1451 M1451 + 1461 M1461 + 1489 M1489 + 1511 M1511 + 1512 ALI M1512 Aladdin + 1513 M1513 + 1521 ALI M1521 Aladdin III CPU Bridge + 10b9 1521 ALI M1521 Aladdin III CPU Bridge + 1523 ALI M1523 ISA Bridge + 10b9 1523 ALI M1523 ISA Bridge + 1531 M1531 Northbridge [Aladdin IV/IV+] + 1533 M1533 PCI-to-ISA Bridge + 10b9 1533 ALI M1533 Aladdin IV/V ISA South Bridge + 1535 M1535 PCI Bridge + Super I/O + FIR + 1541 M1541 Northbridge [Aladdin V] + 10b9 1541 ALI M1541 Aladdin V/V+ AGP+PCI North Bridge + 1542 M1542 Northbridge [Aladdin V] + 1543 M1543 PCI-to-ISA Bridge + Super I/O + FIR + 1561 M1561 Northbridge [Aladdin 7] + 1621 M1621 Northbridge [Aladdin-Pro II] + 1631 M1631 Northbridge+3D Graphics [Aladdin TNT2] + 1641 M1641 Northbridge [Aladdin-Pro IV] + 3141 M3141 + 3143 M3143 + 3145 M3145 + 3147 M3147 + 3149 M3149 + 3151 M3151 + 3307 M3307 MPEG-I Video Controller + 3309 M3309 MPEG-II Video w/ Software Audio Decoder + 3321 M3321 MPEG-II Audio/Video Decoder + 5212 ALI M4803 + 5215 ALI PCI EIDE Controller + 5217 M5217H + 5219 M5219 + 5225 M5225 + 5229 M5229 + 5235 M5235 + 5237 ALI M5237 PCI USB Host Controller + 5240 EIDE Controller + 5241 PCMCIA Bridge + 5242 General Purpose Controller + 5243 PCI to PCI 
Bridge Controller + 5244 Floppy Disk Controller + 5247 ALI M1541 PCI to PCI Bridge + 5251 M5251 P1394 OHCI Controller + 5427 ALI PCI to AGP Bridge + 5451 ALI M5451 PCI AC-Link Controller Audio Device + 5453 ALI M5453 PCI AC-Link Controller Modem Device + 7101 ALI M7101 PCI PMU Power Management Controller + 10b9 7101 ALI M7101 PCI PMU Power Management Controller +1028 Dell Computer Corporation + 0001 PowerEdge Expandable RAID Controller 2/Si + 0002 PowerEdge Expandable RAID Controller 3/Di + 0003 PowerEdge Expandable RAID Controller 3/Si + 0004 PowerEdge Expandable RAID Controller 3/Si + 0005 PowerEdge Expandable RAID Controller 3/Di + 0006 PowerEdge Expandable RAID Controller 3/Di + 0008 PowerEdge Expandable RAID Controller 3/Di + 000a PowerEdge Expandable RAID Controller 3/Di +1029 Siemens Nixdorf IS +102a LSI Logic + 0000 HYDRA + 0010 ASPEN +102b Matrox Graphics, Inc. +# DJ: I've a suspicion that 0010 is a duplicate of 0d10. + 0010 MGA-I [Impression?] + 0518 MGA-II [Athena] + 0519 MGA 2064W [Millennium] + 051a MGA 1064SG [Mystique] + 102b 1100 MGA-1084SG Mystique + 102b 1200 MGA-1084SG Mystique + 1100 102b MGA-1084SG Mystique + 110a 0018 Scenic Pro C5 (D1025) + 051b MGA 2164W [Millennium II] + 102b 051b MGA-2164W Millennium II + 102b 1100 MGA-2164W Millennium II + 102b 1200 MGA-2164W Millennium II + 051e MGA 1064SG [Mystique] AGP + 051f MGA 2164W [Millennium II] AGP + 0520 MGA G200 + 102b dbc2 G200 Multi-Monitor + 102b dbc8 G200 Multi-Monitor + 102b dbe2 G200 Multi-Monitor + 102b dbe8 G200 Multi-Monitor + 102b ff03 Millennium G200 SD + 102b ff04 Marvel G200 + 0521 MGA G200 AGP + 1014 ff03 Millennium G200 AGP + 102b 48e9 Mystique G200 AGP + 102b 48f8 Millennium G200 SD AGP + 102b 4a60 Millennium G200 LE AGP + 102b 4a64 Millennium G200 AGP + 102b c93c Millennium G200 AGP + 102b c9b0 Millennium G200 AGP + 102b c9bc Millennium G200 AGP + 102b ca60 Millennium G250 LE AGP + 102b ca6c Millennium G250 AGP + 102b dbbc Millennium G200 AGP + 102b dbc2 Millennium G200 MMS (Dual G200) + 102b dbc8 Millennium G200 MMS (Dual G200) + 102b dbe2 Millennium G200 MMS (Quad G200) + 102b dbe8 Millennium G200 MMS (Quad G200) + 102b f806 Mystique G200 Video AGP + 102b ff00 MGA-G200 AGP + 102b ff02 Mystique G200 AGP + 102b ff03 Millennium G200 AGP + 102b ff04 Marvel G200 AGP + 110a 0032 MGA-G200 AGP + 0525 MGA G400 AGP + 0e11 b16f Matrox MGA-G400 AGP + 102b 0328 Millennium G400 16Mb SDRAM + 102b 0338 Millennium G400 16Mb SDRAM + 102b 0378 Millennium G400 32Mb SDRAM + 102b 0541 Millennium G450 Dual Head + 102b 0542 Millennium G450 Dual Head LX + 102b 0641 Millennium G450 32Mb SDRAM + 102b 0642 Millennium G450 32Mb SDRAM Dual Head LX + 102b 07c0 Millennium G450 Dual Head LE + 102b 07c1 Millennium G450 SDR Dual Head + 102b 0d41 Millennium G450 Dual Head PCI + 102b 0d42 Millennium G450 Dual Head LX PCI + 102b 0e00 Marvel G450 eTV + 102b 0e01 Marvel G450 eTV + 102b 0e02 Marvel G450 eTV + 102b 0e03 Marvel G450 eTV + 102b 0f80 Millennium G450 Low Profile + 102b 0f81 Millennium G450 Low Profile + 102b 0f82 Millennium G450 Low Profile DVI + 102b 0f83 Millennium G450 Low Profile DVI + 102b 19d8 Millennium G400 16Mb SGRAM + 102b 19f8 Millennium G400 32Mb SGRAM + 102b 2159 Millennium G400 Dual Head 16Mb + 102b 2179 Millennium G400 MAX/Dual Head 32Mb + 102b 217d Millennium G400 Dual Head Max + 102b 2f58 Millennium G400 + 102b 2f78 Millennium G400 + 102b 3693 Marvel G400 AGP + 1705 0001 Millennium G450 32MB SGRAM + b16f 0e11 Matrox MGA-G400 AGP + 0d10 MGA Ultima/Impression + 1000 MGA G100 [Productiva] + 102b ff01 Productiva 
G100 + 102b ff05 Productiva G100 Multi-Monitor + 1001 MGA G100 [Productiva] AGP + 102b 1001 MGA-G100 AGP + 102b ff00 MGA-G100 AGP + 102b ff01 MGA-G100 Productiva AGP + 102b ff03 Millennium G100 AGP + 102b ff04 MGA-G100 AGP + 102b ff05 MGA-G100 Productiva AGP Multi-Monitor + 110a 001e MGA-G100 AGP + 2007 MGA Mistral + 2527 MGA G550 AGP + 102b 0f84 Millennium G550 Dual Head DDR 32Mb + 4536 VIA Framegrabber + 6573 Shark 10/100 Multiport SwitchNIC +102c Chips and Technologies + 00b8 F64310 + 00c0 F69000 HiQVideo + 00d0 F65545 + 00d8 F65545 + 00dc F65548 + 00e0 F65550 + 00e4 F65554 + 00e5 F65555 HiQVPro + 0e11 b049 Armada 1700 Laptop Display Controller + 00f0 F68554 + 00f4 F68554 HiQVision + 00f5 F68555 +102d Wyse Technology Inc. + 50dc 3328 Audio +102e Olivetti Advanced Technology +102f Toshiba America + 0009 r4x00 + 0020 ATM Meteor 155 +1030 TMC Research +1031 Miro Computer Products AG + 5601 DC20 ASIC + 5607 Video I/O & motion JPEG compressor + 5631 Media 3D + 6057 MiroVideo DC10/DC30+ +1032 Compaq +1033 NEC Corporation + 0001 PCI to 486-like bus Bridge + 0002 PCI to VL98 Bridge + 0003 ATM Controller + 0004 R4000 PCI Bridge + 0005 PCI to 486-like bus Bridge + 0006 GUI Accelerator + 0007 PCI to UX-Bus Bridge + 0008 GUI Accelerator + 0009 GUI Accelerator for W98 + 001a [Nile II] + 0021 Vrc4373 [Nile I] + 0029 PowerVR PCX1 + 002a PowerVR 3D + 0035 USB + 003e NAPCCARD Cardbus Controller + 0046 PowerVR PCX2 [midas] + 005a Vrc5074 [Nile 4] + 0063 Firewarden + 0067 PowerVR Neon 250 Chipset + 1010 0020 PowerVR Neon 250 AGP 32Mb + 1010 0080 PowerVR Neon 250 AGP 16Mb + 1010 0088 PowerVR Neon 250 16Mb + 1010 0090 PowerVR Neon 250 AGP 16Mb + 1010 0098 PowerVR Neon 250 16Mb + 1010 00a0 PowerVR Neon 250 AGP 32Mb + 1010 00a8 PowerVR Neon 250 32Mb + 1010 0120 PowerVR Neon 250 AGP 32Mb + 0074 56k Voice Modem + 1033 8014 RCV56ACF 56k Voice Modem + 009b Vrc5476 +1034 Framatome Connectors USA Inc. +1035 Comp. & Comm. Research Lab +1036 Future Domain Corp. 
+ 0000 TMC-18C30 [36C70] +1037 Hitachi Micro Systems +1038 AMP, Inc +1039 Silicon Integrated Systems [SiS] + 0001 5591/5592 AGP + 0002 SG86C202 + 0006 85C501/2/3 + 0008 85C503/5513 + 0009 ACPI + 0018 SiS85C503/5513 (LPC Bridge) + 0200 5597/5598 VGA + 1039 0000 SiS5597 SVGA (Shared RAM) + 0204 82C204 + 0205 SG86C205 + 0406 85C501/2 + 0496 85C496 + 0530 530 Host + 0540 540 Host + 0597 5513C + 0601 85C601 + 0620 620 Host + 0630 630 Host + 0730 730 Host + 0735 735 Host + 0900 SiS900 10/100 Ethernet + 1039 0900 SiS900 10/100 Ethernet Adapter + 3602 83C602 + 5107 5107 + 5300 SiS540 PCI Display Adapter + 5401 486 PCI Chipset + 5511 5511/5512 + 5513 5513 [IDE] + 1039 5513 SiS5513 EIDE Controller (A,B step) + 5517 5517 + 5571 5571 + 5581 5581 Pentium Chipset + 5582 5582 + 5591 5591/5592 Host + 5596 5596 Pentium Chipset + 5597 5597 [SiS5582] + 5600 5600 Host + 6204 Video decoder & MPEG interface + 6205 VGA Controller + 6236 6236 3D-AGP + 6300 SiS630 GUI Accelerator+3D + 6306 6306 3D-AGP + 1039 6306 SiS530,620 GUI Accelerator+3D + 6326 86C326 + 1039 6326 SiS6326 GUI Accelerator + 1092 0a50 SpeedStar A50 + 1092 0a70 SpeedStar A70 + 1092 4910 SpeedStar A70 + 1092 4920 SpeedStar A70 + 1569 6326 SiS6326 GUI Accelerator + 7001 7001 + 7007 OHCI Compliant FireWire Controller + 7012 SiS7012 PCI Audio Accelerator + 7016 SiS7016 10/100 Ethernet Adapter + 1039 7016 SiS7016 10/100 Ethernet Adapter + 7018 SiS PCI Audio Accelerator + 1014 01b6 SiS PCI Audio Accelerator + 1014 01b7 SiS PCI Audio Accelerator + 1019 7018 SiS PCI Audio Accelerator + 1025 000e SiS PCI Audio Accelerator + 1025 0018 SiS PCI Audio Accelerator + 1039 7018 SiS PCI Audio Accelerator + 1043 800b SiS PCI Audio Accelerator + 1054 7018 SiS PCI Audio Accelerator + 107d 5330 SiS PCI Audio Accelerator + 107d 5350 SiS PCI Audio Accelerator + 1170 3209 SiS PCI Audio Accelerator + 1462 400a SiS PCI Audio Accelerator + 14a4 2089 SiS PCI Audio Accelerator + 14cd 2194 SiS PCI Audio Accelerator + 14ff 1100 SiS PCI Audio Accelerator + 152d 8808 SiS PCI Audio Accelerator + 1558 1103 SiS PCI Audio Accelerator + 1558 2200 SiS PCI Audio Accelerator + 1563 7018 SiS PCI Audio Accelerator + 15c5 0111 SiS PCI Audio Accelerator + 270f a171 SiS PCI Audio Accelerator + a0a0 0022 SiS PCI Audio Accelerator +103a Seiko Epson Corporation +103b Tatung Co. of America +103c Hewlett-Packard Company + 1005 A4977A Visualize EG + 1030 J2585A + 1031 J2585B + 103c 1040 J2973A DeskDirect 10BaseT NIC + 103c 1041 J2585B DeskDirect 10/100VG NIC + 103c 1042 J2970A DeskDirect 10BaseT/2 NIC + 1040 J2973A DeskDirect 10BaseT NIC + 1041 J2585B DeskDirect 10/100 NIC + 1042 J2970A DeskDirect 10BaseT/2 NIC + 1064 79C970 PCnet Ethernet Controller + 10c1 NetServer Smart IRQ Router + 10ed TopTools Remote Control + 1200 82557B 10/100 NIC + 1219 NetServer PCI Hot-Plug Controller + 121a NetServer SMIC Controller + 121b NetServer Legacy COM Port Decoder + 121c NetServer PCI COM Port Decoder + 2910 E2910A + 2925 E2925A +103e Solliday Engineering +103f Synopsys/Logic Modeling Group +1040 Accelgraphics Inc. +1041 Computrend +1042 Micron + 1000 FDC 37C665 + 1001 37C922 + 3000 Samurai_0 + 3010 Samurai_1 + 3020 Samurai_IDE +1043 Asustek Computer, Inc. +1044 Distributed Processing Technology + 1012 Domino RAID Engine + a400 SmartCache/Raid I-IV Controller + a500 PCI Bridge + a501 SmartRAID V Controller +1045 OPTi Inc. 
+ a0f8 82C750 [Vendetta] USB Controller + c101 92C264 + c178 92C178 + c556 82X556 [Viper] + c557 82C557 [Viper-M] + c558 82C558 [Viper-M ISA+IDE] + c567 82C750 [Vendetta], device 0 + c568 82C750 [Vendetta], device 1 + c569 82C579 [Viper XPress+ Chipset] + c621 82C621 + c700 82C700 + c701 82C701 [FireStar Plus] + c814 82C814 [Firebridge 1] + c822 82C822 + c824 82C824 + c825 82C825 [Firebridge 2] + c832 82C832 + c861 82C861 + c895 82C895 + c935 EV1935 ECTIVA MachOne PCI Audio + d568 82C825 [Firebridge 2] +1046 IPC Corporation, Ltd. +1047 Genoa Systems Corp +1048 Elsa AG + 1000 QuickStep 1000 + 3000 QuickStep 3000 +1049 Fountain Technologies, Inc. +104a SGS Thomson Microelectronics + 0008 STG 2000X + 0009 STG 1764X + 1746 STG 1764X + 3520 MPEG-II decoder card +104b BusLogic + 0140 BT-946C (old) [multimaster 01] + 1040 BT-946C (BA80C30) [MultiMaster 10] + 8130 Flashpoint LT +104c Texas Instruments + 0500 100 MBit LAN Controller + 0508 TMS380C2X Compressor Interface + 1000 Eagle i/f AS + 3d04 TVP4010 [Permedia] + 3d07 TVP4020 [Permedia 2] + 1092 0127 FIRE GL 1000 PRO + 1092 0136 FIRE GL 1000 PRO + 1092 0141 FIRE GL 1000 PRO + 1092 0146 FIRE GL 1000 PRO + 1092 0148 FIRE GL 1000 PRO + 1092 0149 FIRE GL 1000 PRO + 1092 0152 FIRE GL 1000 PRO + 1092 0154 FIRE GL 1000 PRO + 1092 0155 FIRE GL 1000 PRO + 1092 0156 FIRE GL 1000 PRO + 1092 0157 FIRE GL 1000 PRO + 1097 3d01 Jeronimo Pro + 3d3d 0100 Reference Permedia 2 3D + 8000 PCILynx/PCILynx2 IEEE 1394 Link Layer Controller + e4bf 1010 CF1-1-SNARE + e4bf 1020 CF1-2-SNARE + 8009 OHCI Compliant FireWire Controller + 8019 TSB12LV23 OHCI Compliant IEEE-1394 Controller + 11bd 000a Studio DV500-1394 + 11bd 000e Studio DV + e4bf 1010 CF2-1-CYMBAL + a001 TDC1570 + a100 TDC1561 + ac10 PCI1050 + ac11 PCI1053 + ac12 PCI1130 + ac13 PCI1031 + ac15 PCI1131 + ac16 PCI1250 + ac17 PCI1220 + ac18 PCI1260 + ac19 PCI1221 + ac1a PCI1210 + ac1b PCI1450 + ac1c PCI1225 + ac1d PCI1251A + ac1e PCI1211 + ac1f PCI1251B + ac20 TI 2030 + ac30 PCI1260 PC card Cardbus Controller + ac40 PCI4450 PC card Cardbus Controller + ac41 PCI4410 PC card Cardbus Controller + ac42 PCI4451 PC card Cardbus Controller + ac50 PCI1410 PC card Cardbus Controller + ac51 PCI1420 + ac52 PCI1451 PC card Cardbus Controller + ac53 PCI1421 PC card Cardbus Controller + fe00 FireWire Host Controller + fe03 12C01A FireWire Host Controller +104d Sony Corporation + 8009 CXD1947Q i.LINK Controller + 8039 CXD3222 i.LINK Controller + 8056 Rockwell HCF 56K modem + 808a Memory Stick Controller +104e Oak Technology, Inc + 0017 OTI-64017 + 0107 OTI-107 [Spitfire] + 0109 Video Adapter + 0111 OTI-64111 [Spitfire] + 0217 OTI-64217 + 0317 OTI-64317 +104f Co-time Computer Ltd +1050 Winbond Electronics Corp + 0000 NE2000 + 0001 W83769F + 0105 W82C105 + 0840 W89C840 + 1050 0001 W89C840 Ethernet Adapter + 1050 0840 W89C840 Ethernet Adapter + 0940 W89C940 + 5a5a W89C940F + 9970 W9970CF +1051 Anigma, Inc. +1052 ?Young Micro Systems +1053 Young Micro Systems +1054 Hitachi, Ltd +1055 EFAR Microsystems + 9130 EIDE Controller + 9460 PCI to ISA Bridge + 9462 USB Universal Host Controller [OHCI] + 9463 Power Management Controller [Bridge] +1056 ICL +# Motorola made a mistake and used 1507 instead of 1057 in some chips. Please look at the 1507 entry as well when updating this. 
+1057 Motorola + 0001 MPC105 [Eagle] + 0002 MPC106 [Grackle] + 0100 MC145575 [HFC-PCI] + 0431 KTI829c 100VG + 1801 Audio I/O Controller (MIDI) + ecc0 0030 Layla + 4801 Raven + 4802 Falcon + 4803 Hawk + 4806 CPX8216 + 4d68 20268 + 5600 SM56 PCI Modem + 1057 0300 SM56 PCI Speakerphone Modem + 1057 0301 SM56 PCI Voice Modem + 1057 0302 SM56 PCI Fax Modem + 1057 5600 SM56 PCI Voice modem + 13d2 0300 SM56 PCI Speakerphone Modem + 13d2 0301 SM56 PCI Voice modem + 13d2 0302 SM56 PCI Fax Modem + 1436 0300 SM56 PCI Speakerphone Modem + 1436 0301 SM56 PCI Voice modem + 1436 0302 SM56 PCI Fax Modem + 144f 100c SM56 PCI Fax Modem + 1494 0300 SM56 PCI Speakerphone Modem + 1494 0301 SM56 PCI Voice modem + 14c8 0300 SM56 PCI Speakerphone Modem + 14c8 0302 SM56 PCI Fax Modem + 1668 0300 SM56 PCI Speakerphone Modem + 1668 0302 SM56 PCI Fax Modem +1058 Electronics & Telecommunications RSH +1059 Teknor Industrial Computers Inc +105a Promise Technology, Inc. + 0d30 20265 + 4d30 20267 + 4d33 20246 + 4d38 20262 + 4d68 20268 + 6268 20268R + 4d69 20269 + 5300 DC5300 +105b Foxconn International, Inc. +105c Wipro Infotech Limited +105d Number 9 Computer Company + 2309 Imagine 128 + 2339 Imagine 128-II + 105d 0000 Imagine 128 series 2 4Mb VRAM + 105d 0001 Imagine 128 series 2 4Mb VRAM + 105d 0002 Imagine 128 series 2 4Mb VRAM + 105d 0003 Imagine 128 series 2 4Mb VRAM + 105d 0004 Imagine 128 series 2 4Mb VRAM + 105d 0005 Imagine 128 series 2 4Mb VRAM + 105d 0006 Imagine 128 series 2 4Mb VRAM + 105d 0007 Imagine 128 series 2 4Mb VRAM + 105d 0008 Imagine 128 series 2e 4Mb DRAM + 105d 0009 Imagine 128 series 2e 4Mb DRAM + 105d 000a Imagine 128 series 2 8Mb VRAM + 105d 000b Imagine 128 series 2 8Mb H-VRAM + 493d Imagine 128 T2R [Ticket to Ride] + 5348 Revolution 4 +105e Vtech Computers Ltd +105f Infotronic America Inc +1060 United Microelectronics [UMC] + 0001 UM82C881 + 0002 UM82C886 + 0101 UM8673F + 0881 UM8881 + 0886 UM8886F + 0891 UM8891A + 1001 UM886A + 673a UM8886BF + 673b EIDE Master/DMA + 8710 UM8710 + 886a UM8886A + 8881 UM8881F + 8886 UM8886F + 888a UM8886A + 8891 UM8891A + 9017 UM9017F + 9018 UM9018 + 9026 UM9026 + e881 UM8881N + e886 UM8886N + e88a UM8886N + e891 UM8891N +1061 I.I.T. + 0001 AGX016 + 0002 IIT3204/3501 +1062 Maspar Computer Corp +1063 Ocean Office Automation +1064 Alcatel +1065 Texas Microsystems +1066 PicoPower Technology + 0000 PT80C826 + 0001 PT86C52x [Vesuvius] + 0002 PT80C524 [Nile] + 0005 National PC87550 System Controller + 8002 PT80C524 [Nile] +1067 Mitsubishi Electric + 1002 VG500 [VolumePro Volume Rendering Accelerator] +1068 Diversified Technology +1069 Mylex Corporation + 0001 DAC960P + 0002 DAC960PD + 0010 DAC960PX + ba55 eXtremeRAID support Device +106a Aten Research Inc +106b Apple Computer Inc. 
+	0001 Bandit PowerPC host bridge
+	0002 Grand Central I/O
+	0003 Control Video
+	0004 PlanB Video-In
+	0007 O'Hare I/O
+	000e Hydra Mac I/O
+	0010 Heathrow Mac I/O
+	0017 Paddington Mac I/O
+	0018 UniNorth FireWire
+	0019 KeyLargo USB
+	001e UniNorth PCI
+	001f UniNorth PCI
+	0020 UniNorth AGP
+	0021 UniNorth GMAC
+	0022 KeyLargo Mac I/O
+106c Hyundai Electronics America
+	8801 Dual Pentium ISA/PCI Motherboard
+	8802 PowerPC ISA/PCI Motherboard
+	8803 Dual Window Graphics Accelerator
+	8804 LAN Controller
+	8805 100-BaseT LAN
+106d Sequent Computer Systems
+106e DFI, Inc
+106f City Gate Development Ltd
+1070 Daewoo Telecom Ltd
+1071 Mitac
+1072 GIT Co Ltd
+1073 Yamaha Corporation
+	0001 3D GUI Accelerator
+	0002 YGV615 [RPA3 3D-Graphics Controller]
+	0003 YMF-740
+	0004 YMF-724
+		1073 0004 YMF724-Based PCI Audio Adapter
+	0005 DS1 Audio
+		1073 0005 DS-XG PCI Audio CODEC
+	0006 DS1 Audio
+	0008 DS1 Audio
+		1073 0008 DS-XG PCI Audio CODEC
+	000a DS1L Audio
+		1073 0004 DS-XG PCI Audio CODEC
+		1073 000a DS-XG PCI Audio CODEC
+	000c YMF-740C [DS-1L Audio Controller]
+		107a 000c DS-XG PCI Audio CODEC
+	000d YMF-724F [DS-1 Audio Controller]
+		1073 000d DS-XG PCI Audio CODEC
+	0010 YMF-744B [DS-1S Audio Controller]
+		1073 0006 DS-XG PCI Audio CODEC
+		1073 0010 DS-XG PCI Audio CODEC
+	0012 YMF-754 [DS-1E Audio Controller]
+		1073 0012 DS-XG PCI Audio Codec
+	0020 DS-1 Audio
+	2000 DS2416 Digital Mixing Card
+		1073 2000 DS2416 Digital Mixing Card
+1074 NexGen Microsystems
+	4e78 82c500/1
+1075 Advanced Integrations Research
+1076 Chaintech Computer Co. Ltd
+1077 QLogic Corp.
+	1016 QLA10160
+	1020 ISP1020
+	1022 ISP1022
+	1080 QLA1080
+	1216 QLA12160
+		101e 8471 QLA12160 on AMI MegaRAID
+		101e 8493 QLA12160 on AMI MegaRAID
+	1240 QLA1240
+	1280 QLA1280
+	2020 ISP2020A
+	2100 QLA2100
+	2200 QLA2200
+	2300 QLA2300
+1078 Cyrix Corporation
+	0000 5510 [Grappa]
+	0001 PCI Master
+	0002 5520 [Cognac]
+	0100 5530 Legacy [Kahlua]
+	0101 5530 SMI [Kahlua]
+	0102 5530 IDE [Kahlua]
+	0103 5530 Audio [Kahlua]
+	0104 5530 Video [Kahlua]
+1079 I-Bus
+107a NetWorth
+107b Gateway 2000
+107c LG Electronics [Lucky Goldstar Co. Ltd]
+107d LeadTek Research Inc.
+	0000 P86C850
+107e Interphase Corporation
+	0001 ATM Interface Card
+	0002 100 VG AnyLan Controller
+	0008 155 Mbit ATM Controller
+107f Data Technology Corporation
+	0802 SL82C105
+1080 Contaq Microsystems
+	0600 82C599
+	c691 Cypress CY82C691
+	c693 82c693
+1081 Supermac Technology
+	0d47 Radius PCI to NuBUS Bridge
+1082 EFA Corporation of America
+1083 Forex Computer Corporation
+	0001 FR710
+1084 Parador
+1085 Tulip Computers Int.B.V.
+1086 J. Bond Computer Systems
+1087 Cache Computer
+1088 Microcomputer Systems (M) Son
+1089 Data General Corporation
+108a Bit3 Computer Corp.
+	0001 VME Bridge Model 617
+	0010 VME Bridge Model 618
+	3000 VME Bridge Model 2706
+108c Oakleigh Systems Inc.
+108d Olicom
+	0001 Token-Ring 16/4 PCI Adapter (3136/3137)
+	0002 16/4 Token Ring
+	0004 RapidFire 3139 Token-Ring 16/4 PCI Adapter
+		108d 0004 OC-3139/3140 RapidFire Token-Ring 16/4 Adapter
+	0005 GoCard 3250 Token-Ring 16/4 CardBus PC Card
+	0006 OC-3530 RapidFire Token-Ring 100
+	0007 RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter
+		108d 0007 OC-3141 RapidFire Token-Ring 16/4 Adapter
+	0008 RapidFire 3540 HSTR 100/16/4 PCI Adapter
+		108d 0008 OC-3540 RapidFire HSTR 100/16/4 Adapter
+	0011 OC-2315
+	0012 OC-2325
+	0013 OC-2183/2185
+	0014 OC-2326
+	0019 OC-2327/2250 10/100 Ethernet Adapter
+		108d 0016 OC-2327 Rapidfire 10/100 Ethernet Adapter
+		108d 0017 OC-2250 GoCard 10/100 Ethernet Adapter
+	0021 OC-6151/6152 [RapidFire ATM 155]
+	0022 ATM Adapter
+108e Sun Microsystems Computer Corp.
+	0001 EBUS
+	1000 EBUS
+	1001 Happy Meal
+	1100 RIO EBUS
+	1101 RIO GEM
+	1102 RIO 1394
+	1103 RIO USB
+	2bad GEM
+	5000 Simba Advanced PCI Bridge
+	5043 SunPCI Co-processor
+	8000 Psycho PCI Bus Module
+	8001 Schizo PCI Bus Module
+	a000 Ultra IIi
+	a001 Ultra IIe
+108f Systemsoft
+1090 Encore Computer Corporation
+1091 Intergraph Corporation
+	0020 3D graphics processor
+	0021 3D graphics processor w/Texturing
+	0040 3D graphics frame buffer
+	0041 3D graphics frame buffer
+	0060 Proprietary bus bridge
+	00e4 Powerstorm 4D50T
+	0720 Motion JPEG codec
+1092 Diamond Multimedia Systems
+	00a0 Speedstar Pro SE
+	00a8 Speedstar 64
+	0550 Viper V550
+	08d4 Supra 2260 Modem
+	1092 Viper V330
+	6120 Maximum DVD
+	8810 Stealth SE
+	8811 Stealth 64/SE
+	8880 Stealth
+	8881 Stealth
+	88b0 Stealth 64
+	88b1 Stealth 64
+	88c0 Stealth 64
+	88c1 Stealth 64
+	88d0 Stealth 64
+	88d1 Stealth 64
+	88f0 Stealth 64
+	88f1 Stealth 64
+	9999 DMD-I0928-1 "Monster sound" sound chip
+1093 National Instruments
+	0160 PCI-DIO-96
+	0162 PCI-MIO-16XE-50
+	1170 PCI-MIO-16XE-10
+	1180 PCI-MIO-16E-1
+	1190 PCI-MIO-16E-4
+	1330 PCI-6031E
+	1350 PCI-6071E
+	2a60 PCI-6023E
+	b001 IMAQ-PCI-1408
+	b011 IMAQ-PXI-1408
+	b021 IMAQ-PCI-1424
+	b031 IMAQ-PCI-1413
+	b041 IMAQ-PCI-1407
+	b051 IMAQ-PXI-1407
+	b061 IMAQ-PCI-1411
+	b071 IMAQ-PCI-1422
+	b081 IMAQ-PXI-1422
+	b091 IMAQ-PXI-1411
+	c801 PCI-GPIB
+1094 First International Computers [FIC]
+1095 CMD Technology Inc
+	0640 PCI0640
+	0643 PCI0643
+	0646 PCI0646
+	0647 PCI0647
+	0648 PCI0648
+	0649 PCI0649
+	0650 PBC0650A
+	0670 USB0670
+	0673 USB0673
+	0680 PCI0680
+1096 Alacron
+1097 Appian Technology
+1098 Quantum Designs (H.K.) Ltd
+	0001 QD-8500
+	0002 QD-8580
+1099 Samsung Electronics Co., Ltd
+109a Packard Bell
+109b Gemlight Computer Ltd.
+109c Megachips Corporation
+109d Zida Technologies Ltd.
+109e Brooktree Corporation
+	0350 Bt848 TV with DMA push
+	0351 Bt849A Video capture
+	036c Bt879(??) Video Capture
+		13e9 0070 Win/TV (Video Section)
+	036e Bt878
+		0070 13eb WinTV/GO
+		127a 0001 Bt878 Mediastream Controller NTSC
+		127a 0002 Bt878 Mediastream Controller PAL BG
+		127a 0003 Bt878a Mediastream Controller PAL BG
+		127a 0048 Bt878/832 Mediastream Controller
+		144f 3000 MagicTView CPH060 - Video
+		14f1 0001 Bt878 Mediastream Controller NTSC
+		14f1 0002 Bt878 Mediastream Controller PAL BG
+		14f1 0003 Bt878a Mediastream Controller PAL BG
+		14f1 0048 Bt878/832 Mediastream Controller
+		1851 1850 FlyVideo'98 - Video
+		1851 1851 FlyVideo II
+		1852 1852 FlyVideo'98 - Video (with FM Tuner)
+	036f Bt879
+		127a 0044 Bt879 Video Capture NTSC
+		127a 0122 Bt879 Video Capture PAL I
+		127a 0144 Bt879 Video Capture NTSC
+		127a 0222 Bt879 Video Capture PAL BG
+		127a 0244 Bt879a Video Capture NTSC
+		127a 0322 Bt879 Video Capture NTSC
+		127a 0422 Bt879 Video Capture NTSC
+		127a 1122 Bt879 Video Capture PAL I
+		127a 1222 Bt879 Video Capture PAL BG
+		127a 1322 Bt879 Video Capture NTSC
+		127a 1522 Bt879a Video Capture PAL I
+		127a 1622 Bt879a Video Capture PAL BG
+		127a 1722 Bt879a Video Capture NTSC
+		14f1 0044 Bt879 Video Capture NTSC
+		14f1 0122 Bt879 Video Capture PAL I
+		14f1 0144 Bt879 Video Capture NTSC
+		14f1 0222 Bt879 Video Capture PAL BG
+		14f1 0244 Bt879a Video Capture NTSC
+		14f1 0322 Bt879 Video Capture NTSC
+		14f1 0422 Bt879 Video Capture NTSC
+		14f1 1122 Bt879 Video Capture PAL I
+		14f1 1222 Bt879 Video Capture PAL BG
+		14f1 1322 Bt879 Video Capture NTSC
+		14f1 1522 Bt879a Video Capture PAL I
+		14f1 1622 Bt879a Video Capture PAL BG
+		14f1 1722 Bt879a Video Capture NTSC
+		1851 1850 FlyVideo'98 - Video
+		1851 1851 FlyVideo II
+		1852 1852 FlyVideo'98 - Video (with FM Tuner)
+	0370 Bt880 Video Capture
+		1851 1850 FlyVideo'98
+		1851 1851 FlyVideo'98 EZ - video
+		1852 1852 FlyVideo'98 (with FM Tuner)
+	0878 Bt878
+		0070 13eb WinTV/GO
+		127a 0001 Bt878 Video Capture (Audio Section)
+		127a 0002 Bt878 Video Capture (Audio Section)
+		127a 0003 Bt878 Video Capture (Audio Section)
+		127a 0048 Bt878 Video Capture (Audio Section)
+		13e9 0070 Win/TV (Audio Section)
+		144f 3000 MagicTView CPH060 - Audio
+		14f1 0001 Bt878 Video Capture (Audio Section)
+		14f1 0002 Bt878 Video Capture (Audio Section)
+		14f1 0003 Bt878 Video Capture (Audio Section)
+		14f1 0048 Bt878 Video Capture (Audio Section)
+	0879 Bt879 Video Capture (Audio Section)
+		127a 0044 Bt879 Video Capture (Audio Section)
+		127a 0122 Bt879 Video Capture (Audio Section)
+		127a 0144 Bt879 Video Capture (Audio Section)
+		127a 0222 Bt879 Video Capture (Audio Section)
+		127a 0244 Bt879 Video Capture (Audio Section)
+		127a 0322 Bt879 Video Capture (Audio Section)
+		127a 0422 Bt879 Video Capture (Audio Section)
+		127a 1122 Bt879 Video Capture (Audio Section)
+		127a 1222 Bt879 Video Capture (Audio Section)
+		127a 1322 Bt879 Video Capture (Audio Section)
+		127a 1522 Bt879 Video Capture (Audio Section)
+		127a 1622 Bt879 Video Capture (Audio Section)
+		127a 1722 Bt879 Video Capture (Audio Section)
+		14f1 0044 Bt879 Video Capture (Audio Section)
+		14f1 0122 Bt879 Video Capture (Audio Section)
+		14f1 0144 Bt879 Video Capture (Audio Section)
+		14f1 0222 Bt879 Video Capture (Audio Section)
+		14f1 0244 Bt879 Video Capture (Audio Section)
+		14f1 0322 Bt879 Video Capture (Audio Section)
+		14f1 0422 Bt879 Video Capture (Audio Section)
+		14f1 1122 Bt879 Video Capture (Audio Section)
+		14f1 1222 Bt879 Video Capture (Audio Section)
+		14f1 1322 Bt879 Video Capture (Audio Section)
+		14f1 1522 Bt879 Video Capture (Audio Section)
+		14f1 1622 Bt879 Video Capture (Audio Section)
+		14f1 1722 Bt879 Video Capture (Audio Section)
+	0880 Bt880 Video Capture (Audio Section)
+	2115 BtV 2115 Mediastream controller
+	2125 BtV 2125 Mediastream controller
+	2164 BtV 2164
+	2165 BtV 2165
+	8230 Bt8230 ATM Segment/Reassembly Ctrlr (SRC)
+	8472 Bt8472
+	8474 Bt8474
+109f Trigem Computer Inc.
+10a0 Meidensha Corporation
+10a1 Juko Electronics Ind. Co. Ltd
+10a2 Quantum Corporation
+10a3 Everex Systems Inc
+10a4 Globe Manufacturing Sales
+10a5 Racal Interlan
+10a6 Informtech Industrial Ltd.
+10a7 Benchmarq Microelectronics
+10a8 Sierra Semiconductor
+	0000 STB Horizon 64
+10a9 Silicon Graphics, Inc.
+	0001 Crosstalk to PCI Bridge
+	0002 Linc I/O controller
+	0003 IOC3 I/O controller
+	0004 O2 MACE
+	0005 RAD Audio
+	0006 HPCEX
+	0007 RPCEX
+	0008 DiVO VIP
+	0009 Alteon Gigabit Ethernet
+	0010 AMP Video I/O
+	0011 GRIP
+	0012 SGH PSHAC GSN
+	1001 Magic Carpet
+	1002 Lithium
+	1003 Dual JPEG 1
+	1004 Dual JPEG 2
+	1005 Dual JPEG 3
+	1006 Dual JPEG 4
+	1007 Dual JPEG 5
+	1008 Cesium
+	2001 Fibre Channel
+	2002 ASDE
+	8001 O2 1394
+	8002 G-net NT
+10aa ACC Microelectronics
+	0000 ACCM 2188
+10ab Digicom
+10ac Honeywell IAC
+10ad Symphony Labs
+	0001 W83769F
+	0003 SL82C103
+	0005 SL82C105
+	0103 SL82c103
+	0105 SL82c105
+	0565 W83C553
+10ae Cornerstone Technology
+10af Micro Computer Systems Inc
+10b0 CardExpert Technology
+10b1 Cabletron Systems Inc
+10b2 Raytheon Company
+10b3 Databook Inc
+	3106 DB87144
+	b106 DB87144
+10b4 STB Systems Inc
+	1b1d Velocity 128 3D
+		10b4 237e Velocity 4400
+10b5 PLX Technology, Inc.
+	0001 i960 PCI bus interface
+	1076 VScom 800 8 port serial adaptor
+	1077 VScom 400 4 port serial adaptor
+	9036 9036
+	9050 PCI <-> IOBus Bridge
+		10b5 2273 SH-ARC SoHard ARCnet card
+		d84d 4006 EX-4006 1P
+		d84d 4008 EX-4008 1P EPP/ECP
+		d84d 4014 EX-4014 2P
+		d84d 4018 EX-4018 3P EPP/ECP
+		d84d 4025 EX-4025 1S(16C550) RS-232
+		d84d 4027 EX-4027 1S(16C650) RS-232
+		d84d 4028 EX-4028 1S(16C850) RS-232
+		d84d 4036 EX-4036 2S(16C650) RS-232
+		d84d 4037 EX-4037 2S(16C650) RS-232
+		d84d 4038 EX-4038 2S(16C850) RS-232
+		d84d 4052 EX-4052 1S(16C550) RS-422/485
+		d84d 4053 EX-4053 2S(16C550) RS-422/485
+		d84d 4055 EX-4055 4S(16C550) RS-232
+		d84d 4058 EX-4055 4S(16C650) RS-232
+		d84d 4065 EX-4065 8S(16C550) RS-232
+		d84d 4068 EX-4068 8S(16C650) RS-232
+		d84d 4078 EX-4078 2S(16C552) RS-232+1P
+	9060 9060
+	906d 9060SD
+		125c 0640 Aries 16000P
+	906e 9060ES
+	9080 9080
+		10b5 9080 9080 [real subsystem ID not set]
+	a001 GTEK Jetport II 2 port serial adaptor
+	c001 GTEK Cyclone 16/32 port serial adaptor
+10b6 Madge Networks
+	0001 Smart 16/4 PCI Ringnode
+	0002 Smart 16/4 PCI Ringnode Mk2
+		10b6 0002 Smart 16/4 PCI Ringnode Mk2
+		10b6 0006 16/4 CardBus Adapter
+	0003 Smart 16/4 PCI Ringnode Mk3
+		0e11 b0fd Compaq NC4621 PCI, 4/16, WOL
+		10b6 0003 Smart 16/4 PCI Ringnode Mk3
+		10b6 0007 Presto PCI Plus Adapter
+	0004 Smart 16/4 PCI Ringnode Mk1
+	0006 16/4 Cardbus Adapter
+		10b6 0006 16/4 CardBus Adapter
+	0007 Presto PCI Adapter
+		10b6 0007 Presto PCI
+	0009 Smart 100/16/4 PCI-HS Ringnode
+		10b6 0009 Smart 100/16/4 PCI-HS Ringnode
+	000a Smart 100/16/4 PCI Ringnode
+		10b6 000a Smart 100/16/4 PCI Ringnode
+	000b 16/4 CardBus Adapter Mk2
+		10b6 000b 16/4 Cardbus Adapter Mk2
+	1000 Collage 25 ATM Adapter
+	1001 Collage 155 ATM Server Adapter
+10b7 3Com Corporation
+	0001 3c985 1000BaseSX
+	3390 Token Link Velocity
+	3590 3c359 TokenLink Velocity XL
+		10b7 3590 TokenLink Velocity XL Adapter
+	4500 3c450 Cyclone/unknown
+	5055 3c555 Laptop Hurricane
+	5057 3c575 [Megahertz] 10/100 LAN CardBus
+		10b7 5a57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+	5157 3c575 [Megahertz] 10/100 LAN CardBus
+		10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+	5257 3CCFE575CT Cyclone CardBus
+	5900 3c590 10BaseT [Vortex]
+	5920 3c592 EISA 10mbps Demon/Vortex
+	5950 3c595 100BaseTX [Vortex]
+	5951 3c595 100BaseT4 [Vortex]
+	5952 3c595 100Base-MII [Vortex]
+	5970 3c597 EISA Fast Demon/Vortex
+	5b57 3c595 [Megahertz] 10/100 LAN CardBus
+		10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card
+	6055 3c556 Hurricane CardBus
+	6056 3c556B Hurricane CardBus
+	6560 3CCFE656 Cyclone CardBus
+	6562 3CCFEM656 [id 6562] Cyclone CardBus
+	6564 3CCFEM656 [id 6564] Cyclone CardBus
+	7646 3cSOHO100-TX Hurricane
+	8811 Token ring
+	9000 3c900 10BaseT [Boomerang]
+	9001 3c900 Combo [Boomerang]
+	9004 3c900B-TPO [Etherlink XL TPO]
+		10b7 9004 3C900B-TPO Etherlink XL TPO 10Mb
+	9005 3c900B-Combo [Etherlink XL Combo]
+		10b7 9005 3C900B-Combo Etherlink XL Combo
+	9006 3c900B-TPC [Etherlink XL TPC]
+	900a 3c900B-FL [Etherlink XL FL]
+	9050 3c905 100BaseTX [Boomerang]
+	9051 3c905 100BaseT4
+	9055 3c905B 100BaseTX [Cyclone]
+		1028 0080 3C905B Fast Etherlink XL 10/100
+		1028 0081 3C905B Fast Etherlink XL 10/100
+		1028 0082 3C905B Fast Etherlink XL 10/100
+		1028 0083 3C905B Fast Etherlink XL 10/100
+		1028 0084 3C905B Fast Etherlink XL 10/100
+		1028 0085 3C905B Fast Etherlink XL 10/100
+		1028 0086 3C905B Fast Etherlink XL 10/100
+		1028 0087 3C905B Fast Etherlink XL 10/100
+		1028 0088 3C905B Fast Etherlink XL 10/100
+		1028 0089 3C905B Fast Etherlink XL 10/100
+		1028 0090 3C905B Fast Etherlink XL 10/100
+		1028 0091 3C905B Fast Etherlink XL 10/100
+		1028 0092 3C905B Fast Etherlink XL 10/100
+		1028 0093 3C905B Fast Etherlink XL 10/100
+		1028 0094 3C905B Fast Etherlink XL 10/100
+		1028 0095 3C905B Fast Etherlink XL 10/100
+		1028 0096 3C905B Fast Etherlink XL 10/100
+		1028 0097 3C905B Fast Etherlink XL 10/100
+		1028 0098 3C905B Fast Etherlink XL 10/100
+		1028 0099 3C905B Fast Etherlink XL 10/100
+		10b7 9055 3C905B Fast Etherlink XL 10/100
+	9056 3c905B-T4
+	9058 3c905B-Combo [Deluxe Etherlink XL 10/100]
+	905a 3c905B-FX [Fast Etherlink XL FX 10/100]
+	9200 3c905C-TX [Fast Etherlink]
+		10b7 1000 3C905C-TX Fast Etherlink for PC Management NIC
+	9800 3c980-TX [Fast Etherlink XL Server Adapter]
+		10b7 9800 3c980-TX Fast Etherlink XL Server Adapter
+	9805 3c980-TX 10/100baseTX NIC [Python-T]
+		10b7 9805 3c980 10/100baseTX NIC [Python-T]
+10b8 Standard Microsystems Corp [SMC]
+	0005 83C170QF
+		1055 e000 LANEPIC
+		1055 e002 LANEPIC
+		10b8 a011 EtherPower II 10/100
+		10b8 a014 EtherPower II 10/100
+		10b8 a015 EtherPower II 10/100
+		10b8 a016 EtherPower II 10/100
+		10b8 a017 EtherPower II 10/100
+	0006 LANEPIC
+		1055 e100 LANEPIC Cardbus Fast Ethernet Adapter
+		1055 e102 LANEPIC Cardbus Fast Ethernet Adapter
+		1055 e300 LANEPIC Cardbus Fast Ethernet Adapter
+		1055 e302 LANEPIC Cardbus Fast Ethernet Adapter
+		10b8 a012 LANEPIC Cardbus Fast Ethernet Adapter
+		13a2 8002 LANEPIC Cardbus Fast Ethernet Adapter
+		13a2 8006 LANEPIC Cardbus Fast Ethernet Adapter
+	1000 FDC 37c665
+	1001 FDC 37C922
+	a011 83C170QF
+	b106 SMC34C90
+10b9 Acer Laboratories Inc. [ALi]
+	0111 C-Media CMI8738/C3DX Audio Device (OEM)
+		10b9 0111 C-Media CMI8738/C3DX Audio Device (OEM)
+	1435 M1435
+	1445 M1445
+	1449 M1449
+	1451 M1451
+	1461 M1461
+	1489 M1489
+	1511 M1511 [Aladdin]
+	1512 M1512 [Aladdin]
+	1513 M1513 [Aladdin]
+	1521 M1521 [Aladdin III]
+		10b9 1521 ALI M1521 Aladdin III CPU Bridge
+	1523 M1523
+		10b9 1523 ALI M1523 ISA Bridge
+	1531 M1531 [Aladdin IV]
+	1533 M1533 PCI to ISA Bridge [Aladdin IV]
+		10b9 1533 ALI M1533 Aladdin IV ISA Bridge
+	1541 M1541
+		10b9 1541 ALI M1541 Aladdin V/V+ AGP System Controller
+	1543 M1543
+	1621 M1621
+	1631 ALI M1631 PCI North Bridge Aladdin Pro III
+	1641 ALI M1641 PCI North Bridge Aladdin Pro IV
+	3141 M3141
+	3143 M3143
+	3145 M3145
+	3147 M3147
+	3149 M3149
+	3151 M3151
+	3307 M3307
+	3309 M3309
+	5212 M4803
+	5215 MS4803
+	5217 M5217H
+	5219 M5219
+	5225 M5225
+	5229 M5229 IDE
+	5235 M5225
+	5237 M5237 USB
+	5243 M5243
+	5247 M5247
+	5451 M5451 PCI South Bridge Audio
+	7101 M7101 PMU
+		10b9 7101 ALI M7101 Power Management Controller
+10ba Mitsubishi Electric Corp.
+	0301 AccelGraphics AccelECLIPSE
+10bb Dapha Electronics Corporation
+10bc Advanced Logic Research
+10bd Surecom Technology
+	0e34 NE-34
+10be Tseng Labs International Co.
+10bf Most Inc
+10c0 Boca Research Inc.
+10c1 ICM Co., Ltd.
+10c2 Auspex Systems Inc.
+10c3 Samsung Semiconductors, Inc.
+	1100 Smartether100 SC1100 LAN Adapter (i82557B)
+10c4 Award Software International Inc.
+10c5 Xerox Corporation
+10c6 Rambus Inc.
+10c7 Media Vision
+10c8 Neomagic Corporation
+	0001 NM2070 [MagicGraph NM2070]
+	0002 NM2090 [MagicGraph 128V]
+	0003 NM2093 [MagicGraph 128ZV]
+	0004 NM2160 [MagicGraph 128XD]
+		1014 00ba MagicGraph 128XD
+		1025 1007 MagicGraph 128XD
+		1028 0074 MagicGraph 128XD
+		1028 0075 MagicGraph 128XD
+		1028 007d MagicGraph 128XD
+		1028 007e MagicGraph 128XD
+		1033 802f MagicGraph 128XD
+		104d 801b MagicGraph 128XD
+		104d 802f MagicGraph 128XD
+		104d 830b MagicGraph 128XD
+		10ba 0e00 MagicGraph 128XD
+		10c8 0004 MagicGraph 128XD
+		10cf 1029 MagicGraph 128XD
+		10f7 8308 MagicGraph 128XD
+		10f7 8309 MagicGraph 128XD
+		10f7 830b MagicGraph 128XD
+		10f7 830d MagicGraph 128XD
+		10f7 8312 MagicGraph 128XD
+	0005 [MagicMedia 256AV]
+	0006 NM2360 [MagicMedia 256ZX]
+	0016 NM2380 [MagicMedia 256XL+]
+		10c8 0016 MagicMedia 256XL+
+	0025 [MagicMedia 256AV+]
+	0083 [MagicGraph 128ZV Plus]
+	8005 [MagicMedia 256AV Audio]
+		0e11 b0d1 MagicMedia 256AV Audio Device on Discovery
+		0e11 b126 MagicMedia 256AV Audio Device on Durango
+		1014 00dd MagicMedia 256AV Audio Device on BlackTip Thinkpad
+		1025 1003 MagicMedia 256AV Audio Device on TravelMate 720
+		1028 008f MagicMedia 256AV Audio Device on Colorado Inspiron
+		103c 0007 MagicMedia 256AV Audio Device on Voyager II
+		103c 0008 MagicMedia 256AV Audio Device on Voyager III
+		103c 000d MagicMedia 256AV Audio Device on Omnibook 900
+		10c8 8005 MagicMedia 256AV Audio Device on FireAnt
+		110a 8005 MagicMedia 256AV Audio Device
+		14c0 0004 MagicMedia 256AV Audio Device
+	8006 NM2360 [MagicMedia 256ZX Audio]
+10c9 Dataexpert Corporation
+10ca Fujitsu Microelectr., Inc.
+10cb Omron Corporation
+10cc Mentor ARC Inc
+10cd Advanced System Products, Inc
+	1100 ASC1100
+	1200 ASC1200 [(abp940) Fast SCSI-II]
+	1300 ABP940-U / ABP960-U
+		10cd 1310 ASC1300 SCSI Adapter
+	2300 ABP940-UW
+	2500 ABP940-U2W
+10ce Radius
+10cf Citicorp TTI
+	2001 mb86605
+10d0 Fujitsu Limited
+10d1 FuturePlus Systems Corp.
+10d2 Molex Incorporated
+10d3 Jabil Circuit Inc
+10d4 Hualon Microelectronics
+10d5 Autologic Inc.
+10d6 Cetia
+10d7 BCM Advanced Research
+10d8 Advanced Peripherals Labs
+10d9 Macronix, Inc. [MXIC]
+	0512 MX98713
+	0531 MX987x5
+	8625 MX86250
+	8888 MX86200
+10da Compaq IPG-Austin
+	0508 TC4048 Token Ring 4/16
+	3390 Tl3c3x9
+10db Rohm LSI Systems, Inc.
+10dc CERN/ECP/EDU
+	0001 STAR/RD24 SCI-PCI (PMC)
+	0002 TAR/RD24 SCI-PCI (PMC)
+	0021 HIPPI destination
+	0022 HIPPI source
+	10dc ATT2C15-3 FPGA
+10dd Evans & Sutherland
+10de nVidia Corporation
+	0008 EDGE 3D [NV1]
+	0009 EDGE 3D [NV1]
+	0010 Mutara V08 [NV2]
+	0020 Riva TnT [NV04]
+		1043 0200 V3400 TNT
+		1048 0c18 Erazor II SGRAM
+		1092 0550 Viper V550
+		1092 0552 Viper V550
+		1092 4804 Viper V550
+		1092 4808 Viper V550
+		1092 4810 Viper V550
+		1092 4812 Viper V550
+		1092 4815 Viper V550
+		1092 4820 Viper V550 with TV out
+		1092 4822 Viper V550
+		1092 4904 Viper V550
+		1092 4914 Viper V550
+		1092 8225 Viper V550
+		10de 0020 Riva TNT
+		1102 1015 Graphics Blaster CT6710
+		1102 1016 Graphics Blaster RIVA TNT
+	0028 Riva TnT2 [NV5]
+		1043 0200 AGP-V3800 SGRAM
+		1043 0201 AGP-V3800 SDRAM
+		1043 0205 PCI-V3800
+		1043 4000 AGP-V3800PRO
+		1092 4804 Viper V770
+		1092 4a00 Viper V770
+		1092 4a02 Viper V770 Ultra
+		1092 6a02 Viper V770 Ultra
+		1092 7a02 Viper V770 Ultra
+		10de 0005 RIVA TNT2 Pro
+		1102 1020 3D Blaster RIVA TNT2
+		1102 1026 3D Blaster RIVA TNT2 Digital
+		14af 5810 Maxi Gamer Xentor
+	0029 Riva TnT2 Ultra [NV5]
+		1043 0200 AGP-V3800 Deluxe
+		1043 0201 AGP-V3800 Ultra SDRAM
+		1043 0205 PCI-V3800 Ultra
+		1102 1021 3D Blaster RIVA TNT2 Ultra
+		1102 1029 3D Blaster RIVA TNT2 Ultra
+		1102 102f 3D Blaster RIVA TNT2 Ultra
+		14af 5820 Maxi Gamer Xentor 32
+	002a Riva TnT2 [NV5]
+	002b Riva TnT2 [NV5]
+	002c Vanta [NV6]
+		1043 0200 AGP-V3800 Combat SDRAM
+		1043 0201 AGP-V3800 Combat
+		1092 6820 Viper V730
+		1102 1031 CT6938 VANTA 8MB
+		1102 1034 CT6894 VANTA 16MB
+		14af 5008 Maxi Gamer Phoenix 2
+	002d Vanta [NV6]
+		1043 0200 AGP-V3800M
+		1043 0201 AGP-V3800M
+		1102 1023 CT6892 RIVA TNT2 Value
+		1102 1024 CT6932 RIVA TNT2 Value 32Mb
+		1102 102c CT6931 RIVA TNT2 Value (Jumper)
+		1462 8808 MSI-8808
+	002e Vanta [NV6]
+	002f Vanta [NV6]
+	00a0 Riva TNT2
+		14af 5810 Maxi Gamer Xentor
+	0100 GeForce 256
+		1043 0200 AGP-V6600 SGRAM
+		1043 0201 AGP-V6600 SDRAM
+		1043 4008 AGP-V6600 SGRAM
+		1043 4009 AGP-V6600 SDRAM
+		1102 102d CT6941 GeForce 256
+	0101 GeForce 256 DDR
+		1043 0202 AGP-V6800 DDR
+		1043 400a AGP-V6800 DDR SGRAM
+		1043 400b AGP-V6800 DDR SDRAM
+		1102 102e CT6971 GeForce 256 DDR
+		14af 5021 3D Prophet DDR-DVI
+	0103 Quadro (GeForce 256 GL)
+	0110 NV11 (GeForce2 MX)
+	0111 NV11 (GeForce2 MX DDR)
+	0112 GeForce2 Go
+	0113 NV11 (GeForce2 MXR)
+	0150 NV15 (GeForce2 Pro)
+		107d 2840 WinFast GeForce2 GTS with TV output
+	0151 NV15 DDR (GeForce2 GTS)
+	0152 NV15 Bladerunner (GeForce2 Ultra)
+	0153 NV15 GL (Quadro2 Pro)
+	0200 NV20 (GeForce3)
+	0203 Quadro DCC
+10df Emulex Corporation
+	10df Light Pulse Fibre Channel Adapter
+	1ae5 LP6000 Fibre Channel Host Adapter
+	f700 LP7000 Fibre Channel Host Adapter
+10e0 Integrated Micro Solutions Inc.
+	5026 IMS5026/27/28
+	5027 IMS5027
+	5028 IMS5028
+	8849 IMS8849
+	8853 IMS8853
+	9128 IMS9129 [Twin turbo 128]
+10e1 Tekram Technology Co.,Ltd.
+	0391 TRM-S1040
+		10e1 0391 DC-315U SCSI-3 Host Adapter
+	690c DC-690c
+	dc29 DC-290
+10e2 Aptix Corporation
+10e3 Tundra Semiconductor Corp.
+	0000 CA91C042 [Universe]
+	0860 CA91C860 [QSpan]
+10e4 Tandem Computers
+10e5 Micro Industries Corporation
+10e6 Gainbery Computer Products Inc.
+10e7 Vadem
+10e8 Applied Micro Circuits Corp.
+	2011 Q-Motion Video Capture/Edit board
+	4750 S5930 [Matchmaker]
+	5920 S5920
+	8043 LANai4.x [Myrinet LANai interface chip]
+	8062 S5933_PARASTATION
+	807d S5933 [Matchmaker]
+	8088 Kingsberg Spacetec Format Synchronizer
+	8089 Kingsberg Spacetec Serial Output Board
+	809c S5933_HEPC3
+	80d7 PCI-9112
+	80d9 PCI-9118
+	80da PCI-9812
+	811a PCI-IEEE1355-DS-DE Interface
+	8170 S5933 [Matchmaker] (Chipset Development Tool)
+10e9 Alps Electric Co., Ltd.
+10ea Intergraphics Systems
+	1680 IGA-1680
+	1682 IGA-1682
+	1683 IGA-1683
+	2000 CyberPro 2000
+	2010 CyberPro 2000A
+	5000 CyberPro 5000
+	5050 CyberPro 5050
+10eb Artists Graphics
+	0101 3GA
+	8111 Twist3 Frame Grabber
+10ec Realtek Semiconductor Co., Ltd.
+	8029 RTL-8029(AS)
+		10b8 2011 EZ-Card
+		10ec 8029 RT8029(AS)
+		1113 1208 EN1208
+		1186 0300 DE-528
+		1259 2400 AT-2400
+	8129 RTL-8129
+		10ec 8129 RT8129 Fast Ethernet Adapter
+	8138 RT8139 (B/C) Cardbus Fast Ethernet Adapter
+		10ec 8138 RT8139 (B/C) Fast Ethernet Adapter
+	8139 RTL-8139
+		1025 8920 ALN-325
+		1025 8921 ALN-325
+		10bd 0320 EP-320X-R
+		10ec 8139 RT8139
+		1186 1300 DFE-538TX
+		1186 1320 SN5200
+		1259 2500 AT-2500TX
+		1429 d010 ND010
+		1432 9130 EN-9130TX
+		1436 8139 RT8139
+		146c 1439 FE-1439TX
+		1489 6001 GF100TXRII
+		1489 6002 GF100TXRA
+		149c 139a LFE-8139ATX
+		149c 8139 LFE-8139TX
+		2646 0001 EtheRx
+		8e2e 7000 KF-230TX
+		8e2e 7100 KF-230TX/2
+		a0a0 0007 ALN-325C
+10ed Ascii Corporation
+	7310 V7310
+10ee Xilinx, Inc.
+	3fc0 RME Digi96
+	3fc1 RME Digi96/8
+	3fc2 RME Digi96/8 Pro
+	3fc3 RME Digi96/8 Pad
+10ef Racore Computer Products, Inc.
+	8154 M815x Token Ring Adapter
+10f0 Peritek Corporation
+10f1 Tyan Computer
+10f2 Achme Computer, Inc.
+10f3 Alaris, Inc.
+10f4 S-MOS Systems, Inc.
+10f5 NKK Corporation
+	a001 NDR4000 [NR4600 Bridge]
+10f6 Creative Electronic Systems SA
+10f7 Matsushita Electric Industrial Co., Ltd.
+10f8 Altos India Ltd
+10f9 PC Direct
+10fa Truevision
+	000c TARGA 1000
+10fb Thesys Gesellschaft für Mikroelektronik mbH
+10fc I-O Data Device, Inc.
+10fd Soyo Computer, Inc
+10fe Fast Multimedia AG
+10ff NCube
+1100 Jazz Multimedia
+1101 Initio Corporation
+	1060 INI-A100U2W
+	9100 INI-9100/9100W
+	9400 INI-940
+	9401 INI-950
+	9500 360P
+1102 Creative Labs
+	0002 SB Live! EMU10k1
+		1102 0020 CT4850 SBLive! Value
+		1102 0021 CT4620 SBLive!
+		1102 002f SBLive! mainboard implementation
+		1102 4001 E-mu APS
+		1102 8022 CT4780 SBLive! Value
+		1102 8023 CT4790 SoundBlaster PCI512
+		1102 8024 CT4760 SBLive!
+		1102 8025 SBLive! Mainboard Implementation
+		1102 8026 CT4830 SBLive! Value
+		1102 8027 CT4832 SBLive! Value
+		1102 8031 CT4831 SBLive! Value
+		1102 8040 CT4760 SBLive!
+		1102 8051 CT4850 SBLive! Value
+	7002 SB Live!
+		1102 0020 Gameport Joystick
+	8938 ES1371
+1103 Triones Technologies, Inc.
+	0003 HPT343
+	0004 HPT366 / HPT370
+		1103 0005 HPT370 UDMA100
+1104 RasterOps Corp.
+1105 Sigma Designs, Inc.
+	8300 REALmagic Hollywood Plus DVD Decoder
+1106 VIA Technologies, Inc.
+	0305 VT8363/8365 [KT133/KM133]
+	0391 VT8371 [KX133]
+	0501 VT8501 [Apollo MVP4]
+	0505 VT82C505
+	0561 VT82C561
+	0571 Bus Master IDE
+	0576 VT82C576 3V [Apollo Master]
+	0585 VT82C585VP [Apollo VP1/VPX]
+	0586 VT82C586/A/B PCI-to-ISA [Apollo VP]
+		1106 0000 MVP3 ISA Bridge
+	0595 VT82C595 [Apollo VP2]
+	0596 VT82C596 ISA [Mobile South]
+		1106 0000 VT82C596/A/B PCI to ISA Bridge
+		1458 0596 VT82C596/A/B PCI to ISA Bridge
+	0597 VT82C597 [Apollo VP3]
+	0598 VT82C598 [Apollo MVP3]
+	0601 VT8601 [Apollo ProMedia]
+	0605 VT8605 [ProSavage PM133]
+	0680 VT82C680 [Apollo P6]
+	0686 VT82C686 [Apollo Super South]
+		1106 0000 VT82C686/A PCI to ISA Bridge
+		1106 0686 VT82C686/A PCI to ISA Bridge
+	0691 VT82C693A/694x [Apollo PRO133x]
+		1458 0691 VT82C691 Apollo Pro System Controller
+	0693 VT82C693 [Apollo Pro Plus]
+	0698 VT82C693A [Apollo Pro133 AGP]
+	0926 VT82C926 [Amazon]
+	1000 VT82C570MV
+	1106 VT82C570MV
+	1571 VT82C416MV
+	1595 VT82C595/97 [Apollo VP2/97]
+	3038 UHCI USB
+		1234 0925 MVP3 USB Controller
+	3040 VT82C586B ACPI
+	3043 VT86C100A [Rhine 10/100]
+		10bd 0000 VT86C100A Fast Ethernet Adapter
+		1106 0100 VT86C100A Fast Ethernet Adapter
+		1186 1400 DFE-530TX
+	3044 OHCI Compliant IEEE 1394 Host Controller
+	3050 VT82C596 Power Management
+	3051 VT82C596 Power Management
+	3057 VT82C686 [Apollo Super ACPI]
+	3058 AC97 Audio Controller
+		1462 3091 MS-6309 Onboard Audio
+	3059 AC97 Audio Controller
+	3065 Ethernet Controller
+	3068 AC97 Modem Controller
+	3074 VT8233 PCI to ISA Bridge
+	3091 VT8633 [Apollo Pro266]
+	3099 VT8367 [KT266]
+	3109 VT8233C PCI to ISA Bridge
+	5030 VT82C596 ACPI [Apollo PRO]
+	6100 VT85C100A [Rhine II]
+	8231 VT8231 [PCI-to-ISA Bridge]
+	8235 VT8235 Power Management
+	8305 VT8363/8365 [KT133/KM133 AGP]
+	8391 VT8371 [KX133 AGP]
+	8501 VT8501 [Apollo MVP4 AGP]
+	8596 VT82C596 [Apollo PRO AGP]
+	8597 VT82C597 [Apollo VP3 AGP]
+	8598 VT82C598/694x [Apollo MVP3/Pro133x AGP]
+	8601 VT8601 [Apollo ProMedia AGP]
+	8605 VT8605 [PM133 AGP]
+	8691 VT82C691 [Apollo Pro]
+	b091 VT8633 [Apollo Pro266 AGP]
+	b099 VT8367 [KT266 AGP]
+1107 Stratus Computers
+	0576 VIA VT82C570MV [Apollo] (Wrong vendor ID!)
+1108 Proteon, Inc.
+	0100 p1690plus_AA
+	0101 p1690plus_AB
+	0105 P1690Plus
+	0108 P1690Plus
+	0138 P1690Plus
+	0139 P1690Plus
+	013c P1690Plus
+	013d P1690Plus
+1109 Cogent Data Technologies, Inc.
+	1400 EM110TX [EX110TX]
+110a Siemens Nixdorf AG
+	0002 Pirahna 2-port
+	0005 Tulip controller, power management, switch extender
+	2102 DSCC4 WAN adapter
+	4942 FPGA I-Bus Tracer for MBD
+	6120 SZB6120
+110b Chromatic Research Inc.
+	0001 Mpact Media Processor
+110c Mini-Max Technology, Inc.
+110d Znyx Advanced Systems
+110e CPU Technology
+110f Ross Technology
+1110 Powerhouse Systems
+	6037 Firepower Powerized SMP I/O ASIC
+	6073 Firepower Powerized SMP I/O ASIC
+1111 Santa Cruz Operation
+# DJ: Some people say that 0x1112 is Rockwell International
+1112 RNS - Div. of Meret Communications Inc
+	2200 FDDI Adapter
+	2300 Fast Ethernet Adapter
+	2340 4 Port Fast Ethernet Adapter
+	2400 ATM Adapter
+1113 Accton Technology Corporation
+	1211 SMC2-1211TX
+		103c 1207 EN-1207D Fast Ethernet Adapter
+		1113 1211 EN-1207D Fast Ethernet Adapter
+	1216 EN-1216 Ethernet Adapter
+	1217 EN-1217 Ethernet Adapter
+	5105 10Mbps Network card
+	9211 EN-1207D Fast Ethernet Adapter
+		1113 9211 EN-1207D Fast Ethernet Adapter
+1114 Atmel Corporation
+1115 3D Labs
+1116 Data Translation
+	0022 DT3001
+	0023 DT3002
+	0024 DT3003
+	0025 DT3004
+	0026 DT3005
+	0027 DT3001-PGL
+	0028 DT3003-PGL
+1117 Datacube, Inc
+	9500 Max-1C SVGA card
+	9501 Max-1C image processing
+1118 Berg Electronics
+1119 ICP Vortex Computersysteme GmbH
+	0000 GDT 6000/6020/6050
+	0001 GDT 6000B/6010
+	0002 GDT 6110/6510
+	0003 GDT 6120/6520
+	0004 GDT 6530
+	0005 GDT 6550
+	0006 GDT 6x17
+	0007 GDT 6x27
+	0008 GDT 6537
+	0009 GDT 6557
+	000a GDT 6115/6515
+	000b GDT 6125/6525
+	000c GDT 6535
+	000d GDT 6555
+	0100 GDT 6117RP/6517RP
+	0101 GDT 6127RP/6527RP
+	0102 GDT 6537RP
+	0103 GDT 6557RP
+	0104 GDT 6111RP/6511RP
+	0105 GDT 6121RP/6521RP
+	0110 GDT 6117RD/6517RD
+	0111 GDT 6127RD/6527RD
+	0112 GDT 6537RD
+	0113 GDT 6557RD
+	0114 GDT 6111RD/6511RD
+	0115 GDT 6121RD/6521RD
+	0118 GDT 6118RD/6518RD/6618RD
+	0119 GDT 6128RD/6528RD/6628RD
+	011a GDT 6538RD/6638RD
+	011b GDT 6558RD/6658RD
+	0120 GDT 6117RP2/6517RP2
+	0121 GDT 6127RP2/6527RP2
+	0122 GDT 6537RP2
+	0123 GDT 6557RP2
+	0124 GDT 6111RP2/6511RP2
+	0125 GDT 6121RP2/6521RP2
+	0136 GDT 6113RS/6513RS
+	0137 GDT 6123RS/6523RS
+	0138 GDT 6118RS/6518RS/6618RS
+	0139 GDT 6128RS/6528RS/6628RS
+	013a GDT 6538RS/6638RS
+	013b GDT 6558RS/6658RS
+	013c GDT 6533RS/6633RS
+	013d GDT 6543RS/6643RS
+	013e GDT 6553RS/6653RS
+	013f GDT 6563RS/6663RS
+	0166 GDT 7113RN/7513RN/7613RN
+	0167 GDT 7123RN/7523RN/7623RN
+	0168 GDT 7118RN/7518RN/7518RN
+	0169 GDT 7128RN/7528RN/7628RN
+	016a GDT 7538RN/7638RN
+	016b GDT 7558RN/7658RN
+	016c GDT 7533RN/7633RN
+	016d GDT 7543RN/7643RN
+	016e GDT 7553RN/7653RN
+	016f GDT 7563RN/7663RN
+	01d6 GDT 4x13RZ
+	01d7 GDT 4x23RZ
+	01f6 GDT 8x13RZ
+	01f7 GDT 8x23RZ
+	01fc GDT 8x33RZ
+	01fd GDT 8x43RZ
+	01fe GDT 8x53RZ
+	01ff GDT 8x63RZ
+	0210 GDT 6519RD/6619RD
+	0211 GDT 6529RD/6629RD
+	0260 GDT 7519RN/7619RN
+	0261 GDT 7529RN/7629RN
+	0300 GDT Raid Controller
+111a Efficient Networks, Inc
+	0000 155P-MF1 (FPGA)
+	0002 155P-MF1 (ASIC)
+	0003 ENI-25P ATM
+		111a 0000 ENI-25p Miniport ATM Adapter
+	0005 SpeedStream (LANAI)
+		111a 0001 ENI-3010 ATM
+		111a 0009 ENI-3060 ADSL (VPI=0)
+		111a 0101 ENI-3010 ATM
+		111a 0109 ENI-3060CO ADSL (VPI=0)
+		111a 0809 ENI-3060 ADSL (VPI=0 or 8)
+		111a 0909 ENI-3060CO ADSL (VPI=0 or 8)
+		111a 0a09 ENI-3060 ADSL (VPI=<0..15>)
+	0007 SpeedStream ADSL
+		111a 1001 ENI-3061 ADSL [ASIC]
+111b Teledyne Electronic Systems
+111c Tricord Systems Inc.
+	0001 Powerbis Bridge
+111d Integrated Device Tech
+	0001 IDT77211 ATM Adapter
+111e Eldec
+111f Precision Digital Images
+	4a47 Precision MX Video engine interface
+	5243 Frame capture bus interface
+1120 EMC Corporation
+1121 Zilog
+1122 Multi-tech Systems, Inc.
+1123 Excellent Design, Inc.
+1124 Leutron Vision AG
+1125 Eurocore
+1126 Vigra
+1127 FORE Systems Inc
+	0200 ForeRunner PCA-200 ATM
+	0210 PCA-200PC
+	0250 ATM
+	0300 PCA-200E
+	0310 ATM
+	0400 ForeRunnerHE ATM Adapter
+1129 Firmworks
+112a Hermes Electronics Company, Ltd.
+112b Linotype - Hell AG
+112c Zenith Data Systems
+112d Ravicad
+112e Infomedia Microelectronics Inc.
+112f Imaging Technology Inc
+	0000 MVC IC-PCI
+	0001 MVC IM-PCI Video frame grabber/processor
+1130 Computervision
+1131 Philips Semiconductors
+	7145 SAA7145
+	7146 SAA7146
+		114b 2003 DVRaptor Video Edit/Capture Card
+		11bd 0006 DV500 Overlay
+		11bd 000a DV500 Overlay
+1132 Mitel Corp.
+1133 Eicon Technology Corporation
+	7901 EiconCard S90
+	7902 EiconCard S90
+	7911 EiconCard S91
+	7912 EiconCard S91
+	7941 EiconCard S94
+	7942 EiconCard S94
+	b921 EiconCard P92
+	b922 EiconCard P92
+	e001 DIVA 20PRO
+		1133 e001 DIVA Pro 2.0 S/T
+	e002 DIVA 20
+		1133 e002 DIVA 2.0 S/T
+	e003 DIVA 20PRO_U
+		1133 e003 DIVA Pro 2.0 U
+	e004 DIVA 20_U
+		1133 e004 DIVA 2.0 U
+	e010 DIVA Server BRI-2M
+		1133 e010 DIVA Server BRI-2M
+	e014 DIVA Server PRI-30M
+		1133 e014 DIVA Server PRI-30M
+1134 Mercury Computer Systems
+	0001 Raceway Bridge
+1135 Fuji Xerox Co Ltd
+	0001 Printer controller
+1136 Momentum Data Systems
+1137 Cisco Systems Inc
+1138 Ziatech Corporation
+	8905 8905 [STD 32 Bridge]
+1139 Dynamic Pictures, Inc
+	0001 VGA Compatable 3D Graphics
+113a FWB Inc
+113b Network Computing Devices
+113c Cyclone Microsystems, Inc.
+	0000 PCI-9060 i960 Bridge
+	0001 PCI-SDK [PCI i960 Evaluation Platform]
+	0911 PCI-911 [i960Jx-based Intelligent I/O Controller]
+	0912 PCI-912 [i960CF-based Intelligent I/O Controller]
+	0913 PCI-913
+	0914 PCI-914 [I/O Controller w/ secondary PCI bus]
+113d Leading Edge Products Inc
+113e Sanyo Electric Co - Computer Engineering Dept
+113f Equinox Systems, Inc.
+	0808 SST-64P Adapter
+	1010 SST-128P Adapter
+	80c0 SST-16P Adapter
+	80c4 SST-16P Adapter
+	80c8 SST-16P Adapter
+	8888 SST-4P Adapter
+	9090 SST-8P Adapter
+1140 Intervoice Inc
+1141 Crest Microsystem Inc
+1142 Alliance Semiconductor Corporation
+	3210 AP6410
+	6422 ProVideo 6422
+	6424 ProVideo 6424
+	6425 ProMotion AT25
+	643d ProMotion AT3D
+1143 NetPower, Inc
+1144 Cincinnati Milacron
+	0001 Noservo controller
+1145 Workbit Corporation
+1146 Force Computers
+1147 Interface Corp
+1148 Syskonnect (Schneider & Koch)
+	4000 FDDI Adapter
+		0e11 b03b Netelligent 100 FDDI DAS Fibre SC
+		0e11 b03c Netelligent 100 FDDI SAS Fibre SC
+		0e11 b03d Netelligent 100 FDDI DAS UTP
+		0e11 b03e Netelligent 100 FDDI SAS UTP
+		0e11 b03f Netelligent 100 FDDI SAS Fibre MIC
+		1148 5521 FDDI SK-5521 (SK-NET FDDI-UP)
+		1148 5522 FDDI SK-5522 (SK-NET FDDI-UP DAS)
+		1148 5541 FDDI SK-5541 (SK-NET FDDI-FP)
+		1148 5543 FDDI SK-5543 (SK-NET FDDI-LP)
+		1148 5544 FDDI SK-5544 (SK-NET FDDI-LP DAS)
+		1148 5821 FDDI SK-5821 (SK-NET FDDI-UP64)
+		1148 5822 FDDI SK-5822 (SK-NET FDDI-UP64 DAS)
+		1148 5841 FDDI SK-5841 (SK-NET FDDI-FP64)
+		1148 5843 FDDI SK-5843 (SK-NET FDDI-LP64)
+		1148 5844 FDDI SK-5844 (SK-NET FDDI-LP64 DAS)
+	4200 Token Ring adapter
+	4300 Gigabit Ethernet
+		1148 9821 SK-9821 (1000Base-T single link)
+		1148 9822 SK-9822 (1000Base-T dual link)
+		1148 9841 SK-9841 (1000Base-LX single link)
+		1148 9842 SK-9842 (1000Base-LX dual link)
+		1148 9843 SK-9843 (1000Base-SX single link)
+		1148 9844 SK-9844 (1000Base-SX dual link)
+		1148 9861 SK-9861 (1000Base-SX VF45 single link)
+		1148 9862 SK-9862 (1000Base-SX VF45 dual link)
+1149 Win System Corporation
+114a VMIC
+	7587 VMIVME-7587
+114b Canopus Co., Ltd
+114c Annabooks
+114d IC Corporation
+114e Nikon Systems Inc
+114f Digi International
+	0002 AccelePort EPC
+	0003 RightSwitch SE-6
+	0004 AccelePort Xem
+	0005 AccelePort Xr
+	0006 AccelePort Xr,C/X
+	0009 AccelePort Xr/J
+	000a AccelePort EPC/J
+	000c DataFirePRIme T1 (1-port)
+	000d SyncPort 2-Port (x.25/FR)
+	0011 AccelePort 8r EIA-232 (IBM)
+	0012 AccelePort 8r EIA-422
+	0013 AccelePort Xr
+	0014 AccelePort 8r EIA-422
+	0015 AccelePort Xem
+	0016 AccelePort EPC/X
+	0017 AccelePort C/X
+	001a DataFirePRIme E1 (1-port)
+	001b AccelePort C/X (IBM)
+	001d DataFire RAS T1/E1/PRI
+	0023 AccelePort RAS
+	0024 DataFire RAS B4 ST/U
+	0026 AccelePort 4r 920
+	0027 AccelePort Xr 920
+	0034 AccelePort 2r 920
+	0035 DataFire DSP T1/E1/PRI cPCI
+	6001 Avanstar
+1150 Thinking Machines Corp
+1151 JAE Electronics Inc.
+1152 Megatek
+1153 Land Win Electronic Corp
+1154 Melco Inc
+1155 Pine Technology Ltd
+1156 Periscope Engineering
+1157 Avsys Corporation
+1158 Voarx R & D Inc
+	3011 Tokenet/vg 1001/10m anylan
+	9050 Lanfleet/Truevalue
+	9051 Lanfleet/Truevalue
+1159 Mutech Corp
+	0001 MV-1000
+115a Harlequin Ltd
+115b Parallax Graphics
+115c Photron Ltd.
+115d Xircom
+	0003 Cardbus Ethernet 10/100
+		1014 0181 10/100 EtherJet Cardbus Adapter
+		1014 1181 10/100 EtherJet Cardbus Adapter
+		115d 0181 Cardbus Ethernet 10/100
+		115d 1181 Cardbus Ethernet 10/100
+		8086 8181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+		8086 9181 EtherExpress PRO/100 Mobile CardBus 32 Adapter
+	0005 Cardbus Ethernet 10/100
+		1014 0182 10/100 EtherJet Cardbus Adapter
+		1014 1182 10/100 EtherJet Cardbus Adapter
+		115d 0182 Cardbus Ethernet 10/100
+		115d 1182 Cardbus Ethernet 10/100
+	0007 Cardbus Ethernet 10/100
+		1014 0182 10/100 EtherJet Cardbus Adapter
+		1014 1182 10/100 EtherJet Cardbus Adapter
+		115d 0182 Cardbus Ethernet 10/100
+		115d 1182 Cardbus Ethernet 10/100
+	000b Cardbus Ethernet 10/100
+		1014 0183 10/100 EtherJet Cardbus Adapter
+		115d 0183 Cardbus Ethernet 10/100
+	000f Cardbus Ethernet 10/100
+		1014 0183 10/100 EtherJet Cardbus Adapter
+		115d 0183 Cardbus Ethernet 10/100
+	0101 Cardbus 56k modem
+		115d 1081 Cardbus 56k Modem
+	0103 Cardbus Ethernet + 56k Modem
+		115d 1181 CBEM56G-100 Ethernet + 56k Modem
+		8086 9181 PRO/100 LAN + Modem56 CardBus
+115e Peer Protocols Inc
+115f Maxtor Corporation
+1160 Megasoft Inc
+1161 PFU Limited
+1162 OA Laboratory Co Ltd
+1163 Rendition
+	0001 Verite 1000
+	2000 Verite V2000/V2100/V2200
+		1092 2000 Stealth II S220
+1164 Advanced Peripherals Technologies
+1165 Imagraph Corporation
+	0001 Motion TPEG Recorder/Player with audio
+1166 ServerWorks
+	0007 CNB20-LE Host Bridge
+	0008 CNB20HE Host Bridge
+	0009 CNB20LE Host Bridge
+	0010 CIOB30
+	0011 CMIC-HE
+	0200 OSB4 South Bridge
+	0201 CSB5 South Bridge
+	0211 OSB4 IDE Controller
+	0212 CSB5 IDE Controller
+	0220 OSB4/CSB5 OHCI USB Controller
+1167 Mutoh Industries Inc
+1168 Thine Electronics Inc
+1169 Centre for Development of Advanced Computing
+116a Polaris Communications
+	6100 Bus/Tag Channel
+	6800 Escon Channel
+	7100 Bus/Tag Channel
+	7800 Escon Channel
+116b Connectware Inc
+116c Intelligent Resources Integrated Systems
+116d Martin-Marietta
+116e Electronics for Imaging
+116f Workstation Technology
+1170 Inventec Corporation
+1171 Loughborough Sound Images Plc
+1172 Altera Corporation
+1173 Adobe Systems, Inc
+1174 Bridgeport Machines
+1175 Mitron Computer Inc.
+1176 SBE Incorporated
+1177 Silicon Engineering
+1178 Alfa, Inc.
+	afa1 Fast Ethernet Adapter
+1179 Toshiba America Info Systems
+	0404 DVD Decoder card
+	0406 Tecra Video Capture device
+	0407 DVD Decoder card (Version 2)
+	0601 601
+	0603 ToPIC95 PCI to CardBus Bridge for Notebooks
+	060a ToPIC95
+	060f ToPIC97
+	0617 ToPIC95 PCI to Cardbus Bridge with ZV Support
+	0618 CPU to PCI and PCI to ISA bridge
+# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID?
+	0701 FIR Port
+	0d01 FIR Port Type-DO
+		1179 0001 FIR Port Type-DO
+117a A-Trend Technology
+117b L G Electronics, Inc.
+117c Atto Technology
+117d Becton & Dickinson
+117e T/R Systems
+117f Integrated Circuit Systems
+1180 Ricoh Co Ltd
+	0465 RL5c465
+	0466 RL5c466
+	0475 RL5c475
+	0476 RL5c476 II
+	0477 RL5c477
+	0478 RL5c478
+1181 Telmatics International
+1183 Fujikura Ltd
+1184 Forks Inc
+1185 Dataworld International Ltd
+1186 D-Link System Inc
+	0100 DC21041
+	1002 Sundance Ethernet
+	1300 RTL8139 Ethernet
+	4000 DL2K Ethernet
+1187 Advanced Technology Laboratories, Inc.
+1188 Shima Seiki Manufacturing Ltd.
+1189 Matsushita Electronics Co Ltd
+118a Hilevel Technology
+118b Hypertec Pty Limited
+118c Corollary, Inc
+	0014 PCIB [C-bus II to PCI bus host bridge chip]
+	1117 Intel 8-way XEON Profusion Chipset [Cache Coherency Filter]
+118d BitFlow Inc
+	0001 Raptor-PCI framegrabber
+	0012 Model 12 Road Runner Frame Grabber
+	0014 Model 14 Road Runner Frame Grabber
+	0024 Model 24 Road Runner Frame Grabber
+	0044 Model 44 Road Runner Frame Grabber
+	0112 Model 12 Road Runner Frame Grabber
+	0114 Model 14 Road Runner Frame Grabber
+	0124 Model 24 Road Runner Frame Grabber
+	0144 Model 44 Road Runner Frame Grabber
+	0212 Model 12 Road Runner Frame Grabber
+	0214 Model 14 Road Runner Frame Grabber
+	0224 Model 24 Road Runner Frame Grabber
+	0244 Model 44 Road Runner Frame Grabber
+	0312 Model 12 Road Runner Frame Grabber
+	0314 Model 14 Road Runner Frame Grabber
+	0324 Model 24 Road Runner Frame Grabber
+	0344 Model 44 Road Runner Frame Grabber
+118e Hermstedt GmbH
+118f Green Logic
+1190 Tripace
+	c731 TP-910/920/940 PCI Ultra(Wide) SCSI Adapter
+1191 Artop Electronic Corp
+	0003 SCSI Cache Host Adapter
+	0004 ATP8400
+	0005 ATP850UF
+	0006 ATP860 NO-BIOS
+	0007 ATP860
+	8002 AEC6710 SCSI-2 Host Adapter
+	8010 AEC6712UW SCSI
+	8020 AEC6712U SCSI
+	8030 AEC6712S SCSI
+	8040 AEC6712D SCSI
+	8050 AEC6712SUW SCSI
+1192 Densan Company Ltd
+1193 Zeitnet Inc.
+	0001 1221
+	0002 1225
+1194 Toucan Technology
+1195 Ratoc System Inc
+1196 Hytec Electronics Ltd
+1197 Gage Applied Sciences, Inc.
+1198 Lambda Systems Inc
+1199 Attachmate Corporation
+119a Mind Share, Inc.
+119b Omega Micro Inc.
+	1221 82C092G
+119c Information Technology Inst.
+119d Bug, Inc. Sapporo Japan
+119e Fujitsu Microelectronics Ltd.
+	0001 FireStream 155
+	0003 FireStream 50
+119f Bull HN Information Systems
+11a0 Convex Computer Corporation
+11a1 Hamamatsu Photonics K.K.
+11a2 Sierra Research and Technology
+11a3 Deuretzbacher GmbH & Co. Eng. KG
+11a4 Barco Graphics NV
+11a5 Microunity Systems Eng. Inc
+11a6 Pure Data Ltd.
+11a7 Power Computing Corp.
+11a8 Systech Corp.
+11a9 InnoSys Inc.
+	4240 AMCC S933Q Intelligent Serial Card
+11aa Actel
+11ab Galileo Technology Ltd.
+	0146 GT-64010
+	4801 GT-48001
+	f003 GT-64010 Primary Image Piranha Image Generator
+11ac Canon Information Systems Research Aust.
+11ad Lite-On Communications Inc
+	0002 LNE100TX
+		11ad 0002 LNE100TX
+		11ad 0003 LNE100TX
+		11ad f003 LNE100TX
+		11ad ffff LNE100TX
+		1385 f004 FA310TX
+	c115 LNE100TX [Linksys EtherFast 10/100]
+11ae Aztech System Ltd
+11af Avid Technology Inc.
+11b0 V3 Semiconductor Inc.
+	0002 V300PSC
+	0292 V292PBC [Am29030/40 Bridge]
+	0960 V96xPBC
+	c960 V96DPC
+11b1 Apricot Computers
+11b2 Eastman Kodak
+11b3 Barr Systems Inc.
+11b4 Leitch Technology International
+11b5 Radstone Technology Plc
+11b6 United Video Corp
+11b7 Motorola
+11b8 XPoint Technologies, Inc
+	0001 Quad PeerMaster
+11b9 Pathlight Technology Inc.
+	c0ed SSA Controller
+11ba Videotron Corp
+11bb Pyramid Technology
+11bc Network Peripherals Inc
+	0001 NP-PCI
+11bd Pinnacle Systems Inc.
+11be International Microcircuits Inc
+11bf Astrodesign, Inc.
+11c0 Hewlett Packard
+11c1 Lucent Microelectronics
+	0440 56k WinModem
+		0001 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+		1033 8015 LT WinModem 56k Data+Fax+Voice+Dsvd
+		1033 804f LT WinModem 56k Data+Fax+Voice+Dsvd
+		10cf 102c LB LT Modem V.90 56k
+		10cf 104a BIBLO LT Modem 56k
+		10cf 105f LB2 LT Modem V.90 56k
+		1179 0001 Internal V.90 Modem
+		11c1 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+		122d 4101 MDP7800-U Modem
+		13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+		13e0 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+		13e0 0441 LT WinModem 56k Data+Fax+Voice+Dsvd
+		13e0 f100 LT WinModem 56k Data+Fax+Voice+Dsvd
+		13e0 f101 LT WinModem 56k Data+Fax+Voice+Dsvd
+		144d 2101 LT56PV Modem
+		149f 0440 LT WinModem 56k Data+Fax+Voice+Dsvd
+	0441 56k WinModem
+		1033 804d LT WinModem 56k Data+Fax
+		1092 0440 Supra 56i
+		1179 0001 Internal V.90 Modem
+		11c1 0440 LT WinModem 56k Data+Fax
+		11c1 0441 LT WinModem 56k Data+Fax
+		122d 4100 MDP7800-U Modem
+		13e0 0040 LT WinModem 56k Data+Fax
+		13e0 0100 LT WinModem 56k Data+Fax
+		13e0 0410 LT WinModem 56k Data+Fax
+		13e0 0420 TelePath Internet 56k WinModem
+		13e0 0443 LT WinModem 56k Data+Fax
+		1416 9804 CommWave 56k Modem
+		141d 0440 LT WinModem 56k Data+Fax
+		144f 0441 Lucent 56k V.90 DF Modem
+		1468 0441 Presario 56k V.90 DF Modem
+	0442 56k WinModem
+		0001 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		11c1 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		11c1 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		13e0 0412 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		13e0 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		13fc 2471 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		144d 2104 LT56PT Modem
+		149f 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		1668 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+	0443 LT WinModem
+	0444 LT WinModem
+	0445 LT WinModem
+	0446 LT WinModem
+	0447 LT WinModem
+	0448 WinModem 56k
+		13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd
+	0449 WinModem 56k
+		0e11 b14d 56k V.90 Modem
+		13e0 0020 LT WinModem 56k Data+Fax
+		13e0 0041 TelePath Internet 56k WinModem
+		144f 0449 Lucent 56k V.90 DFi Modem
+		1468 0449 Presario 56k V.90 DFi Modem
+	044a F-1156IV WinModem (V90, 56KFlex)
+		13e0 0012 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		13e0 0042 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+		144f 1005 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd
+	044b LT WinModem
+	044c LT WinModem
+	044d LT WinModem
+	044e LT WinModem
+	0450 LT WinModem
+	0451 LT WinModem
+	0452 LT WinModem
+	0453 LT WinModem
+	0454 LT WinModem
+	0455 LT WinModem
+	0456 LT WinModem
+	0457 LT WinModem
+	0458 LT WinModem
+	0459 LT WinModem
+	045a LT WinModem
+	0480 Venus Modem (V90, 56KFlex)
+11c2 Sand Microelectronics
+11c3 NEC Corp
+11c4 Document Technologies, Inc
+11c5 Shiva Corporation
+11c6 Dainippon Screen Mfg. Co. Ltd
+11c7 D.C.M. Data Systems
+11c8 Dolphin Interconnect Solutions AS
+	0658 PSB32 SCI-Adapter D31x
+	d665 PSB64 SCI-Adapter D32x
+	d667 PSB66 SCI-Adapter D33x
+11c9 Magma
+	0010 16-line serial port w/- DMA
+	0011 4-line serial port w/- DMA
+11ca LSI Systems, Inc
+11cb Specialix Research Ltd.
+	2000 PCI_9050
+		11cb 0200 SX
+		11cb b008 I/O8+
+	4000 SUPI_1
+	8000 T225
+11cc Michels & Kleberhoff Computer GmbH
+11cd HAL Computer Systems, Inc.
+11ce Netaccess
+11cf Pioneer Electronic Corporation
+11d0 Lockheed Martin Federal Systems-Manassas
+11d1 Auravision
+	01f7 VxP524
+11d2 Intercom Inc.
+11d3 Trancell Systems Inc
+11d4 Analog Devices
+	1889 AD1889 sound chip
+11d5 Ikon Corporation
+	0115 10115
+	0117 10117
+11d6 Tekelec Telecom
+11d7 Trenton Technology, Inc.
+11d8 Image Technologies Development
+11d9 TEC Corporation
+11da Novell
+11db Sega Enterprises Ltd
+11dc Questra Corporation
+11dd Crosfield Electronics Limited
+11de Zoran Corporation
+	6057 ZR36057PQC Video cutting chipset
+		1031 7efe DC10 Plus
+		1031 fc00 MiroVIDEO DC50, Motion JPEG Capture/CODEC Board
+		13ca 4231 JPEG/TV Card
+	6120 ZR36120
+		1328 f001 Cinemaster C DVD Decoder
+11df New Wave PDG
+11e0 Cray Communications A/S
+11e1 GEC Plessey Semi Inc.
+11e2 Samsung Information Systems America
+11e3 Quicklogic Corporation
+11e4 Second Wave Inc
+11e5 IIX Consulting
+11e6 Mitsui-Zosen System Research
+11e7 Toshiba America, Elec. Company
+11e8 Digital Processing Systems Inc.
+11e9 Highwater Designs Ltd.
+11ea Elsag Bailey
+11eb Formation Inc.
+11ec Coreco Inc
+11ed Mediamatics
+11ee Dome Imaging Systems Inc
+11ef Nicolet Technologies B.V.
+11f0 Compu-Shack
+	4231 FDDI
+	4232 FASTline UTP Quattro
+	4233 FASTline FO
+	4234 FASTline UTP
+	4235 FASTline-II UTP
+	4236 FASTline-II FO
+	4731 GIGAline
+11f1 Symbios Logic Inc
+11f2 Picture Tel Japan K.K.
+11f3 Keithley Metrabyte
+11f4 Kinetic Systems Corporation
+	2915 CAMAC controller
+11f5 Computing Devices International
+11f6 Compex
+	0112 ENet100VG4
+	0113 FreedomLine 100
+	1401 ReadyLink 2000
+	2011 RL100-ATX 10/100
+	2201 ReadyLink 100TX (Winbond W89C840)
+		11f6 2011 ReadyLink 100TX
+	9881 RL100TX
+11f7 Scientific Atlanta
+11f8 PMC-Sierra Inc.
+	7375 PM7375 [LASAR-155 ATM SAR]
+11f9 I-Cube Inc
+11fa Kasan Electronics Company, Ltd.
+11fb Datel Inc
+11fc Silicon Magic
+11fd High Street Consultants
+11fe Comtrol Corporation
+	0001 RocketPort 8 Oct
+	0002 RocketPort 8 Intf
+	0003 RocketPort 16 Intf
+	0004 RocketPort 32 Intf
+	0005 RocketPort Octacable
+	0006 RocketPort 8J
+	0008 RocketPort 8-port
+	0009 RocketPort 16-port
+	000a RocketPort Plus Quadcable
+	000b RocketPort Plus Octacable
+	000c RocketPort 8-port Modem
+11ff Scion Corporation
+1200 CSS Corporation
+1201 Vista Controls Corp
+1202 Network General Corp.
+1203 Bayer Corporation, Agfa Division
+1204 Lattice Semiconductor Corporation
+1205 Array Corporation
+1206 Amdahl Corporation
+1208 Parsytec GmbH
+	4853 HS-Link Device
+1209 SCI Systems Inc
+120a Synaptel
+120b Adaptive Solutions
+120c Technical Corp.
+120d Compression Labs, Inc.
+120e Cyclades Corporation
+	0100 Cyclom_Y below first megabyte
+	0101 Cyclom_Y above first megabyte
+	0102 Cyclom_4Y below first megabyte
+	0103 Cyclom_4Y above first megabyte
+	0104 Cyclom_8Y below first megabyte
+	0105 Cyclom_8Y above first megabyte
+	0200 Cyclom_Z below first megabyte
+	0201 Cyclom_Z above first megabyte
+	0300 PC300 RX 2
+	0301 PC300 RX 1
+	0310 PC300 TE 2
+	0311 PC300 TE 1
+120f Essential Communications
+	0001 Roadrunner serial HIPPI
+1210 Hyperparallel Technologies
+1211 Braintech Inc
+1212 Kingston Technology Corp.
+1213 Applied Intelligent Systems, Inc.
+1214 Performance Technologies, Inc.
+1215 Interware Co., Ltd
+1216 Purup Prepress A/S
+1217 O2 Micro, Inc.
+	6729 6729
+	673a 6730
+	6832 6832
+	6836 6836
+	6872 OZ6812 Cardbus Controller
+	6933 OZ6933 Cardbus Controller
+1218 Hybricon Corp.
+1219 First Virtual Corporation
+121a 3Dfx Interactive, Inc.
+	0001 Voodoo
+	0002 Voodoo 2
+	0003 Voodoo Banshee
+		1092 0003 Monster Fusion
+		1092 4000 Monster Fusion
+		1092 4002 Monster Fusion
+		1092 4801 Monster Fusion AGP
+		1092 4803 Monster Fusion AGP
+		1092 8030 Monster Fusion
+		1092 8035 Monster Fusion AGP
+		10b0 0001 Dragon 4000
+		1102 1017 CT6760 3D Blaster Banshee
+		121a 0001 Voodoo Banshee AGP
+		121a 0003 Voodoo Banshee AGP SGRAM
+		121a 0004 Voodoo Banshee
+		139c 0016 Raven
+		139c 0017 Raven
+		14af 0002 Maxi Gamer Phoenix
+		3030 3030 Skywell Magic TwinPower
+	0004 Voodoo Banshee [Velocity 100]
+	0005 Voodoo 3
+		121a 0004 Voodoo3 AGP
+		121a 0030 Voodoo3 AGP
+		121a 0031 Voodoo3 AGP
+		121a 0034 Voodoo3 AGP
+		121a 0036 Voodoo3
+		121a 0037 Voodoo3 AGP
+		121a 0038 Voodoo3 AGP
+		121a 003a Voodoo3 AGP
+		121a 0044 Voodoo3
+		121a 004b Velocity 100
+		121a 004c Velocity 200
+		121a 004d Voodoo3 AGP
+		121a 004e Voodoo3 AGP
+		121a 0051 Voodoo3 AGP
+		121a 0052 Voodoo3 AGP
+		121a 0060 Voodoo3 3500 TV (NTSC)
+		121a 0061 Voodoo3 3500 TV (PAL)
+		121a 0062 Voodoo3 3500 TV (SECAM)
+	0009 Voodoo 4 / Voodoo 5
+		121a 0009 Voodoo5 AGP 5500/6000
+121b Advanced Telecommunications Modules
+121c Nippon Texaco., Ltd
+121d Lippert Automationstechnik GmbH
+121e CSPI
+121f Arcus Technology, Inc.
+1220 Ariel Corporation
+	1220 AMCC 5933 TMS320C80 DSP/Imaging board
+1221 Contec Co., Ltd
+1222 Ancor Communications, Inc.
+1223 Artesyn Communication Products
+	0003 PM/Link
+	0004 PM/T1
+	0005 PM/E1
+	0008 PM/SLS
+	0009 BajaSpan Resource Target
+	000a BajaSpan Section 0
+	000b BajaSpan Section 1
+	000c BajaSpan Section 2
+	000d BajaSpan Section 3
+	000e PM/PPC
+1224 Interactive Images
+1225 Power I/O, Inc.
+1227 Tech-Source
+1228 Norsk Elektro Optikk A/S
+1229 Data Kinesis Inc.
+122a Integrated Telecom
+122b LG Industrial Systems Co., Ltd
+122c Sican GmbH
+122d Aztech System Ltd
+	1206 368DSP
+	50dc 3328 Audio
+		122d 0001 3328 Audio
+	80da 3328 Audio
+		122d 0001 3328 Audio
+122e Xyratex
+122f Andrew Corporation
+1230 Fishcamp Engineering
+1231 Woodward McCoach, Inc.
+1232 GPT Limited
+1233 Bus-Tech, Inc.
+1234 Technical Corp.
+1235 Risq Modular Systems, Inc.
+1236 Sigma Designs Corporation
+	0000 RealMagic64/GX
+	6401 REALmagic 64/GX (SD 6425)
+1237 Alta Technology Corporation
+1238 Adtran
+1239 3DO Company
+123a Visicom Laboratories, Inc.
+123b Seeq Technology, Inc.
+123c Century Systems, Inc.
+123d Engineering Design Team, Inc.
+	0000 EasyConnect 8/32
+	0002 EasyConnect 8/64
+	0003 EasyIO
+123e Simutech, Inc.
+123f C-Cube Microsystems
+	00e4 MPEG
+	8120 E4?
+		11bd 0006 DV500 E4
+		11bd 000a DV500 E4
+	8888 Cinemaster C 3.0 DVD Decoder
+		1002 0001 Cinemaster C 3.0 DVD Decoder
+		1002 0002 Cinemaster C 3.0 DVD Decoder
+		1328 0001 Cinemaster C 3.0 DVD Decoder
+1240 Marathon Technologies Corp.
+1241 DSC Communications
+1242 Jaycor Networks, Inc.
+	4643 FCI-1063 Fibre Channel Adapter
+1243 Delphax
+1244 AVM Audiovisuelles MKTG & Computer System GmbH
+	0700 B1 ISDN
+	0a00 A1 ISDN [Fritz]
+		1244 0a00 FRITZ!Card ISDN Controller
+1245 A.P.D., S.A.
+1246 Dipix Technologies, Inc.
+1247 Xylon Research, Inc.
+1248 Central Data Corporation
+1249 Samsung Electronics Co., Ltd.
+124a AEG Electrocom GmbH
+124b SBS/Greenspring Modular I/O
+124c Solitron Technologies, Inc.
+124d Stallion Technologies, Inc.
+	0000 EasyConnection 8/32
+	0002 EasyConnection 8/64
+	0003 EasyIO
+124e Cylink
+124f Infotrend Technology, Inc.
+	0041 IFT-2000 Series RAID Controller
+1250 Hitachi Microcomputer System Ltd
+1251 VLSI Solutions Oy
+1253 Guzik Technical Enterprises
+1254 Linear Systems Ltd.
+1255 Optibase Ltd
+	1110 MPEG Forge
+	1210 MPEG Fusion
+	2110 VideoPlex
+	2120 VideoPlex CC
+	2130 VideoQuest
+1256 Perceptive Solutions, Inc.
+	4201 PCI-2220I
+	4401 PCI-2240I
+	5201 PCI-2000
+1257 Vertex Networks, Inc.
+1258 Gilbarco, Inc.
+1259 Allied Telesyn International
+	2560 AT-2560 Fast Ethernet Adapter (i82557B)
+125a ABB Power Systems
+125b Asix Electronics Corporation
+	1400 ALFA GFC2204
+125c Aurora Technologies, Inc.
+125d ESS Technology
+	0000 ES336H Fax Modem (Early Model)
+	1948 Solo?
+	1968 ES1968 Maestro 2
+		1028 0085 ES1968 Maestro-2 PCI
+		1033 8051 ES1968 Maestro-2 Audiodrive
+	1969 ES1969 Solo-1 Audiodrive
+		1014 0166 ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard
+		125d 8888 Solo-1 Audio Adapter
+		525f c888 ES1969 SOLO-1 AudioDrive (+ES1938)
+	1978 ES1978 Maestro 2E
+		1033 803c ES1978 Maestro-2E Audiodrive
+		1033 8058 ES1978 Maestro-2E Audiodrive
+		1092 4000 Monster Sound MX400
+		1179 0001 ES1978 Maestro-2E Audiodrive
+	1988 ES1988 Allegro-1
+		1092 4100 Sonic Impact S100
+		125d 1988 ESS Allegro-1 Audiodrive
+	1989 ESS Modem
+		125d 1989 ESS Modem
+	1998 ES1983S Maestro-3i PCI Audio Accelerator
+	1999 ES1983S Maestro-3i PCI Modem Accelerator
+	2808 ES336H Fax Modem (Later Model)
+	2838 ES2838/2839 SuperLink Modem
+	2898 ES2898 Modem
+125e Specialvideo Engineering SRL
+125f Concurrent Technologies, Inc.
+1260 Harris Semiconductor
+	8130 HMP8130 NTSC/PAL Video Decoder
+	8131 HMP8131 NTSC/PAL Video Decoder
+1261 Matsushita-Kotobuki Electronics Industries, Ltd.
+1262 ES Computer Company, Ltd.
+1263 Sonic Solutions
+1264 Aval Nagasaki Corporation
+1265 Casio Computer Co., Ltd.
+1266 Microdyne Corporation
+	0001 NE10/100 Adapter (i82557B)
+	1910 NE2000Plus (RT8029) Ethernet Adapter
+		1266 1910 NE2000Plus Ethernet Adapter
+1267 S. A. Telecommunications
+	5352 PCR2101
+	5a4b Telsat Turbo
+1268 Tektronix
+1269 Thomson-CSF/TTM
+126a Lexmark International, Inc.
+126b Adax, Inc.
+126c Northern Telecom
+126d Splash Technology, Inc.
+126e Sumitomo Metal Industries, Ltd.
+126f Silicon Motion, Inc.
+	0710 SM710 LynxEM
+	0712 SM712 LynxEM+
+	0720 SM720 Lynx3DM
+	0810 SM810 LynxE
+	0811 SM811 LynxE
+	0820 SM820 Lynx3D
+	0910 SM910
+1270 Olympus Optical Co., Ltd.
+1271 GW Instruments
+1272 Telematics International
+1273 Hughes Network Systems
+	0002 DirecPC
+1274 Ensoniq
+	1371 ES1371 [AudioPCI-97]
+		0e11 b1a7 ES1371, ES1373 AudioPCI
+		1033 80ac ES1371, ES1373 AudioPCI
+		1042 1854 Tazer
+		107b 8054 Tabor2
+		1274 1371 Creative Sound Blaster AudioPCI64V, AudioPCI128
+		1462 6470 ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A
+		1462 6560 ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10
+		1462 6630 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A
+		1462 6631 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A
+		1462 6632 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A
+		1462 6633 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A
+		1462 6820 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00
+		1462 6822 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A
+		1462 6830 ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00
+		1462 6880 ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00
+		1462 6900 ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00
+		1462 6910 ES1371, ES1373 AudioPCI On Motherboard MS-6191
+		1462 6930 ES1371, ES1373 AudioPCI On Motherboard MS-6193
+		1462 6990 ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A
+		1462 6991 ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A
+		14a4 2077 ES1371, ES1373 AudioPCI On Motherboard KR639
+		14a4 2105 ES1371, ES1373 AudioPCI On Motherboard MR800
+		14a4 2107 ES1371, ES1373 AudioPCI On Motherboard MR801
+		14a4 2172 ES1371, ES1373 AudioPCI On Motherboard DR739
+		1509 9902 ES1371, ES1373 AudioPCI On Motherboard KW11
+		1509 9903 ES1371, ES1373 AudioPCI On Motherboard KW31
+		1509 9904 ES1371, ES1373 AudioPCI On Motherboard KA11
+		1509 9905 ES1371, ES1373 AudioPCI On Motherboard KC13
+		152d 8801 ES1371, ES1373 AudioPCI On Motherboard CP810E
+		152d 8802 ES1371, ES1373 AudioPCI On Motherboard CP810
+		152d 8803 ES1371, ES1373 AudioPCI On Motherboard P3810E
+		152d 8804 ES1371, ES1373 AudioPCI On Motherboard P3810-S
+		152d 8805 ES1371, ES1373 AudioPCI On Motherboard P3820-S
+		270f 2001 ES1371, ES1373 AudioPCI On Motherboard 6CTR
+		270f 2200 ES1371, ES1373 AudioPCI On Motherboard 6WTX
+		270f 3000 ES1371, ES1373 AudioPCI On Motherboard 6WSV
+		270f 3100 ES1371, ES1373 AudioPCI On Motherboard 6WIV2
+		270f 3102 ES1371, ES1373 AudioPCI On Motherboard 6WIV
+		270f 7060 ES1371, ES1373 AudioPCI On Motherboard 6ASA2
+		8086 4249 ES1371, ES1373 AudioPCI On Motherboard BI440ZX
+		8086 424c ES1371, ES1373 AudioPCI On Motherboard BL440ZX
+		8086 425a ES1371, ES1373 AudioPCI On Motherboard BZ440ZX
+		8086 4341 ES1371, ES1373 AudioPCI On Motherboard Cayman
+		8086 4343 ES1371, ES1373 AudioPCI On Motherboard Cape Cod
+		8086 4649 ES1371, ES1373 AudioPCI On Motherboard Fire Island
+		8086 464a ES1371, ES1373 AudioPCI On Motherboard FJ440ZX
+		8086 4d4f ES1371, ES1373 AudioPCI On Motherboard Montreal
+		8086 4f43 ES1371, ES1373 AudioPCI On Motherboard OC440LX
+		8086 5243 ES1371, ES1373 AudioPCI On Motherboard RC440BX
+		8086 5352 ES1371, ES1373 AudioPCI On Motherboard SunRiver
+		8086 5643 ES1371, ES1373 AudioPCI On Motherboard Vancouver
+		8086 5753 ES1371, ES1373 AudioPCI On Motherboard WS440BX
+	5000 ES1370 [AudioPCI]
+		4942 4c4c Creative Sound Blaster AudioPCI128
+	5880 5880 AudioPCI
+		1274 2000 Creative Sound Blaster AudioPCI128
+		1274 5880 Creative Sound Blaster AudioPCI128
+		1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00
+		270f 2001 5880 AudioPCI On Motherboard 6CTR
+		270f 2200 5880 AudioPCI On Motherboard 6WTX
+		270f 7040 5880 AudioPCI On Motherboard 6ATA4
+1275 Network Appliance Corporation
+1276 Switched Network Technologies, Inc.
+1277 Comstream
+1278 Transtech Parallel Systems Ltd.
+1279 Transmeta Corporation
+	0295 Northbridge
+	0395 LongRun Northbridge
+	0396 SDRAM controller
+	0397 BIOS scratchpad
+127a Rockwell International
+	1002 HCF 56k Data/Fax Modem
+		122d 4002 HPG / MDP3858-U # Aztech
+		122d 4005 MDP3858-E # Aztech
+		122d 4007 MDP3858-A/-NZ # Aztech
+		122d 4012 MDP3858-SA # Aztech
+		122d 4017 MDP3858-W # Aztech
+		122d 4018 MDP3858-W # Aztech
+	1003 HCF 56k Data/Fax Modem
+		0e11 b0bc 229-DF Zephyr # Compaq
+		0e11 b114 229-DF Cheetah # Compaq
+		1033 802b 229-DF # NEC
+		13df 1003 PCI56RX Modem # E-Tech Inc
+		13e0 0117 IBM # GVC
+		13e0 0147 IBM # GVC
+		13e0 0197 IBM # GVC
+		13e0 01c7 IBM # GVC
+		13e0 01f7 IBM # GVC
+		1436 1003 IBM # CIS
+		1436 1103 IBM # CIS
+		1436 1602 Compaq 229-DF Ducati
+	1004 HCF 56k Data/Fax/Voice Modem
+		10cf 1059 Fujitsu 229-DFRT
+	1005 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+		1033 8029 229-DFSV # NEC
+		1033 8054 Modem # NEC
+		10cf 103c Fujitsu
+		10cf 1055 Fujitsu 229-DFSV
+		10cf 1056 Fujitsu 229-DFSV
+		122d 4003 MDP3858SP-U # Aztech
+		122d 4006 Packard Bell MDP3858V-E # Aztech
+		122d 4008 MDP3858SP-A/SP-NZ # Aztech
+		122d 4009 MDP3858SP-E # Aztech
+		122d 4010 MDP3858V-U # Aztech
+		122d 4011 MDP3858SP-SA # Aztech
+		122d 4013 MDP3858V-A/V-NZ # Aztech
+		122d 4015 MDP3858SP-W # Aztech
+		122d 4016 MDP3858V-W # Aztech
+		122d 4019 MDP3858V-SA # Aztech
+		13df 1005 PCI56RVP Modem # E-Tech Inc
+		13e0 0187 IBM # GVC
+		13e0 01a7 IBM # GVC
+		13e0 01b7 IBM # GVC
+		13e0 01d7 IBM # GVC
+		1436 1005 IBM # CIS
+		1436 1105 IBM # CIS
+	1023 HCF 56k Data/Fax Modem
+		122d 4020 Packard Bell MDP3858-WE # Aztech
+		122d 4023 MDP3858-UE # Aztech
+		13e0 0247 IBM # GVC
+		13e0 0297 IBM # GVC
+		13e0 02c7 IBM # GVC
+		1436 1203 IBM # CIS
+		1436 1303 IBM # CIS
+	1024 HCF 56k Data/Fax/Voice Modem
+	1025 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+		10cf 106a Fujitsu 235-DFSV
+		122d 4021 Packard Bell MDP3858V-WE # Aztech
+		122d 4022 MDP3858SP-WE # Aztech
+		122d 4024 MDP3858V-UE # Aztech
+		122d 4025 MDP3858SP-UE # Aztech
+	1026 HCF 56k PCI Speakerphone Modem
+	1035 HCF 56k PCI Speakerphone Modem
+	1085 HCF 56k Volcano PCI Modem
+	2005 HCF 56k Data/Fax Modem
+		104d 8044 229-DFSV # Sony
+		104d 8045 229-DFSV # Sony
+		104d 8055 PBE/Aztech 235W-DFSV # Sony
+		104d 8056 235-DFSV # Sony
+		104d 805a Modem # Sony
+		104d 805f Modem # Sony
+		104d 8074 Modem # Sony
+	2013 HSF 56k Data/Fax Modem
+		1179 0001 Modem # Toshiba
+		1179 ff00 Modem # Toshiba
+	2014 HSF 56k Data/Fax/Voice Modem
+		10cf 1057 Fujitsu Citicorp III
+		122d 4050 MSP3880-U # Aztech
+		122d 4055 MSP3880-W # Aztech
+	2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem
+		10cf 1063 Fujitsu
+		10cf 1064 Fujitsu
+		1468 2015 Fujitsu
+	2016 HSF 56k Data/Fax/Voice/Spkp Modem
+		122d 4051 MSP3880V-W # Aztech
+		122d 4052 MSP3880SP-W # Aztech
+		122d 4054 MSP3880V-U # Aztech
+		122d 4056 MSP3880SP-U # Aztech
+		122d 4057 MSP3880SP-A # Aztech
+	4311 Riptide HSF 56k PCI Modem
+		127a 4311 Ring Modular? Riptide HSF RT HP Dom
+		13e0 0210 HP-GVC
+	4320 Riptide PCI Audio Controller
+		1235 4320 Riptide PCI Audio Controller
+	4321 Riptide HCF 56k PCI Modem
+		1235 4321 Hewlett Packard DF
+		1235 4324 Hewlett Packard DF
+		13e0 0210 Hewlett Packard DF
+		144d 2321 Riptide # Samsung
+	4322 Riptide PCI Game Controller
+		1235 4322 Riptide PCI Game Controller
+	8234 RapidFire 616X ATM155 Adapter
+		108d 0027 RapidFire 616X ATM155 Adapter
+127b Pixera Corporation
+127c Crosspoint Solutions, Inc.
+127d Vela Research
+127e Winnov, L.P.
+127f Fujifilm +1280 Photoscript Group Ltd. +1281 Yokogawa Electric Corporation +1282 Davicom Semiconductor, Inc. + 9009 Ethernet 100/10 MBit + 9100 Ethernet 100/10 MBit + 9102 Ethernet 100/10 MBit + 9132 Ethernet 100/10 MBit +1283 Integrated Technology Express, Inc. + 673a IT8330G + 8330 IT8330G + 8888 IT8888F PCI to ISA Bridge with SMB + 8889 IT8889F PCI to ISA Bridge + e886 IT8330G +1284 Sahara Networks, Inc. +1285 Platform Technologies, Inc. + 0100 AGOGO sound chip (aka ESS Maestro 1) +1286 Mazet GmbH +1287 M-Pact, Inc. + 001e LS220D DVD Decoder + 001f LS220C DVD Decoder +1288 Timestep Corporation +1289 AVC Technology, Inc. +128a Asante Technologies, Inc. +128b Transwitch Corporation +128c Retix Corporation +128d G2 Networks, Inc. + 0021 ATM155 Adapter +128e Hoontech Corporation/Samho Multi Tech Ltd. + 0008 ST128 WSS/SB + 0009 ST128 SAM9407 + 000a ST128 Game Port + 000b ST128 MPU Port + 000c ST128 Ctrl Port +128f Tateno Dennou, Inc. +1290 Sord Computer Corporation +1291 NCS Computer Italia +1292 Tritech Microelectronics Inc +1293 Media Reality Technology +1294 Rhetorex, Inc. +1295 Imagenation Corporation +1296 Kofax Image Products +1297 Holco Enterprise Co, Ltd/Shuttle Computer +1298 Spellcaster Telecommunications Inc. +1299 Knowledge Technology Lab. +129a VMetro, inc. +129b Image Access +129c Jaycor +129d Compcore Multimedia, Inc. +129e Victor Company of Japan, Ltd. +129f OEC Medical Systems, Inc. +12a0 Allen-Bradley Company +12a1 Simpact Associates, Inc. +12a2 Newgen Systems Corporation +12a3 Lucent Technologies +12a4 NTT Electronics Technology Company +12a5 Vision Dynamics Ltd. +12a6 Scalable Networks, Inc. +12a7 AMO GmbH +12a8 News Datacom +12a9 Xiotech Corporation +12aa SDL Communications, Inc. +12ab Yuan Yuan Enterprise Co., Ltd. + 3000 MPG-200C PCI DVD Decoder Card +12ac Measurex Corporation +12ad Multidata GmbH +12ae Alteon Networks Inc. + 0001 AceNIC Gigabit Ethernet (Fibre) + 1410 0104 Gigabit Ethernet-SX PCI Adapter (14100401) + 0002 AceNIC Gigabit Ethernet (Copper) +12af TDK USA Corp +12b0 Jorge Scientific Corp +12b1 GammaLink +12b2 General Signal Networks +12b3 Inter-Face Co Ltd +12b4 FutureTel Inc +12b5 Granite Systems Inc. +12b6 Natural Microsystems +12b7 Cognex Modular Vision Systems Div. - Acumen Inc. +12b8 Korg +12b9 US Robotics/3Com + 1006 WinModem + 12b9 005c USR 56k Internal Voice WinModem (Model 3472) + 12b9 005e USR 56k Internal WinModem (Models 662975) + 12b9 0062 USR 56k Internal Voice WinModem (Model 662978) + 12b9 0068 USR 56k Internal Voice WinModem (Model 5690) + 12b9 007a USR 56k Internal Voice WinModem (Model 662974) + 12b9 007f USR 56k Internal WinModem (Models 5698, 5699) + 12b9 0080 USR 56k Internal WinModem (Models 2975, 3528) + 12b9 0081 USR 56k Internal Voice WinModem (Models 2974, 3529) + 12b9 0091 USR 56k Internal Voice WinModem (Model 2978) + 1007 USR 56k Internal WinModem + 12b9 00a3 USR 56k Internal WinModem (Model 3595) + 1008 56K FaxModem Model 5610 + 12b9 00a2 USR 56k Internal FAX Modem (Model 2977) + 12b9 00aa USR 56k Internal Voice Modem (Model 2976) + 12b9 00ab USR 56k Internal Voice Modem (Model 5609) + 12b9 00ac USR 56k Internal Voice Modem (Model 3298) + 12b9 00ad USR 56k Internal FAX Modem (Model 5610) +12ba PMC Sierra +12bb Nippon Unisoft Corporation +12bc Array Microsystems +12bd Computerm Corp. +12be Anchor Chips Inc. 
+ 3041 AN3041Q CO-MEM + 3042 AN3042Q CO-MEM Lite + 12be 3042 Anchor Chips Lite Evaluation Board +12bf Fujifilm Microdevices +12c0 Infimed +12c1 GMM Research Corp +12c2 Mentec Limited +12c3 Holtek Microelectronics Inc + 0058 PCI NE2K Ethernet + 5598 PCI NE2K Ethernet +12c4 Connect Tech Inc +12c5 Picture Elements Incorporated + 0081 PCIVST [Grayscale Thresholding Engine] + 0085 Video Simulator/Sender + 0086 THR2 Multi-scale Thresholder +12c6 Mitani Corporation +12c7 Dialogic Corp +12c8 G Force Co, Ltd +12c9 Gigi Operations +12ca Integrated Computing Engines +12cb Antex Electronics Corporation +12cc Pluto Technologies International +12cd Aims Lab +12ce Netspeed Inc. +12cf Prophet Systems, Inc. +12d0 GDE Systems, Inc. +12d1 PSITech +12d2 NVidia / SGS Thomson (Joint Venture) + 0008 NV1 + 0009 DAC64 + 0018 Riva128 + 107b 8030 STB Velocity 128 + 1092 0350 Viper V330 + 1092 1092 Viper V330 + 10b4 1b1b STB Velocity 128 + 10b4 1b20 STB Velocity 128 + 10b4 1b21 STB Velocity 128 + 10b4 1b22 STB Velocity 128 AGP, NTSC TV-Out + 10b4 1b23 STB Velocity 128 AGP, PAL TV-Out + 10b4 1b27 STB Velocity 128 DVD + 10b4 222a STB Velocity 128 AGP + 10b4 2230 STB Velocity 128 + 10b4 2235 STB Velocity 128 AGP + 2a15 54a3 3DVision-SAGP + 0019 Riva128ZX + 0020 TNT + 0028 TNT2 + 0029 UTNT2 + 002c VTNT2 + 00a0 ITNT2 +12d3 Vingmed Sound A/S +12d4 DGM&S +12d5 Equator Technologies +12d6 Analogic Corp +12d7 Biotronic SRL +12d8 Pericom Semiconductor +12d9 Aculab PLC +12da True Time Inc. +12db Annapolis Micro Systems, Inc +12dc Symicron Computer Communication Ltd. +12dd Management Graphics +12de Rainbow Technologies +12df SBS Technologies Inc +12e0 Chase Research + 0010 ST16C654 Quad UART + 0020 ST16C654 Quad UART + 0030 ST16C654 Quad UART +12e1 Nintendo Co, Ltd +12e2 Datum Inc. Bancomm-Timing Division +12e3 Imation Corp - Medical Imaging Systems +12e4 Brooktrout Technology Inc +12e5 Apex Semiconductor Inc +12e6 Cirel Systems +12e7 Sunsgroup Corporation +12e8 Crisc Corp +12e9 GE Spacenet +12ea Zuken +12eb Aureal Semiconductor + 0001 Vortex 1 + 104d 8036 AU8820 Vortex Digital Audio Processor + 1092 2000 Sonic Impact A3D + 1092 2100 Sonic Impact A3D + 1092 2110 Sonic Impact A3D + 1092 2200 Sonic Impact A3D + 12eb 0001 AU8820 Vortex Digital Audio Processor + 5053 3355 Montego + 0002 Vortex 2 + 104d 8049 AU8830 Vortex 3D Digital Audio Processor + 104d 807b AU8830 Vortex 3D Digital Audio Processor + 1092 3000 Monster Sound II + 1092 3001 Monster Sound II + 1092 3002 Monster Sound II + 1092 3003 Monster Sound II + 1092 3004 Monster Sound II + 12eb 0001 AU8830 Vortex 3D Digital Audio Processor + 12eb 0002 AU8830 Vortex 3D Digital Audio Processor + 12eb 0088 AU8830 Vortex 3D Digital Audio Processor + 144d 3510 AU8830 Vortex 3D Digital Audio Processor + 5053 3356 Montego II + 0003 AU8810 Vortex Digital Audio Processor + 104d 8049 AU8810 Vortex Digital Audio Processor + 104d 8077 AU8810 Vortex Digital Audio Processor + 109f 1000 AU8810 Vortex Digital Audio Processor + 12eb 0003 AU8810 Vortex Digital Audio Processor + 1462 6780 AU8810 Vortex Digital Audio Processor + 14a4 2073 AU8810 Vortex Digital Audio Processor + 14a4 2091 AU8810 Vortex Digital Audio Processor + 14a4 2104 AU8810 Vortex Digital Audio Processor + 14a4 2106 AU8810 Vortex Digital Audio Processor + 8803 Vortex 56k Software Modem + 12eb 8803 Vortex 56k Software Modem +12ec 3A International, Inc. +12ed Optivision Inc. +12ee Orange Micro +12ef Vienna Systems +12f0 Pentek +12f1 Sorenson Vision Inc +12f2 Gammagraphx, Inc. 
+12f3 Radstone Technology +12f4 Megatel +12f5 Forks +12f6 Dawson France +12f7 Cognex +12f8 Electronic Design GmbH + 0002 VideoMaker +12f9 Four Fold Ltd +12fb Spectrum Signal Processing +12fc Capital Equipment Corp +12fd I2S +12fe ESD Electronic System Design GmbH +12ff Lexicon +1300 Harman International Industries Inc +1302 Computer Sciences Corp +1303 Innovative Integration +1304 Juniper Networks +1305 Netphone, Inc +1306 Duet Technologies +1307 Computer Boards + 0001 PCI-DAS1602/16 + 000b PCI-DIO48H + 000c PCI-PDISO8 + 000d PCI-PDISO16 + 000f PCI-DAS1200 + 0010 PCI-DAS1602/12 + 0014 PCI-DIO24H + 0015 PCI-DIO24H/CTR3 + 0016 PCI-DIO48H/CTR15 + 0017 PCI-DIO96H + 0018 PCI-CTR05 + 0019 PCI-DAS1200/JR + 001a PCI-DAS1001 + 001b PCI-DAS1002 + 001c PCI-DAS1602JR/16 + 001d PCI-DAS6402/16 + 001e PCI-DAS6402/12 + 001f PCI-DAS16/M1 + 0020 PCI-DDA02/12 + 0021 PCI-DDA04/12 + 0022 PCI-DDA08/12 + 0023 PCI-DDA02/16 + 0024 PCI-DDA04/16 + 0025 PCI-DDA08/16 + 0026 PCI-DAC04/12-HS + 0027 PCI-DAC04/16-HS + 0028 PCI-DIO24 + 0029 PCI-DAS08 + 002c PCI-INT32 + 0033 PCI-DUAL-AC5 + 0034 PCI-DAS-TC + 0035 PCI-DAS64/M1/16 + 0036 PCI-DAS64/M2/16 + 0037 PCI-DAS64/M3/16 + 004c PCI-DAS1000 +1308 Jato Technologies Inc. + 0001 NetCelerator Adapter + 1308 0001 NetCelerator Adapter +1309 AB Semiconductor Ltd +130a Mitsubishi Electric Microcomputer +130b Colorgraphic Communications Corp +130c Ambex Technologies, Inc +130d Accelerix Inc +130e Yamatake-Honeywell Co. Ltd +130f Advanet Inc +1310 Gespac +1311 Videoserver, Inc +1312 Acuity Imaging, Inc +1313 Yaskawa Electric Co. +1316 Teradyne Inc +1317 Linksys + 0981 Fast Ethernet 10/100 + 0985 Network Everywhere Fast Ethernet 10/100 model NC100 + 1985 Fast Ethernet 10/100 +1318 Packet Engines Inc. + 0911 PCI Ethernet Adapter +1319 Fortemedia, Inc + 0801 Xwave QS3000A [FM801] + 0802 Xwave QS3000A [FM801 game port] + 1000 FM801 PCI Audio + 1001 FM801 PCI Joystick +131a Finisar Corp. +131c Nippon Electro-Sensory Devices Corp +131d Sysmic, Inc. +131e Xinex Networks Inc +131f Siig Inc + 1000 CyberSerial (1-port) 16550 + 1001 CyberSerial (1-port) 16650 + 1002 CyberSerial (1-port) 16850 + 1010 Duet 1S(16550)+1P + 1011 Duet 1S(16650)+1P + 1012 Duet 1S(16850)+1P + 1020 CyberParallel (1-port) + 1021 CyberParallel (2-port) + 1030 CyberSerial (2-port) 16550 + 1031 CyberSerial (2-port) 16650 + 1032 CyberSerial (2-port) 16850 + 1034 Trio 2S(16550)+1P + 1035 Trio 2S(16650)+1P + 1036 Trio 2S(16850)+1P + 1050 CyberSerial (4-port) 16550 + 1051 CyberSerial (4-port) 16650 + 1052 CyberSerial (4-port) 16850 + 2000 CyberSerial (1-port) 16550 + 2001 CyberSerial (1-port) 16650 + 2002 CyberSerial (1-port) 16850 + 2010 Duet 1S(16550)+1P + 2011 Duet 1S(16650)+1P + 2012 Duet 1S(16850)+1P + 2020 CyberParallel (1-port) + 2021 CyberParallel (2-port) + 2030 CyberSerial (2-port) 16550 + 131f 2030 PCI Serial Card + 2031 CyberSerial (2-port) 16650 + 2032 CyberSerial (2-port) 16850 + 2040 Trio 1S(16550)+2P + 2041 Trio 1S(16650)+2P + 2042 Trio 1S(16850)+2P + 2050 CyberSerial (4-port) 16550 + 2051 CyberSerial (4-port) 16650 + 2052 CyberSerial (4-port) 16850 + 2060 Trio 2S(16550)+1P + 2061 Trio 2S(16650)+1P + 2062 Trio 2S(16850)+1P +1320 Crypto AG +1321 Arcobel Graphics BV +1322 MTT Co., Ltd +1323 Dome Inc +1324 Sphere Communications +1325 Salix Technologies, Inc +1326 Seachange international +1327 Voss scientific +1328 quadrant international +1329 Productivity Enhancement +132a Microcom Inc. +132b Broadband Technologies +132c Micrel Inc +132d Integrated Silicon Solution, Inc. +1330 MMC Networks +1331 Radisys Corp. 
+1332 Micro Memory +1334 Redcreek Communications, Inc +1335 Videomail, Inc +1337 Third Planet Publishing +1338 BT Electronics +133a Vtel Corp +133b Softcom Microsystems +133c Holontech Corp +133d SS Technologies +133e Virtual Computer Corp +133f SCM Microsystems +1340 Atalla Corp +1341 Kyoto Microcomputer Co +1342 Promax Systems Inc +1343 Phylon Communications Inc +1344 Crucial Technology +1345 Arescom Inc +1347 Odetics +1349 Sumitomo Electric Industries, Ltd. +134a DTC Technology Corp. + 0001 Domex 536 + 0002 Domex DMX3194UP SCSI Adapter +134b ARK Research Corp. +134c Chori Joho System Co. Ltd +134d PCTel Inc + 7890 HSP MicroModem 56 + 7891 HSP MicroModem 56 + 134d 0001 HSP MicroModem 56 + 7892 HSP MicroModem 56 + 7893 HSP MicroModem 56 + 7894 HSP MicroModem 56 + 7895 HSP MicroModem 56 + 7896 HSP MicroModem 56 + 7897 HSP MicroModem 56 +134e CSTI +134f Algo System Co Ltd +1350 Systec Co. Ltd +1351 Sonix Inc +1353 Dassault A.T. +1354 Dwave System Inc +1355 Kratos Analytical Ltd +1356 The Logical Co +1359 Prisa Networks +135a Brain Boxes +135b Giganet Inc +135c Quatech Inc +135d ABB Network Partner AB +135e Sealevel Systems Inc + 7101 Single Port RS-232/422/485/530 + 7201 Dual Port RS-232/422/485 Interface + 7202 Dual Port RS-232 Interface + 7401 Four Port RS-232 Interface + 7402 Four Port RS-422/485 Interface + 7801 Eight Port RS-232 Interface + 8001 8001 Digital I/O Adapter +135f I-Data International A-S +1360 Meinberg Funkuhren +1361 Soliton Systems K.K. +1362 Fujifacom Corporation +1363 Phoenix Technology Ltd +1364 ATM Communications Inc +1365 Hypercope GmbH +1366 Teijin Seiki Co. Ltd +1367 Hitachi Zosen Corporation +1368 Skyware Corporation +1369 Digigram +136a High Soft Tech +136b Kawasaki Steel Corporation +136c Adtek System Science Co Ltd +136d Gigalabs Inc +136f Applied Magic Inc +1370 ATL Products +1371 CNet Technology Inc +1373 Silicon Vision Inc +1374 Silicom Ltd +1375 Argosystems Inc +1376 LMC +1377 Electronic Equipment Production & Distribution GmbH +1378 Telemann Co. Ltd +1379 Asahi Kasei Microsystems Co Ltd +137a Mark of the Unicorn Inc +137b PPT Vision +137c Iwatsu Electric Co Ltd +137d Dynachip Corporation +137e Patriot Scientific Corporation +137f Japan Satellite Systems Inc +1380 Sanritz Automation Co Ltd +1381 Brains Co. Ltd +1382 Marian - Electronic & Software +1383 Controlnet Inc +1384 Reality Simulation Systems Inc +1385 Netgear + 620a GA620 + 622a GA622 + 630a GA630 + f311 FA311 +1386 Video Domain Technologies +1387 Systran Corp +1388 Hitachi Information Technology Co Ltd +1389 Applicom International + 0001 PCI1500PFB [Intelligent fieldbus adaptor] +138a Fusion Micromedia Corp +138b Tokimec Inc +138c Silicon Reality +138d Future Techno Designs pte Ltd +138e Basler GmbH +138f Patapsco Designs Inc +1390 Concept Development Inc +1391 Development Concepts Inc +1392 Medialight Inc +1393 Moxa Technologies Co Ltd + 1040 Smartio C104H/PCI + 1680 Smartio C168H/PCI + 2040 Intellio CP-204J + 2180 Intellio C218 Turbo PCI + 3200 Intellio C320 Turbo PCI +1394 Level One Communications +1395 Ambicom Inc +1396 Cipher Systems Inc +1397 Cologne Chip Designs GmbH + 2bd0 ISDN network controller [HFC-PCI] + 1397 2bd0 ISDN Board + e4bf 1000 CI1-1-Harp +1398 Clarion co. Ltd +1399 Rios systems Co Ltd +139a Alacritech Inc +139b Mediasonic Multimedia Systems Ltd +139c Quantum 3d Inc +139d EPL limited +139e Media4 +139f Aethra s.r.l. 
+13a0 Crystal Group Inc +13a1 Kawasaki Heavy Industries Ltd +13a2 Ositech Communications Inc +13a3 Hi-Fn +13a4 Rascom Inc +13a5 Audio Digital Imaging Inc +13a6 Videonics Inc +13a7 Teles AG +13a8 Exar Corp. +13a9 Siemens Medical Systems, Ultrasound Group +13aa Broadband Networks Inc +13ab Arcom Control Systems Ltd +13ac Motion Media Technology Ltd +13ad Nexus Inc +13ae ALD Technology Ltd +13af T.Sqware +13b0 Maxspeed Corp +13b1 Tamura corporation +13b2 Techno Chips Co. Ltd +13b3 Lanart Corporation +13b4 Wellbean Co Inc +13b5 ARM +13b6 Dlog GmbH +13b7 Logic Devices Inc +13b8 Nokia Telecommunications oy +13b9 Elecom Co Ltd +13ba Oxford Instruments +13bb Sanyo Technosound Co Ltd +13bc Bitran Corporation +13bd Sharp corporation +13be Miroku Jyoho Service Co. Ltd +13bf Sharewave Inc +13c0 Microgate Corporation + 0010 SyncLink WAN Adapter +13c1 3ware Inc + 1000 3ware ATA-RAID + 1001 3ware 7000-series ATA-RAID +13c2 Technotrend Systemtechnik GmbH +13c3 Janz Computer AG +13c4 Phase Metrics +13c5 Alphi Technology Corp +13c6 Condor Engineering Inc +13c7 Blue Chip Technology Ltd +13c8 Apptech Inc +13c9 Eaton Corporation +13ca Iomega Corporation +13cb Yano Electric Co Ltd +13cc Metheus Corporation +13cd Compatible Systems Corporation +13ce Cocom A/S +13cf Studio Audio & Video Ltd +13d0 Techsan Electronics Co Ltd +13d1 Abocom Systems Inc +13d2 Shark Multimedia Inc +13d3 IMC Networks +13d4 Graphics Microsystems Inc +13d5 Media 100 Inc +13d6 K.I. Technology Co Ltd +13d7 Toshiba Engineering Corporation +13d8 Phobos corporation +13d9 Apex PC Solutions Inc +13da Intresource Systems pte Ltd +13db Janich & Klass Computertechnik GmbH +13dc Netboost Corporation +13dd Multimedia Bundle Inc +13de ABB Robotics Products AB +13df E-Tech Inc + 0001 PCI56RVP Modem + 13df 0001 PCI56RVP Modem +13e0 GVC Corporation +13e1 Silicom Multimedia Systems Inc +13e2 Dynamics Research Corporation +13e3 Nest Inc +13e4 Calculex Inc +13e5 Telesoft Design Ltd +13e6 Argosy research Inc +13e7 NAC Incorporated +13e8 Chip Express Corporation +13e9 Chip Express Corporation +13ea Dallas Semiconductor +13eb Hauppauge Computer Works Inc +13ec Zydacron Inc +13ed Raytheion E-Systems +13ee Hayes Microcomputer Products Inc +13ef Coppercom Inc +13f0 Sundance Technology Inc + 0201 Sundance Ethernet +13f1 Oce' - Technologies B.V. +13f2 Ford Microelectronics Inc +13f3 Mcdata Corporation +13f4 Troika Design Inc +13f5 Kansai Electric Co. Ltd +13f6 C-Media Electronics Inc + 0100 CM8338A + 13f6 ffff CMI8338/C3DX PCI Audio Device + 0101 CM8338B + 13f6 0101 CMI8338-031 PCI Audio Device + 0111 CM8738 + 13f6 0111 CMI8738/C3DX PCI Audio Device + 0211 CM8738 +13f7 Wildfire Communications +13f8 Ad Lib Multimedia Inc +13f9 NTT Advanced Technology Corp. +13fa Pentland Systems Ltd +13fb Aydin Corp +13fc Computer Peripherals International +13fd Micro Science Inc +13fe Advantech Co. Ltd +13ff Silicon Spice Inc +1400 Artx Inc + 1401 9432 TX +1401 CR-Systems A/S +1402 Meilhaus Electronic GmbH +1403 Ascor Inc +1404 Fundamental Software Inc +1405 Excalibur Systems Inc +1406 Oce' Printing Systems GmbH +1407 Lava Computer mfg Inc + 0100 Lava Dual Serial + 0101 Lava Quatro A + 0102 Lava Quatro B + 0200 Lava Port Plus + 0201 Lava Quad A + 0202 Lava Quad B + 0500 Lava Single Serial + 0600 Lava Port 650 + 8000 Lava Parallel + 8002 Lava Dual Parallel port A + 8003 Lava Dual Parallel port B + 8800 BOCA Research IOPPAR +1408 Aloka Co. 
Ltd +1409 Timedia Technology Co Ltd +140a DSP Research Inc +140b Ramix Inc +140c Elmic Systems Inc +140d Matsushita Electric Works Ltd +140e Goepel Electronic GmbH +140f Salient Systems Corp +1410 Midas lab Inc +1411 Ikos Systems Inc +1412 IC Ensemble Inc + 1712 ICE1712 [Envy24] +1413 Addonics +1414 Microsoft Corporation +1415 Oxford Semiconductor Ltd +1416 Multiwave Innovation pte Ltd +1417 Convergenet Technologies Inc +1418 Kyushu electronics systems Inc +1419 Excel Switching Corp +141a Apache Micro Peripherals Inc +141b Zoom Telephonics Inc +141d Digitan Systems Inc +141e Fanuc Ltd +141f Visiontech Ltd +1420 Psion Dacom plc +1421 Ads Technologies Inc +1422 Ygrec Systems Co Ltd +1423 Custom Technology Corp. +1424 Videoserver Connections +1425 ASIC Designers Inc +1426 Storage Technology Corp. +1427 Better On-Line Solutions +1428 Edec Co Ltd +1429 Unex Technology Corp. +142a Kingmax Technology Inc +142b Radiolan +142c Minton Optic Industry Co Ltd +142d Pix stream Inc +142e Vitec Multimedia +142f Radicom Research Inc +1430 ITT Aerospace/Communications Division +1431 Gilat Satellite Networks +1432 Edimax Computer Co. +1433 Eltec Elektronik GmbH +1435 Real Time Devices US Inc. +1436 CIS Technology Inc +1437 Nissin Inc Co +1438 Atmel-dream +1439 Outsource Engineering & Mfg. Inc +143a Stargate Solutions Inc +143b Canon Research Center, America +143c Amlogic Inc +143d Tamarack Microelectronics Inc +143e Jones Futurex Inc +143f Lightwell Co Ltd - Zax Division +1440 ALGOL Corp. +1441 AGIE Ltd +1442 Phoenix Contact GmbH & Co. +1443 Unibrain S.A. +1444 TRW +1445 Logical DO Ltd +1446 Graphin Co Ltd +1447 AIM GmBH +1448 Alesis Studio Electronics +1449 TUT Systems Inc +144a Adlink Technology + 7296 PCI-7296 + 7432 PCI-7432 + 7433 PCI-7433 + 7434 PCI-7434 + 7841 PCI-7841 + 8133 PCI-8133 + 8554 PCI-8554 + 9111 PCI-9111 + 9113 PCI-9113 + 9114 PCI-9114 +144b Loronix Information Systems Inc +144c Catalina Research Inc +144d Samsung Electronics Co Ltd +144e OLITEC +144f Askey Computer Corp. +1450 Octave Communications Ind. +1451 SP3D Chip Design GmBH +1453 MYCOM Inc +1454 Altiga Networks +1455 Logic Plus Plus Inc +1456 Advanced Hardware Architectures +1457 Nuera Communications Inc +1458 Giga-byte Technology +1459 DOOIN Electronics +145a Escalate Networks Inc +145b PRAIM SRL +145c Cryptek +145d Gallant Computer Inc +145e Aashima Technology B.V. +145f Baldor Electric Company + 0001 NextMove PCI +1460 DYNARC INC +1461 Avermedia Technologies Inc +1462 Micro-star International Co Ltd +1463 Fast Corporation +1464 Interactive Circuits & Systems Ltd +1465 GN NETTEST Telecom DIV. +1466 Designpro Inc. +1467 DIGICOM SPA +1468 AMBIT Microsystem Corp. +1469 Cleveland Motion Controls +146a IFR +146b Parascan Technologies Ltd +146c Ruby Tech Corp. +146d Tachyon, INC. +146e Williams Electronics Games, Inc. +146f Multi Dimensional Consulting Inc +1470 Bay Networks +1471 Integrated Telecom Express Inc +1472 DAIKIN Industries, Ltd +1473 ZAPEX Technologies Inc +1474 Doug Carson & Associates +1475 PICAZO Communications +1476 MORTARA Instrument Inc +1477 Net Insight +1478 DIATREND Corporation +1479 TORAY Industries Inc +147a FORMOSA Industrial Computing +147b ABIT Computer Corp. +147c AWARE, Inc. +147d Interworks Computer Products +147e Matsushita Graphic Communication Systems, Inc. +147f NIHON UNISYS, Ltd. 
+1480 SCII Telecom +1481 BIOPAC Systems Inc +1482 ISYTEC - Integrierte Systemtechnik GmBH +1483 LABWAY Corporation +1484 Logic Corporation +1485 ERMA - Electronic GmBH +1486 L3 Communications Telemetry & Instrumentation +1487 MARQUETTE Medical Systems +1488 KONTRON Electronik GmBH +1489 KYE Systems Corporation +148a OPTO +148b INNOMEDIALOGIC Inc. +148c C.P. Technology Co. Ltd +148d DIGICOM Systems, Inc. + 1003 HCF 56k Data/Fax Modem +148e OSI Plus Corporation +148f Plant Equipment, Inc. +1490 Stone Microsystems PTY Ltd. +1491 ZEAL Corporation +1492 Time Logic Corporation +1493 MAKER Communications +1494 WINTOP Technology, Inc. +1495 TOKAI Communications Industry Co. Ltd +1496 JOYTECH Computer Co., Ltd. +1497 SMA Regelsysteme GmBH +1498 TEWS Datentechnik GmBH +1499 EMTEC CO., Ltd +149a ANDOR Technology Ltd +149b SEIKO Instruments Inc +149c OVISLINK Corp. +149d NEWTEK Inc +149e Mapletree Networks Inc. +149f LECTRON Co Ltd +14a0 SOFTING GmBH +14a1 Systembase Co Ltd +14a2 Millennium Engineering Inc +14a3 Maverick Networks +14a4 GVC/BCM Advanced Research +14a5 XIONICS Document Technologies Inc +14a6 INOVA Computers GmBH & Co KG +14a7 MYTHOS Systems Inc +14a8 FEATRON Technologies Corporation +14a9 HIVERTEC Inc +14aa Advanced MOS Technology Inc +14ab Mentor Graphics Corp. +14ac Novaweb Technologies Inc +14ad Time Space Radio AB +14ae CTI, Inc +14af Guillemot Corporation +14b0 BST Communication Technology Ltd +14b1 Nextcom K.K. +14b2 ENNOVATE Networks Inc +14b3 XPEED Inc + 0000 DSL NIC +14b4 PHILIPS Business Electronics B.V. +14b5 Creamware GmBH +14b6 Quantum Data Corp. +14b7 PROXIM Inc + 0001 Symphony 4110 +14b8 Techsoft Technology Co Ltd +14b9 AIRONET Wireless Communications + 0001 PC4800 +14ba INTERNIX Inc. +14bb SEMTECH Corporation +14bc Globespan Semiconductor Inc. +14bd CARDIO Control N.V. +14be L3 Communications +14bf SPIDER Communications Inc. +14c0 COMPAL Electronics Inc +14c1 MYRICOM Inc. +14c2 DTK Computer +14c3 MEDIATEK Corp. +14c4 IWASAKI Information Systems Co Ltd +14c5 Automation Products AB +14c6 Data Race Inc +14c7 Modular Technology Holdings Ltd +14c8 Turbocomm Tech. Inc. +14c9 ODIN Telesystems Inc +14ca PE Logic Corp. +14cb Billionton Systems Inc +14cc NAKAYO Telecommunications Inc +14cd Universal Scientific Ind. +14ce Whistle Communications +14cf TEK Microsystems Inc. 
+14d0 Ericsson Axe R & D +14d1 Computer Hi-Tech Co Ltd +14d2 Titan Electronics Inc +14d3 CIRTECH (UK) Ltd +14d4 Panacom Technology Corp +14d5 Nitsuko Corporation +14d6 Accusys Inc +14d7 Hirakawa Hewtech Corp +14d8 HOPF Elektronik GmBH +14d9 Alpha Processor Inc +14da National Aerospace Laboratories +14db AFAVLAB Technology Inc + 2120 TK9902 +14dc Amplicon Liveline Ltd + 0000 PCI230 + 0001 PCI242 + 0002 PCI244 + 0003 PCI247 + 0004 PCI248 + 0005 PCI249 + 0006 PCI260 + 0007 PCI224 + 0008 PCI234 + 0009 PCI236 +14dd Boulder Design Labs Inc +14de Applied Integration Corporation +14df ASIC Communications Corp +14e1 INVERTEX +14e2 INFOLIBRIA +14e3 AMTELCO +14e4 BROADCOM Corporation + 1644 NetXtreme BCM5700 Gigabit Ethernet + 10b7 1000 3C996-T 1000BaseTX + 10b7 1001 3C996B-T 1000BaseTX + 10b7 1002 3C996C-T 1000BaseTX + 10b7 1003 3C997-T 1000BaseTX + 10b7 1004 3C996-SX 1000BaseSX + 10b7 1005 3C997-SX 1000BaseSX + 14e4 0002 NetXtreme 1000BaseSX + 14e4 0003 NetXtreme 1000BaseSX + 14e4 0004 NetXtreme 1000BaseTX + 14e4 1644 NetXtreme BCM5700 1000BaseTX + 1645 NetXtreme BCM5701 Gigabit Ethernet + 0e11 007c NC7770 1000BaseTX + 0e11 007d NC6770 1000BaseSX + 0e11 0085 NC7780 1000BaseTX + 10b7 1004 3C996-SX 1000BaseSX + 10b7 1006 3C996B-T 1000BaseTX + 10b7 1007 3C1000-T 1000BaseTX + 10b7 1008 3C940-BR01 1000BaseTX + 14e4 0001 NetXtreme BCM5701 1000BaseTX + 14e4 0005 NetXtreme BCM5701 1000BaseTX + 14e4 0006 NetXtreme BCM5701 1000BaseTX + 14e4 0007 NetXtreme BCM5701 1000BaseSX + 14e4 0008 NetXtreme BCM5701 1000BaseTX + 14e4 8008 NetXtreme BCM5701 1000BaseTX + 1647 NetXtreme BCM5703 Gigabit Ethernet + 5820 BCM5820 Crypto Accelerator +14e5 Pixelfusion Ltd +14e6 SHINING Technology Inc +14e7 3CX +14e8 RAYCER Inc +14e9 GARNETS System CO Ltd +14ea PLANEX COMMUNICATIONS Inc +14eb SEIKO EPSON Corp +14ec ACQIRIS +14ed DATAKINETICS Ltd +14ee MASPRO KENKOH Corp +14ef CARRY Computer ENG. 
CO Ltd +14f0 CANON RESEACH CENTRE FRANCE +14f1 Conexant + 1033 HCF 56k Data/Fax Modem + 122d 4027 Dell Zeus - MDP3880-W(B) Data Fax Modem + 122d 4030 Dell Mercury - MDP3880-U(B) Data Fax Modem + 122d 4034 Dell Thor - MDP3880-W(U) Data Fax Modem + 13e0 020d Dell Copper + 13e0 020e Dell Silver + 13e0 0290 Compaq Goldwing + 13e0 02c0 Compaq Scooter + 144f 1500 IBM P85-DF (1) + 144f 1501 IBM P85-DF (2) + 144f 150a IBM P85-DF (3) + 144f 150b IBM P85-DF Low Profile (1) + 144f 1510 IBM P85-DF Low Profile (2) + 1034 HCF 56k Data/Fax/Voice Modem + 1035 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 10cf 1098 Fujitsu P85-DFSV + 1036 HCF 56k Data/Fax/Voice/Spkp Modem + 122d 4029 MDP3880SP-W + 122d 4031 MDP3880SP-U + 13e0 0209 Dell Titanium + 13e0 020a Dell Graphite + 13e0 0260 Gateway Red Owl + 13e0 0270 Gateway White Horse + 1052 HCF 56k Data/Fax Modem (Worldwide) + 1053 HCF 56k Data/Fax Modem (Worldwide) + 1054 HCF 56k Data/Fax/Voice Modem (Worldwide) + 1055 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide) + 1056 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide) + 1057 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide) + 1059 HCF 56k Data/Fax/Voice Modem (Worldwide) + 1063 HCF 56k Data/Fax Modem + 1064 HCF 56k Data/Fax/Voice Modem + 1065 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 1066 HCF 56k Data/Fax/Voice/Spkp Modem + 122d 4033 Dell Athena - MDP3900V-U + 1433 HCF 56k Data/Fax Modem + 1434 HCF 56k Data/Fax/Voice Modem + 1435 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 1436 HCF 56k Data/Fax Modem + 1453 HCF 56k Data/Fax Modem + 144f 1502 IBM P95-DF (1) + 144f 1503 IBM P95-DF (2) + 1454 HCF 56k Data/Fax/Voice Modem + 1455 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 1456 HCF 56k Data/Fax/Voice/Spkp Modem + 122d 4035 Dell Europa - MDP3900V-W + 122d 4302 Dell MP3930V-W(C) MiniPCI + 1803 HCF 56k Modem + 0e11 0023 623-LAN Grizzly + 0e11 0043 623-LAN Yogi + 1815 HCF 56k Modem + 0e11 0022 Grizzly + 0e11 0042 Yogi + 2003 HSF 56k Data/Fax Modem + 2004 HSF 56k Data/Fax/Voice Modem + 2005 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 2006 HSF 56k Data/Fax/Voice/Spkp Modem + 2013 HSF 56k Data/Fax Modem + 0e11 b195 Bear + 0e11 b196 Seminole 1 + 0e11 b1be Seminole 2 + 155d 6793 HP + 155d 8850 E Machines + 2014 HSF 56k Data/Fax/Voice Modem + 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem + 2016 HSF 56k Data/Fax/Voice/Spkp Modem + 2043 HSF 56k Data/Fax Modem (WorldW SmartDAA) + 2044 HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA) + 2045 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA) + 2046 HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA) + 2063 HSF 56k Data/Fax Modem (SmartDAA) + 2064 HSF 56k Data/Fax/Voice Modem (SmartDAA) + 2065 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA) + 2066 HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA) + 2093 HSF 56k Modem + 155d 2f07 Legend + 2143 HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA) + 2144 HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA) + 2145 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA) + 2146 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA) + 2163 HSF 56k Data/Fax/Cell Modem (Mob SmartDAA) + 2164 HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA) + 2165 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA) + 2166 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA) + 2343 HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA) + 2344 HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA) + 2345 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA) + 2346 HSF 
56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA) + 2363 HSF 56k Data/Fax CardBus Modem (Mob SmartDAA) + 2364 HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA) + 2365 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA) + 2366 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA) + 2443 HSF 56k Data/Fax Modem (Mob WorldW SmartDAA) + 2444 HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA) + 2445 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA) + 2446 HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA) + 2463 HSF 56k Data/Fax Modem (Mob SmartDAA) + 2464 HSF 56k Data/Fax/Voice Modem (Mob SmartDAA) + 2465 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA) + 2466 HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA) + 2f00 HSF 56k HSFi Modem + 13e0 8d84 IBM HSFi V.90 + 13e0 8d85 Compaq Stinger +14f2 MOBILITY Electronics +14f3 BROADLOGIC +14f4 TOKYO Electronic Industry CO Ltd +14f5 SOPAC Ltd +14f6 COYOTE Technologies LLC +14f7 WOLF Technology Inc +14f8 AUDIOCODES Inc +14f9 AG COMMUNICATIONS +14fa WANDEL & GOCHERMANN +14fb TRANSAS MARINE (UK) Ltd +14fc QUADRICS Supercomputers World +14fd JAPAN Computer Industry Inc +14fe ARCHTEK TELECOM Corp +14ff TWINHEAD INTERNATIONAL Corp +1500 DELTA Electronics, Inc +1501 BANKSOFT CANADA Ltd +1502 MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd +1503 KAWASAKI LSI USA Inc +1504 KAISER Electronics +1505 ITA INGENIEURBURO FUR TESTAUFGABEN GmbH +1506 CHAMELEON Systems Inc +# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057). +1507 Motorola ?? / HTEC + 0001 MPC105 [Eagle] + 0002 MPC106 [Grackle] + 0003 MPC8240 [Kahlua] + 0100 MC145575 [HFC-PCI] + 0431 KTI829c 100VG + 4801 Raven + 4802 Falcon + 4803 Hawk + 4806 CPX8216 +1508 HONDA CONNECTORS/MHOTRONICS Inc +1509 FIRST INTERNATIONAL Computer Inc +150a FORVUS RESEARCH Inc +150b YAMASHITA Systems Corp +150c KYOPAL CO Ltd +150d WARPSPPED Inc +150e C-PORT Corp +150f INTEC GmbH +1510 BEHAVIOR TECH Computer Corp +1511 CENTILLIUM Technology Corp +1512 ROSUN Technologies Inc +1513 Raychem +1514 TFL LAN Inc +1515 Advent design +1516 MYSON Technology Inc +1517 ECHOTEK Corp +1518 PEP MODULAR Computers GmbH +1519 TELEFON AKTIEBOLAGET LM Ericsson +151a Globetek + 1002 PCI-1002 + 1004 PCI-1004 + 1008 PCI-1008 +151b COMBOX Ltd +151c DIGITAL AUDIO LABS Inc +151d Fujitsu Computer Products Of America +151e MATRIX Corp +151f TOPIC SEMICONDUCTOR Corp +1520 CHAPLET System Inc +1521 BELL Corp +1522 MainPine Ltd +1523 MUSIC Semiconductors +1524 ENE Technology Inc +1525 IMPACT Technologies +1526 ISS, Inc +1527 SOLECTRON +1528 ACKSYS +1529 AMERICAN MICROSystems Inc +152a QUICKTURN DESIGN Systems +152b FLYTECH Technology CO Ltd +152c MACRAIGOR Systems LLC +152d QUANTA Computer Inc +152e MELEC Inc +152f PHILIPS - CRYPTO +1530 ACQIS Technology Inc +1531 CHRYON Corp +1532 ECHELON Corp +1533 BALTIMORE +1534 ROAD Corp +1535 EVERGREEN Technologies Inc +1537 DATALEX COMMUNCATIONS +1538 ARALION Inc +1539 ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A. 
+153a ONO SOKKI +153b TERRATEC Electronic GmbH +153c ANTAL Electronic +153d FILANET Corp +153e TECHWELL Inc +153f MIPS DENMARK +1540 PROVIDEO MULTIMEDIA Co Ltd +1541 MACHONE Communications +1542 VIVID Technology Inc +1543 SILICON Laboratories +1544 DCM DATA Systems +1545 VISIONTEK +1546 IOI Technology Corp +1547 MITUTOYO Corp +1548 JET PROPULSION Laboratory +1549 INTERCONNECT Systems Solutions +154a MAX Technologies Inc +154b COMPUTEX Co Ltd +154c VISUAL Technology Inc +154d PAN INTERNATIONAL Industrial Corp +154e SERVOTEST Ltd +154f STRATABEAM Technology +1550 OPEN NETWORK Co Ltd +1551 SMART Electronic DEVELOPMENT GmBH +1552 RACAL AIRTECH Ltd +1553 CHICONY Electronics Co Ltd +1554 PROLINK Microsystems Corp +1555 GESYTEC GmBH +1556 PLD APPLICATIONS +1557 MEDIASTAR Co Ltd +1558 CLEVO/KAPOK Computer +1559 SI LOGIC Ltd +155a INNOMEDIA Inc +155b PROTAC INTERNATIONAL Corp +155c Cemax-Icon Inc +155d Mac System Co Ltd +155e LP Elektronik GmbH +155f Perle Systems Ltd +1560 Terayon Communications Systems +1561 Viewgraphics Inc +1562 Symbol Technologies +1563 A-Trend Technology Co Ltd +1564 Yamakatsu Electronics Industry Co Ltd +1565 Biostar Microtech Int'l Corp +1566 Ardent Technologies Inc +1567 Jungsoft +1568 DDK Electronics Inc +1569 Palit Microsystems Inc. +156a Avtec Systems +156b 2wire Inc +156c Vidac Electronics GmbH +156d Alpha-Top Corp +156e Alfa Inc +156f M-Systems Flash Disk Pioneers Ltd +1570 Lecroy Corp +1571 Contemporary Controls + a001 CCSI PCI20-485 ARCnet + a002 CCSI PCI20-485D ARCnet + a003 CCSI PCI20-485X ARCnet + a004 CCSI PCI20-CXB ARCnet + a005 CCSI PCI20-CXS ARCnet + a006 CCSI PCI20-FOG-SMA ARCnet + a007 CCSI PCI20-FOG-ST ARCnet + a008 CCSI PCI20-TB5 ARCnet + a009 CCSI PCI20-5-485 5Mbit ARCnet + a00a CCSI PCI20-5-485D 5Mbit ARCnet + a00b CCSI PCI20-5-485X 5Mbit ARCnet + a00c CCSI PCI20-5-FOG-ST 5Mbit ARCnet + a00d CCSI PCI20-5-FOG-SMA 5Mbit ARCnet + a201 CCSI PCI22-485 10Mbit ARCnet + a202 CCSI PCI22-485D 10Mbit ARCnet + a203 CCSI PCI22-485X 10Mbit ARCnet + a204 CCSI PCI22-CHB 10Mbit ARCnet + a205 CCSI PCI22-FOG_ST 10Mbit ARCnet + a206 CCSI PCI22-THB 10Mbit ARCnet +1572 Otis Elevator Company +1573 Lattice - Vantis +1574 Fairchild Semiconductor +1575 Voltaire Advanced Data Security Ltd +1576 Viewcast COM +1578 HITT +1579 Dual Technology Corp +157a Japan Elecronics Ind Inc +157b Star Multimedia Corp +157c Eurosoft (UK) + 8001 Fix2000 PCI Y2K Compliance Card +157d Gemflex Networks +157e Transition Networks +157f PX Instruments Technology Ltd +1580 Primex Aerospace Co +1581 SEH Computertechnik GmbH +1582 Cytec Corp +1583 Inet Technologies Inc +1584 Uniwill Computer Corp +1585 Logitron +1586 Lancast Inc +1587 Konica Corp +1588 Solidum Systems Corp +1589 Atlantek Microsystems Pty Ltd +158a Digalog Systems Inc +158b Allied Data Technologies +158c Hitachi Semiconductor & Devices Sales Co Ltd +158d Point Multimedia Systems +158e Lara Technology Inc +158f Ditect Coop +1590 3pardata Inc +1591 ARN +1592 Syba Tech Ltd + 0781 Multi-IO Card + 0782 Parallel Port Card 2xEPP + 0783 Multi-IO Card + 0785 Multi-IO Card + 0786 Multi-IO Card + 0787 Multi-IO Card + 0788 Multi-IO Card + 078a Multi-IO Card +1593 Bops Inc +1594 Netgame Ltd +1595 Diva Systems Corp +1596 Folsom Research Inc +1597 Memec Design Services +1598 Granite Microsystems +1599 Delta Electronics Inc +159a General Instrument +159b Faraday Technology Corp +159c Stratus Computer Systems +159d Ningbo Harrison Electronics Co Ltd +159e A-Max Technology Co Ltd +159f Galea Network Security +15a0 Compumaster SRL +15a1 Geocast Network 
Systems +15a2 Catalyst Enterprises Inc +15a3 Italtel +15a4 X-Net OY +15a5 Toyota Macs Inc +15a6 Sunlight Ultrasound Technologies Ltd +15a7 SSE Telecom Inc +15a8 Shanghai Communications Technologies Center +15aa Moreton Bay +15ab Bluesteel Networks Inc +15ac North Atlantic Instruments +15ad VMWare Inc + 0710 Virtual SVGA +15ae Amersham Pharmacia Biotech +15b0 Zoltrix International Ltd +15b1 Source Technology Inc +15b2 Mosaid Technologies Inc +15b3 Mellanox Technology +15b4 CCI/TRIAD +15b5 Cimetrics Inc +15b6 Texas Memory Systems Inc +15b7 Sandisk Corp +15b8 ADDI-DATA GmbH +15b9 Maestro Digital Communications +15ba Impacct Technology Corp +15bb Portwell Inc +15bc Agilent Technologies +15bd DFI Inc +15be Sola Electronics +15bf High Tech Computer Corp (HTC) +15c0 BVM Ltd +15c1 Quantel +15c2 Newer Technology Inc +15c3 Taiwan Mycomp Co Ltd +15c4 EVSX Inc +15c5 Procomp Informatics Ltd +15c6 Technical University of Budapest +15c7 Tateyama Dystem Laboratory Co Ltd +15c8 Penta Media Co Ltd +15c9 Serome Technology Inc +15ca Bitboys OY +15cb AG Electronics Ltd +15cc Hotrail Inc +15cd Dreamtech Co Ltd +15ce Genrad Inc +15cf Hilscher GmbH +15d1 Infineon Technologies AG +15d2 FIC (First International Computer Inc) +15d3 NDS Technologies Israel Ltd +15d4 Iwill Corp +15d5 Tatung Co +15d6 Entridia Corp +15d7 Rockwell-Collins Inc +15d8 Cybernetics Technology Co Ltd +15d9 Super Micro Computer Inc +15da Cyberfirm Inc +15db Applied Computing Systems Inc +15dc Litronic Inc + 0001 Argus 300 PCI Cryptography Module +15dd Sigmatel Inc +15de Malleable Technologies Inc +15df Infinilink Corp +15e0 Cacheflow Inc +15e1 Voice Technologies Group Inc +15e2 Quicknet Technologies Inc +15e3 Networth Technologies Inc +15e4 VSN Systemen BV +15e5 Valley technologies Inc +15e6 Agere Inc +15e7 Get Engineering Corp +15e8 National Datacomm Corp +15e9 Pacific Digital Corp +15ea Tokyo Denshi Sekei K.K. +15eb Drsearch GmbH +15ec Beckhoff GmbH +15ed Macrolink Inc +15ee In Win Development Inc +15ef Intelligent Paradigm Inc +15f0 B-Tree Systems Inc +15f1 Times N Systems Inc +15f2 Diagnostic Instruments Inc +15f3 Digitmedia Corp +15f4 Valuesoft +15f5 Power Micro Research +15f6 Extreme Packet Device Inc +15f7 Banctec +15f8 Koga Electronics Co +15f9 Zenith Electronics Corp +15fa J.P. Axzam Corp +15fb Zilog Inc +15fc Techsan Electronics Co Ltd +15fd N-CUBED.NET +15fe Kinpo Electronics Inc +15ff Fastpoint Technologies Inc +1600 Northrop Grumman - Canada Ltd +1601 Tenta Technology +1602 Prosys-tec Inc +1603 Nokia Wireless Communications +1604 Central System Research Co Ltd +1605 Pairgain Technologies +1606 Europop AG +1607 Lava Semiconductor Manufacturing Inc +1608 Automated Wagering International +1609 Scimetric Instruments Inc +1619 FarSite Communications Ltd + 0400 FarSync T2P (2 port X.21/V.35/V.24) + 0440 FarSync T4P (4 port X.21/V.35/V.24) +1668 Action Tec Electronics Inc +1813 Ambient Technologies Inc +1a08 Sierra semiconductor + 0000 SC15064 +1b13 Jaton Corp +1c1c Symphony + 0001 82C101 +1d44 DPT + a400 PM2x24/PM3224 +1de1 Tekram Technology Co.,Ltd. + 0391 TRM-S1040 + 2020 DC-390 + 690c 690c + dc29 DC290 +2001 Temporal Research Ltd +21c3 21st Century Computer Corp. +2348 Racore + 2010 8142 100VG/AnyLAN +2646 Kingston Technologies +270b Xantel Corporation +270f Chaintech Computer Co. Ltd +2711 AVID Technology Inc. +2a15 3D Vision(???) +3000 Hansol Electronics Inc. +3142 Post Impression Systems. 
+3388 Hint Corp + 8011 VXPro II Chipset + 3388 8011 VXPro II Chipset CPU to PCI Bridge + 8012 VXPro II Chipset + 3388 8012 VXPro II Chipset PCI to ISA Bridge + 8013 VXPro II Chipset + 3388 8013 VXPro II Chipset EIDE Controller +3411 Quantum Designs (H.K.) Inc +3513 ARCOM Control Systems Ltd +38ef 4Links +3d3d 3DLabs + 0001 GLINT 300SX + 0002 GLINT 500TX + 0003 GLINT Delta + 0004 Permedia + 0005 Permedia + 0006 GLINT MX + 0007 3D Extreme + 0008 GLINT Gamma G1 + 0009 Permedia II 2D+3D + 3d3d 0100 AccelStar II 3D Accelerator + 3d3d 0111 Permedia 3:16 + 3d3d 0114 Santa Ana + 3d3d 0116 Oxygen GVX1 + 3d3d 0119 Scirocco + 3d3d 0120 Santa Ana PCL + 3d3d 0125 Oxygen VX1 + 3d3d 0127 Permedia3 Create! + 000a GLINT R3 + 3d3d 0121 Oxygen VX1 + 0100 Permedia II 2D+3D + 1004 Permedia + 3d04 Permedia + ffff Glint VGA +4005 Avance Logic Inc. + 0300 ALS300 PCI Audio Device + 0308 ALS300+ PCI Audio Device + 0309 PCI Input Controller + 1064 ALG-2064 + 2064 ALG-2064i + 2128 ALG-2364A GUI Accelerator + 2301 ALG-2301 + 2302 ALG-2302 + 2303 AVG-2302 GUI Accelerator + 2364 ALG-2364A + 2464 ALG-2464 + 2501 ALG-2564A/25128A + 4000 ALS4000 Audio Chipset + 4005 4000 ALS4000 Audio Chipset +4033 Addtron Technology Co, Inc. + 1360 RTL8139 Ethernet +4143 Digital Equipment Corp +416c Aladdin Knowledge Systems + 0100 AladdinCARD + 0200 CPC +4444 Internext Compression Inc +4468 Bridgeport machines +4594 Cogetec Informatique Inc +45fb Baldor Electric Company +4680 Umax Computer Corp +4843 Hercules Computer Technology Inc +4916 RedCreek Communications Inc + 1960 RedCreek PCI adapter +4943 Growth Networks +4978 Axil Computer Inc +4a14 NetVin + 5000 NV5000SC + 4a14 5000 RT8029-Based Ethernet Adapter +4b10 Buslogic Inc. +4c48 LUNG HWA Electronics +4ca1 Seanix Technology Inc +4d51 MediaQ Inc. + 0200 MQ-200 +4d54 Microtechnica Co Ltd +4ddc ILC Data Device Corp +5053 Voyetra Technologies + 2010 Daytona Audio Adapter +5136 S S Technologies +5143 Qualcomm Inc +5145 Ensoniq (Old) + 3031 Concert AudioPCI +5301 Alliance Semiconductor Corp. + 0001 ProMotion aT3D +5333 S3 Inc. 
+ 0551 Plato/PX (system) + 5631 86c325 [ViRGE] + 8800 86c866 [Vision 866] + 8801 86c964 [Vision 964] + 8810 86c764_0 [Trio 32 vers 0] + 8811 86c764/765 [Trio32/64/64V+] + 8812 86cM65 [Aurora64V+] + 8813 86c764_3 [Trio 32/64 vers 3] + 8814 86c767 [Trio 64UV+] + 8815 86cM65 [Aurora 128] + 883d 86c988 [ViRGE/VX] + 8870 FireGL + 8880 86c868 [Vision 868 VRAM] vers 0 + 8881 86c868 [Vision 868 VRAM] vers 1 + 8882 86c868 [Vision 868 VRAM] vers 2 + 8883 86c868 [Vision 868 VRAM] vers 3 + 88b0 86c928 [Vision 928 VRAM] vers 0 + 88b1 86c928 [Vision 928 VRAM] vers 1 + 88b2 86c928 [Vision 928 VRAM] vers 2 + 88b3 86c928 [Vision 928 VRAM] vers 3 + 88c0 86c864 [Vision 864 DRAM] vers 0 + 88c1 86c864 [Vision 864 DRAM] vers 1 + 88c2 86c864 [Vision 864-P DRAM] vers 2 + 88c3 86c864 [Vision 864-P DRAM] vers 3 + 88d0 86c964 [Vision 964 VRAM] vers 0 + 88d1 86c964 [Vision 964 VRAM] vers 1 + 88d2 86c964 [Vision 964-P VRAM] vers 2 + 88d3 86c964 [Vision 964-P VRAM] vers 3 + 88f0 86c968 [Vision 968 VRAM] rev 0 + 88f1 86c968 [Vision 968 VRAM] rev 1 + 88f2 86c968 [Vision 968 VRAM] rev 2 + 88f3 86c968 [Vision 968 VRAM] rev 3 + 8900 86c755 [Trio 64V2/DX] + 5333 8900 86C775 Trio64V2/DX + 8901 Trio 64V2/DX or /GX + 5333 8901 86C775 Trio64V2/DX, 86C785 Trio64V2/GX + 8902 Plato/PX + 8903 Trio 3D business multimedia + 8904 Trio 64 3D + 1014 00db Integrated Trio3D + 5333 8904 86C365 Trio3D AGP + 8905 Trio 64V+ family + 8906 Trio 64V+ family + 8907 Trio 64V+ family + 8908 Trio 64V+ family + 8909 Trio 64V+ family + 890a Trio 64V+ family + 890b Trio 64V+ family + 890c Trio 64V+ family + 890d Trio 64V+ family + 890e Trio 64V+ family + 890f Trio 64V+ family + 8a01 ViRGE/DX or /GX + 0e11 b032 ViRGE/GX + 10b4 1617 Nitro 3D + 10b4 1717 Nitro 3D + 5333 8a01 ViRGE/DX + 8a10 ViRGE/GX2 + 1092 8a10 Stealth 3D 4000 + 8a13 86c368 [Trio 3D/2X] + 5333 8a13 Trio3D/2X + 8a20 86c794 [Savage 3D] + 5333 8a20 86C391 Savage3D + 8a21 86c795 [Savage 3D/MV] + 5333 8a21 86C390 Savage3D/MV + 8a22 Savage 4 + 105d 0018 SR9 8Mb SDRAM + 105d 002a SR9 Pro 16Mb SDRAM + 105d 003a SR9 Pro 32Mb SDRAM + 105d 092f SR9 Pro+ 16Mb SGRAM + 1092 4207 Stealth III S540 + 1092 4800 Stealth III S540 + 1092 4807 SpeedStar A90 + 1092 4808 Stealth III S540 + 1092 4809 Stealth III S540 + 1092 480e Stealth III S540 + 1092 4904 Stealth III S520 + 1092 4905 SpeedStar A200 + 1092 4a09 Stealth III S540 + 1092 4a0b Stealth III S540 Xtreme + 1092 4a0f Stealth III S540 + 1092 4e01 Stealth III S540 + 1102 101d 3d Blaster Savage 4 + 1102 101e 3d Blaster Savage 4 + 5333 8100 86C394-397 Savage4 SDRAM 100 + 5333 8110 86C394-397 Savage4 SDRAM 110 + 5333 8125 86C394-397 Savage4 SDRAM 125 + 5333 8143 86C394-397 Savage4 SDRAM 143 + 5333 8a22 86C394-397 Savage4 + 5333 8a2e 86C394-397 Savage4 32bit + 5333 9125 86C394-397 Savage4 SGRAM 125 + 5333 9143 86C394-397 Savage4 SGRAM 143 + 8a23 Savage 4 + 8a25 ProSavage PM133 + 8a26 ProSavage KM133 + 8c00 ViRGE/M3 + 8c01 ViRGE/MX + 8c02 ViRGE/MX+ + 8c03 ViRGE/MX+MV + 8c10 86C270-294 Savage/MX-MV + 8c11 82C270-294 Savage/MX + 8c12 86C270-294 Savage/IX-MV + 8c13 86C270-294 Savage/IX + 9102 86C410 Savage 2000 + 1092 5932 Viper II Z200 + 1092 5934 Viper II Z200 + 1092 5952 Viper II Z200 + 1092 5954 Viper II Z200 + 1092 5a35 Viper II Z200 + 1092 5a37 Viper II Z200 + 1092 5a55 Viper II Z200 + 1092 5a57 Viper II Z200 + ca00 SonicVibes +544c Teralogic Inc +5455 Technische University Berlin + 4458 S5933 +5519 Cnet Technologies, Inc. 
+5544 Dunord Technologies + 0001 I-30xx Scanner Interface +5555 Genroco, Inc + 0003 TURBOstor HFP-832 [HiPPI NIC] +5700 Netpower +6356 UltraStor +6374 c't Magazin für Computertechnik + 6773 GPPCI +6409 Logitec Corp. +6666 Decision Computer International Co. + 0001 PCCOM4 + 0002 PCCOM8 +7604 O.N. Electronic Co Ltd. +7bde MIDAC Corporation +7fed PowerTV +8008 Quancom Electronic GmbH + 0010 WDOG1 [PCI-Watchdog 1] + 0011 PWDOG2 [PCI-Watchdog 2] +8086 Intel Corp. + 0007 82379AB + 0039 21145 + 0122 82437FX + 0482 82375EB + 0483 82424ZX [Saturn] + 0484 82378IB [SIO ISA Bridge] + 0486 82430ZX [Aries] + 04a3 82434LX [Mercury/Neptune] + 04d0 82437FX [Triton FX] + 0600 RAID Controller + 0960 80960RP [i960 RP Microprocessor/Bridge] + 0964 80960RP [i960 RP Microprocessor/Bridge] + 1000 82542 Gigabit Ethernet Controller + 0e11 b0df NC1632 Gigabit Ethernet Adapter + 0e11 b0e0 NC1633 Gigabit Ethernet Adapter + 0e11 b123 NC1634 Gigabit Ethernet Adapter + 1014 0119 Netfinity Gigabit Ethernet SX Adapter + 1001 82543GC Gigabit Ethernet Controller + 1004 82543GC Gigabit Ethernet Controller + 1008 82544EI Gigabit Ethernet Controller + 1009 82544EI Gigabit Ethernet Controller + 100c 82544GC Gigabit Ethernet Controller + 100d 82544GC Gigabit Ethernet Controller + 1029 82559 Ethernet Controller + 1030 82559 InBusiness 10/100 + 1031 82801CAM (ICH3) Chipset Ethernet Controller + 1032 82801CAM (ICH3) Chipset Ethernet Controller + 1033 82801CAM (ICH3) Chipset Ethernet Controller + 1034 82801CAM (ICH3) Chipset Ethernet Controller + 1035 82801CAM (ICH3) Chipset Ethernet Controller + 1036 82801CAM (ICH3) Chipset Ethernet Controller + 1037 82801CAM (ICH3) Chipset Ethernet Controller + 1038 82801CAM (ICH3) Chipset Ethernet Controller + 1130 82815 815 Chipset Host Bridge and Memory Controller Hub + 1132 82815 CGC [Chipset Graphics Controller] + 1161 82806AA PCI64 Hub Advanced Programmable Interrupt Controller + 1209 82559ER + 1221 82092AA_0 + 1222 82092AA_1 + 1223 SAA7116 + 1225 82452KX/GX [Orion] + 1226 82596 PRO/10 PCI + 1227 82865 EtherExpress PRO/100A + 1228 82556 EtherExpress PRO/100 Smart + 1229 82557 [Ethernet Pro 100] + 0e11 b01e NC3120 + 0e11 b01f NC3122 + 0e11 b02f NC1120 + 0e11 b04a Netelligent 10/100TX NIC with Wake on LAN + 0e11 b0c6 Embedded NC3120 with Wake on LAN + 0e11 b0c7 Embedded NC3121 + 0e11 b0d7 NC3121 with Wake on LAN + 0e11 b0dd NC3131 (82558B) + 0e11 b0de NC3132 + 0e11 b0e1 NC3133 + 0e11 b144 NC3123 (82559) + 1014 005c 82558B Ethernet Pro 10/100 + 1014 105c Netfinity 10/100 + 1033 8000 PC-9821X-B06 + 1033 8016 PK-UG-X006 + 1033 801f PK-UG-X006 + 103c 10c0 Ethernet Pro 10/100TX + 103c 10c3 Ethernet Pro 10/100TX + 103c 1200 Ethernet Pro 10/100TX + 10c3 1100 SmartEther100 SC1100 + 1179 0002 PCI FastEther LAN on Docker + 1259 2560 AT-2560 100 + 1259 2561 AT-2560 100 FX Ethernet Adapter + 1266 0001 NE10/100 Adapter + 8086 0001 EtherExpress PRO/100B (TX) + 8086 0002 EtherExpress PRO/100B (T4) + 8086 0003 EtherExpress PRO/10+ + 8086 0004 EtherExpress PRO/100 WfM + 8086 0005 82557 10/100 + 8086 0006 82557 10/100 with Wake on LAN + 8086 0007 82558 10/100 Adapter + 8086 0008 82558 10/100 with Wake on LAN + 8086 0009 EtherExpress PRO/100+ + 8086 000a EtherExpress PRO/100+ Management Adapter + 8086 000b EtherExpress PRO/100+ + 8086 000c EtherExpress PRO/100+ Management Adapter + 8086 000d EtherExpress PRO/100+ Alert On LAN II* Adapter + 8086 000e EtherExpress PRO/100+ Management Adapter with Alert On LAN* + 8086 1009 EtherExpress PRO/100+ Server Adapter + 8086 100c EtherExpress PRO/100+ Server Adapter 
(PILA8470B) + 8086 10f0 EtherExpress PRO/100+ Dual Port Adapter + 8086 200d EtherExpress PRO/100 Cardbus + 8086 200e EtherExpress PRO/100 LAN+V90 Cardbus Modem + 8086 3000 82559 Fast Ethernet LAN on Motherboard + 8086 3001 82559 Fast Ethernet LOM with Basic Alert on LAN* + 8086 3002 82559 Fast Ethernet LOM with Alert on LAN II* + 122d 430FX - 82437FX TSC [Triton I] + 122e 82371FB PIIX ISA [Triton I] + 1230 82371FB PIIX IDE [Triton I] + 1231 DSVD Modem + 1234 430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX) + 1235 430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP) + 1237 440FX - 82441FX PMC [Natoma] + 1239 82371FB + 123b 82380PB + 123c 82380AB + 123d 683053 Programmable Interrupt Device + 1240 752 AGP + 124b 82380FB + 1250 430HX - 82439HX TXC [Triton II] + 1360 82806AA PCI64 Hub PCI Bridge + 1361 82806AA PCI64 Hub Controller (HRes) + 1960 80960RP [i960RP Microprocessor] + 101e 0438 MegaRaid 438 + 101e 0466 MegaRaid 466 + 101e 0467 MegaRaid 467 + 101e 09a0 PowerEdge Expandable RAID Controller 2/SC + 1028 0467 PowerEdge Expandable RAID Controller 2/DC + 1028 1111 PowerEdge Expandable RAID Controller 2/SC + 103c 03a2 MegaRaid + 103c 10c6 MegaRaid 438 + 103c 10c7 MegaRaid T5 + 103c 10cc MegaRaid + 1111 1111 MegaRaid 466 + 113c 03a2 MegaRaid + 1a21 82840 840 (Carmel) Chipset Host Bridge (Hub A) + 1a23 82840 840 (Carmel) Chipset AGP Bridge + 1a24 82840 840 (Carmel) Chipset PCI Bridge (Hub B) + 1a30 82845 845 (Brookdale) Chipset Host Bridge + 1a31 82845 845 (Brookdale) Chipset AGP Bridge + 2410 82801AA ISA Bridge (LPC) + 2411 82801AA IDE + 2412 82801AA USB + 2413 82801AA SMBus + 2415 82801AA AC'97 Audio + 11d4 0040 SoundMAX Integrated Digital Audio + 11d4 0048 SoundMAX Integrated Digital Audio + 11d4 5340 SoundMAX Integrated Digital Audio + 2416 82801AA AC'97 Modem + 2418 82801AA PCI Bridge + 2420 82801AB ISA Bridge (LPC) + 2421 82801AB IDE + 2422 82801AB USB + 2423 82801AB SMBus + 2425 82801AB AC'97 Audio + 11d4 0040 SoundMAX Integrated Digital Audio + 11d4 0048 SoundMAX Integrated Digital Audio + 2426 82801AB AC'97 Modem + 2428 82801AB PCI Bridge + 2440 82820 820 (Camino 2) Chipset ISA Bridge (ICH2) + 2442 82820 820 (Camino 2) Chipset USB (Hub A) + 2443 82820 820 (Camino 2) Chipset SMBus + 2444 82820 820 (Camino 2) Chipset USB (Hub B) + 2445 82820 820 (Camino 2) Chipset AC'97 Audio Controller + 2446 82820 820 (Camino 2) Chipset AC'97 Modem Controller + 2448 82820 820 (Camino 2) Chipset PCI (-M) + 2449 82820 (ICH2) Chipset Ethernet Controller + 244a 82820 820 (Camino 2) Chipset IDE U100 (-M) + 244b 82820 820 (Camino 2) Chipset IDE U100 + 244c 82820 820 (Camino 2) Chipset ISA Bridge (ICH2-M) + 244e 82820 820 (Camino 2) Chipset PCI + 2485 AC'97 Audio Controller + 2500 82820 820 (Camino) Chipset Host Bridge (MCH) + 1043 801c P3C-2000 system chipset + 2501 82820 820 (Camino) Chipset Host Bridge (MCH) + 1043 801c P3C-2000 system chipset + 250b 82820 820 (Camino) Chipset Host Bridge + 250f 82820 820 (Camino) Chipset PCI to AGP Bridge + 2520 82805AA MTH Memory Translator Hub + 2521 82804AA MRH-S Memory Repeater Hub for SDRAM + 2530 82850 850 (Tehama) Chipset Host Bridge (MCH) + 2531 82850 860 (Wombat) Chipset Host Bridge (MCH) + 2532 82850 850 (Tehama) Chipset AGP Bridge + 2533 82860 860 (Wombat) Chipset AGP Bridge + 3092 Integrated RAID + 5200 EtherExpress PRO/100 Intelligent Server + 5201 EtherExpress PRO/100 Intelligent Server + 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter + 530d 80310 IOP [IO Processor] + 7000 82371SB PIIX3 ISA [Natoma/Triton II] + 7010 82371SB PIIX3 
IDE [Natoma/Triton II] + 7020 82371SB PIIX3 USB [Natoma/Triton II] + 7030 430VX - 82437VX TVX [Triton VX] + 7100 430TX - 82439TX MTXC + 7110 82371AB PIIX4 ISA + 7111 82371AB PIIX4 IDE + 7112 82371AB PIIX4 USB + 7113 82371AB PIIX4 ACPI + 7120 82810 GMCH [Graphics Memory Controller Hub] + 7121 82810 CGC [Chipset Graphics Controller] + 7122 82810-DC100 GMCH [Graphics Memory Controller Hub] + 7123 82810-DC100 CGC [Chipset Graphics Controller] + 7124 82810E GMCH [Graphics Memory Controller Hub] + 7125 82810E CGC [Chipset Graphics Controller] + 7126 82810 810 Chipset Host Bridge and Memory Controller Hub + 7180 440LX/EX - 82443LX/EX Host bridge + 7181 440LX/EX - 82443LX/EX AGP bridge + 7190 440BX/ZX - 82443BX/ZX Host bridge + 0e11 0500 Armada 1750 Laptop System Chipset + 7191 440BX/ZX - 82443BX/ZX AGP bridge + 7192 440BX/ZX - 82443BX/ZX Host bridge (AGP disabled) + 0e11 0460 Armada 1700 Laptop System Chipset + 7194 82440MX I/O Controller + 7195 82440MX AC'97 Audio Controller + 10cf 1099 QSound_SigmaTel Stac97 PCI Audio + 11d4 0040 SoundMAX Integrated Digital Audio + 11d4 0048 SoundMAX Integrated Digital Audio + 7198 82440MX PCI to ISA Bridge + 7199 82440MX EIDE Controller + 719a 82440MX USB Universal Host Controller + 719b 82440MX Power Management Controller + 71a0 440GX - 82443GX Host bridge + 71a1 440GX - 82443GX AGP bridge + 71a2 440GX - 82443GX Host bridge (AGP disabled) + 7600 82372FB PCI to ISA Bridge + 7601 82372FB PIIX4 IDE + 7602 82372FB [PCI-to-USB UHCI] + 7603 82372FB System Management Bus Controller + 7800 i740 + 1092 0100 Stealth II G460 + 8086 0100 Intel740 Graphics Accelerator + 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge + 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller + 84ca 450NX - 82451NX Memory & I/O Controller + 84cb 450NX - 82454NX/84460GX PCI Expander Bridge + 84e0 460GX - 84460GX System Address Controller (SAC) + 84e1 460GX - 84460GX System Data Controller (SDC) + 84e2 460GX - 84460GX AGP Bridge (GXB) + 84e3 460GX - 84460GX Memory Address Controller (MAC) + 84e4 460GX - 84460GX Memory Data Controller (MDC) + 9621 Integrated RAID + 9622 Integrated RAID + 9641 Integrated RAID + 96a1 Integrated RAID + ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG] +8800 Trigem Computer Inc. + 2008 Video assistent component +8866 T-Square Design Inc. 
+8888 Silicon Magic +8e0e Computone Corporation +8e2e KTI + 3000 ET32P2 +9004 Adaptec + 1078 AIC-7810 + 1160 AIC-1160 [Family Fibre Channel Adapter] + 2178 AIC-7821 + 3860 AHA-2930CU + 3b78 AHA-4844W/4844UW + 5075 AIC-755x + 5078 AHA-7850 + 9004 7850 AHA-2904/Integrated AIC-7850 + 5175 AIC-755x + 5178 AIC-7851 + 5275 AIC-755x + 5278 AIC-7852 + 5375 AIC-755x + 5378 AIC-7850 + 5475 AIC-2930 + 5478 AIC-7850 + 5575 AVA-2930 + 5578 AIC-7855 + 5675 AIC-755x + 5678 AIC-7850 + 5775 AIC-755x + 5778 AIC-7850 + 5800 AIC-5800 + 5900 ANA-5910/5930/5940 ATM155 & 25 LAN Adapter + 5905 ANA-5910A/5930A/5940A ATM Adapter + 6038 AIC-3860 + 6075 AIC-1480 / APA-1480 + 6078 AIC-7860 + 6178 AIC-7861 + 9004 7861 AHA-2940AU Single + 6278 AIC-7860 + 6378 AIC-7860 + 6478 AIC-786 + 6578 AIC-786x + 6678 AIC-786 + 6778 AIC-786x + 6915 ANA620xx/ANA69011A + 9004 0008 ANA69011A/TX 10/100 + 9004 0009 ANA69011A/TX 10/100 + 9004 0010 ANA62022 2-port 10/100 + 9004 0018 ANA62044 4-port 10/100 + 9004 0020 ANA62022 2-port 10/100 + 9004 0028 ANA69011A/TX 10/100 + 9004 8008 ANA69011A/TX 64 bit 10/100 + 9004 8009 ANA69011A/TX 64 bit 10/100 + 9004 8010 ANA62022 2-port 64 bit 10/100 + 9004 8018 ANA62044 4-port 64 bit 10/100 + 9004 8020 ANA62022 2-port 64 bit 10/100 + 9004 8028 ANA69011A/TX 64 bit 10/100 + 7078 AHA-294x / AIC-7870 + 7178 AHA-294x / AIC-7871 + 7278 AHA-3940 / AIC-7872 + 7378 AHA-3985 / AIC-7873 + 7478 AHA-2944 / AIC-7874 +# DJ: Where did the 3rd number come from? + 7578 AHA-3944 / AHA-3944W / 7875 +# DJ: Where did the 3rd number come from? + 7678 AHA-4944W/UW / 7876 + 7778 AIC-787x + 7810 AIC-7810 + 7815 AIC-7815 RAID+Memory Controller IC + 9004 7815 ARO-1130U2 RAID Controller + 9004 7840 AIC-7815 RAID+Memory Controller IC + 7850 AIC-7850 + 7855 AHA-2930 + 7860 AIC-7860 + 7870 AIC-7870 + 7871 AHA-2940 + 7872 AHA-3940 + 7873 AHA-3980 + 7874 AHA-2944 + 7880 AIC-7880P + 7890 AIC-7890 + 7891 AIC-789x + 7892 AIC-789x + 7893 AIC-789x + 7894 AIC-789x + 7895 AHA-2940U/UW / AHA-39xx / AIC-7895 + 9004 7895 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B + 7896 AIC-789x + 7897 AIC-789x + 8078 AIC-7880U + 9004 7880 AIC-7880P Ultra/Ultra Wide SCSI Chipset + 8178 AIC-7881U + 9004 7881 AHA-2940UW SCSI Host Adapter + 8278 AHA-3940U/UW / AIC-7882U + 8378 AHA-3940U/UW / AIC-7883U + 8478 AHA-294x / AIC-7884U + 8578 AHA-3944U / AHA-3944UWD / 7885 + 8678 AHA-4944UW / 7886 + 8778 AIC-788x + 9004 7887 2940UW Pro Ultra-Wide SCSI Controller + 8878 7888 + 8b78 ABA-1030 + ec78 AHA-4944W/UW +9005 Adaptec + 0010 AHA-2940U2/W + 0011 2930U2 + 0013 78902 + 9005 0003 AAA-131U2 Array1000 1 Channel RAID Controller + 001f AHA-2940U2/W / 7890 + 9005 000f 2940U2W SCSI Controller + 9005 a180 2940U2W SCSI Controller + 0020 AIC-7890 + 002f AIC-7890 + 0030 AIC-7890 + 003f AIC-7890 + 0050 3940U2 + 0051 3950U2D + 0053 AIC-7896 SCSI Controller + 9005 ffff AIC-7896 SCSI Controller mainboard implementation + 005f 7896 + 0080 7892A + 0081 7892B + 0083 7892D + 008f 7892P + 00c0 7899A + 00c1 7899B + 00c3 7899D + 00c5 RAID subsystem HBA + 00cf 7899P +907f Atronics + 2015 IDE-2015PL +919a Gigapixel Corp +9412 Holtek + 6565 6565 +9699 Omni Media Technology Inc + 6565 6565 +a0a0 AOPEN Inc. +a0f1 UNISYS Corporation +a200 NEC Corporation +a259 Hewlett Packard +a25b Hewlett Packard GmbH PL24-MKT +a304 Sony +a727 3Com Corporation +aa42 Scitex Digital Video +ac1e Digital Receiver Technology Inc +b1b3 Shiva Europe Limited +c001 TSI Telsys +c0a9 Micron/Crucial Technology +c0de Motorola +c0fe Motion Engineering, Inc. 
+ca50 Varian Australia Pty Ltd +cafe Chrysalis-ITS +cccc Catapult Communications +d4d4 Dy4 Systems Inc + 0601 PCI Mezzanine Card +d84d Exsys +e000 Winbond + e000 W89C940 +e159 Tiger Jet Network Inc. + 0001 Model 300 128k + 0059 0001 128k ISDN-S/T Adapter + 0059 0003 128k ISDN-U Adapter +e4bf EKF Elektronik GmbH +ea01 Eagle Technology +eabb Aashima Technology B.V. +ecc0 Echo Corporation +edd8 ARK Logic Inc + a091 1000PV [Stingray] + a099 2000PV [Stingray] + a0a1 2000MT + a0a9 2000MI +fa57 Fast Search & Transfer ASA +feda Epigram Inc +fffe VMWare Inc + 0710 Virtual SVGA +ffff Illegal Vendor ID + + +# List of known device classes, subclasses and programming interfaces + +# Syntax: +# C class class_name +# subclass subclass_name <-- single tab +# prog-if prog-if_name <-- two tabs + +C 00 Unclassified device + 00 Non-VGA unclassified device + 01 VGA compatible unclassified device +C 01 Mass storage controller + 00 SCSI storage controller + 01 IDE interface + 02 Floppy disk controller + 03 IPI bus controller + 04 RAID bus controller + 80 Unknown mass storage controller +C 02 Network controller + 00 Ethernet controller + 01 Token ring network controller + 02 FDDI network controller + 03 ATM network controller + 04 ISDN controller + 80 Network controller +C 03 Display controller + 00 VGA compatible controller + 00 VGA + 01 8514 + 01 XGA compatible controller + 02 3D controller + 80 Display controller +C 04 Multimedia controller + 00 Multimedia video controller + 01 Multimedia audio controller + 02 Computer telephony device + 80 Multimedia controller +C 05 Memory controller + 00 RAM memory + 01 FLASH memory + 80 Memory controller +C 06 Bridge + 00 Host bridge + 01 ISA bridge + 02 EISA bridge + 03 MicroChannel bridge + 04 PCI bridge + 00 Normal decode + 01 Subtractive decode + 05 PCMCIA bridge + 06 NuBus bridge + 07 CardBus bridge + 08 RACEway bridge + 00 Transparent mode + 01 Endpoint mode + 09 Semi-transparent PCI-to-PCI bridge + 40 Primary bus towards host CPU + 80 Secondary bus towards host CPU + 0a InfiniBand to PCI host bridge + 80 Bridge +C 07 Communication controller + 00 Serial controller + 00 8250 + 01 16450 + 02 16550 + 03 16650 + 04 16750 + 05 16850 + 06 16950 + 01 Parallel controller + 00 SPP + 01 BiDir + 02 ECP + 03 IEEE1284 + fe IEEE1284 Target + 02 Multiport serial controller + 03 Modem + 00 Generic + 01 Hayes/16450 + 02 Hayes/16550 + 03 Hayes/16650 + 04 Hayes/16750 + 80 Communication controller +C 08 Generic system peripheral + 00 PIC + 00 8259 + 01 ISA PIC + 02 EISA PIC + 10 IO-APIC + 20 IO(X)-APIC + 01 DMA controller + 00 8237 + 01 ISA DMA + 02 EISA DMA + 02 Timer + 00 8254 + 01 ISA Timer + 02 EISA Timers + 03 RTC + 00 Generic + 01 ISA RTC + 04 PCI Hot-plug controller + 80 System peripheral +C 09 Input device controller + 00 Keyboard controller + 01 Digitizer Pen + 02 Mouse controller + 03 Scanner controller + 04 Gameport controller + 00 Generic + 10 Extended + 80 Input device controller +C 0a Docking station + 00 Generic Docking Station + 80 Docking Station +C 0b Processor + 00 386 + 01 486 + 02 Pentium + 10 Alpha + 20 Power PC + 30 MIPS + 40 Co-processor +C 0c Serial bus controller + 00 FireWire (IEEE 1394) + 00 Generic + 10 OHCI + 01 ACCESS Bus + 02 SSA + 03 USB Controller + 00 UHCI + 10 OHCI + 80 Unspecified + fe USB Device + 04 Fibre Channel + 05 SMBus + 06 InfiniBand +C 0d Wireless controller + 00 IRDA controller + 01 Consumer IR controller + 10 RF controller + 80 Wireless controller +C 0e Intelligent controller + 00 I2O +C 0f Satellite communications controller + 00 
Satellite TV controller + 01 Satellite audio communication controller + 03 Satellite voice communication controller + 04 Satellite data communication controller +C 10 Encryption controller + 00 Network and computing encryption device + 10 Entertainment encryption device + 80 Encryption controller +C 11 Signal processing controller + 00 DPIO module + 01 Performance counters + 10 Communication synchronizer + 80 Signal processing controller diff --git a/xen-2.4.16/drivers/pci/proc.c b/xen-2.4.16/drivers/pci/proc.c new file mode 100644 index 0000000000..3516d445e3 --- /dev/null +++ b/xen-2.4.16/drivers/pci/proc.c @@ -0,0 +1,572 @@ +/* + * $Id: proc.c,v 1.13 1998/05/12 07:36:07 mj Exp $ + * + * Procfs interface for the PCI bus. + * + * Copyright (c) 1997--1999 Martin Mares + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PCI_CFG_SPACE_SIZE 256 + +static loff_t +proc_bus_pci_lseek(struct file *file, loff_t off, int whence) +{ + loff_t new; + + switch (whence) { + case 0: + new = off; + break; + case 1: + new = file->f_pos + off; + break; + case 2: + new = PCI_CFG_SPACE_SIZE + off; + break; + default: + return -EINVAL; + } + if (new < 0 || new > PCI_CFG_SPACE_SIZE) + return -EINVAL; + return (file->f_pos = new); +} + +static ssize_t +proc_bus_pci_read(struct file *file, char *buf, size_t nbytes, loff_t *ppos) +{ + const struct inode *ino = file->f_dentry->d_inode; + const struct proc_dir_entry *dp = ino->u.generic_ip; + struct pci_dev *dev = dp->data; + unsigned int pos = *ppos; + unsigned int cnt, size; + + /* + * Normal users can read only the standardized portion of the + * configuration space as several chips lock up when trying to read + * undefined locations (think of Intel PIIX4 as a typical example). 
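+ * Privileged (CAP_SYS_ADMIN) readers therefore get the full 256 bytes; CardBus functions expose 128 bytes to everyone else; all other devices just the 64-byte standard header.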
+ */ + + if (capable(CAP_SYS_ADMIN)) + size = PCI_CFG_SPACE_SIZE; + else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + size = 128; + else + size = 64; + + if (pos >= size) + return 0; + if (nbytes >= size) + nbytes = size; + if (pos + nbytes > size) + nbytes = size - pos; + cnt = nbytes; + + if (!access_ok(VERIFY_WRITE, buf, cnt)) + return -EINVAL; + + if ((pos & 1) && cnt) { + unsigned char val; + pci_read_config_byte(dev, pos, &val); + __put_user(val, buf); + buf++; + pos++; + cnt--; + } + + if ((pos & 3) && cnt > 2) { + unsigned short val; + pci_read_config_word(dev, pos, &val); + __put_user(cpu_to_le16(val), (unsigned short *) buf); + buf += 2; + pos += 2; + cnt -= 2; + } + + while (cnt >= 4) { + unsigned int val; + pci_read_config_dword(dev, pos, &val); + __put_user(cpu_to_le32(val), (unsigned int *) buf); + buf += 4; + pos += 4; + cnt -= 4; + } + + if (cnt >= 2) { + unsigned short val; + pci_read_config_word(dev, pos, &val); + __put_user(cpu_to_le16(val), (unsigned short *) buf); + buf += 2; + pos += 2; + cnt -= 2; + } + + if (cnt) { + unsigned char val; + pci_read_config_byte(dev, pos, &val); + __put_user(val, buf); + buf++; + pos++; + cnt--; + } + + *ppos = pos; + return nbytes; +} + +static ssize_t +proc_bus_pci_write(struct file *file, const char *buf, size_t nbytes, loff_t *ppos) +{ + const struct inode *ino = file->f_dentry->d_inode; + const struct proc_dir_entry *dp = ino->u.generic_ip; + struct pci_dev *dev = dp->data; + int pos = *ppos; + int cnt; + + if (pos >= PCI_CFG_SPACE_SIZE) + return 0; + if (nbytes >= PCI_CFG_SPACE_SIZE) + nbytes = PCI_CFG_SPACE_SIZE; + if (pos + nbytes > PCI_CFG_SPACE_SIZE) + nbytes = PCI_CFG_SPACE_SIZE - pos; + cnt = nbytes; + + if (!access_ok(VERIFY_READ, buf, cnt)) + return -EINVAL; + + if ((pos & 1) && cnt) { + unsigned char val; + __get_user(val, buf); + pci_write_config_byte(dev, pos, val); + buf++; + pos++; + cnt--; + } + + if ((pos & 3) && cnt > 2) { + unsigned short val; + __get_user(val, (unsigned short *) buf); + pci_write_config_word(dev, pos, le16_to_cpu(val)); + buf += 2; + pos += 2; + cnt -= 2; + } + + while (cnt >= 4) { + unsigned int val; + __get_user(val, (unsigned int *) buf); + pci_write_config_dword(dev, pos, le32_to_cpu(val)); + buf += 4; + pos += 4; + cnt -= 4; + } + + if (cnt >= 2) { + unsigned short val; + __get_user(val, (unsigned short *) buf); + pci_write_config_word(dev, pos, le16_to_cpu(val)); + buf += 2; + pos += 2; + cnt -= 2; + } + + if (cnt) { + unsigned char val; + __get_user(val, buf); + pci_write_config_byte(dev, pos, val); + buf++; + pos++; + cnt--; + } + + *ppos = pos; + return nbytes; +} + +struct pci_filp_private { + enum pci_mmap_state mmap_state; + int write_combine; +}; + +static int proc_bus_pci_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + const struct proc_dir_entry *dp = inode->u.generic_ip; + struct pci_dev *dev = dp->data; +#ifdef HAVE_PCI_MMAP + struct pci_filp_private *fpriv = file->private_data; +#endif /* HAVE_PCI_MMAP */ + int ret = 0; + + switch (cmd) { + case PCIIOC_CONTROLLER: + ret = pci_controller_num(dev); + break; + +#ifdef HAVE_PCI_MMAP + case PCIIOC_MMAP_IS_IO: + fpriv->mmap_state = pci_mmap_io; + break; + + case PCIIOC_MMAP_IS_MEM: + fpriv->mmap_state = pci_mmap_mem; + break; + + case PCIIOC_WRITE_COMBINE: + if (arg) + fpriv->write_combine = 1; + else + fpriv->write_combine = 0; + break; + +#endif /* HAVE_PCI_MMAP */ + + default: + ret = -EINVAL; + break; + }; + + return ret; +} + +#ifdef HAVE_PCI_MMAP +static int proc_bus_pci_mmap(struct 
file *file, struct vm_area_struct *vma) +{ + struct inode *inode = file->f_dentry->d_inode; + const struct proc_dir_entry *dp = inode->u.generic_ip; + struct pci_dev *dev = dp->data; + struct pci_filp_private *fpriv = file->private_data; + int ret; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + ret = pci_mmap_page_range(dev, vma, + fpriv->mmap_state, + fpriv->write_combine); + if (ret < 0) + return ret; + + return 0; +} + +static int proc_bus_pci_open(struct inode *inode, struct file *file) +{ + struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL); + + if (!fpriv) + return -ENOMEM; + + fpriv->mmap_state = pci_mmap_io; + fpriv->write_combine = 0; + + file->private_data = fpriv; + + return 0; +} + +static int proc_bus_pci_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + file->private_data = NULL; + + return 0; +} +#endif /* HAVE_PCI_MMAP */ + +static struct file_operations proc_bus_pci_operations = { + llseek: proc_bus_pci_lseek, + read: proc_bus_pci_read, + write: proc_bus_pci_write, + ioctl: proc_bus_pci_ioctl, +#ifdef HAVE_PCI_MMAP + open: proc_bus_pci_open, + release: proc_bus_pci_release, + mmap: proc_bus_pci_mmap, +#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA + get_unmapped_area: get_pci_unmapped_area, +#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */ +#endif /* HAVE_PCI_MMAP */ +}; + +#if BITS_PER_LONG == 32 +#define LONG_FORMAT "\t%08lx" +#else +#define LONG_FORMAT "\t%16lx" +#endif + +/* iterator */ +static void *pci_seq_start(struct seq_file *m, loff_t *pos) +{ + struct list_head *p = &pci_devices; + loff_t n = *pos; + + /* XXX: surely we need some locking for traversing the list? */ + while (n--) { + p = p->next; + if (p == &pci_devices) + return NULL; + } + return p; +} +static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct list_head *p = v; + (*pos)++; + return p->next != &pci_devices ? p->next : NULL; +} +static void pci_seq_stop(struct seq_file *m, void *v) +{ + /* release whatever locks we need */ +} + +static int show_device(struct seq_file *m, void *v) +{ + struct list_head *p = v; + const struct pci_dev *dev; + const struct pci_driver *drv; + int i; + + if (p == &pci_devices) + return 0; + + dev = pci_dev_g(p); + drv = pci_dev_driver(dev); + seq_printf(m, "%02x%02x\t%04x%04x\t%x", + dev->bus->number, + dev->devfn, + dev->vendor, + dev->device, + dev->irq); + /* Here should be 7 and not PCI_NUM_RESOURCES as we need to preserve compatibility */ + for(i=0; i<7; i++) + seq_printf(m, LONG_FORMAT, + dev->resource[i].start | + (dev->resource[i].flags & PCI_REGION_FLAG_MASK)); + for(i=0; i<7; i++) + seq_printf(m, LONG_FORMAT, + dev->resource[i].start < dev->resource[i].end ? 
+ dev->resource[i].end - dev->resource[i].start + 1 : 0); + seq_putc(m, '\t'); + if (drv) + seq_printf(m, "%s", drv->name); + seq_putc(m, '\n'); + return 0; +} + +static struct seq_operations proc_bus_pci_devices_op = { + start: pci_seq_start, + next: pci_seq_next, + stop: pci_seq_stop, + show: show_device +}; + +static struct proc_dir_entry *proc_bus_pci_dir; + +int pci_proc_attach_device(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + struct proc_dir_entry *de, *e; + char name[16]; + + if (!(de = bus->procdir)) { + sprintf(name, "%02x", bus->number); + de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir); + if (!de) + return -ENOMEM; + } + sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + e = dev->procent = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, de); + if (!e) + return -ENOMEM; + e->proc_fops = &proc_bus_pci_operations; + e->data = dev; + e->size = PCI_CFG_SPACE_SIZE; + return 0; +} + +int pci_proc_detach_device(struct pci_dev *dev) +{ + struct proc_dir_entry *e; + + if ((e = dev->procent)) { + if (atomic_read(&e->count)) + return -EBUSY; + remove_proc_entry(e->name, dev->bus->procdir); + dev->procent = NULL; + } + return 0; +} + +int pci_proc_attach_bus(struct pci_bus* bus) +{ + struct proc_dir_entry *de = bus->procdir; + + if (!de) { + char name[16]; + sprintf(name, "%02x", bus->number); + de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir); + if (!de) + return -ENOMEM; + } + return 0; +} + +int pci_proc_detach_bus(struct pci_bus* bus) +{ + struct proc_dir_entry *de = bus->procdir; + if (de) + remove_proc_entry(de->name, proc_bus_pci_dir); + return 0; +} + + +/* + * Backward compatible /proc/pci interface. + */ + +/* + * Convert some of the configuration space registers of the device at + * address (bus,devfn) into a string (possibly several lines each). + * The configuration string is stored starting at buf[len]. If the + * string would exceed the size of the buffer (SIZE), 0 is returned. + */ +static int show_dev_config(struct seq_file *m, void *v) +{ + struct list_head *p = v; + struct pci_dev *dev; + struct pci_driver *drv; + u32 class_rev; + unsigned char latency, min_gnt, max_lat, *class; + int reg; + + if (p == &pci_devices) { + seq_puts(m, "PCI devices found:\n"); + return 0; + } + + dev = pci_dev_g(p); + drv = pci_dev_driver(dev); + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); + pci_read_config_byte (dev, PCI_LATENCY_TIMER, &latency); + pci_read_config_byte (dev, PCI_MIN_GNT, &min_gnt); + pci_read_config_byte (dev, PCI_MAX_LAT, &max_lat); + seq_printf(m, " Bus %2d, device %3d, function %2d:\n", + dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + class = pci_class_name(class_rev >> 16); + if (class) + seq_printf(m, " %s", class); + else + seq_printf(m, " Class %04x", class_rev >> 16); + seq_printf(m, ": %s (rev %d).\n", dev->name, class_rev & 0xff); + + if (dev->irq) + seq_printf(m, " IRQ %d.\n", dev->irq); + + if (latency || min_gnt || max_lat) { + seq_printf(m, " Master Capable. "); + if (latency) + seq_printf(m, "Latency=%d. ", latency); + else + seq_puts(m, "No bursts. 
"); + if (min_gnt) + seq_printf(m, "Min Gnt=%d.", min_gnt); + if (max_lat) + seq_printf(m, "Max Lat=%d.", max_lat); + seq_putc(m, '\n'); + } + + for (reg = 0; reg < 6; reg++) { + struct resource *res = dev->resource + reg; + unsigned long base, end, flags; + + base = res->start; + end = res->end; + flags = res->flags; + if (!end) + continue; + + if (flags & PCI_BASE_ADDRESS_SPACE_IO) { + seq_printf(m, " I/O at 0x%lx [0x%lx].\n", + base, end); + } else { + const char *pref, *type = "unknown"; + + if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH) + pref = "P"; + else + pref = "Non-p"; + switch (flags & PCI_BASE_ADDRESS_MEM_TYPE_MASK) { + case PCI_BASE_ADDRESS_MEM_TYPE_32: + type = "32 bit"; break; + case PCI_BASE_ADDRESS_MEM_TYPE_1M: + type = "20 bit"; break; + case PCI_BASE_ADDRESS_MEM_TYPE_64: + type = "64 bit"; break; + } + seq_printf(m, " %srefetchable %s memory at " + "0x%lx [0x%lx].\n", pref, type, + base, + end); + } + } + return 0; +} + +static struct seq_operations proc_pci_op = { + start: pci_seq_start, + next: pci_seq_next, + stop: pci_seq_stop, + show: show_dev_config +}; + +static int proc_bus_pci_dev_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &proc_bus_pci_devices_op); +} +static struct file_operations proc_bus_pci_dev_operations = { + open: proc_bus_pci_dev_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; +static int proc_pci_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &proc_pci_op); +} +static struct file_operations proc_pci_operations = { + open: proc_pci_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + +static int __init pci_proc_init(void) +{ + if (pci_present()) { + struct proc_dir_entry *entry; + struct pci_dev *dev; + proc_bus_pci_dir = proc_mkdir("pci", proc_bus); + entry = create_proc_entry("devices", 0, proc_bus_pci_dir); + if (entry) + entry->proc_fops = &proc_bus_pci_dev_operations; + pci_for_each_dev(dev) { + pci_proc_attach_device(dev); + } + entry = create_proc_entry("pci", 0, NULL); + if (entry) + entry->proc_fops = &proc_pci_operations; + } + return 0; +} + +__initcall(pci_proc_init); diff --git a/xen-2.4.16/drivers/pci/quirks.c b/xen-2.4.16/drivers/pci/quirks.c new file mode 100644 index 0000000000..5c06bb55a2 --- /dev/null +++ b/xen-2.4.16/drivers/pci/quirks.c @@ -0,0 +1,534 @@ +/* + * $Id: quirks.c,v 1.5 1998/05/02 19:24:14 mj Exp $ + * + * This file contains work-arounds for many known PCI hardware + * bugs. Devices present only on certain architectures (host + * bridges et cetera) should be handled in arch-specific code. + * + * Copyright (c) 1999 Martin Mares + * + * The bridge optimization stuff has been removed. If you really + * have a silly BIOS which is unable to set your host bridge right, + * use the PowerTweak utility (see http://powertweak.sourceforge.net). + */ + +#include +#include +//#include +#include +#include +#include + +#undef DEBUG + +/* Deal with broken BIOS'es that neglect to enable passive release, + which can cause problems in combination with the 82441FX/PPro MTRRs */ +static void __init quirk_passive_release(struct pci_dev *dev) +{ + struct pci_dev *d = NULL; + unsigned char dlc; + + /* We have to make sure a particular bit is set in the PIIX3 + ISA bridge, so we have to go out and find it. 
*/ + while ((d = pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) { + pci_read_config_byte(d, 0x82, &dlc); + if (!(dlc & 1<<1)) { + printk(KERN_ERR "PCI: PIIX3: Enabling Passive Release on %s\n", d->slot_name); + dlc |= 1<<1; + pci_write_config_byte(d, 0x82, dlc); + } + } +} + +/* The VIA VP2/VP3/MVP3 seem to have some 'features'. There may be a workaround + but VIA don't answer queries. If you happen to have good contacts at VIA + ask them for me please -- Alan + + This appears to be BIOS dependent, not version dependent. So presumably there is a + chipset level fix */ + + +int isa_dma_bridge_buggy; /* Exported */ + +static void __init quirk_isa_dma_hangs(struct pci_dev *dev) +{ + if (!isa_dma_bridge_buggy) { + isa_dma_bridge_buggy=1; + printk(KERN_INFO "Activating ISA DMA hang workarounds.\n"); + } +} + +int pci_pci_problems; + +/* + * Chipsets where PCI->PCI transfers vanish or hang + */ + +static void __init quirk_nopcipci(struct pci_dev *dev) +{ + if((pci_pci_problems&PCIPCI_FAIL)==0) + { + printk(KERN_INFO "Disabling direct PCI/PCI transfers.\n"); + pci_pci_problems|=PCIPCI_FAIL; + } +} + +/* + * Triton requires workarounds to be used by the drivers + */ + +static void __init quirk_triton(struct pci_dev *dev) +{ + if((pci_pci_problems&PCIPCI_TRITON)==0) + { + printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n"); + pci_pci_problems|=PCIPCI_TRITON; + } +} + +/* + * VIA Apollo KT133 needs PCI latency patch + * Made according to a Windows driver based patch by George E. Breese + * see PCI Latency Adjust on http://www.viahardware.com/download/viatweak.shtm + * Also see http://home.tiscalinet.de/au-ja/review-kt133a-1-en.html for + * the info on which Mr Breese based his work. + * + * Updated based on further information from the site and also on + * information provided by VIA + */ +static void __init quirk_vialatency(struct pci_dev *dev) +{ + struct pci_dev *p; + u8 rev; + u8 busarb; + /* Ok, we have a potential problem chipset here. Now see if we have + a buggy southbridge */ + + p=pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, NULL); + if(p!=NULL) + { + pci_read_config_byte(p, PCI_CLASS_REVISION, &rev); + /* 0x40 - 0x4f == 686B, 0x10 - 0x2f == 686A; thanks Dan Hollis */ + /* Check for buggy part revisions */ + if (rev < 0x40 || rev > 0x42) + return; + } + else + { + p = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL); + if(p==NULL) /* No problem parts */ + return; + pci_read_config_byte(p, PCI_CLASS_REVISION, &rev); + /* Check for buggy part revisions */ + if (rev < 0x10 || rev > 0x12) + return; + } + + /* + * Ok, we have the problem. Now set the PCI master grant to + * occur every master grant. The apparent bug is that under high + * PCI load (quite common in Linux of course) you can get data + * loss when the CPU is held off the bus for 3 bus master requests. + * This happens to include the IDE controllers.... + * + * VIA only apply this fix when an SB Live! is present, but under + * both Linux and Windows this isn't enough, and we have seen + * corruption without SB Live! but with things like 3 UDMA IDE + * controllers. So we ignore that bit of the VIA recommendation..
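+ * The fix below instead reprograms the bus arbitration byte at config offset 0x76.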
+ */ + + pci_read_config_byte(dev, 0x76, &busarb); + /* Set bits 5:4 of byte 0x76 to 01b: + "Master priority rotation on every PCI master grant" */ + busarb &= ~(1<<5); + busarb |= (1<<4); + pci_write_config_byte(dev, 0x76, busarb); + printk(KERN_INFO "Applying VIA southbridge workaround.\n"); +} + +/* + * VIA Apollo VP3 needs ETBF on BT848/878 + */ + +static void __init quirk_viaetbf(struct pci_dev *dev) +{ + if((pci_pci_problems&PCIPCI_VIAETBF)==0) + { + printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n"); + pci_pci_problems|=PCIPCI_VIAETBF; + } +} +static void __init quirk_vsfx(struct pci_dev *dev) +{ + if((pci_pci_problems&PCIPCI_VSFX)==0) + { + printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n"); + pci_pci_problems|=PCIPCI_VSFX; + } +} + + +/* + * Natoma has some interesting boundary conditions with Zoran stuff + * at least + */ + +static void __init quirk_natoma(struct pci_dev *dev) +{ + if((pci_pci_problems&PCIPCI_NATOMA)==0) + { + printk(KERN_INFO "Limiting direct PCI/PCI transfers.\n"); + pci_pci_problems|=PCIPCI_NATOMA; + } +} + +/* + * S3 868 and 968 chips report region size equal to 32M, but they decode 64M. + * If it's needed, re-allocate the region. + */ + +static void __init quirk_s3_64M(struct pci_dev *dev) +{ + struct resource *r = &dev->resource[0]; + + if ((r->start & 0x3ffffff) || r->end != r->start + 0x3ffffff) { + r->start = 0; + r->end = 0x3ffffff; + } +} + +static void __init quirk_io_region(struct pci_dev *dev, unsigned region, unsigned size, int nr) +{ + region &= ~(size-1); + if (region) { + struct resource *res = dev->resource + nr; + + res->name = dev->name; + res->start = region; + res->end = region + size - 1; + res->flags = IORESOURCE_IO; + pci_claim_resource(dev, nr); + } +} + +/* + * Let's make the southbridge information explicit instead + * of having to worry about people probing the ACPI areas, + * for example.. (Yes, it happens, and if you read the wrong + * ACPI register it will put the machine to sleep with no + * way of waking it up again. Bummer).
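+ * The quirks below read those pointers and hand the ranges to quirk_io_region(), which claims them with the resource allocator so they cannot be reassigned.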
+ * + * ALI M7101: Two IO regions pointed to by words at + * 0xE0 (64 bytes of ACPI registers) + * 0xE2 (32 bytes of SMB registers) + */ +static void __init quirk_ali7101_acpi(struct pci_dev *dev) +{ + u16 region; + + pci_read_config_word(dev, 0xE0, &region); + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES); + pci_read_config_word(dev, 0xE2, &region); + quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1); +} + +/* + * PIIX4 ACPI: Two IO regions pointed to by longwords at + * 0x40 (64 bytes of ACPI registers) + * 0x90 (32 bytes of SMB registers) + */ +static void __init quirk_piix4_acpi(struct pci_dev *dev) +{ + u32 region; + + pci_read_config_dword(dev, 0x40, &region); + quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES); + pci_read_config_dword(dev, 0x90, &region); + quirk_io_region(dev, region, 32, PCI_BRIDGE_RESOURCES+1); +} + +/* + * VIA ACPI: One IO region pointed to by longword at + * 0x48 or 0x20 (256 bytes of ACPI registers) + */ +static void __init quirk_vt82c586_acpi(struct pci_dev *dev) +{ + u8 rev; + u32 region; + + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev & 0x10) { + pci_read_config_dword(dev, 0x48, &region); + region &= PCI_BASE_ADDRESS_IO_MASK; + quirk_io_region(dev, region, 256, PCI_BRIDGE_RESOURCES); + } +} + +/* + * VIA VT82C686 ACPI: Three IO regions pointed to by (long)words at + * 0x48 (256 bytes of ACPI registers) + * 0x70 (128 bytes of hardware monitoring registers) + * 0x90 (16 bytes of SMB registers) + */ +static void __init quirk_vt82c686_acpi(struct pci_dev *dev) +{ + u16 hm; + u32 smb; + + quirk_vt82c586_acpi(dev); + + pci_read_config_word(dev, 0x70, &hm); + hm &= PCI_BASE_ADDRESS_IO_MASK; + quirk_io_region(dev, hm, 128, PCI_BRIDGE_RESOURCES + 1); + + pci_read_config_dword(dev, 0x90, &smb); + smb &= PCI_BASE_ADDRESS_IO_MASK; + quirk_io_region(dev, smb, 16, PCI_BRIDGE_RESOURCES + 2); +} + + +#ifdef CONFIG_X86_IO_APIC +extern int nr_ioapics; + +/* + * VIA 686A/B: If an IO-APIC is active, we need to route all on-chip + * devices to the external APIC. + * + * TODO: When we have device-specific interrupt routers, + * this code will go away from quirks. + */ +static void __init quirk_via_ioapic(struct pci_dev *dev) +{ + u8 tmp; + + if (nr_ioapics < 1) + tmp = 0; /* nothing routed to external APIC */ + else + tmp = 0x1f; /* all known bits (4-0) routed to external APIC */ + + printk(KERN_INFO "PCI: %sbling Via external APIC routing\n", + tmp == 0 ? "Disa" : "Ena"); + + /* Offset 0x58: External APIC IRQ output control */ + pci_write_config_byte (dev, 0x58, tmp); +} + +#endif /* CONFIG_X86_IO_APIC */ + + +/* + * Via 686A/B: The PCI_INTERRUPT_LINE register for the on-chip + * devices, USB0/1, AC97, MC97, and ACPI, has an unusual feature: + * when written, it makes an internal connection to the PIC. + * For these devices, this register is defined to be 4 bits wide. + * Normally this is fine. However for IO-APIC motherboards, or + * non-x86 architectures (yes Via exists on PPC among other places), + * we must mask the PCI_INTERRUPT_LINE value versus 0xf to get + * interrupts delivered properly. + * + * TODO: When we have device-specific interrupt routers, + * quirk_via_irqpic will go away from quirks. + */ + +/* + * FIXME: it is questionable that quirk_via_acpi + * is needed. It shows up as an ISA bridge, and does not + * support the PCI_INTERRUPT_LINE register at all. Therefore + * it seems like setting the pci_dev's 'irq' to the + * value of the ACPI SCI interrupt is only done for convenience.
+ * -jgarzik + */ +static void __init quirk_via_acpi(struct pci_dev *d) +{ + /* + * VIA ACPI device: SCI IRQ line in PCI config byte 0x42 + */ + u8 irq; + pci_read_config_byte(d, 0x42, &irq); + irq &= 0xf; + if (irq && (irq != 2)) + d->irq = irq; +} + +static void __init quirk_via_irqpic(struct pci_dev *dev) +{ + u8 irq, new_irq = dev->irq & 0xf; + + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); + + if (new_irq != irq) { + printk(KERN_INFO "PCI: Via IRQ fixup for %s, from %d to %d\n", + dev->slot_name, irq, new_irq); + + udelay(15); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, new_irq); + } +} + + +/* + * PIIX3 USB: We have to disable USB interrupts that are + * hardwired to PIRQD# and may be shared with an + * external device. + * + * Legacy Support Register (LEGSUP): + * bit13: USB PIRQ Enable (USBPIRQDEN), + * bit4: Trap/SMI On IRQ Enable (USBSMIEN). + * + * We mask out all r/wc bits, too. + */ +static void __init quirk_piix3_usb(struct pci_dev *dev) +{ + u16 legsup; + + pci_read_config_word(dev, 0xc0, &legsup); + legsup &= 0x50ef; + pci_write_config_word(dev, 0xc0, legsup); +} + +/* + * VIA VT82C598 has its device ID settable and many BIOSes + * set it to the ID of VT82C597 for backward compatibility. + * We need to switch it off to be able to recognize the real + * type of the chip. + */ +static void __init quirk_vt82c598_id(struct pci_dev *dev) +{ + pci_write_config_byte(dev, 0xfc, 0); + pci_read_config_word(dev, PCI_DEVICE_ID, &dev->device); +} + +/* + * CardBus controllers have a legacy base address that enables them + * to respond as i82365 pcmcia controllers. We don't want them to + * do this even if the Linux CardBus driver is not loaded, because + * the Linux i82365 driver does not (and should not) handle CardBus. + */ +static void __init quirk_cardbus_legacy(struct pci_dev *dev) +{ + if ((PCI_CLASS_BRIDGE_CARDBUS << 8) ^ dev->class) + return; + pci_write_config_dword(dev, PCI_CB_LEGACY_MODE_BASE, 0); +} + +/* + * The AMD IO-APIC can hang the box when an APIC IRQ is masked. + * We check all revs >= B0 (yet not in the pre-production!) as the bug + * is currently marked NoFix. + * + * We have multiple reports of hangs with this chipset that went away with + * noapic specified. For the moment we assume it's the errata. We may be wrong + * of course. However the advice is demonstrably good even if so.. + */ + +static void __init quirk_amd_ioapic(struct pci_dev *dev) +{ + u8 rev; + + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + if(rev >= 0x02) + { + printk(KERN_WARNING "I/O APIC: AMD Errata #22 may be present. In the event of instability try\n"); + printk(KERN_WARNING " : booting with the \"noapic\" option.\n"); + } +} + +/* + * Following the PCI ordering rules is optional on the AMD762. I'm not + * sure what the designers were smoking but let's not inhale... + * + * To be fair to AMD, it follows the spec by default, it's BIOS people + * who turn it off! + */ + +static void __init quirk_amd_ordering(struct pci_dev *dev) +{ + u32 pcic; + + pci_read_config_dword(dev, 0x42, &pcic); + if((pcic&2)==0) + { + pcic |= 2; + printk(KERN_WARNING "BIOS disabled PCI ordering compliance, so we enabled it again.\n"); + pci_write_config_dword(dev, 0x42, pcic); + } +} + +/* + * The main table of quirks.
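+ * Each entry gives a fixup pass (header or final), a vendor/device match, and a hook; pci_do_fixups() below runs every matching hook for each device.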
+ */ + +static struct pci_fixup pci_fixups[] __initdata = { + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_passive_release }, + /* + * It's not totally clear which chipsets are the problematic ones. + * We know 82C586 and 82C596 variants are affected. + */ + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, quirk_isa_dma_hangs }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, quirk_isa_dma_hangs }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, quirk_isa_dma_hangs }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_868, quirk_s3_64M }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_S3, PCI_DEVICE_ID_S3_968, quirk_s3_64M }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437, quirk_triton }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82437VX, quirk_triton }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439, quirk_triton }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82439TX, quirk_triton }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82441, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_0, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443LX_1, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_1, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2, quirk_natoma }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, quirk_nopcipci }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_496, quirk_nopcipci }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, quirk_vialatency }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8371_1, quirk_vialatency }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, 0x3112 /* Not out yet ?
*/, quirk_vialatency }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C576, quirk_vsfx }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_viaetbf }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C597_0, quirk_vt82c598_id }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_vt82c586_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_vt82c686_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, quirk_piix4_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M7101, quirk_ali7101_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_2, quirk_piix3_usb }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_2, quirk_piix3_usb }, + { PCI_FIXUP_FINAL, PCI_ANY_ID, PCI_ANY_ID, quirk_cardbus_legacy }, + +#ifdef CONFIG_X86_IO_APIC + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, quirk_via_ioapic }, +#endif + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_3, quirk_via_acpi }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_4, quirk_via_acpi }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_2, quirk_via_irqpic }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_5, quirk_via_irqpic }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686_6, quirk_via_irqpic }, + + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7410, quirk_amd_ioapic }, + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C, quirk_amd_ordering }, + + { 0 } +}; + + +static void pci_do_fixups(struct pci_dev *dev, int pass, struct pci_fixup *f) +{ + while (f->pass) { + if (f->pass == pass && + (f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) && + (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { +#ifdef DEBUG + printk(KERN_INFO "PCI: Calling quirk %p for %s\n", f->hook, dev->slot_name); +#endif + f->hook(dev); + } + f++; + } +} + +void pci_fixup_device(int pass, struct pci_dev *dev) +{ + pci_do_fixups(dev, pass, pcibios_fixups); + pci_do_fixups(dev, pass, pci_fixups); +} diff --git a/xen-2.4.16/drivers/pci/setup-bus.c b/xen-2.4.16/drivers/pci/setup-bus.c new file mode 100644 index 0000000000..6a26a4ae8f --- /dev/null +++ b/xen-2.4.16/drivers/pci/setup-bus.c @@ -0,0 +1,249 @@ +/* + * drivers/pci/setup-bus.c + * + * Extruded from code written by + * Dave Rusling (david.rusling@reo.mts.dec.com) + * David Mosberger (davidm@cs.arizona.edu) + * David Miller (davem@redhat.com) + * + * Support routines for initializing a PCI subsystem. 
+ */ + +/* + * Nov 2000, Ivan Kokshaysky + * PCI-PCI bridges cleanup, sorted resource allocation + */ + +#include +#include +#include +#include +#include +#include +#include + + +#define DEBUG_CONFIG 0 +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + +#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1)) + +static int __init +pbus_assign_resources_sorted(struct pci_bus *bus, + struct pbus_set_ranges_data *ranges) +{ + struct list_head *ln; + struct resource *res; + struct resource_list head_io, head_mem, *list, *tmp; + unsigned long io_reserved = 0, mem_reserved = 0; + int idx, found_vga = 0; + + head_io.next = head_mem.next = NULL; + for (ln=bus->devices.next; ln != &bus->devices; ln=ln->next) { + struct pci_dev *dev = pci_dev_b(ln); + u16 class = dev->class >> 8; + u16 cmd; + + /* First, disable the device to avoid side + effects of possibly overlapping I/O and + memory ranges. + Leave VGA enabled - for obvious reason. :-) + Same with all sorts of bridges - they may + have VGA behind them. */ + if (class == PCI_CLASS_DISPLAY_VGA + || class == PCI_CLASS_NOT_DEFINED_VGA) + found_vga = 1; + else if (class >> 8 != PCI_BASE_CLASS_BRIDGE) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY + | PCI_COMMAND_MASTER); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + + /* Reserve some resources for CardBus. + Are these values reasonable? */ + if (class == PCI_CLASS_BRIDGE_CARDBUS) { + io_reserved += 8*1024; + mem_reserved += 32*1024*1024; + continue; + } + + pdev_sort_resources(dev, &head_io, IORESOURCE_IO); + pdev_sort_resources(dev, &head_mem, IORESOURCE_MEM); + } + + for (list = head_io.next; list;) { + res = list->res; + idx = res - &list->dev->resource[0]; + if (pci_assign_resource(list->dev, idx) == 0 + && ranges->io_end < res->end) + ranges->io_end = res->end; + tmp = list; + list = list->next; + kfree(tmp); + } + for (list = head_mem.next; list;) { + res = list->res; + idx = res - &list->dev->resource[0]; + if (pci_assign_resource(list->dev, idx) == 0 + && ranges->mem_end < res->end) + ranges->mem_end = res->end; + tmp = list; + list = list->next; + kfree(tmp); + } + + ranges->io_end += io_reserved; + ranges->mem_end += mem_reserved; + + /* PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998) + requires that if there are no I/O ports or memory behind the + bridge, the corresponding range must be turned off by writing a base + value greater than the limit to the bridge's base/limit registers. */ +#if 1 + /* But assuming that some hardware designed before 1998 might + not support this (very unlikely - at least all DEC bridges + are ok and I believe that was the de-facto standard. -ink), we + must allow for at least one unit.
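+ The window sizes are then rounded up to the 4K I/O and 1M memory granularity that PCI-PCI bridge base/limit registers can encode.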
*/ + if (ranges->io_end == ranges->io_start) + ranges->io_end += 1; + if (ranges->mem_end == ranges->mem_start) + ranges->mem_end += 1; +#endif + ranges->io_end = ROUND_UP(ranges->io_end, 4*1024); + ranges->mem_end = ROUND_UP(ranges->mem_end, 1024*1024); + + return found_vga; +} + +/* Initialize bridges with base/limit values we have collected */ +static void __init +pci_setup_bridge(struct pci_bus *bus) +{ + struct pbus_set_ranges_data ranges; + struct pci_dev *bridge = bus->self; + u32 l; + + if (!bridge || (bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI) + return; + ranges.io_start = bus->resource[0]->start; + ranges.io_end = bus->resource[0]->end; + ranges.mem_start = bus->resource[1]->start; + ranges.mem_end = bus->resource[1]->end; + pcibios_fixup_pbus_ranges(bus, &ranges); + + DBGC((KERN_ERR "PCI: Bus %d, bridge: %s\n", bus->number, bridge->name)); + DBGC((KERN_ERR " IO window: %04lx-%04lx\n", ranges.io_start, ranges.io_end)); + DBGC((KERN_ERR " MEM window: %08lx-%08lx\n", ranges.mem_start, ranges.mem_end)); + + /* Set up the top and bottom of the PCI I/O segment for this bus. */ + pci_read_config_dword(bridge, PCI_IO_BASE, &l); + l &= 0xffff0000; + l |= (ranges.io_start >> 8) & 0x00f0; + l |= ranges.io_end & 0xf000; + pci_write_config_dword(bridge, PCI_IO_BASE, l); + + /* Clear upper 16 bits of I/O base/limit. */ + pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0); + + /* Clear out the upper 32 bits of PREF base/limit. */ + pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0); + pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0); + + /* Set up the top and bottom of the PCI Memory segment + for this bus. */ + l = (ranges.mem_start >> 16) & 0xfff0; + l |= ranges.mem_end & 0xfff00000; + pci_write_config_dword(bridge, PCI_MEMORY_BASE, l); + + /* Set up PREF base/limit. */ + l = (bus->resource[2]->start >> 16) & 0xfff0; + l |= bus->resource[2]->end & 0xfff00000; + pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l); + + /* Check if we have VGA behind the bridge. + Enable ISA in either case. */ + l = (bus->resource[0]->flags & IORESOURCE_BUS_HAS_VGA) ? 0x0c : 0x04; + pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, l); +} + +static void __init +pbus_assign_resources(struct pci_bus *bus, struct pbus_set_ranges_data *ranges) +{ + struct list_head *ln; + int found_vga = pbus_assign_resources_sorted(bus, ranges); + + if (!ranges->found_vga && found_vga) { + struct pci_bus *b; + + ranges->found_vga = 1; + /* Propagate presence of the VGA to upstream bridges */ + for (b = bus; b->parent; b = b->parent) { +#if 0 + /* ? Do we actually need to enable PF memory?
*/ + b->resource[2]->start = 0; +#endif + b->resource[0]->flags |= IORESOURCE_BUS_HAS_VGA; + } + } + for (ln=bus->children.next; ln != &bus->children; ln=ln->next) { + struct pci_bus *b = pci_bus_b(ln); + + b->resource[0]->start = ranges->io_start = ranges->io_end; + b->resource[1]->start = ranges->mem_start = ranges->mem_end; + + pbus_assign_resources(b, ranges); + + b->resource[0]->end = ranges->io_end - 1; + b->resource[1]->end = ranges->mem_end - 1; + + pci_setup_bridge(b); + } +} + +void __init +pci_assign_unassigned_resources(void) +{ + struct pbus_set_ranges_data ranges; + struct list_head *ln; + struct pci_dev *dev; + + for(ln=pci_root_buses.next; ln != &pci_root_buses; ln=ln->next) { + struct pci_bus *b = pci_bus_b(ln); + + ranges.io_start = b->resource[0]->start + PCIBIOS_MIN_IO; + ranges.mem_start = b->resource[1]->start + PCIBIOS_MIN_MEM; + ranges.io_end = ranges.io_start; + ranges.mem_end = ranges.mem_start; + ranges.found_vga = 0; + pbus_assign_resources(b, &ranges); + } + pci_for_each_dev(dev) { + pdev_enable_device(dev); + } +} + +/* Check whether the bridge supports I/O forwarding. + If not, its I/O base/limit register must be + read-only and read as 0. */ +unsigned long __init +pci_bridge_check_io(struct pci_dev *bridge) +{ + u16 io; + + pci_read_config_word(bridge, PCI_IO_BASE, &io); + if (!io) { + pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0); + pci_read_config_word(bridge, PCI_IO_BASE, &io); + pci_write_config_word(bridge, PCI_IO_BASE, 0x0); + } + if (io) + return IORESOURCE_IO; + printk(KERN_WARNING "PCI: bridge %s does not support I/O forwarding!\n", + bridge->name); + return 0; +} diff --git a/xen-2.4.16/drivers/pci/setup-irq.c b/xen-2.4.16/drivers/pci/setup-irq.c new file mode 100644 index 0000000000..4c65b2e98d --- /dev/null +++ b/xen-2.4.16/drivers/pci/setup-irq.c @@ -0,0 +1,71 @@ +/* + * drivers/pci/setup-irq.c + * + * Extruded from code written by + * Dave Rusling (david.rusling@reo.mts.dec.com) + * David Mosberger (davidm@cs.arizona.edu) + * David Miller (davem@redhat.com) + * + * Support routines for initializing a PCI subsystem. + */ + + +#include +#include +#include +#include +#include +#include + + +#define DEBUG_CONFIG 0 +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + + +static void __init +pdev_fixup_irq(struct pci_dev *dev, + u8 (*swizzle)(struct pci_dev *, u8 *), + int (*map_irq)(struct pci_dev *, u8, u8)) +{ + u8 pin, slot; + int irq; + + /* If this device is not on the primary bus, we need to figure out + which interrupt pin it will come in on. We know which slot it + will come in on 'cos that slot is where the bridge is. Each + time the interrupt line passes through a PCI-PCI bridge we must + apply the swizzle function. */ + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + /* Cope with 0 and illegal. */ + if (pin == 0 || pin > 4) + pin = 1; + + /* Follow the chain of bridges, swizzling as we go. */ + slot = (*swizzle)(dev, &pin); + + irq = (*map_irq)(dev, slot, pin); + if (irq == -1) + irq = 0; + dev->irq = irq; + + DBGC((KERN_ERR "PCI fixup irq: (%s) got %d\n", dev->name, dev->irq)); + + /* Always tell the device, so the driver knows what is + the real IRQ to use; the device does not use it. 
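+ (pcibios_update_irq() stores the value in the device's PCI_INTERRUPT_LINE config register.)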
*/ + pcibios_update_irq(dev, irq); +} + +void __init +pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *), + int (*map_irq)(struct pci_dev *, u8, u8)) +{ + struct pci_dev *dev; + pci_for_each_dev(dev) { + pdev_fixup_irq(dev, swizzle, map_irq); + } +} diff --git a/xen-2.4.16/drivers/pci/setup-res.c b/xen-2.4.16/drivers/pci/setup-res.c new file mode 100644 index 0000000000..57932fbf11 --- /dev/null +++ b/xen-2.4.16/drivers/pci/setup-res.c @@ -0,0 +1,234 @@ +/* + * drivers/pci/setup-res.c + * + * Extruded from code written by + * Dave Rusling (david.rusling@reo.mts.dec.com) + * David Mosberger (davidm@cs.arizona.edu) + * David Miller (davem@redhat.com) + * + * Support routines for initializing a PCI subsystem. + */ + +/* fixed for multiple pci buses, 1999 Andrea Arcangeli */ + +/* + * Nov 2000, Ivan Kokshaysky + * Resource sorting + */ + +#include +//#include +#include +#include +#include +#include +#include + + +#define DEBUG_CONFIG 0 +#if DEBUG_CONFIG +# define DBGC(args) printk args +#else +# define DBGC(args) +#endif + + +int __init +pci_claim_resource(struct pci_dev *dev, int resource) +{ + struct resource *res = &dev->resource[resource]; + struct resource *root = pci_find_parent_resource(dev, res); + int err; + + err = -EINVAL; + if (root != NULL) { + err = request_resource(root, res); + if (err) { + printk(KERN_ERR "PCI: Address space collision on " + "region %d of device %s [%lx:%lx]\n", + resource, dev->name, res->start, res->end); + } + } else { + printk(KERN_ERR "PCI: No parent found for region %d " + "of device %s\n", resource, dev->name); + } + + return err; +} + +/* + * Given the PCI bus a device resides on, try to + * find an acceptable resource allocation for a + * specific device resource.. + */ +static int pci_assign_bus_resource(const struct pci_bus *bus, + struct pci_dev *dev, + struct resource *res, + unsigned long size, + unsigned long min, + unsigned int type_mask, + int resno) +{ + int i; + + type_mask |= IORESOURCE_IO | IORESOURCE_MEM; + for (i = 0 ; i < 4; i++) { + struct resource *r = bus->resource[i]; + if (!r) + continue; + + /* type_mask must match */ + if ((res->flags ^ r->flags) & type_mask) + continue; + + /* We cannot allocate a non-prefetching resource from a pre-fetching area */ + if ((r->flags & IORESOURCE_PREFETCH) && !(res->flags & IORESOURCE_PREFETCH)) + continue; + + /* Ok, try it out.. */ + if (allocate_resource(r, res, size, min, -1, size, pcibios_align_resource, dev) < 0) + continue; + + /* Update PCI config space. */ + pcibios_update_resource(dev, r, res, resno); + return 0; + } + return -EBUSY; +} + +int +pci_assign_resource(struct pci_dev *dev, int i) +{ + const struct pci_bus *bus = dev->bus; + struct resource *res = dev->resource + i; + unsigned long size, min; + + size = res->end - res->start + 1; + min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; + + /* First, try exact prefetching match.. */ + if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) { + /* + * That failed. + * + * But a prefetching area can handle a non-prefetching + * window (it will just not perform as well). 
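+ * So we retry below with the prefetch bit dropped from the type mask.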
+ */ + if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) { + printk(KERN_ERR "PCI: Failed to allocate resource %d(%lx-%lx) for %s\n", + i, res->start, res->end, dev->slot_name); + return -EBUSY; + } + } + + DBGC((KERN_ERR " got res[%lx:%lx] for resource %d of %s\n", res->start, + res->end, i, dev->name)); + + return 0; +} + +/* Sort resources of a given type by alignment */ +void __init +pdev_sort_resources(struct pci_dev *dev, + struct resource_list *head, u32 type_mask) +{ + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *r; + struct resource_list *list, *tmp; + unsigned long r_size; + + /* PCI-PCI bridges may have I/O ports or + memory on the primary bus */ + if (dev->class >> 8 == PCI_CLASS_BRIDGE_PCI && + i >= PCI_BRIDGE_RESOURCES) + continue; + + r = &dev->resource[i]; + r_size = r->end - r->start; + + if (!(r->flags & type_mask) || r->parent) + continue; + if (!r_size) { + printk(KERN_WARNING "PCI: Ignore bogus resource %d " + "[%lx:%lx] of %s\n", + i, r->start, r->end, dev->name); + continue; + } + for (list = head; ; list = list->next) { + unsigned long size = 0; + struct resource_list *ln = list->next; + + if (ln) + size = ln->res->end - ln->res->start; + if (r_size > size) { + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + printk(KERN_ERR "pdev_sort_resources(): kmalloc() failed!\n"); + continue; + } + tmp->next = ln; + tmp->res = r; + tmp->dev = dev; + list->next = tmp; + break; + } + } + } +} + +void __init +pdev_enable_device(struct pci_dev *dev) +{ + u32 reg; + u16 cmd; + int i; + + DBGC((KERN_ERR "PCI enable device: (%s)\n", dev->name)); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (res->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + else if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + + /* Special case, disable the ROM. Several devices act funny + (i.e. do not respond to memory space writes) when it is left + enabled. A good example is QlogicISP adapters. */ + + if (dev->rom_base_reg) { + pci_read_config_dword(dev, dev->rom_base_reg, &reg); + reg &= ~PCI_ROM_ADDRESS_ENABLE; + pci_write_config_dword(dev, dev->rom_base_reg, reg); + dev->resource[PCI_ROM_RESOURCE].flags &= ~PCI_ROM_ADDRESS_ENABLE; + } + + /* All of these (may) have I/O scattered all around and may not + use I/O base address registers at all. So we just have to + always enable IO to these devices. */ + if ((dev->class >> 8) == PCI_CLASS_NOT_DEFINED + || (dev->class >> 8) == PCI_CLASS_NOT_DEFINED_VGA + || (dev->class >> 8) == PCI_CLASS_STORAGE_IDE + || (dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { + cmd |= PCI_COMMAND_IO; + } + + /* ??? Always turn on bus mastering. If the device doesn't support + it, the bit will go into the bucket. */ + cmd |= PCI_COMMAND_MASTER; + + /* Set the cache line and default latency (32). */ + pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, + (32 << 8) | (L1_CACHE_BYTES / sizeof(u32))); + + /* Enable the appropriate bits in the PCI command register.
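+ (I/O and/or memory decode as collected above, plus bus mastering.)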
*/ + pci_write_config_word(dev, PCI_COMMAND, cmd); + + DBGC((KERN_ERR " cmd reg 0x%x\n", cmd)); +} diff --git a/xen-2.4.16/drivers/pci/syscall.c b/xen-2.4.16/drivers/pci/syscall.c new file mode 100644 index 0000000000..c935efd9a9 --- /dev/null +++ b/xen-2.4.16/drivers/pci/syscall.c @@ -0,0 +1,144 @@ +/* + * pci_syscall.c + * + * For architectures where we want to allow direct access + * to the PCI config stuff - it would probably be preferable + * on PCs too, but there people just do it by hand with the + * magic northbridge registers.. + */ + +#include +#include +#include +#include +#include + + +asmlinkage long +sys_pciconfig_read(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + struct pci_dev *dev; + u8 byte; + u16 word; + u32 dword; + long err, cfg_ret; + + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto error; + + err = -ENODEV; + dev = pci_find_slot(bus, dfn); + if (!dev) + goto error; + + lock_kernel(); + switch (len) { + case 1: + cfg_ret = pci_read_config_byte(dev, off, &byte); + break; + case 2: + cfg_ret = pci_read_config_word(dev, off, &word); + break; + case 4: + cfg_ret = pci_read_config_dword(dev, off, &dword); + break; + default: + err = -EINVAL; + unlock_kernel(); + goto error; + }; + unlock_kernel(); + + err = -EIO; + if (cfg_ret != PCIBIOS_SUCCESSFUL) + goto error; + + switch (len) { + case 1: + err = put_user(byte, (unsigned char *)buf); + break; + case 2: + err = put_user(word, (unsigned short *)buf); + break; + case 4: + err = put_user(dword, (unsigned int *)buf); + break; + }; + return err; + +error: + /* ??? XFree86 doesn't even check the return value. They + just look for 0xffffffff in the output, since that's what + they get instead of a machine check on x86. */ + switch (len) { + case 1: + put_user(-1, (unsigned char *)buf); + break; + case 2: + put_user(-1, (unsigned short *)buf); + break; + case 4: + put_user(-1, (unsigned int *)buf); + break; + }; + return err; +} + +asmlinkage long +sys_pciconfig_write(unsigned long bus, unsigned long dfn, + unsigned long off, unsigned long len, void *buf) +{ + struct pci_dev *dev; + u8 byte; + u16 word; + u32 dword; + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!pcibios_present()) + return -ENOSYS; + + dev = pci_find_slot(bus, dfn); + if (!dev) + return -ENODEV; + + lock_kernel(); + switch(len) { + case 1: + err = get_user(byte, (u8 *)buf); + if (err) + break; + err = pci_write_config_byte(dev, off, byte); + if (err != PCIBIOS_SUCCESSFUL) + err = -EIO; + break; + + case 2: + err = get_user(word, (u16 *)buf); + if (err) + break; + err = pci_write_config_word(dev, off, word); + if (err != PCIBIOS_SUCCESSFUL) + err = -EIO; + break; + + case 4: + err = get_user(dword, (u32 *)buf); + if (err) + break; + err = pci_write_config_dword(dev, off, dword); + if (err != PCIBIOS_SUCCESSFUL) + err = -EIO; + break; + + default: + err = -EINVAL; + break; + }; + unlock_kernel(); + + return err; +} diff --git a/xen-2.4.16/drivers/scsi/Makefile b/xen-2.4.16/drivers/scsi/Makefile new file mode 100644 index 0000000000..574b7d2d79 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o driver.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/drivers/scsi/constants.h b/xen-2.4.16/drivers/scsi/constants.h new file mode 100644 index 0000000000..e10527ea5e --- /dev/null +++ b/xen-2.4.16/drivers/scsi/constants.h @@ -0,0 +1,6 @@ +#ifndef _CONSTANTS_H +#define _CONSTANTS_H +extern int 
print_msg(unsigned char *); +extern void print_status(int); +extern void print_Scsi_Cmnd (Scsi_Cmnd *); +#endif /* def _CONSTANTS_H */ diff --git a/xen-2.4.16/drivers/scsi/hosts.h b/xen-2.4.16/drivers/scsi/hosts.h new file mode 100644 index 0000000000..d222fc61dc --- /dev/null +++ b/xen-2.4.16/drivers/scsi/hosts.h @@ -0,0 +1,585 @@ +/* + * hosts.h Copyright (C) 1992 Drew Eckhardt + * Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale + * + * mid to low-level SCSI driver interface header + * Initial versions: Drew Eckhardt + * Subsequent revisions: Eric Youngdale + * + * + * + * Modified by Eric Youngdale eric@andante.org to + * add scatter-gather, multiple outstanding request, and other + * enhancements. + * + * Further modified by Eric Youngdale to support multiple host adapters + * of the same type. + * + * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli + */ + +#ifndef _HOSTS_H +#define _HOSTS_H + +/* + $Header: /vger/u4/cvs/linux/drivers/scsi/hosts.h,v 1.6 1997/01/19 23:07:13 davem Exp $ +*/ + +#include +/*#include */ +#include + +/* It is senseless to set SG_ALL any higher than this - the performance + * does not get any better, and it wastes memory + */ +#define SG_NONE 0 +#define SG_ALL 0xff + +#define DISABLE_CLUSTERING 0 +#define ENABLE_CLUSTERING 1 + +/* The various choices mean: + * NONE: Self evident. Host adapter is not capable of scatter-gather. + * ALL: Means that the host adapter module can do scatter-gather, + * and that there is no limit to the size of the table to which + * we scatter/gather data. + * Anything else: Indicates the maximum number of chains that can be + * used in one scatter-gather request. + */ + +/* + * The Scsi_Host_Template type has all that is needed to interface with a SCSI + * host in a device independent manner. There is one entry for each different + * type of host adapter that is supported on the system. + */ + +typedef struct scsi_disk Disk; + +typedef struct SHT +{ + + /* Used with loadable modules so we can construct a linked list. */ + struct SHT * next; + + /* Used with loadable modules so that we know when it is safe to unload */ + struct module * module; + + /* The pointer to the /proc/scsi directory entry */ + struct proc_dir_entry *proc_dir; + + /* proc-fs info function. + * Can be used to export driver statistics and other infos to the world + * outside the kernel i.e. userspace and it also provides an interface + * to feed the driver with information. Check eata_dma_proc.c for reference + */ + int (*proc_info)(char *, char **, off_t, int, int, int); + + /* + * The name pointer is a pointer to the name of the SCSI + * device detected. + */ + const char *name; + + /* + * The detect function shall return non-zero on detection, + * indicating the number of host adapters of this particular + * type were found. It should also + * initialize all data necessary for this particular + * SCSI driver. It is passed the host number, so this host + * knows where the first entry is in the scsi_hosts[] array. + * + * Note that the detect routine MUST not call any of the mid level + * functions to queue commands because things are not guaranteed + * to be set up yet. The detect routine can send commands to + * the host adapter as long as the program control will not be + * passed to scsi.c in the processing of the command. Note + * especially that scsi_malloc/scsi_free must not be called. + */ + int (* detect)(struct SHT *); + + int (*revoke)(Scsi_Device *); + + /* Used with loadable modules to unload the host structures.
Note: + * there is a default action built into the modules code which may + * be sufficient for most host adapters. Thus you may not have to supply + * this at all. + */ + int (*release)(struct Scsi_Host *); + + /* + * The info function will return whatever useful + * information the developer sees fit. If not provided, then + * the name field will be used instead. + */ + const char *(* info)(struct Scsi_Host *); + + /* + * ioctl interface + */ + int (*ioctl)(Scsi_Device *dev, int cmd, void *arg); + + /* + * The command function takes a target, a command (this is a SCSI + * command formatted as per the SCSI spec, nothing strange), a + * data buffer pointer, and data buffer length pointer. The return + * is a status int, bit fielded as follows : + * Byte What + * 0 SCSI status code + * 1 SCSI 1 byte message + * 2 host error return. + * 3 mid level error return + */ + int (* command)(Scsi_Cmnd *); + + /* + * The QueueCommand function works in a similar manner + * to the command function. It takes an additional parameter, + * void (* done)(int host, int code) which is passed the host + * # and exit result when the command is complete. + * Host number is the POSITION IN THE hosts array of THIS + * host adapter. + * + * The done() function must only be called after QueueCommand() + * has returned. + */ + int (* queuecommand)(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); + + /* + * This is an error handling strategy routine. You don't need to + * define one of these if you don't want to - there is a default + * routine that is present that should work in most cases. For those + * driver authors that have the inclination and ability to write their + * own strategy routine, this is where it is specified. Note - the + * strategy routine is *ALWAYS* run in the context of the kernel eh + * thread. Thus you are guaranteed to *NOT* be in an interrupt handler + * when you execute this, and you are also guaranteed to *NOT* have any + * other commands being queued while you are in the strategy routine. + * When you return from this function, operations return to normal. + * + * See scsi_error.c scsi_unjam_host for additional comments about what + * this function should and should not be attempting to do. + */ + int (*eh_strategy_handler)(struct Scsi_Host *); + int (*eh_abort_handler)(Scsi_Cmnd *); + int (*eh_device_reset_handler)(Scsi_Cmnd *); + int (*eh_bus_reset_handler)(Scsi_Cmnd *); + int (*eh_host_reset_handler)(Scsi_Cmnd *); + + /* + * Since the mid level driver handles time outs, etc, we want to + * be able to abort the current command. Abort returns 0 if the + * abort was successful. The field SCpnt->abort_reason + * can be filled in with the appropriate reason why we wanted + * the abort in the first place, and this will be used + * in the mid-level code instead of the host_byte(). + * If non-zero, the code passed to it + * will be used as the return code, otherwise + * DID_ABORT should be returned. + * + * Note that the scsi driver should "clean up" after itself, + * resetting the bus, etc. if necessary. + * + * NOTE - this interface is deprecated, and will go away. Use + * the eh_ routines instead. + */ + int (* abort)(Scsi_Cmnd *); + + /* + * The reset function will reset the SCSI bus. Any executing + * commands should fail with a DID_RESET in the host byte. + * The Scsi_Cmnd is passed so that the reset routine can figure + * out which host adapter should be reset, and also which command + * within the command block was responsible for the reset in + * the first place.
Some hosts do not implement a reset function, + * and these hosts must call scsi_request_sense(SCpnt) to keep + * the command alive. + * + * NOTE - this interface is deprecated, and will go away. Use + * the eh_ routines instead. + */ + int (* reset)(Scsi_Cmnd *, unsigned int); + + /* + * This function is used to select synchronous communications, + * which will result in a higher data throughput. Not implemented + * yet. + */ + int (* slave_attach)(int, int); + + /* + * This function determines the bios parameters for a given + * hard disk. These tend to be numbers that are made up by + * the host adapter. Parameters: + * size, device number, list (heads, sectors, cylinders) + */ + int (* bios_param)(Disk *, kdev_t, int []); + + + /* + * Used to set the queue depth for a specific device. + */ + void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *); + + /* + * This determines if we will use a non-interrupt driven + * or an interrupt driven scheme. It is set to the maximum number + * of simultaneous commands a given host adapter will accept. + */ + int can_queue; + + /* + * In many instances, especially where disconnect / reconnect are + * supported, our host also has an ID on the SCSI bus. If this is + * the case, then it must be reserved. Please set this_id to -1 if + * your setup is in single initiator mode, and the host lacks an + * ID. + */ + int this_id; + + /* + * This determines the degree to which the host adapter is capable + * of scatter-gather. + */ + short unsigned int sg_tablesize; + + /* + * if the host adapter has limitations besides the segment count + */ + short unsigned int max_sectors; + + /* + * True if this host adapter can make good use of linked commands. + * This will allow more than one command to be queued to a given + * unit on a given host. Set this to the maximum number of command + * blocks to be provided for each device. Set this to 1 for one + * command block per lun, 2 for two, etc. Do not set this to 0. + * You should make sure that the host adapter will do the right thing + * before you try setting this above 1. + */ + short cmd_per_lun; + + /* + * present contains a counter indicating how many boards of this + * type were found when we did the scan. + */ + unsigned char present; + + /* + * true if this host adapter uses unchecked DMA onto an ISA bus. + */ + unsigned unchecked_isa_dma:1; + + /* + * true if this host adapter can make good use of clustering. + * I originally thought that if the tablesize was large that it + * was a waste of CPU cycles to prepare a cluster list, but + * it works out that the Buslogic is faster if you use a smaller + * number of segments (i.e. use clustering). I guess it is + * inefficient. + */ + unsigned use_clustering:1; + + /* + * True if this driver uses the new error handling code. This flag is + * really only temporary until all of the other drivers get converted + * to use the new error handling code. + */ + unsigned use_new_eh_code:1; + + /* + * True for emulated SCSI host adapters (e.g. ATAPI) + */ + unsigned emulated:1; + + /* + * Name of proc directory + */ + char *proc_name; + +} Scsi_Host_Template; + +/* + * The scsi_hosts list contains the data for all + * possible scsi hosts. This is similar to the + * Scsi_Host_Template, except that we have one entry for each + * actual physical host adapter on the system, stored as a linked + * list. Note that if there are 2 aha1542 boards, then there will + * be two Scsi_Host entries, but only 1 Scsi_Host_Template entry.
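+ *
+ * Purely for illustration (this sketch is not part of the original
+ * header; the "mydrv" names and resource values are hypothetical):
+ * a minimal low-level driver fills in one template and lets its
+ * detect routine register each board it finds, returning the count:
+ *
+ *     static int mydrv_detect(Scsi_Host_Template *tpnt)
+ *     {
+ *         struct Scsi_Host *sh = scsi_register(tpnt, 0);
+ *         if (sh == NULL)
+ *             return 0;
+ *         sh->irq = 11;        (hypothetical resources)
+ *         sh->io_port = 0x330;
+ *         return 1;
+ *     }
+ *
+ *     static Scsi_Host_Template mydrv_template = {
+ *         name:            "mydrv",
+ *         detect:          mydrv_detect,
+ *         queuecommand:    mydrv_queuecommand,
+ *         can_queue:       16,
+ *         this_id:         7,
+ *         sg_tablesize:    SG_ALL,
+ *         cmd_per_lun:     1,
+ *         use_clustering:  ENABLE_CLUSTERING,
+ *         use_new_eh_code: 1,
+ *     };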
+ */ + +struct Scsi_Host +{ +/* private: */ + /* + * This information is private to the scsi mid-layer. Wrapping it in a + * struct private is a way of marking it in a sort of C++ type of way. + */ + struct Scsi_Host * next; + Scsi_Device * host_queue; + + + struct task_struct * ehandler; /* Error recovery thread. */ + struct semaphore * eh_wait; /* The error recovery thread waits on + this. */ + struct semaphore * eh_notify; /* wait for eh to begin */ + struct semaphore * eh_action; /* Wait for specific actions on the + host. */ + unsigned int eh_active:1; /* Indicates the eh thread is awake and active if + this is true. */ + wait_queue_head_t host_wait; + Scsi_Host_Template * hostt; + atomic_t host_active; /* commands checked out */ + volatile unsigned short host_busy; /* commands actually active on low-level */ + volatile unsigned short host_failed; /* commands that failed. */ + +/* public: */ + unsigned short extra_bytes; + unsigned short host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ + int resetting; /* if set, it means that last_reset is a valid value */ + unsigned long last_reset; + + + /* + * These three parameters can be used to allow for wide scsi, + * and for host adapters that support multiple busses + * The first two should be set to 1 more than the actual max id + * or lun (i.e. 8 for normal systems). + */ + unsigned int max_id; + unsigned int max_lun; + unsigned int max_channel; + + /* These parameters should be set by the detect routine */ + unsigned long base; + unsigned long io_port; + unsigned char n_io_port; + unsigned char dma_channel; + unsigned int irq; + + /* + * This is a unique identifier that must be assigned so that we + * have some way of identifying each detected host adapter properly + * and uniquely. For hosts that do not support more than one card + * in the system at one time, this does not need to be set. It is + * initialized to 0 in scsi_register. + */ + unsigned int unique_id; + + /* + * The rest can be copied from the template, or specifically + * initialized, as required. + */ + + /* + * The maximum length of SCSI commands that this host can accept. + * Probably 12 for most host adapters, but could be 16 for others. + * For drivers that don't set this field, a value of 12 is + * assumed. I am leaving this as a number rather than a bit + * because you never know what subsequent SCSI standards might do + * (i.e. could there be a 20 byte or a 24-byte command a few years + * down the road?). + */ + unsigned char max_cmd_len; + + int this_id; + int can_queue; + short cmd_per_lun; + short unsigned int sg_tablesize; + short unsigned int max_sectors; + + unsigned in_recovery:1; + unsigned unchecked_isa_dma:1; + unsigned use_clustering:1; + /* + * True if this host was loaded as a loadable module + */ + unsigned loaded_as_module:1; + + /* + * Host has rejected a command because it was busy. + */ + unsigned host_blocked:1; + + /* + * Host has requested that no further requests come through for the + * time being. + */ + unsigned host_self_blocked:1; + + /* + * Host uses correct SCSI ordering not PC ordering. The bit is + * set for the minority of drivers whose authors actually read the spec ;) + */ + unsigned reverse_ordering:1; + + /* + * Indicates that one or more devices on this host were starved, and + * when the device becomes less busy that we need to feed them. 
+ */ + unsigned some_device_starved:1; + + void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *); + + /* + * For SCSI hosts which are PCI devices, set pci_dev so that + * we can do BIOS EDD 3.0 mappings + */ + struct pci_dev *pci_dev; + + /* + * We should ensure that this is aligned, both for better performance + * and also because some compilers (m68k) don't automatically force + * alignment to a long boundary. + */ + unsigned long hostdata[0] /* Used for storage of host specific stuff */ + __attribute__ ((aligned (sizeof(unsigned long)))); +}; + +/* + * These two functions are used to allocate and free a pseudo device + * which will connect to the host adapter itself rather than any + * physical device. You must deallocate when you are done with the + * thing. This physical pseudo-device isn't real and won't be available + * from any high-level drivers. + */ +extern void scsi_free_host_dev(Scsi_Device * SDpnt); +extern Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt); + +extern void scsi_unblock_requests(struct Scsi_Host * SHpnt); +extern void scsi_block_requests(struct Scsi_Host * SHpnt); +extern void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel); + +typedef struct SHN + { + struct SHN * next; + char * name; + unsigned short host_no; + unsigned short host_registered; + unsigned loaded_as_module; + } Scsi_Host_Name; + +extern Scsi_Host_Name * scsi_host_no_list; +extern struct Scsi_Host * scsi_hostlist; +extern struct Scsi_Device_Template * scsi_devicelist; + +extern Scsi_Host_Template * scsi_hosts; + +extern void build_proc_dir_entries(Scsi_Host_Template *); + +/* + * scsi_init initializes the scsi hosts. + */ + +extern int next_scsi_host; + +unsigned int scsi_init(void); +extern struct Scsi_Host * scsi_register(Scsi_Host_Template *, int j); +extern void scsi_unregister(struct Scsi_Host * i); + +extern void scsi_register_blocked_host(struct Scsi_Host * SHpnt); +extern void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt); + +static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt, + struct pci_dev *pdev) +{ + SHpnt->pci_dev = pdev; +} + + +/* + * Prototypes for functions/data in scsi_scan.c + */ +extern void scan_scsis(struct Scsi_Host *shpnt, + uint hardcoded, + uint hchannel, + uint hid, + uint hlun); + +extern void scsi_mark_host_reset(struct Scsi_Host *Host); + +#define BLANK_HOST {"", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +struct Scsi_Device_Template +{ + struct Scsi_Device_Template * next; + const char * name; + const char * tag; + struct module * module; /* Used for loadable modules */ + unsigned char scsi_type; + unsigned int major; + unsigned int min_major; /* Minimum major in range. */ + unsigned int max_major; /* Maximum major in range. */ + unsigned int nr_dev; /* Number currently attached */ + unsigned int dev_noticed; /* Number of devices detected. */ + unsigned int dev_max; /* Current size of arrays */ + unsigned blk:1; /* 0 if character device */ + int (*detect)(Scsi_Device *); /* Returns 1 if we can attach this device */ + int (*init)(void); /* Sizes arrays based upon number of devices + * detected */ + void (*finish)(void); /* Perform initialization after attachment */ + int (*attach)(Scsi_Device *); /* Attach devices to arrays */ + void (*detach)(Scsi_Device *); + int (*init_command)(Scsi_Cmnd *); /* Used by new queueing code. 
+ Selects command for blkdevs */ +}; + +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt); + +int scsi_register_device(struct Scsi_Device_Template * sdpnt); +void scsi_deregister_device(struct Scsi_Device_Template * tpnt); + +/* These are used by loadable modules */ +extern int scsi_register_module(int, void *); +extern int scsi_unregister_module(int, void *); + +/* The different types of modules that we can load and unload */ +#define MODULE_SCSI_HA 1 +#define MODULE_SCSI_CONST 2 +#define MODULE_SCSI_IOCTL 3 +#define MODULE_SCSI_DEV 4 + + +/* + * This is an ugly hack. If we expect to be able to load devices at run time, + * we need to leave extra room in some of the data structures. Doing a + * realloc to enlarge the structures would be riddled with race conditions, + * so until a better solution is discovered, we use this crude approach. + * + * Even bigger hack for SparcSTORAGE arrays. Those are at least 6 disks, but + * usually up to 30 disks, so everyone would need to change this. -jj + * + * Note: These things are all evil and all need to go away. My plan is to + * tackle the character devices first, as there aren't any locking implications + * in the block device layer. The block devices will require more work. + * + * The generics driver has been updated to resize as required. So has the tape + * driver. Two down, two more to go. + */ +#ifndef CONFIG_SD_EXTRA_DEVS +#define CONFIG_SD_EXTRA_DEVS 2 +#endif +#ifndef CONFIG_SR_EXTRA_DEVS +#define CONFIG_SR_EXTRA_DEVS 2 +#endif +#define SD_EXTRA_DEVS CONFIG_SD_EXTRA_DEVS +#define SR_EXTRA_DEVS CONFIG_SR_EXTRA_DEVS + +#endif +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/drivers/scsi/scsi.c b/xen-2.4.16/drivers/scsi/scsi.c new file mode 100644 index 0000000000..320b7cd1a9 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi.c @@ -0,0 +1,2845 @@ +/* + * scsi.c Copyright (C) 1992 Drew Eckhardt + * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale + * + * generic mid-level SCSI driver + * Initial versions: Drew Eckhardt + * Subsequent revisions: Eric Youngdale + * + * + * + * Bug correction thanks go to: + * Rik Faith + * Tommy Thorn + * Thomas Wuensche + * + * Modified by Eric Youngdale eric@andante.org or ericy@gnu.ai.mit.edu to + * add scatter-gather, multiple outstanding requests, and other + * enhancements. + * + * Native multichannel, wide scsi, /proc/scsi and hot plugging + * support added by Michael Neuffer + * + * Added request_module("scsi_hostadapter") for kerneld: + * (Put an "alias scsi_hostadapter your_hostadapter" in /etc/modules.conf) + * Bjorn Ekwall + * (changed to kmod) + * + * Major improvements to the timeout, abort, and reset processing, + * as well as performance modifications for large queue depths by + * Leonard N. Zubkoff + * + * Converted cli() code to spinlocks, Ingo Molnar + * + * Jiffies wrap fixes (host->resetting), 3 Dec 1998 Andrea Arcangeli + * + * out_of_space hacks, D.
Gilbert (dpg) 990608 + */ + +#define REVISION "Revision: 1.00" +#define VERSION "Id: scsi.c 1.00 2000/09/26" + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/kernel.h> +/*#include <linux/ioport.h>*/ +#include <linux/blk.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +/*#include <linux/stat.h>*/ +/*#include <linux/smp_lock.h>*/ + +#define __KERNEL_SYSCALLS__ + +/*#include <linux/unistd.h>*/ +#include <linux/spinlock.h> + +#include <asm/system.h> +#include <asm/irq.h> +#include <asm/dma.h> +#include <asm/uaccess.h> + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +#ifdef CONFIG_KMOD +#include <linux/kmod.h> +#endif + +#undef USE_STATIC_SCSI_MEMORY + +struct proc_dir_entry *proc_scsi; + +#ifdef CONFIG_PROC_FS +static int scsi_proc_info(char *buffer, char **start, off_t offset, int length); +static void scsi_dump_status(int level); +#endif + +/* + static const char RCSid[] = "$Header: /vger/u4/cvs/linux/drivers/scsi/scsi.c,v 1.38 1997/01/19 23:07:18 davem Exp $"; + */ + +/* + * Definitions and constants. + */ + +#define MIN_RESET_DELAY (2*HZ) + +/* Do not call reset on error if we just did a reset within 15 sec. */ +#define MIN_RESET_PERIOD (15*HZ) + +/* + * Macro to determine the size of a SCSI command. This macro takes vendor + * unique commands into account. SCSI commands in groups 6 and 7 are + * vendor unique and we will depend upon the command length being + * supplied correctly in cmd_len. + */ +#define CDB_SIZE(SCpnt) ((((SCpnt->cmnd[0] >> 5) & 7) < 6) ? \ + COMMAND_SIZE(SCpnt->cmnd[0]) : SCpnt->cmd_len) + +/* + * Data declarations. + */ +unsigned long scsi_pid; +Scsi_Cmnd *last_cmnd; +/* Command group 3 is reserved and should never be used. */ +const unsigned char scsi_command_size[8] = +{ + 6, 10, 10, 12, + 16, 12, 10, 10 +}; +static unsigned long serial_number; +static Scsi_Cmnd *scsi_bh_queue_head; +static Scsi_Cmnd *scsi_bh_queue_tail; + +/* + * Note - the initial logging level can be set here to log events at boot time. + * After the system is up, you may enable logging via the /proc interface. + */ +unsigned int scsi_logging_level; + +const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE] = +{ + "Direct-Access ", + "Sequential-Access", + "Printer ", + "Processor ", + "WORM ", + "CD-ROM ", + "Scanner ", + "Optical Device ", + "Medium Changer ", + "Communications ", + "Unknown ", + "Unknown ", + "Unknown ", + "Enclosure ", +}; + +/* + * Function prototypes. + */ +extern void scsi_times_out(Scsi_Cmnd * SCpnt); +void scsi_build_commandblocks(Scsi_Device * SDpnt); + +/* + * These are the interface to the old error handling code. It should go away + * someday soon. + */ +extern void scsi_old_done(Scsi_Cmnd * SCpnt); +extern void scsi_old_times_out(Scsi_Cmnd * SCpnt); +extern int scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag); + +/* + * Private interface into the new error handling code. + */ +extern int scsi_new_reset(Scsi_Cmnd *SCpnt, unsigned int flag); + +/* + * Function: scsi_initialize_queue() + * + * Purpose: Selects queue handler function for a device. + * + * Arguments: SDpnt - device for which we need a handler function. + * + * Returns: Nothing + * + * Lock status: No locking assumed or required. + * + * Notes: Most devices will end up using scsi_request_fn for the + * handler function (at least as things are done now). + * The "block" feature basically ensures that only one of + * the blocked hosts is active at one time, mainly to work around + * buggy DMA chipsets where the memory gets starved. + * For this case, we have a special handler function, which + * does some checks and ultimately calls scsi_request_fn. + * + * The single_lun feature is a similar special case.
+ * + * We handle these things by stacking the handlers. The + * special case handlers simply check a few conditions, + * and return if they are not supposed to do anything. + * In the event that things are OK, then they call the next + * handler in the list - ultimately they call scsi_request_fn + * to do the dirty deed. + */ +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { + blk_init_queue(&SDpnt->request_queue, scsi_request_fn); + blk_queue_headactive(&SDpnt->request_queue, 0); + SDpnt->request_queue.queuedata = (void *) SDpnt; +} + +#ifdef MODULE +MODULE_PARM(scsi_logging_level, "i"); +MODULE_PARM_DESC(scsi_logging_level, "SCSI logging level; should be zero or nonzero"); + +#else + +static int __init scsi_logging_setup(char *str) +{ + int tmp; + + if (get_option(&str, &tmp) == 1) { + scsi_logging_level = (tmp ? ~0 : 0); + return 1; + } else { + printk(KERN_INFO "scsi_logging_setup : usage scsi_logging_level=n " + "(n should be 0 or non-zero)\n"); + return 0; + } +} + +__setup("scsi_logging=", scsi_logging_setup); + +#endif + +/* + * Issue a command and wait for it to complete + */ + +static void scsi_wait_done(Scsi_Cmnd * SCpnt) +{ + struct request *req; + + req = &SCpnt->request; + req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ + + if (req->waiting != NULL) { + complete(req->waiting); + } +} + +/* + * This lock protects the freelist for all devices on the system. + * We could make this finer grained by having a single lock per + * device if it is ever found that there is excessive contention + * on this lock. + */ +static spinlock_t device_request_lock = SPIN_LOCK_UNLOCKED; + +/* + * Used to protect insertion into and removal from the queue of + * commands to be processed by the bottom half handler. + */ +static spinlock_t scsi_bhqueue_lock = SPIN_LOCK_UNLOCKED; + +/* + * Function: scsi_allocate_request + * + * Purpose: Allocate a request descriptor. + * + * Arguments: device - device for which we want a request + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Pointer to request block. + * + * Notes: With the new queueing code, it becomes important + * to track the difference between a command and a + * request. A request is a pending item in the queue that + * has not yet reached the top of the queue. + */ + +Scsi_Request *scsi_allocate_request(Scsi_Device * device) +{ + Scsi_Request *SRpnt = NULL; + + if (!device) + panic("No device passed to scsi_allocate_request().\n"); + + SRpnt = (Scsi_Request *) kmalloc(sizeof(Scsi_Request), GFP_ATOMIC); + if( SRpnt == NULL ) + { + return NULL; + } + + memset(SRpnt, 0, sizeof(Scsi_Request)); + SRpnt->sr_device = device; + SRpnt->sr_host = device->host; + SRpnt->sr_magic = SCSI_REQ_MAGIC; + SRpnt->sr_data_direction = SCSI_DATA_UNKNOWN; + + return SRpnt; +} + +/* + * Function: scsi_release_request + * + * Purpose: Release a request descriptor. + * + * Arguments: device - device for which we want a request + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Pointer to request block. + * + * Notes: With the new queueing code, it becomes important + * to track the difference between a command and a + * request. A request is a pending item in the queue that + * has not yet reached the top of the queue. We still need + * to free a request when we are done with it, of course. 
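+ *
+ * Purely as an illustration (a hypothetical caller; cmnd, buf and
+ * len are placeholder variables, not part of the original file),
+ * the intended lifecycle pairs the two calls:
+ *
+ *     Scsi_Request *SRpnt = scsi_allocate_request(SDpnt);
+ *     if (SRpnt != NULL) {
+ *         scsi_wait_req(SRpnt, cmnd, buf, len, 30 * HZ, 3);
+ *         scsi_release_request(SRpnt);
+ *     }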
+ */ +void scsi_release_request(Scsi_Request * req) +{ + if( req->sr_command != NULL ) + { + scsi_release_command(req->sr_command); + req->sr_command = NULL; + } + + kfree(req); +} + +/* + * Function: scsi_allocate_device + * + * Purpose: Allocate a command descriptor. + * + * Arguments: device - device for which we want a command descriptor + * wait - 1 if we should wait in the event that none + * are available. + * interruptible - 1 if we should unblock and return NULL + * in the event that we must wait, and a signal + * arrives. + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Pointer to command descriptor. + * + * Notes: Prior to the new queue code, this function was not SMP-safe. + * + * If the wait flag is true, and we are waiting for a free + * command block, this function will interrupt and return + * NULL in the event that a signal arrives that needs to + * be handled. + * + * This function is deprecated, and drivers should be + * rewritten to use Scsi_Request instead of Scsi_Cmnd. + */ + +Scsi_Cmnd *scsi_allocate_device(Scsi_Device * device, int wait, + int interruptable) +{ + struct Scsi_Host *host; + Scsi_Cmnd *SCpnt = NULL; + Scsi_Device *SDpnt; + unsigned long flags; + + if (!device) + panic("No device passed to scsi_allocate_device().\n"); + + host = device->host; + + spin_lock_irqsave(&device_request_lock, flags); + + while (1 == 1) { + SCpnt = NULL; + if (!device->device_blocked) { + if (device->single_lun) { + /* + * FIXME(eric) - this is not at all optimal. Given that + * single lun devices are rare and usually slow + * (i.e. CD changers), this is good enough for now, but + * we may want to come back and optimize this later. + * + * Scan through all of the devices attached to this + * host, and see if any are active or not. If so, + * we need to defer this command. + * + * We really need a busy counter per device. This would + * allow us to more easily figure out whether we should + * do anything here or not. + */ + for (SDpnt = host->host_queue; + SDpnt; + SDpnt = SDpnt->next) { + /* + * Only look for other devices on the same bus + * with the same target ID. + */ + if (SDpnt->channel != device->channel + || SDpnt->id != device->id + || SDpnt == device) { + continue; + } + if( atomic_read(&SDpnt->device_active) != 0) + { + break; + } + } + if (SDpnt) { + /* + * Some other device in this cluster is busy. + * If asked to wait, we need to wait, otherwise + * return NULL. + */ + SCpnt = NULL; + goto busy; + } + } + /* + * Now we can check for a free command block for this device. + */ + for (SCpnt = device->device_queue; SCpnt; SCpnt = SCpnt->next) { + if (SCpnt->request.rq_status == RQ_INACTIVE) + break; + } + } + /* + * If we couldn't find a free command block, and we have been + * asked to wait, then do so. + */ + if (SCpnt) { + break; + } + busy: + /* + * If we have been asked to wait for a free block, then + * wait here. + */ + if (wait) { + DECLARE_WAITQUEUE(wait, current); + + /* + * We need to wait for a free commandblock. We need to + * insert ourselves into the list before we release the + * lock. This way if a block were released the same + * microsecond that we released the lock, the call + * to schedule() wouldn't block (well, it might switch, + * but the current task will still be schedulable. 
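+ *
+ * (For reference: this is the classic 2.4 sleep pattern -
+ * add_wait_queue(), set_current_state(), drop the lock, schedule(),
+ * retake the lock, remove_wait_queue(); the matching wakeup is the
+ * wake_up(&SDpnt->scpnt_wait) in __scsi_release_command() below.)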
+ */ + add_wait_queue(&device->scpnt_wait, &wait); + if( interruptable ) { + set_current_state(TASK_INTERRUPTIBLE); + } else { + set_current_state(TASK_UNINTERRUPTIBLE); + } + + spin_unlock_irqrestore(&device_request_lock, flags); + + /* + * This should block until a device command block + * becomes available. + */ + schedule(); + + spin_lock_irqsave(&device_request_lock, flags); + + remove_wait_queue(&device->scpnt_wait, &wait); + /* + * FIXME - Isn't this redundant?? Someone + * else will have forced the state back to running. + */ + set_current_state(TASK_RUNNING); + /* + * In the event that a signal has arrived that we need + * to consider, then simply return NULL. Everyone + * that calls us should be prepared for this + * possibility, and pass the appropriate code back + * to the user. + */ + if( interruptable ) { + if (signal_pending(current)) { + spin_unlock_irqrestore(&device_request_lock, flags); + return NULL; + } + } + } else { + spin_unlock_irqrestore(&device_request_lock, flags); + return NULL; + } + } + + SCpnt->request.rq_status = RQ_SCSI_BUSY; + SCpnt->request.waiting = NULL; /* And no one is waiting for this + * to complete */ + atomic_inc(&SCpnt->host->host_active); + atomic_inc(&SCpnt->device->device_active); + + SCpnt->buffer = NULL; + SCpnt->bufflen = 0; + SCpnt->request_buffer = NULL; + SCpnt->request_bufflen = 0; + + SCpnt->use_sg = 0; /* Reset the scatter-gather flag */ + SCpnt->old_use_sg = 0; + SCpnt->transfersize = 0; /* No default transfer size */ + SCpnt->cmd_len = 0; + + SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN; + SCpnt->sc_request = NULL; + SCpnt->sc_magic = SCSI_CMND_MAGIC; + + SCpnt->result = 0; + SCpnt->underflow = 0; /* Do not flag underflow conditions */ + SCpnt->old_underflow = 0; + SCpnt->resid = 0; + SCpnt->state = SCSI_STATE_INITIALIZING; + SCpnt->owner = SCSI_OWNER_HIGHLEVEL; + + spin_unlock_irqrestore(&device_request_lock, flags); + + SCSI_LOG_MLQUEUE(5, printk("Activating command for device %d (%d)\n", + SCpnt->target, + atomic_read(&SCpnt->host->host_active))); + + return SCpnt; +} + +inline void __scsi_release_command(Scsi_Cmnd * SCpnt) +{ + unsigned long flags; + Scsi_Device * SDpnt; + + spin_lock_irqsave(&device_request_lock, flags); + + SDpnt = SCpnt->device; + + SCpnt->request.rq_status = RQ_INACTIVE; + SCpnt->state = SCSI_STATE_UNUSED; + SCpnt->owner = SCSI_OWNER_NOBODY; + atomic_dec(&SCpnt->host->host_active); + atomic_dec(&SDpnt->device_active); + + SCSI_LOG_MLQUEUE(5, printk("Deactivating command for device %d (active=%d, failed=%d)\n", + SCpnt->target, + atomic_read(&SCpnt->host->host_active), + SCpnt->host->host_failed)); + if (SCpnt->host->host_failed != 0) { + SCSI_LOG_ERROR_RECOVERY(5, printk("Error handler thread %d %d\n", + SCpnt->host->in_recovery, + SCpnt->host->eh_active)); + } + /* + * If the host is having troubles, then look to see if this was the last + * command that might have failed. If so, wake up the error handler. + */ + if (SCpnt->host->in_recovery + && !SCpnt->host->eh_active + && SCpnt->host->host_busy == SCpnt->host->host_failed) { + SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", + atomic_read(&SCpnt->host->eh_wait->count))); + up(SCpnt->host->eh_wait); + } + + spin_unlock_irqrestore(&device_request_lock, flags); + + /* + * Wake up anyone waiting for this device. Do this after we + * have released the lock, as they will need it as soon as + * they wake up. + */ + wake_up(&SDpnt->scpnt_wait); +} + +/* + * Function: scsi_release_command + * + * Purpose: Release a command block. 
+ * + * Arguments: SCpnt - command block we are releasing. + * + * Notes: The command block can no longer be used by the caller once + * this function is called. This is in effect the inverse + * of scsi_allocate_device. Note that we also must perform + * a couple of additional tasks. We must first wake up any + * processes that might have blocked waiting for a command + * block, and secondly we must hit the queue handler function + * to make sure that the device is busy. Note - there is an + * option to not do this - there were instances where we could + * recurse too deeply and blow the stack if this happened + * when we were indirectly called from the request function + * itself. + * + * The idea is that a lot of the mid-level internals gunk + * gets hidden in this function. Upper level drivers don't + * have any chickens to wave in the air to get things to + * work reliably. + * + * This function is deprecated, and drivers should be + * rewritten to use Scsi_Request instead of Scsi_Cmnd. + */ +void scsi_release_command(Scsi_Cmnd * SCpnt) +{ + request_queue_t *q; + Scsi_Device * SDpnt; + + SDpnt = SCpnt->device; + + __scsi_release_command(SCpnt); + + /* + * Finally, hit the queue request function to make sure that + * the device is actually busy if there are requests present. + * This won't block - if the device cannot take any more, life + * will go on. + */ + q = &SDpnt->request_queue; + scsi_queue_next_request(q, NULL); +} + +/* + * Function: scsi_dispatch_cmd + * + * Purpose: Dispatch a command to the low-level driver. + * + * Arguments: SCpnt - command block we are dispatching. + * + * Notes: + */ +int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt) +{ +#ifdef DEBUG_DELAY + unsigned long clock; +#endif + struct Scsi_Host *host; + int rtn = 0; + unsigned long flags = 0; + unsigned long timeout; + + ASSERT_LOCK(&io_request_lock, 0); + +#if DEBUG + unsigned long *ret = 0; +#ifdef __mips__ + __asm__ __volatile__("move\t%0,$31":"=r"(ret)); +#else + ret = __builtin_return_address(0); +#endif +#endif + + host = SCpnt->host; + + /* Assign a unique nonzero serial_number. */ + if (++serial_number == 0) + serial_number = 1; + SCpnt->serial_number = serial_number; + SCpnt->pid = scsi_pid++; + + /* + * We will wait MIN_RESET_DELAY clock ticks after the last reset so + * we can avoid the drive not being ready. + */ + timeout = host->last_reset + MIN_RESET_DELAY; + + if (host->resetting && time_before(jiffies, timeout)) { + int ticks_remaining = timeout - jiffies; + /* + * NOTE: This may be executed from within an interrupt + * handler! This is bad, but for now, it'll do. The irq + * level of the interrupt handler has been masked out by the + * platform dependent interrupt handling code already, so the + * sti() here will not cause another call to the SCSI host's + * interrupt handler (assuming there is one irq-level per + * host). + */ + while (--ticks_remaining >= 0) + mdelay(1 + 999 / HZ); + host->resetting = 0; + } + if (host->hostt->use_new_eh_code) { + scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out); + } else { + scsi_add_timer(SCpnt, SCpnt->timeout_per_command, + scsi_old_times_out); + } + + /* + * We will use a queued command if possible, otherwise we will emulate the + * queuing and calling of the completion function ourselves.
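+ *
+ * (For illustration, not in the original: the int that the non-queued
+ * command() path stores in SCpnt->result packs the byte fields
+ * described in hosts.h, so a completion can be tested with the
+ * scsi.h accessors, e.g.
+ *
+ *     if (host_byte(SCpnt->result) == DID_OK &&
+ *         status_byte(SCpnt->result) == GOOD)
+ *
+ * for a fully successful command.)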
+ */ + SCSI_LOG_MLQUEUE(3, printk("scsi_dispatch_cmnd (host = %d, channel = %d, target = %d, " + "command = %p, buffer = %p, \nbufflen = %d, done = %p)\n", + SCpnt->host->host_no, SCpnt->channel, SCpnt->target, SCpnt->cmnd, + SCpnt->buffer, SCpnt->bufflen, SCpnt->done)); + + SCpnt->state = SCSI_STATE_QUEUED; + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + if (host->can_queue) { + SCSI_LOG_MLQUEUE(3, printk("queuecommand : routine at %p\n", + host->hostt->queuecommand)); + /* + * Use the old error handling code if we haven't converted the driver + * to use the new one yet. Note - only the new queuecommand variant + * passes a meaningful return value. + */ + if (host->hostt->use_new_eh_code) { + /* + * Before we queue this command, check if the command + * length exceeds what the host adapter can handle. + */ + if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { + spin_lock_irqsave(&io_request_lock, flags); + rtn = host->hostt->queuecommand(SCpnt, scsi_done); + spin_unlock_irqrestore(&io_request_lock, flags); + if (rtn != 0) { + scsi_delete_timer(SCpnt); + scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); + SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); + } + } else { + SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); + SCpnt->result = (DID_ABORT << 16); + spin_lock_irqsave(&io_request_lock, flags); + scsi_done(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + rtn = 1; + } + } else { + /* + * Before we queue this command, check if the command + * length exceeds what the host adapter can handle. + */ + if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { + spin_lock_irqsave(&io_request_lock, flags); + host->hostt->queuecommand(SCpnt, scsi_old_done); + spin_unlock_irqrestore(&io_request_lock, flags); + } else { + SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); + SCpnt->result = (DID_ABORT << 16); + spin_lock_irqsave(&io_request_lock, flags); + scsi_old_done(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + rtn = 1; + } + } + } else { + int temp; + + SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); + spin_lock_irqsave(&io_request_lock, flags); + temp = host->hostt->command(SCpnt); + SCpnt->result = temp; +#ifdef DEBUG_DELAY + spin_unlock_irqrestore(&io_request_lock, flags); + clock = jiffies + 4 * HZ; + while (time_before(jiffies, clock)) { + barrier(); + cpu_relax(); + } + printk("done(host = %d, result = %04x) : routine at %p\n", + host->host_no, temp, host->hostt->command); + spin_lock_irqsave(&io_request_lock, flags); +#endif + if (host->hostt->use_new_eh_code) { + scsi_done(SCpnt); + } else { + scsi_old_done(SCpnt); + } + spin_unlock_irqrestore(&io_request_lock, flags); + } + SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); + return rtn; +} + +devfs_handle_t scsi_devfs_handle; + +/* + * scsi_do_cmd sends all the commands out to the low-level driver. It + * handles the specifics required for each low level driver - ie queued + * or non queued. It also prevents conflicts when different high level + * drivers go for the same host at the same time. 
+ */ + +void scsi_wait_req (Scsi_Request * SRpnt, const void *cmnd , + void *buffer, unsigned bufflen, + int timeout, int retries) +{ + DECLARE_COMPLETION(wait); + request_queue_t *q = &SRpnt->sr_device->request_queue; + + SRpnt->sr_request.waiting = &wait; + SRpnt->sr_request.rq_status = RQ_SCSI_BUSY; + scsi_do_req (SRpnt, (void *) cmnd, + buffer, bufflen, scsi_wait_done, timeout, retries); + generic_unplug_device(q); + wait_for_completion(&wait); + SRpnt->sr_request.waiting = NULL; + if( SRpnt->sr_command != NULL ) + { + scsi_release_command(SRpnt->sr_command); + SRpnt->sr_command = NULL; + } + +} + +/* + * Function: scsi_do_req + * + * Purpose: Queue a SCSI request + * + * Arguments: SRpnt - command descriptor. + * cmnd - actual SCSI command to be performed. + * buffer - data buffer. + * bufflen - size of data buffer. + * done - completion function to be run. + * timeout - how long to let it run before timeout. + * retries - number of retries we allow. + * + * Lock status: With the new queueing code, this is SMP-safe, and no locks + * need be held upon entry. The old queueing code the lock was + * assumed to be held upon entry. + * + * Returns: Nothing. + * + * Notes: Prior to the new queue code, this function was not SMP-safe. + * Also, this function is now only used for queueing requests + * for things like ioctls and character device requests - this + * is because we essentially just inject a request into the + * queue for the device. Normal block device handling manipulates + * the queue directly. + */ +void scsi_do_req(Scsi_Request * SRpnt, const void *cmnd, + void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *), + int timeout, int retries) +{ + Scsi_Device * SDpnt = SRpnt->sr_device; + struct Scsi_Host *host = SDpnt->host; + + ASSERT_LOCK(&io_request_lock, 0); + + SCSI_LOG_MLQUEUE(4, + { + int i; + int target = SDpnt->id; + int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]); + printk("scsi_do_req (host = %d, channel = %d target = %d, " + "buffer =%p, bufflen = %d, done = %p, timeout = %d, " + "retries = %d)\n" + "command : ", host->host_no, SDpnt->channel, target, buffer, + bufflen, done, timeout, retries); + for (i = 0; i < size; ++i) + printk("%02x ", ((unsigned char *) cmnd)[i]); + printk("\n"); + }); + + if (!host) { + panic("Invalid or not present host.\n"); + } + + /* + * If the upper level driver is reusing these things, then + * we should release the low-level block now. Another one will + * be allocated later when this request is getting queued. + */ + if( SRpnt->sr_command != NULL ) + { + scsi_release_command(SRpnt->sr_command); + SRpnt->sr_command = NULL; + } + + /* + * We must prevent reentrancy to the lowlevel host driver. This prevents + * it - we enter a loop until the host we want to talk to is not busy. + * Race conditions are prevented, as interrupts are disabled in between the + * time we check for the host being not busy, and the time we mark it busy + * ourselves. + */ + + + /* + * Our own function scsi_done (which marks the host as not busy, disables + * the timeout counter, etc) will be called by us or by the + * scsi_hosts[host].queuecommand() function needs to also call + * the completion function for the high level driver. 
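+ *
+ * (Beware: the memcpy below copies a full sizeof(SRpnt->sr_cmnd)
+ * bytes from the caller's cmnd pointer, so callers should pass a
+ * buffer of at least that size even for short CDBs.)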
+ */ + + memcpy((void *) SRpnt->sr_cmnd, (const void *) cmnd, + sizeof(SRpnt->sr_cmnd)); + SRpnt->sr_bufflen = bufflen; + SRpnt->sr_buffer = buffer; + SRpnt->sr_allowed = retries; + SRpnt->sr_done = done; + SRpnt->sr_timeout_per_command = timeout; + + if (SRpnt->sr_cmd_len == 0) + SRpnt->sr_cmd_len = COMMAND_SIZE(SRpnt->sr_cmnd[0]); + + /* + * At this point, we merely set up the command, stick it in the normal + * request queue, and return. Eventually that request will come to the + * top of the list, and will be dispatched. + */ + scsi_insert_special_req(SRpnt, 0); + + SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_req()\n")); +} + +/* + * Function: scsi_init_cmd_from_req + * + * Purpose: Initialize a Scsi_Cmnd from a Scsi_Request + * + * Arguments: SCpnt - command descriptor. + * SRpnt - Request from the queue. + * + * Lock status: None needed. + * + * Returns: Nothing. + * + * Notes: Mainly transfer data from the request structure to the + * command structure. The request structure is allocated + * using the normal memory allocator, and requests can pile + * up to more or less any depth. The command structure represents + * a consumable resource, as these are allocated into a pool + * when the SCSI subsystem initializes. The preallocation is + * required so that in low-memory situations a disk I/O request + * won't cause the memory manager to try and write out a page. + * The request structure is generally used by ioctls and character + * devices. + */ +void scsi_init_cmd_from_req(Scsi_Cmnd * SCpnt, Scsi_Request * SRpnt) +{ + struct Scsi_Host *host = SCpnt->host; + + ASSERT_LOCK(&io_request_lock, 0); + + SCpnt->owner = SCSI_OWNER_MIDLEVEL; + SRpnt->sr_command = SCpnt; + + if (!host) { + panic("Invalid or not present host.\n"); + } + + SCpnt->cmd_len = SRpnt->sr_cmd_len; + SCpnt->use_sg = SRpnt->sr_use_sg; + + memcpy((void *) &SCpnt->request, (const void *) &SRpnt->sr_request, + sizeof(SRpnt->sr_request)); + memcpy((void *) SCpnt->data_cmnd, (const void *) SRpnt->sr_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->reset_chain = NULL; + SCpnt->serial_number = 0; + SCpnt->serial_number_at_timeout = 0; + SCpnt->bufflen = SRpnt->sr_bufflen; + SCpnt->buffer = SRpnt->sr_buffer; + SCpnt->flags = 0; + SCpnt->retries = 0; + SCpnt->allowed = SRpnt->sr_allowed; + SCpnt->done = SRpnt->sr_done; + SCpnt->timeout_per_command = SRpnt->sr_timeout_per_command; + + SCpnt->sc_data_direction = SRpnt->sr_data_direction; + + SCpnt->sglist_len = SRpnt->sr_sglist_len; + SCpnt->underflow = SRpnt->sr_underflow; + + SCpnt->sc_request = SRpnt; + + memcpy((void *) SCpnt->cmnd, (const void *) SRpnt->sr_cmnd, + sizeof(SCpnt->cmnd)); + /* Zero the sense buffer. Some host adapters automatically request + * sense on error. 0 is not a valid sense code. + */ + memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); + SCpnt->request_buffer = SRpnt->sr_buffer; + SCpnt->request_bufflen = SRpnt->sr_bufflen; + SCpnt->old_use_sg = SCpnt->use_sg; + if (SCpnt->cmd_len == 0) + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + SCpnt->old_cmd_len = SCpnt->cmd_len; + SCpnt->sc_old_data_direction = SCpnt->sc_data_direction; + SCpnt->old_underflow = SCpnt->underflow; + + /* Start the timer ticking. */ + + SCpnt->internal_timeout = NORMAL_TIMEOUT; + SCpnt->abort_reason = 0; + SCpnt->result = 0; + + SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_init_cmd_from_req()\n")); +} + +/* + * Function: scsi_do_cmd + * + * Purpose: Queue a SCSI command + * + * Arguments: SCpnt - command descriptor.
+ * cmnd - actual SCSI command to be performed. + * buffer - data buffer. + * bufflen - size of data buffer. + * done - completion function to be run. + * timeout - how long to let it run before timeout. + * retries - number of retries we allow. + * + * Lock status: With the new queueing code, this is SMP-safe, and no locks + * need be held upon entry. The old queueing code the lock was + * assumed to be held upon entry. + * + * Returns: Nothing. + * + * Notes: Prior to the new queue code, this function was not SMP-safe. + * Also, this function is now only used for queueing requests + * for things like ioctls and character device requests - this + * is because we essentially just inject a request into the + * queue for the device. Normal block device handling manipulates + * the queue directly. + */ +void scsi_do_cmd(Scsi_Cmnd * SCpnt, const void *cmnd, + void *buffer, unsigned bufflen, void (*done) (Scsi_Cmnd *), + int timeout, int retries) +{ + struct Scsi_Host *host = SCpnt->host; + + ASSERT_LOCK(&io_request_lock, 0); + + SCpnt->pid = scsi_pid++; + SCpnt->owner = SCSI_OWNER_MIDLEVEL; + + SCSI_LOG_MLQUEUE(4, + { + int i; + int target = SCpnt->target; + int size = COMMAND_SIZE(((const unsigned char *)cmnd)[0]); + printk("scsi_do_cmd (host = %d, channel = %d target = %d, " + "buffer =%p, bufflen = %d, done = %p, timeout = %d, " + "retries = %d)\n" + "command : ", host->host_no, SCpnt->channel, target, buffer, + bufflen, done, timeout, retries); + for (i = 0; i < size; ++i) + printk("%02x ", ((unsigned char *) cmnd)[i]); + printk("\n"); + }); + + if (!host) { + panic("Invalid or not present host.\n"); + } + /* + * We must prevent reentrancy to the lowlevel host driver. This prevents + * it - we enter a loop until the host we want to talk to is not busy. + * Race conditions are prevented, as interrupts are disabled in between the + * time we check for the host being not busy, and the time we mark it busy + * ourselves. + */ + + + /* + * Our own function scsi_done (which marks the host as not busy, disables + * the timeout counter, etc) will be called by us or by the + * scsi_hosts[host].queuecommand() function needs to also call + * the completion function for the high level driver. + */ + + memcpy((void *) SCpnt->data_cmnd, (const void *) cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->reset_chain = NULL; + SCpnt->serial_number = 0; + SCpnt->serial_number_at_timeout = 0; + SCpnt->bufflen = bufflen; + SCpnt->buffer = buffer; + SCpnt->flags = 0; + SCpnt->retries = 0; + SCpnt->allowed = retries; + SCpnt->done = done; + SCpnt->timeout_per_command = timeout; + + memcpy((void *) SCpnt->cmnd, (const void *) cmnd, + sizeof(SCpnt->cmnd)); + /* Zero the sense buffer. Some host adapters automatically request + * sense on error. 0 is not a valid sense code. + */ + memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); + SCpnt->request_buffer = buffer; + SCpnt->request_bufflen = bufflen; + SCpnt->old_use_sg = SCpnt->use_sg; + if (SCpnt->cmd_len == 0) + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + SCpnt->old_cmd_len = SCpnt->cmd_len; + SCpnt->sc_old_data_direction = SCpnt->sc_data_direction; + SCpnt->old_underflow = SCpnt->underflow; + + /* Start the timer ticking. */ + + SCpnt->internal_timeout = NORMAL_TIMEOUT; + SCpnt->abort_reason = 0; + SCpnt->result = 0; + + /* + * At this point, we merely set up the command, stick it in the normal + * request queue, and return. Eventually that request will come to the + * top of the list, and will be dispatched. 
+ */ + scsi_insert_special_cmd(SCpnt, 0); + + SCSI_LOG_MLQUEUE(3, printk("Leaving scsi_do_cmd()\n")); +} + +/* + * This function is the mid-level interrupt routine, which decides how + * to handle error conditions. Each invocation of this function must + * do one and *only* one of the following: + * + * 1) Insert command in BH queue. + * 2) Activate error handler for host. + * + * FIXME(eric) - I am concerned about stack overflow (still). An + * interrupt could come while we are processing the bottom queue, + * which would cause another command to be stuffed onto the bottom + * queue, and it would in turn be processed as that interrupt handler + * is returning. Given a sufficiently steady rate of returning + * commands, this could cause the stack to overflow. I am not sure + * what is the most appropriate solution here - we should probably + * keep a depth count, and not process any commands while we still + * have a bottom handler active higher in the stack. + * + * There is currently code in the bottom half handler to monitor + * recursion in the bottom handler and report if it ever happens. If + * this becomes a problem, it won't be hard to engineer something to + * deal with it so that only the outer layer ever does any real + * processing. + */ +void scsi_done(Scsi_Cmnd * SCpnt) +{ + unsigned long flags; + int tstatus; + + /* + * We don't have to worry about this one timing out any more. + */ + tstatus = scsi_delete_timer(SCpnt); + + /* + * If we are unable to remove the timer, it means that the command + * has already timed out. In this case, we have no choice but to + * let the timeout function run, as we have no idea where in fact + * that function could really be. It might be on another processor, + * etc, etc. + */ + if (!tstatus) { + SCpnt->done_late = 1; + return; + } + /* Set the serial numbers back to zero */ + SCpnt->serial_number = 0; + + /* + * First, see whether this command already timed out. If so, we ignore + * the response. We treat it as if the command never finished. + * + * Since serial_number is now 0, the error handler could detect this + * situation and avoid calling the low level driver abort routine. + * (DB) + * + * FIXME(eric) - I believe that this test is now redundant, due to + * the test of the return status of del_timer(). + */ + if (SCpnt->state == SCSI_STATE_TIMEOUT) { + SCSI_LOG_MLCOMPLETE(1, printk("Ignoring completion of %p due to timeout status", SCpnt)); + return; + } + spin_lock_irqsave(&scsi_bhqueue_lock, flags); + + SCpnt->serial_number_at_timeout = 0; + SCpnt->state = SCSI_STATE_BHQUEUE; + SCpnt->owner = SCSI_OWNER_BH_HANDLER; + SCpnt->bh_next = NULL; + + /* + * Next, put this command in the BH queue. + * + * We need a spinlock here, or compare and exchange if we can reorder incoming + * Scsi_Cmnds, as it happens pretty often that scsi_done is called multiple times + * before the bh is serviced. -jj + * + * We already have the io_request_lock here, since we are called from the + * interrupt handler or the error handler. (DB) + * + * This may be true at the moment, but I would like to wean all of the low + * level drivers away from using io_request_lock. Technically they should + * all use their own locking. I am adding a small spinlock to protect + * this data structure to make it safe for that day.
(ERY) + */ + if (!scsi_bh_queue_head) { + scsi_bh_queue_head = SCpnt; + scsi_bh_queue_tail = SCpnt; + } else { + scsi_bh_queue_tail->bh_next = SCpnt; + scsi_bh_queue_tail = SCpnt; + } + + spin_unlock_irqrestore(&scsi_bhqueue_lock, flags); + /* + * Mark the bottom half handler to be run. + */ + mark_bh(SCSI_BH); +} + +/* + * Procedure: scsi_bottom_half_handler + * + * Purpose: Called after we have finished processing interrupts, it + * performs post-interrupt handling for commands that may + * have completed. + * + * Notes: This is called with all interrupts enabled. This should reduce + * interrupt latency, stack depth, and reentrancy of the low-level + * drivers. + * + * The io_request_lock is required in all the routine. There was a subtle + * race condition when scsi_done is called after a command has already + * timed out but before the time out is processed by the error handler. + * (DB) + * + * I believe I have corrected this. We simply monitor the return status of + * del_timer() - if this comes back as 0, it means that the timer has fired + * and that a timeout is in progress. I have modified scsi_done() such + * that in this instance the command is never inserted in the bottom + * half queue. Thus the only time we hold the lock here is when + * we wish to atomically remove the contents of the queue. + */ +void scsi_bottom_half_handler(void) +{ + Scsi_Cmnd *SCpnt; + Scsi_Cmnd *SCnext; + unsigned long flags; + + + while (1 == 1) { + spin_lock_irqsave(&scsi_bhqueue_lock, flags); + SCpnt = scsi_bh_queue_head; + scsi_bh_queue_head = NULL; + spin_unlock_irqrestore(&scsi_bhqueue_lock, flags); + + if (SCpnt == NULL) { + return; + } + SCnext = SCpnt->bh_next; + + for (; SCpnt; SCpnt = SCnext) { + SCnext = SCpnt->bh_next; + + switch (scsi_decide_disposition(SCpnt)) { + case SUCCESS: + /* + * Add to BH queue. + */ + SCSI_LOG_MLCOMPLETE(3, printk("Command finished %d %d 0x%x\n", SCpnt->host->host_busy, + SCpnt->host->host_failed, + SCpnt->result)); + + scsi_finish_command(SCpnt); + break; + case NEEDS_RETRY: + /* + * We only come in here if we want to retry a command. The + * test to see whether the command should be retried should be + * keeping track of the number of tries, so we don't end up looping, + * of course. + */ + SCSI_LOG_MLCOMPLETE(3, printk("Command needs retry %d %d 0x%x\n", SCpnt->host->host_busy, + SCpnt->host->host_failed, SCpnt->result)); + + scsi_retry_command(SCpnt); + break; + case ADD_TO_MLQUEUE: + /* + * This typically happens for a QUEUE_FULL message - + * typically only when the queue depth is only + * approximate for a given device. Adding a command + * to the queue for the device will prevent further commands + * from being sent to the device, so we shouldn't end up + * with tons of things being sent down that shouldn't be. + */ + SCSI_LOG_MLCOMPLETE(3, printk("Command rejected as device queue full, put on ml queue %p\n", + SCpnt)); + scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_DEVICE_BUSY); + break; + default: + /* + * Here we have a fatal error of some sort. Turn it over to + * the error handler. + */ + SCSI_LOG_MLCOMPLETE(3, printk("Command failed %p %x active=%d busy=%d failed=%d\n", + SCpnt, SCpnt->result, + atomic_read(&SCpnt->host->host_active), + SCpnt->host->host_busy, + SCpnt->host->host_failed)); + + /* + * Dump the sense information too. 
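+ *
+ * (For reference: status_byte() in scsi.h yields the SCSI status
+ * shifted right one bit, so a raw CHECK CONDITION status of 0x02
+ * is seen here as the CHECK_CONDITION value 0x01 tested below.)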
+ */ + if ((status_byte(SCpnt->result) & CHECK_CONDITION) != 0) { + SCSI_LOG_MLCOMPLETE(3, print_sense("bh", SCpnt)); + } + if (SCpnt->host->eh_wait != NULL) { + SCpnt->host->host_failed++; + SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; + SCpnt->state = SCSI_STATE_FAILED; + SCpnt->host->in_recovery = 1; + /* + * If the host is having troubles, then look to see if this was the last + * command that might have failed. If so, wake up the error handler. + */ + if (SCpnt->host->host_busy == SCpnt->host->host_failed) { + SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread (%d)\n", + atomic_read(&SCpnt->host->eh_wait->count))); + up(SCpnt->host->eh_wait); + } + } else { + /* + * We only get here if the error recovery thread has died. + */ + scsi_finish_command(SCpnt); + } + } + } /* for(; SCpnt...) */ + + } /* while(1==1) */ + +} + +/* + * Function: scsi_retry_command + * + * Purpose: Send a command back to the low level to be retried. + * + * Notes: This command is always executed in the context of the + * bottom half handler, or the error handler thread. Low + * level drivers should not become re-entrant as a result of + * this. + */ +int scsi_retry_command(Scsi_Cmnd * SCpnt) +{ + memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + + /* + * Zero the sense information from the last time we tried + * this command. + */ + memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); + + return scsi_dispatch_cmd(SCpnt); +} + +/* + * Function: scsi_finish_command + * + * Purpose: Pass command off to upper layer for finishing of I/O + * request, waking processes that are waiting on results, + * etc. + */ +void scsi_finish_command(Scsi_Cmnd * SCpnt) +{ + struct Scsi_Host *host; + Scsi_Device *device; + Scsi_Request * SRpnt; + unsigned long flags; + + ASSERT_LOCK(&io_request_lock, 0); + + host = SCpnt->host; + device = SCpnt->device; + + /* + * We need to protect the decrement, as otherwise a race condition + * would exist. Fiddling with SCpnt isn't a problem as the + * design only allows a single SCpnt to be active in only + * one execution context, but the device and host structures are + * shared. + */ + spin_lock_irqsave(&io_request_lock, flags); + host->host_busy--; /* Indicate that we are free */ + device->device_busy--; /* Decrement device usage counter. */ + spin_unlock_irqrestore(&io_request_lock, flags); + + /* + * Clear the flags which say that the device/host is no longer + * capable of accepting new commands. These are set in scsi_queue.c + * for both the queue full condition on a device, and for a + * host full condition on the host. + */ + host->host_blocked = FALSE; + device->device_blocked = FALSE; + + /* + * If we have valid sense information, then some kind of recovery + * must have taken place. Make a note of this. 
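+ *
+ * (Illustration, not in the original: an upper-level caller can then
+ * check for recovered sense data with, e.g.,
+ *
+ *     if (driver_byte(SCpnt->result) & DRIVER_SENSE)
+ *         print_sense("sd", SCpnt);
+ *
+ * using the print_sense() helper used elsewhere in this file.)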
+ */ + if (scsi_sense_valid(SCpnt)) { + SCpnt->result |= (DRIVER_SENSE << 24); + } + SCSI_LOG_MLCOMPLETE(3, printk("Notifying upper driver of completion for device %d %x\n", + SCpnt->device->id, SCpnt->result)); + + SCpnt->owner = SCSI_OWNER_HIGHLEVEL; + SCpnt->state = SCSI_STATE_FINISHED; + + /* We can get here with use_sg=0, causing a panic in the upper level (DB) */ + SCpnt->use_sg = SCpnt->old_use_sg; + + /* + * If there is an associated request structure, copy the data over before we call the + * completion function. + */ + SRpnt = SCpnt->sc_request; + if( SRpnt != NULL ) { + SRpnt->sr_result = SRpnt->sr_command->result; + if( SRpnt->sr_result != 0 ) { + memcpy(SRpnt->sr_sense_buffer, + SRpnt->sr_command->sense_buffer, + sizeof(SRpnt->sr_sense_buffer)); + } + } + + SCpnt->done(SCpnt); +} + +static int scsi_register_host(Scsi_Host_Template *); +static int scsi_unregister_host(Scsi_Host_Template *); + +/* + * Function: scsi_release_commandblocks() + * + * Purpose: Release command blocks associated with a device. + * + * Arguments: SDpnt - device + * + * Returns: Nothing + * + * Lock status: No locking assumed or required. + * + * Notes: + */ +void scsi_release_commandblocks(Scsi_Device * SDpnt) +{ + Scsi_Cmnd *SCpnt, *SCnext; + unsigned long flags; + + spin_lock_irqsave(&device_request_lock, flags); + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCnext) { + SDpnt->device_queue = SCnext = SCpnt->next; + kfree((char *) SCpnt); + } + SDpnt->has_cmdblocks = 0; + SDpnt->queue_depth = 0; + spin_unlock_irqrestore(&device_request_lock, flags); +} + +/* + * Function: scsi_build_commandblocks() + * + * Purpose: Allocate command blocks associated with a device. + * + * Arguments: SDpnt - device + * + * Returns: Nothing + * + * Lock status: No locking assumed or required. + * + * Notes: + */ +void scsi_build_commandblocks(Scsi_Device * SDpnt) +{ + unsigned long flags; + struct Scsi_Host *host = SDpnt->host; + int j; + Scsi_Cmnd *SCpnt; + + spin_lock_irqsave(&device_request_lock, flags); + + if (SDpnt->queue_depth == 0) + { + SDpnt->queue_depth = host->cmd_per_lun; + if (SDpnt->queue_depth == 0) + SDpnt->queue_depth = 1; /* live to fight another day */ + } + SDpnt->device_queue = NULL; + + for (j = 0; j < SDpnt->queue_depth; j++) { + SCpnt = (Scsi_Cmnd *) + kmalloc(sizeof(Scsi_Cmnd), + GFP_ATOMIC | + (host->unchecked_isa_dma ? GFP_DMA : 0)); + if (NULL == SCpnt) + break; /* If not, the next line will oops ... 
*/ + memset(SCpnt, 0, sizeof(Scsi_Cmnd)); + SCpnt->host = host; + SCpnt->device = SDpnt; + SCpnt->target = SDpnt->id; + SCpnt->lun = SDpnt->lun; + SCpnt->channel = SDpnt->channel; + SCpnt->request.rq_status = RQ_INACTIVE; + SCpnt->use_sg = 0; + SCpnt->old_use_sg = 0; + SCpnt->old_cmd_len = 0; + SCpnt->underflow = 0; + SCpnt->old_underflow = 0; + SCpnt->transfersize = 0; + SCpnt->resid = 0; + SCpnt->serial_number = 0; + SCpnt->serial_number_at_timeout = 0; + SCpnt->host_scribble = NULL; + SCpnt->next = SDpnt->device_queue; + SDpnt->device_queue = SCpnt; + SCpnt->state = SCSI_STATE_UNUSED; + SCpnt->owner = SCSI_OWNER_NOBODY; + } + if (j < SDpnt->queue_depth) { /* low on space (D.Gilbert 990424) */ + printk(KERN_WARNING "scsi_build_commandblocks: want=%d, space for=%d blocks\n", + SDpnt->queue_depth, j); + SDpnt->queue_depth = j; + SDpnt->has_cmdblocks = (0 != j); + } else { + SDpnt->has_cmdblocks = 1; + } + spin_unlock_irqrestore(&device_request_lock, flags); +} + +void __init scsi_host_no_insert(char *str, int n) +{ + Scsi_Host_Name *shn, *shn2; + int len; + + len = strlen(str); + if (len && (shn = (Scsi_Host_Name *) kmalloc(sizeof(Scsi_Host_Name), GFP_ATOMIC))) { + if ((shn->name = kmalloc(len+1, GFP_ATOMIC))) { + strncpy(shn->name, str, len); + shn->name[len] = 0; + shn->host_no = n; + shn->host_registered = 0; + shn->loaded_as_module = 1; /* numbers shouldn't be freed in any case */ + shn->next = NULL; + if (scsi_host_no_list) { + for (shn2 = scsi_host_no_list;shn2->next;shn2 = shn2->next) + ; + shn2->next = shn; + } + else + scsi_host_no_list = shn; + max_scsi_hosts = n+1; + } + else + kfree((char *) shn); + } +} + +#ifdef CONFIG_PROC_FS +static int scsi_proc_info(char *buffer, char **start, off_t offset, int length) +{ + Scsi_Device *scd; + struct Scsi_Host *HBA_ptr; + int size, len = 0; + off_t begin = 0; + off_t pos = 0; + + /* + * First, see if there are any attached devices or not. + */ + for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) { + if (HBA_ptr->host_queue != NULL) { + break; + } + } + size = sprintf(buffer + len, "Attached devices: %s\n", (HBA_ptr) ? 
"" : "none"); + len += size; + pos = begin + len; + for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) { +#if 0 + size += sprintf(buffer + len, "scsi%2d: %s\n", (int) HBA_ptr->host_no, + HBA_ptr->hostt->procname); + len += size; + pos = begin + len; +#endif + for (scd = HBA_ptr->host_queue; scd; scd = scd->next) { + proc_print_scsidevice(scd, buffer, &size, len); + len += size; + pos = begin + len; + + if (pos < offset) { + len = 0; + begin = pos; + } + if (pos > offset + length) + goto stop_output; + } + } + +stop_output: + *start = buffer + (offset - begin); /* Start of wanted data */ + len -= (offset - begin); /* Start slop */ + if (len > length) + len = length; /* Ending slop */ + return (len); +} + +static int proc_scsi_gen_write(struct file * file, const char * buf, + unsigned long length, void *data) +{ + struct Scsi_Device_Template *SDTpnt; + Scsi_Device *scd; + struct Scsi_Host *HBA_ptr; + char *p; + int host, channel, id, lun; + char * buffer; + int err; + + if (!buf || length>PAGE_SIZE) + return -EINVAL; + + if (!(buffer = (char *) __get_free_page(GFP_KERNEL))) + return -ENOMEM; + if(copy_from_user(buffer, buf, length)) + { + err =-EFAULT; + goto out; + } + + err = -EINVAL; + + if (length < PAGE_SIZE) + buffer[length] = '\0'; + else if (buffer[PAGE_SIZE-1]) + goto out; + + if (length < 11 || strncmp("scsi", buffer, 4)) + goto out; + + /* + * Usage: echo "scsi dump #N" > /proc/scsi/scsi + * to dump status of all scsi commands. The number is used to specify the level + * of detail in the dump. + */ + if (!strncmp("dump", buffer + 5, 4)) { + unsigned int level; + + p = buffer + 10; + + if (*p == '\0') + goto out; + + level = simple_strtoul(p, NULL, 0); + scsi_dump_status(level); + } + /* + * Usage: echo "scsi log token #N" > /proc/scsi/scsi + * where token is one of [error,scan,mlqueue,mlcomplete,llqueue, + * llcomplete,hlqueue,hlcomplete] + */ +#ifdef CONFIG_SCSI_LOGGING /* { */ + + if (!strncmp("log", buffer + 5, 3)) { + char *token; + unsigned int level; + + p = buffer + 9; + token = p; + while (*p != ' ' && *p != '\t' && *p != '\0') { + p++; + } + + if (*p == '\0') { + if (strncmp(token, "all", 3) == 0) { + /* + * Turn on absolutely everything. + */ + scsi_logging_level = ~0; + } else if (strncmp(token, "none", 4) == 0) { + /* + * Turn off absolutely everything. + */ + scsi_logging_level = 0; + } else { + goto out; + } + } else { + *p++ = '\0'; + + level = simple_strtoul(p, NULL, 0); + + /* + * Now figure out what to do with it. 
+ */ + if (strcmp(token, "error") == 0) { + SCSI_SET_ERROR_RECOVERY_LOGGING(level); + } else if (strcmp(token, "timeout") == 0) { + SCSI_SET_TIMEOUT_LOGGING(level); + } else if (strcmp(token, "scan") == 0) { + SCSI_SET_SCAN_BUS_LOGGING(level); + } else if (strcmp(token, "mlqueue") == 0) { + SCSI_SET_MLQUEUE_LOGGING(level); + } else if (strcmp(token, "mlcomplete") == 0) { + SCSI_SET_MLCOMPLETE_LOGGING(level); + } else if (strcmp(token, "llqueue") == 0) { + SCSI_SET_LLQUEUE_LOGGING(level); + } else if (strcmp(token, "llcomplete") == 0) { + SCSI_SET_LLCOMPLETE_LOGGING(level); + } else if (strcmp(token, "hlqueue") == 0) { + SCSI_SET_HLQUEUE_LOGGING(level); + } else if (strcmp(token, "hlcomplete") == 0) { + SCSI_SET_HLCOMPLETE_LOGGING(level); + } else if (strcmp(token, "ioctl") == 0) { + SCSI_SET_IOCTL_LOGGING(level); + } else { + goto out; + } + } + + printk(KERN_INFO "scsi logging level set to 0x%8.8x\n", scsi_logging_level); + } +#endif /* CONFIG_SCSI_LOGGING */ /* } */ + + /* + * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi + * with "0 1 2 3" replaced by your "Host Channel Id Lun". + * Consider this feature BETA. + * CAUTION: This is not for hotplugging your peripherals. As + * SCSI was not designed for this you could damage your + * hardware ! + * However perhaps it is legal to switch on an + * already connected device. It is perhaps not + * guaranteed this device doesn't corrupt an ongoing data transfer. + */ + if (!strncmp("add-single-device", buffer + 5, 17)) { + p = buffer + 23; + + host = simple_strtoul(p, &p, 0); + channel = simple_strtoul(p + 1, &p, 0); + id = simple_strtoul(p + 1, &p, 0); + lun = simple_strtoul(p + 1, &p, 0); + + printk(KERN_INFO "scsi singledevice %d %d %d %d\n", host, channel, + id, lun); + + for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) { + if (HBA_ptr->host_no == host) { + break; + } + } + err = -ENXIO; + if (!HBA_ptr) + goto out; + + for (scd = HBA_ptr->host_queue; scd; scd = scd->next) { + if ((scd->channel == channel + && scd->id == id + && scd->lun == lun)) { + break; + } + } + + err = -ENOSYS; + if (scd) + goto out; /* We do not yet support unplugging */ + + scan_scsis(HBA_ptr, 1, channel, id, lun); + + /* FIXME (DB) This assumes that the queue_depth routines can be used + in this context as well, while they were all designed to be + called only once after the detect routine. (DB) */ + /* queue_depth routine moved to inside scan_scsis(,1,,,) so + it is called before build_commandblocks() */ + + err = length; + goto out; + } + /* + * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi + * with "0 1 2 3" replaced by your "Host Channel Id Lun". + * + * Consider this feature pre-BETA. + * + * CAUTION: This is not for hotplugging your peripherals. As + * SCSI was not designed for this you could damage your + * hardware and thoroughly confuse the SCSI subsystem. 
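+	 * (a usage illustration follows)
+	 */
+
+	/*
+	 * Aside: from user space, both interfaces are driven by writing a
+	 * line to /proc/scsi/scsi.  A hypothetical snippet, with the
+	 * host/channel/id/lun values invented for illustration (guarded
+	 * by #if 0):
+	 */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+	FILE *f = fopen("/proc/scsi/scsi", "w");
+
+	if (f == NULL) {
+		perror("/proc/scsi/scsi");
+		return 1;
+	}
+	/* ask the mid-layer to probe host 0, channel 0, id 2, lun 0 */
+	fprintf(f, "scsi add-single-device 0 0 2 0\n");
+	fclose(f);
+	return 0;
+}
+#endif
+	/* (end of aside; the original caution continues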
+	 *
+	 */
+	else if (!strncmp("remove-single-device", buffer + 5, 20)) {
+		p = buffer + 26;
+
+		host = simple_strtoul(p, &p, 0);
+		channel = simple_strtoul(p + 1, &p, 0);
+		id = simple_strtoul(p + 1, &p, 0);
+		lun = simple_strtoul(p + 1, &p, 0);
+
+
+		for (HBA_ptr = scsi_hostlist; HBA_ptr; HBA_ptr = HBA_ptr->next) {
+			if (HBA_ptr->host_no == host) {
+				break;
+			}
+		}
+		err = -ENODEV;
+		if (!HBA_ptr)
+			goto out;
+
+		for (scd = HBA_ptr->host_queue; scd; scd = scd->next) {
+			if ((scd->channel == channel
+			     && scd->id == id
+			     && scd->lun == lun)) {
+				break;
+			}
+		}
+
+		if (scd == NULL)
+			goto out;	/* there is no such device attached */
+
+		err = -EBUSY;
+		if (scd->access_count)
+			goto out;
+
+		SDTpnt = scsi_devicelist;
+		while (SDTpnt != NULL) {
+			if (SDTpnt->detach)
+				(*SDTpnt->detach) (scd);
+			SDTpnt = SDTpnt->next;
+		}
+
+		if (scd->attached == 0) {
+			/*
+			 * Nobody is using this device any more.
+			 * Free all of the command structures.
+			 */
+			if (HBA_ptr->hostt->revoke)
+				HBA_ptr->hostt->revoke(scd);
+			devfs_unregister (scd->de);
+			scsi_release_commandblocks(scd);
+
+			/* Now we can remove the device structure */
+			if (scd->next != NULL)
+				scd->next->prev = scd->prev;
+
+			if (scd->prev != NULL)
+				scd->prev->next = scd->next;
+
+			if (HBA_ptr->host_queue == scd) {
+				HBA_ptr->host_queue = scd->next;
+			}
+			blk_cleanup_queue(&scd->request_queue);
+			kfree((char *) scd);
+		} else {
+			goto out;
+		}
+		err = 0;
+	}
+out:
+
+	free_page((unsigned long) buffer);
+	return err;
+}
+#endif
+
+/*
+ * This entry point should be called by a driver if it is trying
+ * to add a low level scsi driver to the system.
+ */
+static int scsi_register_host(Scsi_Host_Template * tpnt)
+{
+	int pcount;
+	struct Scsi_Host *shpnt;
+	Scsi_Device *SDpnt;
+	struct Scsi_Device_Template *sdtpnt;
+	const char *name;
+	unsigned long flags;
+	int out_of_space = 0;
+
+	if (tpnt->next || !tpnt->detect)
+		return 1;	/* Must be already loaded, or
+				 * no detect routine available
+				 */
+
+	/* If max_sectors isn't set, default to max */
+	if (!tpnt->max_sectors)
+		tpnt->max_sectors = MAX_SECTORS;
+
+	pcount = next_scsi_host;
+
+	MOD_INC_USE_COUNT;
+
+	/* The detect routine must carefully spinunlock/spinlock if
+	   it enables interrupts, since all interrupt handlers do
+	   spinlock as well.
+	   All lame drivers are going to fail due to the following
+	   spinlock. For the time being let's use it only for drivers
+	   using the new scsi code. NOTE: the detect routine could
+	   redefine the value tpnt->use_new_eh_code. (DB, 13 May 1998) */
+
+	if (tpnt->use_new_eh_code) {
+		spin_lock_irqsave(&io_request_lock, flags);
+		tpnt->present = tpnt->detect(tpnt);
+		spin_unlock_irqrestore(&io_request_lock, flags);
+	} else
+		tpnt->present = tpnt->detect(tpnt);
+
+	if (tpnt->present) {
+		if (pcount == next_scsi_host) {
+			if (tpnt->present > 1) {
+				printk(KERN_ERR "scsi: Failure to register low-level scsi driver\n");
+				scsi_unregister_host(tpnt);
+				return 1;
+			}
+			/*
+			 * The low-level driver did not register a host itself,
+			 * so we do it on its behalf now.
+			 */
+			if(scsi_register(tpnt, 0)==NULL)
+			{
+				printk(KERN_ERR "scsi: register failed.\n");
+				scsi_unregister_host(tpnt);
+				return 1;
+			}
+		}
+		tpnt->next = scsi_hosts;	/* Add to the linked list */
+		scsi_hosts = tpnt;
+
+		/* Add the new driver to /proc/scsi */
+#ifdef CONFIG_PROC_FS
+		build_proc_dir_entries(tpnt);
+#endif
+
+
+		/*
+		 * Add the kernel threads for each host adapter that will
+		 * handle error correction.
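+		 * (a sketch of this handshake follows)
+		 */
+
+		/*
+		 * Aside: the loop below starts one error-handler thread per
+		 * host and blocks on a semaphore until the thread reports
+		 * that it is ready.  The same handshake in a self-contained
+		 * user-space sketch, with POSIX threads standing in for
+		 * kernel_thread/down (guarded by #if 0):
+		 */
+#if 0
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdio.h>
+
+static sem_t ready;	/* starts locked, like DECLARE_MUTEX_LOCKED */
+
+static void *eh_thread(void *arg)
+{
+	/* ... initialise internal state here ... */
+	sem_post(&ready);	/* tell the parent we are usable */
+	return NULL;		/* a real handler would loop here */
+}
+
+int main(void)
+{
+	pthread_t t;
+
+	sem_init(&ready, 0, 0);
+	pthread_create(&t, NULL, eh_thread, NULL);
+	sem_wait(&ready);	/* mirrors down(&sem) in the code below */
+	puts("handler initialised");
+	pthread_join(t, NULL);
+	return 0;
+}
+#endif
+		/* (end of aside; the original comment resumes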
+ */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt == tpnt && shpnt->hostt->use_new_eh_code) { + DECLARE_MUTEX_LOCKED(sem); + + shpnt->eh_notify = &sem; + kernel_thread((int (*)(void *)) scsi_error_handler, + (void *) shpnt, 0); + + /* + * Now wait for the kernel error thread to initialize itself + * as it might be needed when we scan the bus. + */ + down(&sem); + shpnt->eh_notify = NULL; + } + } + + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt == tpnt) { + if (tpnt->info) { + name = tpnt->info(shpnt); + } else { + name = tpnt->name; + } + printk(KERN_INFO "scsi%d : %s\n", /* And print a little message */ + shpnt->host_no, name); + } + } + + /* The next step is to call scan_scsis here. This generates the + * Scsi_Devices entries + */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt == tpnt) { + scan_scsis(shpnt, 0, 0, 0, 0); + if (shpnt->select_queue_depths != NULL) { + (shpnt->select_queue_depths) (shpnt, shpnt->host_queue); + } + } + } + + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) { + if (sdtpnt->init && sdtpnt->dev_noticed) + (*sdtpnt->init) (); + } + + /* + * Next we create the Scsi_Cmnd structures for this host + */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) + if (SDpnt->host->hostt == tpnt) { + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) + if (sdtpnt->attach) + (*sdtpnt->attach) (SDpnt); + if (SDpnt->attached) { + scsi_build_commandblocks(SDpnt); + if (0 == SDpnt->has_cmdblocks) + out_of_space = 1; + } + } + } + + /* + * Now that we have all of the devices, resize the DMA pool, + * as required. */ + if (!out_of_space) + scsi_resize_dma_pool(); + + + /* This does any final handling that is required. */ + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) { + if (sdtpnt->finish && sdtpnt->nr_dev) { + (*sdtpnt->finish) (); + } + } + } +#if defined(USE_STATIC_SCSI_MEMORY) + printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n", + (scsi_memory_upper_value - scsi_memory_lower_value) / 1024, + (scsi_init_memory_start - scsi_memory_lower_value) / 1024, + (scsi_memory_upper_value - scsi_init_memory_start) / 1024); +#endif + + if (out_of_space) { + scsi_unregister_host(tpnt); /* easiest way to clean up?? */ + return 1; + } else + return 0; +} + +/* + * Similarly, this entry point should be called by a loadable module if it + * is trying to remove a low level scsi driver from the system. + */ +static int scsi_unregister_host(Scsi_Host_Template * tpnt) +{ + int online_status; + int pcount0, pcount; + Scsi_Cmnd *SCpnt; + Scsi_Device *SDpnt; + Scsi_Device *SDpnt1; + struct Scsi_Device_Template *sdtpnt; + struct Scsi_Host *sh1; + struct Scsi_Host *shpnt; + char name[10]; /* host_no>=10^9? I don't think so. */ + + /* get the big kernel lock, so we don't race with open() */ + lock_kernel(); + + /* + * First verify that this host adapter is completely free with no pending + * commands + */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + for (SDpnt = shpnt->host_queue; SDpnt; + SDpnt = SDpnt->next) { + if (SDpnt->host->hostt == tpnt + && SDpnt->host->hostt->module + && GET_USE_COUNT(SDpnt->host->hostt->module)) + goto err_out; + /* + * FIXME(eric) - We need to find a way to notify the + * low level driver that we are shutting down - via the + * special device entry that still needs to get added. + * + * Is detach interface below good enough for this? 
+ */ + } + } + + /* + * FIXME(eric) put a spinlock on this. We force all of the devices offline + * to help prevent race conditions where other hosts/processors could try and + * get in and queue a command. + */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + for (SDpnt = shpnt->host_queue; SDpnt; + SDpnt = SDpnt->next) { + if (SDpnt->host->hostt == tpnt) + SDpnt->online = FALSE; + + } + } + + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt != tpnt) { + continue; + } + for (SDpnt = shpnt->host_queue; SDpnt; + SDpnt = SDpnt->next) { + /* + * Loop over all of the commands associated with the device. If any of + * them are busy, then set the state back to inactive and bail. + */ + for (SCpnt = SDpnt->device_queue; SCpnt; + SCpnt = SCpnt->next) { + online_status = SDpnt->online; + SDpnt->online = FALSE; + if (SCpnt->request.rq_status != RQ_INACTIVE) { + printk(KERN_ERR "SCSI device not inactive - rq_status=%d, target=%d, pid=%ld, state=%d, owner=%d.\n", + SCpnt->request.rq_status, SCpnt->target, SCpnt->pid, + SCpnt->state, SCpnt->owner); + for (SDpnt1 = shpnt->host_queue; SDpnt1; + SDpnt1 = SDpnt1->next) { + for (SCpnt = SDpnt1->device_queue; SCpnt; + SCpnt = SCpnt->next) + if (SCpnt->request.rq_status == RQ_SCSI_DISCONNECTING) + SCpnt->request.rq_status = RQ_INACTIVE; + } + SDpnt->online = online_status; + printk(KERN_ERR "Device busy???\n"); + goto err_out; + } + /* + * No, this device is really free. Mark it as such, and + * continue on. + */ + SCpnt->state = SCSI_STATE_DISCONNECTING; + SCpnt->request.rq_status = RQ_SCSI_DISCONNECTING; /* Mark as busy */ + } + } + } + /* Next we detach the high level drivers from the Scsi_Device structures */ + + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt != tpnt) { + continue; + } + for (SDpnt = shpnt->host_queue; SDpnt; + SDpnt = SDpnt->next) { + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) + if (sdtpnt->detach) + (*sdtpnt->detach) (SDpnt); + + /* If something still attached, punt */ + if (SDpnt->attached) { + printk(KERN_ERR "Attached usage count = %d\n", SDpnt->attached); + goto err_out; + } + devfs_unregister (SDpnt->de); + } + } + + /* + * Next, kill the kernel error recovery thread for this host. + */ + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt == tpnt + && shpnt->hostt->use_new_eh_code + && shpnt->ehandler != NULL) { + DECLARE_MUTEX_LOCKED(sem); + + shpnt->eh_notify = &sem; + send_sig(SIGHUP, shpnt->ehandler, 1); + down(&sem); + shpnt->eh_notify = NULL; + } + } + + /* Next we free up the Scsi_Cmnd structures for this host */ + + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + if (shpnt->hostt != tpnt) { + continue; + } + for (SDpnt = shpnt->host_queue; SDpnt; + SDpnt = shpnt->host_queue) { + scsi_release_commandblocks(SDpnt); + + blk_cleanup_queue(&SDpnt->request_queue); + /* Next free up the Scsi_Device structures for this host */ + shpnt->host_queue = SDpnt->next; + kfree((char *) SDpnt); + + } + } + + /* Next we go through and remove the instances of the individual hosts + * that were detected */ + + pcount0 = next_scsi_host; + for (shpnt = scsi_hostlist; shpnt; shpnt = sh1) { + sh1 = shpnt->next; + if (shpnt->hostt != tpnt) + continue; + pcount = next_scsi_host; + /* Remove the /proc/scsi directory entry */ + sprintf(name,"%d",shpnt->host_no); + remove_proc_entry(name, tpnt->proc_dir); + if (tpnt->release) + (*tpnt->release) (shpnt); + else { + /* This is the default case for the release function. 
+			 * It should do the right thing for most correctly
+			 * written host adapters.
+			 */
+			if (shpnt->irq)
+				free_irq(shpnt->irq, NULL);
+			if (shpnt->dma_channel != 0xff)
+				free_dma(shpnt->dma_channel);
+			if (shpnt->io_port && shpnt->n_io_port)
+				release_region(shpnt->io_port, shpnt->n_io_port);
+		}
+		if (pcount == next_scsi_host)
+			scsi_unregister(shpnt);
+		tpnt->present--;
+	}
+
+	/*
+	 * If there are absolutely no more hosts left, it is safe
+	 * to completely nuke the DMA pool.  The resize operation will
+	 * do the right thing and free everything.
+	 */
+	if (!scsi_hosts)
+		scsi_resize_dma_pool();
+
+	if (pcount0 != next_scsi_host)
+		printk(KERN_INFO "scsi : %d host%s left.\n", next_scsi_host,
+		       (next_scsi_host == 1) ? "" : "s");
+
+#if defined(USE_STATIC_SCSI_MEMORY)
+	printk("SCSI memory: total %ldKb, used %ldKb, free %ldKb.\n",
+	       (scsi_memory_upper_value - scsi_memory_lower_value) / 1024,
+	       (scsi_init_memory_start - scsi_memory_lower_value) / 1024,
+	       (scsi_memory_upper_value - scsi_init_memory_start) / 1024);
+#endif
+
+	/*
+	 * Remove it from the linked list and /proc if all
+	 * hosts were successfully removed (i.e. present == 0)
+	 */
+	if (!tpnt->present) {
+		Scsi_Host_Template **SHTp = &scsi_hosts;
+		Scsi_Host_Template *SHT;
+
+		while ((SHT = *SHTp) != NULL) {
+			if (SHT == tpnt) {
+				*SHTp = SHT->next;
+				remove_proc_entry(tpnt->proc_name, proc_scsi);
+				break;
+			}
+			SHTp = &SHT->next;
+		}
+	}
+	MOD_DEC_USE_COUNT;
+
+	unlock_kernel();
+	return 0;
+
+err_out:
+	unlock_kernel();
+	return -1;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt);
+
+/*
+ * This entry point should be called by a loadable module if it is trying to
+ * add a high level scsi driver to the system.
+ */
+static int scsi_register_device_module(struct Scsi_Device_Template *tpnt)
+{
+	Scsi_Device *SDpnt;
+	struct Scsi_Host *shpnt;
+	int out_of_space = 0;
+
+	if (tpnt->next)
+		return 1;
+
+	scsi_register_device(tpnt);
+	/*
+	 * First scan the devices that we know about, and see if we notice them.
+	 */
+
+	for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+		for (SDpnt = shpnt->host_queue; SDpnt;
+		     SDpnt = SDpnt->next) {
+			if (tpnt->detect)
+				SDpnt->detected = (*tpnt->detect) (SDpnt);
+		}
+	}
+
+	/*
+	 * If any of the devices would match this driver, then perform the
+	 * init function.
+	 */
+	if (tpnt->init && tpnt->dev_noticed) {
+		if ((*tpnt->init) ()) {
+			for (shpnt = scsi_hostlist; shpnt;
+			     shpnt = shpnt->next) {
+				for (SDpnt = shpnt->host_queue; SDpnt;
+				     SDpnt = SDpnt->next) {
+					SDpnt->detected = 0;
+				}
+			}
+			scsi_deregister_device(tpnt);
+			return 1;
+		}
+	}
+
+	/*
+	 * Now actually connect the devices to the new driver.
+	 */
+	for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+		for (SDpnt = shpnt->host_queue; SDpnt;
+		     SDpnt = SDpnt->next) {
+			SDpnt->attached += SDpnt->detected;
+			SDpnt->detected = 0;
+			if (tpnt->attach)
+				(*tpnt->attach) (SDpnt);
+			/*
+			 * If this driver attached to the device, and we don't
+			 * yet have any command blocks for this device,
+			 * allocate some.
+			 */
+			if (SDpnt->attached && SDpnt->has_cmdblocks == 0) {
+				SDpnt->online = TRUE;
+				scsi_build_commandblocks(SDpnt);
+				if (0 == SDpnt->has_cmdblocks)
+					out_of_space = 1;
+			}
+		}
+	}
+
+	/*
+	 * This does any final handling that is required.
+	 */
+	if (tpnt->finish && tpnt->nr_dev)
+		(*tpnt->finish) ();
+	if (!out_of_space)
+		scsi_resize_dma_pool();
+	MOD_INC_USE_COUNT;
+
+	if (out_of_space) {
+		scsi_unregister_device(tpnt);	/* easiest way to clean up?? */
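+
+		/*
+		 * Aside: scsi_unregister_host() above removes the template
+		 * with the classic pointer-to-pointer walk (SHTp =
+		 * &SHT->next), which needs no special case for the list
+		 * head.  The idiom in isolation, as a self-contained sketch
+		 * with hypothetical names (guarded by #if 0):
+		 */
+#if 0
+#include <stdlib.h>
+
+struct node { int key; struct node *next; };
+
+static void unlink_node(struct node **headp, int key)
+{
+	struct node **np = headp;
+	struct node *n;
+
+	while ((n = *np) != NULL) {
+		if (n->key == key) {
+			*np = n->next;	/* splice the node out */
+			free(n);
+			return;
+		}
+		np = &n->next;		/* advance the link pointer */
+	}
+}
+#endif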
+		return 1;
+	} else
+		return 0;
+}
+
+static int scsi_unregister_device(struct Scsi_Device_Template *tpnt)
+{
+	Scsi_Device *SDpnt;
+	struct Scsi_Host *shpnt;
+
+	lock_kernel();
+	/*
+	 * If we are busy, this is not going to fly.
+	 */
+	if (GET_USE_COUNT(tpnt->module) != 0)
+		goto error_out;
+
+	/*
+	 * Next, detach the devices from the driver.
+	 */
+
+	for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) {
+		for (SDpnt = shpnt->host_queue; SDpnt;
+		     SDpnt = SDpnt->next) {
+			if (tpnt->detach)
+				(*tpnt->detach) (SDpnt);
+			if (SDpnt->attached == 0) {
+				SDpnt->online = FALSE;
+
+				/*
+				 * Nobody is using this device any more.  Free all of the
+				 * command structures.
+				 */
+				scsi_release_commandblocks(SDpnt);
+			}
+		}
+	}
+	/*
+	 * Extract the template from the linked list.
+	 */
+	scsi_deregister_device(tpnt);
+
+	MOD_DEC_USE_COUNT;
+	unlock_kernel();
+	/*
+	 * Final cleanup for the driver is done in the driver sources in the
+	 * cleanup function.
+	 */
+	return 0;
+error_out:
+	unlock_kernel();
+	return -1;
+}
+
+
+/* This function should be called by drivers that need to register
+ * with the midlevel scsi system.  As of 2.4.0-test9pre3 this is our
+ * main device/hosts register function /mathiasen
+ */
+int scsi_register_module(int module_type, void *ptr)
+{
+	switch (module_type) {
+	case MODULE_SCSI_HA:
+		return scsi_register_host((Scsi_Host_Template *) ptr);
+
+		/* Load upper level device handler of some kind */
+	case MODULE_SCSI_DEV:
+#ifdef CONFIG_KMOD
+		if (scsi_hosts == NULL)
+			request_module("scsi_hostadapter");
+#endif
+		return scsi_register_device_module((struct Scsi_Device_Template *) ptr);
+		/* The rest of these are not yet implemented */
+
+		/* Load constants.o */
+	case MODULE_SCSI_CONST:
+
+		/* Load specialized ioctl handler for some device.  Intended for
+		 * cdroms that have non-SCSI2 audio command sets. */
+	case MODULE_SCSI_IOCTL:
+
+	default:
+		return 1;
+	}
+}
+
+/* Reverse the actions taken above
+ */
+int scsi_unregister_module(int module_type, void *ptr)
+{
+	int retval = 0;
+
+	switch (module_type) {
+	case MODULE_SCSI_HA:
+		retval = scsi_unregister_host((Scsi_Host_Template *) ptr);
+		break;
+	case MODULE_SCSI_DEV:
+		retval = scsi_unregister_device((struct Scsi_Device_Template *)ptr);
+		break;
+		/* The rest of these are not yet implemented. */
+	case MODULE_SCSI_CONST:
+	case MODULE_SCSI_IOCTL:
+		break;
+	default:;
+	}
+	return retval;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Function:	scsi_dump_status
+ *
+ * Purpose:	Brain dump of scsi system, used for problem solving.
+ *
+ * Arguments:	level - used to indicate level of detail.
+ *
+ * Notes:	The level isn't used at all yet, but we need to find some way
+ *		of sensibly logging varying degrees of information.  A quick one-line
+ *		display of each command, plus the status would be most useful.
+ *
+ *		This does depend upon CONFIG_SCSI_LOGGING - I do want some way of turning
+ *		it all off if the user wants a lean and mean kernel.  It would probably
+ *		also be useful to allow the user to specify one single host to be dumped.
+ *		A second argument to the function would be useful for that purpose.
+ *
+ *		FIXME - some formatting of the output into tables would be very handy.
+ */ +static void scsi_dump_status(int level) +{ +#ifdef CONFIG_SCSI_LOGGING /* { */ + int i; + struct Scsi_Host *shpnt; + Scsi_Cmnd *SCpnt; + Scsi_Device *SDpnt; + printk(KERN_INFO "Dump of scsi host parameters:\n"); + i = 0; + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + printk(KERN_INFO " %d %d %d : %d %d\n", + shpnt->host_failed, + shpnt->host_busy, + atomic_read(&shpnt->host_active), + shpnt->host_blocked, + shpnt->host_self_blocked); + } + + printk(KERN_INFO "\n\n"); + printk(KERN_INFO "Dump of scsi command parameters:\n"); + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + printk(KERN_INFO "h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result\n"); + for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { + /* (0) h:c:t:l (dev sect nsect cnumsec sg) (ret all flg) (to/cmd to ito) cmd snse result %d %x */ + printk(KERN_INFO "(%3d) %2d:%1d:%2d:%2d (%6s %4ld %4ld %4ld %4x %1d) (%1d %1d 0x%2x) (%4d %4d %4d) 0x%2.2x 0x%2.2x 0x%8.8x\n", + i++, + + SCpnt->host->host_no, + SCpnt->channel, + SCpnt->target, + SCpnt->lun, + + kdevname(SCpnt->request.rq_dev), + SCpnt->request.sector, + SCpnt->request.nr_sectors, + SCpnt->request.current_nr_sectors, + SCpnt->request.rq_status, + SCpnt->use_sg, + + SCpnt->retries, + SCpnt->allowed, + SCpnt->flags, + + SCpnt->timeout_per_command, + SCpnt->timeout, + SCpnt->internal_timeout, + + SCpnt->cmnd[0], + SCpnt->sense_buffer[2], + SCpnt->result); + } + } + } + + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) { + for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { + /* Now dump the request lists for each block device */ + printk(KERN_INFO "Dump of pending block device requests\n"); + for (i = 0; i < MAX_BLKDEV; i++) { + struct list_head * queue_head; + + queue_head = &blk_dev[i].request_queue.queue_head; + if (!list_empty(queue_head)) { + struct request *req; + struct list_head * entry; + + printk(KERN_INFO "%d: ", i); + entry = queue_head->next; + do { + req = blkdev_entry_to_request(entry); + printk("(%s %d %ld %ld %ld) ", + kdevname(req->rq_dev), + req->cmd, + req->sector, + req->nr_sectors, + req->current_nr_sectors); + } while ((entry = entry->next) != queue_head); + printk("\n"); + } + } + } + } +#endif /* CONFIG_SCSI_LOGGING */ /* } */ +} +#endif /* CONFIG_PROC_FS */ + +static int __init scsi_host_no_init (char *str) +{ + static int next_no = 0; + char *temp; + + while (str) { + temp = str; + while (*temp && (*temp != ':') && (*temp != ',')) + temp++; + if (!*temp) + temp = NULL; + else + *temp++ = 0; + scsi_host_no_insert(str, next_no); + str = temp; + next_no++; + } + return 1; +} + +static char *scsihosts; + +MODULE_PARM(scsihosts, "s"); +MODULE_DESCRIPTION("SCSI core"); +MODULE_LICENSE("GPL"); + +#ifndef MODULE +int __init scsi_setup(char *str) +{ + scsihosts = str; + return 1; +} + +__setup("scsihosts=", scsi_setup); +#endif + +static int __init init_scsi(void) +{ + struct proc_dir_entry *generic; + + printk(KERN_INFO "SCSI subsystem driver " REVISION "\n"); + + if( scsi_init_minimal_dma_pool() != 0 ) + { + return 1; + } + + /* + * This makes /proc/scsi and /proc/scsi/scsi visible. 
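+	 * (an aside on the scsihosts= parser first)
+	 */
+
+	/*
+	 * Aside: scsi_host_no_init() above walks the scsihosts= boot string,
+	 * splitting on ':' or ',' in place and numbering each name.  The
+	 * same logic as a self-contained user-space sketch; the driver names
+	 * are invented for illustration (guarded by #if 0):
+	 */
+#if 0
+#include <stdio.h>
+
+int main(void)
+{
+	char arg[] = "aic7xxx:ncr53c8xx,sym53c8xx";
+	char *str = arg, *temp;
+	int next_no = 0;
+
+	while (str) {
+		temp = str;
+		while (*temp && *temp != ':' && *temp != ',')
+			temp++;
+		if (!*temp)
+			temp = NULL;
+		else
+			*temp++ = '\0';	/* cut the name out of the string */
+		printf("scsi%d is reserved for %s\n", next_no++, str);
+		str = temp;
+	}
+	return 0;
+}
+#endif
+	/* (end of aside; the original comment resumes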
+ */ +#ifdef CONFIG_PROC_FS + proc_scsi = proc_mkdir("scsi", 0); + if (!proc_scsi) { + printk (KERN_ERR "cannot init /proc/scsi\n"); + return -ENOMEM; + } + generic = create_proc_info_entry ("scsi/scsi", 0, 0, scsi_proc_info); + if (!generic) { + printk (KERN_ERR "cannot init /proc/scsi/scsi\n"); + remove_proc_entry("scsi", 0); + return -ENOMEM; + } + generic->write_proc = proc_scsi_gen_write; +#endif + + scsi_devfs_handle = devfs_mk_dir (NULL, "scsi", NULL); + if (scsihosts) + printk(KERN_INFO "scsi: host order: %s\n", scsihosts); + scsi_host_no_init (scsihosts); + /* + * This is where the processing takes place for most everything + * when commands are completed. + */ + init_bh(SCSI_BH, scsi_bottom_half_handler); + + return 0; +} + +static void __exit exit_scsi(void) +{ + Scsi_Host_Name *shn, *shn2 = NULL; + + remove_bh(SCSI_BH); + + devfs_unregister (scsi_devfs_handle); + for (shn = scsi_host_no_list;shn;shn = shn->next) { + if (shn->name) + kfree(shn->name); + if (shn2) + kfree (shn2); + shn2 = shn; + } + if (shn2) + kfree (shn2); + +#ifdef CONFIG_PROC_FS + /* No, we're not here anymore. Don't show the /proc/scsi files. */ + remove_proc_entry ("scsi/scsi", 0); + remove_proc_entry ("scsi", 0); +#endif + + /* + * Free up the DMA pool. + */ + scsi_resize_dma_pool(); + +} + +module_init(init_scsi); +module_exit(exit_scsi); + +/* + * Function: scsi_get_host_dev() + * + * Purpose: Create a Scsi_Device that points to the host adapter itself. + * + * Arguments: SHpnt - Host that needs a Scsi_Device + * + * Lock status: None assumed. + * + * Returns: The Scsi_Device or NULL + * + * Notes: + */ +Scsi_Device * scsi_get_host_dev(struct Scsi_Host * SHpnt) +{ + Scsi_Device * SDpnt; + + /* + * Attach a single Scsi_Device to the Scsi_Host - this should + * be made to look like a "pseudo-device" that points to the + * HA itself. For the moment, we include it at the head of + * the host_queue itself - I don't think we want to show this + * to the HA in select_queue_depths(), as this would probably confuse + * matters. + * Note - this device is not accessible from any high-level + * drivers (including generics), which is probably not + * optimal. We can add hooks later to attach + */ + SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), + GFP_ATOMIC); + if(SDpnt == NULL) + return NULL; + + memset(SDpnt, 0, sizeof(Scsi_Device)); + + SDpnt->host = SHpnt; + SDpnt->id = SHpnt->this_id; + SDpnt->type = -1; + SDpnt->queue_depth = 1; + + scsi_build_commandblocks(SDpnt); + + scsi_initialize_queue(SDpnt, SHpnt); + + SDpnt->online = TRUE; + + /* + * Initialize the object that we will use to wait for command blocks. + */ + init_waitqueue_head(&SDpnt->scpnt_wait); + return SDpnt; +} + +/* + * Function: scsi_free_host_dev() + * + * Purpose: Create a Scsi_Device that points to the host adapter itself. + * + * Arguments: SHpnt - Host that needs a Scsi_Device + * + * Lock status: None assumed. + * + * Returns: Nothing + * + * Notes: + */ +void scsi_free_host_dev(Scsi_Device * SDpnt) +{ + if( (unsigned char) SDpnt->id != (unsigned char) SDpnt->host->this_id ) + { + panic("Attempt to delete wrong device\n"); + } + + blk_cleanup_queue(&SDpnt->request_queue); + + /* + * We only have a single SCpnt attached to this device. Free + * it now. + */ + scsi_release_commandblocks(SDpnt); + kfree(SDpnt); +} + +/* + * Function: scsi_reset_provider_done_command + * + * Purpose: Dummy done routine. + * + * Notes: Some low level drivers will call scsi_done and end up here, + * others won't bother. 
+ *		We don't want the bogus command used for the bus/device
+ *		reset to find its way into the mid-layer so we intercept
+ *		it here.
+ */
+static void
+scsi_reset_provider_done_command(Scsi_Cmnd *SCpnt)
+{
+}
+
+/*
+ * Function:	scsi_reset_provider
+ *
+ * Purpose:	Send requested reset to a bus or device at any phase.
+ *
+ * Arguments:	device	- device to send reset to
+ *		flag - reset type (see scsi.h)
+ *
+ * Returns:	SUCCESS/FAILURE.
+ *
+ * Notes:	This is used by the SCSI Generic driver to provide
+ *		Bus/Device reset capability.
+ */
+int
+scsi_reset_provider(Scsi_Device *dev, int flag)
+{
+	Scsi_Cmnd SC, *SCpnt = &SC;
+	int rtn;
+
+	memset(&SCpnt->eh_timeout, 0, sizeof(SCpnt->eh_timeout));
+	SCpnt->host = dev->host;
+	SCpnt->device = dev;
+	SCpnt->target = dev->id;
+	SCpnt->lun = dev->lun;
+	SCpnt->channel = dev->channel;
+	SCpnt->request.rq_status = RQ_SCSI_BUSY;
+	SCpnt->request.waiting = NULL;
+	SCpnt->use_sg = 0;
+	SCpnt->old_use_sg = 0;
+	SCpnt->old_cmd_len = 0;
+	SCpnt->underflow = 0;
+	SCpnt->transfersize = 0;
+	SCpnt->resid = 0;
+	SCpnt->serial_number = 0;
+	SCpnt->serial_number_at_timeout = 0;
+	SCpnt->host_scribble = NULL;
+	SCpnt->next = NULL;
+	SCpnt->state = SCSI_STATE_INITIALIZING;
+	SCpnt->owner = SCSI_OWNER_MIDLEVEL;
+
+	memset(&SCpnt->cmnd, '\0', sizeof(SCpnt->cmnd));
+
+	SCpnt->scsi_done = scsi_reset_provider_done_command;
+	SCpnt->done = NULL;
+	SCpnt->reset_chain = NULL;
+
+	SCpnt->buffer = NULL;
+	SCpnt->bufflen = 0;
+	SCpnt->request_buffer = NULL;
+	SCpnt->request_bufflen = 0;
+
+	SCpnt->internal_timeout = NORMAL_TIMEOUT;
+	SCpnt->abort_reason = DID_ABORT;
+
+	SCpnt->cmd_len = 0;
+
+	SCpnt->sc_data_direction = SCSI_DATA_UNKNOWN;
+	SCpnt->sc_request = NULL;
+	SCpnt->sc_magic = SCSI_CMND_MAGIC;
+
+	/*
+	 * Sometimes the command can get back into the timer chain,
+	 * so use the pid as an identifier.
+	 */
+	SCpnt->pid = 0;
+
+	if (dev->host->hostt->use_new_eh_code) {
+		rtn = scsi_new_reset(SCpnt, flag);
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(&io_request_lock, flags);
+		rtn = scsi_old_reset(SCpnt, flag);
+		spin_unlock_irqrestore(&io_request_lock, flags);
+	}
+
+	scsi_delete_timer(SCpnt);
+	return rtn;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi.h b/xen-2.4.16/drivers/scsi/scsi.h
new file mode 100644
index 0000000000..f8199d5274
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi.h
@@ -0,0 +1,881 @@
+/*
+ *  scsi.h Copyright (C) 1992 Drew Eckhardt
+ *         Copyright (C) 1993, 1994, 1995, 1998, 1999 Eric Youngdale
+ *  generic SCSI package header file by
+ *      Initial versions: Drew Eckhardt
+ *      Subsequent revisions: Eric Youngdale
+ *
+ *
+ *
+ *  Modified by Eric Youngdale eric@andante.org to
+ *  add scatter-gather, multiple outstanding request, and other
+ *  enhancements.
+ */
+
+#ifndef _SCSI_H
+#define _SCSI_H
+
+#include <linux/config.h>	/* for CONFIG_SCSI_LOGGING */
+/*#include <linux/devfs_fs_kernel.h>*/
+/*#include <linux/proc_fs.h>*/
+
+/*
+ * Some of the public constants are being moved to this file.
+ * We include it here so that what came from where is transparent.
+ */
+#include <scsi/scsi.h>
+
+/*#include <linux/random.h>*/
+
+#include <asm/hardirq.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+
+/*
+ * These are the values that the SCpnt->sc_data_direction and
+ * SRpnt->sr_data_direction can take.  These need to be set by
+ * the originator of the command.
+ * The SCSI_DATA_UNKNOWN value is essentially the default.
+ * In the event that the command creator didn't bother to
+ * set a value, you will see SCSI_DATA_UNKNOWN.
+ */
+#define SCSI_DATA_UNKNOWN       0
+#define SCSI_DATA_WRITE         1
+#define SCSI_DATA_READ          2
+#define SCSI_DATA_NONE          3
+
+#ifdef CONFIG_PCI
+#include <linux/pci.h>
+#if ((SCSI_DATA_UNKNOWN == PCI_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == PCI_DMA_TODEVICE) && (SCSI_DATA_READ == PCI_DMA_FROMDEVICE) && (SCSI_DATA_NONE == PCI_DMA_NONE))
+#define scsi_to_pci_dma_dir(scsi_dir)	((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_pci_dma_dir(unsigned char scsi_dir)
+{
+	if (scsi_dir == SCSI_DATA_UNKNOWN)
+		return PCI_DMA_BIDIRECTIONAL;
+	if (scsi_dir == SCSI_DATA_WRITE)
+		return PCI_DMA_TODEVICE;
+	if (scsi_dir == SCSI_DATA_READ)
+		return PCI_DMA_FROMDEVICE;
+	return PCI_DMA_NONE;
+}
+#endif
+#endif
+
+#if defined(CONFIG_SBUS) && !defined(CONFIG_SUN3) && !defined(CONFIG_SUN3X)
+#include <asm/sbus.h>
+#if ((SCSI_DATA_UNKNOWN == SBUS_DMA_BIDIRECTIONAL) && (SCSI_DATA_WRITE == SBUS_DMA_TODEVICE) && (SCSI_DATA_READ == SBUS_DMA_FROMDEVICE) && (SCSI_DATA_NONE == SBUS_DMA_NONE))
+#define scsi_to_sbus_dma_dir(scsi_dir)	((int)(scsi_dir))
+#else
+extern __inline__ int scsi_to_sbus_dma_dir(unsigned char scsi_dir)
+{
+	if (scsi_dir == SCSI_DATA_UNKNOWN)
+		return SBUS_DMA_BIDIRECTIONAL;
+	if (scsi_dir == SCSI_DATA_WRITE)
+		return SBUS_DMA_TODEVICE;
+	if (scsi_dir == SCSI_DATA_READ)
+		return SBUS_DMA_FROMDEVICE;
+	return SBUS_DMA_NONE;
+}
+#endif
+#endif
+
+/*
+ * Some defs, in case these are not defined elsewhere.
+ */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define MAX_SCSI_DEVICE_CODE 14
+extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE];
+
+#ifdef DEBUG
+#define SCSI_TIMEOUT (5*HZ)
+#else
+#define SCSI_TIMEOUT (2*HZ)
+#endif
+
+/*
+ * Used for debugging the new queueing code.  We want to make sure
+ * that the lock state is consistent with design.  Only do this in
+ * the user space simulator.
+ */
+#define ASSERT_LOCK(_LOCK, _COUNT)
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USER_DEBUG)
+#undef ASSERT_LOCK
+#define ASSERT_LOCK(_LOCK,_COUNT)	\
+	{ if( (_LOCK)->lock != _COUNT )	\
+		panic("Lock count inconsistent %s %d\n", __FILE__, __LINE__);	\
+	}
+#endif
+
+/*
+ * Use these to separate status msg and our bytes
+ *
+ * These are set by:
+ *
+ *      status byte = set from target device
+ *      msg_byte    = return status from host adapter itself.
+ *      host_byte   = set by low-level driver to indicate status.
+ *      driver_byte = set by mid-level.
+ */
+#define status_byte(result) (((result) >> 1) & 0x1f)
+#define msg_byte(result)    (((result) >> 8) & 0xff)
+#define host_byte(result)   (((result) >> 16) & 0xff)
+#define driver_byte(result) (((result) >> 24) & 0xff)
+#define suggestion(result)  (driver_byte(result) & SUGGEST_MASK)
+
+#define sense_class(sense)  (((sense) >> 4) & 0x7)
+#define sense_error(sense)  ((sense) & 0xf)
+#define sense_valid(sense)  ((sense) & 0x80)
+
+#define NEEDS_RETRY     0x2001
+#define SUCCESS         0x2002
+#define FAILED          0x2003
+#define QUEUED          0x2004
+#define SOFT_ERROR      0x2005
+#define ADD_TO_MLQUEUE  0x2006
+
+/*
+ * These are the values that scsi_cmd->state can take.
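+ * (first, an aside on the result word above)
+ */
+
+/*
+ * Aside: the four *_byte() macros above unpack one 32-bit result word
+ * laid out as (driver << 24) | (host << 16) | (msg << 8) | status.
+ * A self-contained demonstration (guarded by #if 0):
+ */
+#if 0
+#include <stdio.h>
+
+#define status_byte(result) (((result) >> 1) & 0x1f)
+#define msg_byte(result)    (((result) >> 8) & 0xff)
+#define host_byte(result)   (((result) >> 16) & 0xff)
+#define driver_byte(result) (((result) >> 24) & 0xff)
+
+int main(void)
+{
+	/* DRIVER_SENSE (0x08) in the driver byte, DID_OK host byte,
+	 * COMMAND COMPLETE message, raw SCSI status 0x02 (CHECK
+	 * CONDITION); status_byte() yields the shifted value 0x01. */
+	unsigned int result = (0x08u << 24) | (0x00 << 16) | (0x00 << 8) | 0x02;
+
+	printf("status=%#x msg=%#x host=%#x driver=%#x\n",
+	       status_byte(result), msg_byte(result),
+	       host_byte(result), driver_byte(result));
+	return 0;
+}
+#endif
+/* (end of aside; the original comment resumes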
+ */ +#define SCSI_STATE_TIMEOUT 0x1000 +#define SCSI_STATE_FINISHED 0x1001 +#define SCSI_STATE_FAILED 0x1002 +#define SCSI_STATE_QUEUED 0x1003 +#define SCSI_STATE_UNUSED 0x1006 +#define SCSI_STATE_DISCONNECTING 0x1008 +#define SCSI_STATE_INITIALIZING 0x1009 +#define SCSI_STATE_BHQUEUE 0x100a +#define SCSI_STATE_MLQUEUE 0x100b + +/* + * These are the values that the owner field can take. + * They are used as an indication of who the command belongs to. + */ +#define SCSI_OWNER_HIGHLEVEL 0x100 +#define SCSI_OWNER_MIDLEVEL 0x101 +#define SCSI_OWNER_LOWLEVEL 0x102 +#define SCSI_OWNER_ERROR_HANDLER 0x103 +#define SCSI_OWNER_BH_HANDLER 0x104 +#define SCSI_OWNER_NOBODY 0x105 + +#define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7] + +#define IDENTIFY_BASE 0x80 +#define IDENTIFY(can_disconnect, lun) (IDENTIFY_BASE |\ + ((can_disconnect) ? 0x40 : 0) |\ + ((lun) & 0x07)) + + +/* + * This defines the scsi logging feature. It is a means by which the + * user can select how much information they get about various goings on, + * and it can be really useful for fault tracing. The logging word is divided + * into 8 nibbles, each of which describes a loglevel. The division of things + * is somewhat arbitrary, and the division of the word could be changed if it + * were really needed for any reason. The numbers below are the only place where these + * are specified. For a first go-around, 3 bits is more than enough, since this + * gives 8 levels of logging (really 7, since 0 is always off). Cutting to 2 bits + * might be wise at some point. + */ + +#define SCSI_LOG_ERROR_SHIFT 0 +#define SCSI_LOG_TIMEOUT_SHIFT 3 +#define SCSI_LOG_SCAN_SHIFT 6 +#define SCSI_LOG_MLQUEUE_SHIFT 9 +#define SCSI_LOG_MLCOMPLETE_SHIFT 12 +#define SCSI_LOG_LLQUEUE_SHIFT 15 +#define SCSI_LOG_LLCOMPLETE_SHIFT 18 +#define SCSI_LOG_HLQUEUE_SHIFT 21 +#define SCSI_LOG_HLCOMPLETE_SHIFT 24 +#define SCSI_LOG_IOCTL_SHIFT 27 + +#define SCSI_LOG_ERROR_BITS 3 +#define SCSI_LOG_TIMEOUT_BITS 3 +#define SCSI_LOG_SCAN_BITS 3 +#define SCSI_LOG_MLQUEUE_BITS 3 +#define SCSI_LOG_MLCOMPLETE_BITS 3 +#define SCSI_LOG_LLQUEUE_BITS 3 +#define SCSI_LOG_LLCOMPLETE_BITS 3 +#define SCSI_LOG_HLQUEUE_BITS 3 +#define SCSI_LOG_HLCOMPLETE_BITS 3 +#define SCSI_LOG_IOCTL_BITS 3 + +#if CONFIG_SCSI_LOGGING + +#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) \ +{ \ + unsigned int mask; \ + \ + mask = (1 << (BITS)) - 1; \ + if( ((scsi_logging_level >> (SHIFT)) & mask) > (LEVEL) ) \ + { \ + (CMD); \ + } \ +} + +#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL) \ +{ \ + unsigned int mask; \ + \ + mask = ((1 << (BITS)) - 1) << SHIFT; \ + scsi_logging_level = ((scsi_logging_level & ~mask) \ + | ((LEVEL << SHIFT) & mask)); \ +} + + + +#else + +/* + * With no logging enabled, stub these out so they don't do anything. + */ +#define SCSI_SET_LOGGING(SHIFT, BITS, LEVEL) + +#define SCSI_CHECK_LOGGING(SHIFT, BITS, LEVEL, CMD) +#endif + +/* + * These are the macros that are actually used throughout the code to + * log events. If logging isn't enabled, they are no-ops and will be + * completely absent from the user's code. + * + * The 'set' versions of the macros are really intended to only be called + * from the /proc filesystem, and in production kernels this will be about + * all that is ever used. It could be useful in a debugging environment to + * bump the logging level when certain strange events are detected, however. 
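+ * (a worked example of the encoding follows)
+ */
+
+/*
+ * Aside: each facility gets a 3-bit field in scsi_logging_level at the
+ * shifts defined below.  Setting one field and reading it back, as a
+ * self-contained sketch (guarded by #if 0):
+ */
+#if 0
+#include <stdio.h>
+
+static unsigned int logging_level;
+
+static void set_level(int shift, int bits, unsigned int level)
+{
+	unsigned int mask = ((1u << bits) - 1) << shift;
+
+	logging_level = (logging_level & ~mask) | ((level << shift) & mask);
+}
+
+static unsigned int get_level(int shift, int bits)
+{
+	return (logging_level >> shift) & ((1u << bits) - 1);
+}
+
+int main(void)
+{
+	set_level(12, 3, 2);	/* MLCOMPLETE (shift 12) at level 2 */
+	printf("mlcomplete level = %u\n", get_level(12, 3));	/* prints 2 */
+	return 0;
+}
+#endif
+/* (end of aside; the original comment resumes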
+ */ +#define SCSI_LOG_ERROR_RECOVERY(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL,CMD); +#define SCSI_LOG_TIMEOUT(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL,CMD); +#define SCSI_LOG_SCAN_BUS(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL,CMD); +#define SCSI_LOG_MLQUEUE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL,CMD); +#define SCSI_LOG_MLCOMPLETE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL,CMD); +#define SCSI_LOG_LLQUEUE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL,CMD); +#define SCSI_LOG_LLCOMPLETE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL,CMD); +#define SCSI_LOG_HLQUEUE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL,CMD); +#define SCSI_LOG_HLCOMPLETE(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL,CMD); +#define SCSI_LOG_IOCTL(LEVEL,CMD) \ + SCSI_CHECK_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL,CMD); + + +#define SCSI_SET_ERROR_RECOVERY_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_ERROR_SHIFT, SCSI_LOG_ERROR_BITS, LEVEL); +#define SCSI_SET_TIMEOUT_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_TIMEOUT_SHIFT, SCSI_LOG_TIMEOUT_BITS, LEVEL); +#define SCSI_SET_SCAN_BUS_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_SCAN_SHIFT, SCSI_LOG_SCAN_BITS, LEVEL); +#define SCSI_SET_MLQUEUE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS, LEVEL); +#define SCSI_SET_MLCOMPLETE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_MLCOMPLETE_SHIFT, SCSI_LOG_MLCOMPLETE_BITS, LEVEL); +#define SCSI_SET_LLQUEUE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_LLQUEUE_SHIFT, SCSI_LOG_LLQUEUE_BITS, LEVEL); +#define SCSI_SET_LLCOMPLETE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_LLCOMPLETE_SHIFT, SCSI_LOG_LLCOMPLETE_BITS, LEVEL); +#define SCSI_SET_HLQUEUE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_HLQUEUE_SHIFT, SCSI_LOG_HLQUEUE_BITS, LEVEL); +#define SCSI_SET_HLCOMPLETE_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_HLCOMPLETE_SHIFT, SCSI_LOG_HLCOMPLETE_BITS, LEVEL); +#define SCSI_SET_IOCTL_LOGGING(LEVEL) \ + SCSI_SET_LOGGING(SCSI_LOG_IOCTL_SHIFT, SCSI_LOG_IOCTL_BITS, LEVEL); + +/* + * the return of the status word will be in the following format : + * The low byte is the status returned by the SCSI command, + * with vendor specific bits masked. + * + * The next byte is the message which followed the SCSI status. + * This allows a stos to be used, since the Intel is a little + * endian machine. + * + * The final byte is a host return code, which is one of the following. + * + * IE + * lsb msb + * status msg host code + * + * Our errors returned by OUR driver, NOT SCSI message. Or'd with + * SCSI message passed back to driver . + */ + + +#define DID_OK 0x00 /* NO error */ +#define DID_NO_CONNECT 0x01 /* Couldn't connect before timeout period */ +#define DID_BUS_BUSY 0x02 /* BUS stayed busy through time out period */ +#define DID_TIME_OUT 0x03 /* TIMED OUT for other reason */ +#define DID_BAD_TARGET 0x04 /* BAD target. */ +#define DID_ABORT 0x05 /* Told to abort for some other reason */ +#define DID_PARITY 0x06 /* Parity error */ +#define DID_ERROR 0x07 /* Internal error */ +#define DID_RESET 0x08 /* Reset by somebody. */ +#define DID_BAD_INTR 0x09 /* Got an interrupt we weren't expecting. 
+						 */
+#define DID_PASSTHROUGH 0x0a	/* Force command past mid-layer */
+#define DID_SOFT_ERROR  0x0b	/* The low level driver just wishes a retry */
+#define DRIVER_OK       0x00	/* Driver status */
+
+/*
+ * These indicate the error that occurred, and what is available.
+ */
+
+#define DRIVER_BUSY         0x01
+#define DRIVER_SOFT         0x02
+#define DRIVER_MEDIA        0x03
+#define DRIVER_ERROR        0x04
+
+#define DRIVER_INVALID      0x05
+#define DRIVER_TIMEOUT      0x06
+#define DRIVER_HARD         0x07
+#define DRIVER_SENSE        0x08
+
+#define SUGGEST_RETRY       0x10
+#define SUGGEST_ABORT       0x20
+#define SUGGEST_REMAP       0x30
+#define SUGGEST_DIE         0x40
+#define SUGGEST_SENSE       0x80
+#define SUGGEST_IS_OK       0xff
+
+#define DRIVER_MASK         0x0f
+#define SUGGEST_MASK        0xf0
+
+#define MAX_COMMAND_SIZE    16
+#define SCSI_SENSE_BUFFERSIZE   64
+
+/*
+ * SCSI command sets
+ */
+
+#define SCSI_UNKNOWN    0
+#define SCSI_1          1
+#define SCSI_1_CCS      2
+#define SCSI_2          3
+#define SCSI_3          4
+
+/*
+ * Every SCSI command starts with a one byte OP-code.
+ * The next byte's high three bits are the LUN of the
+ * device.  Any multi-byte quantities are stored high byte
+ * first, and may have a 5 bit MSB in the same byte
+ * as the LUN.
+ */
+
+/*
+ * As the scsi do command functions are intelligent, and may need to
+ * redo a command, we need to keep track of the last command
+ * executed on each one.
+ */
+
+#define WAS_RESET       0x01
+#define WAS_TIMEDOUT    0x02
+#define WAS_SENSE       0x04
+#define IS_RESETTING    0x08
+#define IS_ABORTING     0x10
+#define ASKED_FOR_SENSE 0x20
+#define SYNC_RESET      0x40
+
+#if defined(__mc68000__) || defined(CONFIG_APUS)
+#include <asm/pgtable.h>
+#define CONTIGUOUS_BUFFERS(X,Y) \
+	(virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data))
+#else
+#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data)
+#endif
+
+
+/*
+ * This is the crap from the old error handling code.  We have it in a special
+ * place so that we can more easily delete it later on.
+ */
+#include "scsi_obsolete.h"
+
+/*
+ * Add some typedefs so that we can prototype a bunch of the functions.
+ */
+typedef struct scsi_device Scsi_Device;
+typedef struct scsi_cmnd Scsi_Cmnd;
+typedef struct scsi_request Scsi_Request;
+
+#define SCSI_CMND_MAGIC 0xE25C23A5
+#define SCSI_REQ_MAGIC  0x75F6D354
+
+/*
+ * Here is where we prototype most of the mid-layer.
+ */
+
+/*
+ * Initializes all SCSI devices.  This scans all scsi busses.
+ */
+
+extern unsigned int scsi_logging_level;	/* What do we log?
*/ +extern unsigned int scsi_dma_free_sectors; /* How much room do we have left */ +extern unsigned int scsi_need_isa_buffer; /* True if some devices need indirection + * buffers */ +extern volatile int in_scan_scsis; +extern const unsigned char scsi_command_size[8]; + + +/* + * These are the error handling functions defined in scsi_error.c + */ +extern void scsi_times_out(Scsi_Cmnd * SCpnt); +extern void scsi_add_timer(Scsi_Cmnd * SCset, int timeout, + void (*complete) (Scsi_Cmnd *)); +extern int scsi_delete_timer(Scsi_Cmnd * SCset); +extern void scsi_error_handler(void *host); +extern int scsi_sense_valid(Scsi_Cmnd *); +extern int scsi_decide_disposition(Scsi_Cmnd * SCpnt); +extern int scsi_block_when_processing_errors(Scsi_Device *); +extern void scsi_sleep(int); + +/* + * Prototypes for functions in scsicam.c + */ +extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity, + unsigned int *cyls, unsigned int *hds, + unsigned int *secs); + +/* + * Prototypes for functions in scsi_dma.c + */ +void scsi_resize_dma_pool(void); +int scsi_init_minimal_dma_pool(void); +void *scsi_malloc(unsigned int); +int scsi_free(void *, unsigned int); + +/* + * Prototypes for functions in scsi_merge.c + */ +extern void recount_segments(Scsi_Cmnd * SCpnt); +extern void initialize_merge_fn(Scsi_Device * SDpnt); + +/* + * Prototypes for functions in scsi_queue.c + */ +extern int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason); + +/* + * Prototypes for functions in scsi_lib.c + */ +extern int scsi_maybe_unblock_host(Scsi_Device * SDpnt); +extern Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate, + int sectors); +extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *); +extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt); +extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int); +extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, + int block_sectors); +extern void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt); +extern void scsi_request_fn(request_queue_t * q); +extern int scsi_starvation_completion(Scsi_Device * SDpnt); + +/* + * Prototypes for functions in scsi.c + */ +extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt); +extern void scsi_bottom_half_handler(void); +extern void scsi_release_commandblocks(Scsi_Device * SDpnt); +extern void scsi_build_commandblocks(Scsi_Device * SDpnt); +extern void scsi_done(Scsi_Cmnd * SCpnt); +extern void scsi_finish_command(Scsi_Cmnd *); +extern int scsi_retry_command(Scsi_Cmnd *); +extern Scsi_Cmnd *scsi_allocate_device(Scsi_Device *, int, int); +extern void __scsi_release_command(Scsi_Cmnd *); +extern void scsi_release_command(Scsi_Cmnd *); +extern void scsi_do_cmd(Scsi_Cmnd *, const void *cmnd, + void *buffer, unsigned bufflen, + void (*done) (struct scsi_cmnd *), + int timeout, int retries); +extern int scsi_dev_init(void); + +/* + * Newer request-based interfaces. 
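+ * (a hedged usage sketch follows)
+ */
+
+/*
+ * Aside: a typical caller of these declarations, sketched after the way
+ * 2.4-era upper-level drivers issued TEST UNIT READY; this is an
+ * illustration, not code from this tree (guarded by #if 0):
+ */
+#if 0
+static int sketch_test_unit_ready(Scsi_Device *SDpnt)
+{
+	unsigned char cmd[6] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+	Scsi_Request *SRpnt;
+	int result;
+
+	SRpnt = scsi_allocate_request(SDpnt);
+	if (SRpnt == NULL)
+		return -1;
+	SRpnt->sr_data_direction = SCSI_DATA_NONE;
+	scsi_wait_req(SRpnt, (void *) cmd, NULL, 0,
+		      30 * HZ, 3 /* retries */);
+	result = SRpnt->sr_result;	/* 0 means the unit answered */
+	scsi_release_request(SRpnt);
+	return result;
+}
+#endif
+/* (end of aside; the original comment resumes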
+ */
+extern Scsi_Request *scsi_allocate_request(Scsi_Device *);
+extern void scsi_release_request(Scsi_Request *);
+extern void scsi_wait_req(Scsi_Request *, const void *cmnd,
+			  void *buffer, unsigned bufflen,
+			  int timeout, int retries);
+
+extern void scsi_do_req(Scsi_Request *, const void *cmnd,
+			void *buffer, unsigned bufflen,
+			void (*done) (struct scsi_cmnd *),
+			int timeout, int retries);
+extern int scsi_insert_special_req(Scsi_Request * SRpnt, int);
+extern void scsi_init_cmd_from_req(Scsi_Cmnd *, Scsi_Request *);
+
+
+/*
+ * Prototypes for functions/data in hosts.c
+ */
+extern int max_scsi_hosts;
+
+/*
+ * Prototypes for functions in scsi_proc.c
+ */
+extern void proc_print_scsidevice(Scsi_Device *, char *, int *, int);
+extern struct proc_dir_entry *proc_scsi;
+
+/*
+ * Prototypes for functions in constants.c
+ */
+extern void print_command(unsigned char *);
+extern void print_sense(const char *, Scsi_Cmnd *);
+extern void print_req_sense(const char *, Scsi_Request *);
+extern void print_driverbyte(int scsiresult);
+extern void print_hostbyte(int scsiresult);
+extern void print_status (int status);
+
+/*
+ * The scsi_device struct contains what we know about each given scsi
+ * device.
+ *
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sdev_foo instead of foo.  This would
+ * make it so much easier to grep through sources and so forth.  I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+
+struct scsi_device {
+/* private: */
+	/*
+	 * This information is private to the scsi mid-layer.  Wrapping it in a
+	 * struct private is a way of marking it in a sort of C++ type of way.
+	 */
+	struct scsi_device *next;	/* Used for linked list */
+	struct scsi_device *prev;	/* Used for linked list */
+	wait_queue_head_t scpnt_wait;	/* Used to wait if
+					   device is busy */
+	struct Scsi_Host *host;
+	request_queue_t request_queue;
+	atomic_t device_active;	/* commands checked out for device */
+	volatile unsigned short device_busy;	/* commands actually active on low-level */
+	int (*scsi_init_io_fn) (Scsi_Cmnd *);	/* Used to initialize
+						   new request */
+	Scsi_Cmnd *device_queue;	/* queue of SCSI Command structures */
+
+/* public: */
+	unsigned int id, lun, channel;
+
+	unsigned int manufacturer;	/* Manufacturer of device, for using
+					 * vendor-specific cmd's */
+	unsigned sector_size;	/* size in bytes */
+
+	int attached;		/* # of high level drivers attached to this */
+	int detected;		/* Delta attached - don't use in drivers!
*/ + int access_count; /* Count of open channels/mounts */ + + void *hostdata; /* available to low-level driver */ + devfs_handle_t de; /* directory for the device */ + char type; + char scsi_level; + char vendor[8], model[16], rev[4]; + unsigned char current_tag; /* current tag */ + unsigned char sync_min_period; /* Not less than this period */ + unsigned char sync_max_offset; /* Not greater than this offset */ + unsigned char queue_depth; /* How deep a queue to use */ + + unsigned online:1; + unsigned writeable:1; + unsigned removable:1; + unsigned random:1; + unsigned has_cmdblocks:1; + unsigned changed:1; /* Data invalid due to media change */ + unsigned busy:1; /* Used to prevent races */ + unsigned lockable:1; /* Able to prevent media removal */ + unsigned borken:1; /* Tell the Seagate driver to be + * painfully slow on this device */ + unsigned tagged_supported:1; /* Supports SCSI-II tagged queuing */ + unsigned tagged_queue:1; /* SCSI-II tagged queuing enabled */ + unsigned disconnect:1; /* can disconnect */ + unsigned soft_reset:1; /* Uses soft reset option */ + unsigned sync:1; /* Negotiate for sync transfers */ + unsigned wide:1; /* Negotiate for WIDE transfers */ + unsigned single_lun:1; /* Indicates we should only allow I/O to + * one of the luns for the device at a + * time. */ + unsigned was_reset:1; /* There was a bus reset on the bus for + * this device */ + unsigned expecting_cc_ua:1; /* Expecting a CHECK_CONDITION/UNIT_ATTN + * because we did a bus reset. */ + unsigned device_blocked:1; /* Device returned QUEUE_FULL. */ + unsigned ten:1; /* support ten byte read / write */ + unsigned remap:1; /* support remapping */ + unsigned starved:1; /* unable to process commands because + host busy */ + + // Flag to allow revalidate to succeed in sd_open + int allow_revalidate; +}; + + +/* + * The Scsi_Cmnd structure is used by scsi.c internally, and for communication + * with low level drivers that support multiple outstanding commands. + */ +typedef struct scsi_pointer { + char *ptr; /* data pointer */ + int this_residual; /* left in this buffer */ + struct scatterlist *buffer; /* which buffer */ + int buffers_residual; /* how many buffers left */ + + dma_addr_t dma_handle; + + volatile int Status; + volatile int Message; + volatile int have_data_in; + volatile int sent_command; + volatile int phase; +} Scsi_Pointer; + +/* + * This is essentially a slimmed down version of Scsi_Cmnd. The point of + * having this is that requests that are injected into the queue as result + * of things like ioctls and character devices shouldn't be using a + * Scsi_Cmnd until such a time that the command is actually at the head + * of the queue and being sent to the driver. 
+ */
+struct scsi_request {
+	int sr_magic;
+	int sr_result;		/* Status code from lower level driver */
+	unsigned char sr_sense_buffer[SCSI_SENSE_BUFFERSIZE];	/* obtained by REQUEST SENSE
+								 * when CHECK CONDITION is
+								 * received on original command
+								 * (auto-sense) */
+
+	struct Scsi_Host *sr_host;
+	Scsi_Device *sr_device;
+	Scsi_Cmnd *sr_command;
+	struct request sr_request;	/* A copy of the command we are
+					   working on */
+	unsigned sr_bufflen;	/* Size of data buffer */
+	void *sr_buffer;	/* Data buffer */
+	int sr_allowed;
+	unsigned char sr_data_direction;
+	unsigned char sr_cmd_len;
+	unsigned char sr_cmnd[MAX_COMMAND_SIZE];
+	void (*sr_done) (struct scsi_cmnd *);	/* Mid-level done function */
+	int sr_timeout_per_command;
+	unsigned short sr_use_sg;	/* Number of pieces of scatter-gather */
+	unsigned short sr_sglist_len;	/* size of malloc'd scatter-gather list */
+	unsigned sr_underflow;	/* Return error if less than
+				   this amount is transferred */
+};
+
+/*
+ * FIXME(eric) - one of the great regrets that I have is that I failed to define
+ * these structure elements as something like sc_foo instead of foo.  This would
+ * make it so much easier to grep through sources and so forth.  I propose that
+ * all new elements that get added to these structures follow this convention.
+ * As time goes on and as people have the stomach for it, it should be possible to
+ * go back and retrofit at least some of the elements here with the prefix.
+ */
+struct scsi_cmnd {
+	int sc_magic;
+/* private: */
+	/*
+	 * This information is private to the scsi mid-layer.  Wrapping it in a
+	 * struct private is a way of marking it in a sort of C++ type of way.
+	 */
+	struct Scsi_Host *host;
+	unsigned short state;
+	unsigned short owner;
+	Scsi_Device *device;
+	Scsi_Request *sc_request;
+	struct scsi_cmnd *next;
+	struct scsi_cmnd *reset_chain;
+
+	int eh_state;		/* Used for state tracking in error handler */
+	void (*done) (struct scsi_cmnd *);	/* Mid-level done function */
+	/*
+	   A SCSI Command is assigned a nonzero serial_number when internal_cmnd
+	   passes it to the driver's queue command function.  The serial_number
+	   is cleared when scsi_done is entered indicating that the command has
+	   been completed.  If a timeout occurs, the serial number at the moment
+	   of timeout is copied into serial_number_at_timeout.  By subsequently
+	   comparing the serial_number and serial_number_at_timeout fields
+	   during abort or reset processing, we can detect whether the command
+	   has already completed.  This also detects cases where the command has
+	   completed and the SCSI Command structure has already been reused
+	   for another command, so that we can avoid incorrectly aborting or
+	   resetting the new command.
+	 */
+
+	unsigned long serial_number;
+	unsigned long serial_number_at_timeout;
+
+	int retries;
+	int allowed;
+	int timeout_per_command;
+	int timeout_total;
+	int timeout;
+
+	/*
+	 * We handle the timeout differently if it happens when a reset,
+	 * abort, etc are in process.
+	 */
+	unsigned volatile char internal_timeout;
+	struct scsi_cmnd *bh_next;	/* To enumerate the commands waiting
+					   to be processed.
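+					   (an aside on the serial-number
+					   race check above follows) */
+
+	/*
+	 * Aside: the completion test described above reduces to two
+	 * compares.  A self-contained sketch; the field names are borrowed
+	 * and the logic is illustrative (guarded by #if 0):
+	 */
+#if 0
+#include <stdio.h>
+
+struct cmd {
+	unsigned long serial_number;		/* 0 once completed */
+	unsigned long serial_number_at_timeout;	/* latched by the timer */
+};
+
+static int timeout_still_valid(const struct cmd *c)
+{
+	return c->serial_number != 0 &&
+	       c->serial_number == c->serial_number_at_timeout;
+}
+
+int main(void)
+{
+	struct cmd c = { 42, 42 };
+
+	printf("%d\n", timeout_still_valid(&c));	/* 1: abort may proceed */
+	c.serial_number = 0;				/* command completed */
+	printf("%d\n", timeout_still_valid(&c));	/* 0: stale timeout */
+	return 0;
+}
+#endif
+	/* (end of aside; the original comment resumes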
*/ + +/* public: */ + + unsigned int target; + unsigned int lun; + unsigned int channel; + unsigned char cmd_len; + unsigned char old_cmd_len; + unsigned char sc_data_direction; + unsigned char sc_old_data_direction; + + /* These elements define the operation we are about to perform */ + unsigned char cmnd[MAX_COMMAND_SIZE]; + unsigned request_bufflen; /* Actual request size */ + + struct timer_list eh_timeout; /* Used to time out the command. */ + void *request_buffer; /* Actual requested buffer */ + void **bounce_buffers; /* Array of bounce buffers when using scatter-gather */ + + /* These elements define the operation we ultimately want to perform */ + unsigned char data_cmnd[MAX_COMMAND_SIZE]; + unsigned short old_use_sg; /* We save use_sg here when requesting + * sense info */ + unsigned short use_sg; /* Number of pieces of scatter-gather */ + unsigned short sglist_len; /* size of malloc'd scatter-gather list */ + unsigned short abort_reason; /* If the mid-level code requests an + * abort, this is the reason. */ + unsigned bufflen; /* Size of data buffer */ + void *buffer; /* Data buffer */ + + unsigned underflow; /* Return error if less than + this amount is transferred */ + unsigned old_underflow; /* save underflow here when reusing the + * command for error handling */ + + unsigned transfersize; /* How much we are guaranteed to + transfer with each SCSI transfer + (ie, between disconnect / + reconnects. Probably == sector + size */ + + int resid; /* Number of bytes requested to be + transferred less actual number + transferred (0 if not supported) */ + + struct request request; /* A copy of the command we are + working on */ + + unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE]; /* obtained by REQUEST SENSE + * when CHECK CONDITION is + * received on original command + * (auto-sense) */ + + unsigned flags; + + /* + * Used to indicate that a command which has timed out also + * completed normally. Typically the completion function will + * do nothing but set this flag in this instance because the + * timeout handler is already running. + */ + unsigned done_late:1; + + /* Low-level done function - can be used by low-level driver to point + * to completion function. Not used by mid/upper level code. */ + void (*scsi_done) (struct scsi_cmnd *); + + /* + * The following fields can be written to by the host specific code. + * Everything else should be left alone. + */ + + Scsi_Pointer SCp; /* Scratchpad used by some host adapters */ + + unsigned char *host_scribble; /* The host adapter is allowed to + * call scsi_malloc and get some memory + * and hang it here. The host adapter + * is also expected to call scsi_free + * to release this memory. (The memory + * obtained by scsi_malloc is guaranteed + * to be at an address < 16Mb). */ + + int result; /* Status code from lower level driver */ + + unsigned char tag; /* SCSI-II queued command tag */ + unsigned long pid; /* Process ID, starts at 0 */ +}; + +/* + * Flag bit for the internal_timeout array + */ +#define NORMAL_TIMEOUT 0 + +/* + * Definitions and prototypes used for scsi mid-level queue. 
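The paired old_* fields above exist so that error handling can temporarily repurpose a command (for REQUEST SENSE, TEST UNIT READY and the like) and then put the original operation back before completion. A sketch of that restore step, mirroring the assignments scsi_eh_retry_command() makes later in this changeset (the helper name is illustrative):

/* Sketch: restore the original operation into a command after the
 * error handler has borrowed it. */
static void restore_original_command(struct scsi_cmnd *SCpnt)
{
        memcpy(SCpnt->cmnd, SCpnt->data_cmnd, sizeof(SCpnt->data_cmnd));
        SCpnt->request_buffer = SCpnt->buffer;
        SCpnt->request_bufflen = SCpnt->bufflen;
        SCpnt->use_sg = SCpnt->old_use_sg;
        SCpnt->cmd_len = SCpnt->old_cmd_len;
        SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
        SCpnt->underflow = SCpnt->old_underflow;
}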
+ */ +#define SCSI_MLQUEUE_HOST_BUSY 0x1055 +#define SCSI_MLQUEUE_DEVICE_BUSY 0x1056 + +#define SCSI_SLEEP(QUEUE, CONDITION) { \ + if (CONDITION) { \ + DECLARE_WAITQUEUE(wait, current); \ + add_wait_queue(QUEUE, &wait); \ + for(;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (CONDITION) { \ + if (in_interrupt()) \ + panic("scsi: trying to call schedule() in interrupt" \ + ", file %s, line %d.\n", __FILE__, __LINE__); \ + schedule(); \ + } \ + else \ + break; \ + } \ + remove_wait_queue(QUEUE, &wait);\ + current->state = TASK_RUNNING; \ + }; } + +/* + * old style reset request from external source + * (private to sg.c and scsi_error.c, supplied by scsi_obsolete.c) + */ +#define SCSI_TRY_RESET_DEVICE 1 +#define SCSI_TRY_RESET_BUS 2 +#define SCSI_TRY_RESET_HOST 3 + +extern int scsi_reset_provider(Scsi_Device *, int); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/drivers/scsi/scsi_dma.c b/xen-2.4.16/drivers/scsi/scsi_dma.c new file mode 100644 index 0000000000..5594828616 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_dma.c @@ -0,0 +1,451 @@ +/* + * scsi_dma.c Copyright (C) 2000 Eric Youngdale + * + * mid-level SCSI DMA bounce buffer allocator + * + */ + +#define __NO_VERSION__ +#include +#include +#include + + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +#ifdef CONFIG_KMOD +#include +#endif + +/* + * PAGE_SIZE must be a multiple of the sector size (512). True + * for all reasonably recent architectures (even the VAX...). + */ +#define SECTOR_SIZE 512 +#define SECTORS_PER_PAGE (PAGE_SIZE/SECTOR_SIZE) + +#if SECTORS_PER_PAGE <= 8 +typedef unsigned char FreeSectorBitmap; +#elif SECTORS_PER_PAGE <= 32 +typedef unsigned int FreeSectorBitmap; +#else +#error You lose. +#endif + +/* + * Used for access to internal allocator used for DMA safe buffers. + */ +static spinlock_t allocator_request_lock = SPIN_LOCK_UNLOCKED; + +static FreeSectorBitmap *dma_malloc_freelist = NULL; +static int need_isa_bounce_buffers; +static unsigned int dma_sectors = 0; +unsigned int scsi_dma_free_sectors = 0; +unsigned int scsi_need_isa_buffer = 0; +static unsigned char **dma_malloc_pages = NULL; + +/* + * Function: scsi_malloc + * + * Purpose: Allocate memory from the DMA-safe pool. + * + * Arguments: len - amount of memory we need. + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Pointer to memory block. + * + * Notes: Prior to the new queue code, this function was not SMP-safe. + * This function can only allocate in units of sectors + * (i.e. 512 bytes). + * + * We cannot use the normal system allocator becuase we need + * to be able to guarantee that we can process a complete disk + * I/O request without touching the system allocator. Think + * about it - if the system were heavily swapping, and tried to + * write out a block of memory to disk, and the SCSI code needed + * to allocate more memory in order to be able to write the + * data to disk, you would wedge the system. 
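In practice the contract of the allocator documented above is narrow: lengths must be a multiple of 512 bytes, no larger than PAGE_SIZE, and every scsi_malloc() must be paired with a scsi_free() of the same length. A usage sketch (bounce_example is an illustrative name):

/* Usage sketch for the DMA-safe pool whose body follows. */
static int bounce_example(void)
{
        void *buf = scsi_malloc(1024);  /* two 512-byte sectors */

        if (buf == NULL)
                return -1;              /* pool exhausted; caller retries */
        /* ... use buf as a DMA-safe bounce buffer ... */
        scsi_free(buf, 1024);           /* length must match the alloc */
        return 0;
}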
+ */ +void *scsi_malloc(unsigned int len) +{ + unsigned int nbits, mask; + unsigned long flags; + + int i, j; + if (len % SECTOR_SIZE != 0 || len > PAGE_SIZE) + return NULL; + + nbits = len >> 9; + mask = (1 << nbits) - 1; + + spin_lock_irqsave(&allocator_request_lock, flags); + + for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++) + for (j = 0; j <= SECTORS_PER_PAGE - nbits; j++) { + if ((dma_malloc_freelist[i] & (mask << j)) == 0) { + dma_malloc_freelist[i] |= (mask << j); + scsi_dma_free_sectors -= nbits; +#ifdef DEBUG + SCSI_LOG_MLQUEUE(3, printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9))); + printk("SMalloc: %d %p [From:%p]\n", len, dma_malloc_pages[i] + (j << 9)); +#endif + spin_unlock_irqrestore(&allocator_request_lock, flags); + return (void *) ((unsigned long) dma_malloc_pages[i] + (j << 9)); + } + } + spin_unlock_irqrestore(&allocator_request_lock, flags); + return NULL; /* Nope. No more */ +} + +/* + * Function: scsi_free + * + * Purpose: Free memory into the DMA-safe pool. + * + * Arguments: ptr - data block we are freeing. + * len - size of block we are freeing. + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Nothing + * + * Notes: This function *must* only be used to free memory + * allocated from scsi_malloc(). + * + * Prior to the new queue code, this function was not SMP-safe. + * This function can only allocate in units of sectors + * (i.e. 512 bytes). + */ +int scsi_free(void *obj, unsigned int len) +{ + unsigned int page, sector, nbits, mask; + unsigned long flags; + +#ifdef DEBUG + unsigned long ret = 0; + +#ifdef __mips__ + __asm__ __volatile__("move\t%0,$31":"=r"(ret)); +#else + ret = __builtin_return_address(0); +#endif + printk("scsi_free %p %d\n", obj, len); + SCSI_LOG_MLQUEUE(3, printk("SFree: %p %d\n", obj, len)); +#endif + + spin_lock_irqsave(&allocator_request_lock, flags); + + for (page = 0; page < dma_sectors / SECTORS_PER_PAGE; page++) { + unsigned long page_addr = (unsigned long) dma_malloc_pages[page]; + if ((unsigned long) obj >= page_addr && + (unsigned long) obj < page_addr + PAGE_SIZE) { + sector = (((unsigned long) obj) - page_addr) >> 9; + + nbits = len >> 9; + mask = (1 << nbits) - 1; + + if (sector + nbits > SECTORS_PER_PAGE) + panic("scsi_free:Bad memory alignment"); + + if ((dma_malloc_freelist[page] & + (mask << sector)) != (mask << sector)) { +#ifdef DEBUG + printk("scsi_free(obj=%p, len=%d) called from %08lx\n", + obj, len, ret); +#endif + panic("scsi_free:Trying to free unused memory"); + } + scsi_dma_free_sectors += nbits; + dma_malloc_freelist[page] &= ~(mask << sector); + spin_unlock_irqrestore(&allocator_request_lock, flags); + return 0; + } + } + panic("scsi_free:Bad offset"); +} + + +/* + * Function: scsi_resize_dma_pool + * + * Purpose: Ensure that the DMA pool is sufficiently large to be + * able to guarantee that we can always process I/O requests + * without calling the system allocator. + * + * Arguments: None. + * + * Lock status: No locks assumed to be held. This function is SMP-safe. + * + * Returns: Nothing + * + * Notes: Prior to the new queue code, this function was not SMP-safe. + * Go through the device list and recompute the most appropriate + * size for the dma pool. Then grab more memory (as required). 
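Stripped of locking and kernel types, scsi_malloc()'s search above is a first-fit scan for a run of free sector bits within a single page's bitmap. A standalone restatement that compiles and runs on its own, assuming 4 KB pages (so 8 sectors per page); all names here are illustrative:

#include <stdio.h>

#define SECTORS_PER_PAGE 8              /* assumes PAGE_SIZE == 4096 */

static unsigned char freelist[4];       /* 4 pages, one bit per sector */

/* First-fit within a page, as in scsi_malloc() above: len is in
 * bytes and must be a sector multiple no larger than a page. */
static int bitmap_alloc(unsigned int len)
{
        unsigned int nbits = len >> 9;
        unsigned int mask = (1 << nbits) - 1;
        int i, j;

        for (i = 0; i < 4; i++)
                for (j = 0; j <= (int)(SECTORS_PER_PAGE - nbits); j++)
                        if ((freelist[i] & (mask << j)) == 0) {
                                freelist[i] |= mask << j;
                                return i * SECTORS_PER_PAGE + j;
                        }
        return -1;                      /* no run of nbits free sectors */
}

int main(void)
{
        printf("1K at sector %d\n", bitmap_alloc(1024));        /* 0 */
        printf("2K at sector %d\n", bitmap_alloc(2048));        /* 2 */
        return 0;
}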
+ */ +void scsi_resize_dma_pool(void) +{ + int i, k; + unsigned long size; + unsigned long flags; + struct Scsi_Host *shpnt; + struct Scsi_Host *host = NULL; + Scsi_Device *SDpnt; + FreeSectorBitmap *new_dma_malloc_freelist = NULL; + unsigned int new_dma_sectors = 0; + unsigned int new_need_isa_buffer = 0; + unsigned char **new_dma_malloc_pages = NULL; + int out_of_space = 0; + + spin_lock_irqsave(&allocator_request_lock, flags); + + if (!scsi_hostlist) { + /* + * Free up the DMA pool. + */ + if (scsi_dma_free_sectors != dma_sectors) + panic("SCSI DMA pool memory leak %d %d\n", scsi_dma_free_sectors, dma_sectors); + + for (i = 0; i < dma_sectors / SECTORS_PER_PAGE; i++) + free_pages((unsigned long) dma_malloc_pages[i], 0); + if (dma_malloc_pages) + kfree((char *) dma_malloc_pages); + dma_malloc_pages = NULL; + if (dma_malloc_freelist) + kfree((char *) dma_malloc_freelist); + dma_malloc_freelist = NULL; + dma_sectors = 0; + scsi_dma_free_sectors = 0; + spin_unlock_irqrestore(&allocator_request_lock, flags); + return; + } + /* Next, check to see if we need to extend the DMA buffer pool */ + + new_dma_sectors = 2 * SECTORS_PER_PAGE; /* Base value we use */ + + if (__pa(high_memory) - 1 > ISA_DMA_THRESHOLD) + need_isa_bounce_buffers = 1; + else + need_isa_bounce_buffers = 0; + + if (scsi_devicelist) + for (shpnt = scsi_hostlist; shpnt; shpnt = shpnt->next) + new_dma_sectors += SECTORS_PER_PAGE; /* Increment for each host */ + + for (host = scsi_hostlist; host; host = host->next) { + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + /* + * sd and sr drivers allocate scatterlists. + * sr drivers may allocate for each command 1x2048 or 2x1024 extra + * buffers for 2k sector size and 1k fs. + * sg driver allocates buffers < 4k. + * st driver does not need buffers from the dma pool. + * estimate 4k buffer/command for devices of unknown type (should panic). + */ + if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM || + SDpnt->type == TYPE_DISK || SDpnt->type == TYPE_MOD) { + int nents = host->sg_tablesize; +#ifdef DMA_CHUNK_SIZE + /* If the architecture does DMA sg merging, make sure + we count with at least 64 entries even for HBAs + which handle very few sg entries. */ + if (nents < 64) nents = 64; +#endif + new_dma_sectors += ((nents * + sizeof(struct scatterlist) + 511) >> 9) * + SDpnt->queue_depth; + if (SDpnt->type == TYPE_WORM || SDpnt->type == TYPE_ROM) + new_dma_sectors += (2048 >> 9) * SDpnt->queue_depth; + } else if (SDpnt->type == TYPE_SCANNER || + SDpnt->type == TYPE_PRINTER || + SDpnt->type == TYPE_PROCESSOR || + SDpnt->type == TYPE_COMM || + SDpnt->type == TYPE_MEDIUM_CHANGER || + SDpnt->type == TYPE_ENCLOSURE) { + new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth; + } else { + if (SDpnt->type != TYPE_TAPE) { + printk("resize_dma_pool: unknown device type %d\n", SDpnt->type); + new_dma_sectors += (4096 >> 9) * SDpnt->queue_depth; + } + } + + if (host->unchecked_isa_dma && + need_isa_bounce_buffers && + SDpnt->type != TYPE_TAPE) { + new_dma_sectors += (PAGE_SIZE >> 9) * host->sg_tablesize * + SDpnt->queue_depth; + new_need_isa_buffer++; + } + } + } + +#ifdef DEBUG_INIT + printk("resize_dma_pool: needed dma sectors = %d\n", new_dma_sectors); +#endif + + /* limit DMA memory to 32MB: */ + new_dma_sectors = (new_dma_sectors + 15) & 0xfff0; + + /* + * We never shrink the buffers - this leads to + * race conditions that I would rather not even think + * about right now. + */ +#if 0 /* Why do this? 
No gain and risks out_of_space */ + if (new_dma_sectors < dma_sectors) + new_dma_sectors = dma_sectors; +#endif + if (new_dma_sectors <= dma_sectors) { + spin_unlock_irqrestore(&allocator_request_lock, flags); + return; /* best to quit while we are in front */ + } + + for (k = 0; k < 20; ++k) { /* just in case */ + out_of_space = 0; + size = (new_dma_sectors / SECTORS_PER_PAGE) * + sizeof(FreeSectorBitmap); + new_dma_malloc_freelist = (FreeSectorBitmap *) + kmalloc(size, GFP_ATOMIC); + if (new_dma_malloc_freelist) { + memset(new_dma_malloc_freelist, 0, size); + size = (new_dma_sectors / SECTORS_PER_PAGE) * + sizeof(*new_dma_malloc_pages); + new_dma_malloc_pages = (unsigned char **) + kmalloc(size, GFP_ATOMIC); + if (!new_dma_malloc_pages) { + size = (new_dma_sectors / SECTORS_PER_PAGE) * + sizeof(FreeSectorBitmap); + kfree((char *) new_dma_malloc_freelist); + out_of_space = 1; + } else { + memset(new_dma_malloc_pages, 0, size); + } + } else + out_of_space = 1; + + if ((!out_of_space) && (new_dma_sectors > dma_sectors)) { + for (i = dma_sectors / SECTORS_PER_PAGE; + i < new_dma_sectors / SECTORS_PER_PAGE; i++) { + new_dma_malloc_pages[i] = (unsigned char *) + __get_free_pages(GFP_ATOMIC | GFP_DMA, 0); + if (!new_dma_malloc_pages[i]) + break; + } + if (i != new_dma_sectors / SECTORS_PER_PAGE) { /* clean up */ + int k = i; + + out_of_space = 1; + for (i = 0; i < k; ++i) + free_pages((unsigned long) new_dma_malloc_pages[i], 0); + } + } + if (out_of_space) { /* try scaling down new_dma_sectors request */ + printk("scsi::resize_dma_pool: WARNING, dma_sectors=%u, " + "wanted=%u, scaling\n", dma_sectors, new_dma_sectors); + if (new_dma_sectors < (8 * SECTORS_PER_PAGE)) + break; /* pretty well hopeless ... */ + new_dma_sectors = (new_dma_sectors * 3) / 4; + new_dma_sectors = (new_dma_sectors + 15) & 0xfff0; + if (new_dma_sectors <= dma_sectors) + break; /* stick with what we have got */ + } else + break; /* found space ... */ + } /* end of for loop */ + if (out_of_space) { + spin_unlock_irqrestore(&allocator_request_lock, flags); + scsi_need_isa_buffer = new_need_isa_buffer; /* some useful info */ + printk(" WARNING, not enough memory, pool not expanded\n"); + return; + } + /* When we dick with the actual DMA list, we need to + * protect things + */ + if (dma_malloc_freelist) { + size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap); + memcpy(new_dma_malloc_freelist, dma_malloc_freelist, size); + kfree((char *) dma_malloc_freelist); + } + dma_malloc_freelist = new_dma_malloc_freelist; + + if (dma_malloc_pages) { + size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages); + memcpy(new_dma_malloc_pages, dma_malloc_pages, size); + kfree((char *) dma_malloc_pages); + } + scsi_dma_free_sectors += new_dma_sectors - dma_sectors; + dma_malloc_pages = new_dma_malloc_pages; + dma_sectors = new_dma_sectors; + scsi_need_isa_buffer = new_need_isa_buffer; + + spin_unlock_irqrestore(&allocator_request_lock, flags); + +#ifdef DEBUG_INIT + printk("resize_dma_pool: dma free sectors = %d\n", scsi_dma_free_sectors); + printk("resize_dma_pool: dma sectors = %d\n", dma_sectors); + printk("resize_dma_pool: need isa buffers = %d\n", scsi_need_isa_buffer); +#endif +} + +/* + * Function: scsi_init_minimal_dma_pool + * + * Purpose: Allocate a minimal (1-page) DMA pool. + * + * Arguments: None. + * + * Lock status: No locks assumed to be held. This function is SMP-safe. 
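One detail of the sizing code above is worth spelling out: the rounding expression (new_dma_sectors + 15) & 0xfff0 rounds up to a multiple of 16 sectors, and because the mask is 0xfff0 rather than ~15 it also discards bits above sector 65535, which is how the "limit DMA memory to 32MB" comment is enforced; note that the mask truncates (wraps) rather than clamps. A standalone check:

#include <stdio.h>

/* The pool-size rounding used above: multiple of 16 sectors, with
 * everything above 65535 sectors (32MB of 512-byte sectors) masked
 * away rather than clamped. */
static unsigned int round_sectors(unsigned int n)
{
        return (n + 15) & 0xfff0;
}

int main(void)
{
        printf("%u\n", round_sectors(100));     /* 112 */
        printf("%u\n", round_sectors(65536));   /* 0: masked, not clamped */
        return 0;
}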
+ * + * Returns: Nothing + * + * Notes: + */ +int scsi_init_minimal_dma_pool(void) +{ + unsigned long size; + unsigned long flags; + int has_space = 0; + + spin_lock_irqsave(&allocator_request_lock, flags); + + dma_sectors = PAGE_SIZE / SECTOR_SIZE; + scsi_dma_free_sectors = dma_sectors; + /* + * Set up a minimal DMA buffer list - this will be used during scan_scsis + * in some cases. + */ + + /* One bit per sector to indicate free/busy */ + size = (dma_sectors / SECTORS_PER_PAGE) * sizeof(FreeSectorBitmap); + dma_malloc_freelist = (FreeSectorBitmap *) + kmalloc(size, GFP_ATOMIC); + if (dma_malloc_freelist) { + memset(dma_malloc_freelist, 0, size); + /* One pointer per page for the page list */ + dma_malloc_pages = (unsigned char **) kmalloc( + (dma_sectors / SECTORS_PER_PAGE) * sizeof(*dma_malloc_pages), + GFP_ATOMIC); + if (dma_malloc_pages) { + memset(dma_malloc_pages, 0, size); + dma_malloc_pages[0] = (unsigned char *) + __get_free_pages(GFP_ATOMIC | GFP_DMA, 0); + if (dma_malloc_pages[0]) + has_space = 1; + } + } + if (!has_space) { + if (dma_malloc_freelist) { + kfree((char *) dma_malloc_freelist); + if (dma_malloc_pages) + kfree((char *) dma_malloc_pages); + } + spin_unlock_irqrestore(&allocator_request_lock, flags); + printk("scsi::init_module: failed, out of memory\n"); + return 1; + } + + spin_unlock_irqrestore(&allocator_request_lock, flags); + return 0; +} diff --git a/xen-2.4.16/drivers/scsi/scsi_error.c b/xen-2.4.16/drivers/scsi/scsi_error.c new file mode 100644 index 0000000000..e4a69bc835 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_error.c @@ -0,0 +1,2027 @@ +/* + * scsi_error.c Copyright (C) 1997 Eric Youngdale + * + * SCSI error/timeout handling + * Initial versions: Eric Youngdale. Based upon conversations with + * Leonard Zubkoff and David Miller at Linux Expo, + * ideas originating from all over the place. + * + */ + +#define __NO_VERSION__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KERNEL_SYSCALLS__ + +#include + +#include +#include +#include + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +/* + * We must always allow SHUTDOWN_SIGS. Even if we are not a module, + * the host drivers that we are using may be loaded as modules, and + * when we unload these, we need to ensure that the error handler thread + * can be shut down. + * + * Note - when we unload a module, we send a SIGHUP. We mustn't + * enable SIGTERM, as this is how the init shuts things down when you + * go to single-user mode. For that matter, init also sends SIGKILL, + * so we mustn't enable that one either. We use SIGHUP instead. Other + * options would be SIGPWR, I suppose. + */ +#define SHUTDOWN_SIGS (sigmask(SIGHUP)) + +#ifdef DEBUG +#define SENSE_TIMEOUT SCSI_TIMEOUT +#define ABORT_TIMEOUT SCSI_TIMEOUT +#define RESET_TIMEOUT SCSI_TIMEOUT +#else +#define SENSE_TIMEOUT (10*HZ) +#define RESET_TIMEOUT (2*HZ) +#define ABORT_TIMEOUT (15*HZ) +#endif + +#define STATIC + +/* + * These should *probably* be handled by the host itself. + * Since it is allowed to sleep, it probably should. 
+ */ +#define BUS_RESET_SETTLE_TIME 5*HZ +#define HOST_RESET_SETTLE_TIME 10*HZ + + +static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $"; + +STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt); +STATIC int scsi_request_sense(Scsi_Cmnd *); +STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout); +STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int); +STATIC int scsi_test_unit_ready(Scsi_Cmnd *); +STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout); +STATIC int scsi_try_bus_reset(Scsi_Cmnd *); +STATIC int scsi_try_host_reset(Scsi_Cmnd *); +STATIC int scsi_unit_is_ready(Scsi_Cmnd *); +STATIC void scsi_eh_action_done(Scsi_Cmnd *, int); +STATIC int scsi_eh_retry_command(Scsi_Cmnd *); +STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt); +STATIC void scsi_restart_operations(struct Scsi_Host *); +STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt); + + +/* + * Function: scsi_add_timer() + * + * Purpose: Start timeout timer for a single scsi command. + * + * Arguments: SCset - command that is about to start running. + * timeout - amount of time to allow this command to run. + * complete - timeout function to call if timer isn't + * canceled. + * + * Returns: Nothing + * + * Notes: This should be turned into an inline function. + * + * More Notes: Each scsi command has it's own timer, and as it is added to + * the queue, we set up the timer. When the command completes, + * we cancel the timer. Pretty simple, really, especially + * compared to the old way of handling this crap. + */ +void scsi_add_timer(Scsi_Cmnd * SCset, + int timeout, + void (*complete) (Scsi_Cmnd *)) +{ + + /* + * If the clock was already running for this command, then + * first delete the timer. The timer handling code gets rather + * confused if we don't do this. + */ + if (SCset->eh_timeout.function != NULL) { + del_timer(&SCset->eh_timeout); + } + SCset->eh_timeout.data = (unsigned long) SCset; + SCset->eh_timeout.expires = jiffies + timeout; + SCset->eh_timeout.function = (void (*)(unsigned long)) complete; + + SCset->done_late = 0; + + SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete)); + + add_timer(&SCset->eh_timeout); + +} + +/* + * Function: scsi_delete_timer() + * + * Purpose: Delete/cancel timer for a given function. + * + * Arguments: SCset - command that we are canceling timer for. + * + * Returns: 1 if we were able to detach the timer. 0 if we + * blew it, and the timer function has already started + * to run. + * + * Notes: This should be turned into an inline function. + */ +int scsi_delete_timer(Scsi_Cmnd * SCset) +{ + int rtn; + + rtn = del_timer(&SCset->eh_timeout); + + SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn)); + + SCset->eh_timeout.data = (unsigned long) NULL; + SCset->eh_timeout.function = NULL; + + return rtn; +} + +/* + * Function: scsi_times_out() + * + * Purpose: Timeout function for normal scsi commands.. + * + * Arguments: SCpnt - command that is timing out. + * + * Returns: Nothing. + * + * Notes: We do not need to lock this. There is the potential for + * a race only in that the normal completion handling might + * run, but if the normal completion function determines + * that the timer has already fired, then it mustn't do + * anything. 
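Taken together, the two helpers above implement the per-command watchdog: arm it when the command is handed to the driver, disarm it on completion, and treat a failed del_timer() as "the timeout path already owns this command". A kernel-context sketch of the calling pattern (dispatch_with_timeout and on_completion are illustrative names, not mid-layer functions):

/* Kernel-context sketch (not standalone): how the timer helpers
 * above are meant to bracket each command. */
static void dispatch_with_timeout(Scsi_Cmnd *SCpnt)
{
        scsi_add_timer(SCpnt, SCpnt->timeout_per_command, scsi_times_out);
        /* ... queue the command to the low-level driver ... */
}

static void on_completion(Scsi_Cmnd *SCpnt)
{
        if (!scsi_delete_timer(SCpnt)) {
                /* Timer already fired: scsi_times_out() owns the
                 * command now, so do not complete it here. */
                return;
        }
        /* ... normal completion path ... */
}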
+ */ +void scsi_times_out(Scsi_Cmnd * SCpnt) +{ + /* + * Notify the low-level code that this operation failed and we are + * reposessing the command. + */ +#ifdef ERIC_neverdef + /* + * FIXME(eric) + * Allow the host adapter to push a queue ordering tag + * out to the bus to force the command in question to complete. + * If the host wants to do this, then we just restart the timer + * for the command. Before we really do this, some real thought + * as to the optimum way to handle this should be done. We *do* + * need to force ordering every so often to ensure that all requests + * do eventually complete, but I am not sure if this is the best way + * to actually go about it. + * + * Better yet, force a sync here, but don't block since we are in an + * interrupt. + */ + if (SCpnt->host->hostt->eh_ordered_queue_tag) { + if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) { + scsi_add_timer(SCpnt, SCpnt->internal_timeout, + scsi_times_out); + return; + } + } + /* + * FIXME(eric) - add a second special interface to handle this + * case. Ideally that interface can also be used to request + * a queu + */ + if (SCpnt->host->can_queue) { + SCpnt->host->hostt->queuecommand(SCpnt, NULL); + } +#endif + + /* Set the serial_number_at_timeout to the current serial_number */ + SCpnt->serial_number_at_timeout = SCpnt->serial_number; + + SCpnt->eh_state = FAILED; + SCpnt->state = SCSI_STATE_TIMEOUT; + SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; + + SCpnt->host->in_recovery = 1; + SCpnt->host->host_failed++; + + SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n", + atomic_read(&SCpnt->host->host_active), + SCpnt->host->host_busy, + SCpnt->host->host_failed)); + + /* + * If the host is having troubles, then look to see if this was the last + * command that might have failed. If so, wake up the error handler. + */ + if( SCpnt->host->eh_wait == NULL ) { + panic("Error handler thread not present at %p %p %s %d", + SCpnt, SCpnt->host, __FILE__, __LINE__); + } + if (SCpnt->host->host_busy == SCpnt->host->host_failed) { + up(SCpnt->host->eh_wait); + } +} + +/* + * Function scsi_block_when_processing_errors + * + * Purpose: Prevent more commands from being queued while error recovery + * is taking place. + * + * Arguments: SDpnt - device on which we are performing recovery. + * + * Returns: FALSE The device was taken offline by error recovery. + * TRUE OK to proceed. + * + * Notes: We block until the host is out of error recovery, and then + * check to see whether the host or the device is offline. + */ +int scsi_block_when_processing_errors(Scsi_Device * SDpnt) +{ + + SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery); + + SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online)); + + return SDpnt->online; +} + +/* + * Function: scsi_eh_times_out() + * + * Purpose: Timeout function for error handling. + * + * Arguments: SCpnt - command that is timing out. + * + * Returns: Nothing. + * + * Notes: During error handling, the kernel thread will be sleeping + * waiting for some action to complete on the device. Our only + * job is to record that it timed out, and to wake up the + * thread. 
+ */ +STATIC +void scsi_eh_times_out(Scsi_Cmnd * SCpnt) +{ + SCpnt->eh_state = SCSI_STATE_TIMEOUT; + SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt)); + + if (SCpnt->host->eh_action != NULL) + up(SCpnt->host->eh_action); + else + printk("Missing scsi error handler thread\n"); +} + + +/* + * Function: scsi_eh_done() + * + * Purpose: Completion function for error handling. + * + * Arguments: SCpnt - command that is timing out. + * + * Returns: Nothing. + * + * Notes: During error handling, the kernel thread will be sleeping + * waiting for some action to complete on the device. Our only + * job is to record that the action completed, and to wake up the + * thread. + */ +STATIC +void scsi_eh_done(Scsi_Cmnd * SCpnt) +{ + int rtn; + + /* + * If the timeout handler is already running, then just set the + * flag which says we finished late, and return. We have no + * way of stopping the timeout handler from running, so we must + * always defer to it. + */ + rtn = del_timer(&SCpnt->eh_timeout); + if (!rtn) { + SCpnt->done_late = 1; + return; + } + + SCpnt->request.rq_status = RQ_SCSI_DONE; + + SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; + SCpnt->eh_state = SUCCESS; + + SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt, + SCpnt->result)); + + if (SCpnt->host->eh_action != NULL) + up(SCpnt->host->eh_action); +} + +/* + * Function: scsi_eh_action_done() + * + * Purpose: Completion function for error handling. + * + * Arguments: SCpnt - command that is timing out. + * answer - boolean that indicates whether operation succeeded. + * + * Returns: Nothing. + * + * Notes: This callback is only used for abort and reset operations. + */ +STATIC +void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer) +{ + SCpnt->request.rq_status = RQ_SCSI_DONE; + + SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; + SCpnt->eh_state = (answer ? SUCCESS : FAILED); + + if (SCpnt->host->eh_action != NULL) + up(SCpnt->host->eh_action); +} + +/* + * Function: scsi_sense_valid() + * + * Purpose: Determine whether a host has automatically obtained sense + * information or not. If we have it, then give a recommendation + * as to what we should do next. + */ +int scsi_sense_valid(Scsi_Cmnd * SCpnt) +{ + if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) { + return FALSE; + } + return TRUE; +} + +/* + * Function: scsi_eh_retry_command() + * + * Purpose: Retry the original command + * + * Returns: SUCCESS - we were able to get the sense data. + * FAILED - we were not able to get the sense data. + * + * Notes: This function will *NOT* return until the command either + * times out, or it completes. + */ +STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt) +{ + memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + + scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command); + + /* + * Hey, we are done. Let's look to see what happened. + */ + return SCpnt->eh_state; +} + +/* + * Function: scsi_request_sense() + * + * Purpose: Request sense data from a particular target. + * + * Returns: SUCCESS - we were able to get the sense data. + * FAILED - we were not able to get the sense data. + * + * Notes: Some hosts automatically obtain this information, others + * require that we obtain it on our own. 
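The test in scsi_sense_valid() above accepts exactly the fixed-format sense response codes 0x70 (current error) and 0x71 (deferred error). A standalone restatement:

#include <stdio.h>

/* Response codes 0x70 and 0x71 both satisfy ((b0 & 0x70) >> 4) == 7,
 * which is all the validity check above requires. */
static int sense_valid(unsigned char b0)
{
        return ((b0 & 0x70) >> 4) == 7;
}

int main(void)
{
        printf("%d %d %d\n", sense_valid(0x70), sense_valid(0x71),
               sense_valid(0x00));      /* prints: 1 1 0 */
        return 0;
}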
+ * + * This function will *NOT* return until the command either + * times out, or it completes. + */ +STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt) +{ + static unsigned char generic_sense[6] = + {REQUEST_SENSE, 0, 0, 0, 255, 0}; + unsigned char scsi_result0[256], *scsi_result = NULL; + int saved_result; + + ASSERT_LOCK(&io_request_lock, 0); + + memcpy((void *) SCpnt->cmnd, (void *) generic_sense, + sizeof(generic_sense)); + + if (SCpnt->device->scsi_level <= SCSI_2) + SCpnt->cmnd[1] = SCpnt->lun << 5; + + scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma) + ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA); + + if (scsi_result == NULL) { + printk("cannot allocate scsi_result in scsi_request_sense.\n"); + return FAILED; + } + /* + * Zero the sense buffer. Some host adapters automatically always request + * sense, so it is not a good idea that SCpnt->request_buffer and + * SCpnt->sense_buffer point to the same address (DB). + * 0 is not a valid sense code. + */ + memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer)); + memset((void *) scsi_result, 0, 256); + + saved_result = SCpnt->result; + SCpnt->request_buffer = scsi_result; + SCpnt->request_bufflen = 256; + SCpnt->use_sg = 0; + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + SCpnt->sc_data_direction = SCSI_DATA_READ; + SCpnt->underflow = 0; + + scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT); + + /* Last chance to have valid sense data */ + if (!scsi_sense_valid(SCpnt)) + memcpy((void *) SCpnt->sense_buffer, + SCpnt->request_buffer, + sizeof(SCpnt->sense_buffer)); + + if (scsi_result != &scsi_result0[0] && scsi_result != NULL) + kfree(scsi_result); + + /* + * When we eventually call scsi_finish, we really wish to complete + * the original request, so let's restore the original data. (DB) + */ + memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->result = saved_result; + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + + /* + * Hey, we are done. Let's look to see what happened. + */ + return SCpnt->eh_state; +} + +/* + * Function: scsi_test_unit_ready() + * + * Purpose: Run test unit ready command to see if the device is talking to us or not. + * + */ +STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt) +{ + static unsigned char tur_command[6] = + {TEST_UNIT_READY, 0, 0, 0, 0, 0}; + + memcpy((void *) SCpnt->cmnd, (void *) tur_command, + sizeof(tur_command)); + + if (SCpnt->device->scsi_level <= SCSI_2) + SCpnt->cmnd[1] = SCpnt->lun << 5; + + /* + * Zero the sense buffer. The SCSI spec mandates that any + * untransferred sense data should be interpreted as being zero. + */ + memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer)); + + SCpnt->request_buffer = NULL; + SCpnt->request_bufflen = 0; + SCpnt->use_sg = 0; + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + SCpnt->underflow = 0; + SCpnt->sc_data_direction = SCSI_DATA_NONE; + + scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT); + + /* + * When we eventually call scsi_finish, we really wish to complete + * the original request, so let's restore the original data. 
(DB) + */ + memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + + /* + * Hey, we are done. Let's look to see what happened. + */ + SCSI_LOG_ERROR_RECOVERY(3, + printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n", + SCpnt, SCpnt->eh_state)); + return SCpnt->eh_state; +} + +/* + * This would normally need to get the IO request lock, + * but as it doesn't actually touch anything that needs + * to be locked we can avoid the lock here.. + */ +STATIC +void scsi_sleep_done(struct semaphore *sem) +{ + if (sem != NULL) { + up(sem); + } +} + +void scsi_sleep(int timeout) +{ + DECLARE_MUTEX_LOCKED(sem); + struct timer_list timer; + + init_timer(&timer); + timer.data = (unsigned long) &sem; + timer.expires = jiffies + timeout; + timer.function = (void (*)(unsigned long)) scsi_sleep_done; + + SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout)); + + add_timer(&timer); + + down(&sem); + del_timer(&timer); +} + +/* + * Function: scsi_send_eh_cmnd + * + * Purpose: Send a command out to a device as part of error recovery. + * + * Notes: The initialization of the structures is quite a bit different + * in this case, and furthermore, there is a different completion + * handler. + */ +STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) +{ + unsigned long flags; + struct Scsi_Host *host; + + ASSERT_LOCK(&io_request_lock, 0); + + host = SCpnt->host; + + retry: + /* + * We will use a queued command if possible, otherwise we will emulate the + * queuing and calling of completion function ourselves. + */ + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + + if (host->can_queue) { + DECLARE_MUTEX_LOCKED(sem); + + SCpnt->eh_state = SCSI_STATE_QUEUED; + + scsi_add_timer(SCpnt, timeout, scsi_eh_times_out); + + /* + * Set up the semaphore so we wait for the command to complete. + */ + SCpnt->host->eh_action = &sem; + SCpnt->request.rq_status = RQ_SCSI_BUSY; + + spin_lock_irqsave(&io_request_lock, flags); + host->hostt->queuecommand(SCpnt, scsi_eh_done); + spin_unlock_irqrestore(&io_request_lock, flags); + + down(&sem); + + SCpnt->host->eh_action = NULL; + + /* + * See if timeout. If so, tell the host to forget about it. + * In other words, we don't want a callback any more. + */ + if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) { + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + + /* + * As far as the low level driver is + * concerned, this command is still active, so + * we must give the low level driver a chance + * to abort it. (DB) + * + * FIXME(eric) - we are not tracking whether we could + * abort a timed out command or not. Not sure how + * we should treat them differently anyways. + */ + spin_lock_irqsave(&io_request_lock, flags); + if (SCpnt->host->hostt->eh_abort_handler) + SCpnt->host->hostt->eh_abort_handler(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + + SCpnt->request.rq_status = RQ_SCSI_DONE; + SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; + + SCpnt->eh_state = FAILED; + } + SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n", + SCpnt, SCpnt->eh_state)); + } else { + int temp; + + /* + * We damn well had better never use this code. There is no timeout + * protection here, since we would end up waiting in the actual low + * level driver, we don't know how to wake it up. 
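The commands scsi_send_eh_cmnd() issues are built by its callers above as plain 6-byte CDBs, with the pre-SCSI-3 quirk that the LUN is also folded into bits 5-7 of byte 1 when scsi_level <= SCSI_2. A standalone sketch of the REQUEST SENSE CDB that scsi_request_sense() constructs (build_request_sense is an illustrative name; the opcode value is the standard one from the kernel headers):

#include <stdio.h>

#define REQUEST_SENSE 0x03      /* SCSI opcode, as in the kernel headers */

/* Build the 6-byte REQUEST SENSE CDB used by scsi_request_sense()
 * above; for scsi_level <= SCSI_2 the LUN also goes in byte 1. */
static void build_request_sense(unsigned char cdb[6],
                                unsigned int lun, int pre_scsi3)
{
        cdb[0] = REQUEST_SENSE;
        cdb[1] = pre_scsi3 ? (unsigned char)(lun << 5) : 0;
        cdb[2] = cdb[3] = 0;
        cdb[4] = 255;           /* allocation length, as above */
        cdb[5] = 0;
}

int main(void)
{
        unsigned char cdb[6];

        build_request_sense(cdb, 2, 1);
        printf("%02x %02x %02x %02x %02x %02x\n",
               cdb[0], cdb[1], cdb[2], cdb[3], cdb[4], cdb[5]);
        /* prints: 03 40 00 00 ff 00 */
        return 0;
}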
+ */ + spin_lock_irqsave(&io_request_lock, flags); + temp = host->hostt->command(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + + SCpnt->result = temp; + /* Fall through to code below to examine status. */ + SCpnt->eh_state = SUCCESS; + } + + /* + * Now examine the actual status codes to see whether the command actually + * did complete normally. + */ + if (SCpnt->eh_state == SUCCESS) { + int ret = scsi_eh_completed_normally(SCpnt); + SCSI_LOG_ERROR_RECOVERY(3, + printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret)); + switch (ret) { + case SUCCESS: + SCpnt->eh_state = SUCCESS; + break; + case NEEDS_RETRY: + goto retry; + case FAILED: + default: + SCpnt->eh_state = FAILED; + break; + } + } else { + SCpnt->eh_state = FAILED; + } +} + +/* + * Function: scsi_unit_is_ready() + * + * Purpose: Called after TEST_UNIT_READY is run, to test to see if + * the unit responded in a way that indicates it is ready. + */ +STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt) +{ + if (SCpnt->result) { + if (((driver_byte(SCpnt->result) & DRIVER_SENSE) || + (status_byte(SCpnt->result) & CHECK_CONDITION)) && + ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) { + if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) && + ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) && + ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) { + return 0; + } + } + } + return 1; +} + +/* + * Function: scsi_eh_finish_command + * + * Purpose: Handle a command that we are finished with WRT error handling. + * + * Arguments: SClist - pointer to list into which we are putting completed commands. + * SCpnt - command that is completing + * + * Notes: We don't want to use the normal command completion while we are + * are still handling errors - it may cause other commands to be queued, + * and that would disturb what we are doing. Thus we really want to keep + * a list of pending commands for final completion, and once we + * are ready to leave error handling we handle completion for real. + */ +STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt) +{ + SCpnt->state = SCSI_STATE_BHQUEUE; + SCpnt->bh_next = *SClist; + /* + * Set this back so that the upper level can correctly free up + * things. + */ + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + *SClist = SCpnt; +} + +/* + * Function: scsi_try_to_abort_command + * + * Purpose: Ask host adapter to abort a running command. + * + * Returns: FAILED Operation failed or not supported. + * SUCCESS Succeeded. + * + * Notes: This function will not return until the user's completion + * function has been called. There is no timeout on this + * operation. If the author of the low-level driver wishes + * this operation to be timed, they can provide this facility + * themselves. Helper functions in scsi_error.c can be supplied + * to make this easier to do. + * + * Notes: It may be possible to combine this with all of the reset + * handling to eliminate a lot of code duplication. I don't + * know what makes more sense at the moment - this is just a + * prototype. + */ +STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout) +{ + int rtn; + unsigned long flags; + + SCpnt->eh_state = FAILED; /* Until we come up with something better */ + + if (SCpnt->host->hostt->eh_abort_handler == NULL) { + return FAILED; + } + /* + * scsi_done was called just after the command timed out and before + * we had a chance to process it. 
(DB) + */ + if (SCpnt->serial_number == 0) + return SUCCESS; + + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + + spin_lock_irqsave(&io_request_lock, flags); + rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + return rtn; +} + +/* + * Function: scsi_try_bus_device_reset + * + * Purpose: Ask host adapter to perform a bus device reset for a given + * device. + * + * Returns: FAILED Operation failed or not supported. + * SUCCESS Succeeded. + * + * Notes: There is no timeout for this operation. If this operation is + * unreliable for a given host, then the host itself needs to put a + * timer on it, and set the host back to a consistent state prior + * to returning. + */ +STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout) +{ + unsigned long flags; + int rtn; + + SCpnt->eh_state = FAILED; /* Until we come up with something better */ + + if (SCpnt->host->hostt->eh_device_reset_handler == NULL) { + return FAILED; + } + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + + spin_lock_irqsave(&io_request_lock, flags); + rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + + if (rtn == SUCCESS) + SCpnt->eh_state = SUCCESS; + + return SCpnt->eh_state; +} + +/* + * Function: scsi_try_bus_reset + * + * Purpose: Ask host adapter to perform a bus reset for a host. + * + * Returns: FAILED Operation failed or not supported. + * SUCCESS Succeeded. + * + * Notes: + */ +STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt) +{ + unsigned long flags; + int rtn; + + SCpnt->eh_state = FAILED; /* Until we come up with something better */ + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + SCpnt->serial_number_at_timeout = SCpnt->serial_number; + + if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) { + return FAILED; + } + + spin_lock_irqsave(&io_request_lock, flags); + rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + + if (rtn == SUCCESS) + SCpnt->eh_state = SUCCESS; + + /* + * If we had a successful bus reset, mark the command blocks to expect + * a condition code of unit attention. + */ + scsi_sleep(BUS_RESET_SETTLE_TIME); + if (SCpnt->eh_state == SUCCESS) { + Scsi_Device *SDloop; + for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) { + if (SCpnt->channel == SDloop->channel) { + SDloop->was_reset = 1; + SDloop->expecting_cc_ua = 1; + } + } + } + return SCpnt->eh_state; +} + +/* + * Function: scsi_try_host_reset + * + * Purpose: Ask host adapter to reset itself, and the bus. + * + * Returns: FAILED Operation failed or not supported. + * SUCCESS Succeeded. + * + * Notes: + */ +STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt) +{ + unsigned long flags; + int rtn; + + SCpnt->eh_state = FAILED; /* Until we come up with something better */ + SCpnt->owner = SCSI_OWNER_LOWLEVEL; + SCpnt->serial_number_at_timeout = SCpnt->serial_number; + + if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { + return FAILED; + } + spin_lock_irqsave(&io_request_lock, flags); + rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); + spin_unlock_irqrestore(&io_request_lock, flags); + + if (rtn == SUCCESS) + SCpnt->eh_state = SUCCESS; + + /* + * If we had a successful host reset, mark the command blocks to expect + * a condition code of unit attention. 
+ */ + scsi_sleep(HOST_RESET_SETTLE_TIME); + if (SCpnt->eh_state == SUCCESS) { + Scsi_Device *SDloop; + for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) { + SDloop->was_reset = 1; + SDloop->expecting_cc_ua = 1; + } + } + return SCpnt->eh_state; +} + +/* + * Function: scsi_decide_disposition + * + * Purpose: Examine a command block that has come back from the low-level + * and figure out what to do next. + * + * Returns: SUCCESS - pass on to upper level. + * FAILED - pass on to error handler thread. + * RETRY - command should be retried. + * SOFTERR - command succeeded, but we need to log + * a soft error. + * + * Notes: This is *ONLY* called when we are examining the status + * after sending out the actual data command. Any commands + * that are queued for error recovery (i.e. TEST_UNIT_READY) + * do *NOT* come through here. + * + * NOTE - When this routine returns FAILED, it means the error + * handler thread is woken. In cases where the error code + * indicates an error that doesn't require the error handler + * thread (i.e. we don't need to abort/reset), then this function + * should return SUCCESS. + */ +int scsi_decide_disposition(Scsi_Cmnd * SCpnt) +{ + int rtn; + + /* + * If the device is offline, then we clearly just pass the result back + * up to the top level. + */ + if (SCpnt->device->online == FALSE) { + SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n")); + return SUCCESS; + } + /* + * First check the host byte, to see if there is anything in there + * that would indicate what we need to do. + */ + + switch (host_byte(SCpnt->result)) { + case DID_PASSTHROUGH: + /* + * No matter what, pass this through to the upper layer. + * Nuke this special code so that it looks like we are saying + * DID_OK. + */ + SCpnt->result &= 0xff00ffff; + return SUCCESS; + case DID_OK: + /* + * Looks good. Drop through, and check the next byte. + */ + break; + case DID_NO_CONNECT: + case DID_BAD_TARGET: + case DID_ABORT: + /* + * Note - this means that we just report the status back to the + * top level driver, not that we actually think that it indicates + * success. + */ + return SUCCESS; + /* + * When the low level driver returns DID_SOFT_ERROR, + * it is responsible for keeping an internal retry counter + * in order to avoid endless loops (DB) + * + * Actually this is a bug in this function here. We should + * be mindful of the maximum number of retries specified + * and not get stuck in a loop. + */ + case DID_SOFT_ERROR: + goto maybe_retry; + + case DID_ERROR: + if (msg_byte(SCpnt->result) == COMMAND_COMPLETE && + status_byte(SCpnt->result) == RESERVATION_CONFLICT) + /* + * execute reservation conflict processing code + * lower down + */ + break; + /* FALLTHROUGH */ + + case DID_BUS_BUSY: + case DID_PARITY: + goto maybe_retry; + case DID_TIME_OUT: + /* + * When we scan the bus, we get timeout messages for + * these commands if there is no device available. + * Other hosts report DID_NO_CONNECT for the same thing. + */ + if ((SCpnt->cmnd[0] == TEST_UNIT_READY || + SCpnt->cmnd[0] == INQUIRY)) { + return SUCCESS; + } else { + return FAILED; + } + case DID_RESET: + /* + * In the normal case where we haven't initiated a reset, this is + * a failure. + */ + if (SCpnt->flags & IS_RESETTING) { + SCpnt->flags &= ~IS_RESETTING; + goto maybe_retry; + } + return SUCCESS; + default: + return FAILED; + } + + /* + * Next, check the message byte. 
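Throughout scsi_decide_disposition() the result word is dissected with the host_byte()/msg_byte()/status_byte() macros. A standalone restatement of that packing, assuming the 2.4-era scsi.h definitions (an assumption worth noting: status_byte() there shifts right by one, so SCSI status 0x02, CHECK CONDITION on the wire, decodes to the kernel's CHECK_CONDITION value of 0x01):

#include <stdio.h>

/* Result-word layout assumed above (2.4-era definitions):
 * driver(31..24) | host(23..16) | msg(15..8) | status(7..0). */
#define status_byte(result) (((result) >> 1) & 0x1f)
#define msg_byte(result)    (((result) >> 8) & 0xff)
#define host_byte(result)   (((result) >> 16) & 0xff)
#define driver_byte(result) (((result) >> 24) & 0xff)

int main(void)
{
        /* DRIVER_SENSE (0x08) plus CHECK CONDITION status (0x02). */
        unsigned int result = (0x08u << 24) | 0x02;

        printf("host=%x msg=%x status=%x driver=%x\n",
               host_byte(result), msg_byte(result),
               status_byte(result), driver_byte(result));
        /* prints: host=0 msg=0 status=1 driver=8 */
        return 0;
}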
+ */ + if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) { + return FAILED; + } + /* + * Now, check the status byte to see if this indicates anything special. + */ + switch (status_byte(SCpnt->result)) { + case QUEUE_FULL: + /* + * The case of trying to send too many commands to a tagged queueing + * device. + */ + return ADD_TO_MLQUEUE; + case GOOD: + case COMMAND_TERMINATED: + return SUCCESS; + case CHECK_CONDITION: + rtn = scsi_check_sense(SCpnt); + if (rtn == NEEDS_RETRY) { + goto maybe_retry; + } + return rtn; + case CONDITION_GOOD: + case INTERMEDIATE_GOOD: + case INTERMEDIATE_C_GOOD: + /* + * Who knows? FIXME(eric) + */ + return SUCCESS; + case BUSY: + goto maybe_retry; + + case RESERVATION_CONFLICT: + printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", + SCpnt->host->host_no, SCpnt->channel, + SCpnt->device->id, SCpnt->device->lun); + return SUCCESS; /* causes immediate I/O error */ + default: + return FAILED; + } + return FAILED; + + maybe_retry: + + if ((++SCpnt->retries) < SCpnt->allowed) { + return NEEDS_RETRY; + } else { + /* + * No more retries - report this one back to upper level. + */ + return SUCCESS; + } +} + +/* + * Function: scsi_eh_completed_normally + * + * Purpose: Examine a command block that has come back from the low-level + * and figure out what to do next. + * + * Returns: SUCCESS - pass on to upper level. + * FAILED - pass on to error handler thread. + * RETRY - command should be retried. + * SOFTERR - command succeeded, but we need to log + * a soft error. + * + * Notes: This is *ONLY* called when we are examining the status + * of commands queued during error recovery. The main + * difference here is that we don't allow for the possibility + * of retries here, and we are a lot more restrictive about what + * we consider acceptable. + */ +STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt) +{ + /* + * First check the host byte, to see if there is anything in there + * that would indicate what we need to do. + */ + if (host_byte(SCpnt->result) == DID_RESET) { + if (SCpnt->flags & IS_RESETTING) { + /* + * OK, this is normal. We don't know whether in fact the + * command in question really needs to be rerun or not - + * if this was the original data command then the answer is yes, + * otherwise we just flag it as success. + */ + SCpnt->flags &= ~IS_RESETTING; + return NEEDS_RETRY; + } + /* + * Rats. We are already in the error handler, so we now get to try + * and figure out what to do next. If the sense is valid, we have + * a pretty good idea of what to do. If not, we mark it as failed. + */ + return scsi_check_sense(SCpnt); + } + if (host_byte(SCpnt->result) != DID_OK) { + return FAILED; + } + /* + * Next, check the message byte. + */ + if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) { + return FAILED; + } + /* + * Now, check the status byte to see if this indicates anything special. + */ + switch (status_byte(SCpnt->result)) { + case GOOD: + case COMMAND_TERMINATED: + return SUCCESS; + case CHECK_CONDITION: + return scsi_check_sense(SCpnt); + case CONDITION_GOOD: + case INTERMEDIATE_GOOD: + case INTERMEDIATE_C_GOOD: + /* + * Who knows? FIXME(eric) + */ + return SUCCESS; + case BUSY: + case QUEUE_FULL: + case RESERVATION_CONFLICT: + default: + return FAILED; + } + return FAILED; +} + +/* + * Function: scsi_check_sense + * + * Purpose: Examine sense information - give suggestion as to what + * we should do with it. 
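scsi_check_sense() below works on fixed-format sense data: the sense key lives in the low nibble of byte 2, and the additional sense code and qualifier in bytes 12 and 13 (the 0x04/0x01 pair it tests for is "logical unit is in the process of becoming ready"). A standalone decode sketch (decode_sense is an illustrative name):

#include <stdio.h>

/* Fixed-format sense fields consulted by scsi_check_sense() below. */
static void decode_sense(const unsigned char *sb)
{
        unsigned char key = sb[2] & 0x0f;       /* e.g. UNIT_ATTENTION == 0x6 */
        unsigned char asc = sb[12], ascq = sb[13];

        printf("key=%x asc=%x ascq=%x\n", key, asc, ascq);
}

int main(void)
{
        unsigned char sb[18] = { 0x70 };        /* current, fixed format */

        sb[2] = 0x02;                           /* NOT READY */
        sb[12] = 0x04; sb[13] = 0x01;           /* becoming ready: retry */
        decode_sense(sb);                       /* key=2 asc=4 ascq=1 */
        return 0;
}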
+ */ +STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt) +{ + if (!scsi_sense_valid(SCpnt)) { + return FAILED; + } + if (SCpnt->sense_buffer[2] & 0xe0) + return SUCCESS; + + switch (SCpnt->sense_buffer[2] & 0xf) { + case NO_SENSE: + return SUCCESS; + case RECOVERED_ERROR: + return /* SOFT_ERROR */ SUCCESS; + + case ABORTED_COMMAND: + return NEEDS_RETRY; + case NOT_READY: + case UNIT_ATTENTION: + /* + * If we are expecting a CC/UA because of a bus reset that we + * performed, treat this just as a retry. Otherwise this is + * information that we should pass up to the upper-level driver + * so that we can deal with it there. + */ + if (SCpnt->device->expecting_cc_ua) { + SCpnt->device->expecting_cc_ua = 0; + return NEEDS_RETRY; + } + /* + * If the device is in the process of becoming ready, we + * should retry. + */ + if ((SCpnt->sense_buffer[12] == 0x04) && + (SCpnt->sense_buffer[13] == 0x01)) { + return NEEDS_RETRY; + } + return SUCCESS; + + /* these three are not supported */ + case COPY_ABORTED: + case VOLUME_OVERFLOW: + case MISCOMPARE: + return SUCCESS; + + case MEDIUM_ERROR: + return NEEDS_RETRY; + + case ILLEGAL_REQUEST: + case BLANK_CHECK: + case DATA_PROTECT: + case HARDWARE_ERROR: + default: + return SUCCESS; + } +} + + +/* + * Function: scsi_restart_operations + * + * Purpose: Restart IO operations to the specified host. + * + * Arguments: host - host that we are restarting + * + * Lock status: Assumed that locks are not held upon entry. + * + * Returns: Nothing + * + * Notes: When we entered the error handler, we blocked all further + * I/O to this device. We need to 'reverse' this process. + */ +STATIC void scsi_restart_operations(struct Scsi_Host *host) +{ + Scsi_Device *SDpnt; + unsigned long flags; + + ASSERT_LOCK(&io_request_lock, 0); + + /* + * Next free up anything directly waiting upon the host. This will be + * requests for character device operations, and also for ioctls to queued + * block devices. + */ + SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n")); + + wake_up(&host->host_wait); + + /* + * Finally we need to re-initiate requests that may be pending. We will + * have had everything blocked while error handling is taking place, and + * now that error recovery is done, we will need to ensure that these + * requests are started. + */ + spin_lock_irqsave(&io_request_lock, flags); + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + request_queue_t *q; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) + || (host->host_blocked) + || (host->host_self_blocked) + || (SDpnt->device_blocked)) { + break; + } + q = &SDpnt->request_queue; + q->request_fn(q); + } + spin_unlock_irqrestore(&io_request_lock, flags); +} + +/* + * Function: scsi_unjam_host + * + * Purpose: Attempt to fix a host which has a command that failed for + * some reason. + * + * Arguments: host - host that needs unjamming. + * + * Returns: Nothing + * + * Notes: When we come in here, we *know* that all commands on the + * bus have either completed, failed or timed out. We also + * know that no further commands are being sent to the host, + * so things are relatively quiet and we have freedom to + * fiddle with things as we wish. + * + * Additional note: This is only the *default* implementation. It is possible + * for individual drivers to supply their own version of this + * function, and if the maintainer wishes to do this, it is + * strongly suggested that this function be taken as a template + * and modified. 
This function was designed to correctly handle
+ * problems for about 95% of the different cases out there, and
+ * it should always provide at least a reasonable amount of error
+ * recovery.
+ *
+ * Note3: Any command marked 'FAILED' or 'TIMEOUT' must eventually
+ * have scsi_finish_command() called for it. We do all of
+ * the retry stuff here, so when we restart the host after we
+ * return it should have an empty queue.
+ */
+STATIC int scsi_unjam_host(struct Scsi_Host *host)
+{
+ int devices_failed;
+ int numfailed;
+ int ourrtn;
+ int rtn = FALSE;
+ int result;
+ Scsi_Cmnd *SCloop;
+ Scsi_Cmnd *SCpnt;
+ Scsi_Device *SDpnt;
+ Scsi_Device *SDloop;
+ Scsi_Cmnd *SCdone;
+ int timed_out;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ SCdone = NULL;
+
+ /*
+ * First, protect against any sort of race condition. If any of the outstanding
+ * commands are in states that indicate that we are not yet blocked (i.e. we are
+ * not in a quiet state) then we got woken up in error. If we ever end up here,
+ * we need to re-examine some of the assumptions.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED
+ || SCpnt->state == SCSI_STATE_TIMEOUT
+ || SCpnt->state == SCSI_STATE_INITIALIZING
+ || SCpnt->state == SCSI_STATE_UNUSED) {
+ continue;
+ }
+ /*
+ * Rats. Something is still floating around out there. This could
+ * be the result of the fact that the upper level drivers are still frobbing
+ * commands that might have succeeded. There are two outcomes. One is that
+ * the command block will eventually be freed, and the other one is that
+ * the command will be queued and will be finished along the way.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));
+
+/*
+ * panic("SCSI Error handler woken too early\n");
+ *
+ * This is no longer a problem, since now the code cares only about
+ * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
+ * Other states are useful only to release active commands when devices are
+ * set offline. If (host->host_active == host->host_busy) we can safely assume
+ * that there are no commands in a state other than TIMEOUT or FAILED. (DB)
+ *
+ * FIXME:
+ * It is not easy to correctly release commands according to their state when
+ * devices are set offline, when the state is neither TIMEOUT nor FAILED.
+ * When a device is set offline, we can have some command with
+ * rq_status=RQ_SCSI_BUSY, owner=SCSI_STATE_HIGHLEVEL,
+ * state=SCSI_STATE_INITIALIZING and the driver module cannot be released.
+ * (DB, 17 May 1998)
+ */
+ }
+ }
+
+ /*
+ * Next, see if we need to request sense information. If so,
+ * then get it now, so we have a better idea of what to do.
+ * FIXME(eric) this has the unfortunate side effect that if a host
+ * adapter does not automatically request sense information, we end
+ * up shutting it down before we request it. All hosts should be doing this
+ * anyways, so for now all I have to say is tough noogies if you end up in here.
+ * On second thought, this is probably a good idea. We *really* want to give
+ * authors an incentive to automatically request this.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
+ SCpnt->target));
+ rtn = scsi_request_sense(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
+ SCpnt, SCpnt->result));
+ SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));
+
+ result = scsi_decide_disposition(SCpnt);
+
+ /*
+ * If the result was normal, then just pass it along to the
+ * upper level.
+ */
+ if (result == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ if (result != NEEDS_RETRY) {
+ continue;
+ }
+ /*
+ * We only come in here if we want to retry a
+ * command. The test to see whether the command
+ * should be retried should be keeping track of the
+ * number of tries, so we don't end up looping, of
+ * course.
+ */
+ SCpnt->state = NEEDS_RETRY;
+ rtn = scsi_eh_retry_command(SCpnt);
+ if (rtn != SUCCESS) {
+ continue;
+ }
+ /*
+ * We eventually hand this one back to the top level.
+ */
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCpnt);
+ }
+ }
+
+ /*
+ * Go through the list of commands and figure out where we stand and how bad things
+ * really are.
+ */
+ numfailed = 0;
+ timed_out = 0;
+ devices_failed = 0;
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ unsigned int device_error = 0;
+
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state == SCSI_STATE_FAILED) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
+ SCpnt->target));
+ numfailed++;
+ device_error++;
+ }
+ if (SCpnt->state == SCSI_STATE_TIMEOUT) {
+ SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timed out\n",
+ SCpnt->target));
+ timed_out++;
+ device_error++;
+ }
+ }
+ if (device_error > 0) {
+ devices_failed++;
+ }
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
+ numfailed, timed_out, devices_failed));
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Next, try and see whether or not it makes sense to try and abort
+ * the running command. This only works out to be the case if we have
+ * one command that has timed out. If the command simply failed, it
+ * makes no sense to try and abort the command, since as far as the
+ * host adapter is concerned, it isn't running.
+ */
+
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
+ if (rtn == SUCCESS) {
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we have corrected all of the problems, then we are done.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * Either the abort wasn't appropriate, or it didn't succeed.
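+ *
+ * (Editorial summary of the escalation ladder the rest of this
+ * function walks; each rung is tried only while host_failed != 0:
+ *
+ *     scsi_try_to_abort_command()   - per timed-out command (above)
+ *     scsi_try_bus_device_reset()   - per device with stuck commands
+ *     scsi_try_bus_reset()          - per channel
+ *     scsi_try_host_reset()         - whole adapter
+ *     take the device offline       - last resort
+ *
+ * After each rung, recovered commands are checked with
+ * scsi_test_unit_ready()/scsi_unit_is_ready() and re-driven with
+ * scsi_eh_retry_command().)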
+ * Now try a bus device reset. Still, look to see whether we have + * multiple devices that are jammed or not - if we have multiple devices, + * it makes no sense to try BUS_DEVICE_RESET - we really would need + * to try a BUS_RESET instead. + * + * Does this make sense - should we try BDR on each device individually? + * Yes, definitely. + */ + SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n")); + + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) { + if (SCloop->state == SCSI_STATE_FAILED + || SCloop->state == SCSI_STATE_TIMEOUT) { + break; + } + } + + if (SCloop == NULL) { + continue; + } + /* + * OK, we have a device that is having problems. Try and send + * a bus device reset to it. + * + * FIXME(eric) - make sure we handle the case where multiple + * commands to the same device have failed. They all must + * get properly restarted. + */ + rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT); + + if (rtn == SUCCESS) { + rtn = scsi_test_unit_ready(SCloop); + + if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) { + rtn = scsi_eh_retry_command(SCloop); + + if (rtn == SUCCESS) { + SCloop->host->host_failed--; + scsi_eh_finish_command(&SCdone, SCloop); + } + } + } + } + + if (host->host_failed == 0) { + ourrtn = TRUE; + goto leave; + } + /* + * If we ended up here, we have serious problems. The only thing left + * to try is a full bus reset. If someone has grabbed the bus and isn't + * letting go, then perhaps this will help. + */ + SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n")); + + /* + * We really want to loop over the various channels, and do this on + * a channel by channel basis. We should also check to see if any + * of the failed commands are on soft_reset devices, and if so, skip + * the reset. + */ + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + next_device: + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) { + if (SCpnt->state != SCSI_STATE_FAILED + && SCpnt->state != SCSI_STATE_TIMEOUT) { + continue; + } + /* + * We have a failed command. Make sure there are no other failed + * commands on the same channel that are timed out and implement a + * soft reset. + */ + for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) { + for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) { + if (SCloop->channel != SCpnt->channel) { + continue; + } + if (SCloop->state != SCSI_STATE_FAILED + && SCloop->state != SCSI_STATE_TIMEOUT) { + continue; + } + if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) { + /* + * If this device uses the soft reset option, and this + * is one of the devices acting up, then our only + * option is to wait a bit, since the command is + * supposedly still running. + * + * FIXME(eric) - right now we will just end up falling + * through to the 'take device offline' case. + * + * FIXME(eric) - It is possible that the command completed + * *after* the error recovery procedure started, and if this + * is the case, we are worrying about nothing here. + */ + + scsi_sleep(1 * HZ); + goto next_device; + } + } + } + + /* + * We now know that we are able to perform a reset for the + * bus that SCpnt points to. There are no soft-reset devices + * with outstanding timed out commands. 
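+ *
+ * (Editorial note: if the bus reset succeeds, commands that were
+ * active on that channel are generally expected to complete with
+ * host_byte(result) == DID_RESET; the per-command checks below then
+ * decide between retrying and taking the device offline.)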
+ */
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->channel != SCpnt->channel) {
+ continue;
+ }
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ /*
+ * If the bus reset worked, but we are still unable to
+ * talk to the device, take it offline.
+ * FIXME(eric) - is this really the correct thing to do?
+ */
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If we ended up here, we have serious problems. The only thing left
+ * to try is a full host reset - perhaps the firmware on the device
+ * crashed, or something like that.
+ *
+ * It is assumed that a successful host reset will cause *all* information
+ * about the command to be flushed from both the host adapter *and* the
+ * device.
+ *
+ * FIXME(eric) - it isn't clear that devices that implement the soft reset
+ * option can ever be cleared except via cycling the power. The problem is
+ * that sending the host reset command will cause the host to forget
+ * about the pending command, but the device won't forget. For now, we
+ * skip the host reset option if any of the failed devices are configured
+ * to use the soft reset option.
+ */
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ next_device2:
+ for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
+ if (SCpnt->state != SCSI_STATE_FAILED
+ && SCpnt->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
+ /*
+ * If this device uses the soft reset option, and this
+ * is one of the devices acting up, then our only
+ * option is to wait a bit, since the command is
+ * supposedly still running.
+ *
+ * FIXME(eric) - right now we will just end up falling
+ * through to the 'take device offline' case.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3,
+ printk("scsi_unjam_host: Unable to try hard host reset\n"));
+
+ /*
+ * Due to the spinlock, we will never get out of this
+ * loop without a proper wait. (DB)
+ */
+ scsi_sleep(1 * HZ);
+
+ goto next_device2;
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));
+
+ /*
+ * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
+ */
+ rtn = scsi_try_host_reset(SCpnt);
+ if (rtn == SUCCESS) {
+ /*
+ * FIXME(eric) we assume that all commands are flushed from the
+ * controller. We should get a DID_RESET for all of the commands
+ * that were pending. We should ignore these so that we can
+ * guarantee that we are in a consistent state.
+ *
+ * I believe this to be the case right now, but this needs to be
+ * tested.
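+ *
+ * (Illustrative sketch, not in the original: such flushed completions
+ * could be recognised and dropped with a check along the lines of
+ *
+ *     if (host_byte(SCpnt->result) == DID_RESET)
+ *         return;    ... already accounted for by the error handler ...
+ *
+ * scsi_io_completion() in scsi_lib.c, later in this changeset, already
+ * requeues commands whose result carries DID_RESET.)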
+ */
+ for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
+ for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state != SCSI_STATE_FAILED
+ && SCloop->state != SCSI_STATE_TIMEOUT) {
+ continue;
+ }
+ rtn = scsi_test_unit_ready(SCloop);
+
+ if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
+ rtn = scsi_eh_retry_command(SCloop);
+
+ if (rtn == SUCCESS) {
+ SCpnt->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ if (rtn != SUCCESS) {
+ printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ SDloop->host->host_failed--;
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If we solved all of the problems, then let's rev up the engines again.
+ */
+ if (host->host_failed == 0) {
+ ourrtn = TRUE;
+ goto leave;
+ }
+ /*
+ * If the HOST RESET failed, then for now we assume that the entire host
+ * adapter is too hosed to be of any use. For our purposes, however, it is
+ * easier to simply take the devices offline that correspond to commands
+ * that failed.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));
+
+ for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
+ if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
+ SDloop = SCloop->device;
+ if (SDloop->online == TRUE) {
+ printk(KERN_INFO "scsi: device set offline - command error recovery failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
+ SDloop->online = FALSE;
+ }
+
+ /*
+ * This should pass the failure up to the top level driver, and
+ * it will have to try and do something intelligent with it.
+ */
+ SCloop->host->host_failed--;
+
+ if (SCloop->state == SCSI_STATE_TIMEOUT) {
+ SCloop->result |= (DRIVER_TIMEOUT << 24);
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
+ SDloop->id, SCloop->result));
+
+ scsi_eh_finish_command(&SCdone, SCloop);
+ }
+ }
+ }
+
+ if (host->host_failed != 0) {
+ panic("scsi_unjam_host: Miscount of number of failed commands.\n");
+ }
+ SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));
+
+ ourrtn = FALSE;
+
+ leave:
+
+ /*
+ * We should have a list of commands that we 'finished' during the course of
+ * error recovery. This should be the same as the list of commands that timed out
+ * or failed. We are currently holding these things in a linked list - we didn't
+ * put them in the bottom half queue because we wanted to keep things quiet while
+ * we were working on recovery, and passing them up to the top level could easily
+ * cause the top level to try and queue something else again.
+ *
+ * Start by marking that the host is no longer in error recovery.
+ */
+ host->in_recovery = 0;
+
+ /*
+ * Take the list of commands, and stick them in the bottom half queue.
+ * The current implementation of scsi_done will do this for us - if need
+ * be we can create a special version of this function to do the
+ * same job for us.
+ */
+ for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
+ SCdone = SCpnt->bh_next;
+ SCpnt->bh_next = NULL;
+ /*
+ * Oh, this is a vile hack. scsi_done() expects a timer
+ * to be running on the command.
If there isn't, it assumes
+ * that the command has actually timed out, and a timer
+ * handler is running. That may well be how we got into
+ * this fix, but right now things are stable. We add
+ * a timer back again so that we can report completion.
+ * scsi_done() will immediately remove said timer from
+ * the command, and then process it.
+ */
+ scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
+ scsi_done(SCpnt);
+ }
+
+ return (ourrtn);
+}
+
+
+/*
+ * Function: scsi_error_handler
+ *
+ * Purpose: Handle errors/timeouts of scsi commands, try and clean up
+ * and unjam the bus, and restart things.
+ *
+ * Arguments: host - host for which we are running.
+ *
+ * Returns: Never returns.
+ *
+ * Notes: This is always run in the context of a kernel thread. The
+ * idea is that we start this thing up when the kernel starts
+ * up (one per host that we detect), and it immediately goes to
+ * sleep and waits for some event (i.e. failure). When this
+ * takes place, we have the job of trying to unjam the bus
+ * and restarting things.
+ *
+ */
+void scsi_error_handler(void *data)
+{
+ struct Scsi_Host *host = (struct Scsi_Host *) data;
+ int rtn;
+ DECLARE_MUTEX_LOCKED(sem);
+
+ /*
+ * We only listen to signals if the HA was loaded as a module.
+ * If the HA was compiled into the kernel, then we don't listen
+ * to any signals.
+ */
+ if( host->loaded_as_module ) {
+ siginitsetinv(&current->blocked, SHUTDOWN_SIGS);
+ } else {
+ siginitsetinv(&current->blocked, 0);
+ }
+
+ lock_kernel();
+
+ /*
+ * Flush resources
+ */
+
+ daemonize();
+ reparent_to_init();
+
+ /*
+ * Set the name of this process.
+ */
+
+ sprintf(current->comm, "scsi_eh_%d", host->host_no);
+
+ host->eh_wait = &sem;
+ host->ehandler = current;
+
+ unlock_kernel();
+
+ /*
+ * Wake up the thread that created us.
+ */
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));
+
+ up(host->eh_notify);
+
+ while (1) {
+ /*
+ * If we get a signal, it means we are supposed to go
+ * away and die. This typically happens if the user is
+ * trying to unload a module.
+ */
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));
+
+ /*
+ * Note - we always use down_interruptible with the semaphore
+ * even if the module was loaded as part of the kernel. The
+ * reason is that down() will cause this thread to be counted
+ * in the load average as a running process, and
+ * down_interruptible() doesn't. Given that we need to allow this
+ * thread to die if the driver was loaded as a module, using
+ * semaphores isn't unreasonable.
+ */
+ down_interruptible(&sem);
+ if( host->loaded_as_module ) {
+ if (signal_pending(current))
+ break;
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));
+
+ host->eh_active = 1;
+
+ /*
+ * We have a host that is failing for some reason. Figure out
+ * what we need to do to get it up and online again (if we can).
+ * If we fail, we end up taking the thing offline.
+ */
+ if (host->hostt->eh_strategy_handler != NULL) {
+ rtn = host->hostt->eh_strategy_handler(host);
+ } else {
+ rtn = scsi_unjam_host(host);
+ }
+
+ host->eh_active = 0;
+
+ /*
+ * Note - if the above fails completely, the action is to take
+ * individual devices offline and flush the queue of any
+ * outstanding requests that may have been pending. When we
+ * restart, we restart any I/O to any other devices on the bus
+ * which are still online.
+ */
+ scsi_restart_operations(host);
+
+ }
+
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));
+
+ /*
+ * Make sure that nobody tries to wake us up again.
+ */
+ host->eh_wait = NULL;
+
+ /*
+ * Knock this down too. From this point on, the host is flying
+ * without a pilot. If this is because the module is being unloaded,
+ * that's fine. If the user sent a signal to this thing, we are
+ * potentially in real danger.
+ */
+ host->in_recovery = 0;
+ host->eh_active = 0;
+ host->ehandler = NULL;
+
+ /*
+ * If anyone is waiting for us to exit (i.e. someone trying to unload
+ * a driver), then wake up that process to let them know we are on
+ * the way out the door. This may be overkill - I *think* that we
+ * could probably just unload the driver and send the signal, and when
+ * the error handling thread wakes up it would just exit without
+ * needing to touch any memory associated with the driver itself.
+ */
+ if (host->eh_notify != NULL)
+ up(host->eh_notify);
+}
+
+/*
+ * Function: scsi_new_reset
+ *
+ * Purpose: Send requested reset to a bus or device at any phase.
+ *
+ * Arguments: SCpnt - command ptr to send reset with (usually a dummy)
+ * flag - reset type (see scsi.h)
+ *
+ * Returns: SUCCESS/FAILED.
+ *
+ * Notes: This is used by the SCSI Generic driver to provide
+ * Bus/Device reset capability.
+ */
+int
+scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
+{
+ int rtn;
+
+ switch(flag) {
+ case SCSI_TRY_RESET_DEVICE:
+ rtn = scsi_try_bus_device_reset(SCpnt, 0);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_BUS:
+ rtn = scsi_try_bus_reset(SCpnt);
+ if (rtn == SUCCESS)
+ break;
+ /* FALLTHROUGH */
+ case SCSI_TRY_RESET_HOST:
+ rtn = scsi_try_host_reset(SCpnt);
+ break;
+ default:
+ rtn = FAILED;
+ }
+
+ return rtn;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_ioctl.c b/xen-2.4.16/drivers/scsi/scsi_ioctl.c
new file mode 100644
index 0000000000..6f891de66b
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_ioctl.c
@@ -0,0 +1,538 @@
+/*
+ * Changes:
+ * Arnaldo Carvalho de Melo 08/23/2000
+ * - get rid of some verify_areas and use __copy*user and __get/put_user
+ * for the ones that remain
+ */
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/page.h>
+
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+
+#include <linux/blk.h>
+#include "scsi.h"
+#include "hosts.h"
+#include <scsi/scsi_ioctl.h>
+
+#define NORMAL_RETRIES 5
+#define IOCTL_NORMAL_TIMEOUT (10 * HZ)
+#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ)
+#define START_STOP_TIMEOUT (60 * HZ)
+#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ)
+#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ)
+#define READ_DEFECT_DATA_TIMEOUT (60 * HZ ) /* ZIP-250 on parallel port takes as long!
*/ + +#define MAX_BUF PAGE_SIZE + +/* + * If we are told to probe a host, we will return 0 if the host is not + * present, 1 if the host is present, and will return an identifying + * string at *arg, if arg is non null, filling to the length stored at + * (int *) arg + */ + +static int ioctl_probe(struct Scsi_Host *host, void *buffer) +{ + unsigned int len, slen; + const char *string; + int temp = host->hostt->present; + + if (temp && buffer) { + if (get_user(len, (unsigned int *) buffer)) + return -EFAULT; + + if (host->hostt->info) + string = host->hostt->info(host); + else + string = host->hostt->name; + if (string) { + slen = strlen(string); + if (len > slen) + len = slen + 1; + if (copy_to_user(buffer, string, len)) + return -EFAULT; + } + } + return temp; +} + +/* + + * The SCSI_IOCTL_SEND_COMMAND ioctl sends a command out to the SCSI host. + * The IOCTL_NORMAL_TIMEOUT and NORMAL_RETRIES variables are used. + * + * dev is the SCSI device struct ptr, *(int *) arg is the length of the + * input data, if any, not including the command string & counts, + * *((int *)arg + 1) is the output buffer size in bytes. + * + * *(char *) ((int *) arg)[2] the actual command byte. + * + * Note that if more than MAX_BUF bytes are requested to be transferred, + * the ioctl will fail with error EINVAL. MAX_BUF can be increased in + * the future by increasing the size that scsi_malloc will accept. + * + * This size *does not* include the initial lengths that were passed. + * + * The SCSI command is read from the memory location immediately after the + * length words, and the input data is right after the command. The SCSI + * routines know the command size based on the opcode decode. + * + * The output area is then filled in starting from the command byte. + */ + +static int ioctl_internal_command(Scsi_Device * dev, char *cmd, + int timeout, int retries) +{ + int result; + Scsi_Request *SRpnt; + Scsi_Device *SDpnt; + + + SCSI_LOG_IOCTL(1, printk("Trying ioctl with scsi command %d\n", cmd[0])); + if (NULL == (SRpnt = scsi_allocate_request(dev))) { + printk("SCSI internal ioctl failed, no memory\n"); + return -ENOMEM; + } + + SRpnt->sr_data_direction = SCSI_DATA_NONE; + scsi_wait_req(SRpnt, cmd, NULL, 0, timeout, retries); + + SCSI_LOG_IOCTL(2, printk("Ioctl returned 0x%x\n", SRpnt->sr_result)); + + if (driver_byte(SRpnt->sr_result) != 0) + switch (SRpnt->sr_sense_buffer[2] & 0xf) { + case ILLEGAL_REQUEST: + if (cmd[0] == ALLOW_MEDIUM_REMOVAL) + dev->lockable = 0; + else + printk("SCSI device (ioctl) reports ILLEGAL REQUEST.\n"); + break; + case NOT_READY: /* This happens if there is no disc in drive */ + if (dev->removable && (cmd[0] != TEST_UNIT_READY)) { + printk(KERN_INFO "Device not ready. 
Make sure there is a disc in the drive.\n");
+ break;
+ }
+ case UNIT_ATTENTION:
+ if (dev->removable) {
+ dev->changed = 1;
+ SRpnt->sr_result = 0; /* This is no longer considered an error */
+ /* gag this error, VFS will log it anyway /axboe */
+ /* printk(KERN_INFO "Disc change detected.\n"); */
+ break;
+ };
+ default: /* Fall through for non-removable media */
+ printk("SCSI error: host %d id %d lun %d return code = %x\n",
+ dev->host->host_no,
+ dev->id,
+ dev->lun,
+ SRpnt->sr_result);
+ printk("\tSense class %x, sense error %x, extended sense %x\n",
+ sense_class(SRpnt->sr_sense_buffer[0]),
+ sense_error(SRpnt->sr_sense_buffer[0]),
+ SRpnt->sr_sense_buffer[2] & 0xf);
+
+ };
+
+ result = SRpnt->sr_result;
+
+ SCSI_LOG_IOCTL(2, printk("IOCTL Releasing command\n"));
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+ return result;
+}
+
+/*
+ * This interface is deprecated - users should use the scsi generic (sg)
+ * interface instead, as this is a more flexible approach to performing
+ * generic SCSI commands on a device.
+ *
+ * The structure that we are passed should look like:
+ *
+ * struct sdata {
+ * unsigned int inlen; [i] Length of data to be written to device
+ * unsigned int outlen; [i] Length of data to be read from device
+ * unsigned char cmd[x]; [i] SCSI command (6 <= x <= 12).
+ * [o] Data read from device starts here.
+ * [o] On error, sense buffer starts here.
+ * unsigned char wdata[y]; [i] Data written to device starts here.
+ * };
+ * Notes:
+ * - The SCSI command length is determined by examining the 1st byte
+ * of the given command. There is no way to override this.
+ * - Data transfers are limited to PAGE_SIZE (4K on i386, 8K on alpha).
+ * - The length (x + y) must be at least OMAX_SB_LEN bytes long to
+ * accommodate the sense buffer when an error occurs.
+ * The sense buffer is truncated to OMAX_SB_LEN (16) bytes so that
+ * old code will not be surprised.
+ * - If a Unix error occurs (e.g. ENOMEM) then the user will receive
+ * a negative return and the Unix error code in 'errno'.
+ * If the SCSI command succeeds then 0 is returned.
+ * Positive numbers returned are the compacted SCSI error codes (4
+ * bytes in one int) where the lowest byte is the SCSI status.
+ * See the drivers/scsi/scsi.h file for more information on this.
+ *
+ */
+#define OMAX_SB_LEN 16 /* Old sense buffer length */
+
+int scsi_ioctl_send_command(Scsi_Device * dev, Scsi_Ioctl_Command * sic)
+{
+ char *buf;
+ unsigned char cmd[MAX_COMMAND_SIZE];
+ char *cmd_in;
+ Scsi_Request *SRpnt;
+ Scsi_Device *SDpnt;
+ unsigned char opcode;
+ unsigned int inlen, outlen, cmdlen;
+ unsigned int needed, buf_needed;
+ int timeout, retries, result;
+ int data_direction;
+
+ if (!sic)
+ return -EINVAL;
+ /*
+ * Verify that we can read at least this much.
+ */
+ if (verify_area(VERIFY_READ, sic, sizeof(Scsi_Ioctl_Command)))
+ return -EFAULT;
+
+ if(__get_user(inlen, &sic->inlen))
+ return -EFAULT;
+
+ if(__get_user(outlen, &sic->outlen))
+ return -EFAULT;
+
+ /*
+ * We do not transfer more than MAX_BUF with this interface.
+ * If the user needs to transfer more data than this, they
+ * should use scsi_generics (sg) instead.
+ */
+ if (inlen > MAX_BUF)
+ return -EINVAL;
+ if (outlen > MAX_BUF)
+ return -EINVAL;
+
+ cmd_in = sic->data;
+ if(get_user(opcode, cmd_in))
+ return -EFAULT;
+
+ needed = buf_needed = (inlen > outlen ?
inlen : outlen);
+ if (buf_needed) {
+ buf_needed = (buf_needed + 511) & ~511;
+ if (buf_needed > MAX_BUF)
+ buf_needed = MAX_BUF;
+ buf = (char *) scsi_malloc(buf_needed);
+ if (!buf)
+ return -ENOMEM;
+ memset(buf, 0, buf_needed);
+ if( inlen == 0 ) {
+ data_direction = SCSI_DATA_READ;
+ } else if (outlen == 0 ) {
+ data_direction = SCSI_DATA_WRITE;
+ } else {
+ /*
+ * Can this ever happen?
+ */
+ data_direction = SCSI_DATA_UNKNOWN;
+ }
+
+ } else {
+ buf = NULL;
+ data_direction = SCSI_DATA_NONE;
+ }
+
+ /*
+ * Obtain the command from the user's address space.
+ */
+ cmdlen = COMMAND_SIZE(opcode);
+
+ result = -EFAULT;
+
+ if (verify_area(VERIFY_READ, cmd_in, cmdlen + inlen))
+ goto error;
+
+ if(__copy_from_user(cmd, cmd_in, cmdlen))
+ goto error;
+
+ /*
+ * Obtain the data to be sent to the device (if any).
+ */
+
+ if(copy_from_user(buf, cmd_in + cmdlen, inlen))
+ goto error;
+
+ /*
+ * Set the lun field to the correct value.
+ */
+ if (dev->scsi_level <= SCSI_2)
+ cmd[1] = (cmd[1] & 0x1f) | (dev->lun << 5);
+
+ switch (opcode) {
+ case FORMAT_UNIT:
+ timeout = FORMAT_UNIT_TIMEOUT;
+ retries = 1;
+ break;
+ case START_STOP:
+ timeout = START_STOP_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case MOVE_MEDIUM:
+ timeout = MOVE_MEDIUM_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_ELEMENT_STATUS:
+ timeout = READ_ELEMENT_STATUS_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ case READ_DEFECT_DATA:
+ timeout = READ_DEFECT_DATA_TIMEOUT;
+ retries = 1;
+ break;
+ default:
+ timeout = IOCTL_NORMAL_TIMEOUT;
+ retries = NORMAL_RETRIES;
+ break;
+ }
+
+#ifndef DEBUG_NO_CMD
+
+
+ SRpnt = scsi_allocate_request(dev);
+ if( SRpnt == NULL )
+ {
+ result = -EINTR;
+ goto error;
+ }
+
+ SRpnt->sr_data_direction = data_direction;
+ scsi_wait_req(SRpnt, cmd, buf, needed, timeout, retries);
+
+ /*
+ * If there was an error condition, pass the info back to the user.
+ */
+
+ result = SRpnt->sr_result;
+
+ if (SRpnt->sr_result) {
+ int sb_len = sizeof(SRpnt->sr_sense_buffer);
+
+ sb_len = (sb_len > OMAX_SB_LEN) ? OMAX_SB_LEN : sb_len;
+ if (copy_to_user(cmd_in, SRpnt->sr_sense_buffer, sb_len))
+ result = -EFAULT;
+ } else {
+ if (copy_to_user(cmd_in, buf, outlen))
+ result = -EFAULT;
+ }
+
+ SDpnt = SRpnt->sr_device;
+ scsi_release_request(SRpnt);
+ SRpnt = NULL;
+
+error:
+ if (buf)
+ scsi_free(buf, buf_needed);
+
+
+ return result;
+#else
+ {
+ int i;
+ printk("scsi_ioctl : device %d. command = ", dev->id);
+ for (i = 0; i < cmdlen; ++i)
+ printk("%02x ", cmd[i]);
+ printk("\nbuffer =");
+ for (i = 0; i < 20; ++i)
+ printk("%02x ", buf[i]);
+ printk("\n");
+ printk("inlen = %d, outlen = %d, cmdlen = %d\n",
+ inlen, outlen, cmdlen);
+ printk("buf = %p, cmd_in = %p\n", buf, cmd_in);
+ }
+ return 0;
+#endif
+}
+
+/*
+ * The scsi_ioctl_get_pci() function places into arg the value
+ * pci_dev::slot_name (8 characters) for the PCI device (if any).
+ * Returns: 0 on success
+ * -ENXIO if there isn't a PCI device pointer
+ * (could be because the SCSI driver hasn't been
+ * updated yet, or because it isn't a SCSI
+ * device)
+ * any copy_to_user() error on failure there
+ */
+static int
+scsi_ioctl_get_pci(Scsi_Device * dev, void *arg)
+{
+
+ if (!dev->host->pci_dev)
+ return -ENXIO;
+ if(copy_to_user(arg, dev->host->pci_dev->slot_name,
+ sizeof(dev->host->pci_dev->slot_name)))
+ return -EFAULT;
+ return 0;
+}
+
+
+/*
+ * the scsi_ioctl() function differs from most ioctls in that it does
+ * not take a major/minor number as the dev field.
Rather, it takes + * a pointer to a scsi_devices[] element, a structure. + */ +int scsi_ioctl(Scsi_Device * dev, int cmd, void *arg) +{ + char scsi_cmd[MAX_COMMAND_SIZE]; + char cmd_byte1; + + /* No idea how this happens.... */ + if (!dev) + return -ENXIO; + + /* + * If we are in the middle of error recovery, don't let anyone + * else try and use this device. Also, if error recovery fails, it + * may try and take the device offline, in which case all further + * access to the device is prohibited. + */ + if (!scsi_block_when_processing_errors(dev)) { + return -ENODEV; + } + cmd_byte1 = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0; + + switch (cmd) { + case SCSI_IOCTL_GET_IDLUN: + if (verify_area(VERIFY_WRITE, arg, sizeof(Scsi_Idlun))) + return -EFAULT; + + __put_user((dev->id & 0xff) + + ((dev->lun & 0xff) << 8) + + ((dev->channel & 0xff) << 16) + + ((dev->host->host_no & 0xff) << 24), + &((Scsi_Idlun *) arg)->dev_id); + __put_user(dev->host->unique_id, &((Scsi_Idlun *) arg)->host_unique_id); + return 0; + case SCSI_IOCTL_GET_BUS_NUMBER: + return put_user(dev->host->host_no, (int *) arg); + case SCSI_IOCTL_TAGGED_ENABLE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!dev->tagged_supported) + return -EINVAL; + dev->tagged_queue = 1; + dev->current_tag = 1; + return 0; + case SCSI_IOCTL_TAGGED_DISABLE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!dev->tagged_supported) + return -EINVAL; + dev->tagged_queue = 0; + dev->current_tag = 0; + return 0; + case SCSI_IOCTL_PROBE_HOST: + return ioctl_probe(dev->host, arg); + case SCSI_IOCTL_SEND_COMMAND: + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + return scsi_ioctl_send_command((Scsi_Device *) dev, + (Scsi_Ioctl_Command *) arg); + case SCSI_IOCTL_DOORLOCK: + if (!dev->removable || !dev->lockable) + return 0; + scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL; + scsi_cmd[1] = cmd_byte1; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = SCSI_REMOVAL_PREVENT; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_DOORUNLOCK: + if (!dev->removable || !dev->lockable) + return 0; + scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL; + scsi_cmd[1] = cmd_byte1; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = SCSI_REMOVAL_ALLOW; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES); + case SCSI_IOCTL_TEST_UNIT_READY: + scsi_cmd[0] = TEST_UNIT_READY; + scsi_cmd[1] = cmd_byte1; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = 0; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_START_UNIT: + scsi_cmd[0] = START_STOP; + scsi_cmd[1] = cmd_byte1; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = 1; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + START_STOP_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_STOP_UNIT: + scsi_cmd[0] = START_STOP; + scsi_cmd[1] = cmd_byte1; + scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0; + scsi_cmd[4] = 0; + return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd, + START_STOP_TIMEOUT, NORMAL_RETRIES); + break; + case SCSI_IOCTL_GET_PCI: + return scsi_ioctl_get_pci(dev, arg); + break; + default: + if (dev->host->hostt->ioctl) + return dev->host->hostt->ioctl(dev, cmd, arg); + return -EINVAL; + } + return -EINVAL; +} + +/* + * Just like scsi_ioctl, only callable from kernel space with no + * fs segment fiddling. 
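+ *
+ * (Explanatory note, added: set_fs(get_ds()) below temporarily widens
+ * the address limit so that the copy_{to,from}_user() calls inside
+ * scsi_ioctl() accept kernel pointers; the old limit is restored on
+ * the way out. Sketch of a call, assuming a valid Scsi_Device *sdev:
+ *
+ *     if (kernel_scsi_ioctl(sdev, SCSI_IOCTL_TEST_UNIT_READY, NULL) == 0)
+ *         ... the unit answered TEST UNIT READY ...)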
+ */
+
+int kernel_scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
+{
+ mm_segment_t oldfs;
+ int tmp;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ tmp = scsi_ioctl(dev, cmd, arg);
+ set_fs(oldfs);
+ return tmp;
+}
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_lib.c b/xen-2.4.16/drivers/scsi/scsi_lib.c
new file mode 100644
index 0000000000..53c1092a57
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_lib.c
@@ -0,0 +1,1197 @@
+/*
+ * scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ */
+
+/*
+ * The fundamental purpose of this file is to contain a library of utility
+ * routines that can be used by low-level drivers. Ultimately the idea
+ * is that there should be a sufficiently rich number of functions that it
+ * would be possible for a driver author to fashion a queueing function for
+ * a low-level driver if they wished. Note however that this file also
+ * contains the "default" versions of these functions, as we don't want to
+ * go through and retrofit queueing functions into all 30 some-odd drivers.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/blk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/unistd.h>
+
+#include <asm/system.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include <scsi/scsi_ioctl.h>
+
+/*
+ * This entire source file deals with the new queueing code.
+ */
+
+/*
+ * Function: __scsi_insert_special()
+ *
+ * Purpose: worker for scsi_insert_special_*()
+ *
+ * Arguments: q - request queue where request should be inserted
+ * rq - request to be inserted
+ * data - private data
+ * at_head - insert request at head or tail of queue
+ *
+ * Lock status: Assumed that io_request_lock is not held upon entry.
+ *
+ * Returns: Nothing
+ */
+static void __scsi_insert_special(request_queue_t *q, struct request *rq,
+ void *data, int at_head)
+{
+ unsigned long flags;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ rq->cmd = SPECIAL;
+ rq->special = data;
+ rq->q = NULL;
+ rq->nr_segments = 0;
+ rq->elevator_sequence = 0;
+
+ /*
+ * We have the option of inserting the head or the tail of the queue.
+ * Typically we use the tail for new ioctls and so forth. We use the
+ * head of the queue for things like a QUEUE_FULL message from a
+ * device, or a host that is unable to accept a particular command.
+ */
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if (at_head)
+ list_add(&rq->queue, &q->queue_head);
+ else
+ list_add_tail(&rq->queue, &q->queue_head);
+
+ q->request_fn(q);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+
+/*
+ * Function: scsi_insert_special_cmd()
+ *
+ * Purpose: Insert pre-formed command into request queue.
+ *
+ * Arguments: SCpnt - command that is ready to be queued.
+ * at_head - boolean.
True if we should insert at head + * of queue, false if we should insert at tail. + * + * Lock status: Assumed that lock is not held upon entry. + * + * Returns: Nothing + * + * Notes: This function is called from character device and from + * ioctl types of functions where the caller knows exactly + * what SCSI command needs to be issued. The idea is that + * we merely inject the command into the queue (at the head + * for now), and then call the queue request function to actually + * process it. + */ +int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head) +{ + request_queue_t *q = &SCpnt->device->request_queue; + + __scsi_insert_special(q, &SCpnt->request, SCpnt, at_head); + return 0; +} + +/* + * Function: scsi_insert_special_req() + * + * Purpose: Insert pre-formed request into request queue. + * + * Arguments: SRpnt - request that is ready to be queued. + * at_head - boolean. True if we should insert at head + * of queue, false if we should insert at tail. + * + * Lock status: Assumed that lock is not held upon entry. + * + * Returns: Nothing + * + * Notes: This function is called from character device and from + * ioctl types of functions where the caller knows exactly + * what SCSI command needs to be issued. The idea is that + * we merely inject the command into the queue (at the head + * for now), and then call the queue request function to actually + * process it. + */ +int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head) +{ + request_queue_t *q = &SRpnt->sr_device->request_queue; + + __scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head); + return 0; +} + +/* + * Function: scsi_init_cmd_errh() + * + * Purpose: Initialize SCpnt fields related to error handling. + * + * Arguments: SCpnt - command that is ready to be queued. + * + * Returns: Nothing + * + * Notes: This function has the job of initializing a number of + * fields related to error handling. Typically this will + * be called once for each command, as required. + */ +int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) +{ + ASSERT_LOCK(&io_request_lock, 0); + + SCpnt->owner = SCSI_OWNER_MIDLEVEL; + SCpnt->reset_chain = NULL; + SCpnt->serial_number = 0; + SCpnt->serial_number_at_timeout = 0; + SCpnt->flags = 0; + SCpnt->retries = 0; + + SCpnt->abort_reason = 0; + + memset((void *) SCpnt->sense_buffer, 0, sizeof SCpnt->sense_buffer); + + if (SCpnt->cmd_len == 0) + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + + /* + * We need saved copies of a number of fields - this is because + * error handling may need to overwrite these with different values + * to run different commands, and once error handling is complete, + * we will need to restore these values prior to running the actual + * command. + */ + SCpnt->old_use_sg = SCpnt->use_sg; + SCpnt->old_cmd_len = SCpnt->cmd_len; + SCpnt->sc_old_data_direction = SCpnt->sc_data_direction; + SCpnt->old_underflow = SCpnt->underflow; + memcpy((void *) SCpnt->data_cmnd, + (const void *) SCpnt->cmnd, sizeof(SCpnt->cmnd)); + SCpnt->buffer = SCpnt->request_buffer; + SCpnt->bufflen = SCpnt->request_bufflen; + + SCpnt->reset_chain = NULL; + + SCpnt->internal_timeout = NORMAL_TIMEOUT; + SCpnt->abort_reason = 0; + + return 1; +} + +/* + * Function: scsi_queue_next_request() + * + * Purpose: Handle post-processing of completed commands. + * + * Arguments: SCpnt - command that may need to be requeued. 
+ *
+ * Returns: Nothing
+ *
+ * Notes: After command completion, there may be blocks left
+ * over which weren't finished by the previous command;
+ * this can be for a number of reasons - the main one is
+ * that a medium error occurred, and the sectors after
+ * the bad block need to be re-read.
+ *
+ * If SCpnt is NULL, it means that the previous command
+ * was completely finished, and we should simply start
+ * a new command, if possible.
+ *
+ * This is where a lot of special case code has begun to
+ * accumulate. It doesn't really affect readability or
+ * anything, but it might be considered architecturally
+ * inelegant. If more of these special cases start to
+ * accumulate, I am thinking along the lines of implementing
+ * an atexit() like technology that gets run when commands
+ * complete. I am not convinced that it is worth the
+ * added overhead, however. Right now as things stand,
+ * there are simple conditional checks, and most hosts
+ * would skip past.
+ *
+ * Another possible solution would be to tailor different
+ * handler functions, sort of like what we did in scsi_merge.c.
+ * This is probably a better solution, but the number of different
+ * permutations grows as 2**N, and if too many more special cases
+ * get added, we start to get screwed.
+ */
+void scsi_queue_next_request(request_queue_t * q, Scsi_Cmnd * SCpnt)
+{
+ int all_clear;
+ unsigned long flags;
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ if (SCpnt != NULL) {
+
+ /*
+ * For some reason, we are not done with this request.
+ * This happens for I/O errors in the middle of the request,
+ * in which case we need to request the blocks that come after
+ * the bad sector.
+ */
+ SCpnt->request.special = (void *) SCpnt;
+ list_add(&SCpnt->request.queue, &q->queue_head);
+ }
+
+ /*
+ * Just hit the requeue function for the queue.
+ */
+ q->request_fn(q);
+
+ SDpnt = (Scsi_Device *) q->queuedata;
+ SHpnt = SDpnt->host;
+
+ /*
+ * If this is a single-lun device, and we are currently finished
+ * with this device, then see if we need to get another device
+ * started. FIXME(eric) - if this function gets too cluttered
+ * with special case code, then spin off separate versions and
+ * use function pointers to pick the right one.
+ */
+ if (SDpnt->single_lun
+ && list_empty(&q->queue_head)
+ && SDpnt->device_busy == 0) {
+ request_queue_t *q;
+
+ for (SDpnt = SHpnt->host_queue;
+ SDpnt;
+ SDpnt = SDpnt->next) {
+ if (((SHpnt->can_queue > 0)
+ && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)
+ || (SDpnt->device_blocked)) {
+ break;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ }
+ }
+
+ /*
+ * Now see whether there are other devices on the bus which
+ * might be starved. If so, hit the request function. If we
+ * don't find any, then it is safe to reset the flag. If we
+ * find any device that is starved, it isn't safe to reset the
+ * flag as the queue function releases the lock and thus some
+ * other device might have become starved along the way.
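+ *
+ * (Cross-reference, added: the starved/some_device_starved flags are
+ * set in scsi_request_fn() further down when a device finds the host
+ * too busy to take a command; the loop below is the matching consumer
+ * that restarts such queues and, when it finds none, clears the
+ * host-wide flag.)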
+ */
+ all_clear = 1;
+ if (SHpnt->some_device_starved) {
+ for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) {
+ request_queue_t *q;
+ if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue))
+ || (SHpnt->host_blocked)
+ || (SHpnt->host_self_blocked)) {
+ break;
+ }
+ if (SDpnt->device_blocked || !SDpnt->starved) {
+ continue;
+ }
+ q = &SDpnt->request_queue;
+ q->request_fn(q);
+ all_clear = 0;
+ }
+ if (SDpnt == NULL && all_clear) {
+ SHpnt->some_device_starved = 0;
+ }
+ }
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+/*
+ * Function: __scsi_end_request()
+ *
+ * Purpose: Post-processing of completed commands called from interrupt
+ * handler or a bottom-half handler.
+ *
+ * Arguments: SCpnt - command that is complete.
+ * uptodate - 1 if I/O indicates success, 0 for I/O error.
+ * sectors - number of sectors we want to mark.
+ * requeue - indicates whether we should requeue leftovers.
+ * frequeue - indicates that if we release the command block
+ * that the queue request function should be called.
+ *
+ * Lock status: Assumed that lock is not held upon entry.
+ *
+ * Returns: SCpnt if there are leftover blocks to handle, NULL when
+ * the request has been completely finished.
+ *
+ * Notes: This is called for block device requests in order to
+ * mark some number of sectors as complete.
+ *
+ * We are guaranteeing that the request queue will be goosed
+ * at some point during this call.
+ */
+static Scsi_Cmnd *__scsi_end_request(Scsi_Cmnd * SCpnt,
+ int uptodate,
+ int sectors,
+ int requeue,
+ int frequeue)
+{
+ struct request *req;
+ struct buffer_head *bh;
+ Scsi_Device * SDpnt;
+ int nsect;
+
+ ASSERT_LOCK(&io_request_lock, 0);
+
+ req = &SCpnt->request;
+ req->errors = 0;
+ if (!uptodate) {
+ printk(" I/O error: dev %s, sector %lu\n",
+ kdevname(req->rq_dev), req->sector);
+ }
+ do {
+ if ((bh = req->bh) != NULL) {
+ nsect = bh->b_size >> 9;
+ blk_finished_io(nsect);
+ req->bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ sectors -= nsect;
+ bh->b_end_io(bh, uptodate);
+ if ((bh = req->bh) != NULL) {
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector += nsect;
+ req->nr_sectors -= nsect;
+
+ req->current_nr_sectors = bh->b_size >> 9;
+ if (req->nr_sectors < req->current_nr_sectors) {
+ req->nr_sectors = req->current_nr_sectors;
+ printk("scsi_end_request: buffer-list destroyed\n");
+ }
+ }
+ }
+ } while (sectors && bh);
+
+ /*
+ * If there are blocks left over at the end, set up the command
+ * to queue the remainder of them.
+ */
+ if (req->bh) {
+ request_queue_t *q;
+
+ if( !requeue )
+ {
+ return SCpnt;
+ }
+
+ q = &SCpnt->device->request_queue;
+
+ req->buffer = bh->b_data;
+ /*
+ * Bleah. Leftovers again. Stick the leftovers in
+ * the front of the queue, and goose the queue again.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return SCpnt;
+ }
+ /*
+ * This request is done. If there is someone blocked waiting for this
+ * request, wake them up. Typically used to wake up processes trying
+ * to swap a page into memory.
+ */
+ if (req->waiting != NULL) {
+ complete(req->waiting);
+ }
+ req_finished_io(req);
+ add_blkdev_randomness(MAJOR(req->rq_dev));
+
+ SDpnt = SCpnt->device;
+
+ /*
+ * This will goose the queue request function at the end, so we don't
+ * need to worry about launching another command.
+ */ + __scsi_release_command(SCpnt); + + if( frequeue ) { + request_queue_t *q; + + q = &SDpnt->request_queue; + scsi_queue_next_request(q, NULL); + } + return NULL; +} + +/* + * Function: scsi_end_request() + * + * Purpose: Post-processing of completed commands called from interrupt + * handler or a bottom-half handler. + * + * Arguments: SCpnt - command that is complete. + * uptodate - 1 if I/O indicates success, 0 for I/O error. + * sectors - number of sectors we want to mark. + * + * Lock status: Assumed that lock is not held upon entry. + * + * Returns: Nothing + * + * Notes: This is called for block device requests in order to + * mark some number of sectors as complete. + * + * We are guaranteeing that the request queue will be goosed + * at some point during this call. + */ +Scsi_Cmnd *scsi_end_request(Scsi_Cmnd * SCpnt, int uptodate, int sectors) +{ + return __scsi_end_request(SCpnt, uptodate, sectors, 1, 1); +} + +/* + * Function: scsi_release_buffers() + * + * Purpose: Completion processing for block device I/O requests. + * + * Arguments: SCpnt - command that we are bailing. + * + * Lock status: Assumed that no lock is held upon entry. + * + * Returns: Nothing + * + * Notes: In the event that an upper level driver rejects a + * command, we must release resources allocated during + * the __init_io() function. Primarily this would involve + * the scatter-gather table, and potentially any bounce + * buffers. + */ +static void scsi_release_buffers(Scsi_Cmnd * SCpnt) +{ + ASSERT_LOCK(&io_request_lock, 0); + + /* + * Free up any indirection buffers we allocated for DMA purposes. + */ + if (SCpnt->use_sg) { + struct scatterlist *sgpnt; + void **bbpnt; + int i; + + sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; + + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) + scsi_free(sgpnt[i].address, sgpnt[i].length); + } + } + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + } else { + if (SCpnt->request_buffer != SCpnt->request.buffer) { + scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); + } + } + + /* + * Zero these out. They now point to freed memory, and it is + * dangerous to hang onto the pointers. + */ + SCpnt->buffer = NULL; + SCpnt->bufflen = 0; + SCpnt->request_buffer = NULL; + SCpnt->request_bufflen = 0; +} + +/* + * Function: scsi_io_completion() + * + * Purpose: Completion processing for block device I/O requests. + * + * Arguments: SCpnt - command that is finished. + * + * Lock status: Assumed that no lock is held upon entry. + * + * Returns: Nothing + * + * Notes: This function is matched in terms of capabilities to + * the function that created the scatter-gather list. + * In other words, if there are no bounce buffers + * (the normal case for most drivers), we don't need + * the logic to deal with cleaning up afterwards. + */ +void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors, + int block_sectors) +{ + int result = SCpnt->result; + int this_count = SCpnt->bufflen >> 9; + request_queue_t *q = &SCpnt->device->request_queue; + + /* + * We must do one of several things here: + * + * Call scsi_end_request. This will finish off the specified + * number of sectors. If we are done, the command block will + * be released, and the queue function will be goosed. If we + * are not done, then scsi_end_request will directly goose + * the queue. + * + * We can just use scsi_queue_next_request() here. This + * would be used if we just wanted to retry, for example. 
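+ *
+ * (Editorial sketch of the two outcomes described above:
+ *
+ *     scsi_end_request(SCpnt, 1, good_sectors);   finish sectors; releases
+ *                                                 the command or requeues
+ *                                                 the leftover blocks
+ *     scsi_queue_next_request(q, SCpnt);          put the whole command
+ *                                                 back at the queue head,
+ *                                                 i.e. a plain retry.)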
+ * + */ + ASSERT_LOCK(&io_request_lock, 0); + + /* + * Free up any indirection buffers we allocated for DMA purposes. + * For the case of a READ, we need to copy the data out of the + * bounce buffer and into the real buffer. + */ + if (SCpnt->use_sg) { + struct scatterlist *sgpnt; + void **bbpnt; + int i; + + sgpnt = (struct scatterlist *) SCpnt->buffer; + bbpnt = SCpnt->bounce_buffers; + + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) { + if (SCpnt->request.cmd == READ) { + memcpy(bbpnt[i], + sgpnt[i].address, + sgpnt[i].length); + } + scsi_free(sgpnt[i].address, sgpnt[i].length); + } + } + } + scsi_free(SCpnt->buffer, SCpnt->sglist_len); + } else { + if (SCpnt->buffer != SCpnt->request.buffer) { + if (SCpnt->request.cmd == READ) { + memcpy(SCpnt->request.buffer, SCpnt->buffer, + SCpnt->bufflen); + } + scsi_free(SCpnt->buffer, SCpnt->bufflen); + } + } + + /* + * Zero these out. They now point to freed memory, and it is + * dangerous to hang onto the pointers. + */ + SCpnt->buffer = NULL; + SCpnt->bufflen = 0; + SCpnt->request_buffer = NULL; + SCpnt->request_bufflen = 0; + + /* + * Next deal with any sectors which we were able to correctly + * handle. + */ + if (good_sectors > 0) { + SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", + SCpnt->request.nr_sectors, + good_sectors)); + SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); + + SCpnt->request.errors = 0; + /* + * If multiple sectors are requested in one buffer, then + * they will have been finished off by the first command. + * If not, then we have a multi-buffer command. + * + * If block_sectors != 0, it means we had a medium error + * of some sort, and that we want to mark some number of + * sectors as not uptodate. Thus we want to inhibit + * requeueing right here - we will requeue down below + * when we handle the bad sectors. + */ + SCpnt = __scsi_end_request(SCpnt, + 1, + good_sectors, + result == 0, + 1); + + /* + * If the command completed without error, then either finish off the + * rest of the command, or start a new one. + */ + if (result == 0 || SCpnt == NULL ) { + return; + } + } + /* + * Now, if we were good little boys and girls, Santa left us a request + * sense buffer. We can extract information from this, so we + * can choose a block to remap, etc. + */ + if (driver_byte(result) != 0) { + if (suggestion(result) == SUGGEST_REMAP) { +#ifdef REMAP + /* + * Not yet implemented. A read will fail after being remapped, + * a write will call the strategy routine again. + */ + if (SCpnt->device->remap) { + result = 0; + } +#endif + } + if ((SCpnt->sense_buffer[0] & 0x7f) == 0x70) { + /* + * If the device is in the process of becoming ready, + * retry. + */ + if (SCpnt->sense_buffer[12] == 0x04 && + SCpnt->sense_buffer[13] == 0x01) { + scsi_queue_next_request(q, SCpnt); + return; + } + if ((SCpnt->sense_buffer[2] & 0xf) == UNIT_ATTENTION) { + if (SCpnt->device->removable) { + /* detected disc change. set a bit + * and quietly refuse further access. + */ + SCpnt->device->changed = 1; + SCpnt = scsi_end_request(SCpnt, 0, this_count); + return; + } else { + /* + * Must have been a power glitch, or a + * bus reset. Could not have been a + * media change, so we just retry the + * request and see what happens. + */ + scsi_queue_next_request(q, SCpnt); + return; + } + } + } + /* If we had an ILLEGAL REQUEST returned, then we may have + * performed an unsupported command. The only thing this should be + * would be a ten byte read where only a six byte read was supported. 
+ * Also, on a system where READ CAPACITY failed, we may have read
+ * past the end of the disk.
+ */
+
+ switch (SCpnt->sense_buffer[2]) {
+ case ILLEGAL_REQUEST:
+ if (SCpnt->device->ten) {
+ SCpnt->device->ten = 0;
+ /*
+ * This will cause a retry with a 6-byte
+ * command.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ result = 0;
+ } else {
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ }
+ break;
+ case NOT_READY:
+ printk(KERN_INFO "Device %s not ready.\n",
+ kdevname(SCpnt->request.rq_dev));
+ SCpnt = scsi_end_request(SCpnt, 0, this_count);
+ return;
+ break;
+ case MEDIUM_ERROR:
+ case VOLUME_OVERFLOW:
+ printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
+ SCpnt->host->host_no, (int) SCpnt->channel,
+ (int) SCpnt->target, (int) SCpnt->lun);
+ print_command(SCpnt->cmnd);
+ print_sense("sd", SCpnt);
+ SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
+ return;
+ default:
+ break;
+ }
+ } /* driver byte != 0 */
+ if (host_byte(result) == DID_RESET) {
+ /*
+ * Third party bus reset or reset for error
+ * recovery reasons. Just retry the request
+ * and see what happens.
+ */
+ scsi_queue_next_request(q, SCpnt);
+ return;
+ }
+ if (result) {
+ struct Scsi_Device_Template *STpnt;
+
+ STpnt = scsi_get_request_dev(&SCpnt->request);
+ printk("SCSI %s error : host %d channel %d id %d lun %d return code = %x\n",
+ (STpnt ? STpnt->name : "device"),
+ SCpnt->device->host->host_no,
+ SCpnt->device->channel,
+ SCpnt->device->id,
+ SCpnt->device->lun, result);
+
+ if (driver_byte(result) & DRIVER_SENSE)
+ print_sense("sd", SCpnt);
+ /*
+ * Mark a single buffer as not uptodate. Queue the remainder.
+ * We sometimes get this cruft in the event that a medium error
+ * isn't properly reported.
+ */
+ SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors);
+ return;
+ }
+}
+
+/*
+ * Function: scsi_get_request_dev()
+ *
+ * Purpose: Find the upper-level driver that is responsible for this
+ * request
+ *
+ * Arguments: request - I/O request we are preparing to queue.
+ *
+ * Lock status: No locks assumed to be held, but as it happens the
+ * io_request_lock is held when this is called.
+ *
+ * Returns: The Scsi_Device_Template for the upper-level driver,
+ * or NULL if no driver claims the request's major number.
+ *
+ * Notes: The requests in the request queue may have originated
+ * from any block device driver. We need to find out which
+ * one so that we can later form the appropriate command.
+ */
+struct Scsi_Device_Template *scsi_get_request_dev(struct request *req)
+{
+ struct Scsi_Device_Template *spnt;
+ kdev_t dev = req->rq_dev;
+ int major = MAJOR(dev);
+
+ ASSERT_LOCK(&io_request_lock, 1);
+
+ for (spnt = scsi_devicelist; spnt; spnt = spnt->next) {
+ /*
+ * Search for a block device driver that supports this
+ * major.
+ */
+ if (spnt->blk && spnt->major == major) {
+ return spnt;
+ }
+ /*
+ * I am still not entirely satisfied with this solution,
+ * but it is good enough for now. Disks have a number of
+ * major numbers associated with them, the primary
+ * 8, which we test above, and a secondary range of 7
+ * different consecutive major numbers. If this ever
+ * becomes insufficient, then we could add another function
+ * to the structure, and generalize this completely.
+ */
+ if( spnt->min_major != 0
+ && spnt->max_major != 0
+ && major >= spnt->min_major
+ && major <= spnt->max_major )
+ {
+ return spnt;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Function: scsi_request_fn()
+ *
+ * Purpose: Generic version of request function for SCSI hosts.
+ *
+ * Arguments: q - Pointer to actual queue.
+ * + * Returns: Nothing + * + * Lock status: IO request lock assumed to be held when called. + * + * Notes: The theory is that this function is something which individual + * drivers could also supply if they wished to. The problem + * is that we have 30 some odd low-level drivers in the kernel + * tree already, and it would be most difficult to retrofit + * this crap into all of them. Thus this function has the job + * of acting as a generic queue manager for all of those existing + * drivers. + */ +void scsi_request_fn(request_queue_t * q) +{ + struct request *req; + Scsi_Cmnd *SCpnt; + Scsi_Request *SRpnt; + Scsi_Device *SDpnt; + struct Scsi_Host *SHpnt; + struct Scsi_Device_Template *STpnt; + + ASSERT_LOCK(&io_request_lock, 1); + + SDpnt = (Scsi_Device *) q->queuedata; + if (!SDpnt) { + panic("Missing device"); + } + SHpnt = SDpnt->host; + + /* + * To start with, we keep looping until the queue is empty, or until + * the host is no longer able to accept any more requests. + */ + while (1 == 1) { + /* + * Check this again - each time we loop through we will have + * released the lock and grabbed it again, so each time + * we need to check to see if the queue is plugged or not. + */ + if (SHpnt->in_recovery || q->plugged) + return; + + /* + * If the device cannot accept another request, then quit. + */ + if (SDpnt->device_blocked) { + break; + } + if ((SHpnt->can_queue > 0 && (SHpnt->host_busy >= SHpnt->can_queue)) + || (SHpnt->host_blocked) + || (SHpnt->host_self_blocked)) { + /* + * If we are unable to process any commands at all for + * this device, then we consider it to be starved. + * What this means is that there are no outstanding + * commands for this device and hence we need a + * little help getting it started again + * once the host isn't quite so busy. + */ + if (SDpnt->device_busy == 0) { + SDpnt->starved = 1; + SHpnt->some_device_starved = 1; + } + break; + } else { + SDpnt->starved = 0; + } + + /* + * FIXME(eric) + * I am not sure where the best place to do this is. We need + * to hook in a place where we are likely to come if in user + * space. Technically the error handling thread should be + * doing this crap, but the error handler isn't used by + * most hosts. + */ + if (SDpnt->was_reset) { + /* + * We need to relock the door, but we might + * be in an interrupt handler. Only do this + * from user space, since we do not want to + * sleep from an interrupt. + * + * FIXME(eric) - have the error handler thread do + * this work. + */ + SDpnt->was_reset = 0; + if (SDpnt->removable && !in_interrupt()) { + spin_unlock_irq(&io_request_lock); + scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); + spin_lock_irq(&io_request_lock); + continue; + } + } + + /* + * If we couldn't find a request that could be queued, then we + * can also quit. + */ + if (list_empty(&q->queue_head)) + break; + + /* + * Loop through all of the requests in this queue, and find + * one that is queueable. + */ + req = blkdev_entry_next_request(&q->queue_head); + + /* + * Find the actual device driver associated with this command. + * The SPECIAL requests are things like character device or + * ioctls, which did not originate from ll_rw_blk. Note that + * the special field is also used to indicate the SCpnt for + * the remainder of a partially fulfilled request that can + * come up when there is a medium error. We have to treat + * these two cases differently. We differentiate by looking + * at request.cmd, as this tells us the real story. 
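+ *
+ * Roughly, the two flavors we can see here are:
+ *
+ *   req->cmd == SPECIAL: req->special carries a ready-made
+ *       Scsi_Cmnd (or a Scsi_Request tagged with SCSI_REQ_MAGIC),
+ *       handed to us directly, e.g. on behalf of an ioctl.
+ *   req->cmd == READ/WRITE with req->special != NULL: the
+ *       leftover part of a command that took a medium error; the
+ *       Scsi_Cmnd already exists and only its scatter-gather
+ *       segment count needs to be recomputed.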
+ */
+ if (req->cmd == SPECIAL) {
+ STpnt = NULL;
+ SCpnt = (Scsi_Cmnd *) req->special;
+ SRpnt = (Scsi_Request *) req->special;
+
+ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) {
+ SCpnt = scsi_allocate_device(SRpnt->sr_device,
+ FALSE, FALSE);
+ if( !SCpnt ) {
+ break;
+ }
+ scsi_init_cmd_from_req(SCpnt, SRpnt);
+ }
+
+ } else {
+ SRpnt = NULL;
+ STpnt = scsi_get_request_dev(req);
+ if (!STpnt) {
+ panic("Unable to find device associated with request");
+ }
+ /*
+ * Now try and find a command block that we can use.
+ */
+ if( req->special != NULL ) {
+ SCpnt = (Scsi_Cmnd *) req->special;
+ /*
+ * We need to recount the number of
+ * scatter-gather segments here - the
+ * normal case code assumes this to be
+ * correct, as it would be a performance
+ * loss to always recount. Handling
+ * errors is always unusual, of course.
+ */
+ recount_segments(SCpnt);
+ } else {
+ SCpnt = scsi_allocate_device(SDpnt, FALSE, FALSE);
+ }
+ /*
+ * If so, we are ready to do something. Bump the count
+ * while the queue is locked and then break out of the
+ * loop. Otherwise loop around and try another request.
+ */
+ if (!SCpnt) {
+ break;
+ }
+ }
+
+ /*
+ * Now bump the usage count for both the host and the
+ * device.
+ */
+ SHpnt->host_busy++;
+ SDpnt->device_busy++;
+
+ /*
+ * Finally, before we release the lock, we copy the
+ * request to the command block, and remove the
+ * request from the request list. Note that we always
+ * operate on the queue head - there is absolutely no
+ * reason to search the list, because all of the commands
+ * in this queue are for the same device.
+ */
+ blkdev_dequeue_request(req);
+
+ if (req != &SCpnt->request && req != &SRpnt->sr_request ) {
+ memcpy(&SCpnt->request, req, sizeof(struct request));
+
+ /*
+ * We have copied the data out of the request block -
+ * it is now in a field in SCpnt. Release the request
+ * block.
+ */
+ blkdev_release_request(req);
+ }
+ /*
+ * Now it is finally safe to release the lock. We are
+ * not going to noodle the request list until this
+ * request has been queued and we loop back to queue
+ * another.
+ */
+ req = NULL;
+ spin_unlock_irq(&io_request_lock);
+
+ if (SCpnt->request.cmd != SPECIAL) {
+ /*
+ * This will do a couple of things:
+ * 1) Fill in the actual SCSI command.
+ * 2) Fill in any other upper-level specific fields
+ * (timeout).
+ *
+ * If this returns 0, it means that the request failed
+ * (reading past end of disk, reading offline device,
+ * etc). This won't actually talk to the device, but
+ * some kinds of consistency checking may cause the
+ * request to be rejected immediately.
+ */
+ if (STpnt == NULL) {
+ STpnt = scsi_get_request_dev(req);
+ }
+ /*
+ * This sets up the scatter-gather table (allocating if
+ * required). Hosts that need bounce buffers will also
+ * get those allocated here.
+ */
+ if (!SDpnt->scsi_init_io_fn(SCpnt)) {
+ SCpnt = __scsi_end_request(SCpnt, 0,
+ SCpnt->request.nr_sectors, 0, 0);
+ if( SCpnt != NULL )
+ {
+ panic("Should not have leftover blocks\n");
+ }
+ spin_lock_irq(&io_request_lock);
+ SHpnt->host_busy--;
+ SDpnt->device_busy--;
+ continue;
+ }
+ /*
+ * Initialize the actual SCSI command for this request.
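+ *
+ * For orientation, a bare-bones sketch of the kind of thing an
+ * upper-level init_command() does (purely illustrative - the real
+ * implementations live in the upper-level drivers and also deal
+ * with sector sizes, write protection, retries and the like):
+ */
+#if 0
+static int example_init_command(Scsi_Cmnd * SCpnt)
+{
+	/* Build a ten-byte READ CDB straight from the block request
+	 * (assumes the device uses 512-byte blocks). */
+	memset(SCpnt->cmnd, 0, sizeof(SCpnt->cmnd));
+	SCpnt->cmnd[0] = READ_10;
+	SCpnt->cmnd[2] = (SCpnt->request.sector >> 24) & 0xff;
+	SCpnt->cmnd[3] = (SCpnt->request.sector >> 16) & 0xff;
+	SCpnt->cmnd[4] = (SCpnt->request.sector >> 8) & 0xff;
+	SCpnt->cmnd[5] = SCpnt->request.sector & 0xff;
+	SCpnt->cmnd[7] = (SCpnt->request.nr_sectors >> 8) & 0xff;
+	SCpnt->cmnd[8] = SCpnt->request.nr_sectors & 0xff;
+	SCpnt->cmd_len = 10;
+	return 1;	/* non-zero means success, as tested below */
+}
+#endif
+/*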
+ */ + if (!STpnt->init_command(SCpnt)) { + scsi_release_buffers(SCpnt); + SCpnt = __scsi_end_request(SCpnt, 0, + SCpnt->request.nr_sectors, 0, 0); + if( SCpnt != NULL ) + { + panic("Should not have leftover blocks\n"); + } + spin_lock_irq(&io_request_lock); + SHpnt->host_busy--; + SDpnt->device_busy--; + continue; + } + } + /* + * Finally, initialize any error handling parameters, and set up + * the timers for timeouts. + */ + scsi_init_cmd_errh(SCpnt); + + /* + * Dispatch the command to the low-level driver. + */ + scsi_dispatch_cmd(SCpnt); + + /* + * Now we need to grab the lock again. We are about to mess + * with the request queue and try to find another command. + */ + spin_lock_irq(&io_request_lock); + } +} + +/* + * Function: scsi_block_requests() + * + * Purpose: Utility function used by low-level drivers to prevent further + * commands from being queued to the device. + * + * Arguments: SHpnt - Host in question + * + * Returns: Nothing + * + * Lock status: No locks are assumed held. + * + * Notes: There is no timer nor any other means by which the requests + * get unblocked other than the low-level driver calling + * scsi_unblock_requests(). + */ +void scsi_block_requests(struct Scsi_Host * SHpnt) +{ + SHpnt->host_self_blocked = TRUE; +} + +/* + * Function: scsi_unblock_requests() + * + * Purpose: Utility function used by low-level drivers to allow further + * commands from being queued to the device. + * + * Arguments: SHpnt - Host in question + * + * Returns: Nothing + * + * Lock status: No locks are assumed held. + * + * Notes: There is no timer nor any other means by which the requests + * get unblocked other than the low-level driver calling + * scsi_unblock_requests(). + * + * This is done as an API function so that changes to the + * internals of the scsi mid-layer won't require wholesale + * changes to drivers that use this feature. + */ +void scsi_unblock_requests(struct Scsi_Host * SHpnt) +{ + Scsi_Device *SDloop; + + SHpnt->host_self_blocked = FALSE; + /* Now that we are unblocked, try to start the queues. */ + for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next) + scsi_queue_next_request(&SDloop->request_queue, NULL); +} + +/* + * Function: scsi_report_bus_reset() + * + * Purpose: Utility function used by low-level drivers to report that + * they have observed a bus reset on the bus being handled. + * + * Arguments: SHpnt - Host in question + * channel - channel on which reset was observed. + * + * Returns: Nothing + * + * Lock status: No locks are assumed held. + * + * Notes: This only needs to be called if the reset is one which + * originates from an unknown location. Resets originated + * by the mid-level itself don't need to call this, but there + * should be no harm. + * + * The main purpose of this is to make sure that a CHECK_CONDITION + * is properly treated. + */ +void scsi_report_bus_reset(struct Scsi_Host * SHpnt, int channel) +{ + Scsi_Device *SDloop; + for (SDloop = SHpnt->host_queue; SDloop; SDloop = SDloop->next) { + if (channel == SDloop->channel) { + SDloop->was_reset = 1; + SDloop->expecting_cc_ua = 1; + } + } +} + +/* + * FIXME(eric) - these are empty stubs for the moment. I need to re-implement + * host blocking from scratch. The theory is that hosts that wish to block + * will register/deregister using these functions instead of the old way + * of setting the wish_block flag. + * + * The details of the implementation remain to be settled, however the + * stubs are here now so that the actual drivers will properly compile. 
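+ *
+ * One plausible shape for it, purely as a sketch of the idea (the
+ * eventual design may well differ):
+ */
+#if 0
+void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+{
+	SHpnt->host_blocked = TRUE;	/* mid-layer stops queueing */
+}
+
+void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+{
+	SHpnt->host_blocked = FALSE;
+	/* would also need to restart any starved device queues here */
+}
+#endif
+/*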
+ */
+void scsi_register_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
+
+void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt)
+{
+}
diff --git a/xen-2.4.16/drivers/scsi/scsi_merge.c b/xen-2.4.16/drivers/scsi/scsi_merge.c
new file mode 100644
index 0000000000..985e74a18c
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_merge.c
@@ -0,0 +1,1181 @@
+/*
+ * scsi_merge.c Copyright (C) 1999 Eric Youngdale
+ *
+ * SCSI queueing library.
+ * Initial versions: Eric Youngdale (eric@andante.org).
+ * Based upon conversations with large numbers
+ * of people at Linux Expo.
+ * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com).
+ */
+
+/*
+ * This file contains queue management functions that are used by SCSI.
+ * Typically this is used for several purposes. First, we need to ensure
+ * that commands do not grow so large that they cannot be handled all at
+ * once by a host adapter. The various flavors of merge functions included
+ * here serve this purpose.
+ *
+ * Note that it would be quite trivial to allow the low-level driver the
+ * flexibility to define its own queue handling functions. For the time
+ * being, the hooks are not present. Right now we are just using the
+ * data in the host template as an indicator of how we should be handling
+ * queues, and we select routines that are optimized for that purpose.
+ *
+ * Some hosts do not impose any restrictions on the size of a request.
+ * In such cases none of the merge functions in this file are called,
+ * and we allow ll_rw_blk to merge requests in the default manner.
+ * This isn't guaranteed to be optimal, but it should be pretty darned
+ * good. If someone comes up with ideas of better ways of managing queues
+ * to improve on the default behavior, then certainly fit it into this
+ * scheme in whatever manner makes the most sense. Please note that
+ * since each device has its own queue, we have considerable flexibility
+ * in queue management.
+ */
+
+#define __NO_VERSION__
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#define __KERNEL_SYSCALLS__
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+#include
+
+/*
+ * This means that bounce buffers cannot be allocated in chunks > PAGE_SIZE.
+ * Ultimately we should get away from using a dedicated DMA bounce buffer
+ * pool, and we should instead try and use kmalloc(). If we can
+ * eliminate this pool, then this restriction would no longer be needed.
+ */
+#define DMA_SEGMENT_SIZE_LIMITED
+
+#ifdef CONFIG_SCSI_DEBUG_QUEUES
+/*
+ * Enable a bunch of additional consistency checking. Turn this off
+ * if you are benchmarking.
+ */
+static int dump_stats(struct request *req,
+ int use_clustering,
+ int dma_host,
+ int segments)
+{
+ struct buffer_head *bh;
+
+ /*
+ * Dump the information that we have. We know we have an
+ * inconsistency.
+ */
+ printk("nr_segments is %x\n", req->nr_segments);
+ printk("counted segments is %x\n", segments);
+ printk("Flags %d %d\n", use_clustering, dma_host);
+ for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext)
+ {
+ printk("Segment 0x%p, blocks %d, addr 0x%lx\n",
+ bh,
+ bh->b_size >> 9,
+ virt_to_phys(bh->b_data - 1));
+ }
+ panic("Ththththaats all folks. Too dangerous to continue.\n");
+}
+
+
+/*
+ * Simple sanity check that we will use for the first go around
+ * in order to ensure that we are doing the counting correctly.
+ * This can be removed for optimization. + */ +#define SANITY_CHECK(req, _CLUSTER, _DMA) \ + if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ + { \ + printk("Incorrect segment count at 0x%p", current_text_addr()); \ + dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ + } +#else +#define SANITY_CHECK(req, _CLUSTER, _DMA) +#endif + +static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) +{ + int jj; + struct scatterlist *sgpnt; + void **bbpnt; + int consumed = 0; + + sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; + + /* + * Now print out a bunch of stats. First, start with the request + * size. + */ + printk("dma_free_sectors:%d\n", scsi_dma_free_sectors); + printk("use_sg:%d\ti:%d\n", SCpnt->use_sg, i); + printk("request_bufflen:%d\n", SCpnt->request_bufflen); + /* + * Now dump the scatter-gather table, up to the point of failure. + */ + for(jj=0; jj < SCpnt->use_sg; jj++) + { + printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n", + jj, + sgpnt[jj].length, + sgpnt[jj].address, + (bbpnt ? bbpnt[jj] : NULL)); + if (bbpnt && bbpnt[jj]) + consumed += sgpnt[jj].length; + } + printk("Total %d sectors consumed\n", consumed); + panic("DMA pool exhausted"); +} + +#define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering) + +/* + * This entire source file deals with the new queueing code. + */ + +/* + * Function: __count_segments() + * + * Purpose: Prototype for queue merge function. + * + * Arguments: q - Queue for which we are merging request. + * req - request into which we wish to merge. + * use_clustering - 1 if this host wishes to use clustering + * dma_host - 1 if this host has ISA DMA issues (bus doesn't + * expose all of the address lines, so that DMA cannot + * be done from an arbitrary address). + * remainder - used to track the residual size of the last + * segment. Comes in handy when we want to limit the + * size of bounce buffer segments to PAGE_SIZE. + * + * Returns: Count of the number of SG segments for the request. + * + * Lock status: + * + * Notes: This is only used for diagnostic purposes. + */ +__inline static int __count_segments(struct request *req, + int use_clustering, + int dma_host, + int * remainder) +{ + int ret = 1; + int reqsize = 0; + struct buffer_head *bh; + struct buffer_head *bhnext; + + if( remainder != NULL ) { + reqsize = *remainder; + } + + /* + * Add in the size increment for the first buffer. + */ + bh = req->bh; +#ifdef DMA_SEGMENT_SIZE_LIMITED + if( reqsize + bh->b_size > PAGE_SIZE ) { + ret++; + reqsize = bh->b_size; + } else { + reqsize += bh->b_size; + } +#else + reqsize += bh->b_size; +#endif + + for (bh = req->bh, bhnext = bh->b_reqnext; + bhnext != NULL; + bh = bhnext, bhnext = bh->b_reqnext) { + if (use_clustering) { + /* + * See if we can do this without creating another + * scatter-gather segment. In the event that this is a + * DMA capable host, make sure that a segment doesn't span + * the DMA threshold boundary. + */ + if (dma_host && + virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + ret++; + reqsize = bhnext->b_size; + } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + /* + * This one is OK. Let it go. + */ +#ifdef DMA_SEGMENT_SIZE_LIMITED + /* Note scsi_malloc is only able to hand out + * chunks of memory in sizes of PAGE_SIZE or + * less. Thus we need to keep track of + * the size of the piece that we have + * seen so far, and if we have hit + * the limit of PAGE_SIZE, then we are + * kind of screwed and we need to start + * another segment. 
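+ * (Example: with 4K pages, three contiguous 2K buffers would
+ * need a 6K bounce copy, but scsi_malloc() tops out at one
+ * page, so we must split after 4K and spend an extra
+ * scatter-gather entry on the remainder.)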
+ */ + if( dma_host + && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bhnext->b_size > PAGE_SIZE ) + { + ret++; + reqsize = bhnext->b_size; + continue; + } +#endif + reqsize += bhnext->b_size; + continue; + } + ret++; + reqsize = bhnext->b_size; + } else { + ret++; + reqsize = bhnext->b_size; + } + } + if( remainder != NULL ) { + *remainder = reqsize; + } + return ret; +} + +/* + * Function: recount_segments() + * + * Purpose: Recount the number of scatter-gather segments for this request. + * + * Arguments: req - request that needs recounting. + * + * Returns: Count of the number of SG segments for the request. + * + * Lock status: Irrelevant. + * + * Notes: This is only used when we have partially completed requests + * and the bit that is leftover is of an indeterminate size. + * This can come up if you get a MEDIUM_ERROR, for example, + * as we will have "completed" all of the sectors up to and + * including the bad sector, and the leftover bit is what + * we have to do now. This tends to be a rare occurrence, so + * we aren't busting our butts to instantiate separate versions + * of this function for the 4 different flag values. We + * probably should, however. + */ +void +recount_segments(Scsi_Cmnd * SCpnt) +{ + struct request *req; + struct Scsi_Host *SHpnt; + Scsi_Device * SDpnt; + + req = &SCpnt->request; + SHpnt = SCpnt->host; + SDpnt = SCpnt->device; + + req->nr_segments = __count_segments(req, + CLUSTERABLE_DEVICE(SHpnt, SDpnt), + SHpnt->unchecked_isa_dma, NULL); +} + +#define MERGEABLE_BUFFERS(X,Y) \ +(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ + (DMA_CHUNK_SIZE - 1)) == 0) + +#ifdef DMA_CHUNK_SIZE +static inline int scsi_new_mergeable(request_queue_t * q, + struct request * req, + struct Scsi_Host *SHpnt, + int max_segments) +{ + /* + * pci_map_sg will be able to merge these two + * into a single hardware sg entry, check if + * we'll have enough memory for the sg list. + * scsi.c allocates for this purpose + * min(64,sg_tablesize) entries. + */ + if (req->nr_segments >= max_segments || + req->nr_segments >= SHpnt->sg_tablesize) + return 0; + req->nr_segments++; + return 1; +} + +static inline int scsi_new_segment(request_queue_t * q, + struct request * req, + struct Scsi_Host *SHpnt, + int max_segments) +{ + /* + * pci_map_sg won't be able to map these two + * into a single hardware sg entry, so we have to + * check if things fit into sg_tablesize. + */ + if (req->nr_hw_segments >= SHpnt->sg_tablesize || + req->nr_segments >= SHpnt->sg_tablesize) + return 0; + req->nr_hw_segments++; + req->nr_segments++; + return 1; +} +#else +static inline int scsi_new_segment(request_queue_t * q, + struct request * req, + struct Scsi_Host *SHpnt, + int max_segments) +{ + if (req->nr_segments < SHpnt->sg_tablesize && + req->nr_segments < max_segments) { + /* + * This will form the start of a new segment. Bump the + * counter. + */ + req->nr_segments++; + return 1; + } else { + return 0; + } +} +#endif + +/* + * Function: __scsi_merge_fn() + * + * Purpose: Prototype for queue merge function. + * + * Arguments: q - Queue for which we are merging request. + * req - request into which we wish to merge. + * bh - Block which we may wish to merge into request + * use_clustering - 1 if this host wishes to use clustering + * dma_host - 1 if this host has ISA DMA issues (bus doesn't + * expose all of the address lines, so that DMA cannot + * be done from an arbitrary address). + * + * Returns: 1 if it is OK to merge the block into the request. 0 + * if it is not OK. 
+ * + * Lock status: io_request_lock is assumed to be held here. + * + * Notes: Some drivers have limited scatter-gather table sizes, and + * thus they cannot queue an infinitely large command. This + * function is called from ll_rw_blk before it attempts to merge + * a new block into a request to make sure that the request will + * not become too large. + * + * This function is not designed to be directly called. Instead + * it should be referenced from other functions where the + * use_clustering and dma_host parameters should be integer + * constants. The compiler should thus be able to properly + * optimize the code, eliminating stuff that is irrelevant. + * It is more maintainable to do this way with a single function + * than to have 4 separate functions all doing roughly the + * same thing. + */ +__inline static int __scsi_back_merge_fn(request_queue_t * q, + struct request *req, + struct buffer_head *bh, + int max_segments, + int use_clustering, + int dma_host) +{ + unsigned int count; + unsigned int segment_size = 0; + Scsi_Device *SDpnt; + struct Scsi_Host *SHpnt; + + SDpnt = (Scsi_Device *) q->queuedata; + SHpnt = SDpnt->host; + +#ifdef DMA_CHUNK_SIZE + if (max_segments > 64) + max_segments = 64; +#endif + + if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + return 0; + + if (use_clustering) { + /* + * See if we can do this without creating another + * scatter-gather segment. In the event that this is a + * DMA capable host, make sure that a segment doesn't span + * the DMA threshold boundary. + */ + if (dma_host && + virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + goto new_end_segment; + } + if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { +#ifdef DMA_SEGMENT_SIZE_LIMITED + if( dma_host + && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = 0; + count = __count_segments(req, use_clustering, dma_host, &segment_size); + if( segment_size + bh->b_size > PAGE_SIZE ) { + goto new_end_segment; + } + } +#endif + /* + * This one is OK. Let it go. + */ + return 1; + } + } + new_end_segment: +#ifdef DMA_CHUNK_SIZE + if (MERGEABLE_BUFFERS(req->bhtail, bh)) + return scsi_new_mergeable(q, req, SHpnt, max_segments); +#endif + return scsi_new_segment(q, req, SHpnt, max_segments); +} + +__inline static int __scsi_front_merge_fn(request_queue_t * q, + struct request *req, + struct buffer_head *bh, + int max_segments, + int use_clustering, + int dma_host) +{ + unsigned int count; + unsigned int segment_size = 0; + Scsi_Device *SDpnt; + struct Scsi_Host *SHpnt; + + SDpnt = (Scsi_Device *) q->queuedata; + SHpnt = SDpnt->host; + +#ifdef DMA_CHUNK_SIZE + if (max_segments > 64) + max_segments = 64; +#endif + + if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + return 0; + + if (use_clustering) { + /* + * See if we can do this without creating another + * scatter-gather segment. In the event that this is a + * DMA capable host, make sure that a segment doesn't span + * the DMA threshold boundary. + */ + if (dma_host && + virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + goto new_start_segment; + } + if (CONTIGUOUS_BUFFERS(bh, req->bh)) { +#ifdef DMA_SEGMENT_SIZE_LIMITED + if( dma_host + && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bh->b_size; + count = __count_segments(req, use_clustering, dma_host, &segment_size); + if( count != req->nr_segments ) { + goto new_start_segment; + } + } +#endif + /* + * This one is OK. Let it go. 
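+ * (CONTIGUOUS_BUFFERS(bh, req->bh) reads as: bh's data ends
+ * exactly where the request's first buffer begins, so the new
+ * block folds into the existing first scatter-gather segment
+ * and no extra segment needs to be charged.)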
+ */
+ return 1;
+ }
+ }
+ new_start_segment:
+#ifdef DMA_CHUNK_SIZE
+ if (MERGEABLE_BUFFERS(bh, req->bh))
+ return scsi_new_mergeable(q, req, SHpnt, max_segments);
+#endif
+ return scsi_new_segment(q, req, SHpnt, max_segments);
+}
+
+/*
+ * Function: scsi_merge_fn_()
+ *
+ * Purpose: queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * bh - Block which we may wish to merge into request
+ *
+ * Returns: 1 if it is OK to merge the block into the request. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Optimized for different cases depending upon whether
+ * ISA DMA is in use and whether clustering should be used.
+ */
+#define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \
+static int _FUNCTION(request_queue_t * q, \
+ struct request * req, \
+ struct buffer_head * bh, \
+ int max_segments) \
+{ \
+ int ret; \
+ SANITY_CHECK(req, _CLUSTER, _DMA); \
+ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \
+ req, \
+ bh, \
+ max_segments, \
+ _CLUSTER, \
+ _DMA); \
+ return ret; \
+}
+
+/* Version with use_clustering 0 and dma_host 1 is not necessary,
+ * since the only use of dma_host above is protected by use_clustering.
+ */
+MERGEFCT(scsi_back_merge_fn_, back, 0, 0)
+MERGEFCT(scsi_back_merge_fn_c, back, 1, 0)
+MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1)
+
+MERGEFCT(scsi_front_merge_fn_, front, 0, 0)
+MERGEFCT(scsi_front_merge_fn_c, front, 1, 0)
+MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1)
+
+/*
+ * Function: __scsi_merge_requests_fn()
+ *
+ * Purpose: Prototype for queue merge function.
+ *
+ * Arguments: q - Queue for which we are merging request.
+ * req - request into which we wish to merge.
+ * next - 2nd request that we might want to combine with req
+ * use_clustering - 1 if this host wishes to use clustering
+ * dma_host - 1 if this host has ISA DMA issues (bus doesn't
+ * expose all of the address lines, so that DMA cannot
+ * be done from an arbitrary address).
+ *
+ * Returns: 1 if it is OK to merge the two requests. 0
+ * if it is not OK.
+ *
+ * Lock status: io_request_lock is assumed to be held here.
+ *
+ * Notes: Some drivers have limited scatter-gather table sizes, and
+ * thus they cannot queue an infinitely large command. This
+ * function is called from ll_rw_blk before it attempts to merge
+ * a new block into a request to make sure that the request will
+ * not become too large.
+ *
+ * This function is not designed to be directly called. Instead
+ * it should be referenced from other functions where the
+ * use_clustering and dma_host parameters should be integer
+ * constants. The compiler should thus be able to properly
+ * optimize the code, eliminating stuff that is irrelevant.
+ * It is more maintainable to do it this way with a single function
+ * than to have 4 separate functions all doing roughly the
+ * same thing.
+ */
+__inline static int __scsi_merge_requests_fn(request_queue_t * q,
+ struct request *req,
+ struct request *next,
+ int max_segments,
+ int use_clustering,
+ int dma_host)
+{
+ Scsi_Device *SDpnt;
+ struct Scsi_Host *SHpnt;
+
+ /*
+ * First check whether either of the requests is a re-queued
+ * request. Can't merge them if they are.
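+ * (req->special being set on a fs request means a Scsi_Cmnd has
+ * already been built for it - e.g. the requeued remainder of a
+ * command that took a medium error - and growing the request now
+ * would invalidate that command's scatter-gather setup.)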
+ */ + if (req->special || next->special) + return 0; + + SDpnt = (Scsi_Device *) q->queuedata; + SHpnt = SDpnt->host; + +#ifdef DMA_CHUNK_SIZE + if (max_segments > 64) + max_segments = 64; + + /* If it would not fit into prepared memory space for sg chain, + * then don't allow the merge. + */ + if (req->nr_segments + next->nr_segments - 1 > max_segments || + req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + return 0; + } + if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) { + return 0; + } +#else + /* + * If the two requests together are too large (even assuming that we + * can merge the boundary requests into one segment, then don't + * allow the merge. + */ + if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + return 0; + } +#endif + + if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors) + return 0; + + /* + * The main question is whether the two segments at the boundaries + * would be considered one or two. + */ + if (use_clustering) { + /* + * See if we can do this without creating another + * scatter-gather segment. In the event that this is a + * DMA capable host, make sure that a segment doesn't span + * the DMA threshold boundary. + */ + if (dma_host && + virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + goto dont_combine; + } +#ifdef DMA_SEGMENT_SIZE_LIMITED + /* + * We currently can only allocate scatter-gather bounce + * buffers in chunks of PAGE_SIZE or less. + */ + if (dma_host + && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) + && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + { + int segment_size = 0; + int count = 0; + + count = __count_segments(req, use_clustering, dma_host, &segment_size); + count += __count_segments(next, use_clustering, dma_host, &segment_size); + if( count != req->nr_segments + next->nr_segments ) { + goto dont_combine; + } + } +#endif + if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + /* + * This one is OK. Let it go. + */ + req->nr_segments += next->nr_segments - 1; +#ifdef DMA_CHUNK_SIZE + req->nr_hw_segments += next->nr_hw_segments - 1; +#endif + return 1; + } + } + dont_combine: +#ifdef DMA_CHUNK_SIZE + if (req->nr_segments + next->nr_segments > max_segments || + req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + return 0; + } + /* If dynamic DMA mapping can merge last segment in req with + * first segment in next, then the check for hw segments was + * done above already, so we can always merge. + */ + if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + req->nr_hw_segments += next->nr_hw_segments - 1; + } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { + return 0; + } else { + req->nr_hw_segments += next->nr_hw_segments; + } + req->nr_segments += next->nr_segments; + return 1; +#else + /* + * We know that the two requests at the boundary should not be combined. + * Make sure we can fix something that is the sum of the two. + * A slightly stricter test than we had above. + */ + if (req->nr_segments + next->nr_segments > max_segments || + req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + return 0; + } else { + /* + * This will form the start of a new segment. Bump the + * counter. + */ + req->nr_segments += next->nr_segments; + return 1; + } +#endif +} + +/* + * Function: scsi_merge_requests_fn_() + * + * Purpose: queue merge function. + * + * Arguments: q - Queue for which we are merging request. + * req - request into which we wish to merge. 
+ * bh - Block which we may wish to merge into request + * + * Returns: 1 if it is OK to merge the block into the request. 0 + * if it is not OK. + * + * Lock status: io_request_lock is assumed to be held here. + * + * Notes: Optimized for different cases depending upon whether + * ISA DMA is in use and whether clustering should be used. + */ +#define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ +static int _FUNCTION(request_queue_t * q, \ + struct request * req, \ + struct request * next, \ + int max_segments) \ +{ \ + int ret; \ + SANITY_CHECK(req, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + return ret; \ +} + +/* Version with use_clustering 0 and dma_host 1 is not necessary, + * since the only use of dma_host above is protected by use_clustering. + */ +MERGEREQFCT(scsi_merge_requests_fn_, 0, 0) +MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0) +MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1) +/* + * Function: __init_io() + * + * Purpose: Prototype for io initialize function. + * + * Arguments: SCpnt - Command descriptor we wish to initialize + * sg_count_valid - 1 if the sg count in the req is valid. + * use_clustering - 1 if this host wishes to use clustering + * dma_host - 1 if this host has ISA DMA issues (bus doesn't + * expose all of the address lines, so that DMA cannot + * be done from an arbitrary address). + * + * Returns: 1 on success. + * + * Lock status: + * + * Notes: Only the SCpnt argument should be a non-constant variable. + * This function is designed in such a way that it will be + * invoked from a series of small stubs, each of which would + * be optimized for specific circumstances. + * + * The advantage of this is that hosts that don't do DMA + * get versions of the function that essentially don't have + * any of the DMA code. Same goes for clustering - in the + * case of hosts with no need for clustering, there is no point + * in a whole bunch of overhead. + * + * Finally, in the event that a host has set can_queue to SG_ALL + * implying that there is no limit to the length of a scatter + * gather list, the sg count in the request won't be valid + * (mainly because we don't need queue management functions + * which keep the tally uptodate. + */ +__inline static int __init_io(Scsi_Cmnd * SCpnt, + int sg_count_valid, + int use_clustering, + int dma_host) +{ + struct buffer_head * bh; + struct buffer_head * bhprev; + char * buff; + int count; + int i; + struct request * req; + int sectors; + struct scatterlist * sgpnt; + int this_count; + void ** bbpnt; + + /* + * FIXME(eric) - don't inline this - it doesn't depend on the + * integer flags. Come to think of it, I don't think this is even + * needed any more. Need to play with it and see if we hit the + * panic. If not, then don't bother. + */ + if (!SCpnt->request.bh) { + /* + * Case of page request (i.e. raw device), or unlinked buffer + * Typically used for swapping, but this isn't how we do + * swapping any more. + */ + panic("I believe this is dead code. If we hit this, I was wrong"); +#if 0 + SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; + SCpnt->request_buffer = SCpnt->request.buffer; + SCpnt->use_sg = 0; + /* + * FIXME(eric) - need to handle DMA here. + */ +#endif + return 1; + } + req = &SCpnt->request; + /* + * First we need to know how many scatter gather segments are needed. 
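+ * Either trust the tally ll_rw_blk has been keeping for us
+ * (sg_count_valid) or recount by walking the buffer-head chain
+ * with exactly the same clustering and ISA DMA rules the merge
+ * functions applied, so the two counts cannot disagree.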
+ */ + if (!sg_count_valid) { + count = __count_segments(req, use_clustering, dma_host, NULL); + } else { + count = req->nr_segments; + } + + /* + * If the dma pool is nearly empty, then queue a minimal request + * with a single segment. Typically this will satisfy a single + * buffer. + */ + if (dma_host && scsi_dma_free_sectors <= 10) { + this_count = SCpnt->request.current_nr_sectors; + goto single_segment; + } + /* + * Don't bother with scatter-gather if there is only one segment. + */ + if (count == 1) { + this_count = SCpnt->request.nr_sectors; + goto single_segment; + } + SCpnt->use_sg = count; + + /* + * Allocate the actual scatter-gather table itself. + */ + SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist)); + + /* If we could potentially require ISA bounce buffers, allocate + * space for this array here. + */ + if (dma_host) + SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *)); + + /* scsi_malloc can only allocate in chunks of 512 bytes so + * round it up. + */ + SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; + + sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); + + /* + * Now fill the scatter-gather table. + */ + if (!sgpnt) { + /* + * If we cannot allocate the scatter-gather table, then + * simply write the first buffer all by itself. + */ + printk("Warning - running *really* short on DMA buffers\n"); + this_count = SCpnt->request.current_nr_sectors; + goto single_segment; + } + /* + * Next, walk the list, and fill in the addresses and sizes of + * each segment. + */ + memset(sgpnt, 0, SCpnt->sglist_len); + SCpnt->request_buffer = (char *) sgpnt; + SCpnt->request_bufflen = 0; + bhprev = NULL; + + if (dma_host) + bbpnt = (void **) ((char *)sgpnt + + (SCpnt->use_sg * sizeof(struct scatterlist))); + else + bbpnt = NULL; + + SCpnt->bounce_buffers = bbpnt; + + for (count = 0, bh = SCpnt->request.bh; + bh; bh = bh->b_reqnext) { + if (use_clustering && bhprev != NULL) { + if (dma_host && + virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + /* Nothing - fall through */ + } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + /* + * This one is OK. Let it go. Note that we + * do not have the ability to allocate + * bounce buffer segments > PAGE_SIZE, so + * for now we limit the thing. + */ + if( dma_host ) { +#ifdef DMA_SEGMENT_SIZE_LIMITED + if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { + sgpnt[count - 1].length += bh->b_size; + bhprev = bh; + continue; + } +#else + sgpnt[count - 1].length += bh->b_size; + bhprev = bh; + continue; +#endif + } else { + sgpnt[count - 1].length += bh->b_size; + SCpnt->request_bufflen += bh->b_size; + bhprev = bh; + continue; + } + } + } + count++; + sgpnt[count - 1].address = bh->b_data; + sgpnt[count - 1].page = NULL; + sgpnt[count - 1].length += bh->b_size; + if (!dma_host) { + SCpnt->request_bufflen += bh->b_size; + } + bhprev = bh; + } + + /* + * Verify that the count is correct. + */ + if (count != SCpnt->use_sg) { + printk("Incorrect number of segments after building list\n"); +#ifdef CONFIG_SCSI_DEBUG_QUEUES + dump_stats(req, use_clustering, dma_host, count); +#endif + } + if (!dma_host) { + return 1; + } + /* + * Now allocate bounce buffers, if needed. 
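+ * Any segment sitting above ISA_DMA_THRESHOLD (the 16MB ISA
+ * limit on i386) has its original address parked in bbpnt[i]
+ * and a low scsi_malloc() buffer swapped into the scatter list;
+ * WRITE data is copied down into the bounce buffer here, and
+ * READ data is copied back out at completion time in the
+ * mid-layer completion path above.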
+ */ + SCpnt->request_bufflen = 0; + for (i = 0; i < count; i++) { + sectors = (sgpnt[i].length >> 9); + SCpnt->request_bufflen += sgpnt[i].length; + if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > + ISA_DMA_THRESHOLD) { + if( scsi_dma_free_sectors - sectors <= 10 ) { + /* + * If this would nearly drain the DMA + * pool empty, then let's stop here. + * Don't make this request any larger. + * This is kind of a safety valve that + * we use - we could get screwed later + * on if we run out completely. + */ + SCpnt->request_bufflen -= sgpnt[i].length; + SCpnt->use_sg = i; + if (i == 0) { + goto big_trouble; + } + break; + } + + bbpnt[i] = sgpnt[i].address; + sgpnt[i].address = + (char *) scsi_malloc(sgpnt[i].length); + /* + * If we cannot allocate memory for this DMA bounce + * buffer, then queue just what we have done so far. + */ + if (sgpnt[i].address == NULL) { + printk("Warning - running low on DMA memory\n"); + SCpnt->request_bufflen -= sgpnt[i].length; + SCpnt->use_sg = i; + if (i == 0) { + goto big_trouble; + } + break; + } + if (SCpnt->request.cmd == WRITE) { + memcpy(sgpnt[i].address, bbpnt[i], + sgpnt[i].length); + } + } + } + return 1; + + big_trouble: + /* + * We come here in the event that we get one humongous + * request, where we need a bounce buffer, and the buffer is + * more than we can allocate in a single call to + * scsi_malloc(). In addition, we only come here when it is + * the 0th element of the scatter-gather table that gets us + * into this trouble. As a fallback, we fall back to + * non-scatter-gather, and ask for a single segment. We make + * a half-hearted attempt to pick a reasonably large request + * size mainly so that we don't thrash the thing with + * iddy-biddy requests. + */ + + /* + * The original number of sectors in the 0th element of the + * scatter-gather table. + */ + sectors = sgpnt[0].length >> 9; + + /* + * Free up the original scatter-gather table. Note that since + * it was the 0th element that got us here, we don't have to + * go in and free up memory from the other slots. + */ + SCpnt->request_bufflen = 0; + SCpnt->use_sg = 0; + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + + /* + * Make an attempt to pick up as much as we reasonably can. + * Just keep adding sectors until the pool starts running kind of + * low. The limit of 30 is somewhat arbitrary - the point is that + * it would kind of suck if we dropped down and limited ourselves to + * single-block requests if we had hundreds of free sectors. + */ + if( scsi_dma_free_sectors > 30 ) { + for (this_count = 0, bh = SCpnt->request.bh; + bh; bh = bh->b_reqnext) { + if( scsi_dma_free_sectors - this_count < 30 + || this_count == sectors ) + { + break; + } + this_count += bh->b_size >> 9; + } + + } else { + /* + * Yow! Take the absolute minimum here. + */ + this_count = SCpnt->request.current_nr_sectors; + } + + /* + * Now drop through into the single-segment case. + */ + + single_segment: + /* + * Come here if for any reason we choose to do this as a single + * segment. Possibly the entire request, or possibly a small + * chunk of the entire request. + */ + bh = SCpnt->request.bh; + buff = SCpnt->request.buffer; + + if (dma_host) { + /* + * Allocate a DMA bounce buffer. If the allocation fails, fall + * back and allocate a really small one - enough to satisfy + * the first buffer. 
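+ * (If even the single-buffer fallback cannot be satisfied we have
+ * genuinely run the DMA pool dry, and dma_exhausted() will print
+ * its statistics and panic rather than leave the request hanging.)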
+ */ + if (virt_to_phys(SCpnt->request.bh->b_data) + + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + buff = (char *) scsi_malloc(this_count << 9); + if (!buff) { + printk("Warning - running low on DMA memory\n"); + this_count = SCpnt->request.current_nr_sectors; + buff = (char *) scsi_malloc(this_count << 9); + if (!buff) { + dma_exhausted(SCpnt, 0); + } + } + if (SCpnt->request.cmd == WRITE) + memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + } + } + SCpnt->request_bufflen = this_count << 9; + SCpnt->request_buffer = buff; + SCpnt->use_sg = 0; + return 1; +} + +#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA) \ +static int _FUNCTION(Scsi_Cmnd * SCpnt) \ +{ \ + return __init_io(SCpnt, _VALID, _CLUSTER, _DMA); \ +} + +/* + * ll_rw_blk.c now keeps track of the number of segments in + * a request. Thus we don't have to do it any more here. + * We always force "_VALID" to 1. Eventually clean this up + * and get rid of the extra argument. + */ +INITIO(scsi_init_io_v, 1, 0, 0) +INITIO(scsi_init_io_vd, 1, 0, 1) +INITIO(scsi_init_io_vc, 1, 1, 0) +INITIO(scsi_init_io_vdc, 1, 1, 1) + +/* + * Function: initialize_merge_fn() + * + * Purpose: Initialize merge function for a host + * + * Arguments: SHpnt - Host descriptor. + * + * Returns: Nothing. + * + * Lock status: + * + * Notes: + */ +void initialize_merge_fn(Scsi_Device * SDpnt) +{ + request_queue_t *q; + struct Scsi_Host *SHpnt; + SHpnt = SDpnt->host; + + q = &SDpnt->request_queue; + + /* + * If the host has already selected a merge manager, then don't + * pick a new one. + */ +#if 0 + if (q->back_merge_fn && q->front_merge_fn) + return; +#endif + /* + * If this host has an unlimited tablesize, then don't bother with a + * merge manager. The whole point of the operation is to make sure + * that requests don't grow too large, and this host isn't picky. + * + * Note that ll_rw_blk.c is effectively maintaining a segment + * count which is only valid if clustering is used, and it obviously + * doesn't handle the DMA case. In the end, it + * is simply easier to do it ourselves with our own functions + * rather than rely upon the default behavior of ll_rw_blk. + */ + if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) { + q->back_merge_fn = scsi_back_merge_fn_; + q->front_merge_fn = scsi_front_merge_fn_; + q->merge_requests_fn = scsi_merge_requests_fn_; + SDpnt->scsi_init_io_fn = scsi_init_io_v; + } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) { + q->back_merge_fn = scsi_back_merge_fn_; + q->front_merge_fn = scsi_front_merge_fn_; + q->merge_requests_fn = scsi_merge_requests_fn_; + SDpnt->scsi_init_io_fn = scsi_init_io_vd; + } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) { + q->back_merge_fn = scsi_back_merge_fn_c; + q->front_merge_fn = scsi_front_merge_fn_c; + q->merge_requests_fn = scsi_merge_requests_fn_c; + SDpnt->scsi_init_io_fn = scsi_init_io_vc; + } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) { + q->back_merge_fn = scsi_back_merge_fn_dc; + q->front_merge_fn = scsi_front_merge_fn_dc; + q->merge_requests_fn = scsi_merge_requests_fn_dc; + SDpnt->scsi_init_io_fn = scsi_init_io_vdc; + } +} diff --git a/xen-2.4.16/drivers/scsi/scsi_module.c b/xen-2.4.16/drivers/scsi/scsi_module.c new file mode 100644 index 0000000000..c14a8eef77 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_module.c @@ -0,0 +1,71 @@ +/* + * scsi_module.c Copyright (1994, 1995) Eric Youngdale. 
+ *
+ * Support for loading low-level scsi drivers using the linux kernel loadable
+ * module interface.
+ *
+ * To use, the host adapter should first define and initialize the variable
+ * driver_template (datatype Scsi_Host_Template), and then include this file.
+ * This should also be wrapped in a #ifdef MODULE/#endif.
+ *
+ * The low-level driver must also define a release function which will
+ * free any irq assignments, release any dma channels, release any I/O
+ * address space that might be reserved, and otherwise clean up after itself.
+ * The idea is that the same driver should be able to be reloaded without
+ * any difficulty. This makes debugging new drivers easier, as you should
+ * be able to load the driver, test it, unload, modify and reload.
+ *
+ * One *very* important caveat. If the driver may need to do DMA on the
+ * ISA bus, you must have unchecked_isa_dma set in the device template,
+ * even if this might be changed during the detect routine. This is
+ * because the shpnt structure will be allocated in a special way so that
+ * it will be below the appropriate DMA limit - thus if your driver uses
+ * the hostdata field of shpnt, and the board must be able to access this
+ * via DMA, the shpnt structure must be in a DMA accessible region of
+ * memory. This comment would be relevant for something like the buslogic
+ * driver where there are many boards, only some of which do DMA onto the
+ * ISA bus. There is no convenient way of specifying whether the host
+ * needs to be in an ISA DMA accessible region of memory when you call
+ * scsi_register.
+ */
+
+#include
+#include
+
+static int __init init_this_scsi_driver(void)
+{
+ driver_template.module = THIS_MODULE;
+ scsi_register_module(MODULE_SCSI_HA, &driver_template);
+ if (driver_template.present)
+ return 0;
+
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+ return -ENODEV;
+}
+
+static void __exit exit_this_scsi_driver(void)
+{
+ scsi_unregister_module(MODULE_SCSI_HA, &driver_template);
+}
+
+module_init(init_this_scsi_driver);
+module_exit(exit_this_scsi_driver);
+
+/*
+ * Overrides for Emacs so that we almost follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-indent-level: 4
+ * c-brace-imaginary-offset: 0
+ * c-brace-offset: -4
+ * c-argdecl-indent: 4
+ * c-label-offset: -4
+ * c-continued-statement-offset: 4
+ * c-continued-brace-offset: 0
+ * indent-tabs-mode: nil
+ * tab-width: 8
+ * End:
+ */
diff --git a/xen-2.4.16/drivers/scsi/scsi_obsolete.c b/xen-2.4.16/drivers/scsi/scsi_obsolete.c
new file mode 100644
index 0000000000..488bb26672
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_obsolete.c
@@ -0,0 +1,1162 @@
+/*
+ * scsi_obsolete.c Copyright (C) 1992 Drew Eckhardt
+ * Copyright (C) 1993, 1994, 1995 Eric Youngdale
+ *
+ * generic mid-level SCSI driver
+ * Initial versions: Drew Eckhardt
+ * Subsequent revisions: Eric Youngdale
+ *
+ *
+ *
+ * Bug correction thanks go to:
+ * Rik Faith
+ * Tommy Thorn
+ * Thomas Wuensche
+ *
+ * Modified by Eric Youngdale eric@andante.org to
+ * add scatter-gather, multiple outstanding request, and other
+ * enhancements.
+ * + * Native multichannel, wide scsi, /proc/scsi and hot plugging + * support added by Michael Neuffer + * + * Major improvements to the timeout, abort, and reset processing, + * as well as performance modifications for large queue depths by + * Leonard N. Zubkoff + * + * Improved compatibility with 2.0 behaviour by Manfred Spraul + * + */ + +/* + *######################################################################### + *######################################################################### + *######################################################################### + *######################################################################### + * NOTE - NOTE - NOTE - NOTE - NOTE - NOTE - NOTE + * + *######################################################################### + *######################################################################### + *######################################################################### + *######################################################################### + * + * This file contains the 'old' scsi error handling. It is only present + * while the new error handling code is being debugged, and while the low + * level drivers are being converted to use the new code. Once the last + * driver uses the new code this *ENTIRE* file will be nuked. + */ + +#define __NO_VERSION__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +#undef USE_STATIC_SCSI_MEMORY + +/* + static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_obsolete.c,v 1.1 1997/05/18 23:27:21 eric Exp $"; + */ + + +#define INTERNAL_ERROR (panic ("Internal error in file %s, line %d.\n", __FILE__, __LINE__)) + + +static int scsi_abort(Scsi_Cmnd *, int code); +static int scsi_reset(Scsi_Cmnd *, unsigned int); + +extern void scsi_old_done(Scsi_Cmnd * SCpnt); +int update_timeout(Scsi_Cmnd *, int); +extern void scsi_old_times_out(Scsi_Cmnd * SCpnt); + +extern int scsi_dispatch_cmd(Scsi_Cmnd * SCpnt); + +#define SCSI_BLOCK(HOST) (HOST->can_queue && HOST->host_busy >= HOST->can_queue) + +static unsigned char generic_sense[6] = +{REQUEST_SENSE, 0, 0, 0, 255, 0}; + +/* + * This is the number of clock ticks we should wait before we time out + * and abort the command. This is for where the scsi.c module generates + * the command, not where it originates from a higher level, in which + * case the timeout is specified there. + * + * ABORT_TIMEOUT and RESET_TIMEOUT are the timeouts for RESET and ABORT + * respectively. + */ + +#ifdef DEBUG_TIMEOUT +static void scsi_dump_status(void); +#endif + + +#ifdef DEBUG +#define SCSI_TIMEOUT (5*HZ) +#else +#define SCSI_TIMEOUT (2*HZ) +#endif + +#ifdef DEBUG +#define SENSE_TIMEOUT SCSI_TIMEOUT +#define ABORT_TIMEOUT SCSI_TIMEOUT +#define RESET_TIMEOUT SCSI_TIMEOUT +#else +#define SENSE_TIMEOUT (5*HZ/10) +#define RESET_TIMEOUT (5*HZ/10) +#define ABORT_TIMEOUT (5*HZ/10) +#endif + + +/* Do not call reset on error if we just did a reset within 15 sec. */ +#define MIN_RESET_PERIOD (15*HZ) + + + +/* + * Flag bits for the internal_timeout array + */ +#define IN_ABORT 1 +#define IN_RESET 2 +#define IN_RESET2 4 +#define IN_RESET3 8 + +/* + * This is our time out function, called when the timer expires for a + * given host adapter. It will attempt to abort the currently executing + * command, that failing perform a kernel panic. 
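+ * (More precisely, the ladder below runs: abort the command, then
+ * an asynchronous reset, then a bus reset, then a host adapter
+ * reset, and if even that times out we only log that the bus or
+ * device is probably hung rather than actually panic.)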
+ */ + +void scsi_old_times_out(Scsi_Cmnd * SCpnt) +{ + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + + /* Set the serial_number_at_timeout to the current serial_number */ + SCpnt->serial_number_at_timeout = SCpnt->serial_number; + + switch (SCpnt->internal_timeout & (IN_ABORT | IN_RESET | IN_RESET2 | IN_RESET3)) { + case NORMAL_TIMEOUT: + { +#ifdef DEBUG_TIMEOUT + scsi_dump_status(); +#endif + } + + if (!scsi_abort(SCpnt, DID_TIME_OUT)) + break; + case IN_ABORT: + printk("SCSI host %d abort (pid %ld) timed out - resetting\n", + SCpnt->host->host_no, SCpnt->pid); + if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) + break; + case IN_RESET: + case (IN_ABORT | IN_RESET): + /* This might be controversial, but if there is a bus hang, + * you might conceivably want the machine up and running + * esp if you have an ide disk. + */ + printk("SCSI host %d channel %d reset (pid %ld) timed out - " + "trying harder\n", + SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + SCpnt->internal_timeout &= ~IN_RESET; + SCpnt->internal_timeout |= IN_RESET2; + scsi_reset(SCpnt, + SCSI_RESET_ASYNCHRONOUS | SCSI_RESET_SUGGEST_BUS_RESET); + break; + case IN_RESET2: + case (IN_ABORT | IN_RESET2): + /* Obviously the bus reset didn't work. + * Let's try even harder and call for an HBA reset. + * Maybe the HBA itself crashed and this will shake it loose. + */ + printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", + SCpnt->host->host_no, SCpnt->pid); + SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); + SCpnt->internal_timeout |= IN_RESET3; + scsi_reset(SCpnt, + SCSI_RESET_ASYNCHRONOUS | SCSI_RESET_SUGGEST_HOST_RESET); + break; + + default: + printk("SCSI host %d reset (pid %ld) timed out again -\n", + SCpnt->host->host_no, SCpnt->pid); + printk("probably an unrecoverable SCSI bus or device hang.\n"); + break; + + } + spin_unlock_irqrestore(&io_request_lock, flags); + +} + +/* + * From what I can find in scsi_obsolete.c, this function is only called + * by scsi_old_done and scsi_reset. Both of these functions run with the + * io_request_lock already held, so we need do nothing here about grabbing + * any locks. + */ +static void scsi_request_sense(Scsi_Cmnd * SCpnt) +{ + SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; + update_timeout(SCpnt, SENSE_TIMEOUT); + + + memcpy((void *) SCpnt->cmnd, (void *) generic_sense, + sizeof(generic_sense)); + memset((void *) SCpnt->sense_buffer, 0, + sizeof(SCpnt->sense_buffer)); + + if (SCpnt->device->scsi_level <= SCSI_2) + SCpnt->cmnd[1] = SCpnt->lun << 5; + SCpnt->cmnd[4] = sizeof(SCpnt->sense_buffer); + + SCpnt->request_buffer = &SCpnt->sense_buffer; + SCpnt->request_bufflen = sizeof(SCpnt->sense_buffer); + SCpnt->use_sg = 0; + SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]); + SCpnt->result = 0; + SCpnt->sc_data_direction = SCSI_DATA_READ; + + /* + * Ugly, ugly. The newer interfaces all assume that the lock + * isn't held. Mustn't disappoint, or we deadlock the system. + */ + spin_unlock_irq(&io_request_lock); + scsi_dispatch_cmd(SCpnt); + spin_lock_irq(&io_request_lock); +} + + + + +static int check_sense(Scsi_Cmnd * SCpnt) +{ + /* If there is no sense information, request it. If we have already + * requested it, there is no point in asking again - the firmware must + * be confused. 
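+ *
+ * (The test below accepts fixed-format sense data: response codes
+ * 0x70 and 0x71 - current and deferred errors - both have 7 in
+ * bits 4-6 of byte 0, which is all the mask-and-shift examines.)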
+ */ + if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) { + if (!(SCpnt->flags & ASKED_FOR_SENSE)) + return SUGGEST_SENSE; + else + return SUGGEST_RETRY; + } + SCpnt->flags &= ~ASKED_FOR_SENSE; + +#ifdef DEBUG_INIT + printk("scsi%d, channel%d : ", SCpnt->host->host_no, SCpnt->channel); + print_sense("", SCpnt); + printk("\n"); +#endif + if (SCpnt->sense_buffer[2] & 0xe0) + return SUGGEST_ABORT; + + switch (SCpnt->sense_buffer[2] & 0xf) { + case NO_SENSE: + return 0; + case RECOVERED_ERROR: + return SUGGEST_IS_OK; + + case ABORTED_COMMAND: + return SUGGEST_RETRY; + case NOT_READY: + case UNIT_ATTENTION: + /* + * If we are expecting a CC/UA because of a bus reset that we + * performed, treat this just as a retry. Otherwise this is + * information that we should pass up to the upper-level driver + * so that we can deal with it there. + */ + if (SCpnt->device->expecting_cc_ua) { + SCpnt->device->expecting_cc_ua = 0; + return SUGGEST_RETRY; + } + return SUGGEST_ABORT; + + /* these three are not supported */ + case COPY_ABORTED: + case VOLUME_OVERFLOW: + case MISCOMPARE: + + case MEDIUM_ERROR: + return SUGGEST_REMAP; + case BLANK_CHECK: + case DATA_PROTECT: + case HARDWARE_ERROR: + case ILLEGAL_REQUEST: + default: + return SUGGEST_ABORT; + } +} + +/* This function is the mid-level interrupt routine, which decides how + * to handle error conditions. Each invocation of this function must + * do one and *only* one of the following: + * + * (1) Call last_cmnd[host].done. This is done for fatal errors and + * normal completion, and indicates that the handling for this + * request is complete. + * (2) Call internal_cmnd to requeue the command. This will result in + * scsi_done being called again when the retry is complete. + * (3) Call scsi_request_sense. This asks the host adapter/drive for + * more information about the error condition. When the information + * is available, scsi_done will be called again. + * (4) Call reset(). This is sort of a last resort, and the idea is that + * this may kick things loose and get the drive working again. reset() + * automatically calls scsi_request_sense, and thus scsi_done will be + * called again once the reset is complete. + * + * If none of the above actions are taken, the drive in question + * will hang. If more than one of the above actions are taken by + * scsi_done, then unpredictable behavior will result. + */ +void scsi_old_done(Scsi_Cmnd * SCpnt) +{ + int status = 0; + int exit = 0; + int checked; + int oldto; + struct Scsi_Host *host = SCpnt->host; + Scsi_Device * device = SCpnt->device; + int result = SCpnt->result; + SCpnt->serial_number = 0; + SCpnt->serial_number_at_timeout = 0; + oldto = update_timeout(SCpnt, 0); + +#ifdef DEBUG_TIMEOUT + if (result) + printk("Non-zero result in scsi_done %x %d:%d\n", + result, SCpnt->target, SCpnt->lun); +#endif + + /* If we requested an abort, (and we got it) then fix up the return + * status to say why + */ + if (host_byte(result) == DID_ABORT && SCpnt->abort_reason) + SCpnt->result = result = (result & 0xff00ffff) | + (SCpnt->abort_reason << 16); + + +#define CMD_FINISHED 0 +#define MAYREDO 1 +#define REDO 3 +#define PENDING 4 + +#ifdef DEBUG + printk("In scsi_done(host = %d, result = %06x)\n", host->host_no, result); +#endif + + if (SCpnt->flags & SYNC_RESET) { + /* + * The behaviou of scsi_reset(SYNC) was changed in 2.1.? . + * The scsi mid-layer does a REDO after every sync reset, the driver + * must not do that any more. 
In order to prevent old drivers from + * crashing, all scsi_done() calls during sync resets are ignored. + */ + printk("scsi%d: device driver called scsi_done() " + "for a synchronous reset.\n", SCpnt->host->host_no); + return; + } + if (SCpnt->flags & WAS_SENSE) { + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + } + switch (host_byte(result)) { + case DID_OK: + if (status_byte(result) && (SCpnt->flags & WAS_SENSE)) + /* Failed to obtain sense information */ + { + SCpnt->flags &= ~WAS_SENSE; +#if 0 /* This cannot possibly be correct. */ + SCpnt->internal_timeout &= ~SENSE_TIMEOUT; +#endif + + if (!(SCpnt->flags & WAS_RESET)) { + printk("scsi%d : channel %d target %d lun %d request sense" + " failed, performing reset.\n", + SCpnt->host->host_no, SCpnt->channel, SCpnt->target, + SCpnt->lun); + scsi_reset(SCpnt, SCSI_RESET_SYNCHRONOUS); + status = REDO; + break; + } else { + exit = (DRIVER_HARD | SUGGEST_ABORT); + status = CMD_FINISHED; + } + } else + switch (msg_byte(result)) { + case COMMAND_COMPLETE: + switch (status_byte(result)) { + case GOOD: + if (SCpnt->flags & WAS_SENSE) { +#ifdef DEBUG + printk("In scsi_done, GOOD status, COMMAND COMPLETE, " + "parsing sense information.\n"); +#endif + SCpnt->flags &= ~WAS_SENSE; +#if 0 /* This cannot possibly be correct. */ + SCpnt->internal_timeout &= ~SENSE_TIMEOUT; +#endif + + switch (checked = check_sense(SCpnt)) { + case SUGGEST_SENSE: + case 0: +#ifdef DEBUG + printk("NO SENSE. status = REDO\n"); +#endif + update_timeout(SCpnt, oldto); + status = REDO; + break; + case SUGGEST_IS_OK: + break; + case SUGGEST_REMAP: +#ifdef DEBUG + printk("SENSE SUGGEST REMAP - status = CMD_FINISHED\n"); +#endif + status = CMD_FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; + case SUGGEST_RETRY: +#ifdef DEBUG + printk("SENSE SUGGEST RETRY - status = MAYREDO\n"); +#endif + status = MAYREDO; + exit = DRIVER_SENSE | SUGGEST_RETRY; + break; + case SUGGEST_ABORT: +#ifdef DEBUG + printk("SENSE SUGGEST ABORT - status = CMD_FINISHED"); +#endif + status = CMD_FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; + default: + printk("Internal error %s %d \n", __FILE__, + __LINE__); + } + } + /* end WAS_SENSE */ + else { +#ifdef DEBUG + printk("COMMAND COMPLETE message returned, " + "status = CMD_FINISHED. 
\n"); +#endif + exit = DRIVER_OK; + status = CMD_FINISHED; + } + break; + + case CHECK_CONDITION: + case COMMAND_TERMINATED: + switch (check_sense(SCpnt)) { + case 0: + update_timeout(SCpnt, oldto); + status = REDO; + break; + case SUGGEST_REMAP: + status = CMD_FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; + case SUGGEST_RETRY: + status = MAYREDO; + exit = DRIVER_SENSE | SUGGEST_RETRY; + break; + case SUGGEST_ABORT: + status = CMD_FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; + case SUGGEST_SENSE: + scsi_request_sense(SCpnt); + status = PENDING; + break; + } + break; + + case CONDITION_GOOD: + case INTERMEDIATE_GOOD: + case INTERMEDIATE_C_GOOD: + break; + + case BUSY: + case QUEUE_FULL: + update_timeout(SCpnt, oldto); + status = REDO; + break; + + case RESERVATION_CONFLICT: + /* + * Most HAs will return an error for + * this, so usually reservation + * conflicts will be processed under + * DID_ERROR code + */ + printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", + SCpnt->host->host_no, SCpnt->channel, + SCpnt->device->id, SCpnt->device->lun); + status = CMD_FINISHED; /* returns I/O error */ + break; + + default: + printk("Internal error %s %d \n" + "status byte = %d \n", __FILE__, + __LINE__, status_byte(result)); + + } + break; + default: + panic("scsi: unsupported message byte %d received\n", + msg_byte(result)); + } + break; + case DID_TIME_OUT: +#ifdef DEBUG + printk("Host returned DID_TIME_OUT - "); +#endif + + if (SCpnt->flags & WAS_TIMEDOUT) { +#ifdef DEBUG + printk("Aborting\n"); +#endif + /* + Allow TEST_UNIT_READY and INQUIRY commands to timeout early + without causing resets. All other commands should be retried. + */ + if (SCpnt->cmnd[0] != TEST_UNIT_READY && + SCpnt->cmnd[0] != INQUIRY) + status = MAYREDO; + exit = (DRIVER_TIMEOUT | SUGGEST_ABORT); + } else { +#ifdef DEBUG + printk("Retrying.\n"); +#endif + SCpnt->flags |= WAS_TIMEDOUT; + SCpnt->internal_timeout &= ~IN_ABORT; + status = REDO; + } + break; + case DID_BUS_BUSY: + case DID_PARITY: + status = REDO; + break; + case DID_NO_CONNECT: +#ifdef DEBUG + printk("Couldn't connect.\n"); +#endif + exit = (DRIVER_HARD | SUGGEST_ABORT); + break; + case DID_ERROR: + if (msg_byte(result) == COMMAND_COMPLETE && + status_byte(result) == RESERVATION_CONFLICT) { + printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n", + SCpnt->host->host_no, SCpnt->channel, + SCpnt->device->id, SCpnt->device->lun); + status = CMD_FINISHED; /* returns I/O error */ + break; + } + status = MAYREDO; + exit = (DRIVER_HARD | SUGGEST_ABORT); + break; + case DID_BAD_TARGET: + case DID_ABORT: + exit = (DRIVER_INVALID | SUGGEST_ABORT); + break; + case DID_RESET: + if (SCpnt->flags & IS_RESETTING) { + SCpnt->flags &= ~IS_RESETTING; + status = REDO; + break; + } + if (msg_byte(result) == GOOD && + status_byte(result) == CHECK_CONDITION) { + switch (check_sense(SCpnt)) { + case 0: + update_timeout(SCpnt, oldto); + status = REDO; + break; + case SUGGEST_REMAP: + case SUGGEST_RETRY: + status = MAYREDO; + exit = DRIVER_SENSE | SUGGEST_RETRY; + break; + case SUGGEST_ABORT: + status = CMD_FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; + case SUGGEST_SENSE: + scsi_request_sense(SCpnt); + status = PENDING; + break; + } + } else { + status = REDO; + exit = SUGGEST_RETRY; + } + break; + default: + exit = (DRIVER_ERROR | SUGGEST_DIE); + } + + switch (status) { + case CMD_FINISHED: + case PENDING: + break; + case MAYREDO: +#ifdef DEBUG + printk("In MAYREDO, allowing %d retries, have %d\n", + SCpnt->allowed, SCpnt->retries); +#endif + if 
((++SCpnt->retries) < SCpnt->allowed) { + if ((SCpnt->retries >= (SCpnt->allowed >> 1)) + && !(SCpnt->host->resetting && time_before(jiffies, SCpnt->host->last_reset + MIN_RESET_PERIOD)) + && !(SCpnt->flags & WAS_RESET)) { + printk("scsi%d channel %d : resetting for second half of retries.\n", + SCpnt->host->host_no, SCpnt->channel); + scsi_reset(SCpnt, SCSI_RESET_SYNCHRONOUS); + /* fall through to REDO */ + } + } else { + status = CMD_FINISHED; + break; + } + /* fall through to REDO */ + + case REDO: + + if (SCpnt->flags & WAS_SENSE) + scsi_request_sense(SCpnt); + else { + memcpy((void *) SCpnt->cmnd, + (void *) SCpnt->data_cmnd, + sizeof(SCpnt->data_cmnd)); + memset((void *) SCpnt->sense_buffer, 0, + sizeof(SCpnt->sense_buffer)); + SCpnt->request_buffer = SCpnt->buffer; + SCpnt->request_bufflen = SCpnt->bufflen; + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + SCpnt->result = 0; + /* + * Ugly, ugly. The newer interfaces all + * assume that the lock isn't held. Mustn't + * disappoint, or we deadlock the system. + */ + spin_unlock_irq(&io_request_lock); + scsi_dispatch_cmd(SCpnt); + spin_lock_irq(&io_request_lock); + } + break; + default: + INTERNAL_ERROR; + } + + if (status == CMD_FINISHED) { + Scsi_Request *SRpnt; +#ifdef DEBUG + printk("Calling done function - at address %p\n", SCpnt->done); +#endif + host->host_busy--; /* Indicate that we are free */ + device->device_busy--; /* Decrement device usage counter. */ + + SCpnt->result = result | ((exit & 0xff) << 24); + SCpnt->use_sg = SCpnt->old_use_sg; + SCpnt->cmd_len = SCpnt->old_cmd_len; + SCpnt->sc_data_direction = SCpnt->sc_old_data_direction; + SCpnt->underflow = SCpnt->old_underflow; + /* + * The upper layers assume the lock isn't held. We mustn't + * disappoint them. When the new error handling code is in + * use, the upper code is run from a bottom half handler, so + * it isn't an issue. + */ + spin_unlock_irq(&io_request_lock); + SRpnt = SCpnt->sc_request; + if( SRpnt != NULL ) { + SRpnt->sr_result = SRpnt->sr_command->result; + if( SRpnt->sr_result != 0 ) { + memcpy(SRpnt->sr_sense_buffer, + SRpnt->sr_command->sense_buffer, + sizeof(SRpnt->sr_sense_buffer)); + } + } + + SCpnt->done(SCpnt); + spin_lock_irq(&io_request_lock); + } +#undef CMD_FINISHED +#undef REDO +#undef MAYREDO +#undef PENDING +} + +/* + * The scsi_abort function interfaces with the abort() function of the host + * we are aborting, and causes the current command to not complete. The + * caller should deal with any error messages or status returned on the + * next call. + * + * This will not be called reentrantly for a given host. + */ + +/* + * Since we're nice guys and specified that abort() and reset() + * can be non-reentrant. The internal_timeout flags are used for + * this. + */ + + +static int scsi_abort(Scsi_Cmnd * SCpnt, int why) +{ + int oldto; + struct Scsi_Host *host = SCpnt->host; + + while (1) { + + /* + * Protect against races here. If the command is done, or we are + * on a different command forget it. 
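
The MAYREDO arm above enforces a retry budget: a command is requeued while ++retries stays below allowed, and once half the budget is spent a synchronous reset is attempted before the remaining retries. This sketch reproduces that accounting in isolation; it deliberately omits the MIN_RESET_PERIOD rate-limiting clause, and struct cmd is a stand-in for the few Scsi_Cmnd fields involved.

    #include <stdio.h>

    struct cmd { int retries, allowed, was_reset; };

    /* Returns 1 if the command should be requeued, 0 if the budget is gone. */
    static int may_redo(struct cmd *c)
    {
        if (++c->retries >= c->allowed)
            return 0;
        if (c->retries >= c->allowed / 2 && !c->was_reset) {
            printf("  resetting for second half of retries\n");
            c->was_reset = 1;            /* reset at most once per command */
        }
        return 1;
    }

    int main(void)
    {
        struct cmd c = { 0, 5, 0 };
        while (may_redo(&c))
            printf("retry %d of %d\n", c.retries, c.allowed);
        printf("giving up after attempt %d\n", c.retries);
        return 0;
    }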
+ */ + if (SCpnt->serial_number != SCpnt->serial_number_at_timeout) { + return 0; + } + if (SCpnt->internal_timeout & IN_ABORT) { + spin_unlock_irq(&io_request_lock); + while (SCpnt->internal_timeout & IN_ABORT) + barrier(); + spin_lock_irq(&io_request_lock); + } else { + SCpnt->internal_timeout |= IN_ABORT; + oldto = update_timeout(SCpnt, ABORT_TIMEOUT); + + if ((SCpnt->flags & IS_RESETTING) && SCpnt->device->soft_reset) { + /* OK, this command must have died when we did the + * reset. The device itself must have lied. + */ + printk("Stale command on %d %d:%d appears to have died when" + " the bus was reset\n", + SCpnt->channel, SCpnt->target, SCpnt->lun); + } + if (!host->host_busy) { + SCpnt->internal_timeout &= ~IN_ABORT; + update_timeout(SCpnt, oldto); + return 0; + } + printk("scsi : aborting command due to timeout : pid %lu, scsi%d," + " channel %d, id %d, lun %d ", + SCpnt->pid, SCpnt->host->host_no, (int) SCpnt->channel, + (int) SCpnt->target, (int) SCpnt->lun); + print_command(SCpnt->cmnd); + if (SCpnt->serial_number != SCpnt->serial_number_at_timeout) + return 0; + SCpnt->abort_reason = why; + switch (host->hostt->abort(SCpnt)) { + /* We do not know how to abort. Try waiting another + * time increment and see if this helps. Set the + * WAS_TIMEDOUT flag set so we do not try this twice + */ + case SCSI_ABORT_BUSY: /* Tough call - returning 1 from + * this is too severe + */ + case SCSI_ABORT_SNOOZE: + if (why == DID_TIME_OUT) { + SCpnt->internal_timeout &= ~IN_ABORT; + if (SCpnt->flags & WAS_TIMEDOUT) { + return 1; /* Indicate we cannot handle this. + * We drop down into the reset handler + * and try again + */ + } else { + SCpnt->flags |= WAS_TIMEDOUT; + oldto = SCpnt->timeout_per_command; + update_timeout(SCpnt, oldto); + } + } + return 0; + case SCSI_ABORT_PENDING: + if (why != DID_TIME_OUT) { + update_timeout(SCpnt, oldto); + } + return 0; + case SCSI_ABORT_SUCCESS: + /* We should have already aborted this one. No + * need to adjust timeout + */ + SCpnt->internal_timeout &= ~IN_ABORT; + return 0; + case SCSI_ABORT_NOT_RUNNING: + SCpnt->internal_timeout &= ~IN_ABORT; + update_timeout(SCpnt, 0); + return 0; + case SCSI_ABORT_ERROR: + default: + SCpnt->internal_timeout &= ~IN_ABORT; + return 1; + } + } + } +} + + +/* Mark a single SCSI Device as having been reset. */ + +static inline void scsi_mark_device_reset(Scsi_Device * Device) +{ + Device->was_reset = 1; + Device->expecting_cc_ua = 1; +} + + +/* Mark all SCSI Devices on a specific Host as having been reset. */ + +void scsi_mark_host_reset(struct Scsi_Host *Host) +{ + Scsi_Cmnd *SCpnt; + Scsi_Device *SDpnt; + + for (SDpnt = Host->host_queue; SDpnt; SDpnt = SDpnt->next) { + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) + scsi_mark_device_reset(SCpnt->device); + } +} + + +/* Mark all SCSI Devices on a specific Host Bus as having been reset. 
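
scsi_mark_device_reset() below arms a one-shot expectation: the next CHECK CONDITION / UNIT ATTENTION after a reset we caused ourselves is swallowed as a plain retry (see the UNIT_ATTENTION arm of check_sense() earlier), while any later one is reported upward. A minimal sketch of that contract, with struct device standing in for Scsi_Device:

    #include <stdio.h>

    struct device { int was_reset, expecting_cc_ua; };

    static void mark_device_reset(struct device *d)
    {
        d->was_reset = 1;
        d->expecting_cc_ua = 1;          /* expect exactly one CC/UA from us */
    }

    static const char *on_unit_attention(struct device *d)
    {
        if (d->expecting_cc_ua) {
            d->expecting_cc_ua = 0;      /* the one our own reset provoked */
            return "SUGGEST_RETRY";
        }
        return "SUGGEST_ABORT";          /* genuine event: pass it upward */
    }

    int main(void)
    {
        struct device d = { 0, 0 };
        mark_device_reset(&d);
        printf("first UA after reset: %s\n", on_unit_attention(&d));
        printf("second UA:            %s\n", on_unit_attention(&d));
        return 0;
    }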
*/ + +static void scsi_mark_bus_reset(struct Scsi_Host *Host, int channel) +{ + Scsi_Cmnd *SCpnt; + Scsi_Device *SDpnt; + + for (SDpnt = Host->host_queue; SDpnt; SDpnt = SDpnt->next) { + for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) + if (SCpnt->channel == channel) + scsi_mark_device_reset(SCpnt->device); + } +} + + +static int scsi_reset(Scsi_Cmnd * SCpnt, unsigned int reset_flags) +{ + int temp; + Scsi_Cmnd *SCpnt1; + Scsi_Device *SDpnt; + struct Scsi_Host *host = SCpnt->host; + + printk("SCSI bus is being reset for host %d channel %d.\n", + host->host_no, SCpnt->channel); + +#if 0 + /* + * First of all, we need to make a recommendation to the low-level + * driver as to whether a BUS_DEVICE_RESET should be performed, + * or whether we should do a full BUS_RESET. There is no simple + * algorithm here - we basically use a series of heuristics + * to determine what we should do. + */ + SCpnt->host->suggest_bus_reset = FALSE; + + /* + * First see if all of the active devices on the bus have + * been jammed up so that we are attempting resets. If so, + * then suggest a bus reset. Forcing a bus reset could + * result in some race conditions, but no more than + * you would usually get with timeouts. We will cross + * that bridge when we come to it. + * + * This is actually a pretty bad idea, since a sequence of + * commands will often timeout together and this will cause a + * Bus Device Reset followed immediately by a SCSI Bus Reset. + * If all of the active devices really are jammed up, the + * Bus Device Reset will quickly timeout and scsi_times_out + * will follow up with a SCSI Bus Reset anyway. + */ + SCpnt1 = host->host_queue; + while (SCpnt1) { + if (SCpnt1->request.rq_status != RQ_INACTIVE + && (SCpnt1->flags & (WAS_RESET | IS_RESETTING)) == 0) + break; + SCpnt1 = SCpnt1->next; + } + if (SCpnt1 == NULL) { + reset_flags |= SCSI_RESET_SUGGEST_BUS_RESET; + } + /* + * If the code that called us is suggesting a hard reset, then + * definitely request it. This usually occurs because a + * BUS_DEVICE_RESET times out. + * + * Passing reset_flags along takes care of this automatically. + */ + if (reset_flags & SCSI_RESET_SUGGEST_BUS_RESET) { + SCpnt->host->suggest_bus_reset = TRUE; + } +#endif + + while (1) { + + /* + * Protect against races here. If the command is done, or we are + * on a different command forget it. + */ + if (reset_flags & SCSI_RESET_ASYNCHRONOUS) + if (SCpnt->serial_number != SCpnt->serial_number_at_timeout) { + return 0; + } + if (SCpnt->internal_timeout & IN_RESET) { + spin_unlock_irq(&io_request_lock); + while (SCpnt->internal_timeout & IN_RESET) + barrier(); + spin_lock_irq(&io_request_lock); + } else { + SCpnt->internal_timeout |= IN_RESET; + update_timeout(SCpnt, RESET_TIMEOUT); + + if (reset_flags & SCSI_RESET_SYNCHRONOUS) + SCpnt->flags |= SYNC_RESET; + if (host->host_busy) { + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + SCpnt1 = SDpnt->device_queue; + while (SCpnt1) { + if (SCpnt1->request.rq_status != RQ_INACTIVE) { +#if 0 + if (!(SCpnt1->flags & IS_RESETTING) && + !(SCpnt1->internal_timeout & IN_ABORT)) + scsi_abort(SCpnt1, DID_RESET); +#endif + SCpnt1->flags |= (WAS_RESET | IS_RESETTING); + } + SCpnt1 = SCpnt1->next; + } + } + + host->last_reset = jiffies; + host->resetting = 1; + /* + * I suppose that the host reset callback will not play + * with the resetting field. We have just set the resetting + * flag here. 
-arca + */ + temp = host->hostt->reset(SCpnt, reset_flags); + /* + This test allows the driver to introduce an additional bus + settle time delay by setting last_reset up to 20 seconds in + the future. In the normal case where the driver does not + modify last_reset, it must be assumed that the actual bus + reset occurred immediately prior to the return to this code, + and so last_reset must be updated to the current time, so + that the delay in internal_cmnd will guarantee at least a + MIN_RESET_DELAY bus settle time. + */ + if (host->last_reset - jiffies > 20UL * HZ) + host->last_reset = jiffies; + } else { + host->host_busy++; + host->last_reset = jiffies; + host->resetting = 1; + SCpnt->flags |= (WAS_RESET | IS_RESETTING); + /* + * I suppose that the host reset callback will not play + * with the resetting field. We have just set the resetting + * flag here. -arca + */ + temp = host->hostt->reset(SCpnt, reset_flags); + if (time_before(host->last_reset, jiffies) || + (time_after(host->last_reset, jiffies + 20 * HZ))) + host->last_reset = jiffies; + host->host_busy--; + } + if (reset_flags & SCSI_RESET_SYNCHRONOUS) + SCpnt->flags &= ~SYNC_RESET; + +#ifdef DEBUG + printk("scsi reset function returned %d\n", temp); +#endif + + /* + * Now figure out what we need to do, based upon + * what the low level driver said that it did. + * If the result is SCSI_RESET_SUCCESS, SCSI_RESET_PENDING, + * or SCSI_RESET_WAKEUP, then the low level driver did a + * bus device reset or bus reset, so we should go through + * and mark one or all of the devices on that bus + * as having been reset. + */ + switch (temp & SCSI_RESET_ACTION) { + case SCSI_RESET_SUCCESS: + if (temp & SCSI_RESET_HOST_RESET) + scsi_mark_host_reset(host); + else if (temp & SCSI_RESET_BUS_RESET) + scsi_mark_bus_reset(host, SCpnt->channel); + else + scsi_mark_device_reset(SCpnt->device); + SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3); + return 0; + case SCSI_RESET_PENDING: + if (temp & SCSI_RESET_HOST_RESET) + scsi_mark_host_reset(host); + else if (temp & SCSI_RESET_BUS_RESET) + scsi_mark_bus_reset(host, SCpnt->channel); + else + scsi_mark_device_reset(SCpnt->device); + case SCSI_RESET_NOT_RUNNING: + return 0; + case SCSI_RESET_PUNT: + SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3); + scsi_request_sense(SCpnt); + return 0; + case SCSI_RESET_WAKEUP: + if (temp & SCSI_RESET_HOST_RESET) + scsi_mark_host_reset(host); + else if (temp & SCSI_RESET_BUS_RESET) + scsi_mark_bus_reset(host, SCpnt->channel); + else + scsi_mark_device_reset(SCpnt->device); + SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3); + scsi_request_sense(SCpnt); + /* + * If a bus reset was performed, we + * need to wake up each and every command + * that was active on the bus or if it was a HBA + * reset all active commands on all channels + */ + if (temp & SCSI_RESET_HOST_RESET) { + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + SCpnt1 = SDpnt->device_queue; + while (SCpnt1) { + if (SCpnt1->request.rq_status != RQ_INACTIVE + && SCpnt1 != SCpnt) + scsi_request_sense(SCpnt1); + SCpnt1 = SCpnt1->next; + } + } + } else if (temp & SCSI_RESET_BUS_RESET) { + for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { + SCpnt1 = SDpnt->device_queue; + while (SCpnt1) { + if (SCpnt1->request.rq_status != RQ_INACTIVE + && SCpnt1 != SCpnt + && SCpnt1->channel == SCpnt->channel) + scsi_request_sense(SCpnt); + SCpnt1 = SCpnt1->next; + } + } + } + return 0; + case SCSI_RESET_SNOOZE: + /* In this case, we set the timeout field 
to 0 + * so that this command does not time out any more, + * and we return 1 so that we get a message on the + * screen. + */ + SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2 | IN_RESET3); + update_timeout(SCpnt, 0); + /* If you snooze, you lose... */ + case SCSI_RESET_ERROR: + default: + return 1; + } + + return temp; + } + } +} + +/* + * The strategy is to cause the timer code to call scsi_times_out() + * when the soonest timeout is pending. + * The arguments are used when we are queueing a new command, because + * we do not want to subtract the time used from this time, but when we + * set the timer, we want to take this value into account. + */ + +int update_timeout(Scsi_Cmnd * SCset, int timeout) +{ + int rtn; + + /* + * We are using the new error handling code to actually register/deregister + * timers for timeout. + */ + + if (!timer_pending(&SCset->eh_timeout)) { + rtn = 0; + } else { + rtn = SCset->eh_timeout.expires - jiffies; + } + + if (timeout == 0) { + scsi_delete_timer(SCset); + } else { + scsi_add_timer(SCset, timeout, scsi_old_times_out); + } + + return rtn; +} + + +/* + * This function exports SCSI Bus, Device or Host reset capability + * and is for use with the SCSI generic driver. + */ +int +scsi_old_reset(Scsi_Cmnd *SCpnt, unsigned int flag) +{ + unsigned int old_flags = SCSI_RESET_SYNCHRONOUS; + + switch(flag) { + case SCSI_TRY_RESET_DEVICE: + /* no suggestion flags to add, device reset is default */ + break; + case SCSI_TRY_RESET_BUS: + old_flags |= SCSI_RESET_SUGGEST_BUS_RESET; + break; + case SCSI_TRY_RESET_HOST: + old_flags |= SCSI_RESET_SUGGEST_HOST_RESET; + break; + default: + return FAILED; + } + + if (scsi_reset(SCpnt, old_flags)) + return FAILED; + return SUCCESS; +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/drivers/scsi/scsi_obsolete.h b/xen-2.4.16/drivers/scsi/scsi_obsolete.h new file mode 100644 index 0000000000..abeacb996e --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_obsolete.h @@ -0,0 +1,106 @@ +/* + * scsi_obsolete.h Copyright (C) 1997 Eric Youngdale + * + */ + +#ifndef _SCSI_OBSOLETE_H +#define _SCSI_OBSOLETE_H + +/* + * These are the return codes for the abort and reset functions. The mid-level + * code uses these to decide what to do next. Each of the low level abort + * and reset functions must correctly indicate what it has done. + * The descriptions are written from the point of view of the mid-level code, + * so that the return code is telling the mid-level drivers exactly what + * the low level driver has already done, and what remains to be done. + */ + +/* We did not do anything. + * Wait some more for this command to complete, and if this does not work, + * try something more serious. */ +#define SCSI_ABORT_SNOOZE 0 + +/* This means that we were able to abort the command. 
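
update_timeout(), defined earlier in this hunk, has a two-part contract: report how much time was left on the command's previous timer, then either delete the timer (timeout == 0) or re-arm it. The sketch below models that with a fake jiffies counter; struct eh_timer and its fields are simplified stand-ins for the real eh_timeout machinery.

    #include <stdio.h>

    static long jiffies;                     /* stand-in tick counter */

    struct eh_timer { int pending; long expires; };

    /* Report time left on the old timer, then delete or re-arm it. */
    static int update_timeout(struct eh_timer *t, int timeout)
    {
        int remaining = t->pending ? (int) (t->expires - jiffies) : 0;

        if (timeout == 0) {
            t->pending = 0;                  /* scsi_delete_timer() */
        } else {
            t->pending = 1;                  /* scsi_add_timer()    */
            t->expires = jiffies + timeout;
        }
        return remaining;
    }

    int main(void)
    {
        struct eh_timer t = { 0, 0 };
        jiffies = 100;
        update_timeout(&t, 30);
        jiffies = 110;
        printf("left when re-armed: %d\n", update_timeout(&t, 50));  /* 20 */
        printf("left when deleted:  %d\n", update_timeout(&t, 0));   /* 50 */
        return 0;
    }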
We have already + * called the mid-level done function, and do not expect an interrupt that + * will lead to another call to the mid-level done function for this command */ +#define SCSI_ABORT_SUCCESS 1 + +/* We called for an abort of this command, and we should get an interrupt + * when this succeeds. Thus we should not restore the timer for this + * command in the mid-level abort function. */ +#define SCSI_ABORT_PENDING 2 + +/* Unable to abort - command is currently on the bus. Grin and bear it. */ +#define SCSI_ABORT_BUSY 3 + +/* The command is not active in the low level code. Command probably + * finished. */ +#define SCSI_ABORT_NOT_RUNNING 4 + +/* Something went wrong. The low level driver will indicate the correct + * error condition when it calls scsi_done, so the mid-level abort function + * can simply wait until this comes through */ +#define SCSI_ABORT_ERROR 5 + +/* We do not know how to reset the bus, or we do not want to. Bummer. + * Anyway, just wait a little more for the command in question, and hope that + * it eventually finishes. If it never finishes, the SCSI device could + * hang, so use this with caution. */ +#define SCSI_RESET_SNOOZE 0 + +/* We do not know how to reset the bus, or we do not want to. Bummer. + * We have given up on this ever completing. The mid-level code will + * request sense information to decide how to proceed from here. */ +#define SCSI_RESET_PUNT 1 + +/* This means that we were able to reset the bus. We have restarted all of + * the commands that should be restarted, and we should be able to continue + * on normally from here. We do not expect any interrupts that will return + * DID_RESET to any of the other commands in the host_queue, and the mid-level + * code does not need to do anything special to keep the commands alive. + * If a hard reset was performed then all outstanding commands on the + * bus have been restarted. */ +#define SCSI_RESET_SUCCESS 2 + +/* We called for a reset of this bus, and we should get an interrupt + * when this succeeds. Each command should get its own status + * passed up to scsi_done, but this has not happened yet. + * If a hard reset was performed, then we expect an interrupt + * for *each* of the outstanding commands that will have the + * effect of restarting the commands. + */ +#define SCSI_RESET_PENDING 3 + +/* We did a reset, but do not expect an interrupt to signal DID_RESET. + * This tells the upper level code to request the sense info, and this + * should keep the command alive. */ +#define SCSI_RESET_WAKEUP 4 + +/* The command is not active in the low level code. Command probably + finished. */ +#define SCSI_RESET_NOT_RUNNING 5 + +/* Something went wrong, and we do not know how to fix it. */ +#define SCSI_RESET_ERROR 6 + +#define SCSI_RESET_SYNCHRONOUS 0x01 +#define SCSI_RESET_ASYNCHRONOUS 0x02 +#define SCSI_RESET_SUGGEST_BUS_RESET 0x04 +#define SCSI_RESET_SUGGEST_HOST_RESET 0x08 +/* + * This is a bitmask that is ored with one of the above codes. + * It tells the mid-level code that we did a hard reset. + */ +#define SCSI_RESET_BUS_RESET 0x100 +/* + * This is a bitmask that is ored with one of the above codes. + * It tells the mid-level code that we did a host adapter reset. + */ +#define SCSI_RESET_HOST_RESET 0x200 +/* + * Used to mask off bits and to obtain the basic action that was + * performed. 
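
The reset return codes in this header are combined with the width bits above (SCSI_RESET_BUS_RESET, SCSI_RESET_HOST_RESET) and unpacked with the SCSI_RESET_ACTION mask defined immediately after this note. A small sketch of the unpacking, mirroring the marking switch in scsi_reset(); the constant values are copied from this header.

    #include <stdio.h>

    #define SCSI_RESET_SUCCESS    2
    #define SCSI_RESET_BUS_RESET  0x100
    #define SCSI_RESET_HOST_RESET 0x200
    #define SCSI_RESET_ACTION     0xff

    static void decode_reset(int temp)
    {
        switch (temp & SCSI_RESET_ACTION) {
        case SCSI_RESET_SUCCESS:
            if (temp & SCSI_RESET_HOST_RESET)
                printf("mark every device on the host as reset\n");
            else if (temp & SCSI_RESET_BUS_RESET)
                printf("mark every device on the channel as reset\n");
            else
                printf("mark only this device as reset\n");
            break;
        default:
            printf("action %d handled by the other switch arms\n",
                   temp & SCSI_RESET_ACTION);
        }
    }

    int main(void)
    {
        decode_reset(SCSI_RESET_SUCCESS | SCSI_RESET_BUS_RESET);
        decode_reset(SCSI_RESET_SUCCESS);
        return 0;
    }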
+ */
+#define SCSI_RESET_ACTION	0xff
+
+#endif /* SCSI_OBSOLETE_H */
diff --git a/xen-2.4.16/drivers/scsi/scsi_proc.c b/xen-2.4.16/drivers/scsi/scsi_proc.c
new file mode 100644
index 0000000000..01d6679250
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_proc.c
@@ -0,0 +1,329 @@
+/*
+ * linux/drivers/scsi/scsi_proc.c
+ *
+ * The functions in this file provide an interface between
+ * the PROC file system and the SCSI device drivers
+ * It is mainly used for debugging, statistics and to pass
+ * information directly to the lowlevel driver.
+ *
+ * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de
+ * Version: 0.99.8 last change: 95/09/13
+ *
+ * generic command parser provided by:
+ * Andreas Heilwagen
+ *
+ * generic_proc_info() support of xxxx_info() by:
+ * Michael A. Griffith
+ */
+
+#include <linux/config.h>	/* for CONFIG_PROC_FS */
+#define __NO_VERSION__
+#include <linux/module.h>
+
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+
+#include <asm/uaccess.h>
+
+#include "scsi.h"
+#include "hosts.h"
+
+#ifndef TRUE
+#define TRUE  1
+#define FALSE 0
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+/* generic_proc_info
+ * Used if the driver currently has no own support for /proc/scsi
+ */
+int generic_proc_info(char *buffer, char **start, off_t offset, int length,
+		      const char *(*info) (struct Scsi_Host *),
+		      struct Scsi_Host *sh)
+{
+	int len, pos, begin;
+
+	begin = 0;
+	if (info && sh) {
+		pos = len = sprintf(buffer, "%s\n", info(sh));
+	} else {
+		pos = len = sprintf(buffer,
+			"The driver does not yet support the proc-fs\n");
+	}
+	if (pos < offset) {
+		len = 0;
+		begin = pos;
+	}
+	*start = buffer + (offset - begin);	/* Start of wanted data */
+	len -= (offset - begin);
+	if (len > length)
+		len = length;
+
+	return (len);
+}
+
+/* dispatch_scsi_info is the central dispatcher
+ * It is the interface between the proc-fs and the SCSI subsystem code
+ */
+static int proc_scsi_read(char *buffer, char **start, off_t offset,
+			  int length, int *eof, void *data)
+{
+	struct Scsi_Host *hpnt = data;
+	int n;
+
+	if (hpnt->hostt->proc_info == NULL)
+		n = generic_proc_info(buffer, start, offset, length,
+				      hpnt->hostt->info, hpnt);
+	else
+		n = (hpnt->hostt->proc_info(buffer, start, offset,
+					    length, hpnt->host_no, 0));
+	*eof = (n < length);
+	return n;
+}
+
+static int proc_scsi_write(struct file *file, const char *buf,
+			   unsigned long count, void *data)
+{
+	struct Scsi_Host *hpnt = data;
+	ssize_t ret = -ENOMEM;
+	char *page;
+	char *start;
+
+	if (hpnt->hostt->proc_info == NULL)
+		ret = -ENOSYS;
+
+	if (count > PROC_BLOCK_SIZE)
+		return -EOVERFLOW;
+
+	if (!(page = (char *) __get_free_page(GFP_KERNEL)))
+		return -ENOMEM;
+	if (copy_from_user(page, buf, count))
+	{
+		free_page((ulong) page);
+		return -EFAULT;
+	}
+
+	ret = hpnt->hostt->proc_info(page, &start, 0, count,
+				     hpnt->host_no, 1);
+
+	free_page((ulong) page);
+	return (ret);
+}
+
+void build_proc_dir_entries(Scsi_Host_Template * tpnt)
+{
+	struct Scsi_Host *hpnt;
+	char name[10];	/* see scsi_unregister_host() */
+
+	tpnt->proc_dir = proc_mkdir(tpnt->proc_name, proc_scsi);
+	if (!tpnt->proc_dir) {
+		printk(KERN_ERR "Unable to proc_mkdir in scsi.c/build_proc_dir_entries");
+		return;
+	}
+	tpnt->proc_dir->owner = tpnt->module;
+
+	hpnt = scsi_hostlist;
+	while (hpnt) {
+		if (tpnt == hpnt->hostt) {
+			struct proc_dir_entry *p;
+			sprintf(name, "%d", hpnt->host_no);
+			p = create_proc_read_entry(name,
+					S_IFREG | S_IRUGO | S_IWUSR,
+					tpnt->proc_dir,
+					proc_scsi_read,
+					(void *) hpnt);
+			if (!p)
+				panic("Not enough memory to register SCSI HBA in /proc/scsi !\n");
+			p->write_proc = proc_scsi_write;
+			p->owner = tpnt->module;
+		}
+		hpnt = hpnt->next;
+	}
+}
+
+/*
+ * parseHandle *parseInit(char *buf, char *cmdList, int cmdNum);
+ *	      gets a pointer to a null terminated data buffer
+ *	      and a list of commands with
blanks as delimiter + * in between. + * The commands have to be alphanumerically sorted. + * cmdNum has to contain the number of commands. + * On success, a pointer to a handle structure + * is returned, NULL on failure + * + * int parseOpt(parseHandle *handle, char **param); + * processes the next parameter. On success, the + * index of the appropriate command in the cmdList + * is returned, starting with zero. + * param points to the null terminated parameter string. + * On failure, -1 is returned. + * + * The databuffer buf may only contain pairs of commands + * options, separated by blanks: + * [ ]* + */ + +typedef struct { + char *buf, /* command buffer */ + *cmdList, /* command list */ + *bufPos, /* actual position */ + **cmdPos, /* cmdList index */ + cmdNum; /* cmd number */ +} parseHandle; + +inline int parseFree(parseHandle * handle) +{ /* free memory */ + kfree(handle->cmdPos); + kfree(handle); + + return -1; +} + +parseHandle *parseInit(char *buf, char *cmdList, int cmdNum) +{ + char *ptr; /* temp pointer */ + parseHandle *handle; /* new handle */ + + if (!buf || !cmdList) /* bad input ? */ + return NULL; + handle = (parseHandle *) kmalloc(sizeof(parseHandle), GFP_KERNEL); + if (!handle) + return NULL; /* out of memory */ + handle->cmdPos = (char **) kmalloc(sizeof(int) * cmdNum, GFP_KERNEL); + if (!handle->cmdPos) { + kfree(handle); + return NULL; /* out of memory */ + } + handle->buf = handle->bufPos = buf; /* init handle */ + handle->cmdList = cmdList; + handle->cmdNum = cmdNum; + + handle->cmdPos[cmdNum = 0] = cmdList; + for (ptr = cmdList; *ptr; ptr++) { /* scan command string */ + if (*ptr == ' ') { /* and insert zeroes */ + *ptr++ = 0; + handle->cmdPos[++cmdNum] = ptr++; + } + } + return handle; +} + +int parseOpt(parseHandle * handle, char **param) +{ + int cmdIndex = 0, cmdLen = 0; + char *startPos; + + if (!handle) /* invalid handle */ + return (parseFree(handle)); + /* skip spaces */ + for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++); + if (!*(handle->bufPos)) + return (parseFree(handle)); /* end of data */ + + startPos = handle->bufPos; /* store cmd start */ + for (; handle->cmdPos[cmdIndex][cmdLen] && *(handle->bufPos); handle->bufPos++) { /* no string end? */ + for (;;) { + if (*(handle->bufPos) == handle->cmdPos[cmdIndex][cmdLen]) + break; /* char matches ? */ + else if (memcmp(startPos, (char *) (handle->cmdPos[++cmdIndex]), cmdLen)) + return (parseFree(handle)); /* unknown command */ + + if (cmdIndex >= handle->cmdNum) + return (parseFree(handle)); /* unknown command */ + } + + cmdLen++; /* next char */ + } + + /* Get param. 
First skip all blanks, then insert zero after param */ + + for (; *(handle->bufPos) && *(handle->bufPos) == ' '; handle->bufPos++); + *param = handle->bufPos; + + for (; *(handle->bufPos) && *(handle->bufPos) != ' '; handle->bufPos++); + *(handle->bufPos++) = 0; + + return (cmdIndex); +} + +void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len) +{ + + int x, y = *size; + extern const char *const scsi_device_types[MAX_SCSI_DEVICE_CODE]; + + y = sprintf(buffer + len, + "Host: scsi%d Channel: %02d Id: %02d Lun: %02d\n Vendor: ", + scd->host->host_no, scd->channel, scd->id, scd->lun); + for (x = 0; x < 8; x++) { + if (scd->vendor[x] >= 0x20) + y += sprintf(buffer + len + y, "%c", scd->vendor[x]); + else + y += sprintf(buffer + len + y, " "); + } + y += sprintf(buffer + len + y, " Model: "); + for (x = 0; x < 16; x++) { + if (scd->model[x] >= 0x20) + y += sprintf(buffer + len + y, "%c", scd->model[x]); + else + y += sprintf(buffer + len + y, " "); + } + y += sprintf(buffer + len + y, " Rev: "); + for (x = 0; x < 4; x++) { + if (scd->rev[x] >= 0x20) + y += sprintf(buffer + len + y, "%c", scd->rev[x]); + else + y += sprintf(buffer + len + y, " "); + } + y += sprintf(buffer + len + y, "\n"); + + y += sprintf(buffer + len + y, " Type: %s ", + scd->type < MAX_SCSI_DEVICE_CODE ? + scsi_device_types[(int) scd->type] : "Unknown "); + y += sprintf(buffer + len + y, " ANSI" + " SCSI revision: %02x", (scd->scsi_level - 1) ? scd->scsi_level - 1 : 1); + if (scd->scsi_level == 2) + y += sprintf(buffer + len + y, " CCS\n"); + else + y += sprintf(buffer + len + y, "\n"); + + *size = y; + return; +} + +#else /* if !CONFIG_PROC_FS */ + +void proc_print_scsidevice(Scsi_Device * scd, char *buffer, int *size, int len) +{ +} + +#endif /* CONFIG_PROC_FS */ + +/* + * Overrides for Emacs so that we get a uniform tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/drivers/scsi/scsi_queue.c b/xen-2.4.16/drivers/scsi/scsi_queue.c new file mode 100644 index 0000000000..c41f1ce069 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_queue.c @@ -0,0 +1,151 @@ +/* + * scsi_queue.c Copyright (C) 1997 Eric Youngdale + * + * generic mid-level SCSI queueing. + * + * The point of this is that we need to track when hosts are unable to + * accept a command because they are busy. In addition, we track devices + * that cannot accept a command because of a QUEUE_FULL condition. In both + * of these cases, we enter the command in the queue. At some later point, + * we attempt to remove commands from the queue and retry them. + */ + +#define __NO_VERSION__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KERNEL_SYSCALLS__ + +#include + +#include +#include +#include + +#include "scsi.h" +#include "hosts.h" +#include "constants.h" + +/* + * TODO: + * 1) Prevent multiple traversals of list to look for commands to + * queue. + * 2) Protect against multiple insertions of list at the same time. 
+ * DONE: + * 1) Set state of scsi command to a new state value for ml queue. + * 2) Insert into queue when host rejects command. + * 3) Make sure status code is properly passed from low-level queue func + * so that internal_cmnd properly returns the right value. + * 4) Insert into queue when QUEUE_FULL. + * 5) Cull queue in bottom half handler. + * 6) Check usage count prior to queue insertion. Requeue if usage + * count is 0. + * 7) Don't send down any more commands if the host/device is busy. + */ + +static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_queue.c,v 1.1 1997/10/21 11:16:38 eric Exp $"; + + +/* + * Function: scsi_mlqueue_insert() + * + * Purpose: Insert a command in the midlevel queue. + * + * Arguments: cmd - command that we are adding to queue. + * reason - why we are inserting command to queue. + * + * Lock status: Assumed that lock is not held upon entry. + * + * Returns: Nothing. + * + * Notes: We do this for one of two cases. Either the host is busy + * and it cannot accept any more commands for the time being, + * or the device returned QUEUE_FULL and can accept no more + * commands. + * Notes: This could be called either from an interrupt context or a + * normal process context. + */ +int scsi_mlqueue_insert(Scsi_Cmnd * cmd, int reason) +{ + struct Scsi_Host *host; + unsigned long flags; + + SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); + + /* + * We are inserting the command into the ml queue. First, we + * cancel the timer, so it doesn't time out. + */ + scsi_delete_timer(cmd); + + host = cmd->host; + + /* + * Next, set the appropriate busy bit for the device/host. + */ + if (reason == SCSI_MLQUEUE_HOST_BUSY) { + /* + * Protect against race conditions. If the host isn't busy, + * assume that something actually completed, and that we should + * be able to queue a command now. Note that there is an implicit + * assumption that every host can always queue at least one command. + * If a host is inactive and cannot queue any commands, I don't see + * how things could possibly work anyways. + */ + if (host->host_busy == 0) { + if (scsi_retry_command(cmd) == 0) { + return 0; + } + } + host->host_blocked = TRUE; + } else { + /* + * Protect against race conditions. If the device isn't busy, + * assume that something actually completed, and that we should + * be able to queue a command now. Note that there is an implicit + * assumption that every host can always queue at least one command. + * If a host is inactive and cannot queue any commands, I don't see + * how things could possibly work anyways. + */ + if (cmd->device->device_busy == 0) { + if (scsi_retry_command(cmd) == 0) { + return 0; + } + } + cmd->device->device_blocked = TRUE; + } + + /* + * Register the fact that we own the thing for now. + */ + cmd->state = SCSI_STATE_MLQUEUE; + cmd->owner = SCSI_OWNER_MIDLEVEL; + cmd->bh_next = NULL; + + /* + * Decrement the counters, since these commands are no longer + * active on the host/device. + */ + spin_lock_irqsave(&io_request_lock, flags); + cmd->host->host_busy--; + cmd->device->device_busy--; + spin_unlock_irqrestore(&io_request_lock, flags); + + /* + * Insert this command at the head of the queue for it's device. + * It will go before all other commands that are already in the queue. 
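
A hedged sketch of the decision the surrounding scsi_mlqueue_insert() makes: if the relevant busy counter has already drained to zero, something completed while we were deciding, so the command is retried immediately instead of queued; otherwise the host or device is marked blocked and the command is parked. The SCSI_MLQUEUE_* values and the two structs are placeholders (the real constants live elsewhere in this tree), and parking is reduced to a string for brevity.

    #include <stdio.h>

    #define SCSI_MLQUEUE_HOST_BUSY   1      /* placeholder values */
    #define SCSI_MLQUEUE_DEVICE_BUSY 2

    struct host   { int host_busy, host_blocked; };
    struct device { int device_busy, device_blocked; };

    static const char *mlqueue_insert(struct host *h, struct device *d, int reason)
    {
        if (reason == SCSI_MLQUEUE_HOST_BUSY) {
            if (h->host_busy == 0)
                return "retry immediately";  /* a slot opened up: race won */
            h->host_blocked = 1;
        } else {
            if (d->device_busy == 0)
                return "retry immediately";
            d->device_blocked = 1;
        }
        h->host_busy--;                      /* command no longer active */
        d->device_busy--;
        return "parked on the mlqueue";
    }

    int main(void)
    {
        struct host h = { 2, 0 };
        struct device d = { 1, 0 };
        printf("%s\n", mlqueue_insert(&h, &d, SCSI_MLQUEUE_HOST_BUSY));
        h.host_busy = 0;
        printf("%s\n", mlqueue_insert(&h, &d, SCSI_MLQUEUE_HOST_BUSY));
        return 0;
    }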
+ */
+	scsi_insert_special_cmd(cmd, 1);
+	return 0;
+}
diff --git a/xen-2.4.16/drivers/scsi/scsi_scan.c b/xen-2.4.16/drivers/scsi/scsi_scan.c
new file mode 100644
index 0000000000..f6dbf406b1
--- /dev/null
+++ b/xen-2.4.16/drivers/scsi/scsi_scan.c
@@ -0,0 +1,894 @@
+/*
+ * scsi_scan.c Copyright (C) 2000 Eric Youngdale
+ *
+ * Bus scan logic.
+ *
+ * This used to live in scsi.c, but that file was just a laundry basket
+ * full of misc stuff. This got separated out in order to make things
+ * clearer.
+ */
+
+#define __NO_VERSION__
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/blk.h>
+
+#include "scsi.h"
+#include "hosts.h"
+#include "constants.h"
+
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
+
+/*
+ * Flags for irregular SCSI devices that need special treatment
+ */
+#define BLIST_NOLUN	0x001	/* Don't scan for LUNs */
+#define BLIST_FORCELUN	0x002	/* Known to have LUNs, force scanning */
+#define BLIST_BORKEN	0x004	/* Flag for broken handshaking */
+#define BLIST_KEY	0x008	/* Needs to be unlocked by special command */
+#define BLIST_SINGLELUN	0x010	/* LUNs should better not be used in parallel */
+#define BLIST_NOTQ	0x020	/* Buggy Tagged Command Queuing */
+#define BLIST_SPARSELUN	0x040	/* Non consecutive LUN numbering */
+#define BLIST_MAX5LUN	0x080	/* Avoid LUNS >= 5 */
+#define BLIST_ISDISK	0x100	/* Treat as (removable) disk */
+#define BLIST_ISROM	0x200	/* Treat as (removable) CD-ROM */
+#define BLIST_LARGELUN	0x400	/* LUNs larger than 7 despite reporting as SCSI 2 */
+
+static void print_inquiry(unsigned char *data);
+static int scan_scsis_single(unsigned int channel, unsigned int dev,
+		unsigned int lun, int lun0_scsi_level,
+		unsigned int *max_scsi_dev, unsigned int *sparse_lun,
+		Scsi_Device ** SDpnt, struct Scsi_Host *shpnt,
+		char *scsi_result);
+static int find_lun0_scsi_level(unsigned int channel, unsigned int dev,
+		struct Scsi_Host *shpnt);
+
+struct dev_info {
+	const char *vendor;
+	const char *model;
+	const char *revision;	/* Latest revision known to be bad. Not used yet */
+	unsigned flags;
+};
+
+/*
+ * This is what was previously known as the blacklist. The concept
+ * has been expanded so that we can specify other types of things we
+ * need to be aware of.
+ */
+static struct dev_info device_list[] =
+{
+/* The following devices are known not to tolerate a lun != 0 scan for
+ * one reason or another. Some will respond to all luns, others will
+ * lock up.
+ */
+	{"Aashima", "IMAGERY 2400SP", "1.03", BLIST_NOLUN},	/* Locks up if polled for lun != 0 */
+	{"CHINON", "CD-ROM CDS-431", "H42", BLIST_NOLUN},	/* Locks up if polled for lun != 0 */
+	{"CHINON", "CD-ROM CDS-535", "Q14", BLIST_NOLUN},	/* Locks up if polled for lun != 0 */
+	{"DENON", "DRD-25X", "V", BLIST_NOLUN},			/* Locks up if probed for lun != 0 */
+	{"HITACHI", "DK312C", "CM81", BLIST_NOLUN},		/* Responds to all lun - dtg */
+	{"HITACHI", "DK314C", "CR21", BLIST_NOLUN},		/* responds to all lun */
+	{"IMS", "CDD521/10", "2.06", BLIST_NOLUN},		/* Locks-up when LUN>0 polled. */
+	{"MAXTOR", "XT-3280", "PR02", BLIST_NOLUN},		/* Locks-up when LUN>0 polled. */
+	{"MAXTOR", "XT-4380S", "B3C", BLIST_NOLUN},		/* Locks-up when LUN>0 polled. */
+	{"MAXTOR", "MXT-1240S", "I1.2", BLIST_NOLUN},		/* Locks up when LUN>0 polled */
+	{"MAXTOR", "XT-4170S", "B5A", BLIST_NOLUN},		/* Locks-up sometimes when LUN>0 polled. */
+	{"MAXTOR", "XT-8760S", "B7B", BLIST_NOLUN},		/* guess what? */
+	{"MEDIAVIS", "RENO CD-ROMX2A", "2.03", BLIST_NOLUN},	/* Responds to all lun */
+	{"NEC", "CD-ROM DRIVE:841", "1.0", BLIST_NOLUN},	/* Locks-up when LUN>0 polled.
*/ + {"PHILIPS", "PCA80SC", "V4-2", BLIST_NOLUN}, /* Responds to all lun */ + {"RODIME", "RO3000S", "2.33", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"SANYO", "CRD-250S", "1.20", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1 + * for aha152x controller, which causes + * SCSI code to reset bus.*/ + {"SEAGATE", "ST157N", "\004|j", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1 + * for aha152x controller, which causes + * SCSI code to reset bus.*/ + {"SEAGATE", "ST296", "921", BLIST_NOLUN}, /* Responds to all lun */ + {"SEAGATE", "ST1581", "6538", BLIST_NOLUN}, /* Responds to all lun */ + {"SONY", "CD-ROM CDU-541", "4.3d", BLIST_NOLUN}, + {"SONY", "CD-ROM CDU-55S", "1.0i", BLIST_NOLUN}, + {"SONY", "CD-ROM CDU-561", "1.7x", BLIST_NOLUN}, + {"SONY", "CD-ROM CDU-8012", "*", BLIST_NOLUN}, + {"TANDBERG", "TDC 3600", "U07", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"TEAC", "CD-R55S", "1.0H", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"TEAC", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1 + * for seagate controller, which causes + * SCSI code to reset bus.*/ + {"TEAC", "MT-2ST/45S2-27", "RV M", BLIST_NOLUN}, /* Responds to all lun */ + {"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN}, /* causes failed REQUEST SENSE on lun 1 + * for seagate controller, which causes + * SCSI code to reset bus.*/ + {"QUANTUM", "LPS525S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */ + {"QUANTUM", "PD1225S", "3110", BLIST_NOLUN}, /* Locks sometimes if polled for lun != 0 */ + {"QUANTUM", "FIREBALL ST4.3S", "0F0C", BLIST_NOLUN}, /* Locks up when polled for lun != 0 */ + {"MEDIAVIS", "CDR-H93MV", "1.31", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"SANKYO", "CP525", "6.64", BLIST_NOLUN}, /* causes failed REQ SENSE, extra reset */ + {"HP", "C1750A", "3226", BLIST_NOLUN}, /* scanjet iic */ + {"HP", "C1790A", "", BLIST_NOLUN}, /* scanjet iip */ + {"HP", "C2500A", "", BLIST_NOLUN}, /* scanjet iicx */ + {"HP", "A6188A", "*", BLIST_SPARSELUN}, /* HP Va7100 Array */ + {"HP", "A6189A", "*", BLIST_SPARSELUN}, /* HP Va7400 Array */ + {"HP", "A6189B", "*", BLIST_SPARSELUN}, /* HP Va7410 Array */ + {"HP", "OPEN-", "*", BLIST_SPARSELUN}, /* HP XP Arrays */ + {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN}, /* Locks up if polled for lun != 0 + * extra reset */ + {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"YAMAHA", "CRW6416S", "1.0c", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"MITSUMI", "CD-R CR-2201CS", "6119", BLIST_NOLUN}, /* Locks up if polled for lun != 0 */ + {"RELISYS", "Scorpio", "*", BLIST_NOLUN}, /* responds to all LUN */ + {"RELISYS", "VM3530+", "*", BLIST_NOLUN}, /* responds to all LUN */ + {"ACROSS", "", "*", BLIST_NOLUN}, /* responds to all LUN */ + {"MICROTEK", "ScanMaker II", "5.61", BLIST_NOLUN}, /* responds to all LUN */ + +/* + * Other types of devices that have special flags. 
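
get_device_flags(), defined further down in this file, matches the INQUIRY vendor and model fields against this table by skipping leading blanks and doing a prefix comparison until the NULL sentinel. A miniature, self-contained version with a two-entry table follows; strncmp replaces the kernel's memcmp, but the matching direction is the same.

    #include <stdio.h>
    #include <string.h>

    #define BLIST_NOLUN     0x001
    #define BLIST_SPARSELUN 0x040

    struct dev_info { const char *vendor, *model; unsigned flags; };

    static const struct dev_info table[] = {
        { "CHINON", "CD-ROM CDS-431", BLIST_NOLUN },
        { "HP",     "A6188A",         BLIST_SPARSELUN },
        { NULL,     NULL,             0 },                /* sentinel */
    };

    static const char *skip_blanks(const char *s)
    {
        while (*s == ' ')
            s++;
        return s;
    }

    static unsigned get_flags(const char *vendor, const char *model)
    {
        vendor = skip_blanks(vendor);
        model = skip_blanks(model);
        for (int i = 0; table[i].vendor; i++) {
            if (strncmp(table[i].vendor, vendor, strlen(table[i].vendor)))
                continue;
            if (strncmp(table[i].model, model, strlen(table[i].model)))
                continue;
            return table[i].flags;       /* first prefix match wins */
        }
        return 0;
    }

    int main(void)
    {
        printf("flags = 0x%03x\n", get_flags("HP", "A6188A"));      /* 0x040 */
        printf("flags = 0x%03x\n", get_flags("SEAGATE", "ST157N")); /* 0x000 */
        return 0;
    }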
+ */ + {"SONY", "CD-ROM CDU-8001", "*", BLIST_BORKEN}, + {"TEXEL", "CD-ROM", "1.06", BLIST_BORKEN}, + {"IOMEGA", "Io20S *F", "*", BLIST_KEY}, + {"INSITE", "Floptical F*8I", "*", BLIST_KEY}, + {"INSITE", "I325VM", "*", BLIST_KEY}, + {"LASOUND","CDX7405","3.10", BLIST_MAX5LUN | BLIST_SINGLELUN}, + {"MICROP", "4110", "*", BLIST_NOTQ}, /* Buggy Tagged Queuing */ + {"NRC", "MBR-7", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"NRC", "MBR-7.4", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"REGAL", "CDC-4X", "*", BLIST_MAX5LUN | BLIST_SINGLELUN}, + {"NAKAMICH", "MJ-4.8S", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"NAKAMICH", "MJ-5.16S", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"PIONEER", "CD-ROM DRM-600", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"PIONEER", "CD-ROM DRM-602X", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"PIONEER", "CD-ROM DRM-604X", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"EMULEX", "MD21/S2 ESDI", "*", BLIST_SINGLELUN}, + {"CANON", "IPUBJD", "*", BLIST_SPARSELUN}, + {"nCipher", "Fastness Crypto", "*", BLIST_FORCELUN}, + {"DEC","HSG80","*", BLIST_FORCELUN}, + {"COMPAQ","LOGICAL VOLUME","*", BLIST_FORCELUN}, + {"COMPAQ","CR3500","*", BLIST_FORCELUN}, + {"NEC", "PD-1 ODX654P", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"MATSHITA", "PD-1", "*", BLIST_FORCELUN | BLIST_SINGLELUN}, + {"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN}, + {"TOSHIBA","CDROM","*", BLIST_ISROM}, + {"TOSHIBA","CD-ROM","*", BLIST_ISROM}, + {"MegaRAID", "LD", "*", BLIST_FORCELUN}, + {"DGC", "RAID", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (tgt @ LUN 0) + {"DGC", "DISK", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 650F (no tgt @ LUN 0) + {"DELL", "PV660F", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"DELL", "PV660F PSEUDO", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"DELL", "PSEUDO DEVICE .", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F + {"DELL", "PV530F", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Dell PV 530F + {"EMC", "SYMMETRIX", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN}, + {"HP", "A6189A", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // HP VA7400, by Alar Aun + {"CMD", "CRA-7280", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // CMD RAID Controller + {"CNSI", "G7324", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, // Chaparral G7324 RAID + {"CNSi", "G8324", "*", BLIST_SPARSELUN}, // Chaparral G8324 RAID + {"Zzyzx", "RocketStor 500S", "*", BLIST_SPARSELUN}, + {"Zzyzx", "RocketStor 2000", "*", BLIST_SPARSELUN}, + {"SONY", "TSL", "*", BLIST_FORCELUN}, // DDS3 & DDS4 autoloaders + {"DELL", "PERCRAID", "*", BLIST_FORCELUN}, + {"HP", "NetRAID-4M", "*", BLIST_FORCELUN}, + {"ADAPTEC", "AACRAID", "*", BLIST_FORCELUN}, + {"ADAPTEC", "Adaptec 5400S", "*", BLIST_FORCELUN}, + {"COMPAQ", "MSA1000", "*", BLIST_FORCELUN}, + {"HP", "C1557A", "*", BLIST_FORCELUN}, + {"IBM", "AuSaV1S2", "*", BLIST_FORCELUN}, + {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"DDN", "SAN DataDirector", "*", BLIST_SPARSELUN}, + {"HITACHI", "DF400", "*", BLIST_SPARSELUN}, + {"HITACHI", "DF500", "*", BLIST_SPARSELUN}, + {"HITACHI", "DF600", "*", BLIST_SPARSELUN}, + + /* + * Must be at end of list... 
+ */ + {NULL, NULL, NULL} +}; + +#define MAX_SCSI_LUNS 0xFFFFFFFF + +#ifdef CONFIG_SCSI_MULTI_LUN +static unsigned int max_scsi_luns = MAX_SCSI_LUNS; +#else +static unsigned int max_scsi_luns = 1; +#endif + +#ifdef MODULE + +MODULE_PARM(max_scsi_luns, "i"); +MODULE_PARM_DESC(max_scsi_luns, "last scsi LUN (should be between 1 and 2^32-1)"); + +#else + +static int __init scsi_luns_setup(char *str) +{ + unsigned int tmp; + + if (get_option(&str, &tmp) == 1) { + max_scsi_luns = tmp; + return 1; + } else { + printk("scsi_luns_setup : usage max_scsi_luns=n " + "(n should be between 1 and 2^32-1)\n"); + return 0; + } +} + +__setup("max_scsi_luns=", scsi_luns_setup); + +#endif + +static void print_inquiry(unsigned char *data) +{ + int i; + + printk(" Vendor: "); + for (i = 8; i < 16; i++) { + if (data[i] >= 0x20 && i < data[4] + 5) + printk("%c", data[i]); + else + printk(" "); + } + + printk(" Model: "); + for (i = 16; i < 32; i++) { + if (data[i] >= 0x20 && i < data[4] + 5) + printk("%c", data[i]); + else + printk(" "); + } + + printk(" Rev: "); + for (i = 32; i < 36; i++) { + if (data[i] >= 0x20 && i < data[4] + 5) + printk("%c", data[i]); + else + printk(" "); + } + + printk("\n"); + + i = data[0] & 0x1f; + + printk(" Type: %s ", + i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] : "Unknown "); + printk(" ANSI SCSI revision: %02x", data[2] & 0x07); + if ((data[2] & 0x07) == 1 && (data[3] & 0x0f) == 1) + printk(" CCS\n"); + else + printk("\n"); +} + +static int get_device_flags(unsigned char *response_data) +{ + int i = 0; + unsigned char *pnt; + for (i = 0; 1; i++) { + if (device_list[i].vendor == NULL) + return 0; + pnt = &response_data[8]; + while (*pnt && *pnt == ' ') + pnt++; + if (memcmp(device_list[i].vendor, pnt, + strlen(device_list[i].vendor))) + continue; + pnt = &response_data[16]; + while (*pnt && *pnt == ' ') + pnt++; + if (memcmp(device_list[i].model, pnt, + strlen(device_list[i].model))) + continue; + return device_list[i].flags; + } + return 0; +} + +/* + * Detecting SCSI devices : + * We scan all present host adapter's busses, from ID 0 to ID (max_id). + * We use the INQUIRY command, determine device type, and pass the ID / + * lun address of all sequential devices to the tape driver, all random + * devices to the disk driver. + */ +void scan_scsis(struct Scsi_Host *shpnt, + uint hardcoded, + uint hchannel, + uint hid, + uint hlun) +{ + uint channel; + unsigned int dev; + unsigned int lun; + unsigned int max_dev_lun; + unsigned char *scsi_result; + unsigned char scsi_result0[256]; + Scsi_Device *SDpnt; + Scsi_Device *SDtail; + unsigned int sparse_lun; + int lun0_sl; + + scsi_result = NULL; + + SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), + GFP_ATOMIC); + if (SDpnt) { + memset(SDpnt, 0, sizeof(Scsi_Device)); + /* + * Register the queue for the device. All I/O requests will + * come in through here. We also need to register a pointer to + * ourselves, since the queue handler won't know what device + * the queue actually represents. We could look it up, but it + * is pointless work. + */ + scsi_initialize_queue(SDpnt, shpnt); + SDpnt->request_queue.queuedata = (void *) SDpnt; + /* Make sure we have something that is valid for DMA purposes */ + scsi_result = ((!shpnt->unchecked_isa_dma) + ? 
&scsi_result0[0] : kmalloc(512, GFP_DMA)); + } + + if (scsi_result == NULL) { + printk("Unable to obtain scsi_result buffer\n"); + goto leave; + } + /* + * We must chain ourself in the host_queue, so commands can time out + */ + SDpnt->queue_depth = 1; + SDpnt->host = shpnt; + SDpnt->online = TRUE; + + initialize_merge_fn(SDpnt); + + /* + * Initialize the object that we will use to wait for command blocks. + */ + init_waitqueue_head(&SDpnt->scpnt_wait); + + /* + * Next, hook the device to the host in question. + */ + SDpnt->prev = NULL; + SDpnt->next = NULL; + if (shpnt->host_queue != NULL) { + SDtail = shpnt->host_queue; + while (SDtail->next != NULL) + SDtail = SDtail->next; + + SDtail->next = SDpnt; + SDpnt->prev = SDtail; + } else { + shpnt->host_queue = SDpnt; + } + + /* + * We need to increment the counter for this one device so we can track + * when things are quiet. + */ + if (hardcoded == 1) { + Scsi_Device *oldSDpnt = SDpnt; + struct Scsi_Device_Template *sdtpnt; + channel = hchannel; + if (channel > shpnt->max_channel) + goto leave; + dev = hid; + if (dev >= shpnt->max_id) + goto leave; + lun = hlun; + if (lun >= shpnt->max_lun) + goto leave; + if ((0 == lun) || (lun > 7)) + lun0_sl = SCSI_3; /* actually don't care for 0 == lun */ + else + lun0_sl = find_lun0_scsi_level(channel, dev, shpnt); + scan_scsis_single(channel, dev, lun, lun0_sl, &max_dev_lun, + &sparse_lun, &SDpnt, shpnt, scsi_result); + if (SDpnt != oldSDpnt) { + + /* it could happen the blockdevice hasn't yet been inited */ + /* queue_depth() moved from scsi_proc_info() so that + it is called before scsi_build_commandblocks() */ + if (shpnt->select_queue_depths != NULL) + (shpnt->select_queue_depths)(shpnt, + shpnt->host_queue); + + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) + if (sdtpnt->init && sdtpnt->dev_noticed) + (*sdtpnt->init) (); + + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) { + if (sdtpnt->attach) { + (*sdtpnt->attach) (oldSDpnt); + if (oldSDpnt->attached) { + scsi_build_commandblocks(oldSDpnt); + if (0 == oldSDpnt->has_cmdblocks) { + printk("scan_scsis: DANGER, no command blocks\n"); + /* What to do now ?? */ + } + } + } + } + scsi_resize_dma_pool(); + + for (sdtpnt = scsi_devicelist; sdtpnt; sdtpnt = sdtpnt->next) { + if (sdtpnt->finish && sdtpnt->nr_dev) { + (*sdtpnt->finish) (); + } + } + } + } else { + /* Actual LUN. PC ordering is 0->n IBM/spec ordering is n->0 */ + int order_dev; + + for (channel = 0; channel <= shpnt->max_channel; channel++) { + for (dev = 0; dev < shpnt->max_id; ++dev) { + if (shpnt->reverse_ordering) + /* Shift to scanning 15,14,13... or 7,6,5,4, */ + order_dev = shpnt->max_id - dev - 1; + else + order_dev = dev; + + if (shpnt->this_id != order_dev) { + + /* + * We need the for so our continue, etc. work fine. We put this in + * a variable so that we can override it during the scan if we + * detect a device *KNOWN* to have multiple logical units. + */ + max_dev_lun = (max_scsi_luns < shpnt->max_lun ? 
+ max_scsi_luns : shpnt->max_lun); + sparse_lun = 0; + for (lun = 0, lun0_sl = SCSI_2; lun < max_dev_lun; ++lun) { + /* don't probe further for luns > 7 for targets <= SCSI_2 */ + if ((lun0_sl < SCSI_3) && (lun > 7)) + break; + + if (!scan_scsis_single(channel, order_dev, lun, lun0_sl, + &max_dev_lun, &sparse_lun, &SDpnt, shpnt, + scsi_result) + && !sparse_lun) + break; /* break means don't probe further for luns!=0 */ + if (SDpnt && (0 == lun)) { + int bflags = get_device_flags (scsi_result); + if (bflags & BLIST_LARGELUN) + lun0_sl = SCSI_3; /* treat as SCSI 3 */ + else + lun0_sl = SDpnt->scsi_level; + } + } /* for lun ends */ + } /* if this_id != id ends */ + } /* for dev ends */ + } /* for channel ends */ + } /* if/else hardcoded */ + + leave: + + { /* Unchain SRpnt from host_queue */ + Scsi_Device *prev, *next; + Scsi_Device *dqptr; + + for (dqptr = shpnt->host_queue; dqptr != SDpnt; dqptr = dqptr->next) + continue; + if (dqptr) { + prev = dqptr->prev; + next = dqptr->next; + if (prev) + prev->next = next; + else + shpnt->host_queue = next; + if (next) + next->prev = prev; + } + } + + /* Last device block does not exist. Free memory. */ + if (SDpnt != NULL) { + blk_cleanup_queue(&SDpnt->request_queue); + kfree((char *) SDpnt); + } + + /* If we allocated a buffer so we could do DMA, free it now */ + if (scsi_result != &scsi_result0[0] && scsi_result != NULL) { + kfree(scsi_result); + } { + Scsi_Device *sdev; + Scsi_Cmnd *scmd; + + SCSI_LOG_SCAN_BUS(4, printk("Host status for host %p:\n", shpnt)); + for (sdev = shpnt->host_queue; sdev; sdev = sdev->next) { + SCSI_LOG_SCAN_BUS(4, printk("Device %d %p: ", sdev->id, sdev)); + for (scmd = sdev->device_queue; scmd; scmd = scmd->next) { + SCSI_LOG_SCAN_BUS(4, printk("%p ", scmd)); + } + SCSI_LOG_SCAN_BUS(4, printk("\n")); + } + } +} + +/* + * The worker for scan_scsis. + * Returning 0 means Please don't ask further for lun!=0, 1 means OK go on. + * Global variables used : scsi_devices(linked list) + */ +static int scan_scsis_single(unsigned int channel, unsigned int dev, + unsigned int lun, int lun0_scsi_level, + unsigned int *max_dev_lun, unsigned int *sparse_lun, + Scsi_Device ** SDpnt2, struct Scsi_Host *shpnt, + char *scsi_result) +{ + char devname[64]; + unsigned char scsi_cmd[MAX_COMMAND_SIZE]; + struct Scsi_Device_Template *sdtpnt; + Scsi_Device *SDtail, *SDpnt = *SDpnt2; + Scsi_Request * SRpnt; + int bflags, type = -1; + extern devfs_handle_t scsi_devfs_handle; + int scsi_level; + + SDpnt->host = shpnt; + SDpnt->id = dev; + SDpnt->lun = lun; + SDpnt->channel = channel; + SDpnt->online = TRUE; + + scsi_build_commandblocks(SDpnt); + + /* Some low level driver could use device->type (DB) */ + SDpnt->type = -1; + + /* + * Assume that the device will have handshaking problems, and then fix + * this field later if it turns out it doesn't + */ + SDpnt->borken = 1; + SDpnt->was_reset = 0; + SDpnt->expecting_cc_ua = 0; + SDpnt->starved = 0; + + if (NULL == (SRpnt = scsi_allocate_request(SDpnt))) { + printk("scan_scsis_single: no memory\n"); + return 0; + } + + /* + * We used to do a TEST_UNIT_READY before the INQUIRY but that was + * not really necessary. Spec recommends using INQUIRY to scan for + * devices (and TEST_UNIT_READY to poll for media change). - Paul G. + */ + + SCSI_LOG_SCAN_BUS(3, printk("scsi: performing INQUIRY\n")); + /* + * Build an INQUIRY command block. 
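
scan_scsis_single(), just below, builds a 6-byte INQUIRY CDB in which the LUN is folded into bits 7..5 of byte 1 only for SCSI-2 and older targets. A standalone sketch of that CDB assembly; INQUIRY is the standard 0x12 opcode, while the numeric value chosen here for SCSI_2 is an assumed stand-in for the internal scsi_level encoding.

    #include <stdio.h>

    #define INQUIRY 0x12    /* standard SCSI opcode */
    #define SCSI_2  3       /* assumed stand-in for the internal scsi_level code */

    static void build_inquiry(unsigned char cdb[6], unsigned lun, int scsi_level)
    {
        cdb[0] = INQUIRY;
        /* SCSI-2 and older: LUN rides in bits 7..5 of byte 1 */
        cdb[1] = (lun > 0 && scsi_level <= SCSI_2) ? (lun << 5) & 0xe0 : 0;
        cdb[2] = 0;         /* standard inquiry data, no page code */
        cdb[3] = 0;
        cdb[4] = 255;       /* allocation length */
        cdb[5] = 0;         /* control byte */
    }

    int main(void)
    {
        unsigned char cdb[6];
        build_inquiry(cdb, 2, SCSI_2);
        for (int i = 0; i < 6; i++)
            printf("%02x ", cdb[i]);
        printf("\n");       /* prints: 12 40 00 00 ff 00 */
        return 0;
    }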
+ */ + scsi_cmd[0] = INQUIRY; + if ((lun > 0) && (lun0_scsi_level <= SCSI_2)) + scsi_cmd[1] = (lun << 5) & 0xe0; + else + scsi_cmd[1] = 0; /* SCSI_3 and higher, don't touch */ + scsi_cmd[2] = 0; + scsi_cmd[3] = 0; + scsi_cmd[4] = 255; + scsi_cmd[5] = 0; + SRpnt->sr_cmd_len = 0; + SRpnt->sr_data_direction = SCSI_DATA_READ; + + scsi_wait_req (SRpnt, (void *) scsi_cmd, + (void *) scsi_result, + 256, SCSI_TIMEOUT+4*HZ, 3); + + SCSI_LOG_SCAN_BUS(3, printk("scsi: INQUIRY %s with code 0x%x\n", + SRpnt->sr_result ? "failed" : "successful", SRpnt->sr_result)); + + /* + * Now that we don't do TEST_UNIT_READY anymore, we must be prepared + * for media change conditions here, so cannot require zero result. + */ + if (SRpnt->sr_result) { + if ((driver_byte(SRpnt->sr_result) & DRIVER_SENSE) != 0 && + (SRpnt->sr_sense_buffer[2] & 0xf) == UNIT_ATTENTION && + SRpnt->sr_sense_buffer[12] == 0x28 && + SRpnt->sr_sense_buffer[13] == 0) { + /* not-ready to ready transition - good */ + } else { + /* assume no peripheral if any other sort of error */ + scsi_release_request(SRpnt); + return 0; + } + } + + /* + * Check for SPARSELUN before checking the peripheral qualifier, + * so sparse lun devices are completely scanned. + */ + + /* + * Get any flags for this device. + */ + bflags = get_device_flags (scsi_result); + + if (bflags & BLIST_SPARSELUN) { + *sparse_lun = 1; + } + /* + * Check the peripheral qualifier field - this tells us whether LUNS + * are supported here or not. + */ + if ((scsi_result[0] >> 5) == 3) { + scsi_release_request(SRpnt); + return 0; /* assume no peripheral if any sort of error */ + } + /* The Toshiba ROM was "gender-changed" here as an inline hack. + This is now much more generic. + This is a mess: What we really want is to leave the scsi_result + alone, and just change the SDpnt structure. And the SDpnt is what + we want print_inquiry to print. -- REW + */ + if (bflags & BLIST_ISDISK) { + scsi_result[0] = TYPE_DISK; + scsi_result[1] |= 0x80; /* removable */ + } + + if (bflags & BLIST_ISROM) { + scsi_result[0] = TYPE_ROM; + scsi_result[1] |= 0x80; /* removable */ + } + + memcpy(SDpnt->vendor, scsi_result + 8, 8); + memcpy(SDpnt->model, scsi_result + 16, 16); + memcpy(SDpnt->rev, scsi_result + 32, 4); + + SDpnt->removable = (0x80 & scsi_result[1]) >> 7; + /* Use the peripheral qualifier field to determine online/offline */ + if (((scsi_result[0] >> 5) & 7) == 1) SDpnt->online = FALSE; + else SDpnt->online = TRUE; + SDpnt->lockable = SDpnt->removable; + SDpnt->changed = 0; + SDpnt->access_count = 0; + SDpnt->busy = 0; + SDpnt->has_cmdblocks = 0; + /* + * Currently, all sequential devices are assumed to be tapes, all random + * devices disk, with the appropriate read only flags set for ROM / WORM + * treated as RO. + */ + switch (type = (scsi_result[0] & 0x1f)) { + case TYPE_TAPE: + case TYPE_DISK: + case TYPE_PRINTER: + case TYPE_MOD: + case TYPE_PROCESSOR: + case TYPE_SCANNER: + case TYPE_MEDIUM_CHANGER: + case TYPE_ENCLOSURE: + case TYPE_COMM: + SDpnt->writeable = 1; + break; + case TYPE_WORM: + case TYPE_ROM: + SDpnt->writeable = 0; + break; + default: + printk("scsi: unknown type %d\n", type); + } + + SDpnt->device_blocked = FALSE; + SDpnt->device_busy = 0; + SDpnt->single_lun = 0; + SDpnt->soft_reset = + (scsi_result[7] & 1) && ((scsi_result[3] & 7) == 2); + SDpnt->random = (type == TYPE_TAPE) ? 
0 : 1; + SDpnt->type = (type & 0x1f); + + print_inquiry(scsi_result); + + sprintf (devname, "host%d/bus%d/target%d/lun%d", + SDpnt->host->host_no, SDpnt->channel, SDpnt->id, SDpnt->lun); + if (SDpnt->de) printk ("DEBUG: dir: \"%s\" already exists\n", devname); + else SDpnt->de = devfs_mk_dir (scsi_devfs_handle, devname, NULL); + + for (sdtpnt = scsi_devicelist; sdtpnt; + sdtpnt = sdtpnt->next) + if (sdtpnt->detect) + SDpnt->attached += + (*sdtpnt->detect) (SDpnt); + + SDpnt->scsi_level = scsi_result[2] & 0x07; + if (SDpnt->scsi_level >= 2 || + (SDpnt->scsi_level == 1 && + (scsi_result[3] & 0x0f) == 1)) + SDpnt->scsi_level++; + scsi_level = SDpnt->scsi_level; + + /* + * Accommodate drivers that want to sleep when they should be in a polling + * loop. + */ + SDpnt->disconnect = 0; + + + /* + * Set the tagged_queue flag for SCSI-II devices that purport to support + * tagged queuing in the INQUIRY data. + */ + SDpnt->tagged_queue = 0; + if ((SDpnt->scsi_level >= SCSI_2) && + (scsi_result[7] & 2) && + !(bflags & BLIST_NOTQ)) { + SDpnt->tagged_supported = 1; + SDpnt->current_tag = 0; + } + /* + * Some revisions of the Texel CD ROM drives have handshaking problems when + * used with the Seagate controllers. Before we know what type of device + * we're talking to, we assume it's borken and then change it here if it + * turns out that it isn't a TEXEL drive. + */ + if ((bflags & BLIST_BORKEN) == 0) + SDpnt->borken = 0; + + /* + * If we want to only allow I/O to one of the luns attached to this device + * at a time, then we set this flag. + */ + if (bflags & BLIST_SINGLELUN) + SDpnt->single_lun = 1; + + /* + * These devices need this "key" to unlock the devices so we can use it + */ + if ((bflags & BLIST_KEY) != 0) { + printk("Unlocked floptical drive.\n"); + SDpnt->lockable = 0; + scsi_cmd[0] = MODE_SENSE; + if (shpnt->max_lun <= 8) + scsi_cmd[1] = (lun << 5) & 0xe0; + else scsi_cmd[1] = 0; /* any other idea? */ + scsi_cmd[2] = 0x2e; + scsi_cmd[3] = 0; + scsi_cmd[4] = 0x2a; + scsi_cmd[5] = 0; + SRpnt->sr_cmd_len = 0; + SRpnt->sr_data_direction = SCSI_DATA_READ; + scsi_wait_req (SRpnt, (void *) scsi_cmd, + (void *) scsi_result, 0x2a, + SCSI_TIMEOUT, 3); + } + + scsi_release_request(SRpnt); + SRpnt = NULL; + + scsi_release_commandblocks(SDpnt); + + /* + * This device was already hooked up to the host in question, + * so at this point we just let go of it and it should be fine. We do need to + * allocate a new one and attach it to the host so that we can further scan the bus. + */ + SDpnt = (Scsi_Device *) kmalloc(sizeof(Scsi_Device), GFP_ATOMIC); + if (!SDpnt) { + printk("scsi: scan_scsis_single: Cannot malloc\n"); + return 0; + } + memset(SDpnt, 0, sizeof(Scsi_Device)); + + *SDpnt2 = SDpnt; + SDpnt->queue_depth = 1; + SDpnt->host = shpnt; + SDpnt->online = TRUE; + SDpnt->scsi_level = scsi_level; + + /* + * Register the queue for the device. All I/O requests will come + * in through here. We also need to register a pointer to + * ourselves, since the queue handler won't know what device + * the queue actually represents. We could look it up, but it + * is pointless work. + */ + scsi_initialize_queue(SDpnt, shpnt); + SDpnt->host = shpnt; + initialize_merge_fn(SDpnt); + + /* + * Mark this device as online, or otherwise we won't be able to do much with it. + */ + SDpnt->online = TRUE; + + /* + * Initialize the object that we will use to wait for command blocks. 
+ */ + init_waitqueue_head(&SDpnt->scpnt_wait); + + /* + * Since we just found one device, there had damn well better be one in the list + * already. + */ + if (shpnt->host_queue == NULL) + panic("scan_scsis_single: Host queue == NULL\n"); + + SDtail = shpnt->host_queue; + while (SDtail->next) { + SDtail = SDtail->next; + } + + /* Add this device to the linked list at the end */ + SDtail->next = SDpnt; + SDpnt->prev = SDtail; + SDpnt->next = NULL; + + /* + * Some scsi devices cannot be polled for lun != 0 due to firmware bugs + */ + if (bflags & BLIST_NOLUN) + return 0; /* break; */ + + /* + * If this device is known to support sparse multiple units, override the + * other settings, and scan all of them. + */ + if (bflags & BLIST_SPARSELUN) { + *max_dev_lun = shpnt->max_lun; + *sparse_lun = 1; + return 1; + } + /* + * If this device is known to support multiple units, override the other + * settings, and scan all of them. + */ + if (bflags & BLIST_FORCELUN) { + /* + * Scanning MAX_SCSI_LUNS units would be a bad idea. + * Any better idea? + * I think we need REPORT LUNS in future to avoid scanning + * of unused LUNs. But, that is another item. + */ + if (*max_dev_lun < shpnt->max_lun) + *max_dev_lun = shpnt->max_lun; + else if ((max_scsi_luns >> 1) >= *max_dev_lun) + *max_dev_lun += shpnt->max_lun; + else *max_dev_lun = max_scsi_luns; + return 1; + } + /* + * REGAL CDC-4X: avoid hang after LUN 4 + */ + if (bflags & BLIST_MAX5LUN) { + *max_dev_lun = 5; + return 1; + } + + /* + * We assume the device can't handle lun!=0 if: - it reports scsi-0 + * (ANSI SCSI Revision 0) (old drives like MAXTOR XT-3280) or - it + * reports scsi-1 (ANSI SCSI Revision 1) and Response Data Format 0 + */ + if (((scsi_result[2] & 0x07) == 0) + || + ((scsi_result[2] & 0x07) == 1 && + (scsi_result[3] & 0x0f) == 0)) + return 0; + return 1; +} + +/* + * The worker for scan_scsis. + * Returns the scsi_level of lun0 on this host, channel and dev (if already + * known), otherwise returns SCSI_2. + */ +static int find_lun0_scsi_level(unsigned int channel, unsigned int dev, + struct Scsi_Host *shpnt) +{ + int res = SCSI_2; + Scsi_Device *SDpnt; + + for (SDpnt = shpnt->host_queue; SDpnt; SDpnt = SDpnt->next) + { + if ((0 == SDpnt->lun) && (dev == SDpnt->id) && + (channel == SDpnt->channel)) + return (int)SDpnt->scsi_level; + } + /* haven't found lun0, should send INQUIRY but take easy route */ + return res; +} diff --git a/xen-2.4.16/drivers/scsi/scsi_syms.c b/xen-2.4.16/drivers/scsi/scsi_syms.c new file mode 100644 index 0000000000..8a25a456ae --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsi_syms.c @@ -0,0 +1,105 @@ +/* + * We should not even be trying to compile this if we are not doing + * a module. + */ +#define __NO_VERSION__ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "scsi.h" +#include +#include "hosts.h" +#include "constants.h" + +#include "sd.h" +#include + +/* + * This source file contains the symbol table used by scsi loadable + * modules. 
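+ *
+ * Low-level host adapters and the high-level drivers (sd, sr, st, sg)
+ * built as modules resolve these symbols at load time; EXPORT_SYMBOL()
+ * simply enters each one in the kernel symbol table.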
+ */ +EXPORT_SYMBOL(scsi_register_module); +EXPORT_SYMBOL(scsi_unregister_module); +EXPORT_SYMBOL(scsi_free); +EXPORT_SYMBOL(scsi_malloc); +EXPORT_SYMBOL(scsi_register); +EXPORT_SYMBOL(scsi_unregister); +EXPORT_SYMBOL(scsicam_bios_param); +EXPORT_SYMBOL(scsi_partsize); +EXPORT_SYMBOL(scsi_allocate_device); +EXPORT_SYMBOL(scsi_do_cmd); +EXPORT_SYMBOL(scsi_command_size); +EXPORT_SYMBOL(scsi_ioctl); +EXPORT_SYMBOL(print_command); +EXPORT_SYMBOL(print_sense); +EXPORT_SYMBOL(print_req_sense); +EXPORT_SYMBOL(print_msg); +EXPORT_SYMBOL(print_status); +EXPORT_SYMBOL(scsi_dma_free_sectors); +EXPORT_SYMBOL(kernel_scsi_ioctl); +EXPORT_SYMBOL(scsi_need_isa_buffer); +EXPORT_SYMBOL(scsi_release_command); +EXPORT_SYMBOL(print_Scsi_Cmnd); +EXPORT_SYMBOL(scsi_block_when_processing_errors); +EXPORT_SYMBOL(scsi_mark_host_reset); +EXPORT_SYMBOL(scsi_ioctl_send_command); +#if defined(CONFIG_SCSI_LOGGING) /* { */ +EXPORT_SYMBOL(scsi_logging_level); +#endif + +EXPORT_SYMBOL(scsi_allocate_request); +EXPORT_SYMBOL(scsi_release_request); +EXPORT_SYMBOL(scsi_wait_req); +EXPORT_SYMBOL(scsi_do_req); + +EXPORT_SYMBOL(scsi_report_bus_reset); +EXPORT_SYMBOL(scsi_block_requests); +EXPORT_SYMBOL(scsi_unblock_requests); + +EXPORT_SYMBOL(scsi_get_host_dev); +EXPORT_SYMBOL(scsi_free_host_dev); + +EXPORT_SYMBOL(scsi_sleep); + +EXPORT_SYMBOL(proc_print_scsidevice); +EXPORT_SYMBOL(proc_scsi); + +EXPORT_SYMBOL(scsi_io_completion); +EXPORT_SYMBOL(scsi_end_request); + +EXPORT_SYMBOL(scsi_register_blocked_host); +EXPORT_SYMBOL(scsi_deregister_blocked_host); + +/* + * This symbol is for the highlevel drivers (e.g. sg) only. + */ +EXPORT_SYMBOL(scsi_reset_provider); + +/* + * These are here only while I debug the rest of the scsi stuff. + */ +EXPORT_SYMBOL(scsi_hostlist); +EXPORT_SYMBOL(scsi_hosts); +EXPORT_SYMBOL(scsi_devicelist); +EXPORT_SYMBOL(scsi_device_types); + +/* + * Externalize timers so that HBAs can safely start/restart commands. + */ +extern void scsi_add_timer(Scsi_Cmnd *, int, void ((*) (Scsi_Cmnd *))); +extern int scsi_delete_timer(Scsi_Cmnd *); +EXPORT_SYMBOL(scsi_add_timer); +EXPORT_SYMBOL(scsi_delete_timer); diff --git a/xen-2.4.16/drivers/scsi/scsicam.c b/xen-2.4.16/drivers/scsi/scsicam.c new file mode 100644 index 0000000000..77c7846879 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/scsicam.c @@ -0,0 +1,225 @@ +/* + * scsicam.c - SCSI CAM support functions, use for HDIO_GETGEO, etc. + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * drew@Colorado.EDU + * +1 (303) 786-7975 + * + * For more information, please consult the SCSI-CAM draft. + */ + +#define __NO_VERSION__ +#include + +#include +#include +#include +#include +#include +#include "scsi.h" +#include "hosts.h" +#include "sd.h" +#include + +static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds, + unsigned int *secs); + + +/* + * Function : int scsicam_bios_param (Disk *disk, int dev, int *ip) + * + * Purpose : to determine the BIOS mapping used for a drive in a + * SCSI-CAM system, storing the results in ip as required + * by the HDIO_GETGEO ioctl(). + * + * Returns : -1 on failure, 0 on success. 
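+ *
+ * Worked example (an assumed disk of 2097152 512-byte sectors with no
+ * partition table): the setsize() fallback starts from C=1024, S=62,
+ * rounds H = 2097152/(1024*62) up to 34, then settles on S=61 and
+ * C = 2097152/(34*61) = 1011.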
+ * + */ + +int scsicam_bios_param(Disk * disk, /* SCSI disk */ + kdev_t dev, /* Device major, minor */ + int *ip /* Heads, sectors, cylinders in that order */ ) +{ + struct buffer_head *bh; + int ret_code; + int size = disk->capacity; + unsigned long temp_cyl; + + if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, block_size(dev)))) + return -1; + + /* try to infer mapping from partition table */ + ret_code = scsi_partsize(bh, (unsigned long) size, (unsigned int *) ip + 2, + (unsigned int *) ip + 0, (unsigned int *) ip + 1); + brelse(bh); + + if (ret_code == -1) { + /* pick some standard mapping with at most 1024 cylinders, + and at most 62 sectors per track - this works up to + 7905 MB */ + ret_code = setsize((unsigned long) size, (unsigned int *) ip + 2, + (unsigned int *) ip + 0, (unsigned int *) ip + 1); + } + /* if something went wrong, then apparently we have to return + a geometry with more than 1024 cylinders */ + if (ret_code || ip[0] > 255 || ip[1] > 63) { + ip[0] = 64; + ip[1] = 32; + temp_cyl = size / (ip[0] * ip[1]); + if (temp_cyl > 65534) { + ip[0] = 255; + ip[1] = 63; + } + ip[2] = size / (ip[0] * ip[1]); + } + return 0; +} + +/* + * Function : static int scsi_partsize(struct buffer_head *bh, unsigned long + * capacity,unsigned int *cyls, unsigned int *hds, unsigned int *secs); + * + * Purpose : to determine the BIOS mapping used to create the partition + * table, storing the results in *cyls, *hds, and *secs + * + * Returns : -1 on failure, 0 on success. + * + */ + +int scsi_partsize(struct buffer_head *bh, unsigned long capacity, + unsigned int *cyls, unsigned int *hds, unsigned int *secs) +{ + struct partition *p, *largest = NULL; + int i, largest_cyl; + int cyl, ext_cyl, end_head, end_cyl, end_sector; + unsigned int logical_end, physical_end, ext_physical_end; + + + if (*(unsigned short *) (bh->b_data + 510) == 0xAA55) { + for (largest_cyl = -1, p = (struct partition *) + (0x1BE + bh->b_data), i = 0; i < 4; ++i, ++p) { + if (!p->sys_ind) + continue; +#ifdef DEBUG + printk("scsicam_bios_param : partition %d has system \n", + i); +#endif + cyl = p->cyl + ((p->sector & 0xc0) << 2); + if (cyl > largest_cyl) { + largest_cyl = cyl; + largest = p; + } + } + } + if (largest) { + end_cyl = largest->end_cyl + ((largest->end_sector & 0xc0) << 2); + end_head = largest->end_head; + end_sector = largest->end_sector & 0x3f; + + if (end_head + 1 == 0 || end_sector == 0) + return -1; + +#ifdef DEBUG + printk("scsicam_bios_param : end at h = %d, c = %d, s = %d\n", + end_head, end_cyl, end_sector); +#endif + + physical_end = end_cyl * (end_head + 1) * end_sector + + end_head * end_sector + end_sector; + + /* This is the actual _sector_ number at the end */ + logical_end = get_unaligned(&largest->start_sect) + + get_unaligned(&largest->nr_sects); + + /* This is for >1023 cylinders */ + ext_cyl = (logical_end - (end_head * end_sector + end_sector)) + / (end_head + 1) / end_sector; + ext_physical_end = ext_cyl * (end_head + 1) * end_sector + + end_head * end_sector + end_sector; + +#ifdef DEBUG + printk("scsicam_bios_param : logical_end=%d physical_end=%d ext_physical_end=%d ext_cyl=%d\n" + ,logical_end, physical_end, ext_physical_end, ext_cyl); +#endif + + if ((logical_end == physical_end) || + (end_cyl == 1023 && ext_physical_end == logical_end)) { + *secs = end_sector; + *hds = end_head + 1; + *cyls = capacity / ((end_head + 1) * end_sector); + return 0; + } +#ifdef DEBUG + printk("scsicam_bios_param : logical (%u) != physical (%u)\n", + logical_end, physical_end); +#endif + } + 
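+	/* No usable geometry could be inferred from the partition table. */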
return -1; +} + +/* + * Function : static int setsize(unsigned long capacity,unsigned int *cyls, + * unsigned int *hds, unsigned int *secs); + * + * Purpose : to determine a near-optimal int 0x13 mapping for a + * SCSI disk in terms of lost space of size capacity, storing + * the results in *cyls, *hds, and *secs. + * + * Returns : -1 on failure, 0 on success. + * + * Extracted from + * + * WORKING X3T9.2 + * DRAFT 792D + * + * + * Revision 6 + * 10-MAR-94 + * Information technology - + * SCSI-2 Common access method + * transport and SCSI interface module + * + * ANNEX A : + * + * setsize() converts a read capacity value to int 13h + * head-cylinder-sector requirements. It minimizes the value for + * number of heads and maximizes the number of cylinders. This + * will support rather large disks before the number of heads + * will not fit in 4 bits (or 6 bits). This algorithm also + * minimizes the number of sectors that will be unused at the end + * of the disk while allowing for very large disks to be + * accommodated. This algorithm does not use physical geometry. + */ + +static int setsize(unsigned long capacity, unsigned int *cyls, unsigned int *hds, + unsigned int *secs) +{ + unsigned int rv = 0; + unsigned long heads, sectors, cylinders, temp; + + cylinders = 1024L; /* Set number of cylinders to max */ + sectors = 62L; /* Maximize sectors per track */ + + temp = cylinders * sectors; /* Compute divisor for heads */ + heads = capacity / temp; /* Compute value for number of heads */ + if (capacity % temp) { /* If no remainder, done! */ + heads++; /* Else, increment number of heads */ + temp = cylinders * heads; /* Compute divisor for sectors */ + sectors = capacity / temp; /* Compute value for sectors per + track */ + if (capacity % temp) { /* If no remainder, done! */ + sectors++; /* Else, increment number of sectors */ + temp = heads * sectors; /* Compute divisor for cylinders */ + cylinders = capacity / temp; /* Compute number of cylinders */ + } + } + if (cylinders == 0) + rv = (unsigned) -1; /* Give error if 0 cylinders */ + + *cyls = (unsigned int) cylinders; /* Stuff return values */ + *secs = (unsigned int) sectors; + *hds = (unsigned int) heads; + return (rv); +} diff --git a/xen-2.4.16/drivers/scsi/sd.c b/xen-2.4.16/drivers/scsi/sd.c new file mode 100644 index 0000000000..1e0749b9f0 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/sd.c @@ -0,0 +1,1462 @@ +/* + * sd.c Copyright (C) 1992 Drew Eckhardt + * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale + * + * Linux scsi disk driver + * Initial versions: Drew Eckhardt + * Subsequent revisions: Eric Youngdale + * + * + * + * Modified by Eric Youngdale ericy@andante.org to + * add scatter-gather, multiple outstanding request, and other + * enhancements. + * + * Modified by Eric Youngdale eric@andante.org to support loadable + * low-level scsi drivers. + * + * Modified by Jirka Hanika geo@ff.cuni.cz to support more + * scsi disks using eight major numbers. + * + * Modified by Richard Gooch rgooch@atnf.csiro.au to support devfs. + * + * Modified by Torben Mathiasen tmm@image.dk + * Resource allocation fixes in sd_init and cleanups. + * + * Modified by Alex Davis + * Fix problem where partition info not being read in sd_open. + * + * Modified by Alex Davis + * Fix problem where removable media could be ejected after sd_open. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#define MAJOR_NR SCSI_DISK0_MAJOR +#include +#include +#include "scsi.h" +#include "hosts.h" +#include "sd.h" +#include +#include "constants.h" +#include /* must follow "hosts.h" */ + +#include + +/* + * static const char RCSid[] = "$Header:"; + */ + +/* system major --> sd_gendisks index */ +#define SD_MAJOR_IDX(i) (MAJOR(i) & SD_MAJOR_MASK) +/* sd_gendisks index --> system major */ +#define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) + +#define SD_PARTITION(dev) ((SD_MAJOR_IDX(dev) << 8) | (MINOR(dev) & 255)) + +#define SCSI_DISKS_PER_MAJOR 16 +#define SD_MAJOR_NUMBER(i) SD_MAJOR((i) >> 8) +#define SD_MINOR_NUMBER(i) ((i) & 255) +#define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) +#define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) +#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) +#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) + +#define MAX_RETRIES 5 + +/* + * Time out in seconds for disks and Magneto-opticals (which are slower). + */ + +#define SD_TIMEOUT (30 * HZ) +#define SD_MOD_TIMEOUT (75 * HZ) + +static Scsi_Disk *rscsi_disks; +static struct gendisk *sd_gendisks; +static int *sd_sizes; +static int *sd_blocksizes; +static int *sd_hardsizes; /* Hardware sector size */ +static int *sd_max_sectors; + +static int check_scsidisk_media_change(kdev_t); +static int fop_revalidate_scsidisk(kdev_t); + +static int sd_init_onedisk(int); + + +static int sd_init(void); +static void sd_finish(void); +static int sd_attach(Scsi_Device *); +static int sd_detect(Scsi_Device *); +static void sd_detach(Scsi_Device *); +static int sd_init_command(Scsi_Cmnd *); + +static struct Scsi_Device_Template sd_template = { + name:"disk", + tag:"sd", + scsi_type:TYPE_DISK, + major:SCSI_DISK0_MAJOR, + /* + * Secondary range of majors that this driver handles. + */ + min_major:SCSI_DISK1_MAJOR, + max_major:SCSI_DISK7_MAJOR, + blk:1, + detect:sd_detect, + init:sd_init, + finish:sd_finish, + attach:sd_attach, + detach:sd_detach, + init_command:sd_init_command, +}; + + +static void rw_intr(Scsi_Cmnd * SCpnt); + +#if defined(CONFIG_PPC) +/* + * Moved from arch/ppc/pmac_setup.c. This is where it really belongs. + */ +kdev_t __init +sd_find_target(void *host, int tgt) +{ + Scsi_Disk *dp; + int i; + for (dp = rscsi_disks, i = 0; i < sd_template.dev_max; ++i, ++dp) + if (dp->device != NULL && dp->device->host == host + && dp->device->id == tgt) + return MKDEV_SD(i); + return 0; +} +#endif + +static int sd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) +{ + kdev_t dev = inode->i_rdev; + struct Scsi_Host * host; + Scsi_Device * SDev; + int diskinfo[4]; + + SDev = rscsi_disks[DEVICE_NR(dev)].device; + if (!SDev) + return -ENODEV; + + /* + * If we are in the middle of error recovery, don't let anyone + * else try and use this device. Also, if error recovery fails, it + * may try and take the device offline, in which case all further + * access to the device is prohibited. 
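+	 *
+	 * (scsi_block_when_processing_errors() is believed to sleep until
+	 * any error recovery in progress completes and then report whether
+	 * the device is still online.)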
+ */ + + if( !scsi_block_when_processing_errors(SDev) ) + { + return -ENODEV; + } + + switch (cmd) + { + case HDIO_GETGEO: /* Return BIOS disk parameters */ + { + struct hd_geometry *loc = (struct hd_geometry *) arg; + if(!loc) + return -EINVAL; + + host = rscsi_disks[DEVICE_NR(dev)].device->host; + + /* default to most commonly used values */ + + diskinfo[0] = 0x40; + diskinfo[1] = 0x20; + diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + + /* override with calculated, extended default, or driver values */ + + if(host->hostt->bios_param != NULL) + host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], + dev, + &diskinfo[0]); + else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], + dev, &diskinfo[0]); + + if (put_user(diskinfo[0], &loc->heads) || + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], &loc->cylinders) || + put_user(sd_gendisks[SD_MAJOR_IDX( + inode->i_rdev)].part[MINOR( + inode->i_rdev)].start_sect, &loc->start)) + return -EFAULT; + return 0; + } + case HDIO_GETGEO_BIG: + { + struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; + + if(!loc) + return -EINVAL; + + host = rscsi_disks[DEVICE_NR(dev)].device->host; + + /* default to most commonly used values */ + + diskinfo[0] = 0x40; + diskinfo[1] = 0x20; + diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + + /* override with calculated, extended default, or driver values */ + + if(host->hostt->bios_param != NULL) + host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], + dev, + &diskinfo[0]); + else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], + dev, &diskinfo[0]); + + if (put_user(diskinfo[0], &loc->heads) || + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || + put_user(sd_gendisks[SD_MAJOR_IDX( + inode->i_rdev)].part[MINOR( + inode->i_rdev)].start_sect, &loc->start)) + return -EFAULT; + return 0; + } + case BLKGETSIZE: + case BLKGETSIZE64: + case BLKROSET: + case BLKROGET: + case BLKRASET: + case BLKRAGET: + case BLKFLSBUF: + case BLKSSZGET: + case BLKPG: + case BLKELVGET: + case BLKELVSET: + case BLKBSZGET: + case BLKBSZSET: + return blk_ioctl(inode->i_rdev, cmd, arg); + + case BLKRRPART: /* Re-read partition tables */ + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return revalidate_scsidisk(dev, 1); + + default: + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + } +} + +static void sd_devname(unsigned int disknum, char *buffer) +{ + if (disknum < 26) + sprintf(buffer, "sd%c", 'a' + disknum); + else { + unsigned int min1; + unsigned int min2; + /* + * For larger numbers of disks, we need to go to a new + * naming scheme. 
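+		 *
+		 * Two-letter names follow the single letters: disknum 26
+		 * becomes "sdaa", 27 "sdab", ..., 51 "sdaz", 52 "sdba",
+		 * and so on.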
+ */ + min1 = disknum / 26; + min2 = disknum % 26; + sprintf(buffer, "sd%c%c", 'a' + min1 - 1, 'a' + min2); + } +} + +static request_queue_t *sd_find_queue(kdev_t dev) +{ + Scsi_Disk *dpnt; + int target; + target = DEVICE_NR(dev); + + dpnt = &rscsi_disks[target]; + if (!dpnt->device) + return NULL; /* No such device */ + return &dpnt->device->request_queue; +} + +static int sd_init_command(Scsi_Cmnd * SCpnt) +{ + int dev, block, this_count; + struct hd_struct *ppnt; + Scsi_Disk *dpnt; +#if CONFIG_SCSI_LOGGING + char nbuff[6]; +#endif + + ppnt = &sd_gendisks[SD_MAJOR_IDX(SCpnt->request.rq_dev)].part[MINOR(SCpnt->request.rq_dev)]; + dev = DEVICE_NR(SCpnt->request.rq_dev); + + block = SCpnt->request.sector; + this_count = SCpnt->request_bufflen >> 9; + + SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = 0x%x, block = %d\n", + SCpnt->request.rq_dev, block)); + + dpnt = &rscsi_disks[dev]; + if (dev >= sd_template.dev_max || + !dpnt->device || + !dpnt->device->online || + block + SCpnt->request.nr_sectors > ppnt->nr_sects) { + SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", SCpnt->request.nr_sectors)); + SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); + return 0; + } + block += ppnt->start_sect; + if (dpnt->device->changed) { + /* + * quietly refuse to do anything to a changed disc until the changed + * bit has been reset + */ + /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */ + return 0; + } + SCSI_LOG_HLQUEUE(2, sd_devname(dev, nbuff)); + SCSI_LOG_HLQUEUE(2, printk("%s : real dev = /dev/%d, block = %d\n", + nbuff, dev, block)); + + /* + * If we have a 1K hardware sectorsize, prevent access to single + * 512 byte sectors. In theory we could handle this - in fact + * the scsi cdrom driver must be able to handle this because + * we typically use 1K blocksizes, and cdroms typically have + * 2K hardware sectorsizes. Of course, things are simpler + * with the cdrom, since it is read-only. For performance + * reasons, the filesystems should be able to handle this + * and not force the scsi disk driver to use bounce buffers + * for this. + */ + if (dpnt->device->sector_size == 1024) { + if ((block & 1) || (SCpnt->request.nr_sectors & 1)) { + printk("sd.c:Bad block number requested"); + return 0; + } else { + block = block >> 1; + this_count = this_count >> 1; + } + } + if (dpnt->device->sector_size == 2048) { + if ((block & 3) || (SCpnt->request.nr_sectors & 3)) { + printk("sd.c:Bad block number requested"); + return 0; + } else { + block = block >> 2; + this_count = this_count >> 2; + } + } + if (dpnt->device->sector_size == 4096) { + if ((block & 7) || (SCpnt->request.nr_sectors & 7)) { + printk("sd.c:Bad block number requested"); + return 0; + } else { + block = block >> 3; + this_count = this_count >> 3; + } + } + switch (SCpnt->request.cmd) { + case WRITE: + if (!dpnt->device->writeable) { + return 0; + } + SCpnt->cmnd[0] = WRITE_6; + SCpnt->sc_data_direction = SCSI_DATA_WRITE; + break; + case READ: + SCpnt->cmnd[0] = READ_6; + SCpnt->sc_data_direction = SCSI_DATA_READ; + break; + default: + panic("Unknown sd command %d\n", SCpnt->request.cmd); + } + + SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", + nbuff, + (SCpnt->request.cmd == WRITE) ? "writing" : "reading", + this_count, SCpnt->request.nr_sectors)); + + SCpnt->cmnd[1] = (SCpnt->device->scsi_level <= SCSI_2) ? 
+ ((SCpnt->lun << 5) & 0xe0) : 0; + + if (((this_count > 0xff) || (block > 0x1fffff)) || SCpnt->device->ten) { + if (this_count > 0xffff) + this_count = 0xffff; + + SCpnt->cmnd[0] += READ_10 - READ_6; + SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; + SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; + SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; + SCpnt->cmnd[5] = (unsigned char) block & 0xff; + SCpnt->cmnd[6] = SCpnt->cmnd[9] = 0; + SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; + SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; + } else { + if (this_count > 0xff) + this_count = 0xff; + + SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); + SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); + SCpnt->cmnd[3] = (unsigned char) block & 0xff; + SCpnt->cmnd[4] = (unsigned char) this_count; + SCpnt->cmnd[5] = 0; + } + + /* + * We shouldn't disconnect in the middle of a sector, so with a dumb + * host adapter, it's safe to assume that we can at least transfer + * this many bytes between each connect / disconnect. + */ + SCpnt->transfersize = dpnt->device->sector_size; + SCpnt->underflow = this_count << 9; + + SCpnt->allowed = MAX_RETRIES; + SCpnt->timeout_per_command = (SCpnt->device->type == TYPE_DISK ? + SD_TIMEOUT : SD_MOD_TIMEOUT); + + /* + * This is the completion routine we use. This is matched in terms + * of capability to this function. + */ + SCpnt->done = rw_intr; + + /* + * This indicates that the command is ready from our end to be + * queued. + */ + return 1; +} + +static int sd_open(struct inode *inode, struct file *filp) +{ + int target, retval = -ENXIO; + Scsi_Device * SDev; + target = DEVICE_NR(inode->i_rdev); + + SCSI_LOG_HLQUEUE(1, printk("target=%d, max=%d\n", target, sd_template.dev_max)); + + if (target >= sd_template.dev_max || !rscsi_disks[target].device) + return -ENXIO; /* No such device */ + + /* + * If the device is in error recovery, wait until it is done. + * If the device is offline, then disallow any access to it. + */ + if (!scsi_block_when_processing_errors(rscsi_disks[target].device)) { + return -ENXIO; + } + /* + * Make sure that only one process can do a check_change_disk at one time. + * This is also used to lock out further access when the partition table + * is being re-read. + */ + + while (rscsi_disks[target].device->busy) { + barrier(); + cpu_relax(); + } + /* + * The following code can sleep. + * Module unloading must be prevented + */ + SDev = rscsi_disks[target].device; + if (SDev->host->hostt->module) + __MOD_INC_USE_COUNT(SDev->host->hostt->module); + if (sd_template.module) + __MOD_INC_USE_COUNT(sd_template.module); + SDev->access_count++; + + if (rscsi_disks[target].device->removable) { + SDev->allow_revalidate = 1; + check_disk_change(inode->i_rdev); + SDev->allow_revalidate = 0; + + /* + * If the drive is empty, just let the open fail. + */ + if ((!rscsi_disks[target].ready) && !(filp->f_flags & O_NDELAY)) { + retval = -ENOMEDIUM; + goto error_out; + } + + /* + * Similarly, if the device has the write protect tab set, + * have the open fail if the user expects to be able to write + * to the thing. + */ + if ((rscsi_disks[target].write_prot) && (filp->f_mode & 2)) { + retval = -EROFS; + goto error_out; + } + } + /* + * It is possible that the disk changing stuff resulted in the device + * being taken offline. If this is the case, report this to the user, + * and don't pretend that + * the open actually succeeded. 
+ */ + if (!SDev->online) { + goto error_out; + } + /* + * See if we are requesting a non-existent partition. Do this + * after checking for disk change. + */ + if (sd_sizes[SD_PARTITION(inode->i_rdev)] == 0) { + goto error_out; + } + + if (SDev->removable) + if (SDev->access_count==1) + if (scsi_block_when_processing_errors(SDev)) + scsi_ioctl(SDev, SCSI_IOCTL_DOORLOCK, NULL); + + + return 0; + +error_out: + SDev->access_count--; + if (SDev->host->hostt->module) + __MOD_DEC_USE_COUNT(SDev->host->hostt->module); + if (sd_template.module) + __MOD_DEC_USE_COUNT(sd_template.module); + return retval; +} + +static int sd_release(struct inode *inode, struct file *file) +{ + int target; + Scsi_Device * SDev; + + target = DEVICE_NR(inode->i_rdev); + SDev = rscsi_disks[target].device; + if (!SDev) + return -ENODEV; + + SDev->access_count--; + + if (SDev->removable) { + if (!SDev->access_count) + if (scsi_block_when_processing_errors(SDev)) + scsi_ioctl(SDev, SCSI_IOCTL_DOORUNLOCK, NULL); + } + if (SDev->host->hostt->module) + __MOD_DEC_USE_COUNT(SDev->host->hostt->module); + if (sd_template.module) + __MOD_DEC_USE_COUNT(sd_template.module); + return 0; +} + +static struct block_device_operations sd_fops = +{ + owner: THIS_MODULE, + open: sd_open, + release: sd_release, + ioctl: sd_ioctl, + check_media_change: check_scsidisk_media_change, + revalidate: fop_revalidate_scsidisk +}; + +/* + * If we need more than one SCSI disk major (i.e. more than + * 16 SCSI disks), we'll have to kmalloc() more gendisks later. + */ + +static struct gendisk sd_gendisk = +{ + major: SCSI_DISK0_MAJOR, + major_name: "sd", + minor_shift: 4, + max_p: 1 << 4, + fops: &sd_fops, +}; + +#define SD_GENDISK(i) sd_gendisks[(i) / SCSI_DISKS_PER_MAJOR] + +/* + * rw_intr is the interrupt routine for the device driver. + * It will be notified on the end of a SCSI read / write, and + * will take one of several actions based on success or failure. + */ + +static void rw_intr(Scsi_Cmnd * SCpnt) +{ + int result = SCpnt->result; +#if CONFIG_SCSI_LOGGING + char nbuff[6]; +#endif + int this_count = SCpnt->bufflen >> 9; + int good_sectors = (result == 0 ? this_count : 0); + int block_sectors = 1; + long error_sector; + + SCSI_LOG_HLCOMPLETE(1, sd_devname(DEVICE_NR(SCpnt->request.rq_dev), nbuff)); + + SCSI_LOG_HLCOMPLETE(1, printk("%s : rw_intr(%d, %x [%x %x])\n", nbuff, + SCpnt->host->host_no, + result, + SCpnt->sense_buffer[0], + SCpnt->sense_buffer[2])); + + /* + Handle MEDIUM ERRORs that indicate partial success. Since this is a + relatively rare error condition, no care is taken to avoid + unnecessary additional work such as memcpy's that could be avoided. 
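+
+	   The failing LBA reported in the sense data is rounded down to
+	   the start of the containing filesystem block; sectors before
+	   that point count as transferred, the rest are reported failed.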
+ */ + + /* An error occurred */ + if (driver_byte(result) != 0 && /* An error occured */ + SCpnt->sense_buffer[0] == 0xF0) { /* Sense data is valid */ + switch (SCpnt->sense_buffer[2]) { + case MEDIUM_ERROR: + error_sector = (SCpnt->sense_buffer[3] << 24) | + (SCpnt->sense_buffer[4] << 16) | + (SCpnt->sense_buffer[5] << 8) | + SCpnt->sense_buffer[6]; + if (SCpnt->request.bh != NULL) + block_sectors = SCpnt->request.bh->b_size >> 9; + switch (SCpnt->device->sector_size) { + case 1024: + error_sector <<= 1; + if (block_sectors < 2) + block_sectors = 2; + break; + case 2048: + error_sector <<= 2; + if (block_sectors < 4) + block_sectors = 4; + break; + case 4096: + error_sector <<=3; + if (block_sectors < 8) + block_sectors = 8; + break; + case 256: + error_sector >>= 1; + break; + default: + break; + } + error_sector -= sd_gendisks[SD_MAJOR_IDX( + SCpnt->request.rq_dev)].part[MINOR( + SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); + good_sectors = error_sector - SCpnt->request.sector; + if (good_sectors < 0 || good_sectors >= this_count) + good_sectors = 0; + break; + + case RECOVERED_ERROR: + /* + * An error occured, but it recovered. Inform the + * user, but make sure that it's not treated as a + * hard error. + */ + print_sense("sd", SCpnt); + result = 0; + SCpnt->sense_buffer[0] = 0x0; + good_sectors = this_count; + break; + + case ILLEGAL_REQUEST: + if (SCpnt->device->ten == 1) { + if (SCpnt->cmnd[0] == READ_10 || + SCpnt->cmnd[0] == WRITE_10) + SCpnt->device->ten = 0; + } + break; + + default: + break; + } + } + /* + * This calls the generic completion function, now that we know + * how many actual sectors finished, and how many sectors we need + * to say have failed. + */ + scsi_io_completion(SCpnt, good_sectors, block_sectors); +} +/* + * requeue_sd_request() is the request handler function for the sd driver. + * Its function in life is to take block device requests, and translate + * them to SCSI commands. + */ + + +static int check_scsidisk_media_change(kdev_t full_dev) +{ + int retval; + int target; + int flag = 0; + Scsi_Device * SDev; + + target = DEVICE_NR(full_dev); + SDev = rscsi_disks[target].device; + + if (target >= sd_template.dev_max || !SDev) { + printk("SCSI disk request error: invalid device.\n"); + return 0; + } + if (!SDev->removable) + return 0; + + /* + * If the device is offline, don't send any commands - just pretend as + * if the command failed. If the device ever comes back online, we + * can deal with it then. It is only because of unrecoverable errors + * that we would ever take a device offline in the first place. + */ + if (SDev->online == FALSE) { + rscsi_disks[target].ready = 0; + SDev->changed = 1; + return 1; /* This will force a flush, if called from + * check_disk_change */ + } + + /* Using Start/Stop enables differentiation between drive with + * no cartridge loaded - NOT READY, drive with changed cartridge - + * UNIT ATTENTION, or with same cartridge - GOOD STATUS. + * This also handles drives that auto spin down. eg iomega jaz 1GB + * as this will spin up the drive. + */ + retval = -ENODEV; + if (scsi_block_when_processing_errors(SDev)) + retval = scsi_ioctl(SDev, SCSI_IOCTL_START_UNIT, NULL); + + if (retval) { /* Unable to test, unit probably not ready. + * This usually means there is no disc in the + * drive. Mark as changed, and we will figure + * it out later once the drive is available + * again. 
*/ + + rscsi_disks[target].ready = 0; + SDev->changed = 1; + return 1; /* This will force a flush, if called from + * check_disk_change */ + } + /* + * for removable scsi disk ( FLOPTICAL ) we have to recognise the + * presence of disk in the drive. This is kept in the Scsi_Disk + * struct and tested at open ! Daniel Roche ( dan@lectra.fr ) + */ + + rscsi_disks[target].ready = 1; /* FLOPTICAL */ + + retval = SDev->changed; + if (!flag) + SDev->changed = 0; + return retval; +} + +static int sd_init_onedisk(int i) +{ + unsigned char cmd[10]; + char nbuff[6]; + unsigned char *buffer; + unsigned long spintime_value = 0; + int the_result, retries, spintime; + int sector_size; + Scsi_Request *SRpnt; + + /* + * Get the name of the disk, in case we need to log it somewhere. + */ + sd_devname(i, nbuff); + + /* + * If the device is offline, don't try and read capacity or any + * of the other niceties. + */ + if (rscsi_disks[i].device->online == FALSE) + return i; + + /* + * We need to retry the READ_CAPACITY because a UNIT_ATTENTION is + * considered a fatal error, and many devices report such an error + * just after a scsi bus reset. + */ + + SRpnt = scsi_allocate_request(rscsi_disks[i].device); + if (!SRpnt) { + printk(KERN_WARNING "(sd_init_onedisk:) Request allocation failure.\n"); + return i; + } + + buffer = (unsigned char *) scsi_malloc(512); + if (!buffer) { + printk(KERN_WARNING "(sd_init_onedisk:) Memory allocation failure.\n"); + scsi_release_request(SRpnt); + return i; + } + + spintime = 0; + + /* Spin up drives, as required. Only do this at boot time */ + /* Spinup needs to be done for module loads too. */ + do { + retries = 0; + + while (retries < 3) { + cmd[0] = TEST_UNIT_READY; + cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ? + ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0; + memset((void *) &cmd[2], 0, 8); + SRpnt->sr_cmd_len = 0; + SRpnt->sr_sense_buffer[0] = 0; + SRpnt->sr_sense_buffer[2] = 0; + SRpnt->sr_data_direction = SCSI_DATA_NONE; + + scsi_wait_req (SRpnt, (void *) cmd, (void *) buffer, + 0/*512*/, SD_TIMEOUT, MAX_RETRIES); + + the_result = SRpnt->sr_result; + retries++; + if (the_result == 0 + || SRpnt->sr_sense_buffer[2] != UNIT_ATTENTION) + break; + } + + /* + * If the drive has indicated to us that it doesn't have + * any media in it, don't bother with any of the rest of + * this crap. + */ + if( the_result != 0 + && ((driver_byte(the_result) & DRIVER_SENSE) != 0) + && SRpnt->sr_sense_buffer[2] == UNIT_ATTENTION + && SRpnt->sr_sense_buffer[12] == 0x3A ) { + rscsi_disks[i].capacity = 0x1fffff; + sector_size = 512; + rscsi_disks[i].device->changed = 1; + rscsi_disks[i].ready = 0; + break; + } + + /* Look for non-removable devices that return NOT_READY. + * Issue command to spin up drive for these cases. */ + if (the_result && !rscsi_disks[i].device->removable && + SRpnt->sr_sense_buffer[2] == NOT_READY) { + unsigned long time1; + if (!spintime) { + printk("%s: Spinning up disk...", nbuff); + cmd[0] = START_STOP; + cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ? 
+ ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0; + cmd[1] |= 1; /* Return immediately */ + memset((void *) &cmd[2], 0, 8); + cmd[4] = 1; /* Start spin cycle */ + SRpnt->sr_cmd_len = 0; + SRpnt->sr_sense_buffer[0] = 0; + SRpnt->sr_sense_buffer[2] = 0; + + SRpnt->sr_data_direction = SCSI_DATA_READ; + scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer, + 0/*512*/, SD_TIMEOUT, MAX_RETRIES); + spintime_value = jiffies; + } + spintime = 1; + time1 = HZ; + /* Wait 1 second for next try */ + do { + current->state = TASK_UNINTERRUPTIBLE; + time1 = schedule_timeout(time1); + } while(time1); + printk("."); + } + } while (the_result && spintime && + time_after(spintime_value + 100 * HZ, jiffies)); + if (spintime) { + if (the_result) + printk("not responding...\n"); + else + printk("ready\n"); + } + retries = 3; + do { + cmd[0] = READ_CAPACITY; + cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ? + ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0; + memset((void *) &cmd[2], 0, 8); + memset((void *) buffer, 0, 8); + SRpnt->sr_cmd_len = 0; + SRpnt->sr_sense_buffer[0] = 0; + SRpnt->sr_sense_buffer[2] = 0; + + SRpnt->sr_data_direction = SCSI_DATA_READ; + scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer, + 8, SD_TIMEOUT, MAX_RETRIES); + + the_result = SRpnt->sr_result; + retries--; + + } while (the_result && retries); + + /* + * The SCSI standard says: + * "READ CAPACITY is necessary for self configuring software" + * While not mandatory, support of READ CAPACITY is strongly + * encouraged. + * We used to die if we couldn't successfully do a READ CAPACITY. + * But, now we go on about our way. The side effects of this are + * + * 1. We can't know block size with certainty. I have said + * "512 bytes is it" as this is most common. + * + * 2. Recovery from when someone attempts to read past the + * end of the raw device will be slower. + */ + + if (the_result) { + printk("%s : READ CAPACITY failed.\n" + "%s : status = %x, message = %02x, host = %d, driver = %02x \n", + nbuff, nbuff, + status_byte(the_result), + msg_byte(the_result), + host_byte(the_result), + driver_byte(the_result) + ); + if (driver_byte(the_result) & DRIVER_SENSE) + print_req_sense("sd", SRpnt); + else + printk("%s : sense not available. \n", nbuff); + + printk("%s : block size assumed to be 512 bytes, disk size 1GB. \n", + nbuff); + rscsi_disks[i].capacity = 0x1fffff; + sector_size = 512; + + /* Set dirty bit for removable devices if not ready - + * sometimes drives will not report this properly. */ + if (rscsi_disks[i].device->removable && + SRpnt->sr_sense_buffer[2] == NOT_READY) + rscsi_disks[i].device->changed = 1; + + } else { + /* + * FLOPTICAL, if read_capa is ok, drive is assumed to be ready + */ + rscsi_disks[i].ready = 1; + + rscsi_disks[i].capacity = 1 + ((buffer[0] << 24) | + (buffer[1] << 16) | + (buffer[2] << 8) | + buffer[3]); + + sector_size = (buffer[4] << 24) | + (buffer[5] << 16) | (buffer[6] << 8) | buffer[7]; + + if (sector_size == 0) { + sector_size = 512; + printk("%s : sector size 0 reported, assuming 512.\n", + nbuff); + } + if (sector_size != 512 && + sector_size != 1024 && + sector_size != 2048 && + sector_size != 4096 && + sector_size != 256) { + printk("%s : unsupported sector size %d.\n", + nbuff, sector_size); + /* + * The user might want to re-format the drive with + * a supported sectorsize. Once this happens, it + * would be relatively trivial to set the thing up. + * For this reason, we leave the thing in the table. 
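+			 * (Zeroing the capacity below keeps the device
+			 * attached while effectively refusing any further
+			 * I/O to it.)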
+ */ + rscsi_disks[i].capacity = 0; + } + if (sector_size > 1024) { + int m; + + /* + * We must fix the sd_blocksizes and sd_hardsizes + * to allow us to read the partition tables. + * The disk reading code does not allow for reading + * of partial sectors. + */ + for (m = i << 4; m < ((i + 1) << 4); m++) { + sd_blocksizes[m] = sector_size; + } + } { + /* + * The msdos fs needs to know the hardware sector size + * So I have created this table. See ll_rw_blk.c + * Jacques Gelinas (Jacques@solucorp.qc.ca) + */ + int m; + int hard_sector = sector_size; + int sz = rscsi_disks[i].capacity * (hard_sector/256); + + /* There are 16 minors allocated for each major device */ + for (m = i << 4; m < ((i + 1) << 4); m++) { + sd_hardsizes[m] = hard_sector; + } + + printk("SCSI device %s: " + "%d %d-byte hdwr sectors (%d MB)\n", + nbuff, rscsi_disks[i].capacity, + hard_sector, (sz/2 - sz/1250 + 974)/1950); + } + + /* Rescale capacity to 512-byte units */ + if (sector_size == 4096) + rscsi_disks[i].capacity <<= 3; + if (sector_size == 2048) + rscsi_disks[i].capacity <<= 2; + if (sector_size == 1024) + rscsi_disks[i].capacity <<= 1; + if (sector_size == 256) + rscsi_disks[i].capacity >>= 1; + } + + + /* + * Unless otherwise specified, this is not write protected. + */ + rscsi_disks[i].write_prot = 0; + if (rscsi_disks[i].device->removable && rscsi_disks[i].ready) { + /* FLOPTICAL */ + + /* + * For removable scsi disk ( FLOPTICAL ) we have to recognise + * the Write Protect Flag. This flag is kept in the Scsi_Disk + * struct and tested at open ! + * Daniel Roche ( dan@lectra.fr ) + * + * Changed to get all pages (0x3f) rather than page 1 to + * get around devices which do not have a page 1. Since + * we're only interested in the header anyway, this should + * be fine. + * -- Matthew Dharm (mdharm-scsi@one-eyed-alien.net) + */ + + memset((void *) &cmd[0], 0, 8); + cmd[0] = MODE_SENSE; + cmd[1] = (rscsi_disks[i].device->scsi_level <= SCSI_2) ? + ((rscsi_disks[i].device->lun << 5) & 0xe0) : 0; + cmd[2] = 0x3f; /* Get all pages */ + cmd[4] = 255; /* Ask for 255 bytes, even tho we want just the first 8 */ + SRpnt->sr_cmd_len = 0; + SRpnt->sr_sense_buffer[0] = 0; + SRpnt->sr_sense_buffer[2] = 0; + + /* same code as READCAPA !! */ + SRpnt->sr_data_direction = SCSI_DATA_READ; + scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer, + 512, SD_TIMEOUT, MAX_RETRIES); + + the_result = SRpnt->sr_result; + + if (the_result) { + printk("%s: test WP failed, assume Write Enabled\n", nbuff); + } else { + rscsi_disks[i].write_prot = ((buffer[2] & 0x80) != 0); + printk("%s: Write Protect is %s\n", nbuff, + rscsi_disks[i].write_prot ? "on" : "off"); + } + + } /* check for write protect */ + SRpnt->sr_device->ten = 1; + SRpnt->sr_device->remap = 1; + SRpnt->sr_device->sector_size = sector_size; + /* Wake up a process waiting for device */ + scsi_release_request(SRpnt); + SRpnt = NULL; + + scsi_free(buffer, 512); + return i; +} + +/* + * The sd_init() function looks at all SCSI drives present, determines + * their size, and reads partition table entries for them. 
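+ *
+ * Each disk owns 16 minors (minor_shift == 4), so sd_sizes,
+ * sd_blocksizes and sd_hardsizes below are allocated dev_max << 4
+ * entries and indexed by (disk_index << 4) + partition.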
+ */ + +static int sd_registered; + +static int sd_init() +{ + int i; + + if (sd_template.dev_noticed == 0) + return 0; + + if (!rscsi_disks) + sd_template.dev_max = sd_template.dev_noticed + SD_EXTRA_DEVS; + + if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) + sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + + if (!sd_registered) { + for (i = 0; i < N_USED_SD_MAJORS; i++) { + if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + sd_template.dev_noticed = 0; + return 1; + } + } + sd_registered++; + } + /* We do not support attaching loadable devices yet. */ + if (rscsi_disks) + return 0; + + rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); + if (!rscsi_disks) + goto cleanup_devfs; + memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); + + /* for every (necessary) major: */ + sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); + if (!sd_sizes) + goto cleanup_disks; + memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); + + sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); + if (!sd_blocksizes) + goto cleanup_sizes; + + sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); + if (!sd_hardsizes) + goto cleanup_blocksizes; + + sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); + if (!sd_max_sectors) + goto cleanup_max_sectors; + + for (i = 0; i < sd_template.dev_max << 4; i++) { + sd_blocksizes[i] = 1024; + sd_hardsizes[i] = 512; + /* + * Allow lowlevel device drivers to generate 512k large scsi + * commands if they know what they're doing and they ask for it + * explicitly via the SHpnt->max_sectors API. + */ + sd_max_sectors[i] = MAX_SEGMENTS*8; + } + + for (i = 0; i < N_USED_SD_MAJORS; i++) { + blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); + hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); + max_sectors[SD_MAJOR(i)] = sd_max_sectors + i * (SCSI_DISKS_PER_MAJOR << 4); + } + + sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); + if (!sd_gendisks) + goto cleanup_sd_gendisks; + for (i = 0; i < N_USED_SD_MAJORS; i++) { + sd_gendisks[i] = sd_gendisk; /* memcpy */ + sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, + GFP_ATOMIC); + if (!sd_gendisks[i].de_arr) + goto cleanup_gendisks_de_arr; + memset (sd_gendisks[i].de_arr, 0, + SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); + sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, + GFP_ATOMIC); + if (!sd_gendisks[i].flags) + goto cleanup_gendisks_flags; + memset (sd_gendisks[i].flags, 0, + SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + sd_gendisks[i].major = SD_MAJOR(i); + sd_gendisks[i].major_name = "sd"; + sd_gendisks[i].minor_shift = 4; + sd_gendisks[i].max_p = 1 << 4; + sd_gendisks[i].part = kmalloc((SCSI_DISKS_PER_MAJOR << 4) * sizeof(struct hd_struct), + GFP_ATOMIC); + if (!sd_gendisks[i].part) + goto cleanup_gendisks_part; + memset(sd_gendisks[i].part, 0, (SCSI_DISKS_PER_MAJOR << 4) * sizeof(struct hd_struct)); + sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].nr_real = 0; + sd_gendisks[i].real_devices = + (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); + } + + return 0; + +cleanup_gendisks_part: + kfree(sd_gendisks[i].flags); +cleanup_gendisks_flags: + kfree(sd_gendisks[i].de_arr); 
+cleanup_gendisks_de_arr: + while (--i >= 0 ) { + kfree(sd_gendisks[i].de_arr); + kfree(sd_gendisks[i].flags); + kfree(sd_gendisks[i].part); + } + kfree(sd_gendisks); + sd_gendisks = NULL; +cleanup_sd_gendisks: + kfree(sd_max_sectors); +cleanup_max_sectors: + kfree(sd_hardsizes); +cleanup_blocksizes: + kfree(sd_blocksizes); +cleanup_sizes: + kfree(sd_sizes); +cleanup_disks: + kfree(rscsi_disks); + rscsi_disks = NULL; +cleanup_devfs: + for (i = 0; i < N_USED_SD_MAJORS; i++) { + devfs_unregister_blkdev(SD_MAJOR(i), "sd"); + } + sd_registered--; + sd_template.dev_noticed = 0; + return 1; +} + + +static void sd_finish() +{ + int i; + + for (i = 0; i < N_USED_SD_MAJORS; i++) { + blk_dev[SD_MAJOR(i)].queue = sd_find_queue; + add_gendisk(&sd_gendisks[i]); + } + + for (i = 0; i < sd_template.dev_max; ++i) + if (!rscsi_disks[i].capacity && rscsi_disks[i].device) { + sd_init_onedisk(i); + if (!rscsi_disks[i].has_part_table) { + sd_sizes[i << 4] = rscsi_disks[i].capacity; + register_disk(&SD_GENDISK(i), MKDEV_SD(i), + 1<<4, &sd_fops, + rscsi_disks[i].capacity); + rscsi_disks[i].has_part_table = 1; + } + } + /* If our host adapter is capable of scatter-gather, then we increase + * the read-ahead to 60 blocks (120 sectors). If not, we use + * a two block (4 sector) read ahead. We can only respect this with the + * granularity of every 16 disks (one device major). + */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { + read_ahead[SD_MAJOR(i)] = + (rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device + && rscsi_disks[i * SCSI_DISKS_PER_MAJOR].device->host->sg_tablesize) + ? 120 /* 120 sector read-ahead */ + : 4; /* 4 sector read-ahead */ + } + + return; +} + +static int sd_detect(Scsi_Device * SDp) +{ + if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD) + return 0; + sd_template.dev_noticed++; + return 1; +} + +static int sd_attach(Scsi_Device * SDp) +{ + unsigned int devnum; + Scsi_Disk *dpnt; + int i; + char nbuff[6]; + + if (SDp->type != TYPE_DISK && SDp->type != TYPE_MOD) + return 0; + + if (sd_template.nr_dev >= sd_template.dev_max || rscsi_disks == NULL) { + SDp->attached--; + return 1; + } + for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) + if (!dpnt->device) + break; + + if (i >= sd_template.dev_max) { + printk(KERN_WARNING "scsi_devices corrupt (sd)," + " nr_dev %d dev_max %d\n", + sd_template.nr_dev, sd_template.dev_max); + SDp->attached--; + return 1; + } + + rscsi_disks[i].device = SDp; + rscsi_disks[i].has_part_table = 0; + sd_template.nr_dev++; + SD_GENDISK(i).nr_real++; + devnum = i % SCSI_DISKS_PER_MAJOR; + SD_GENDISK(i).de_arr[devnum] = SDp->de; + if (SDp->removable) + SD_GENDISK(i).flags[devnum] |= GENHD_FL_REMOVABLE; + sd_devname(i, nbuff); + printk("Attached scsi %sdisk %s at scsi%d, channel %d, id %d, lun %d\n", + SDp->removable ? "removable " : "", + nbuff, SDp->host->host_no, SDp->channel, SDp->id, SDp->lun); + return 0; +} + +#define DEVICE_BUSY rscsi_disks[target].device->busy +#define ALLOW_REVALIDATE rscsi_disks[target].device->allow_revalidate +#define USAGE rscsi_disks[target].device->access_count +#define CAPACITY rscsi_disks[target].capacity +#define MAYBE_REINIT sd_init_onedisk(target) + +/* This routine is called to flush all partitions and partition tables + * for a changed scsi disk, and then re-read the new partition table. + * If we are revalidating a disk because of a media change, then we + * enter with usage == 0. If we are using an ioctl, we automatically have + * usage == 1 (we need an open channel to use an ioctl :-), so this + * is our limit. 
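+ *
+ * The sequence below: invalidate every partition minor, re-probe the
+ * disk via MAYBE_REINIT (sd_init_onedisk), then let grok_partitions()
+ * re-read the partition table.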
+ */ +int revalidate_scsidisk(kdev_t dev, int maxusage) +{ + struct gendisk *sdgd; + int target; + int max_p; + int start; + int i; + + target = DEVICE_NR(dev); + + if (DEVICE_BUSY || (ALLOW_REVALIDATE == 0 && USAGE > maxusage)) { + printk("Device busy for revalidation (usage=%d)\n", USAGE); + return -EBUSY; + } + DEVICE_BUSY = 1; + + sdgd = &SD_GENDISK(target); + max_p = sd_gendisk.max_p; + start = target << sd_gendisk.minor_shift; + + for (i = max_p - 1; i >= 0; i--) { + int index = start + i; + invalidate_device(MKDEV_SD_PARTITION(index), 1); + sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0; + sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0; + /* + * Reset the blocksize for everything so that we can read + * the partition table. Technically we will determine the + * correct block size when we revalidate, but we do this just + * to make sure that everything remains consistent. + */ + sd_blocksizes[index] = 1024; + if (rscsi_disks[target].device->sector_size == 2048) + sd_blocksizes[index] = 2048; + else + sd_blocksizes[index] = 1024; + } + +#ifdef MAYBE_REINIT + MAYBE_REINIT; +#endif + + grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, + 1<<4, CAPACITY); + + DEVICE_BUSY = 0; + return 0; +} + +static int fop_revalidate_scsidisk(kdev_t dev) +{ + return revalidate_scsidisk(dev, 0); +} +static void sd_detach(Scsi_Device * SDp) +{ + Scsi_Disk *dpnt; + struct gendisk *sdgd; + int i, j; + int max_p; + int start; + + if (rscsi_disks == NULL) + return; + + for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) + if (dpnt->device == SDp) { + + /* If we are disconnecting a disk driver, sync and invalidate + * everything */ + sdgd = &SD_GENDISK(i); + max_p = sd_gendisk.max_p; + start = i << sd_gendisk.minor_shift; + + for (j = max_p - 1; j >= 0; j--) { + int index = start + j; + invalidate_device(MKDEV_SD_PARTITION(index), 1); + sdgd->part[SD_MINOR_NUMBER(index)].start_sect = 0; + sdgd->part[SD_MINOR_NUMBER(index)].nr_sects = 0; + sd_sizes[index] = 0; + } + devfs_register_partitions (sdgd, + SD_MINOR_NUMBER (start), 1); + /* unregister_disk() */ + dpnt->has_part_table = 0; + dpnt->device = NULL; + dpnt->capacity = 0; + SDp->attached--; + sd_template.dev_noticed--; + sd_template.nr_dev--; + SD_GENDISK(i).nr_real--; + return; + } + return; +} + +static int __init init_sd(void) +{ + sd_template.module = THIS_MODULE; + return scsi_register_module(MODULE_SCSI_DEV, &sd_template); +} + +static void __exit exit_sd(void) +{ + int i; + + scsi_unregister_module(MODULE_SCSI_DEV, &sd_template); + + for (i = 0; i < N_USED_SD_MAJORS; i++) + devfs_unregister_blkdev(SD_MAJOR(i), "sd"); + + sd_registered--; + if (rscsi_disks != NULL) { + kfree(rscsi_disks); + kfree(sd_sizes); + kfree(sd_blocksizes); + kfree(sd_hardsizes); + for (i = 0; i < N_USED_SD_MAJORS; i++) { +#if 0 /* XXX aren't we forgetting to deallocate something? */ + kfree(sd_gendisks[i].de_arr); + kfree(sd_gendisks[i].flags); +#endif + kfree(sd_gendisks[i].part); + } + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + del_gendisk(&sd_gendisks[i]); + blk_size[SD_MAJOR(i)] = NULL; /* XXX blksize_size actually? 
*/ + hardsect_size[SD_MAJOR(i)] = NULL; + read_ahead[SD_MAJOR(i)] = 0; + } + sd_template.dev_max = 0; + if (sd_gendisks != NULL) /* kfree tests for 0, but leave explicit */ + kfree(sd_gendisks); +} + +module_init(init_sd); +module_exit(exit_sd); +MODULE_LICENSE("GPL"); diff --git a/xen-2.4.16/drivers/scsi/sd.h b/xen-2.4.16/drivers/scsi/sd.h new file mode 100644 index 0000000000..8e29445839 --- /dev/null +++ b/xen-2.4.16/drivers/scsi/sd.h @@ -0,0 +1,66 @@ +/* + * sd.h Copyright (C) 1992 Drew Eckhardt + * SCSI disk driver header file by + * Drew Eckhardt + * + * + * + * Modified by Eric Youngdale eric@andante.org to + * add scatter-gather, multiple outstanding request, and other + * enhancements. + */ +#ifndef _SD_H +#define _SD_H +/* + $Header: /usr/src/linux/kernel/blk_drv/scsi/RCS/sd.h,v 1.1 1992/07/24 06:27:38 root Exp root $ + */ + +#ifndef _SCSI_H +#include "scsi.h" +#endif + +#ifndef _GENDISK_H +#include +#endif + +typedef struct scsi_disk { + unsigned capacity; /* size in blocks */ + Scsi_Device *device; + unsigned char ready; /* flag ready for FLOPTICAL */ + unsigned char write_prot; /* flag write_protect for rmvable dev */ + unsigned char sector_bit_size; /* sector_size = 2 to the bit size power */ + unsigned char sector_bit_shift; /* power of 2 sectors per FS block */ + unsigned has_part_table:1; /* has partition table */ +} Scsi_Disk; + +extern int revalidate_scsidisk(kdev_t dev, int maxusage); + +/* + * Used by pmac to find the device associated with a target. + */ +extern kdev_t sd_find_target(void *host, int tgt); + +#define N_SD_MAJORS 8 + +#define SD_MAJOR_MASK (N_SD_MAJORS - 1) + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/xen-2.4.16/include/asm-i386/apic.h b/xen-2.4.16/include/asm-i386/apic.h new file mode 100644 index 0000000000..86dd0fbc66 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/apic.h @@ -0,0 +1,74 @@ +#ifndef __ASM_APIC_H +#define __ASM_APIC_H + +#include +#include +#include + +#define APIC_DEBUG 0 + +#if APIC_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + +/* + * Basic functions accessing APICs. + */ + +static __inline void apic_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(APIC_BASE+reg)) = v; +} + +static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +{ + xchg((volatile unsigned long *)(APIC_BASE+reg), v); +} + +static __inline unsigned long apic_read(unsigned long reg) +{ + return *((volatile unsigned long *)(APIC_BASE+reg)); +} + +static __inline__ void apic_wait_icr_idle(void) +{ + do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); +} + +#define FORCE_READ_AROUND_WRITE 0 +#define apic_read_around(x) +#define apic_write_around(x,y) apic_write((x),(y)) + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. 
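+ * (Concretely: apic_write_around() below is plain apic_write() here,
+ * so this should compile to a single store of 0 to the memory-mapped
+ * register at APIC_BASE + APIC_EOI.)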
+ */ + + /* Docs say use 0 for future compatibility */ + apic_write_around(APIC_EOI, 0); +} + +extern int get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC (void); +extern void disconnect_bsp_APIC (void); +extern void disable_local_APIC (void); +extern int verify_local_APIC (void); +extern void cache_APIC_registers (void); +extern void sync_Arb_IDs (void); +extern void init_bsp_APIC (void); +extern void setup_local_APIC (void); +extern void init_apic_mappings (void); +extern void smp_local_timer_interrupt (struct pt_regs * regs); +extern void setup_APIC_clocks (void); +extern int APIC_init_uniprocessor (void); + +extern unsigned int apic_timer_irqs [NR_CPUS]; + +#endif /* __ASM_APIC_H */ diff --git a/xen-2.4.16/include/asm-i386/apicdef.h b/xen-2.4.16/include/asm-i386/apicdef.h new file mode 100644 index 0000000000..f855a7d88d --- /dev/null +++ b/xen-2.4.16/include/asm-i386/apicdef.h @@ -0,0 +1,363 @@ +#ifndef __ASM_APICDEF_H +#define __ASM_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. + * Ingo Molnar , 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +#define APIC_ID 0x20 +#define APIC_ID_MASK (0x0F<<24) +#define GET_APIC_ID(x) (((x)>>24)&0x0F) +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x)&0xFF) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) +#define APIC_INTEGRATED(x) ((x)&0xF0) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFF +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFF +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFF<<24) +#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) +#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFF +#define APIC_DFR 0xE0 +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1<<9) +#define APIC_SPIV_APIC_ENABLED (1<<8) +#define APIC_ISR 0x100 +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) +#define SET_APIC_DEST_FIELD(x) ((x)<<24) +#define APIC_LVTT 0x320 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_PERIODIC (1<<17) +#define APIC_LVT_MASKED (1<<16) +#define 
APIC_LVT_LEVEL_TRIGGER (1<<15) +#define APIC_LVT_REMOTE_IRR (1<<14) +#define APIC_INPUT_POLARITY (1<<13) +#define APIC_SEND_PENDING (1<<12) +#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) +#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) + +#define MAX_IO_APICS 8 + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +#define lapic ((volatile struct local_apic *)APIC_BASE) + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ 
struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { u32 __reserved[4]; } __reserved_15; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#endif diff --git a/xen-2.4.16/include/asm-i386/atomic.h b/xen-2.4.16/include/asm-i386/atomic.h new file mode 100644 index 0000000000..70a1212ed6 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/atomic.h @@ -0,0 +1,204 @@ +#ifndef __ARCH_I386_ATOMIC__ +#define __ARCH_I386_ATOMIC__ + +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#ifdef CONFIG_SMP +#define LOCK "lock ; " +#else +#define LOCK "" +#endif + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. 
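+ *
+ * Typical usage is a bare reference count, e.g. (illustrative only;
+ * release_object() is a made-up callback):
+ *
+ *   static atomic_t refcnt = ATOMIC_INIT(1);
+ *   atomic_inc(&refcnt);
+ *   if (atomic_dec_and_test(&refcnt))
+ *           release_object();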
+ */ +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v,i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Note that the guaranteed useful range + * of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_add(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_sub(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "subl %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_dec(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "decl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
+ */ +static __inline__ int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "incl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "addl %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ +__asm__ __volatile__(LOCK "andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") + +#define atomic_set_mask(mask, addr) \ +__asm__ __volatile__(LOCK "orl %0,%1" \ +: : "r" (mask),"m" (*addr) : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#endif diff --git a/xen-2.4.16/include/asm-i386/bitops.h b/xen-2.4.16/include/asm-i386/bitops.h new file mode 100644 index 0000000000..73bcd8ef5f --- /dev/null +++ b/xen-2.4.16/include/asm-i386/bitops.h @@ -0,0 +1,368 @@ +#ifndef _I386_BITOPS_H +#define _I386_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +#include + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX "lock ; " +#else +#define LOCK_PREFIX "" +#endif + +#define ADDR (*(volatile long *) addr) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __set_bit(int nr, volatile void * addr) +{ + __asm__( + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. 
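+ *
+ * e.g. a bit-lock release might look like this (sketch only; LOCK_BIT
+ * and lock_word are illustrative names):
+ *
+ *   smp_mb__before_clear_bit();
+ *   clear_bit(LOCK_BIT, &lock_word);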
+ */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( + "btcl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btcl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int __test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int __test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! 
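+ * Callers must protect concurrent access to the bitmap with their own
+ * lock, as with the other double-underscore variants in this file.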
*/ +static __inline__ int __test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its new value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +/** + * find_first_zero_bit - find the first zero bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first zero bit, not the number of the byte + * containing a bit. + */ +static __inline__ int find_first_zero_bit(void * addr, unsigned size) +{ + int d0, d1, d2; + int res; + + if (!size) + return 0; + /* This looks at memory. Mark it volatile to tell gcc not to move it around */ + __asm__ __volatile__( + "movl $-1,%%eax\n\t" + "xorl %%edx,%%edx\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "xorl -4(%%edi),%%eax\n\t" + "subl $4,%%edi\n\t" + "bsfl %%eax,%%edx\n" + "1:\tsubl %%ebx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%edx" + :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr)); + return res; +} + +/** + * find_next_zero_bit - find the first zero bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +static __inline__ int find_next_zero_bit (void * addr, int size, int offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in first byte + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); + return (offset + set + res); +} + +/** + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static __inline__ unsigned long ffz(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"r" (~word)); + return word; +} + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). 
+ */ +static __inline__ int ffs(int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r+1; +} + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +#define hweight32(x) generic_hweight32(x) +#define hweight16(x) generic_hweight16(x) +#define hweight8(x) generic_hweight8(x) + +#define ext2_set_bit __test_and_set_bit +#define ext2_clear_bit __test_and_clear_bit +#define ext2_test_bit test_bit +#define ext2_find_first_zero_bit find_first_zero_bit +#define ext2_find_next_zero_bit find_next_zero_bit + +/* Bitmap functions for the minix filesystem. */ +#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) +#define minix_set_bit(nr,addr) __set_bit(nr,addr) +#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) +#define minix_test_bit(nr,addr) test_bit(nr,addr) +#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) + +#endif /* _I386_BITOPS_H */ diff --git a/xen-2.4.16/include/asm-i386/byteorder.h b/xen-2.4.16/include/asm-i386/byteorder.h new file mode 100644 index 0000000000..bbfb629fae --- /dev/null +++ b/xen-2.4.16/include/asm-i386/byteorder.h @@ -0,0 +1,47 @@ +#ifndef _I386_BYTEORDER_H +#define _I386_BYTEORDER_H + +#include + +#ifdef __GNUC__ + +/* For avoiding bswap on i386 */ +#ifdef __KERNEL__ +#include +#endif + +static __inline__ __const__ __u32 ___arch__swab32(__u32 x) +{ +#ifdef CONFIG_X86_BSWAP + __asm__("bswap %0" : "=r" (x) : "0" (x)); +#else + __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + :"=q" (x) + : "0" (x)); +#endif + return x; +} + +static __inline__ __const__ __u16 ___arch__swab16(__u16 x) +{ + __asm__("xchgb %b0,%h0" /* swap bytes */ \ + : "=q" (x) \ + : "0" (x)); \ + return x; +} + +#define __arch__swab32(x) ___arch__swab32(x) +#define __arch__swab16(x) ___arch__swab16(x) + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +# define __BYTEORDER_HAS_U64__ +# define __SWAB_64_THRU_32__ +#endif + +#endif /* __GNUC__ */ + +#include + +#endif /* _I386_BYTEORDER_H */ diff --git a/xen-2.4.16/include/asm-i386/cache.h b/xen-2.4.16/include/asm-i386/cache.h new file mode 100644 index 0000000000..502c8ba7a6 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/cache.h @@ -0,0 +1,13 @@ +/* + * include/asm-i386/cache.h + */ +#ifndef __ARCH_I386_CACHE_H +#define __ARCH_I386_CACHE_H + +#include + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif diff --git a/xen-2.4.16/include/asm-i386/cpufeature.h b/xen-2.4.16/include/asm-i386/cpufeature.h new file mode 100644 index 0000000000..598edbdafe --- /dev/null +++ b/xen-2.4.16/include/asm-i386/cpufeature.h @@ -0,0 +1,73 @@ +/* + * cpufeature.h + * + * Defines x86 CPU feature bits + */ + +#ifndef __ASM_I386_CPUFEATURE_H +#define __ASM_I386_CPUFEATURE_H + +/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ +#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) + +#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */ + +/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* 
Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! 
*/ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ + +#endif /* __ASM_I386_CPUFEATURE_H */ + +/* + * Local Variables: + * mode:c + * comment-column:42 + * End: + */ diff --git a/xen-2.4.16/include/asm-i386/current.h b/xen-2.4.16/include/asm-i386/current.h new file mode 100644 index 0000000000..bc1496a2c9 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/current.h @@ -0,0 +1,15 @@ +#ifndef _I386_CURRENT_H +#define _I386_CURRENT_H + +struct task_struct; + +static inline struct task_struct * get_current(void) +{ + struct task_struct *current; + __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); + return current; + } + +#define current get_current() + +#endif /* !(_I386_CURRENT_H) */ diff --git a/xen-2.4.16/include/asm-i386/debugreg.h b/xen-2.4.16/include/asm-i386/debugreg.h new file mode 100644 index 0000000000..f0b2b06ae0 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/debugreg.h @@ -0,0 +1,64 @@ +#ifndef _I386_DEBUGREG_H +#define _I386_DEBUGREG_H + + +/* Indicate the register numbers for a number of the specific + debug registers. Registers 0-3 contain the addresses we wish to trap on */ +#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ +#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ + +#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ +#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ + +/* Define a few things for the status register. We can use this to determine + which debugging register was responsible for the trap. The other bits + are either reserved or not of interest to us. */ + +#define DR_TRAP0 (0x1) /* db0 */ +#define DR_TRAP1 (0x2) /* db1 */ +#define DR_TRAP2 (0x4) /* db2 */ +#define DR_TRAP3 (0x8) /* db3 */ + +#define DR_STEP (0x4000) /* single-step */ +#define DR_SWITCH (0x8000) /* task switch */ + +/* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, + and indicates what types of access we trap on, and how large the data + field is that we are looking at */ + +#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ +#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ + +#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ +#define DR_RW_WRITE (0x1) +#define DR_RW_READ (0x3) + +#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ +#define DR_LEN_2 (0x4) +#define DR_LEN_4 (0xC) + +/* The low byte to the control register determine which registers are + enabled. There are 4 fields of two bits. One bit is "local", meaning + that the processor will reset the bit after a task switch and the other + is global meaning that we have to explicitly reset the bit. With linux, + you can use either one, since we explicitly zero the register when we enter + kernel mode. 
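+
+   A sketch of composing DR7 for a 4-byte write watchpoint in DR0,
+   using only the constants defined below (illustrative):
+
+     dr7 = ((DR_RW_WRITE | DR_LEN_4) << DR_CONTROL_SHIFT)
+         | (1 << DR_LOCAL_ENABLE_SHIFT);
+
+   For DRn, shift the control nibble by a further n * DR_CONTROL_SIZE
+   and the enable bit by a further n * DR_ENABLE_SIZE.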
*/ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ + +#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ +#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ + +/* The second byte to the control register has a few special things. + We can slow the instruction pipeline for instructions coming via the + gdt or the ldt if we want to. I am not sure why this is an advantage */ + +#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ +#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ + +#endif diff --git a/xen-2.4.16/include/asm-i386/delay.h b/xen-2.4.16/include/asm-i386/delay.h new file mode 100644 index 0000000000..9e0adb4a27 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/delay.h @@ -0,0 +1,14 @@ +#ifndef _I386_DELAY_H +#define _I386_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/i386/lib/delay.c + */ + +extern unsigned long ticks_per_usec; +extern void __udelay(unsigned long usecs); +#define udelay(n) __udelay(n) + +#endif /* defined(_I386_DELAY_H) */ diff --git a/xen-2.4.16/include/asm-i386/desc.h b/xen-2.4.16/include/asm-i386/desc.h new file mode 100644 index 0000000000..3855ebcb6c --- /dev/null +++ b/xen-2.4.16/include/asm-i386/desc.h @@ -0,0 +1,36 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#define __FIRST_TSS_ENTRY 12 +#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1) + +#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) +#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY) + +#ifndef __ASSEMBLY__ +struct desc_struct { + unsigned long a,b; +}; + +extern struct desc_struct gdt_table[]; +extern struct desc_struct *idt, *gdt; + +struct Xgt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); +}; + +#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) +#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) + +#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3)) + +#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" (__LDT(n)<<3)) + +extern void set_intr_gate(unsigned int irq, void * addr); +extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size); +extern void set_tss_desc(unsigned int n, void *addr); + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/xen-2.4.16/include/asm-i386/elf.h b/xen-2.4.16/include/asm-i386/elf.h new file mode 100644 index 0000000000..ded22856d0 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/elf.h @@ -0,0 +1,233 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 1996 Erich Boleyn + * Copyright (C) 2001 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* 32-bit data types */ + +typedef unsigned long Elf32_Addr; +typedef unsigned short Elf32_Half; +typedef unsigned long Elf32_Off; +typedef signed long Elf32_Sword; +typedef unsigned long Elf32_Word; +/* "unsigned char" already exists */ + +/* ELF header */ +typedef struct +{ + +#define EI_NIDENT 16 + + /* first four characters are defined below */ +#define EI_MAG0 0 +#define ELFMAG0 0x7f +#define EI_MAG1 1 +#define ELFMAG1 'E' +#define EI_MAG2 2 +#define ELFMAG2 'L' +#define EI_MAG3 3 +#define ELFMAG3 'F' + +#define EI_CLASS 4 /* data sizes */ +#define ELFCLASS32 1 /* i386 -- up to 32-bit data sizes present */ + +#define EI_DATA 5 /* data type and ordering */ +#define ELFDATA2LSB 1 /* i386 -- LSB 2's complement */ + +#define EI_VERSION 6 /* version number. "e_version" must be the same */ +#define EV_CURRENT 1 /* current version number */ + +#define EI_PAD 7 /* from here in is just padding */ + +#define EI_BRAND 8 /* start of OS branding (This is + obviously illegal against the ELF + standard.) */ + + unsigned char e_ident[EI_NIDENT]; /* basic identification block */ + +#define ET_EXEC 2 /* we only care about executable types */ + Elf32_Half e_type; /* file types */ + +#define EM_386 3 /* i386 -- obviously use this one */ + Elf32_Half e_machine; /* machine types */ + Elf32_Word e_version; /* use same as "EI_VERSION" above */ + Elf32_Addr e_entry; /* entry point of the program */ + Elf32_Off e_phoff; /* program header table file offset */ + Elf32_Off e_shoff; /* section header table file offset */ + Elf32_Word e_flags; /* flags */ + Elf32_Half e_ehsize; /* elf header size in bytes */ + Elf32_Half e_phentsize; /* program header entry size */ + Elf32_Half e_phnum; /* number of entries in program header */ + Elf32_Half e_shentsize; /* section header entry size */ + Elf32_Half e_shnum; /* number of entries in section header */ + +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff + Elf32_Half e_shstrndx; /* section header table index */ +} +Elf32_Ehdr; + + +#define BOOTABLE_I386_ELF(h) \ + ((h.e_ident[EI_MAG0] == ELFMAG0) & (h.e_ident[EI_MAG1] == ELFMAG1) \ + & (h.e_ident[EI_MAG2] == ELFMAG2) & (h.e_ident[EI_MAG3] == ELFMAG3) \ + & (h.e_ident[EI_CLASS] == ELFCLASS32) & (h.e_ident[EI_DATA] == ELFDATA2LSB) \ + & (h.e_ident[EI_VERSION] == EV_CURRENT) & (h.e_type == ET_EXEC) \ + & (h.e_machine == EM_386) & (h.e_version == EV_CURRENT)) + +/* section table - ? 
*/ +typedef struct +{ + Elf32_Word sh_name; /* Section name (string tbl index) */ + Elf32_Word sh_type; /* Section type */ + Elf32_Word sh_flags; /* Section flags */ + Elf32_Addr sh_addr; /* Section virtual addr at execution */ + Elf32_Off sh_offset; /* Section file offset */ + Elf32_Word sh_size; /* Section size in bytes */ + Elf32_Word sh_link; /* Link to another section */ + Elf32_Word sh_info; /* Additional section information */ + Elf32_Word sh_addralign; /* Section alignment */ + Elf32_Word sh_entsize; /* Entry size if section holds table */ +} +Elf32_Shdr; + +/* symbol table - page 4-25, figure 4-15 */ +typedef struct +{ + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} +Elf32_Sym; + +/* symbol type and binding attributes - page 4-26 */ + +#define ELF32_ST_BIND(i) ((i) >> 4) +#define ELF32_ST_TYPE(i) ((i) & 0xf) +#define ELF32_ST_INFO(b,t) (((b)<<4)+((t)&0xf)) + +/* symbol binding - page 4-26, figure 4-16 */ + +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 +#define STB_LOPROC 13 +#define STB_HIPROC 15 + +/* symbol types - page 4-28, figure 4-17 */ + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_LOPROC 13 +#define STT_HIPROC 15 + + +/* Macros to split/combine relocation type and symbol page 4-32 */ + +#define ELF32_R_SYM(__i) ((__i)>>8) +#define ELF32_R_TYPE(__i) ((unsigned char) (__i)) +#define ELF32_R_INFO(__s, __t) (((__s)<<8) + (unsigned char) (__t)) + + +/* program header - page 5-2, figure 5-1 */ + +typedef struct +{ + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} +Elf32_Phdr; + +/* segment types - page 5-3, figure 5-2 */ + +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 + +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff + +/* segment permissions - page 5-6 */ + +#define PF_X 0x1 +#define PF_W 0x2 +#define PF_R 0x4 +#define PF_MASKPROC 0xf0000000 + + +/* dynamic structure - page 5-15, figure 5-9 */ + +typedef struct +{ + Elf32_Sword d_tag; + union + { + Elf32_Word d_val; + Elf32_Addr d_ptr; + } + d_un; +} +Elf32_Dyn; + +/* Dynamic array tags - page 5-16, figure 5-10. */ + +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 diff --git a/xen-2.4.16/include/asm-i386/fixmap.h b/xen-2.4.16/include/asm-i386/fixmap.h new file mode 100644 index 0000000000..b0f455a5af --- /dev/null +++ b/xen-2.4.16/include/asm-i386/fixmap.h @@ -0,0 +1,107 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include +#include +#include + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +/* + * on UP currently we will have no trace of the fixmap mechanizm, + * no page table allocations, etc. This might change in the + * future, say framebuffers for the console driver(s) could be + * fix-mapped? + */ +enum fixed_addresses { +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +#endif +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + __end_of_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + l1_pgentry_t entry); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR)) +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, mk_l1_pgentry(phys|PAGE_HYPERVISOR_NOCACHE)) +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ +#define FIXADDR_TOP (0xffffe000UL) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. 
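+ *
+ * e.g. fix_to_virt(FIX_APIC_BASE) folds to a compile-time constant;
+ * apicdef.h builds APIC_BASE from exactly this call.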
+ */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +#endif diff --git a/xen-2.4.16/include/asm-i386/hardirq.h b/xen-2.4.16/include/asm-i386/hardirq.h new file mode 100644 index 0000000000..bad529b882 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/hardirq.h @@ -0,0 +1,88 @@ +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include +#include + +/* assembly code in softirq.h is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; + unsigned int __local_irq_count; + unsigned int __local_bh_count; + unsigned int __syscall_count; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +/* + * Are we in an interrupt context? Either doing bottom half + * or hardware interrupt processing? + */ +#define in_interrupt() ({ int __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) + +#define in_irq() (local_irq_count(smp_processor_id()) != 0) + +#ifndef CONFIG_SMP + +#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) +#define hardirq_endlock(cpu) do { } while (0) + +#define irq_enter(cpu, irq) (local_irq_count(cpu)++) +#define irq_exit(cpu, irq) (local_irq_count(cpu)--) + +#define synchronize_irq() barrier() + +#else + +#include +#include + +extern unsigned char global_irq_holder; +extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */ + +static inline int irqs_running (void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + if (local_irq_count(i)) + return 1; + return 0; +} + +static inline void release_irqlock(int cpu) +{ + /* if we didn't own the irq lock, just ignore.. */ + if (global_irq_holder == (unsigned char) cpu) { + global_irq_holder = 0xff; + clear_bit(0,&global_irq_lock); + } +} + +static inline void irq_enter(int cpu, int irq) +{ + ++local_irq_count(cpu); + + while (test_bit(0,&global_irq_lock)) { + cpu_relax(); + } +} + +static inline void irq_exit(int cpu, int irq) +{ + --local_irq_count(cpu); +} + +static inline int hardirq_trylock(int cpu) +{ + return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); +} + +#define hardirq_endlock(cpu) do { } while (0) + +extern void synchronize_irq(void); + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_HARDIRQ_H */ diff --git a/xen-2.4.16/include/asm-i386/hdreg.h b/xen-2.4.16/include/asm-i386/hdreg.h new file mode 100644 index 0000000000..1ad5c07394 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/hdreg.h @@ -0,0 +1,12 @@ +/* + * linux/include/asm-i386/hdreg.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +#ifndef __ASMi386_HDREG_H +#define __ASMi386_HDREG_H + +typedef unsigned short ide_ioreg_t; + +#endif /* __ASMi386_HDREG_H */ diff --git a/xen-2.4.16/include/asm-i386/i387.h b/xen-2.4.16/include/asm-i386/i387.h new file mode 100644 index 0000000000..7ec679d446 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/i387.h @@ -0,0 +1,39 @@ +/* + * include/asm-i386/i387.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#ifndef __ASM_I386_I387_H +#define __ASM_I386_I387_H + +#include +#include + +extern void init_fpu(void); +extern void save_init_fpu( struct task_struct *tsk ); +extern void restore_fpu( struct task_struct *tsk ); + +#define unlazy_fpu( tsk ) do { \ + if ( tsk->flags & PF_USEDFPU ) \ + save_init_fpu( tsk ); \ +} while (0) + +#define clear_fpu( tsk ) do { \ + if ( tsk->flags & PF_USEDFPU ) { \ + asm 
volatile("fwait"); \ + tsk->flags &= ~PF_USEDFPU; \ + stts(); \ + } \ +} while (0) + +#define load_mxcsr( val ) do { \ + unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ + asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ +} while (0) + +#endif /* __ASM_I386_I387_H */ diff --git a/xen-2.4.16/include/asm-i386/ide.h b/xen-2.4.16/include/asm-i386/ide.h new file mode 100644 index 0000000000..6642abf467 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/ide.h @@ -0,0 +1,128 @@ +/* + * linux/include/asm-i386/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* + * This file contains the i386 architecture specific IDE code. + */ + +#ifndef __ASMi386_IDE_H +#define __ASMi386_IDE_H + +#ifdef __KERNEL__ + +#include + +#ifndef MAX_HWIFS +# ifdef CONFIG_BLK_DEV_IDEPCI +#define MAX_HWIFS 10 +# else +#define MAX_HWIFS 6 +# endif +#endif + +#define ide__sti() __sti() + +static __inline__ int ide_default_irq(ide_ioreg_t base) +{ + switch (base) { + case 0x1f0: return 14; + case 0x170: return 15; + case 0x1e8: return 11; + case 0x168: return 10; + case 0x1e0: return 8; + case 0x160: return 12; + default: + return 0; + } +} + +static __inline__ ide_ioreg_t ide_default_io_base(int index) +{ + switch (index) { + case 0: return 0x1f0; + case 1: return 0x170; + case 2: return 0x1e8; + case 3: return 0x168; + case 4: return 0x1e0; + case 5: return 0x160; + default: + return 0; + } +} + +static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq) +{ + ide_ioreg_t reg = data_port; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + if (ctrl_port) { + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + } else { + hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; + } + if (irq != NULL) + *irq = 0; + hw->io_ports[IDE_IRQ_OFFSET] = 0; +} + +static __inline__ void ide_init_default_hwifs(void) +{ +#ifndef CONFIG_BLK_DEV_IDEPCI + hw_regs_t hw; + int index; + + for(index = 0; index < MAX_HWIFS; index++) { + ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); + hw.irq = ide_default_irq(ide_default_io_base(index)); + ide_register_hw(&hw, NULL); + } +#endif /* CONFIG_BLK_DEV_IDEPCI */ +} + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned head : 4; /* always zeros here */ + unsigned unit : 1; /* drive select number, 0 or 1 */ + unsigned bit5 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit7 : 1; /* always 1 */ + } b; +} select_t; + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned bit0 : 1; + unsigned nIEN : 1; /* device INTRQ to host */ + unsigned SRST : 1; /* host soft reset bit */ + unsigned bit3 : 1; /* ATA-2 thingy */ + unsigned reserved456 : 3; + unsigned HOB : 1; /* 48-bit address ordering */ + } b; +} control_t; + +#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) +#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) +#define ide_check_region(from,extent) check_region((from), (extent)) +#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) +#define ide_release_region(from,extent) release_region((from), (extent)) + +/* + * The following are not needed for the non-m68k ports + */ +#define ide_ack_intr(hwif) (1) +#define ide_fix_driveid(id) do {} while (0) +#define ide_release_lock(lock) do {} while (0) +#define ide_get_lock(lock, hdlr, data) do {} while (0) + 
+#endif /* __KERNEL__ */ + +#endif /* __ASMi386_IDE_H */ diff --git a/xen-2.4.16/include/asm-i386/io.h b/xen-2.4.16/include/asm-i386/io.h new file mode 100644 index 0000000000..9b54ae278d --- /dev/null +++ b/xen-2.4.16/include/asm-i386/io.h @@ -0,0 +1,253 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include +#include + +#define IO_SPACE_LIMIT 0xffff + +/*#include */ + +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. + */ +#if CONFIG_DEBUG_IOVIRT + extern void *__io_virt_debug(unsigned long x, const char *file, int line); + extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); + #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) +//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) +#else + #define __io_virt(x) ((void *)(x)) +//#define __io_phys(x) __pa(x) +#endif + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((page - frame_table) << PAGE_SHIFT) + +extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); + +static inline void * ioremap (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, 0); +} + +/* + * This one maps high address device memory and turns off caching for that area. + * it's useful if some control registers are in such an area and write combining + * or read caching is not desirable: + */ +static inline void * ioremap_nocache (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, _PAGE_PCD); +} + +extern void iounmap(void *addr); + +/* + * IO bus memory addresses are also 1:1 with the physical address + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys + +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the x86 architecture, we just read/write the + * memory location directly. + */ + +#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) +#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) +#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) +#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) +#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +#define memset_io(a,b,c) memset(__io_virt(a),(b),(c)) +#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c)) +#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c)) + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. 
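+ * (For instance, isa_readb(0xC0000), built from the macros below,
+ * reads the first byte of the legacy video-BIOS region through the
+ * PAGE_OFFSET mapping.)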
The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) +#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) +#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) +#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) + + +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) +#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) + +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); +} + +#define dma_cache_inv(_start,_size) flush_write_buffers() +#define dma_cache_wback(_start,_size) flush_write_buffers() +#define dma_cache_wback_inv(_start,_size) flush_write_buffers() + +#else + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) +#define flush_write_buffers() + +#endif + +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#else +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + + +/* + * Talk about misusing macros.. + */ +#define __OUT1(s,x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" + +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; + +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" + +#define __IN(s,s1,i...) 
\ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } + +#define __INS(s) \ +static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; ins" #s \ +: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define __OUTS(s) \ +static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; outs" #s \ +: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#endif diff --git a/xen-2.4.16/include/asm-i386/io_apic.h b/xen-2.4.16/include/asm-i386/io_apic.h new file mode 100644 index 0000000000..a03be4d733 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/io_apic.h @@ -0,0 +1,148 @@ +#ifndef __ASM_IO_APIC_H +#define __ASM_IO_APIC_H + +#include +#include + +/* + * Intel IO-APIC support for SMP and UP systems. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +#ifdef CONFIG_X86_IO_APIC + +#define APIC_MISMATCH_DEBUG + +#define IO_APIC_BASE(idx) \ + ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) + +/* + * The structure of the IO-APIC: + */ +struct IO_APIC_reg_00 { + __u32 __reserved_2 : 24, + ID : 4, + __reserved_1 : 4; +} __attribute__ ((packed)); + +struct IO_APIC_reg_01 { + __u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; +} __attribute__ ((packed)); + +struct IO_APIC_reg_02 { + __u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; +} __attribute__ ((packed)); + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + + union { struct { __u32 + __reserved_1 : 24, + physical_dest : 4, + __reserved_2 : 4; + } physical; + + struct { __u32 + __reserved_1 : 24, + logical_dest : 8; + } logical; + } dest; + +} __attribute__ ((packed)); + +/* + * MP-BIOS irq configuration table structures: + */ + +/* I/O APIC entries */ +extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = value; +} + +/* + * Re-write a value: to be used for 
read-modify-write + * cycles where the read already set up the index register. + */ +static inline void io_apic_modify(unsigned int apic, unsigned int value) +{ + *(IO_APIC_BASE(apic)+4) = value; +} + +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + (void) *(IO_APIC_BASE(apic)+4); +} + +/* 1 if "noapic" boot option passed */ +//extern int skip_ioapic_setup; +#define skip_ioapic_setup 0 + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup) + +#else /* !CONFIG_X86_IO_APIC */ +#define io_apic_assign_pci_irqs 0 +#endif + +#endif diff --git a/xen-2.4.16/include/asm-i386/ioctl.h b/xen-2.4.16/include/asm-i386/ioctl.h new file mode 100644 index 0000000000..c75f20ade6 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/ioctl.h @@ -0,0 +1,75 @@ +/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $ + * + * linux/ioctl.h for Linux by H.H. Bergman. + */ + +#ifndef _ASMI386_IOCTL_H +#define _ASMI386_IOCTL_H + +/* ioctl command encoding: 32 bits total, command in lower 16 bits, + * size of the parameter structure in the lower 14 bits of the + * upper 16 bits. + * Encoding the size of the parameter structure in the ioctl request + * is useful for catching programs compiled with old versions + * and to avoid overwriting user space outside the user buffer area. + * The highest 2 bits are reserved for indicating the ``access mode''. + * NOTE: This limits the max parameter size to 16kB -1 ! + */ + +/* + * The following is for compatibility across the various Linux + * platforms. The i386 ioctl numbering scheme doesn't really enforce + * a type field. De facto, however, the top 8 bits of the lower 16 + * bits are indeed used as a type field, so we might just as well make + * this explicit here. Please be sure to use the decoding macros + * below from now on. + */ +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 14 +#define _IOC_DIRBITS 2 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits. + */ +#define _IOC_NONE 0U +#define _IOC_WRITE 1U +#define _IOC_READ 2U + +#define _IOC(dir,type,nr,size) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT)) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode ioctl numbers.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* ...and for the drivers/sound files... 
*/ + +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _ASMI386_IOCTL_H */ diff --git a/xen-2.4.16/include/asm-i386/irq.h b/xen-2.4.16/include/asm-i386/irq.h new file mode 100644 index 0000000000..918f828ce6 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/irq.h @@ -0,0 +1,203 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar */ + +#include +#include + +#define SA_INTERRUPT 0x20000000 +#define SA_SHIRQ 0x04000000 + +#define TIMER_IRQ 0 +/* 256 in architecture, minus 16 allocated to processor. */ +#define NR_IRQS 224 + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +/* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +#define SYSCALL_VECTOR 0x80 /* application -> OS */ +#define KDBENTER_VECTOR 0x81 /* anyone -> KDB */ +#define HYPERVISOR_CALL_VECTOR 0x82 /* OS -> monitor*/ + +/* + * Vectors 0x20-0x2f are used for ISA interrupts. + */ + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + * + * Vectors 0xf0-0xfa are free (reserved for future Linux use). + */ +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define INVALIDATE_TLB_VECTOR 0xfd +#define EVENT_CHECK_VECTOR 0xfc +#define CALL_FUNCTION_VECTOR 0xfb +#define KDB_VECTOR 0xfa + +/* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +#define LOCAL_TIMER_VECTOR 0xef + +/* + * First APIC vector available to drivers: (vectors 0x30-0xee) + * we start at 0x31 to spread out vectors evenly between priority + * levels. 
(0x80 is the syscall vector) + */ +#define FIRST_DEVICE_VECTOR 0x31 +#define FIRST_SYSTEM_VECTOR 0xef + +extern int irq_vector[NR_IRQS]; +#define IO_APIC_VECTOR(irq) irq_vector[irq] + +/* + * Various low-level irq details needed by irq.c, process.c, + * time.c, io_apic.c and smp.c + * + * Interrupt entry/exit code at both C and assembly level + */ + +extern void mask_irq(unsigned int irq); +extern void unmask_irq(unsigned int irq); +extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); +extern int i8259A_irq_pending(unsigned int irq); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); +extern void FASTCALL(send_IPI_self(int vector)); +extern void init_VISWS_APIC_irqs(void); +extern void setup_IO_APIC(void); +extern void disable_IO_APIC(void); +extern void print_IO_APIC(void); +extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); +extern void send_IPI(int dest, int vector); + +extern unsigned long io_apic_irqs; + +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern char _stext, _etext; + +#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) + +#define __STR(x) #x +#define STR(x) __STR(x) + +#define SAVE_ALL \ + "cld\n\t" \ + "pushl %es\n\t" \ + "pushl %ds\n\t" \ + "pushl %eax\n\t" \ + "pushl %ebp\n\t" \ + "pushl %edi\n\t" \ + "pushl %esi\n\t" \ + "pushl %edx\n\t" \ + "pushl %ecx\n\t" \ + "pushl %ebx\n\t" \ + "movl $" STR(__HYPERVISOR_DS) ",%edx\n\t" \ + "movl %edx,%ds\n\t" \ + "movl %edx,%es\n\t" + +#define IRQ_NAME2(nr) nr##_interrupt(void) +#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) + +#define GET_CURRENT \ + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" + +/* + * SMP has a few special interrupts for IPI messages + */ + + /* there is a second layer of macro just to get the symbolic + name for the vector evaluated. This change is for RTLinux */ +#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) +#define XBUILD_SMP_INTERRUPT(x,v)\ +asmlinkage void x(void); \ +asmlinkage void call_##x(void); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(x) ":\n\t" \ + "pushl $"#v"-256\n\t" \ + SAVE_ALL \ + SYMBOL_NAME_STR(call_##x)":\n\t" \ + "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ + "jmp ret_from_intr\n"); + +#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) +#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ +asmlinkage void x(struct pt_regs * regs); \ +asmlinkage void call_##x(void); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(x) ":\n\t" \ + "pushl $"#v"-256\n\t" \ + SAVE_ALL \ + "movl %esp,%eax\n\t" \ + "pushl %eax\n\t" \ + SYMBOL_NAME_STR(call_##x)":\n\t" \ + "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ + "addl $4,%esp\n\t" \ + "jmp ret_from_intr\n"); + +#define BUILD_COMMON_IRQ() \ +asmlinkage void call_do_IRQ(void); \ +__asm__( \ + "\n" __ALIGN_STR"\n" \ + "common_interrupt:\n\t" \ + SAVE_ALL \ + SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ + "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \ + "jmp ret_from_intr\n"); + +/* + * subtle. orig_eax is used by the signal code to distinct between + * system calls and interrupted 'random user-space'. Thus we have + * to put a negative value into orig_eax here. (the problem is that + * both system calls and IRQs want to have small integer numbers in + * orig_eax, and the syscall code has won the optimization conflict ;) + * + * Subtle as a pigs ear. 
VY + */ + +#define BUILD_IRQ(nr) \ +asmlinkage void IRQ_NAME(nr); \ +__asm__( \ +"\n"__ALIGN_STR"\n" \ +SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ + "pushl $"#nr"-256\n\t" \ + "jmp common_interrupt"); + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +#include + +#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */ +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { + if (IO_APIC_IRQ(i)) + send_IPI_self(IO_APIC_VECTOR(i)); +} +#else +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} +#endif + +#endif /* _ASM_HW_IRQ_H */ diff --git a/xen-2.4.16/include/asm-i386/mc146818rtc.h b/xen-2.4.16/include/asm-i386/mc146818rtc.h new file mode 100644 index 0000000000..03a4efa9e8 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/mc146818rtc.h @@ -0,0 +1,113 @@ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + +#include +#include + +extern spinlock_t rtc_lock; /* serialize CMOS RAM access */ + +/********************************************************************** + * register summary + **********************************************************************/ +#define RTC_SECONDS 0 +#define RTC_SECONDS_ALARM 1 +#define RTC_MINUTES 2 +#define RTC_MINUTES_ALARM 3 +#define RTC_HOURS 4 +#define RTC_HOURS_ALARM 5 +/* RTC_*_alarm is always true if 2 MSBs are set */ +# define RTC_ALARM_DONT_CARE 0xC0 + +#define RTC_DAY_OF_WEEK 6 +#define RTC_DAY_OF_MONTH 7 +#define RTC_MONTH 8 +#define RTC_YEAR 9 + +/* control registers - Moto names + */ +#define RTC_REG_A 10 +#define RTC_REG_B 11 +#define RTC_REG_C 12 +#define RTC_REG_D 13 + +/********************************************************************** + * register details + **********************************************************************/ +#define RTC_FREQ_SELECT RTC_REG_A + +/* update-in-progress - set to "1" 244 microsecs before RTC goes off the bus, + * reset after update (may take 1.984ms @ 32768Hz RefClock) is complete, + * totalling to a max high interval of 2.228 ms. + */ +# define RTC_UIP 0x80 +# define RTC_DIV_CTL 0x70 + /* divider control: refclock values 4.194 / 1.049 MHz / 32.768 kHz */ +# define RTC_REF_CLCK_4MHZ 0x00 +# define RTC_REF_CLCK_1MHZ 0x10 +# define RTC_REF_CLCK_32KHZ 0x20 + /* 2 values for divider stage reset, others for "testing purposes only" */ +# define RTC_DIV_RESET1 0x60 +# define RTC_DIV_RESET2 0x70 + /* Periodic intr. / Square wave rate select. 0=none, 1=32.8kHz,... 15=2Hz */ +# define RTC_RATE_SELECT 0x0F + +/**********************************************************************/ +#define RTC_CONTROL RTC_REG_B +# define RTC_SET 0x80 /* disable updates for clock setting */ +# define RTC_PIE 0x40 /* periodic interrupt enable */ +# define RTC_AIE 0x20 /* alarm interrupt enable */ +# define RTC_UIE 0x10 /* update-finished interrupt enable */ +# define RTC_SQWE 0x08 /* enable square-wave output */ +# define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ +# define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ +# define RTC_DST_EN 0x01 /* auto switch DST - works f. 
USA only */ + +/**********************************************************************/ +#define RTC_INTR_FLAGS RTC_REG_C +/* caution - cleared by read */ +# define RTC_IRQF 0x80 /* any of the following 3 is active */ +# define RTC_PF 0x40 +# define RTC_AF 0x20 +# define RTC_UF 0x10 + +/**********************************************************************/ +#define RTC_VALID RTC_REG_D +# define RTC_VRT 0x80 /* valid RAM and time */ +/**********************************************************************/ + +/* example: !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) + * determines if the following two #defines are needed + */ +#ifndef BCD_TO_BIN +#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) +#endif + +#ifndef BIN_TO_BCD +#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) +#endif + + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#define RTC_IRQ 8 + +#endif /* _ASM_MC146818RTC_H */ diff --git a/xen-2.4.16/include/asm-i386/mpspec.h b/xen-2.4.16/include/asm-i386/mpspec.h new file mode 100644 index 0000000000..2598ea02db --- /dev/null +++ b/xen-2.4.16/include/asm-i386/mpspec.h @@ -0,0 +1,211 @@ +#ifndef __ASM_MPSPEC_H +#define __ASM_MPSPEC_H + +/* + * Structure definitions for SMP machines following the + * Intel Multiprocessing Specification 1.1 and 1.4. + */ + +/* + * This tag identifies where the SMP configuration + * information is. + */ + +#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') + +/* Maximum of 16 APICs with the current APIC ID architecture. */ +#define MAX_APICS 16 + +#define MAX_MPC_ENTRY 1024 + +struct intel_mp_floating +{ + char mpf_signature[4]; /* "_MP_" */ + unsigned long mpf_physptr; /* Configuration table address */ + unsigned char mpf_length; /* Our length (paragraphs) */ + unsigned char mpf_specification;/* Specification version */ + unsigned char mpf_checksum; /* Checksum (makes sum 0) */ + unsigned char mpf_feature1; /* Standard or configuration ? 
*/ + unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ + unsigned char mpf_feature3; /* Unused (0) */ + unsigned char mpf_feature4; /* Unused (0) */ + unsigned char mpf_feature5; /* Unused (0) */ +}; + +struct mp_config_table +{ + char mpc_signature[4]; +#define MPC_SIGNATURE "PCMP" + unsigned short mpc_length; /* Size of table */ + char mpc_spec; /* 0x01 */ + char mpc_checksum; + char mpc_oem[8]; + char mpc_productid[12]; + unsigned long mpc_oemptr; /* 0 if not present */ + unsigned short mpc_oemsize; /* 0 if not present */ + unsigned short mpc_oemcount; + unsigned long mpc_lapic; /* APIC address */ + unsigned long reserved; +}; + +/* Followed by entries */ + +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ + +struct mpc_config_processor +{ + unsigned char mpc_type; + unsigned char mpc_apicid; /* Local APIC number */ + unsigned char mpc_apicver; /* Its versions */ + unsigned char mpc_cpuflag; +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ + unsigned long mpc_cpufeature; +#define CPU_STEPPING_MASK 0x0F +#define CPU_MODEL_MASK 0xF0 +#define CPU_FAMILY_MASK 0xF00 + unsigned long mpc_featureflag; /* CPUID feature value */ + unsigned long mpc_reserved[2]; +}; + +struct mpc_config_bus +{ + unsigned char mpc_type; + unsigned char mpc_busid; + unsigned char mpc_bustype[6] __attribute((packed)); +}; + +/* List of Bus Type string values, Intel MP Spec. */ +#define BUSTYPE_EISA "EISA" +#define BUSTYPE_ISA "ISA" +#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ +#define BUSTYPE_MCA "MCA" +#define BUSTYPE_VL "VL" /* Local bus */ +#define BUSTYPE_PCI "PCI" +#define BUSTYPE_PCMCIA "PCMCIA" +#define BUSTYPE_CBUS "CBUS" +#define BUSTYPE_CBUSII "CBUSII" +#define BUSTYPE_FUTURE "FUTURE" +#define BUSTYPE_MBI "MBI" +#define BUSTYPE_MBII "MBII" +#define BUSTYPE_MPI "MPI" +#define BUSTYPE_MPSA "MPSA" +#define BUSTYPE_NUBUS "NUBUS" +#define BUSTYPE_TC "TC" +#define BUSTYPE_VME "VME" +#define BUSTYPE_XPRESS "XPRESS" + +struct mpc_config_ioapic +{ + unsigned char mpc_type; + unsigned char mpc_apicid; + unsigned char mpc_apicver; + unsigned char mpc_flags; +#define MPC_APIC_USABLE 0x01 + unsigned long mpc_apicaddr; +}; + +struct mpc_config_intsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbus; + unsigned char mpc_srcbusirq; + unsigned char mpc_dstapic; + unsigned char mpc_dstirq; +}; + +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; + +#define MP_IRQDIR_DEFAULT 0 +#define MP_IRQDIR_HIGH 1 +#define MP_IRQDIR_LOW 3 + + +struct mpc_config_lintsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbusid; + unsigned char mpc_srcbusirq; + unsigned char mpc_destapic; +#define MP_APIC_ALL 0xFF + unsigned char mpc_destapiclint; +}; + +struct mp_config_oemtable +{ + char oem_signature[4]; +#define MPC_OEM_SIGNATURE "_OEM" + unsigned short oem_length; /* Size of table */ + char oem_rev; /* 0x01 */ + char oem_checksum; + char mpc_oem[8]; +}; + +struct mpc_config_translation +{ + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* + * Default configurations + * + * 1 2 CPU ISA 82489DX + * 2 2 CPU EISA 
82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining + * 3 2 CPU EISA 82489DX + * 4 2 CPU MCA 82489DX + * 5 2 CPU ISA+PCI + * 6 2 CPU EISA+PCI + * 7 2 CPU MCA+PCI + */ + +#define MAX_IRQ_SOURCES 256 + +#define MAX_MP_BUSSES 32 +enum mp_bustype { + MP_BUS_ISA = 1, + MP_BUS_EISA, + MP_BUS_PCI, + MP_BUS_MCA +}; +extern int mp_bus_id_to_type [MAX_MP_BUSSES]; +extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; + +extern unsigned int boot_cpu_physical_apicid; +extern unsigned long phys_cpu_present_map; +extern int smp_found_config; +extern void find_smp_config (void); +extern void get_smp_config (void); +extern int nr_ioapics; +extern int apic_version [MAX_APICS]; +extern int mp_bus_id_to_type [MAX_MP_BUSSES]; +extern int mp_irq_entries; +extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; +extern int mpc_default_type; +extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; +extern int mp_current_pci_id; +extern unsigned long mp_lapic_addr; +extern int pic_mode; +extern int using_apic_timer; + +#endif + diff --git a/xen-2.4.16/include/asm-i386/msr.h b/xen-2.4.16/include/asm-i386/msr.h new file mode 100644 index 0000000000..11bcb7f29e --- /dev/null +++ b/xen-2.4.16/include/asm-i386/msr.h @@ -0,0 +1,104 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x79 +#define MSR_IA32_UCODE_REV 0x8b + +#define MSR_IA32_PERFCTR0 0xc1 +#define MSR_IA32_PERFCTR1 0xc2 + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_EVNTSEL0 0x186 +#define MSR_IA32_EVNTSEL1 0x187 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +/* AMD Defined MSRs */ +#define MSR_K6_EFER 0xC0000080 +#define MSR_K6_STAR 0xC0000081 +#define MSR_K6_WHCR 0xC0000082 +#define MSR_K6_UWCCR 0xC0000085 +#define MSR_K6_PSOR 0xC0000087 +#define MSR_K6_PFIR 0xC0000088 + +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 + +/* Centaur-Hauls/IDT defined MSRs. 
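+ *
+ * An illustrative sketch of reading one of these (assuming a CPU that
+ * actually implements the register):
+ *
+ *   unsigned int lo, hi;
+ *   rdmsr(MSR_IDT_FCR1, lo, hi);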
*/
+#define MSR_IDT_FCR1 0x107
+#define MSR_IDT_FCR2 0x108
+#define MSR_IDT_FCR3 0x109
+#define MSR_IDT_FCR4 0x10a
+
+#define MSR_IDT_MCR0 0x110
+#define MSR_IDT_MCR1 0x111
+#define MSR_IDT_MCR2 0x112
+#define MSR_IDT_MCR3 0x113
+#define MSR_IDT_MCR4 0x114
+#define MSR_IDT_MCR5 0x115
+#define MSR_IDT_MCR6 0x116
+#define MSR_IDT_MCR7 0x117
+#define MSR_IDT_MCR_CTRL 0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR 0x1107
+
+#endif /* __ASM_MSR_H */
diff --git a/xen-2.4.16/include/asm-i386/page.h b/xen-2.4.16/include/asm-i386/page.h
new file mode 100644
index 0000000000..ac1304c13e
--- /dev/null
+++ b/xen-2.4.16/include/asm-i386/page.h
@@ -0,0 +1,169 @@
+#ifndef _I386_PAGE_H
+#define _I386_PAGE_H
+
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+extern void do_BUG(const char *file, int line);
+#define BUG() do { \
+	do_BUG(__FILE__, __LINE__); \
+	__asm__ __volatile__("ud2"); \
+} while (0)
+#else
+#include
+#define BUG() (panic("BUG at %s:%d\n", __FILE__, __LINE__))
+#endif
+#endif /* __ASSEMBLY__ */
+
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+
+#define ENTRIES_PER_L1_PAGETABLE 1024
+#define ENTRIES_PER_L2_PAGETABLE 1024
+
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define PAGE_SIZE (1UL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+
+#ifndef __ASSEMBLY__
+#include
+typedef struct { unsigned long l1_lo; } l1_pgentry_t;
+typedef struct { unsigned long l2_lo; } l2_pgentry_t;
+typedef l1_pgentry_t *l1_pagetable_t;
+typedef l2_pgentry_t *l2_pagetable_t;
+typedef struct { unsigned long pt_lo; } pagetable_t;
+#endif /* !__ASSEMBLY__ */
+
+/* Strip type from a table entry. */
+#define l1_pgentry_val(_x) ((_x).l1_lo)
+#define l2_pgentry_val(_x) ((_x).l2_lo)
+
+#define alloc_l1_pagetable() ((l1_pgentry_t *)get_free_page(GFP_KERNEL))
+#define alloc_l2_pagetable() ((l2_pgentry_t *)get_free_page(GFP_KERNEL))
+
+#define pagetable_ptr(_x) ((l2_pagetable_t)((_x).pt_lo))
+#define pagetable_type(_x) (((_x).pt_lo) & ~PAGE_MASK)
+#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
+#define pagetable_none(_x) ((_x).pt_lo == 0)
+
+/* Add type to a table entry. */
+#define mk_l1_pgentry(_x) ( (l1_pgentry_t) { (_x) } )
+#define mk_l2_pgentry(_x) ( (l2_pgentry_t) { (_x) } )
+
+/* Turn a typed table entry into a page index. */
+#define l1_pgentry_to_pagenr(_x) (l1_pgentry_val(_x) >> PAGE_SHIFT)
+#define l2_pgentry_to_pagenr(_x) (l2_pgentry_val(_x) >> PAGE_SHIFT)
+
+/* Dereference a typed level-2 entry to yield a typed level-1 table. */
+#define l2_pgentry_to_l1(_x) \
+  ((l1_pgentry_t *)__va(l2_pgentry_val(_x) & PAGE_MASK))
+
+/* Given a virtual address, get an entry offset into a page table. */
+#define l1_table_offset(_a) \
+  (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1))
+#define l2_table_offset(_a) \
+  ((_a) >> L2_PAGETABLE_SHIFT)
+
+/* Hypervisor table entries use zero to signify 'empty'.
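+ *
+ * A sketch of how the macros above compose when walking a table (assuming
+ * the level-2 entry is present; 'va' is a virtual address):
+ *
+ *   l2_pgentry_t *l2 = idle0_pg_table;
+ *   l1_pgentry_t *l1 = l2_pgentry_to_l1(l2[l2_table_offset(va)]);
+ *   l1_pgentry_t pte = l1[l1_table_offset(va)];
+ *
+ * after which l1_pgentry_empty(pte) tests for an unmapped address.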
*/ +#define l1_pgentry_empty(_x) (!l1_pgentry_val(_x)) +#define l2_pgentry_empty(_x) (!l2_pgentry_val(_x)) + +#define __PAGE_OFFSET (0xE0000000) +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define page_address(_p) (__va(((_p) - frame_table) << PAGE_SHIFT)) +#define virt_to_page(kaddr) (frame_table + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - frame_table) < max_mapnr) + +/* High table entries are reserved by the hypervisor. */ +#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \ + (PAGE_OFFSET >> L2_PAGETABLE_SHIFT) +#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \ + (ENTRIES_PER_L2_PAGETABLE - DOMAIN_ENTRIES_PER_L2_PAGETABLE) + +#ifndef __ASSEMBLY__ +#include +#include +#include + +extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE]; +extern void paging_init(void); + +#define __flush_tlb() \ + do { \ + unsigned int tmpreg; \ + \ + __asm__ __volatile__( \ + "movl %%cr3, %0; # flush TLB \n" \ + "movl %0, %%cr3; \n" \ + : "=r" (tmpreg) \ + :: "memory"); \ + } while (0) + +/* Flush global pages as well. */ +#define __flush_tlb_all() \ + do { \ + unsigned int tmpreg; \ + \ + __asm__ __volatile__( \ + "movl %1, %%cr4; # turn off PGE \n" \ + "movl %%cr3, %0; # flush TLB \n" \ + "movl %0, %%cr3; \n" \ + "movl %2, %%cr4; # turn PGE back on \n" \ + : "=&r" (tmpreg) \ + : "r" (mmu_cr4_features & ~X86_CR4_PGE), \ + "r" (mmu_cr4_features) \ + : "memory"); \ + } while (0) + +#define __flush_tlb_one(addr) \ +__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) + +#endif /* !__ASSEMBLY__ */ + + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PAT 0x080 +#define _PAGE_PSE 0x080 +#define _PAGE_GLOBAL 0x100 + +#define __PAGE_HYPERVISOR \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_HYPERVISOR_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#define MAKE_GLOBAL(_x) ((_x) | _PAGE_GLOBAL) + +#define PAGE_HYPERVISOR MAKE_GLOBAL(__PAGE_HYPERVISOR) +#define PAGE_HYPERVISOR_RO MAKE_GLOBAL(__PAGE_HYPERVISOR_RO) +#define PAGE_HYPERVISOR_NOCACHE MAKE_GLOBAL(__PAGE_HYPERVISOR_NOCACHE) + +#ifndef __ASSEMBLY__ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} +#endif + +#endif /* _I386_PAGE_H */ diff --git a/xen-2.4.16/include/asm-i386/pci.h b/xen-2.4.16/include/asm-i386/pci.h new file mode 100644 index 0000000000..9ab9c282fe --- /dev/null +++ b/xen-2.4.16/include/asm-i386/pci.h @@ -0,0 +1,268 @@ +#ifndef __i386_PCI_H +#define __i386_PCI_H + +#include + +#ifdef __KERNEL__ + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif + +extern unsigned long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq); +struct irq_routing_table *pcibios_get_irq_routing_table(void); 
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * i386 has everything mapped statically.
+ */
+
+#include
+#include
+#include
+//#include
+#include
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space. The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS	(1)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be a valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
+ * is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+				  dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what was passed into pci_alloc_consistent,
+ * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+				void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+					size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		BUG();
+	flush_write_buffers();
+	return virt_to_bus(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation. The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+				    size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		BUG();
+	/* Nothing to do */
+}
+
+/*
+ * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. Identical
+ * to pci_map_single, but takes a struct page instead of a virtual address.
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct pfn_info *page,
+				      unsigned long offset, size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		BUG();
+
+	return (page - frame_table) * PAGE_SIZE + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+				  size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		BUG();
+	/* Nothing to do */
+}
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA. This is the scatter-gather version of the
+ * above pci_map_single interface. Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length. They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
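+ *
+ * An illustrative calling sketch ('program_hw' is a stand-in, not an API
+ * from this file):
+ *
+ *   int i, n = pci_map_sg(pdev, sg, nents, PCI_DMA_TODEVICE);
+ *   for (i = 0; i < n; i++)
+ *       program_hw(sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));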
+ * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + /* + * temporary 2.4 hack + */ + for (i = 0; i < nents; i++ ) { + if (sg[i].address && sg[i].page) + BUG(); + else if (!sg[i].address && !sg[i].page) + BUG(); + + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + } + + flush_write_buffers(); + return nents; +} + +/* Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +/* Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +static inline void pci_dma_sync_single(struct pci_dev *hwdev, + dma_addr_t dma_handle, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + flush_write_buffers(); +} + +/* Make physical memory consistent for a set of streaming + * mode DMA translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +static inline void pci_dma_sync_sg(struct pci_dev *hwdev, + struct scatterlist *sg, + int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + flush_write_buffers(); +} + +/* Return whether the given PCI device DMA address mask can + * be supported properly. For example, if your device can + * only drive the low 24-bits during PCI bus mastering, then + * you would pass 0x00ffffff as the mask to this function. + */ +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if(mask < 0x00ffffff) + return 0; + + return 1; +} + +/* This is always fine. */ +#define pci_dac_dma_supported(pci_dev, mask) (1) + +static __inline__ dma64_addr_t +pci_dac_page_to_dma(struct pci_dev *pdev, struct pfn_info *page, unsigned long offset, int direction) +{ + return ((dma64_addr_t) page_to_bus(page) + + (dma64_addr_t) offset); +} + +static __inline__ struct pfn_info * +pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + unsigned long poff = (dma_addr >> PAGE_SHIFT); + + return frame_table + poff; +} + +static __inline__ unsigned long +pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return (dma_addr & ~PAGE_MASK); +} + +static __inline__ void +pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) +{ + flush_write_buffers(); +} + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. 
+ */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + +/* Return the index of the PCI controller for device. */ +static inline int pci_controller_num(struct pci_dev *dev) +{ + return 0; +} + +#endif /* __KERNEL__ */ + +#endif /* __i386_PCI_H */ diff --git a/xen-2.4.16/include/asm-i386/pgalloc.h b/xen-2.4.16/include/asm-i386/pgalloc.h new file mode 100644 index 0000000000..fcba5e1585 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/pgalloc.h @@ -0,0 +1,117 @@ +#ifndef _I386_PGALLOC_H +#define _I386_PGALLOC_H + +#include +#include +#include +#include + +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + + +/* + * Allocate and free page tables. + */ + + +#define pte_free(pte) pte_free_fast(pte) +#define pgd_alloc(mm) get_pgd_fast() +#define pgd_free(pgd) free_pgd_fast(pgd) + +/* + * allocating and freeing a pmd is trivial: the 1-entry pmd is + * inside the pgd, so has no extra memory associated with it. + * (In the PAE case we free the pmds as part of the pgd.) + */ + +#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free_slow(x) do { } while (0) +#define pmd_free_fast(x) do { } while (0) +#define pmd_free(x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. 
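+ *
+ * A minimal usage sketch: after modifying page table entries belonging
+ * to 'mm', a caller issues
+ *
+ *   flush_tlb_mm(mm);
+ *
+ * so that stale translations for that address space are discarded.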
+ */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +#if 0 +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); +} +#endif + +static inline void flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +#else + +#include + +#define local_flush_tlb() \ + __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/ + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) +{ + flush_tlb_mm(mm); +} + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +struct tlb_state +{ + struct mm_struct *active_mm; + int state; +}; +extern struct tlb_state cpu_tlbstate[NR_CPUS]; + + +#endif + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* i386 does not keep any page table caches in TLB */ +} + +#endif /* _I386_PGALLOC_H */ diff --git a/xen-2.4.16/include/asm-i386/processor.h b/xen-2.4.16/include/asm-i386/processor.h new file mode 100644 index 0000000000..2e98c53134 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/processor.h @@ -0,0 +1,480 @@ +/* + * include/asm-i386/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_I386_PROCESSOR_H +#define __ASM_I386_PROCESSOR_H + +#include +#include +#include +#include +#include + + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. 
[mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + unsigned long *pgd_quick; + unsigned long *pmd_quick; + unsigned long *pte_quick; + unsigned long pgtable_cache_sz; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_RISE 6 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct tss_struct init_tss[NR_CPUS]; + +#ifdef CONFIG_SMP +extern struct cpuinfo_x86 cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) +#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability)) +#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability)) +#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability)) +#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability)) +#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability)) +#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability)) +#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability)) +#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability)) +#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); +extern void dodgy_tsc(void); + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Generic CPUID function + */ +static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +static inline unsigned int 
cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up + * after us can get the correct flags. + */ +extern unsigned long mmu_cr4_features; + +static inline void set_in_cr4 (unsigned long mask) +{ + mmu_cr4_features |= mask; + __asm__("movl %%cr4,%%eax\n\t" + "orl %0,%%eax\n\t" + "movl %%eax,%%cr4\n" + : : "irg" (mask) + :"ax"); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + mmu_cr4_features &= ~mask; + __asm__("movl %%cr4,%%eax\n\t" + "andl %0,%%eax\n\t" + "movl %%eax,%%cr4\n" + : : "irg" (~mask) + :"ax"); +} + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +#define EISA_bus (0) +#define MCA_bus (0) + +/* from system description table in BIOS. Mostly for MCA use, but +others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +/* + * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
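+ *
+ * That is, 32 longwords x 32 bits = 1024 bits, one bit per I/O port,
+ * covering ports 0x000-0x3ff.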
+ */ +#define IO_BITMAP_SIZE 32 +#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 + +struct i387_fsave_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + long status; /* software status information */ +}; + +struct i387_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +} __attribute__ ((aligned (16))); + +struct i387_soft_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + unsigned char ftop, changed, lookahead, no_update, rm, alimit; + struct info *info; + unsigned long entry_eip; +}; + +union i387_union { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct tss_struct { + unsigned short back_link,__blh; + unsigned long esp0; + unsigned short ss0,__ss0h; + unsigned long esp1; + unsigned short ss1,__ss1h; + unsigned long esp2; + unsigned short ss2,__ss2h; + unsigned long __cr3; + unsigned long eip; + unsigned long eflags; + unsigned long eax,ecx,edx,ebx; + unsigned long esp; + unsigned long ebp; + unsigned long esi; + unsigned long edi; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, bitmap; + unsigned long io_bitmap[IO_BITMAP_SIZE+1]; + /* + * pads the TSS to be cacheline-aligned (size is 0x100) + */ + unsigned long __cacheline_filler[5]; +}; + +struct thread_struct { + unsigned long esp0; /* top of the stack */ + unsigned long eip; /* in kernel space, saved on task switch */ + unsigned long esp; /* "" */ + unsigned long fs; /* "" (NB. DS/ES constant in mon, so no save) */ + unsigned long gs; /* "" ("") */ + unsigned long esp1, ss1; +/* Hardware debugging registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ +/* fault info */ + unsigned long cr2, trap_no, error_code; +/* floating point info */ + union i387_union i387; +/* Trap info. */ + trap_info_t traps[256]; +}; + +#define INIT_THREAD { \ + sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \ + 0, 0, 0, 0, 0, 0, \ + { [0 ... 
7] = 0 }, /* debugging registers */ \
+ 0, 0, 0, \
+ { { 0, }, }, /* 387 state */ \
+ { {0} } /* io permissions */ \
+}
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ sizeof(idle0_stack) + (long) &idle0_stack, /* esp0 */ \
+ __HYPERVISOR_DS, 0, /* ss0 */ \
+ 0,0,0,0,0,0, /* stack1, stack2 */ \
+ 0, /* cr3 */ \
+ 0,0, /* eip,eflags */ \
+ 0,0,0,0, /* eax,ecx,edx,ebx */ \
+ 0,0,0,0, /* esp,ebp,esi,edi */ \
+ 0,0,0,0,0,0, /* es,cs,ss */ \
+ 0,0,0,0,0,0, /* ds,fs,gs */ \
+ __LDT(0),0, /* ldt */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ {~0, } /* ioperm */ \
+}
+
+#define start_thread(regs, new_eip, new_esp) do { \
+	__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
+	set_fs(USER_DS); \
+	regs->xds = __USER_DS; \
+	regs->xes = __USER_DS; \
+	regs->xss = __USER_DS; \
+	regs->xcs = __USER_CS; \
+	regs->eip = new_eip; \
+	regs->esp = new_esp; \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+static inline unsigned long thread_saved_pc(struct thread_struct *t)
+{
+	return ((unsigned long *)t->esp)[3];
+}
+
+unsigned long get_wchan(struct task_struct *p);
+#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
+#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
+#define free_task_struct(p) free_pages((unsigned long) (p), 1)
+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count)
+
+#define idle0_task (idle0_task_union.task)
+#define idle0_stack (idle0_task_union.stack)
+
+struct microcode {
+	unsigned int hdrver;
+	unsigned int rev;
+	unsigned int date;
+	unsigned int sig;
+	unsigned int cksum;
+	unsigned int ldrver;
+	unsigned int pf;
+	unsigned int reserved[5];
+	unsigned int bits[500];
+};
+
+/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */
+#define MICROCODE_IOCFREE _IO('6',0)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops.
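+ *
+ * Typical pattern (a sketch; 'flag' stands for whatever condition is
+ * being polled):
+ *
+ *   while (!flag)
+ *       cpu_relax();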
*/ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop"); +} + +#define cpu_relax() rep_nop() + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xen-2.4.16/include/asm-i386/ptrace.h b/xen-2.4.16/include/asm-i386/ptrace.h new file mode 100644 index 0000000000..509001cf57 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/ptrace.h @@ -0,0 +1,86 @@ +#ifndef _I386_PTRACE_H +#define _I386_PTRACE_H + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_SETOPTIONS 21 + +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 0x00000001 + +enum EFLAGS { + EF_CF = 0x00000001, + EF_PF = 0x00000004, + EF_AF = 0x00000010, + EF_ZF = 0x00000040, + EF_SF = 0x00000080, + EF_TF = 0x00000100, + EF_IE = 0x00000200, + EF_DF = 0x00000400, + EF_OF = 0x00000800, + EF_IOPL = 0x00003000, + EF_IOPL_RING0 = 0x00000000, + EF_IOPL_RING1 = 0x00001000, + EF_IOPL_RING2 = 0x00002000, + EF_NT = 0x00004000, /* nested task */ + EF_RF = 0x00010000, /* resume */ + EF_VM = 0x00020000, /* virtual mode */ + EF_AC = 0x00040000, /* alignment */ + EF_VIF = 0x00080000, /* virtual interrupt */ + EF_VIP = 0x00100000, /* virtual interrupt pending */ + EF_ID = 0x00200000, /* id */ +}; + +#ifdef __KERNEL__ +#define user_mode(regs) ((3 & (regs)->xcs)) +#define instruction_pointer(regs) ((regs)->eip) +extern void show_regs(struct pt_regs *); +#endif + +#endif diff --git a/xen-2.4.16/include/asm-i386/rwlock.h b/xen-2.4.16/include/asm-i386/rwlock.h new file mode 100644 index 0000000000..9475419f95 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/rwlock.h @@ -0,0 +1,83 @@ +/* include/asm-i386/rwlock.h + * + * Helpers used by both rw spinlocks and rw semaphores. + * + * Based in part on code from semaphore.h and + * spinlock.h Copyright 1996 Linus Torvalds. + * + * Copyright 1999 Red Hat, Inc. + * + * Written by Benjamin LaHaise. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _ASM_I386_RWLOCK_H +#define _ASM_I386_RWLOCK_H + +#define RW_LOCK_BIAS 0x01000000 +#define RW_LOCK_BIAS_STR "0x01000000" + +#define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "js 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#define __build_read_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_read_lock_const(rw, helper); \ + else \ + __build_read_lock_ptr(rw, helper); \ + } while (0) + +#define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + ".previous" \ + ::"a" (rw) : "memory") + +#define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + ".previous" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#define __build_write_lock(rw, helper) do { \ + if (__builtin_constant_p(rw)) \ + __build_write_lock_const(rw, helper); \ + else \ + __build_write_lock_ptr(rw, helper); \ + } while (0) + +#endif diff --git a/xen-2.4.16/include/asm-i386/scatterlist.h b/xen-2.4.16/include/asm-i386/scatterlist.h new file mode 100644 index 0000000000..9d858415db --- /dev/null +++ b/xen-2.4.16/include/asm-i386/scatterlist.h @@ -0,0 +1,16 @@ +#ifndef _I386_SCATTERLIST_H +#define _I386_SCATTERLIST_H + +struct scatterlist { + char * address; /* Location data is to be transferred to, NULL for + * highmem page */ + struct pfn_info * page; /* Location for highmem page, if any */ + unsigned int offset;/* for highmem, page offset */ + + dma_addr_t dma_address; + unsigned int length; +}; + +#define ISA_DMA_THRESHOLD (0x00ffffff) + +#endif /* !(_I386_SCATTERLIST_H) */ diff --git a/xen-2.4.16/include/asm-i386/smp.h b/xen-2.4.16/include/asm-i386/smp.h new file mode 100644 index 0000000000..560f5ead19 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/smp.h @@ -0,0 +1,94 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#ifndef __ASSEMBLY__ +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifdef CONFIG_SMP +#define TARGET_CPUS cpu_online_map +#define INT_DELIVERY_MODE 1 /* logical delivery broadcast to all procs */ +#else +#define INT_DELIVERY_MODE 1 /* logical delivery */ +#define TARGET_CPUS 0x01 +#endif + +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern unsigned long phys_cpu_present_map; +extern unsigned long cpu_online_map; +extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +/* + * On x86 all CPUs are mapped 1:1 to the APIC space. 
+ * This simplifies scheduling and IPI sending and + * compresses data structures. + */ +static inline int cpu_logical_map(int cpu) +{ + return cpu; +} +static inline int cpu_number_map(int cpu) +{ + return cpu; +} + +/* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. + */ +#define MAX_APICID 256 +extern volatile int cpu_to_physical_apicid[NR_CPUS]; +extern volatile int physical_apicid_to_cpu[MAX_APICID]; +extern volatile int cpu_to_logical_apicid[NR_CPUS]; +extern volatile int logical_apicid_to_cpu[MAX_APICID]; + +/* + * General functions that each host system must provide. + */ + +extern void smp_boot_cpus(void); +extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers) */ + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ + +#define smp_processor_id() (current->processor) + +static __inline int hard_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); +} + +static __inline int logical_smp_processor_id(void) +{ + /* we don't want to mark this access volatile - bad code generation */ + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); +} + +#endif /* !__ASSEMBLY__ */ + +#endif +#endif diff --git a/xen-2.4.16/include/asm-i386/smpboot.h b/xen-2.4.16/include/asm-i386/smpboot.h new file mode 100644 index 0000000000..67bbedbd08 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/smpboot.h @@ -0,0 +1,28 @@ +#ifndef __ASM_SMPBOOT_H +#define __ASM_SMPBOOT_H + +#define TRAMPOLINE_LOW phys_to_virt(0x467) +#define TRAMPOLINE_HIGH phys_to_virt(0x469) + +#define boot_cpu_apicid boot_cpu_physical_apicid + +/* How to map from the cpu_present_map. 
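+ * + * With the identity scheme above, a boot-time walk over the present + * CPUs might look like this (a sketch only; setup_cpu() is a + * hypothetical helper): + * + *     for (i = 0; i < NR_CPUS; i++) + *         if (phys_cpu_present_map & (1UL << i)) + *             setup_cpu(cpu_present_to_apicid(i)); + * + * The macros below just spell out that identity mapping.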
*/ +#define cpu_present_to_apicid(apicid) (apicid) + +/* + * Mappings between logical cpu number and logical / physical apicid + * The first four macros are trivial, but it keeps the abstraction consistent + */ +extern volatile int logical_apicid_2_cpu[]; +extern volatile int cpu_2_logical_apicid[]; +extern volatile int physical_apicid_2_cpu[]; +extern volatile int cpu_2_physical_apicid[]; + +#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid] +#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu] +#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] +#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu] +#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid] +#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu] + +#endif diff --git a/xen-2.4.16/include/asm-i386/softirq.h b/xen-2.4.16/include/asm-i386/softirq.h new file mode 100644 index 0000000000..254224411b --- /dev/null +++ b/xen-2.4.16/include/asm-i386/softirq.h @@ -0,0 +1,48 @@ +#ifndef __ASM_SOFTIRQ_H +#define __ASM_SOFTIRQ_H + +#include +#include + +#define __cpu_bh_enable(cpu) \ + do { barrier(); local_bh_count(cpu)--; } while (0) +#define cpu_bh_disable(cpu) \ + do { local_bh_count(cpu)++; barrier(); } while (0) + +#define local_bh_disable() cpu_bh_disable(smp_processor_id()) +#define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) + +#define in_softirq() (local_bh_count(smp_processor_id()) != 0) + +/* + * NOTE: this assembly code assumes: + * + * (char *)&local_bh_count - 8 == (char *)&softirq_pending + * + * If you change the offsets in irq_stat then you have to + * update this code as well. + */ +#define local_bh_enable() \ +do { \ + unsigned int *ptr = &local_bh_count(smp_processor_id()); \ + \ + barrier(); \ + if (!--*ptr) \ + __asm__ __volatile__ ( \ + "cmpl $0, -8(%0);" \ + "jnz 2f;" \ + "1:;" \ + \ + ".section .text.lock,\"ax\";" \ + "2: pushl %%eax; pushl %%ecx; pushl %%edx;" \ + "call %c1;" \ + "popl %%edx; popl %%ecx; popl %%eax;" \ + "jmp 1b;" \ + ".previous;" \ + \ + : /* no output */ \ + : "r" (ptr), "i" (do_softirq) \ + /* no registers clobbered */ ); \ +} while (0) + +#endif /* __ASM_SOFTIRQ_H */ diff --git a/xen-2.4.16/include/asm-i386/spinlock.h b/xen-2.4.16/include/asm-i386/spinlock.h new file mode 100644 index 0000000000..59dc7b209f --- /dev/null +++ b/xen-2.4.16/include/asm-i386/spinlock.h @@ -0,0 +1,206 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include +#include +#include + +#if 0 +#define SPINLOCK_DEBUG 1 +#else +#define SPINLOCK_DEBUG 0 +#endif + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#if SPINLOCK_DEBUG +#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC +#else +#define SPINLOCK_MAGIC_INIT /* */ +#endif + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. 
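+ * + * Typical usage, with an illustrative lock name: + * + *     static spinlock_t queue_lock = SPIN_LOCK_UNLOCKED; + * + *     spin_lock(&queue_lock); + *     ... touch the shared state ... + *     spin_unlock(&queue_lock); + * + * spin_trylock() below returns non-zero if the lock was acquired, so + * callers that must not spin can back off instead.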
+ */ + +#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\t" \ + "cmpb $0,%0\n\t" \ + "rep;nop\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + ".previous" + +/* + * This works. Despite all the confusion. + * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) + +#define spin_unlock_string \ + "movb $1,%0" \ + :"=m" (lock->lock) : : "memory" + + +static inline void spin_unlock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#else + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->lock) \ + :"0" (oldval) : "memory" + +static inline void spin_unlock(spinlock_t *lock) +{ + char oldval = 1; +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#endif + +static inline int spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1" + :"=q" (oldval), "=m" (lock->lock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void spin_lock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + __label__ here; +here: + if (lock->magic != SPINLOCK_MAGIC) { +printk("eip: %p\n", &&here); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->lock) : : "memory"); +} + + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} rwlock_t; + +#define RWLOCK_MAGIC 0xdeaf1eed + +#if SPINLOCK_DEBUG +#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT /* */ +#endif + +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +/* + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + * + * The inline assembly is non-obvious. Think about it. + * + * Changed to use the same technique as rw semaphores. See + * semaphore.h for details. 
-ben + */ +/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ + +static inline void read_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_read_lock(rw, "__read_lock_failed"); +} + +static inline void write_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_write_lock(rw, "__write_lock_failed"); +} + +#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") + +static inline int write_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +#endif /* __ASM_SPINLOCK_H */ diff --git a/xen-2.4.16/include/asm-i386/system.h b/xen-2.4.16/include/asm-i386/system.h new file mode 100644 index 0000000000..18d29014d8 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/system.h @@ -0,0 +1,354 @@ +#ifndef __ASM_SYSTEM_H +#define __ASM_SYSTEM_H + +#include +#include + +struct task_struct; /* one of the stranger aspects of C forward declarations.. */ +extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); + +#define prepare_to_switch() do { } while(0) +#define switch_to(prev,next) do { \ + asm volatile("pushl %%ebp\n\t" \ + "pushl %%ebx\n\t" \ + "pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "cli\n\t" \ + "movl %2,%%esp\n\t" /* restore ESP */ \ + "movl %6,%%cr3\n\t" /* restore pagetables */\ + "sti\n\t" \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %3\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + "popl %%ebx\n\t" \ + "popl %%ebp\n\t" \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip) \ + :"m" (next->thread.esp),"m" (next->thread.eip), \ + "a" (prev), "d" (next), \ + "c" (__pa(pagetable_ptr(next->mm.pagetable))) \ + :"memory"); \ +} while (0) + +#define _set_base(addr,base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while(0) + +#define _set_limit(addr,limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while(0) + +#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) +#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) + +static inline unsigned long _get_base(char * addr) +{ + unsigned long __base; + __asm__("movb %3,%%dh\n\t" + "movb %2,%%dl\n\t" + "shll $16,%%edx\n\t" + "movw %1,%%dx" + :"=&d" (__base) + :"m" (*((addr)+2)), + "m" (*((addr)+4)), + "m" (*((addr)+7))); + return __base; +} + +#define get_base(ldt) _get_base( ((char *)&(ldt)) ) + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. 
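+ * + * The recovery works through the __ex_table section emitted below: on a + * fault, the exception handler looks up the faulting EIP and resumes at + * the fixup code, which loads the null selector instead. A typical call + * (the value shown is illustrative): + * + *     loadsegment(fs, next->thread.fs); + * + * so the macro is safe even against a stale or bogus selector value.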
+ */ +#define loadsegment(seg,value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "pushl $0\n\t" \ + "popl %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,3b\n" \ + ".previous" \ + : :"m" (*(unsigned int *)&(value))) + +/* + * Clear and set 'TS' bit respectively + */ +#define clts() __asm__ __volatile__ ("clts") +#define read_cr0() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr0,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr0(x) \ + __asm__("movl %0,%%cr0": :"r" (x)); + +#define read_cr4() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr4,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr4(x) \ + __asm__("movl %0,%%cr4": :"r" (x)); +#define stts() write_cr0(8 | read_cr0()) + +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory"); + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +#define nop() __asm__ __volatile__ ("nop") + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + +#define tas(ptr) (xchg((ptr),1)) + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + */ +static inline void __set_64bit (unsigned long long * ptr, + unsigned int low, unsigned int high) +{ + __asm__ __volatile__ ( + "\n1:\t" + "movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + "cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax","dx","memory"); +} + +static inline void __set_64bit_constant (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); +} +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) + +static inline void __set_64bit_var (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit_constant(ptr, value) : \ + __set_64bit_var(ptr, value) ) + +#define _set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ + __set_64bit(ptr, ll_low(value), ll_high(value)) ) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. 
--ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +#else +/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ +#endif + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() + +#ifdef CONFIG_X86_OOSTORE +#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#else +#define wmb() __asm__ __volatile__ ("": : :"memory") +#endif + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +/* interrupt control.. 
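+ * + * For a short section that must not be interrupted, the usual pattern + * is the save/restore pair: + * + *     unsigned long flags; + * + *     local_irq_save(flags); + *     ... do the non-interruptible work ... + *     local_irq_restore(flags); + * + * Unlike a bare cli/sti pair this nests safely, because the previous + * IF state is restored rather than unconditionally set.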
*/ +#define __save_flags(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */) +#define __restore_flags(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc") +#define __cli() __asm__ __volatile__("cli": : :"memory") +#define __sti() __asm__ __volatile__("sti": : :"memory") +/* used in the idle loop; sti takes one instruction cycle to complete */ +#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") + +/* For spinlocks etc */ +#define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#ifdef CONFIG_SMP + +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) + +#else + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) + +#endif + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +void disable_hlt(void); +void enable_hlt(void); + +#define BROKEN_ACPI_Sx 0x0001 +#define BROKEN_INIT_AFTER_S1 0x0002 + +#endif diff --git a/xen-2.4.16/include/asm-i386/types.h b/xen-2.4.16/include/asm-i386/types.h new file mode 100644 index 0000000000..2bd0f258b9 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/types.h @@ -0,0 +1,50 @@ +#ifndef _I386_TYPES_H +#define _I386_TYPES_H + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#include + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +#define BITS_PER_LONG 32 + +/* DMA addresses come in generic and 64-bit flavours. */ + +#ifdef CONFIG_HIGHMEM +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif +typedef u64 dma64_addr_t; + +#endif diff --git a/xen-2.4.16/include/asm-i386/uaccess.h b/xen-2.4.16/include/asm-i386/uaccess.h new file mode 100644 index 0000000000..5b4ba6e976 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/uaccess.h @@ -0,0 +1,596 @@ +#ifndef __i386_UACCESS_H +#define __i386_UACCESS_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. 
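+ * + * In practice a caller either uses the checking copy_to_user() and + * copy_from_user() wrappers defined later, or brackets the __-prefixed + * variants with an explicit check, e.g. (buf and len illustrative): + * + *     if (!access_ok(VERIFY_WRITE, buf, len)) + *         return -EFAULT; + * + * set_fs(KERNEL_DS) widens the limit so the same routines can be aimed + * at kernel addresses; it should always be paired with a restoring + * set_fs().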
+ */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->addr_limit) +#define set_fs(x) (current->addr_limit = (x)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +extern int __verify_write(const void *, unsigned long); + +#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) + +/* + * Uhhuh, this needs 33-bit arithmetic. We have a carry.. + */ +#define __range_ok(addr,size) ({ \ + unsigned long flag,sum; \ + asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ + :"=&r" (flag), "=r" (sum) \ + :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ + flag; }) + +#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) + +static inline int verify_area(int type, const void * addr, unsigned long size) +{ + return access_ok(type,addr,size) ? 0 : -EFAULT; +} + + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. + */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); + + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the uglyness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). 
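+ * + * A typical single-value fetch, where uaddr is a pointer supplied by + * the application (names illustrative): + * + *     int val; + * + *     if (get_user(val, (int *)uaddr)) + *         return -EFAULT; + * + * get_user() evaluates to 0 on success; on a faulting access it yields + * -EFAULT and the exception-table machinery described above skips the + * offending instruction.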
+ */ + +extern void __get_user_1(void); +extern void __get_user_2(void); +extern void __get_user_4(void); + +#define __get_user_x(size,ret,x,ptr) \ + __asm__ __volatile__("call __get_user_" #size \ + :"=a" (ret),"=d" (x) \ + :"0" (ptr)) + +/* Careful: we have to cast the result to the type of the pointer for sign reasons */ +#define get_user(x,ptr) \ +({ int __ret_gu,__val_gu; \ + switch(sizeof (*(ptr))) { \ + case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ + case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ + case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ + default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +extern void __put_user_bad(void); + +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err; \ + __put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ +}) + + +#define __put_user_check(x,ptr,size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __put_user_size((x),__pu_addr,(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_u64(x, addr, err) \ + __asm__ __volatile__( \ + "1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,4b\n" \ + " .long 2b,4b\n" \ + ".previous" \ + : "=r"(err) \ + : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) + +#define __put_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ + case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ + case 8: __put_user_u64(x,ptr,retval); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
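+ * + * (That is why the "m" operand of __put_user_asm() below sits on the + * input side even though the instruction stores through it.) Stores + * mirror the fetch side, e.g.: + * + *     if (put_user(val, (int *)uaddr)) + *         return -EFAULT;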
+ */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + long __gu_err, __gu_val; \ + __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern long __get_user_bad(void); + +#define __get_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ + case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ + default: (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err), ltype (x) \ + : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +/* + * Copy To/From Userspace + */ + +/* Generic arbitrary sized copy. */ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +/* We let the __ versions of copy_from/to_user inline, because they're often + * used in fast paths and have only a small space overhead. + */ +static inline unsigned long +__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user_zeroing(to,from,n); + return n; +} + +static inline unsigned long +__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user(to,from,n); + return n; +} + + +/* Optimize just a little bit when we know the size of the move. 
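+ * + * The switch below dispatches on (size & 3): whole words move in one + * rep;movsl, and the 0-3 trailing bytes are finished with an unrolled + * movsw/movsb tail instead of a generic rep;movsb. The size must be a + * compile-time constant, which is what the __builtin_constant_p() test + * in copy_to_user() guarantees.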
*/ +#define __constant_copy_user(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: shl $2,%0\n" \ + "5: addl $2,%0\n" \ + "6: incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +/* Optimize just a little bit when we know the size of the move. 
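+ * + * This variant additionally zeroes whatever part of the destination + * buffer was not copied when a fault occurs (the rep;stos sequences in + * the fixup paths), which callers of copy_from_user() rely on so the + * destination never holds stale, uninitialised data.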
*/ +#define __constant_copy_user_zeroing(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "5: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "6: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +unsigned long __generic_copy_to_user(void *, const void *, unsigned long); +unsigned long __generic_copy_from_user(void *, const void *, unsigned long); + +static inline unsigned long +__constant_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __constant_copy_user(to,from,n); + return n; +} + +static inline unsigned long +__constant_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + __constant_copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +static inline unsigned long +__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user(to,from,n); + return 
n; +} + +static inline unsigned long +__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user_zeroing(to,from,n); + return n; +} + +#define copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user((to),(from),(n)) : \ + __generic_copy_to_user((to),(from),(n))) + +#define copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_from_user((to),(from),(n)) : \ + __generic_copy_from_user((to),(from),(n))) + +#define __copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user_nocheck((to),(from),(n)) : \ + __generic_copy_to_user_nocheck((to),(from),(n))) + +#define __copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_from_user_nocheck((to),(from),(n)) : \ + __generic_copy_from_user_nocheck((to),(from),(n))) + +long strncpy_from_user(char *dst, const char *src, long count); +long __strncpy_from_user(char *dst, const char *src, long count); +#define strlen_user(str) strnlen_user(str, ~0UL >> 1) +long strnlen_user(const char *str, long n); +unsigned long clear_user(void *mem, unsigned long len); +unsigned long __clear_user(void *mem, unsigned long len); + +#endif /* __i386_UACCESS_H */ diff --git a/xen-2.4.16/include/asm-i386/unaligned.h b/xen-2.4.16/include/asm-i386/unaligned.h new file mode 100644 index 0000000000..7acd795762 --- /dev/null +++ b/xen-2.4.16/include/asm-i386/unaligned.h @@ -0,0 +1,37 @@ +#ifndef __I386_UNALIGNED_H +#define __I386_UNALIGNED_H + +/* + * The i386 can do unaligned accesses itself. + * + * The strange macros are there to make sure these can't + * be misused in a way that makes them not work on other + * architectures where unaligned accesses aren't as simple. + */ + +/** + * get_unaligned - get value from possibly mis-aligned location + * @ptr: pointer to value + * + * This macro should be used for accessing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. retrieving a u16 value from a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. + */ +#define get_unaligned(ptr) (*(ptr)) + +/** + * put_unaligned - put value to a possibly mis-aligned location + * @val: value to place + * @ptr: pointer to location + * + * This macro should be used for placing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. writing a u16 value to a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. + */ +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +#endif diff --git a/xen-2.4.16/include/hypervisor-ifs/block.h b/xen-2.4.16/include/hypervisor-ifs/block.h new file mode 100644 index 0000000000..55f7a33ce2 --- /dev/null +++ b/xen-2.4.16/include/hypervisor-ifs/block.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * block.h + * + * Block IO communication rings. + * + * These are the ring data structures for buffering messages between + * the hypervisor and guestos's. + * + * For now we'll start with our own rings for the block IO code instead + * of using the network rings. Hopefully, this will give us additional + * flexibility in the future should we choose to move away from a + * ring producer consumer communication model. 
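+ * + * As with the network rings, each ring is indexed by a producer and a + * consumer. A guest would queue a transmit buffer roughly as follows + * (a sketch only, ignoring full-ring and wrap-around checks): + * + *     ring->tx_ring[ring->tx_prod].addr = (unsigned long)buf; + *     ring->tx_ring[ring->tx_prod].size = nbytes; + *     ring->tx_prod = (ring->tx_prod + 1) % ring->tx_ring_size; + * + * and then notify the hypervisor, which consumes from tx_cons.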
+ */ + +#ifndef __BLOCK_H__ +#define __BLOCK_H__ + +typedef struct blk_tx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} blk_tx_entry_t; + +typedef struct blk_rx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} blk_rx_entry_t; + +typedef struct blk_ring_st { + blk_tx_entry_t *tx_ring; + unsigned int tx_prod, tx_cons, tx_event; + unsigned int tx_ring_size; + + blk_rx_entry_t *rx_ring; + unsigned int rx_prod, rx_cons, rx_event; + unsigned int rx_ring_size; +} blk_ring_t; + +int blk_create_ring(int domain, unsigned long ptr); + +#endif diff --git a/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h new file mode 100644 index 0000000000..5f57125e6e --- /dev/null +++ b/xen-2.4.16/include/hypervisor-ifs/hypervisor-if.h @@ -0,0 +1,272 @@ +/****************************************************************************** + * hypervisor-if.h + * + * Interface to Xeno hypervisor. + */ + +#include +#include + +#ifndef __HYPERVISOR_IF_H__ +#define __HYPERVISOR_IF_H__ + +typedef struct trap_info_st +{ + unsigned char vector; /* exception/interrupt vector */ + unsigned char dpl; /* privilege level */ + unsigned short cs; /* code selector */ + unsigned long address; /* code address */ +} trap_info_t; + + +typedef struct +{ +#define PGREQ_ADD_BASEPTR 0 +#define PGREQ_REMOVE_BASEPTR 1 + unsigned long ptr, val; /* *ptr = val */ +} page_update_request_t; + + +/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_pt_update 1 +#define __HYPERVISOR_console_write 2 +#define __HYPERVISOR_set_pagetable 3 +#define __HYPERVISOR_set_guest_stack 4 +#define __HYPERVISOR_net_update 5 +#define __HYPERVISOR_fpu_taskswitch 6 +#define __HYPERVISOR_yield 7 +#define __HYPERVISOR_exit 8 +#define __HYPERVISOR_dom0_op 9 +#define __HYPERVISOR_network_op 10 + +#define TRAP_INSTR "int $0x82" + + +static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), + "b" (table) ); + + return ret; +} + + +static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_pt_update), + "b" (req), "c" (count) ); + + return ret; +} + + +static inline int HYPERVISOR_console_write(const char *str, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_console_write), + "b" (str), "c" (count) ); + + + return ret; +} + +static inline int HYPERVISOR_set_pagetable(unsigned long ptr) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_pagetable), + "b" (ptr) ); + + return ret; +} + +static inline int HYPERVISOR_set_guest_stack( + unsigned long ss, unsigned long esp) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_guest_stack), + "b" (ss), "c" (esp) ); + + return ret; +} + +static inline int HYPERVISOR_net_update(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_net_update) ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) ); + + return ret; +} + +static inline int HYPERVISOR_yield(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" 
(ret) : "0" (__HYPERVISOR_yield) ); + + return ret; +} + +static inline int HYPERVISOR_exit(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_exit) ); + + return ret; +} + +static inline int HYPERVISOR_dom0_op(void *dom0_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), + "b" (dom0_op) ); + + return ret; +} + +static inline int HYPERVISOR_network_op(void *network_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_network_op), + "b" (network_op) ); + + return ret; +} + +/* Events that a guest OS may receive from the hypervisor. */ +#define EVENT_NET_TX 0x01 /* packets for transmission. */ +#define EVENT_NET_RX 0x02 /* empty buffers for receive. */ +#define EVENT_TIMER 0x04 /* a timeout has been updated. */ +#define EVENT_DIE 0x08 /* OS is about to be killed. Clean up please! */ +#define EVENT_BLK_TX 0x10 /* packets for transmission. */ +#define EVENT_BLK_RX 0x20 /* empty buffers for receive. */ + +/* Bit offsets, as opposed to the above masks. */ +#define _EVENT_NET_TX 0 +#define _EVENT_NET_RX 1 +#define _EVENT_TIMER 2 +#define _EVENT_DIE 3 +#define _EVENT_BLK_TX 4 +#define _EVENT_BLK_RX 5 + +/* + * NB. We expect that this struct is smaller than a page. + */ +typedef struct shared_info_st { + + /* Bitmask of outstanding event notifications hypervisor -> guest OS. */ + unsigned long events; + /* + * Hypervisor will only signal event delivery via the "callback + * exception" when this value is non-zero. Hypervisor clears this when + * notifying the guest OS -- this prevents unbounded reentrancy and + * stack overflow (in this way, acts as an interrupt-enable flag). + */ + unsigned long events_enable; + + /* + * Address for callbacks hypervisor -> guest OS. + * Stack frame looks like that of an interrupt. + * Code segment is the default flat selector. + * This handler will only be called when events_enable is non-zero. + */ + unsigned long event_address; + + /* + * Hypervisor uses this callback when it takes a fault on behalf of + * an application. This can happen when returning from interrupts for + * example: various faults can occur when reloading the segment + * registers, and executing 'iret'. + * This callback is provided with an extended stack frame, augmented + * with saved values for segment registers %ds and %es: + * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] + * Code segment is the default flat selector. + * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!! + */ + unsigned long failsafe_address; + + /* + * CPU ticks since start of day. + * `wall_time' counts CPU ticks in real time. + * `domain_time' counts CPU ticks during which this domain has run. + */ + unsigned long ticks_per_ms; /* CPU ticks per millisecond */ + /* + * Current wall_time can be found by rdtsc. Only possible use of + * variable below is that it provides a timestamp for last update + * of domain_time. + */ + unsigned long long wall_time; + unsigned long long domain_time; + + /* + * Timeouts for points at which guest OS would like a callback. + * This will probably be backed up by a timer heap in the guest OS. + * In Linux we use timeouts to update 'jiffies'. + */ + unsigned long long wall_timeout; + unsigned long long domain_timeout; + + /* + * Real-Time Clock. This shows time, in seconds, since 1.1.1980. + * The timestamp shows the CPU 'wall time' when RTC was last read. + * Thus it allows a mapping between 'real time' and 'wall time'. 
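+ * + * A guest can therefore estimate the current RTC-based time, roughly + * and ignoring wrap-around, as: + * + *     now = rtc_time + (wall_time - rtc_timestamp) + *                          / (ticks_per_ms * 1000); + * + * the division converting elapsed CPU ticks into seconds.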
+ */ + unsigned long rtc_time; + unsigned long long rtc_timestamp; + +} shared_info_t; + +/* + * NB. We expect that this struct is smaller than a page. + */ +typedef struct start_info_st { + unsigned long nr_pages; /* total pages allocated to this domain */ + shared_info_t *shared_info; /* start address of shared info struct */ + unsigned long pt_base; /* address of page directory */ + unsigned long phys_base; + unsigned long mod_start; /* start address of pre-loaded module */ + unsigned long mod_len; /* size (bytes) of pre-loaded module */ + net_ring_t *net_rings; + int num_net_rings; + blk_ring_t *blk_ring; /* block io communication rings */ + unsigned char cmd_line[1]; /* variable-length */ + unsigned long frame_table; /* mapping of the frame_table for dom0 */ +} start_info_t; + +/* For use in guest OSes. */ +extern shared_info_t *HYPERVISOR_shared_info; + +#endif /* __HYPERVISOR_IF_H__ */ diff --git a/xen-2.4.16/include/hypervisor-ifs/network.h b/xen-2.4.16/include/hypervisor-ifs/network.h new file mode 100644 index 0000000000..30a5bf3414 --- /dev/null +++ b/xen-2.4.16/include/hypervisor-ifs/network.h @@ -0,0 +1,108 @@ +/****************************************************************************** + * network.h + * + * ring data structures for buffering messages between hypervisor and + * guestos's. As it stands this is only used for network buffer exchange. + * + * This file also contains structures and interfaces for the per-domain + * routing/filtering tables in the hypervisor. + * + */ + +#ifndef __RING_H__ +#define __RING_H__ + +#include + +typedef struct tx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} tx_entry_t; + +typedef struct rx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} rx_entry_t; + +#define TX_RING_SIZE 1024 +#define RX_RING_SIZE 1024 +typedef struct net_ring_st { + /* + * Guest OS places packets into ring at tx_prod. + * Hypervisor removes at tx_cons. + * Ring is empty when tx_prod == tx_cons. + * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event. + * Hypervisor may be prodded whenever tx_prod is updated, but this is + * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled). + */ + tx_entry_t *tx_ring; + unsigned int tx_prod, tx_cons, tx_event; + unsigned int tx_ring_size; + /* + * Guest OS places empty buffers into ring at rx_prod. + * Hypervisor fills buffers as rx_cons. + * Ring is empty when rx_prod == rx_cons. + * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event. + * Hypervisor may be prodded whenever rx_prod is updated, but this is + * only necessary when rx_cons == old_rx_prod (ie. receiver stalled). + */ + rx_entry_t *rx_ring; + unsigned int rx_prod, rx_cons, rx_event; + unsigned int rx_ring_size; +} net_ring_t; + +/* This is here for consideration: Having a global lookup for vifs + * may make the guest /proc stuff more straight forward, and could + * be used in the routing code. I don't know if it warrants the + * overhead yet. + */ + +/* net_vif_t sys_vif_list[MAX_SYSTEM_VIFS]; */ + +/* Specify base of per-domain array. Get returned free slot in the array. 
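+ * + * e.g. the domain-creation path would call this once per virtual + * interface and hand the returned net_ring_t to the new guest through + * the net_rings field of its start_info (see hypervisor-if.h).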
*/ +net_ring_t *create_net_vif(int domain); + +/* Packet routing/filtering code follows: + */ + +#define NETWORK_ACTION_DROP 0 +#define NETWORK_ACTION_PASS 1 + +typedef struct net_rule_st +{ + u32 src_addr; + u32 dst_addr; + u16 src_port; + u16 dst_port; + u32 src_addr_mask; + u32 dst_addr_mask; + u16 src_port_mask; + u16 dst_port_mask; + + int src_interface; + int dst_interface; + int action; +} net_rule_t; + +/* Network trap operations and associated structure. + * This presently just handles rule insertion and deletion, but will + * evenually have code to add and remove interfaces. + */ + +#define NETWORK_OP_ADDRULE 0 +#define NETWORK_OP_DELETERULE 1 + +typedef struct network_op_st +{ + unsigned long cmd; + union + { + net_rule_t net_rule; + } + u; +} network_op_t; + +/* Drop a new rule down to the network tables. */ +int add_net_rule(net_rule_t *rule); + +#endif diff --git a/xen-2.4.16/include/scsi/scsi.h b/xen-2.4.16/include/scsi/scsi.h new file mode 100644 index 0000000000..ffcb419482 --- /dev/null +++ b/xen-2.4.16/include/scsi/scsi.h @@ -0,0 +1,237 @@ +#ifndef _LINUX_SCSI_H +#define _LINUX_SCSI_H + +/* + * This header file contains public constants and structures used by + * the scsi code for linux. + */ + +/* + $Header: /usr/src/linux/include/linux/RCS/scsi.h,v 1.3 1993/09/24 12:20:33 drew Exp $ + + For documentation on the OPCODES, MESSAGES, and SENSE values, + please consult the SCSI standard. + +*/ + +/* + * SCSI opcodes + */ + +#define TEST_UNIT_READY 0x00 +#define REZERO_UNIT 0x01 +#define REQUEST_SENSE 0x03 +#define FORMAT_UNIT 0x04 +#define READ_BLOCK_LIMITS 0x05 +#define REASSIGN_BLOCKS 0x07 +#define READ_6 0x08 +#define WRITE_6 0x0a +#define SEEK_6 0x0b +#define READ_REVERSE 0x0f +#define WRITE_FILEMARKS 0x10 +#define SPACE 0x11 +#define INQUIRY 0x12 +#define RECOVER_BUFFERED_DATA 0x14 +#define MODE_SELECT 0x15 +#define RESERVE 0x16 +#define RELEASE 0x17 +#define COPY 0x18 +#define ERASE 0x19 +#define MODE_SENSE 0x1a +#define START_STOP 0x1b +#define RECEIVE_DIAGNOSTIC 0x1c +#define SEND_DIAGNOSTIC 0x1d +#define ALLOW_MEDIUM_REMOVAL 0x1e + +#define SET_WINDOW 0x24 +#define READ_CAPACITY 0x25 +#define READ_10 0x28 +#define WRITE_10 0x2a +#define SEEK_10 0x2b +#define WRITE_VERIFY 0x2e +#define VERIFY 0x2f +#define SEARCH_HIGH 0x30 +#define SEARCH_EQUAL 0x31 +#define SEARCH_LOW 0x32 +#define SET_LIMITS 0x33 +#define PRE_FETCH 0x34 +#define READ_POSITION 0x34 +#define SYNCHRONIZE_CACHE 0x35 +#define LOCK_UNLOCK_CACHE 0x36 +#define READ_DEFECT_DATA 0x37 +#define MEDIUM_SCAN 0x38 +#define COMPARE 0x39 +#define COPY_VERIFY 0x3a +#define WRITE_BUFFER 0x3b +#define READ_BUFFER 0x3c +#define UPDATE_BLOCK 0x3d +#define READ_LONG 0x3e +#define WRITE_LONG 0x3f +#define CHANGE_DEFINITION 0x40 +#define WRITE_SAME 0x41 +#define READ_TOC 0x43 +#define LOG_SELECT 0x4c +#define LOG_SENSE 0x4d +#define MODE_SELECT_10 0x55 +#define RESERVE_10 0x56 +#define RELEASE_10 0x57 +#define MODE_SENSE_10 0x5a +#define PERSISTENT_RESERVE_IN 0x5e +#define PERSISTENT_RESERVE_OUT 0x5f +#define MOVE_MEDIUM 0xa5 +#define READ_12 0xa8 +#define WRITE_12 0xaa +#define WRITE_VERIFY_12 0xae +#define SEARCH_HIGH_12 0xb0 +#define SEARCH_EQUAL_12 0xb1 +#define SEARCH_LOW_12 0xb2 +#define READ_ELEMENT_STATUS 0xb8 +#define SEND_VOLUME_TAG 0xb6 +#define WRITE_LONG_2 0xea + +/* + * Status codes + */ + +#define GOOD 0x00 +#define CHECK_CONDITION 0x01 +#define CONDITION_GOOD 0x02 +#define BUSY 0x04 +#define INTERMEDIATE_GOOD 0x08 +#define INTERMEDIATE_C_GOOD 0x0a +#define RESERVATION_CONFLICT 0x0c +#define 
COMMAND_TERMINATED 0x11 +#define QUEUE_FULL 0x14 + +#define STATUS_MASK 0x3e + +/* + * SENSE KEYS + */ + +#define NO_SENSE 0x00 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x02 +#define MEDIUM_ERROR 0x03 +#define HARDWARE_ERROR 0x04 +#define ILLEGAL_REQUEST 0x05 +#define UNIT_ATTENTION 0x06 +#define DATA_PROTECT 0x07 +#define BLANK_CHECK 0x08 +#define COPY_ABORTED 0x0a +#define ABORTED_COMMAND 0x0b +#define VOLUME_OVERFLOW 0x0d +#define MISCOMPARE 0x0e + + +/* + * DEVICE TYPES + */ + +#define TYPE_DISK 0x00 +#define TYPE_TAPE 0x01 +#define TYPE_PRINTER 0x02 +#define TYPE_PROCESSOR 0x03 /* HP scanners use this */ +#define TYPE_WORM 0x04 /* Treated as ROM by our system */ +#define TYPE_ROM 0x05 +#define TYPE_SCANNER 0x06 +#define TYPE_MOD 0x07 /* Magneto-optical disk - + * - treated as TYPE_DISK */ +#define TYPE_MEDIUM_CHANGER 0x08 +#define TYPE_COMM 0x09 /* Communications device */ +#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ +#define TYPE_NO_LUN 0x7f + +/* + * standard mode-select header prepended to all mode-select commands + * + * moved here from cdrom.h -- kraxel + */ + +struct ccs_modesel_head +{ + u_char _r1; /* reserved */ + u_char medium; /* device-specific medium type */ + u_char _r2; /* reserved */ + u_char block_desc_length; /* block descriptor length */ + u_char density; /* device-specific density code */ + u_char number_blocks_hi; /* number of blocks in this block desc */ + u_char number_blocks_med; + u_char number_blocks_lo; + u_char _r3; + u_char block_length_hi; /* block length for blocks in this desc */ + u_char block_length_med; + u_char block_length_lo; +}; + +/* + * MESSAGE CODES + */ + +#define COMMAND_COMPLETE 0x00 +#define EXTENDED_MESSAGE 0x01 +#define EXTENDED_MODIFY_DATA_POINTER 0x00 +#define EXTENDED_SDTR 0x01 +#define EXTENDED_EXTENDED_IDENTIFY 0x02 /* SCSI-I only */ +#define EXTENDED_WDTR 0x03 +#define SAVE_POINTERS 0x02 +#define RESTORE_POINTERS 0x03 +#define DISCONNECT 0x04 +#define INITIATOR_ERROR 0x05 +#define ABORT 0x06 +#define MESSAGE_REJECT 0x07 +#define NOP 0x08 +#define MSG_PARITY_ERROR 0x09 +#define LINKED_CMD_COMPLETE 0x0a +#define LINKED_FLG_CMD_COMPLETE 0x0b +#define BUS_DEVICE_RESET 0x0c + +#define INITIATE_RECOVERY 0x0f /* SCSI-II only */ +#define RELEASE_RECOVERY 0x10 /* SCSI-II only */ + +#define SIMPLE_QUEUE_TAG 0x20 +#define HEAD_OF_QUEUE_TAG 0x21 +#define ORDERED_QUEUE_TAG 0x22 + +/* + * Here are some scsi specific ioctl commands which are sometimes useful. + */ +/* These are a few other constants only used by scsi devices */ +/* Note that include/linux/cdrom.h also defines IOCTL 0x5300 - 0x5395 */ + +#define SCSI_IOCTL_GET_IDLUN 0x5382 /* conflicts with CDROMAUDIOBUFSIZ */ + +/* Used to turn on and off tagged queuing for scsi devices */ + +#define SCSI_IOCTL_TAGGED_ENABLE 0x5383 +#define SCSI_IOCTL_TAGGED_DISABLE 0x5384 + +/* Used to obtain the host number of a device. */ +#define SCSI_IOCTL_PROBE_HOST 0x5385 + +/* Used to get the bus number for a device */ +#define SCSI_IOCTL_GET_BUS_NUMBER 0x5386 + +/* Used to get the PCI location of a device */ +#define SCSI_IOCTL_GET_PCI 0x5387 + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ + +#endif diff --git a/xen-2.4.16/include/scsi/scsi_ioctl.h b/xen-2.4.16/include/scsi/scsi_ioctl.h new file mode 100644 index 0000000000..937cadfb91 --- /dev/null +++ b/xen-2.4.16/include/scsi/scsi_ioctl.h @@ -0,0 +1,51 @@ +#ifndef _SCSI_IOCTL_H +#define _SCSI_IOCTL_H + +#define SCSI_IOCTL_SEND_COMMAND 1 +#define SCSI_IOCTL_TEST_UNIT_READY 2 +#define SCSI_IOCTL_BENCHMARK_COMMAND 3 +#define SCSI_IOCTL_SYNC 4 /* Request synchronous parameters */ +#define SCSI_IOCTL_START_UNIT 5 +#define SCSI_IOCTL_STOP_UNIT 6 +/* The door lock/unlock constants are compatible with Sun constants for + the cdrom */ +#define SCSI_IOCTL_DOORLOCK 0x5380 /* lock the eject mechanism */ +#define SCSI_IOCTL_DOORUNLOCK 0x5381 /* unlock the mechanism */ + +#define SCSI_REMOVAL_PREVENT 1 +#define SCSI_REMOVAL_ALLOW 0 + +#ifdef __KERNEL__ + +/* + * Structures used for scsi_ioctl et al. + */ + +typedef struct scsi_ioctl_command { + unsigned int inlen; + unsigned int outlen; + unsigned char data[0]; +} Scsi_Ioctl_Command; + +typedef struct scsi_idlun { + __u32 dev_id; + __u32 host_unique_id; +} Scsi_Idlun; + +/* Fibre Channel WWN, port_id struct */ +typedef struct scsi_fctargaddress +{ + __u32 host_port_id; + unsigned char host_wwn[8]; // include NULL term. +} Scsi_FCTargAddress; + +extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int scsi_ioctl_send_command(Scsi_Device *dev, + Scsi_Ioctl_Command *arg); + +#endif + +#endif + + diff --git a/xen-2.4.16/include/scsi/scsicam.h b/xen-2.4.16/include/scsi/scsicam.h new file mode 100644 index 0000000000..13e9378f55 --- /dev/null +++ b/xen-2.4.16/include/scsi/scsicam.h @@ -0,0 +1,19 @@ +/* + * scsicam.h - SCSI CAM support functions, use for HDIO_GETGEO, etc. + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * drew@Colorado.EDU + * +1 (303) 786-7975 + * + * For more information, please consult the SCSI-CAM draft. + */ + +#ifndef SCSICAM_H +#define SCSICAM_H +#include +extern int scsicam_bios_param (Disk *disk, kdev_t dev, int *ip); +extern int scsi_partsize(struct buffer_head *bh, unsigned long capacity, + unsigned int *cyls, unsigned int *hds, unsigned int *secs); +#endif /* def SCSICAM_H */ diff --git a/xen-2.4.16/include/scsi/sg.h b/xen-2.4.16/include/scsi/sg.h new file mode 100644 index 0000000000..ccb47c88bb --- /dev/null +++ b/xen-2.4.16/include/scsi/sg.h @@ -0,0 +1,330 @@ +#ifndef _SCSI_GENERIC_H +#define _SCSI_GENERIC_H + +/* + History: + Started: Aug 9 by Lawrence Foard (entropy@world.std.com), to allow user + process control of SCSI devices. + Development Sponsored by Killy Corp. NY NY +Original driver (sg.h): +* Copyright (C) 1992 Lawrence Foard +Version 2 and 3 extensions to driver: +* Copyright (C) 1998 - 2002 Douglas Gilbert + + Version: 3.1.23 (20020318) + This version is for 2.4 series kernels. 
+ + Changes since 3.1.22 (20011208) + - change EACCES to EPERM when O_RDONLY is insufficient + - suppress newlines in host string ( /proc/scsi/sg/host_strs output) + - fix xfer direction, old interface, short reply_len [Travers Carter] + Changes since 3.1.21 (20011029) + - add support for SG_FLAG_MMAP_IO [permit mmap() on sg devices] + - update documentation pointers in this header + - put KERNEL_VERSION macros around code that breaks early 2.4 series + - fix use count for multiple queued requests on closed fd + - switch back to alloc_kiovec() + Changes since 3.1.20 (20010814) + - use alloc_kiovec_sz() to speed dio [set num_buffer_heads==0] + - changes to cope with larger scatter gather element sizes + - clean up some printk()s + - add MODULE_LICENSE("GPL") [in a 3.1.20 subversion] + - fix race around generic_unplug_device() [in a 3.1.20 subversion] + Changes since 3.1.19 (20010623) + - add SG_GET_ACCESS_COUNT ioctl + - make open() increment and close() decrement access_count + - only register first 256 devices, reject subsequent devices + Changes since 3.1.18 (20010505) + - fix bug that caused long wait when large buffer requested + - fix leak in error case of sg_new_read() [report: Eric Barton] + - add 'online' column to /proc/scsi/sg/devices + Changes since 3.1.17 (20000921) + - add CAP_SYS_RAWIO capability for sensitive stuff + - compile in dio stuff, procfs 'allow_dio' defaulted off (0) + - make premature close and detach more robust + - lun masked into commands <= SCSI_2 + - poll() and async notification now yield POLL_HUP on detach + - various 3rd party tweaks tracking lk 2.4 internal changes + +Map of SG verions to the Linux kernels in which they appear: + ---------- ---------------------------------- + original all kernels < 2.2.6 + 2.1.40 2.2.20 + 3.0.x optional version 3 sg driver for 2.2 series + 3.1.17++ 2.4.0++ + +Major new features in SG 3.x driver (cf SG 2.x drivers) + - SG_IO ioctl() combines function if write() and read() + - new interface (sg_io_hdr_t) but still supports old interface + - scatter/gather in user space, direct IO, and mmap supported + + The normal action of this driver is to use the adapter (HBA) driver to DMA + data into kernel buffers and then use the CPU to copy the data into the + user space (vice versa for writes). That is called "indirect" IO due to + the double handling of data. There are two methods offered to remove the + redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and + 2) using the mmap() system call to map the reserve buffer (this driver has + one reserve buffer per fd) into the user space. Both have their advantages. + In terms of absolute speed mmap() is faster. If speed is not a concern, + indirect IO should be fine. Read the documentation for more information. + + ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be + needed. That pseudo file's content is defaulted to 0. ** + + Historical note: this SCSI pass-through driver has been known as "sg" for + a decade. In broader kernel discussions "sg" is used to refer to scatter + gather techniques. The context should clarify which "sg" is referred to. + + Documentation + ============= + A web site for the SG device driver can be found at: + http://www.torque.net/sg [alternatively check the MAINTAINERS file] + The documentation for the sg version 3 driver can be found at: + http://www.torque.net/sg/p/sg_v3_ho.html + This is a rendering from DocBook source [change the extension to "sgml" + or "xml"]. 
There are renderings in "ps", "pdf", "rtf" and "txt" (soon). + + The older, version 2 documents discuss the original sg interface in detail: + http://www.torque.net/sg/p/scsi-generic.txt + http://www.torque.net/sg/p/scsi-generic_long.txt + A version of this document (potentially out of date) may also be found in + the kernel source tree, probably at: + /usr/src/linux/Documentation/scsi-generic.txt . + + Utility and test programs are available at the sg web site. They are + bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the + lk 2.4 series). + + There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at: + http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO +*/ + + +/* New interface introduced in the 3.x SG drivers follows */ + +typedef struct sg_iovec /* same structure as used by readv() Linux system */ +{ /* call. It defines one scatter-gather element. */ + void * iov_base; /* Starting address */ + size_t iov_len; /* Length in bytes */ +} sg_iovec_t; + + +typedef struct sg_io_hdr +{ + int interface_id; /* [i] 'S' for SCSI generic (required) */ + int dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned short iovec_count; /* [i] 0 implies no scatter gather */ + unsigned int dxfer_len; /* [i] byte count of data transfer */ + void * dxferp; /* [i], [*io] points to data transfer memory + or scatter gather list */ + unsigned char * cmdp; /* [i], [*i] points to command to perform */ + unsigned char * sbp; /* [i], [*o] points to sense_buffer memory */ + unsigned int timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + unsigned int flags; /* [i] 0 -> default, see SG_FLAG... */ + int pack_id; /* [i->o] unused internally (normally) */ + void * usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status;/* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status;/* [o] errors from software driver */ + int resid; /* [o] dxfer_len - actual_transferred */ + unsigned int duration; /* [o] time taken by cmd (unit: millisec) */ + unsigned int info; /* [o] auxiliary information */ +} sg_io_hdr_t; /* 64 bytes long (on i386) */ + +/* Use negative values to flag difference from original sg_header structure */ +#define SG_DXFER_NONE (-1) /* e.g. a SCSI Test Unit Ready command */ +#define SG_DXFER_TO_DEV (-2) /* e.g. a SCSI WRITE command */ +#define SG_DXFER_FROM_DEV (-3) /* e.g. 
a SCSI READ command */ +#define SG_DXFER_TO_FROM_DEV (-4) /* treated like SG_DXFER_FROM_DEV with the + additional property than during indirect + IO the user buffer is copied into the + kernel buffers before the transfer */ +#define SG_DXFER_UNKNOWN (-5) /* Unknown data direction */ + +/* following flag values can be "or"-ed together */ +#define SG_FLAG_DIRECT_IO 1 /* default is indirect IO */ +#define SG_FLAG_LUN_INHIBIT 2 /* default is overwrite lun in SCSI */ + /* command block (when <= SCSI_2) */ +#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */ +#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */ + /* user space (debug indirect IO) */ + +/* following 'info' values are "or"-ed together */ +#define SG_INFO_OK_MASK 0x1 +#define SG_INFO_OK 0x0 /* no sense, host nor driver "noise" */ +#define SG_INFO_CHECK 0x1 /* something abnormal happened */ + +#define SG_INFO_DIRECT_IO_MASK 0x6 +#define SG_INFO_INDIRECT_IO 0x0 /* data xfer via kernel buffers (or no xfer) */ +#define SG_INFO_DIRECT_IO 0x2 /* direct IO requested and performed */ +#define SG_INFO_MIXED_IO 0x4 /* part direct, part indirect IO */ + + +typedef struct sg_scsi_id { /* used by SG_GET_SCSI_ID ioctl() */ + int host_no; /* as in "scsi" where 'n' is one of 0, 1, 2 etc */ + int channel; + int scsi_id; /* scsi id of target device */ + int lun; + int scsi_type; /* TYPE_... defined in scsi/scsi.h */ + short h_cmd_per_lun;/* host (adapter) maximum commands per lun */ + short d_queue_depth;/* device (or adapter) maximum queue length */ + int unused[2]; /* probably find a good use, set 0 for now */ +} sg_scsi_id_t; /* 32 bytes long on i386 */ + +typedef struct sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ + char req_state; /* 0 -> not used, 1 -> written, 2 -> ready to read */ + char orphan; /* 0 -> normal request, 1 -> from interruped SG_IO */ + char sg_io_owned; /* 0 -> complete with read(), 1 -> owned by SG_IO */ + char problem; /* 0 -> no problem detected, 1 -> error to report */ + int pack_id; /* pack_id associated with request */ + void * usr_ptr; /* user provided pointer (in new interface) */ + unsigned int duration; /* millisecs elapsed since written (req_state==1) + or request duration (req_state==2) */ + int unused; +} sg_req_info_t; /* 20 bytes long on i386 */ + + +/* IOCTLs: Those ioctls that are relevant to the SG 3.x drivers follow. + [Those that only apply to the SG 2.x drivers are at the end of the file.] + (_GET_s yield result via 'int *' 3rd argument unless otherwise indicated) */ + +#define SG_EMULATED_HOST 0x2203 /* true for emulated host adapter (ATAPI) */ + +/* Used to configure SCSI command transformation layer for ATAPI devices */ +/* Only supported by the ide-scsi driver */ +#define SG_SET_TRANSFORM 0x2204 /* N.B. 3rd arg is not pointer but value: */ + /* 3rd arg = 0 to disable transform, 1 to enable it */ +#define SG_GET_TRANSFORM 0x2205 + +#define SG_SET_RESERVED_SIZE 0x2275 /* request a new reserved buffer size */ +#define SG_GET_RESERVED_SIZE 0x2272 /* actual size of reserved buffer */ + +/* The following ioctl has a 'sg_scsi_id_t *' object as its 3rd argument. 
*/ +#define SG_GET_SCSI_ID 0x2276 /* Yields fd's bus, chan, dev, lun + type */ +/* SCSI id information can also be obtained from SCSI_IOCTL_GET_IDLUN */ + +/* Override host setting and always DMA using low memory ( <16MB on i386) */ +#define SG_SET_FORCE_LOW_DMA 0x2279 /* 0-> use adapter setting, 1-> force */ +#define SG_GET_LOW_DMA 0x227a /* 0-> use all ram for dma; 1-> low dma ram */ + +/* When SG_SET_FORCE_PACK_ID set to 1, pack_id is input to read() which + tries to fetch a packet with a matching pack_id, waits, or returns EAGAIN. + If pack_id is -1 then read oldest waiting. When ...FORCE_PACK_ID set to 0 + then pack_id ignored by read() and oldest readable fetched. */ +#define SG_SET_FORCE_PACK_ID 0x227b +#define SG_GET_PACK_ID 0x227c /* Yields oldest readable pack_id (or -1) */ + +#define SG_GET_NUM_WAITING 0x227d /* Number of commands awaiting read() */ + +/* Yields max scatter gather tablesize allowed by current host adapter */ +#define SG_GET_SG_TABLESIZE 0x227F /* 0 implies can't do scatter gather */ + +#define SG_GET_VERSION_NUM 0x2282 /* Example: version 2.1.34 yields 20134 */ + +/* Returns -EBUSY if occupied. 3rd argument pointer to int (see next) */ +#define SG_SCSI_RESET 0x2284 +/* Associated values that can be given to SG_SCSI_RESET follow */ +#define SG_SCSI_RESET_NOTHING 0 +#define SG_SCSI_RESET_DEVICE 1 +#define SG_SCSI_RESET_BUS 2 +#define SG_SCSI_RESET_HOST 3 + +/* synchronous SCSI command ioctl, (only in version 3 interface) */ +#define SG_IO 0x2285 /* similar effect as write() followed by read() */ + +#define SG_GET_REQUEST_TABLE 0x2286 /* yields table of active requests */ + +/* How to treat EINTR during SG_IO ioctl(), only in SG 3.x series */ +#define SG_SET_KEEP_ORPHAN 0x2287 /* 1 -> hold for read(), 0 -> drop (def) */ +#define SG_GET_KEEP_ORPHAN 0x2288 + +/* yields scsi midlevel's access_count for this SCSI device */ +#define SG_GET_ACCESS_COUNT 0x2289 + + +#define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */ +/* Largest size (in bytes) a single scatter-gather list element can have. + The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on + i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported + by adapter then this value is the largest data block that can be + read/written by a single scsi command. The user can find the value of + PAGE_SIZE by calling getpagesize() defined in unistd.h . */ + +#define SG_DEFAULT_RETRIES 1 + +/* Defaults, commented if they differ from original sg driver */ +#define SG_DEF_FORCE_LOW_DMA 0 /* was 1 -> memory below 16MB on i386 */ +#define SG_DEF_FORCE_PACK_ID 0 +#define SG_DEF_KEEP_ORPHAN 0 +#define SG_DEF_RESERVED_SIZE SG_SCATTER_SZ /* load time option */ + +/* maximum outstanding requests, write() yields EDOM if exceeded */ +#define SG_MAX_QUEUE 16 + +#define SG_BIG_BUFF SG_DEF_RESERVED_SIZE /* for backward compatibility */ + +/* Alternate style type names, "..._t" variants preferred */ +typedef struct sg_io_hdr Sg_io_hdr; +typedef struct sg_io_vec Sg_io_vec; +typedef struct sg_scsi_id Sg_scsi_id; +typedef struct sg_req_info Sg_req_info; + + +/* vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv */ +/* The older SG interface based on the 'sg_header' structure follows. 
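(Before that older interface, an editorial aside: the version-3 interface defined above is easiest to see in action from user space. The sketch below is the editor's illustration, not code from this changeset; it assumes a Linux host with the sg driver loaded and a device node such as /dev/sg0, and issues a TEST_UNIT_READY, opcode 0x00 from scsi.h above, through the SG_IO ioctl.)

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>        /* sg_io_hdr_t, SG_IO, SG_DXFER_NONE, SG_INFO_* */

int main(void)
{
    unsigned char cdb[6] = { 0x00, 0, 0, 0, 0, 0 };  /* TEST_UNIT_READY */
    unsigned char sense[32];
    sg_io_hdr_t hdr;
    int fd = open("/dev/sg0", O_RDWR);   /* device node is an assumption */

    if (fd < 0) { perror("open"); return 1; }

    memset(&hdr, 0, sizeof(hdr));
    hdr.interface_id    = 'S';             /* required for SCSI generic */
    hdr.dxfer_direction = SG_DXFER_NONE;   /* no data phase */
    hdr.cmd_len         = sizeof(cdb);
    hdr.cmdp            = cdb;
    hdr.mx_sb_len       = sizeof(sense);
    hdr.sbp             = sense;
    hdr.timeout         = 5000;            /* milliseconds */

    if (ioctl(fd, SG_IO, &hdr) < 0) { perror("SG_IO"); close(fd); return 1; }

    if ((hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
        printf("not ready: status=0x%x host=0x%x driver=0x%x\n",
               hdr.status, hdr.host_status, hdr.driver_status);
    else
        printf("unit ready\n");

    close(fd);
    return 0;
}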
*/ +/* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ + +#define SG_MAX_SENSE 16 /* this only applies to the sg_header interface */ + +struct sg_header +{ + int pack_len; /* [o] reply_len (ie useless), ignored as input */ + int reply_len; /* [i] max length of expected reply (inc. sg_header) */ + int pack_id; /* [io] id number of packet (use ints >= 0) */ + int result; /* [o] 0==ok, else (+ve) Unix errno (best ignored) */ + unsigned int twelve_byte:1; + /* [i] Force 12 byte command length for group 6 & 7 commands */ + unsigned int target_status:5; /* [o] scsi status from target */ + unsigned int host_status:8; /* [o] host status (see "DID" codes) */ + unsigned int driver_status:8; /* [o] driver status+suggestion */ + unsigned int other_flags:10; /* unused */ + unsigned char sense_buffer[SG_MAX_SENSE]; /* [o] Output in 3 cases: + when target_status is CHECK_CONDITION or + when target_status is COMMAND_TERMINATED or + when (driver_status & DRIVER_SENSE) is true. */ +}; /* This structure is 36 bytes long on i386 */ + + +/* IOCTLs: The following are not required (or ignored) when the sg_io_hdr_t + interface is used. They are kept for backward compatibility with + the original and version 2 drivers. */ + +#define SG_SET_TIMEOUT 0x2201 /* unit: jiffies (10ms on i386) */ +#define SG_GET_TIMEOUT 0x2202 /* yield timeout as _return_ value */ + +/* Get/set command queuing state per fd (default is SG_DEF_COMMAND_Q. + Each time a sg_io_hdr_t object is seen on this file descriptor, this + command queuing flag is set on (overriding the previous setting). */ +#define SG_GET_COMMAND_Q 0x2270 /* Yields 0 (queuing off) or 1 (on) */ +#define SG_SET_COMMAND_Q 0x2271 /* Change queuing state with 0 or 1 */ + +/* Turn on/off error sense trace (1 and 0 respectively, default is off). + Try using: "# cat /proc/scsi/sg/debug" instead in the v3 driver */ +#define SG_SET_DEBUG 0x227e /* 0 -> turn off debug */ + +#define SG_NEXT_CMD_LEN 0x2283 /* override SCSI command length with given + number on the next write() on this file descriptor */ + + +/* Defaults, commented if they differ from original sg driver */ +#define SG_DEFAULT_TIMEOUT (60*HZ) /* HZ == 'jiffies in 1 second' */ +#define SG_DEF_COMMAND_Q 0 /* command queuing is always on when + the new interface is used */ +#define SG_DEF_UNDERRUN_FLAG 0 + +#endif diff --git a/xen-2.4.16/include/xeno/blk.h b/xen-2.4.16/include/xeno/blk.h new file mode 100644 index 0000000000..bc3f5548c6 --- /dev/null +++ b/xen-2.4.16/include/xeno/blk.h @@ -0,0 +1,409 @@ +#ifndef _BLK_H +#define _BLK_H + +#include +/*#include */ +#include +#include + +/* + * Spinlock for protecting the request queue which + * is mucked around with in interrupts on potentially + * multiple CPU's.. + */ +extern spinlock_t io_request_lock; + +/* + * Initialization functions. 
+ */ +extern int isp16_init(void); +extern int cdu31a_init(void); +extern int acsi_init(void); +extern int mcd_init(void); +extern int mcdx_init(void); +extern int sbpcd_init(void); +extern int aztcd_init(void); +extern int sony535_init(void); +extern int gscd_init(void); +extern int cm206_init(void); +extern int optcd_init(void); +extern int sjcd_init(void); +extern int cdi_init(void); +extern int hd_init(void); +extern int ide_init(void); +extern int xd_init(void); +extern int mfm_init(void); +extern int loop_init(void); +extern int md_init(void); +extern int ap_init(void); +extern int ddv_init(void); +extern int z2_init(void); +extern int swim3_init(void); +extern int swimiop_init(void); +extern int amiga_floppy_init(void); +extern int atari_floppy_init(void); +extern int ez_init(void); +extern int bpcd_init(void); +extern int ps2esdi_init(void); +extern int jsfd_init(void); +extern int viodasd_init(void); +extern int viocd_init(void); + +#if defined(CONFIG_ARCH_S390) +extern int dasd_init(void); +extern int xpram_init(void); +extern int tapeblock_init(void); +#endif /* CONFIG_ARCH_S390 */ + +extern void set_device_ro(kdev_t dev,int flag); +#if 0 +void add_blkdev_randomness(int major); +#else +#define add_blkdev_randomness(_major) ((void)0) +#endif + +extern int floppy_init(void); +extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ +extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ +extern int rd_image_start; /* starting block # of image */ + +#ifdef CONFIG_BLK_DEV_INITRD + +#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ + +extern unsigned long initrd_start,initrd_end; +extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ +void initrd_init(void); + +#endif + + +/* + * end_request() and friends. Must be called with the request queue spinlock + * acquired. All functions called within end_request() _must_be_ atomic. + * + * Several drivers define their own end_request and call + * end_that_request_first() and end_that_request_last() + * for parts of the original function. This prevents + * code duplication in drivers. + */ + +static inline void blkdev_dequeue_request(struct request * req) +{ + list_del(&req->queue); +} + +int end_that_request_first(struct request *req, int uptodate, char *name); +void end_that_request_last(struct request *req); + +#if defined(MAJOR_NR) || defined(IDE_DRIVER) + +#undef DEVICE_ON +#undef DEVICE_OFF + +/* + * Add entries as needed. + */ + +#ifdef IDE_DRIVER + +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) +#define DEVICE_NAME "ide" + +#elif (MAJOR_NR == RAMDISK_MAJOR) + +/* ram disk */ +#define DEVICE_NAME "ramdisk" +#define DEVICE_NR(device) (MINOR(device)) +#define DEVICE_NO_RANDOM + +#elif (MAJOR_NR == Z2RAM_MAJOR) + +/* Zorro II Ram */ +#define DEVICE_NAME "Z2RAM" +#define DEVICE_REQUEST do_z2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == FLOPPY_MAJOR) + +static void floppy_off(unsigned int nr); + +#define DEVICE_NAME "floppy" +#define DEVICE_INTR do_floppy +#define DEVICE_REQUEST do_fd_request +#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 )) +#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device)) + +#elif (MAJOR_NR == HD_MAJOR) + +/* Hard disk: timeout is 6 seconds. 
*/ +#define DEVICE_NAME "hard disk" +#define DEVICE_INTR do_hd +#define TIMEOUT_VALUE (6*HZ) +#define DEVICE_REQUEST do_hd_request +#define DEVICE_NR(device) (MINOR(device)>>6) + +#elif (SCSI_DISK_MAJOR(MAJOR_NR)) + +#define DEVICE_NAME "scsidisk" +#define TIMEOUT_VALUE (2*HZ) +#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4)) + +/* Kludge to use the same number for both char and block major numbers */ +#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER) + +#define DEVICE_NAME "Multiple devices driver" +#define DEVICE_REQUEST do_md_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SCSI_TAPE_MAJOR) + +#define DEVICE_NAME "scsitape" +#define DEVICE_INTR do_st +#define DEVICE_NR(device) (MINOR(device) & 0x7f) + +#elif (MAJOR_NR == OSST_MAJOR) + +#define DEVICE_NAME "onstream" +#define DEVICE_INTR do_osst +#define DEVICE_NR(device) (MINOR(device) & 0x7f) +#define DEVICE_ON(device) +#define DEVICE_OFF(device) + +#elif (MAJOR_NR == SCSI_CDROM_MAJOR) + +#define DEVICE_NAME "CD-ROM" +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == XT_DISK_MAJOR) + +#define DEVICE_NAME "xt disk" +#define DEVICE_REQUEST do_xd_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == PS2ESDI_MAJOR) + +#define DEVICE_NAME "PS/2 ESDI" +#define DEVICE_REQUEST do_ps2esdi_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == CDU31A_CDROM_MAJOR) + +#define DEVICE_NAME "CDU31A" +#define DEVICE_REQUEST do_cdu31a_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE)) + +#define DEVICE_NAME "ACSI" +#define DEVICE_INTR do_acsi +#define DEVICE_REQUEST do_acsi_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcd */ +#define DEVICE_REQUEST do_mcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcdx */ +#define DEVICE_REQUEST do_mcdx_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #1" +#define DEVICE_REQUEST do_sbpcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #2" +#define DEVICE_REQUEST do_sbpcd2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #3" +#define DEVICE_REQUEST do_sbpcd3_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #4" +#define DEVICE_REQUEST do_sbpcd4_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == AZTECH_CDROM_MAJOR) + +#define DEVICE_NAME "Aztech CD-ROM" +#define DEVICE_REQUEST do_aztcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CDU535_CDROM_MAJOR) + +#define DEVICE_NAME "SONY-CDU535" +#define DEVICE_INTR do_cdu535 +#define DEVICE_REQUEST do_cdu535_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR) + +#define DEVICE_NAME "Goldstar R420" +#define DEVICE_REQUEST do_gscd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CM206_CDROM_MAJOR) 
+#define DEVICE_NAME "Philips/LMS CD-ROM cm206" +#define DEVICE_REQUEST do_cm206_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == OPTICS_CDROM_MAJOR) + +#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM" +#define DEVICE_REQUEST do_optcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SANYO_CDROM_MAJOR) + +#define DEVICE_NAME "Sanyo H94A CD-ROM" +#define DEVICE_REQUEST do_sjcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == APBLOCK_MAJOR) + +#define DEVICE_NAME "apblock" +#define DEVICE_REQUEST ap_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DDV_MAJOR) + +#define DEVICE_NAME "ddv" +#define DEVICE_REQUEST ddv_request +#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS) + +#elif (MAJOR_NR == MFM_ACORN_MAJOR) + +#define DEVICE_NAME "mfm disk" +#define DEVICE_INTR do_mfm +#define DEVICE_REQUEST do_mfm_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == NBD_MAJOR) + +#define DEVICE_NAME "nbd" +#define DEVICE_REQUEST do_nbd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MDISK_MAJOR) + +#define DEVICE_NAME "mdisk" +#define DEVICE_REQUEST mdisk_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DASD_MAJOR) + +#define DEVICE_NAME "dasd" +#define DEVICE_REQUEST do_dasd_request +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) + +#elif (MAJOR_NR == I2O_MAJOR) + +#define DEVICE_NAME "I2O block" +#define DEVICE_REQUEST i2ob_request +#define DEVICE_NR(device) (MINOR(device)>>4) + +#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) + +#define DEVICE_NAME "ida" +#define TIMEOUT_VALUE (25*HZ) +#define DEVICE_REQUEST do_ida_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#endif /* MAJOR_NR == whatever */ + +/* provide DEVICE_xxx defaults, if not explicitly defined + * above in the MAJOR_NR==xxx if-elif tree */ +#ifndef DEVICE_ON +#define DEVICE_ON(device) do {} while (0) +#endif +#ifndef DEVICE_OFF +#define DEVICE_OFF(device) do {} while (0) +#endif + +#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR) +#if !defined(IDE_DRIVER) + +#ifndef CURRENT +#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif +#ifndef QUEUE_EMPTY +#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif + +#ifndef DEVICE_NAME +#define DEVICE_NAME "unknown" +#endif + +#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev) + +#ifdef DEVICE_INTR +static void (*DEVICE_INTR)(void) = NULL; +#endif + +#define SET_INTR(x) (DEVICE_INTR = (x)) + +#ifdef DEVICE_REQUEST +static void (DEVICE_REQUEST)(request_queue_t *); +#endif + +#ifdef DEVICE_INTR +#define CLEAR_INTR SET_INTR(NULL) +#else +#define CLEAR_INTR +#endif + +#define INIT_REQUEST \ + if (QUEUE_EMPTY) {\ + CLEAR_INTR; \ + return; \ + } \ + if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ + panic(DEVICE_NAME ": request list destroyed"); \ + if (CURRENT->bh) { \ + if (!buffer_locked(CURRENT->bh)) \ + panic(DEVICE_NAME ": block not locked"); \ + } + +#endif /* !defined(IDE_DRIVER) */ + + +#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */ + +#if ! 
SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) + +static inline void end_request(int uptodate) { + struct request *req = CURRENT; + + if (end_that_request_first(req, uptodate, DEVICE_NAME)) + return; + +#ifndef DEVICE_NO_RANDOM + add_blkdev_randomness(MAJOR(req->rq_dev)); +#endif + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); +} + +#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ +#endif /* LOCAL_END_REQUEST */ + +#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */ +#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */ + +#endif /* _BLK_H */ diff --git a/xen-2.4.16/include/xeno/blkdev.h b/xen-2.4.16/include/xeno/blkdev.h new file mode 100644 index 0000000000..2701eb84e3 --- /dev/null +++ b/xen-2.4.16/include/xeno/blkdev.h @@ -0,0 +1,375 @@ +#ifndef _LINUX_BLKDEV_H +#define _LINUX_BLKDEV_H + +#include +#include +#include +#include +#include + +/* Some defines from fs.h that may actually be useful to the blkdev layer. */ +#define READ 0 +#define WRITE 1 +#define READA 2 +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<b_state & (1UL << BH_##state)) != 0) + +#define buffer_uptodate(bh) __buffer_state(bh,Uptodate) +#define buffer_dirty(bh) __buffer_state(bh,Dirty) +#define buffer_locked(bh) __buffer_state(bh,Lock) +#define buffer_req(bh) __buffer_state(bh,Req) +#define buffer_mapped(bh) __buffer_state(bh,Mapped) +#define buffer_new(bh) __buffer_state(bh,New) +#define buffer_async(bh) __buffer_state(bh,Async) +#define buffer_launder(bh) __buffer_state(bh,Launder) + +#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) + +extern void set_bh_page(struct buffer_head *bh, struct pfn_info *page, unsigned long offset); + +#define touch_buffer(bh) mark_page_accessed(bh->b_page) + +#define atomic_set_buffer_clean(bh) test_and_clear_bit(BH_Dirty, &(bh)->b_state) + +static inline void __mark_buffer_clean(struct buffer_head *bh) +{ + panic("__mark_buffer_clean"); +} + +static inline void mark_buffer_clean(struct buffer_head * bh) +{ + if (atomic_set_buffer_clean(bh)) + __mark_buffer_clean(bh); +} + +static inline void buffer_IO_error(struct buffer_head * bh) +{ + mark_buffer_clean(bh); + /* + * b_end_io has to clear the BH_Uptodate bitflag in the error case! + */ + bh->b_end_io(bh, 0); +} + +/**** XXX END OF BUFFER_HEAD STUFF XXXX ****/ + +#include +#include +#include +#include +#include + +struct request_queue; +typedef struct request_queue request_queue_t; +struct elevator_s; +typedef struct elevator_s elevator_t; + +/* + * Ok, this is an expanded form so that we can use the same + * request for paging requests. 
+ */ +struct request { + struct list_head queue; + int elevator_sequence; + + volatile int rq_status; /* should split this into a few status bits */ +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + + kdev_t rq_dev; + int cmd; /* READ or WRITE */ + int errors; + unsigned long start_time; + unsigned long sector; + unsigned long nr_sectors; + unsigned long hard_sector, hard_nr_sectors; + unsigned int nr_segments; + unsigned int nr_hw_segments; + unsigned long current_nr_sectors; + void * special; + char * buffer; + struct completion * waiting; + struct buffer_head * bh; + struct buffer_head * bhtail; + request_queue_t *q; +}; + +#include + +typedef int (merge_request_fn) (request_queue_t *q, + struct request *req, + struct buffer_head *bh, + int); +typedef int (merge_requests_fn) (request_queue_t *q, + struct request *req, + struct request *req2, + int); +typedef void (request_fn_proc) (request_queue_t *q); +typedef request_queue_t * (queue_proc) (kdev_t dev); +typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); +typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef void (unplug_device_fn) (void *q); + +/* + * Default nr free requests per queue, ll_rw_blk will scale it down + * according to available RAM at init time + */ +#define QUEUE_NR_REQUESTS 8192 + +struct request_list { + unsigned int count; + struct list_head free; +}; + +struct request_queue +{ + /* + * the queue request freelist, one for reads and one for writes + */ + struct request_list rq[2]; + + /* + * The total number of requests on each queue + */ + int nr_requests; + + /* + * Batching threshold for sleep/wakeup decisions + */ + int batch_requests; + + /* + * Together with queue_head for cacheline sharing + */ + struct list_head queue_head; + elevator_t elevator; + + request_fn_proc * request_fn; + merge_request_fn * back_merge_fn; + merge_request_fn * front_merge_fn; + merge_requests_fn * merge_requests_fn; + make_request_fn * make_request_fn; + plug_device_fn * plug_device_fn; + /* + * The queue owner gets to use this for whatever they like. + * ll_rw_blk doesn't touch it. + */ + void * queuedata; + + /* + * This is used to remove the plug when tq_disk runs. + */ + struct tq_struct plug_tq; + + /* + * Boolean that indicates whether this queue is plugged or not. + */ + char plugged; + + /* + * Boolean that indicates whether current_request is active or + * not. + */ + char head_active; + + /* + * Is meant to protect the queue in the future instead of + * io_request_lock + */ + spinlock_t queue_lock; + +#if 0 + /* + * Tasks wait here for free read and write requests + */ + wait_queue_head_t wait_for_requests[2]; +#endif +}; + +struct blk_dev_struct { + /* + * queue_proc has to be atomic + */ + request_queue_t request_queue; + queue_proc *queue; + void *data; +}; + +struct sec_size { + unsigned block_size; + unsigned block_size_bits; +}; + +/* + * Used to indicate the default queue for drivers that don't bother + * to implement multiple queues. We have this access macro here + * so as to eliminate the need for each and every block device + * driver to know about the internal structure of blk_dev[]. 
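(An editorial aside before the BLK_DEFAULT_QUEUE macro below: the way a 2.4-era driver ties into these structures is the classic MAJOR_NR/DEVICE_REQUEST idiom from blk.h above. The fragment is schematic, with invented names MYDEV_MAJOR and mydev_data and no bounds checking; it illustrates the CURRENT / INIT_REQUEST / end_request pattern and is not buildable outside such a tree.)

#define MYDEV_MAJOR     240              /* invented major number */
#define MAJOR_NR        MYDEV_MAJOR
#define DEVICE_NR(dev)  (MINOR(dev))
#define DEVICE_REQUEST  do_mydev_request
#include <xeno/blk.h>                    /* pulls in CURRENT, INIT_REQUEST, end_request */

#define MYDEV_SECTORS   2048             /* 1 MB toy ramdisk */
static char mydev_data[MYDEV_SECTORS * 512];

static void do_mydev_request(request_queue_t *q)
{
    char *disk;
    unsigned long len;

    for (;;) {
        INIT_REQUEST;                    /* returns from this fn when QUEUE_EMPTY */
        disk = mydev_data + CURRENT->sector * 512;
        len  = CURRENT->current_nr_sectors * 512;
        /* A real driver must validate the sector range here. */
        if (CURRENT->cmd == READ)
            memcpy(CURRENT->buffer, disk, len);
        else if (CURRENT->cmd == WRITE)
            memcpy(disk, CURRENT->buffer, len);
        end_request(1);                  /* success: dequeue and complete */
    }
}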
+ */ +#define BLK_DEFAULT_QUEUE(_MAJOR) &blk_dev[_MAJOR].request_queue + +extern struct sec_size * blk_sec[MAX_BLKDEV]; +extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; +extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); +extern void generic_make_request(int rw, struct buffer_head * bh); +extern inline request_queue_t *blk_get_queue(kdev_t dev); +extern void blkdev_release_request(struct request *); + +/* + * Access functions for manipulating queue properties + */ +extern int blk_grow_request_list(request_queue_t *q, int nr_requests); +extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern void blk_cleanup_queue(request_queue_t *); +extern void blk_queue_headactive(request_queue_t *, int); +extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void generic_unplug_device(void *); + +extern int * blk_size[MAX_BLKDEV]; + +extern int * blksize_size[MAX_BLKDEV]; + +extern int * hardsect_size[MAX_BLKDEV]; + +/*extern int * max_readahead[MAX_BLKDEV];*/ + +extern int * max_sectors[MAX_BLKDEV]; + +extern int * max_segments[MAX_BLKDEV]; + +#define MAX_SEGMENTS 128 +#define MAX_SECTORS 255 + +#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) + +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) +#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) + +extern void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io); + +static inline int get_hardsect_size(kdev_t dev) +{ + int retval = 512; + int major = MAJOR(dev); + + if (hardsect_size[major]) { + int minor = MINOR(dev); + if (hardsect_size[major][minor]) + retval = hardsect_size[major][minor]; + } + return retval; +} + +#define blk_finished_io(nsects) do { } while (0) +#define blk_started_io(nsects) do { } while (0) + +static inline unsigned int blksize_bits(unsigned int size) +{ + unsigned int bits = 8; + do { + bits++; + size >>= 1; + } while (size > 256); + return bits; +} + +static inline unsigned int block_size(kdev_t dev) +{ + int retval = BLOCK_SIZE; + int major = MAJOR(dev); + + if (blksize_size[major]) { + int minor = MINOR(dev); + if (blksize_size[major][minor]) + retval = blksize_size[major][minor]; + } + return retval; +} + +#endif diff --git a/xen-2.4.16/include/xeno/blkpg.h b/xen-2.4.16/include/xeno/blkpg.h new file mode 100644 index 0000000000..f4240abaf1 --- /dev/null +++ b/xen-2.4.16/include/xeno/blkpg.h @@ -0,0 +1,64 @@ +#ifndef _LINUX_BLKPG_H +#define _LINUX_BLKPG_H + +/* + * Partition table and disk geometry handling + * + * A single ioctl with lots of subfunctions: + * + * Device number stuff: + * get_whole_disk() (given the device number of a partition, + * find the device number of the encompassing disk) + * get_all_partitions() (given the device number of a disk, return the + * device numbers of all its known partitions) + * + * Partition stuff: + * add_partition() + * delete_partition() + * test_partition_in_use() (also for test_disk_in_use) + * + * Geometry stuff: + * get_geometry() + * set_geometry() + * get_bios_drivedata() + * + * For today, only the partition stuff - aeb, 990515 + */ 
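(An editorial illustration of the ioctl described in the comment above, using the blkpg_ioctl_arg and blkpg_partition structures defined just below. This is a user-space sketch against the equivalent mainline Linux interface; /dev/sda and partition number 5 are arbitrary examples.)

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkpg.h>   /* struct blkpg_ioctl_arg, BLKPG, BLKPG_* */

int main(void)
{
    struct blkpg_partition part;
    struct blkpg_ioctl_arg arg;
    int fd = open("/dev/sda", O_RDWR);   /* whole-disk node; an assumption */

    if (fd < 0) { perror("open"); return 1; }

    memset(&part, 0, sizeof(part));
    part.pno = 5;                        /* partition number to remove */

    memset(&arg, 0, sizeof(arg));
    arg.op      = BLKPG_DEL_PARTITION;
    arg.datalen = sizeof(part);
    arg.data    = &part;

    if (ioctl(fd, BLKPG, &arg) < 0)      /* needs CAP_SYS_ADMIN */
        perror("BLKPG_DEL_PARTITION");

    close(fd);
    return 0;
}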
+#include + +#define BLKPG _IO(0x12,105) + +/* The argument structure */ +struct blkpg_ioctl_arg { + int op; + int flags; + int datalen; + void *data; +}; + +/* The subfunctions (for the op field) */ +#define BLKPG_ADD_PARTITION 1 +#define BLKPG_DEL_PARTITION 2 + +/* Sizes of name fields. Unused at present. */ +#define BLKPG_DEVNAMELTH 64 +#define BLKPG_VOLNAMELTH 64 + +/* The data structure for ADD_PARTITION and DEL_PARTITION */ +struct blkpg_partition { + long long start; /* starting offset in bytes */ + long long length; /* length in bytes */ + int pno; /* partition number */ + char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2, + to be used in kernel messages */ + char volname[BLKPG_VOLNAMELTH]; /* volume label */ +}; + +#ifdef __KERNEL__ + +extern char * partition_name(kdev_t dev); +extern int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_BLKPG_H */ diff --git a/xen-2.4.16/include/xeno/block.h b/xen-2.4.16/include/xeno/block.h new file mode 100644 index 0000000000..bb80d0987b --- /dev/null +++ b/xen-2.4.16/include/xeno/block.h @@ -0,0 +1,11 @@ +/* block.h + * + * this is the hypervisor end of the block io code. + */ + +#include + +/* vif prototypes */ +blk_ring_t *create_block_ring(int domain); +void destroy_block_ring(struct task_struct *p); + diff --git a/xen-2.4.16/include/xeno/bootmem.h b/xen-2.4.16/include/xeno/bootmem.h new file mode 100644 index 0000000000..5e07d1c407 --- /dev/null +++ b/xen-2.4.16/include/xeno/bootmem.h @@ -0,0 +1,64 @@ +/* + * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 + */ +#ifndef _LINUX_BOOTMEM_H +#define _LINUX_BOOTMEM_H + +//#include + +/* + * simple boot-time physical memory area allocator. + */ + +extern unsigned long max_low_pfn, max_page; +extern unsigned long min_low_pfn; + +#if 0 + +/* + * node_bootmem_map is a map pointer - the bits represent all physical + * memory pages (including holes) on the node. 
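(The bitmap described in that comment, one bit per physical page with set bits marking reserved pages, is easy to model in isolation. The toy below is the editor's user-space sketch of the idea with a first-fit scan; it is not the kernel allocator.)

#include <stdio.h>

#define NR_PAGES 1024                           /* toy node: 4 MB of 4 KB pages */
static unsigned char bootmem_map[NR_PAGES / 8]; /* 1 bit per page, set == reserved */

static void reserve_page(unsigned long pfn)
{ bootmem_map[pfn >> 3] |= 1 << (pfn & 7); }

static int page_reserved(unsigned long pfn)
{ return bootmem_map[pfn >> 3] & (1 << (pfn & 7)); }

/* First-fit scan for n contiguous free pages; returns first pfn or -1. */
static long alloc_pages_boot(unsigned long n)
{
    unsigned long pfn, run = 0;
    for (pfn = 0; pfn < NR_PAGES; pfn++) {
        run = page_reserved(pfn) ? 0 : run + 1;
        if (run == n) {
            unsigned long first = pfn - n + 1, i;
            for (i = first; i <= pfn; i++)
                reserve_page(i);
            return (long)first;
        }
    }
    return -1;
}

int main(void)
{
    reserve_page(0);                    /* e.g. the page holding the map itself */
    printf("got pfn %ld\n", alloc_pages_boot(4));
    return 0;
}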
+ */ +typedef struct bootmem_data { + unsigned long node_boot_start; + unsigned long node_low_pfn; + void *node_bootmem_map; + unsigned long last_offset; + unsigned long last_pos; +} bootmem_data_t; + +extern unsigned long __init bootmem_bootmap_pages (unsigned long); +extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); +extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void __init free_bootmem (unsigned long addr, unsigned long size); +extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +#define alloc_bootmem(x) \ + __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem((x), SMP_CACHE_BYTES, 0) +#define alloc_bootmem_pages(x) \ + __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem((x), PAGE_SIZE, 0) +extern unsigned long __init free_all_bootmem (void); + +extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); +extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); +extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); +extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); +extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +#define alloc_bootmem_node(pgdat, x) \ + __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node(pgdat, x) \ + __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages_node(pgdat, x) \ + __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) + +#else + +extern void __init init_bootmem (unsigned long addr, unsigned long memend); +extern void * __init alloc_bootmem_low_pages(unsigned long size); + +#endif + +#endif /* _LINUX_BOOTMEM_H */ diff --git a/xen-2.4.16/include/xeno/brlock.h b/xen-2.4.16/include/xeno/brlock.h new file mode 100644 index 0000000000..208c457338 --- /dev/null +++ b/xen-2.4.16/include/xeno/brlock.h @@ -0,0 +1,220 @@ +#ifndef __LINUX_BRLOCK_H +#define __LINUX_BRLOCK_H + +/* + * 'Big Reader' read-write spinlocks. + * + * super-fast read/write locks, with write-side penalty. The point + * is to have a per-CPU read/write lock. Readers lock their CPU-local + * readlock, writers must lock all locks to get write access. These + * CPU-read-write locks are semantically identical to normal rwlocks. + * Memory usage is higher as well. (NR_CPUS*L1_CACHE_BYTES bytes) + * + * The most important feature is that these spinlocks do not cause + * cacheline ping-pong in the 'most readonly data' case. + * + * Copyright 2000, Ingo Molnar + * + * Registry idea and naming [ crutial! :-) ] by: + * + * David S. Miller + * + * David has an implementation that doesnt use atomic operations in + * the read branch via memory ordering tricks - i guess we need to + * split this up into a per-arch thing? The atomicity issue is a + * secondary item in profiles, at least on x86 platforms. + * + * The atomic op version overhead is indeed a big deal on + * load-locked/store-conditional cpus (ALPHA/MIPS/PPC) and + * compare-and-swap cpus (Sparc64). So we control which + * implementation to use with a __BRLOCK_USE_ATOMICS define. -DaveM + */ + +/* Register bigreader lock indices here. 
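(An editorial model of the scheme just described, assuming POSIX threads: each reader takes only its own per-CPU slot, so uncontended read paths stay on local state, while a writer must sweep every slot. This is an illustration of the idea, not the kernel implementation that follows.)

#include <pthread.h>
#include <stdio.h>

#define NCPU 4                     /* stand-in for NR_CPUS */
static pthread_rwlock_t slot[NCPU] = {
    PTHREAD_RWLOCK_INITIALIZER, PTHREAD_RWLOCK_INITIALIZER,
    PTHREAD_RWLOCK_INITIALIZER, PTHREAD_RWLOCK_INITIALIZER
};

static void br_read_lock(int cpu)   { pthread_rwlock_rdlock(&slot[cpu]); }
static void br_read_unlock(int cpu) { pthread_rwlock_unlock(&slot[cpu]); }

static void br_write_lock(void)     /* writers pay: take every slot */
{
    for (int i = 0; i < NCPU; i++)
        pthread_rwlock_wrlock(&slot[i]);
}

static void br_write_unlock(void)
{
    for (int i = NCPU - 1; i >= 0; i--)
        pthread_rwlock_unlock(&slot[i]);
}

int main(void)                      /* build with -lpthread */
{
    br_read_lock(0);  br_read_unlock(0);   /* cheap, per-"CPU" */
    br_write_lock();  br_write_unlock();   /* expensive, global */
    puts("ok");
    return 0;
}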
*/ +enum brlock_indices { + BR_GLOBALIRQ_LOCK, + BR_NETPROTO_LOCK, + + __BR_END +}; + +#include + +#ifdef CONFIG_SMP + +#include +#include + +#if defined(__i386__) || defined(__ia64__) || defined(__x86_64__) +#define __BRLOCK_USE_ATOMICS +#else +#undef __BRLOCK_USE_ATOMICS +#endif + +#ifdef __BRLOCK_USE_ATOMICS +typedef rwlock_t brlock_read_lock_t; +#else +typedef unsigned int brlock_read_lock_t; +#endif + +/* + * align last allocated index to the next cacheline: + */ +#define __BR_IDX_MAX \ + (((sizeof(brlock_read_lock_t)*__BR_END + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) / sizeof(brlock_read_lock_t)) + +extern brlock_read_lock_t __brlock_array[NR_CPUS][__BR_IDX_MAX]; + +#ifndef __BRLOCK_USE_ATOMICS +struct br_wrlock { + spinlock_t lock; +} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); + +extern struct br_wrlock __br_write_locks[__BR_IDX_MAX]; +#endif + +extern void __br_lock_usage_bug (void); + +#ifdef __BRLOCK_USE_ATOMICS + +static inline void br_read_lock (enum brlock_indices idx) +{ + /* + * This causes a link-time bug message if an + * invalid index is used: + */ + if (idx >= __BR_END) + __br_lock_usage_bug(); + + read_lock(&__brlock_array[smp_processor_id()][idx]); +} + +static inline void br_read_unlock (enum brlock_indices idx) +{ + if (idx >= __BR_END) + __br_lock_usage_bug(); + + read_unlock(&__brlock_array[smp_processor_id()][idx]); +} + +#else /* ! __BRLOCK_USE_ATOMICS */ +static inline void br_read_lock (enum brlock_indices idx) +{ + unsigned int *ctr; + spinlock_t *lock; + + /* + * This causes a link-time bug message if an + * invalid index is used: + */ + if (idx >= __BR_END) + __br_lock_usage_bug(); + + ctr = &__brlock_array[smp_processor_id()][idx]; + lock = &__br_write_locks[idx].lock; +again: + (*ctr)++; + mb(); + if (spin_is_locked(lock)) { + (*ctr)--; + wmb(); /* + * The release of the ctr must become visible + * to the other cpus eventually thus wmb(), + * we don't care if spin_is_locked is reordered + * before the releasing of the ctr. + * However IMHO this wmb() is superflous even in theory. + * It would not be superflous only if on the + * other CPUs doing a ldl_l instead of an ldl + * would make a difference and I don't think this is + * the case. + * I'd like to clarify this issue further + * but for now this is a slow path so adding the + * wmb() will keep us on the safe side. + */ + while (spin_is_locked(lock)) + barrier(); + goto again; + } +} + +static inline void br_read_unlock (enum brlock_indices idx) +{ + unsigned int *ctr; + + if (idx >= __BR_END) + __br_lock_usage_bug(); + + ctr = &__brlock_array[smp_processor_id()][idx]; + + wmb(); + (*ctr)--; +} +#endif /* __BRLOCK_USE_ATOMICS */ + +/* write path not inlined - it's rare and larger */ + +extern void FASTCALL(__br_write_lock (enum brlock_indices idx)); +extern void FASTCALL(__br_write_unlock (enum brlock_indices idx)); + +static inline void br_write_lock (enum brlock_indices idx) +{ + if (idx >= __BR_END) + __br_lock_usage_bug(); + __br_write_lock(idx); +} + +static inline void br_write_unlock (enum brlock_indices idx) +{ + if (idx >= __BR_END) + __br_lock_usage_bug(); + __br_write_unlock(idx); +} + +#else +# define br_read_lock(idx) ((void)(idx)) +# define br_read_unlock(idx) ((void)(idx)) +# define br_write_lock(idx) ((void)(idx)) +# define br_write_unlock(idx) ((void)(idx)) +#endif + +/* + * Now enumerate all of the possible sw/hw IRQ protected + * versions of the interfaces. 
+ */ +#define br_read_lock_irqsave(idx, flags) \ + do { local_irq_save(flags); br_read_lock(idx); } while (0) + +#define br_read_lock_irq(idx) \ + do { local_irq_disable(); br_read_lock(idx); } while (0) + +#define br_read_lock_bh(idx) \ + do { local_bh_disable(); br_read_lock(idx); } while (0) + +#define br_write_lock_irqsave(idx, flags) \ + do { local_irq_save(flags); br_write_lock(idx); } while (0) + +#define br_write_lock_irq(idx) \ + do { local_irq_disable(); br_write_lock(idx); } while (0) + +#define br_write_lock_bh(idx) \ + do { local_bh_disable(); br_write_lock(idx); } while (0) + +#define br_read_unlock_irqrestore(idx, flags) \ + do { br_read_unlock(idx); local_irq_restore(flags); } while (0) + +#define br_read_unlock_irq(idx) \ + do { br_read_unlock(idx); local_irq_enable(); } while (0) + +#define br_read_unlock_bh(idx) \ + do { br_read_unlock(idx); local_bh_enable(); } while (0) + +#define br_write_unlock_irqrestore(idx, flags) \ + do { br_write_unlock(idx); local_irq_restore(flags); } while (0) + +#define br_write_unlock_irq(idx) \ + do { br_write_unlock(idx); local_irq_enable(); } while (0) + +#define br_write_unlock_bh(idx) \ + do { br_write_unlock(idx); local_bh_enable(); } while (0) + +#endif /* __LINUX_BRLOCK_H */ diff --git a/xen-2.4.16/include/xeno/byteorder/big_endian.h b/xen-2.4.16/include/xeno/byteorder/big_endian.h new file mode 100644 index 0000000000..b84efd74c9 --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/big_endian.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_BYTEORDER_BIG_ENDIAN_H +#define _LINUX_BYTEORDER_BIG_ENDIAN_H + +#ifndef __BIG_ENDIAN +#define __BIG_ENDIAN 4321 +#endif +#ifndef __BIG_ENDIAN_BITFIELD +#define __BIG_ENDIAN_BITFIELD +#endif + +#include + +#define __constant_htonl(x) ((__u32)(x)) +#define __constant_ntohl(x) ((__u32)(x)) +#define __constant_htons(x) ((__u16)(x)) +#define __constant_ntohs(x) ((__u16)(x)) +#define __constant_cpu_to_le64(x) ___constant_swab64((x)) +#define __constant_le64_to_cpu(x) ___constant_swab64((x)) +#define __constant_cpu_to_le32(x) ___constant_swab32((x)) +#define __constant_le32_to_cpu(x) ___constant_swab32((x)) +#define __constant_cpu_to_le16(x) ___constant_swab16((x)) +#define __constant_le16_to_cpu(x) ___constant_swab16((x)) +#define __constant_cpu_to_be64(x) ((__u64)(x)) +#define __constant_be64_to_cpu(x) ((__u64)(x)) +#define __constant_cpu_to_be32(x) ((__u32)(x)) +#define __constant_be32_to_cpu(x) ((__u32)(x)) +#define __constant_cpu_to_be16(x) ((__u16)(x)) +#define __constant_be16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_le64(x) __swab64((x)) +#define __le64_to_cpu(x) __swab64((x)) +#define __cpu_to_le32(x) __swab32((x)) +#define __le32_to_cpu(x) __swab32((x)) +#define __cpu_to_le16(x) __swab16((x)) +#define __le16_to_cpu(x) __swab16((x)) +#define __cpu_to_be64(x) ((__u64)(x)) +#define __be64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_be32(x) ((__u32)(x)) +#define __be32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_be16(x) ((__u16)(x)) +#define __be16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_le64p(x) __swab64p((x)) +#define __le64_to_cpup(x) __swab64p((x)) +#define __cpu_to_le32p(x) __swab32p((x)) +#define __le32_to_cpup(x) __swab32p((x)) +#define __cpu_to_le16p(x) __swab16p((x)) +#define __le16_to_cpup(x) __swab16p((x)) +#define __cpu_to_be64p(x) (*(__u64*)(x)) +#define __be64_to_cpup(x) (*(__u64*)(x)) +#define __cpu_to_be32p(x) (*(__u32*)(x)) +#define __be32_to_cpup(x) (*(__u32*)(x)) +#define __cpu_to_be16p(x) (*(__u16*)(x)) +#define __be16_to_cpup(x) (*(__u16*)(x)) +#define __cpu_to_le64s(x) __swab64s((x)) +#define
__le64_to_cpus(x) __swab64s((x)) +#define __cpu_to_le32s(x) __swab32s((x)) +#define __le32_to_cpus(x) __swab32s((x)) +#define __cpu_to_le16s(x) __swab16s((x)) +#define __le16_to_cpus(x) __swab16s((x)) +#define __cpu_to_be64s(x) do {} while (0) +#define __be64_to_cpus(x) do {} while (0) +#define __cpu_to_be32s(x) do {} while (0) +#define __be32_to_cpus(x) do {} while (0) +#define __cpu_to_be16s(x) do {} while (0) +#define __be16_to_cpus(x) do {} while (0) + +#include + +#endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/xen-2.4.16/include/xeno/byteorder/generic.h b/xen-2.4.16/include/xeno/byteorder/generic.h new file mode 100644 index 0000000000..d3d63a5a1e --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/generic.h @@ -0,0 +1,180 @@ +#ifndef _LINUX_BYTEORDER_GENERIC_H +#define _LINUX_BYTEORDER_GENERIC_H + +/* + * linux/byteorder_generic.h + * Generic Byte-reordering support + * + * Francois-Rene Rideau 19970707 + * gathered all the good ideas from all asm-foo/byteorder.h into one file, + * cleaned them up. + * I hope it is compliant with non-GCC compilers. + * I decided to put __BYTEORDER_HAS_U64__ in byteorder.h, + * because I wasn't sure it would be ok to put it in types.h + * Upgraded it to 2.1.43 + * Francois-Rene Rideau 19971012 + * Upgraded it to 2.1.57 + * to please Linus T., replaced huge #ifdef's between little/big endian + * by nestedly #include'd files. + * Francois-Rene Rideau 19971205 + * Made it to 2.1.71; now a facelift: + * Put files under include/linux/byteorder/ + * Split swab from generic support. + * + * TODO: + * = Regular kernel maintainers could also replace all these manual + * byteswap macros that remain, disseminated among drivers, + * after some grep or the sources... + * = Linus might want to rename all these macros and files to fit his taste, + * to fit his personal naming scheme. + * = it seems that a few drivers would also appreciate + * nybble swapping support... + * = every architecture could add their byteswap macro in asm/byteorder.h + * see how some architectures already do (i386, alpha, ppc, etc) + * = cpu_to_beXX and beXX_to_cpu might some day need to be well + * distinguished throughout the kernel. This is not the case currently, + * since little endian, big endian, and pdp endian machines needn't it. + * But this might be the case for, say, a port of Linux to 20/21 bit + * architectures (and F21 Linux addict around?). + */ + +/* + * The following macros are to be defined by : + * + * Conversion of long and short int between network and host format + * ntohl(__u32 x) + * ntohs(__u16 x) + * htonl(__u32 x) + * htons(__u16 x) + * It seems that some programs (which? where? or perhaps a standard? POSIX?) + * might like the above to be functions, not macros (why?). + * if that's true, then detect them, and take measures. 
+ * Anyway, the measure is: define only ___ntohl as a macro instead, + * and in a separate file, have + * unsigned long inline ntohl(x){return ___ntohl(x);} + * + * The same for constant arguments + * __constant_ntohl(__u32 x) + * __constant_ntohs(__u16 x) + * __constant_htonl(__u32 x) + * __constant_htons(__u16 x) + * + * Conversion of XX-bit integers (16-, 32- or 64-bit) + * between native CPU format and little/big endian format + * 64-bit stuff only defined for proper architectures + * cpu_to_[bl]eXX(__uXX x) + * [bl]eXX_to_cpu(__uXX x) + * + * The same, but takes a pointer to the value to convert + * cpu_to_[bl]eXXp(__uXX x) + * [bl]eXX_to_cpup(__uXX x) + * + * The same, but changes the value in situ + * cpu_to_[bl]eXXs(__uXX x) + * [bl]eXX_to_cpus(__uXX x) + * + * See asm-foo/byteorder.h for examples of how to provide + * architecture-optimized versions + * + */ + + +#if defined(__KERNEL__) +/* + * inside the kernel, we can use nicknames; + * outside of it, we must avoid POSIX namespace pollution... + */ +#define cpu_to_le64 __cpu_to_le64 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le16_to_cpu __le16_to_cpu +#define cpu_to_be64 __cpu_to_be64 +#define be64_to_cpu __be64_to_cpu +#define cpu_to_be32 __cpu_to_be32 +#define be32_to_cpu __be32_to_cpu +#define cpu_to_be16 __cpu_to_be16 +#define be16_to_cpu __be16_to_cpu +#define cpu_to_le64p __cpu_to_le64p +#define le64_to_cpup __le64_to_cpup +#define cpu_to_le32p __cpu_to_le32p +#define le32_to_cpup __le32_to_cpup +#define cpu_to_le16p __cpu_to_le16p +#define le16_to_cpup __le16_to_cpup +#define cpu_to_be64p __cpu_to_be64p +#define be64_to_cpup __be64_to_cpup +#define cpu_to_be32p __cpu_to_be32p +#define be32_to_cpup __be32_to_cpup +#define cpu_to_be16p __cpu_to_be16p +#define be16_to_cpup __be16_to_cpup +#define cpu_to_le64s __cpu_to_le64s +#define le64_to_cpus __le64_to_cpus +#define cpu_to_le32s __cpu_to_le32s +#define le32_to_cpus __le32_to_cpus +#define cpu_to_le16s __cpu_to_le16s +#define le16_to_cpus __le16_to_cpus +#define cpu_to_be64s __cpu_to_be64s +#define be64_to_cpus __be64_to_cpus +#define cpu_to_be32s __cpu_to_be32s +#define be32_to_cpus __be32_to_cpus +#define cpu_to_be16s __cpu_to_be16s +#define be16_to_cpus __be16_to_cpus +#endif + + +/* + * Handle ntohl and such. These have various compatibility + * issues - like we want to give the prototype even though we + * also have a macro for them in case some strange program + * wants to take the address of the thing or something.. + * + * Note that these used to return a "long" in libc5, even though + * long is often 64-bit these days.. Thus the casts. + * + * They have to be macros in order to do the constant folding + * correctly - if the argument is passed into an inline function + * it is no longer constant according to gcc.. + */ + +#undef ntohl +#undef ntohs +#undef htonl +#undef htons + +/* + * Do the prototypes. Somebody might want to take the + * address or some such sick thing.. 
+ */ +#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2) +extern __u32 ntohl(__u32); +extern __u32 htonl(__u32); +#else +extern unsigned long int ntohl(unsigned long int); +extern unsigned long int htonl(unsigned long int); +#endif +extern unsigned short int ntohs(unsigned short int); +extern unsigned short int htons(unsigned short int); + + +#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) + +#define ___htonl(x) __cpu_to_be32(x) +#define ___htons(x) __cpu_to_be16(x) +#define ___ntohl(x) __be32_to_cpu(x) +#define ___ntohs(x) __be16_to_cpu(x) + +#if defined(__KERNEL__) || (defined (__GLIBC__) && __GLIBC__ >= 2) +#define htonl(x) ___htonl(x) +#define ntohl(x) ___ntohl(x) +#else +#define htonl(x) ((unsigned long)___htonl(x)) +#define ntohl(x) ((unsigned long)___ntohl(x)) +#endif +#define htons(x) ___htons(x) +#define ntohs(x) ___ntohs(x) + +#endif /* OPTIMIZE */ + + +#endif /* _LINUX_BYTEORDER_GENERIC_H */ diff --git a/xen-2.4.16/include/xeno/byteorder/little_endian.h b/xen-2.4.16/include/xeno/byteorder/little_endian.h new file mode 100644 index 0000000000..1431663621 --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/little_endian.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_BYTEORDER_LITTLE_ENDIAN_H +#define _LINUX_BYTEORDER_LITTLE_ENDIAN_H + +#ifndef __LITTLE_ENDIAN +#define __LITTLE_ENDIAN 1234 +#endif +#ifndef __LITTLE_ENDIAN_BITFIELD +#define __LITTLE_ENDIAN_BITFIELD +#endif + +#include + +#define __constant_htonl(x) ___constant_swab32((x)) +#define __constant_ntohl(x) ___constant_swab32((x)) +#define __constant_htons(x) ___constant_swab16((x)) +#define __constant_ntohs(x) ___constant_swab16((x)) +#define __constant_cpu_to_le64(x) ((__u64)(x)) +#define __constant_le64_to_cpu(x) ((__u64)(x)) +#define __constant_cpu_to_le32(x) ((__u32)(x)) +#define __constant_le32_to_cpu(x) ((__u32)(x)) +#define __constant_cpu_to_le16(x) ((__u16)(x)) +#define __constant_le16_to_cpu(x) ((__u16)(x)) +#define __constant_cpu_to_be64(x) ___constant_swab64((x)) +#define __constant_be64_to_cpu(x) ___constant_swab64((x)) +#define __constant_cpu_to_be32(x) ___constant_swab32((x)) +#define __constant_be32_to_cpu(x) ___constant_swab32((x)) +#define __constant_cpu_to_be16(x) ___constant_swab16((x)) +#define __constant_be16_to_cpu(x) ___constant_swab16((x)) +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_be64(x) __swab64((x)) +#define __be64_to_cpu(x) __swab64((x)) +#define __cpu_to_be32(x) __swab32((x)) +#define __be32_to_cpu(x) __swab32((x)) +#define __cpu_to_be16(x) __swab16((x)) +#define __be16_to_cpu(x) __swab16((x)) +#define __cpu_to_le64p(x) (*(__u64*)(x)) +#define __le64_to_cpup(x) (*(__u64*)(x)) +#define __cpu_to_le32p(x) (*(__u32*)(x)) +#define __le32_to_cpup(x) (*(__u32*)(x)) +#define __cpu_to_le16p(x) (*(__u16*)(x)) +#define __le16_to_cpup(x) (*(__u16*)(x)) +#define __cpu_to_be64p(x) __swab64p((x)) +#define __be64_to_cpup(x) __swab64p((x)) +#define __cpu_to_be32p(x) __swab32p((x)) +#define __be32_to_cpup(x) __swab32p((x)) +#define __cpu_to_be16p(x) __swab16p((x)) +#define __be16_to_cpup(x) __swab16p((x)) +#define __cpu_to_le64s(x) do {} while (0) +#define __le64_to_cpus(x) do {} while (0) +#define __cpu_to_le32s(x) do {} while (0) +#define __le32_to_cpus(x) do {} while (0) +#define __cpu_to_le16s(x) do {} while (0) +#define __le16_to_cpus(x) do {} while (0) +#define 
__cpu_to_be64s(x) __swab64s((x)) +#define __be64_to_cpus(x) __swab64s((x)) +#define __cpu_to_be32s(x) __swab32s((x)) +#define __be32_to_cpus(x) __swab32s((x)) +#define __cpu_to_be16s(x) __swab16s((x)) +#define __be16_to_cpus(x) __swab16s((x)) + +#include + +#endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */
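little_endian.h makes the __cpu_to_le* family the identity and turns the __cpu_to_be* family into byte swaps; big_endian.h above is the exact mirror image. A short sketch of the intended use, for a hypothetical structure whose fields are fixed little-endian on the wire (illustrative only; the struct, field, and function names are invented):

    /* Hypothetical wire format: both fields stored little-endian. */
    struct wire_hdr {
        __u32 length;
        __u16 type;
    };

    static void wire_hdr_to_host(const struct wire_hdr *w,
                                 __u32 *length, __u16 *type)
    {
        /* Identity on little-endian CPUs, byte swap on big-endian ones. */
        *length = __le32_to_cpu(w->length);
        *type   = __le16_to_cpu(w->type);
    }

    static void wire_hdr_from_host(struct wire_hdr *w,
                                   __u32 length, __u16 type)
    {
        w->length = __cpu_to_le32(length);
        w->type   = __cpu_to_le16(type);
    }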
diff --git a/xen-2.4.16/include/xeno/byteorder/pdp_endian.h b/xen-2.4.16/include/xeno/byteorder/pdp_endian.h new file mode 100644 index 0000000000..618631cbc6 --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/pdp_endian.h @@ -0,0 +1,88 @@ +#ifndef _LINUX_BYTEORDER_PDP_ENDIAN_H +#define _LINUX_BYTEORDER_PDP_ENDIAN_H + +/* + * Could have been named NUXI-endian, but we use the same name as in glibc. + * Hopefully only the PDP and its evolutions (old VAXen in compatibility mode) + * should ever use this braindead byteorder. + * This file *should* work, but has not been tested. + * + * little-endian is 1234; big-endian is 4321; nuxi/pdp-endian is 3412 + * + * I thought vaxen were NUXI-endian, but was told they were correct-endian + * (little-endian), though indeed there existed NUXI-endian machines + * (DEC PDP-11 and old VAXen in compatibility mode). + * This makes this file a bit useless, but it remains as a proof-of-concept. + * + * But what does a __u64 look like: is it 34127856 or 78563412 ??? + * I don't dare imagine! Hence, no 64-bit byteorder support yet. + * Hopefully, 64-bit pdp-endian support won't ever be required. + * + */ + +#ifndef __PDP_ENDIAN +#define __PDP_ENDIAN 3412 +#endif +#ifndef __PDP_ENDIAN_BITFIELD +#define __PDP_ENDIAN_BITFIELD +#endif + +#include +#include + +#define __constant_htonl(x) ___constant_swahb32((x)) +#define __constant_ntohl(x) ___constant_swahb32((x)) +#define __constant_htons(x) ___constant_swab16((x)) +#define __constant_ntohs(x) ___constant_swab16((x)) +#define __constant_cpu_to_le64(x) I DON'T KNOW +#define __constant_le64_to_cpu(x) I DON'T KNOW +#define __constant_cpu_to_le32(x) ___constant_swahw32((x)) +#define __constant_le32_to_cpu(x) ___constant_swahw32((x)) +#define __constant_cpu_to_le16(x) ((__u16)(x)) +#define __constant_le16_to_cpu(x) ((__u16)(x)) +#define __constant_cpu_to_be64(x) I DON'T KNOW +#define __constant_be64_to_cpu(x) I DON'T KNOW +#define __constant_cpu_to_be32(x) ___constant_swahb32((x)) +#define __constant_be32_to_cpu(x) ___constant_swahb32((x)) +#define __constant_cpu_to_be16(x) ___constant_swab16((x)) +#define __constant_be16_to_cpu(x) ___constant_swab16((x)) +#define __cpu_to_le64(x) I DON'T KNOW +#define __le64_to_cpu(x) I DON'T KNOW +#define __cpu_to_le32(x) ___swahw32((x)) +#define __le32_to_cpu(x) ___swahw32((x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_be64(x) I DON'T KNOW +#define __be64_to_cpu(x) I DON'T KNOW +#define __cpu_to_be32(x) __swahb32((x)) +#define __be32_to_cpu(x) __swahb32((x)) +#define __cpu_to_be16(x) __swab16((x)) +#define __be16_to_cpu(x) __swab16((x)) +#define __cpu_to_le64p(x) I DON'T KNOW +#define __le64_to_cpup(x) I DON'T KNOW +#define __cpu_to_le32p(x) ___swahw32p((x)) +#define __le32_to_cpup(x) ___swahw32p((x)) +#define __cpu_to_le16p(x) (*(__u16*)(x)) +#define __le16_to_cpup(x) (*(__u16*)(x)) +#define __cpu_to_be64p(x) I DON'T KNOW +#define __be64_to_cpup(x) I DON'T KNOW +#define __cpu_to_be32p(x) __swahb32p((x)) +#define __be32_to_cpup(x) __swahb32p((x)) +#define __cpu_to_be16p(x) __swab16p((x)) +#define __be16_to_cpup(x) __swab16p((x)) +#define __cpu_to_le64s(x) I DON'T KNOW +#define __le64_to_cpus(x) I DON'T KNOW +#define __cpu_to_le32s(x) ___swahw32s((x)) +#define __le32_to_cpus(x) ___swahw32s((x)) +#define __cpu_to_le16s(x) do {} while (0) +#define __le16_to_cpus(x) do {} while (0) +#define __cpu_to_be64s(x) I DON'T KNOW +#define __be64_to_cpus(x) I DON'T KNOW +#define __cpu_to_be32s(x) __swahb32s((x)) +#define __be32_to_cpus(x) __swahb32s((x)) +#define __cpu_to_be16s(x) __swab16s((x)) +#define __be16_to_cpus(x) __swab16s((x)) + +#include + +#endif /* _LINUX_BYTEORDER_PDP_ENDIAN_H */ diff --git a/xen-2.4.16/include/xeno/byteorder/swab.h b/xen-2.4.16/include/xeno/byteorder/swab.h new file mode 100644 index 0000000000..814b4519ff --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/swab.h @@ -0,0 +1,190 @@ +#ifndef _LINUX_BYTEORDER_SWAB_H +#define _LINUX_BYTEORDER_SWAB_H + +/* + * linux/byteorder/swab.h + * Byte-swapping, independently from CPU endianness + * swabXX[ps]?(foo) + * + * Francois-Rene Rideau 19971205 + * separated swab functions from cpu_to_XX, + * to clean up support for bizarre-endian architectures. + * + * See asm-i386/byteorder.h and such for examples of how to provide + * architecture-dependent optimized versions + * + */ + +/* casts are necessary for constants, because we never know for sure + * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way. + */ +#define ___swab16(x) \ +({ \ + __u16 __x = (x); \ + ((__u16)( \ + (((__u16)(__x) & (__u16)0x00ffU) << 8) | \ + (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \ +}) + +#define ___swab32(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ +}) + +#define ___swab64(x) \ +({ \ + __u64 __x = (x); \ + ((__u64)( \ + (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ + (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ + (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ + (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ + (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ + (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ + (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ + (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ +}) + +#define ___constant_swab16(x) \ + ((__u16)( \ + (((__u16)(x) & (__u16)0x00ffU) << 8) | \ + (((__u16)(x) & (__u16)0xff00U) >> 8) )) +#define ___constant_swab32(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) +#define ___constant_swab64(x) \ + ((__u64)( \ + (__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \ + (__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \ + (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \ + (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \ + (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \ + (__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) )) +
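The ___swab* forms above are pure expressions, so the ___constant_* variants can be folded at compile time; the __swab* wrappers defined further down in this header choose between the two with __builtin_constant_p. A small illustrative sketch (not part of the patch) of what the macros compute:

    /* Illustrative only: expected results of the swab macros. */
    static void swab_examples(void)
    {
        __u16 a = ___constant_swab16(0x1234);     /* 0x3412, folded */
        __u32 b = ___constant_swab32(0x12345678); /* 0x78563412, folded */
        __u32 v = b;
        __u32 c = ___swab32(v);                   /* back to 0x12345678 */

        (void)a; (void)c;
    }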
+/* + * provide defaults when no architecture-specific optimization is detected + */ +#ifndef __arch__swab16 +# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); }) +#endif +#ifndef __arch__swab32 +# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); }) +#endif +#ifndef __arch__swab64 +# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); }) +#endif + +#ifndef __arch__swab16p +# define __arch__swab16p(x) __arch__swab16(*(x)) +#endif +#ifndef __arch__swab32p +# define __arch__swab32p(x) __arch__swab32(*(x)) +#endif +#ifndef __arch__swab64p +# define __arch__swab64p(x) __arch__swab64(*(x)) +#endif + +#ifndef __arch__swab16s +# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0) +#endif +#ifndef __arch__swab32s +# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0) +#endif +#ifndef __arch__swab64s +# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0) +#endif + + +/* + * Allow constant folding + */ +#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +# define __swab16(x) \ +(__builtin_constant_p((__u16)(x)) ? \ + ___swab16((x)) : \ + __fswab16((x))) +# define __swab32(x) \ +(__builtin_constant_p((__u32)(x)) ? \ + ___swab32((x)) : \ + __fswab32((x))) +# define __swab64(x) \ +(__builtin_constant_p((__u64)(x)) ? \ + ___swab64((x)) : \ + __fswab64((x))) +#else +# define __swab16(x) __fswab16(x) +# define __swab32(x) __fswab32(x) +# define __swab64(x) __fswab64(x) +#endif /* OPTIMIZE */ + + +static __inline__ __const__ __u16 __fswab16(__u16 x) +{ + return __arch__swab16(x); +} +static __inline__ __u16 __swab16p(__u16 *x) +{ + return __arch__swab16p(x); +} +static __inline__ void __swab16s(__u16 *addr) +{ + __arch__swab16s(addr); +} + +static __inline__ __const__ __u32 __fswab32(__u32 x) +{ + return __arch__swab32(x); +} +static __inline__ __u32 __swab32p(__u32 *x) +{ + return __arch__swab32p(x); +} +static __inline__ void __swab32s(__u32 *addr) +{ + __arch__swab32s(addr); +} + +#ifdef __BYTEORDER_HAS_U64__ +static __inline__ __const__ __u64 __fswab64(__u64 x) +{ +# ifdef __SWAB_64_THRU_32__ + __u32 h = x >> 32; + __u32 l = x & ((1ULL<<32)-1); + return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h))); +# else + return __arch__swab64(x); +# endif +} +static __inline__ __u64 __swab64p(__u64 *x) +{ + return __arch__swab64p(x); +} +static __inline__ void __swab64s(__u64 *addr) +{ + __arch__swab64s(addr); +} +#endif /* __BYTEORDER_HAS_U64__ */ + +#if defined(__KERNEL__) +#define swab16 __swab16 +#define swab32 __swab32 +#define swab64 __swab64 +#define swab16p __swab16p +#define swab32p __swab32p +#define swab64p __swab64p +#define swab16s __swab16s +#define swab32s __swab32s +#define swab64s __swab64s +#endif + +#endif /* _LINUX_BYTEORDER_SWAB_H */ diff --git a/xen-2.4.16/include/xeno/byteorder/swabb.h b/xen-2.4.16/include/xeno/byteorder/swabb.h new file mode 100644 index 0000000000..d28d9a804d --- /dev/null +++ b/xen-2.4.16/include/xeno/byteorder/swabb.h @@ -0,0 +1,137 @@ +#ifndef _LINUX_BYTEORDER_SWABB_H +#define _LINUX_BYTEORDER_SWABB_H + +/* + * linux/byteorder/swabb.h + * SWAp Bytes Bizarrely + * swaHHXX[ps]?(foo) + * + * Support for obNUXIous pdp-endian and other bizarre architectures. + * Will Linux ever run on such ancient beasts? If not, this file + * will be but a programming pearl. Still, it's a reminder that we + * shouldn't be making too many assumptions when trying to be portable. + * + */ + +/* + * Meaning of the names I chose (vaxlinux people feel free to correct them): + * swahw32 swap 16-bit half-words in a 32-bit word + * swahb32 swap 8-bit halves of each 16-bit half-word in a 32-bit word + * + * No 64-bit support yet. I don't know NUXI conventions for long longs. 
+ * I guarantee it will be a mess when it's there, though :-> + * It will be even worse if there are conflicting 64-bit conventions. + * Hopefully, no one ever used 64-bit objects on NUXI machines. + * + */ + +#define ___swahw32(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + (((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \ + (((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \ +}) +#define ___swahb32(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + (((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \ + (((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \ +}) + +#define ___constant_swahw32(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \ + (((__u32)(x) & (__u32)0xffff0000UL) >> 16) )) +#define ___constant_swahb32(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \ + (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) )) + +/* + * provide defaults when no architecture-specific optimization is detected + */ +#ifndef __arch__swahw32 +# define __arch__swahw32(x) ___swahw32(x) +#endif +#ifndef __arch__swahb32 +# define __arch__swahb32(x) ___swahb32(x) +#endif + +#ifndef __arch__swahw32p +# define __arch__swahw32p(x) __swahw32(*(x)) +#endif +#ifndef __arch__swahb32p +# define __arch__swahb32p(x) __swahb32(*(x)) +#endif + +#ifndef __arch__swahw32s +# define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0) +#endif +#ifndef __arch__swahb32s +# define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0) +#endif + + +/* + * Allow constant folding + */ +#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +# define __swahw32(x) \ +(__builtin_constant_p((__u32)(x)) ? \ + ___swahw32((x)) : \ + __fswahw32((x))) +# define __swahb32(x) \ +(__builtin_constant_p((__u32)(x)) ? \ + ___swahb32((x)) : \ + __fswahb32((x))) +#else +# define __swahw32(x) __fswahw32(x) +# define __swahb32(x) __fswahb32(x) +#endif /* OPTIMIZE */ + + +static __inline__ __const__ __u32 __fswahw32(__u32 x) +{ + return __arch__swahw32(x); +} +static __inline__ __u32 __swahw32p(__u32 *x) +{ + return __arch__swahw32p(x); +} +static __inline__ void __swahw32s(__u32 *addr) +{ + __arch__swahw32s(addr); +} + + +static __inline__ __const__ __u32 __fswahb32(__u32 x) +{ + return __arch__swahb32(x); +} +static __inline__ __u32 __swahb32p(__u32 *x) +{ + return __arch__swahb32p(x); +} +static __inline__ void __swahb32s(__u32 *addr) +{ + __arch__swahb32s(addr); +} + +#ifdef __BYTEORDER_HAS_U64__ +/* + * Not supported yet + */ +#endif /* __BYTEORDER_HAS_U64__ */ + +#if defined(__KERNEL__) +#define swahw32 __swahw32 +#define swahb32 __swahb32 +#define swahw32p __swahw32p +#define swahb32p __swahb32p +#define swahw32s __swahw32s +#define swahb32s __swahb32s +#endif + +#endif /* _LINUX_BYTEORDER_SWABB_H */ diff --git a/xen-2.4.16/include/xeno/cache.h b/xen-2.4.16/include/xeno/cache.h new file mode 100644 index 0000000000..73a3be7f9f --- /dev/null +++ b/xen-2.4.16/include/xeno/cache.h @@ -0,0 +1,37 @@ +#ifndef __LINUX_CACHE_H +#define __LINUX_CACHE_H + +#include +#include + +#ifndef L1_CACHE_ALIGN +#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) +#endif + +#ifndef SMP_CACHE_BYTES +#define SMP_CACHE_BYTES L1_CACHE_BYTES +#endif + +#ifndef ____cacheline_aligned +#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#endif + +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + +#ifndef __cacheline_aligned +#ifdef MODULE 
+#define __cacheline_aligned ____cacheline_aligned +#else +#define __cacheline_aligned \ + __attribute__((__aligned__(SMP_CACHE_BYTES), \ + __section__(".data.cacheline_aligned"))) +#endif +#endif /* __cacheline_aligned */ + +#endif /* __LINUX_CACHE_H */ diff --git a/xen-2.4.16/include/xeno/config.h b/xen-2.4.16/include/xeno/config.h new file mode 100644 index 0000000000..d48fadbe42 --- /dev/null +++ b/xen-2.4.16/include/xeno/config.h @@ -0,0 +1,105 @@ +/****************************************************************************** + * config.h + * + * A Linux-style configuration list. + */ + +#ifndef __XENO_CONFIG_H__ +#define __XENO_CONFIG_H__ + +#define CONFIG_SMP 1 +#define CONFIG_X86_LOCAL_APIC 1 +#define CONFIG_X86_IO_APIC 1 +#define CONFIG_X86_L1_CACHE_SHIFT 5 + +#define CONFIG_PCI 1 +#define CONFIG_PCI_BIOS 1 +#define CONFIG_PCI_DIRECT 1 + +#define CONFIG_IDE 1 +#define CONFIG_BLK_DEV_IDE 1 +#define CONFIG_BLK_DEV_IDEDMA 1 +#define CONFIG_BLK_DEV_IDEPCI 1 +#define CONFIG_IDEDISK_MULTI_MODE 1 +#define CONFIG_IDEDISK_STROKE 1 +#define CONFIG_IDEPCI_SHARE_IRQ 1 +#define CONFIG_BLK_DEV_IDEDMA_PCI 1 +#define CONFIG_IDEDMA_PCI_AUTO 1 +#define CONFIG_IDEDMA_AUTO 1 +#define CONFIG_BLK_DEV_IDE_MODES 1 + +#define CONFIG_SCSI 1 +#define CONFIG_BLK_DEV_SD 1 +#define CONFIG_SD_EXTRA_DEVS 40 +#define CONFIG_SCSI_MULTI_LUN 1 + +#define HZ 100 + +/* Just to keep compiler happy. */ +#define SMP_CACHE_BYTES 64 +#define NR_CPUS 16 +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define ____cacheline_aligned __cacheline_aligned + +/* 0-8MB is fixed monitor space for now. */ +#define MAX_MONITOR_ADDRESS ( 16*1024*1024) +#define MAX_DMA_ADDRESS ( 16*1024*1024) +#define MAX_USABLE_ADDRESS ((0xfc000000-__PAGE_OFFSET) & ~((1<<22)-1)) + /*^^^^^^^^^*/ + /*arbitrary*/ + +/* Linkage for x86 */ +#define FASTCALL(x) x __attribute__((regparm(3))) +#define asmlinkage __attribute__((regparm(0))) +#define __ALIGN .align 16,0x90 +#define __ALIGN_STR ".align 16,0x90" +#define SYMBOL_NAME_STR(X) #X +#define SYMBOL_NAME(X) X +#define SYMBOL_NAME_LABEL(X) X##: +#ifdef __ASSEMBLY__ +#define ALIGN __ALIGN +#define ALIGN_STR __ALIGN_STR +#define ENTRY(name) \ + .globl SYMBOL_NAME(name); \ + ALIGN; \ + SYMBOL_NAME_LABEL(name) +#endif + +/* syslog levels ==> nothing! */ +#define KERN_NOTICE +#define KERN_WARNING +#define KERN_DEBUG +#define KERN_INFO +#define KERN_ERR +#define KERN_CRIT +#define KERN_EMERG +#define KERN_ALERT + +#define barrier() __asm__ __volatile__("": : :"memory") + +#define __HYPERVISOR_CS 0x30 +#define __HYPERVISOR_DS 0x38 +#define __GUEST_CS 0x11 +#define __GUEST_DS 0x19 + +#define NR_syscalls 255 + +#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f)) +#define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x)))) + +#define likely(_x) (_x) +#define unlikely(_x) (_x) + +#define dev_probe_lock() ((void)0) +#define dev_probe_unlock() ((void)0) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define capable(_c) 0 + +#ifndef __ASSEMBLY__ +extern unsigned long opt_ipbase, opt_nfsserv, opt_gateway, opt_netmask; +extern unsigned char opt_nfsroot[]; +#endif + +#endif /* __XENO_CONFIG_H__ */ diff --git a/xen-2.4.16/include/xeno/ctype.h b/xen-2.4.16/include/xeno/ctype.h new file mode 100644 index 0000000000..afa3639229 --- /dev/null +++ b/xen-2.4.16/include/xeno/ctype.h @@ -0,0 +1,54 @@ +#ifndef _LINUX_CTYPE_H +#define _LINUX_CTYPE_H + +/* + * NOTE! This ctype does not handle EOF like the standard C + * library is required to. 
+ */ + +#define _U 0x01 /* upper */ +#define _L 0x02 /* lower */ +#define _D 0x04 /* digit */ +#define _C 0x08 /* cntrl */ +#define _P 0x10 /* punct */ +#define _S 0x20 /* white space (space/lf/tab) */ +#define _X 0x40 /* hex digit */ +#define _SP 0x80 /* hard space (0x20) */ + +extern unsigned char _ctype[]; + +#define __ismask(x) (_ctype[(int)(unsigned char)(x)]) + +#define isalnum(c) ((__ismask(c)&(_U|_L|_D)) != 0) +#define isalpha(c) ((__ismask(c)&(_U|_L)) != 0) +#define iscntrl(c) ((__ismask(c)&(_C)) != 0) +#define isdigit(c) ((__ismask(c)&(_D)) != 0) +#define isgraph(c) ((__ismask(c)&(_P|_U|_L|_D)) != 0) +#define islower(c) ((__ismask(c)&(_L)) != 0) +#define isprint(c) ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0) +#define ispunct(c) ((__ismask(c)&(_P)) != 0) +#define isspace(c) ((__ismask(c)&(_S)) != 0) +#define isupper(c) ((__ismask(c)&(_U)) != 0) +#define isxdigit(c) ((__ismask(c)&(_D|_X)) != 0) + +#define isascii(c) (((unsigned char)(c))<=0x7f) +#define toascii(c) (((unsigned char)(c))&0x7f) + +static inline unsigned char __tolower(unsigned char c) +{ + if (isupper(c)) + c -= 'A'-'a'; + return c; +} + +static inline unsigned char __toupper(unsigned char c) +{ + if (islower(c)) + c -= 'a'-'A'; + return c; +} + +#define tolower(c) __tolower(c) +#define toupper(c) __toupper(c) + +#endif diff --git a/xen-2.4.16/include/xeno/delay.h b/xen-2.4.16/include/xeno/delay.h new file mode 100644 index 0000000000..9d70ef035f --- /dev/null +++ b/xen-2.4.16/include/xeno/delay.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_DELAY_H +#define _LINUX_DELAY_H + +/* Copyright (C) 1993 Linus Torvalds */ + +#include +#define mdelay(n) (\ + {unsigned long msec=(n); while (msec--) udelay(1000);}) + +#endif /* defined(_LINUX_DELAY_H) */ diff --git a/xen-2.4.16/include/xeno/dom0_ops.h b/xen-2.4.16/include/xeno/dom0_ops.h new file mode 100644 index 0000000000..651a2b77fa --- /dev/null +++ b/xen-2.4.16/include/xeno/dom0_ops.h @@ -0,0 +1,37 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. 
+ * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef __DOM0_OPS_H__ +#define __DOM0_OPS_H__ + +#define DOM0_NEWDOMAIN 0 +#define DOM0_KILLDOMAIN 1 + +typedef struct dom0_newdomain_st +{ + unsigned int memory_kb; + unsigned int num_vifs; +} dom0_newdomain_t; + +typedef struct dom0_killdomain_st +{ + unsigned int domain; +} dom0_killdomain_t; + +typedef struct dom0_op_st +{ + unsigned long cmd; + union + { + dom0_newdomain_t newdomain; + dom0_killdomain_t killdomain; + } + u; +} dom0_op_t; + +#endif diff --git a/xen-2.4.16/include/xeno/elevator.h b/xen-2.4.16/include/xeno/elevator.h new file mode 100644 index 0000000000..1a8bb5c39a --- /dev/null +++ b/xen-2.4.16/include/xeno/elevator.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_ELEVATOR_H +#define _LINUX_ELEVATOR_H + +typedef void (elevator_fn) (struct request *, elevator_t *, + struct list_head *, + struct list_head *, int); + +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, + struct buffer_head *, int, int); + +typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); + +typedef void (elevator_merge_req_fn) (struct request *, struct request *); + +struct elevator_s +{ + int read_latency; + int write_latency; + + elevator_merge_fn *elevator_merge_fn; + elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; + elevator_merge_req_fn *elevator_merge_req_fn; + + unsigned int queue_ID; +}; + +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_noop_merge_req(struct request *, struct request *); + +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_linus_merge_req(struct request *, struct request *); + +typedef struct blkelv_ioctl_arg_s { + int queue_ID; + int read_latency; + int write_latency; + int max_bomb_segments; +} blkelv_ioctl_arg_t; + +#define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) +#define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) + +extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); +extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); + +extern void elevator_init(elevator_t *, elevator_t); + +/* + * Return values from elevator merger + */ +#define ELEVATOR_NO_MERGE 0 +#define ELEVATOR_FRONT_MERGE 1 +#define ELEVATOR_BACK_MERGE 2 + +/* + * This is used in the elevator algorithm. We don't prioritise reads + * over writes any more --- although reads are more time-critical than + * writes, by treating them equally we increase filesystem throughput. + * This turns out to give better overall performance. 
-- sct + */ +#define IN_ORDER(s1,s2) \ + ((((s1)->rq_dev == (s2)->rq_dev && \ + (s1)->sector < (s2)->sector)) || \ + (s1)->rq_dev < (s2)->rq_dev) + +#define BHRQ_IN_ORDER(bh, rq) \ + ((((bh)->b_rdev == (rq)->rq_dev && \ + (bh)->b_rsector < (rq)->sector)) || \ + (bh)->b_rdev < (rq)->rq_dev) + +static inline int elevator_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + latency = elevator->read_latency; + if (rw != READ) + latency = elevator->write_latency; + + return latency; +} + +#define ELEVATOR_NOOP \ +((elevator_t) { \ + 0, /* read_latency */ \ + 0, /* write_latency */ \ + \ + elevator_noop_merge, /* elevator_merge_fn */ \ + elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_noop_merge_req, /* elevator_merge_req_fn */ \ + }) + +#define ELEVATOR_LINUS \ +((elevator_t) { \ + 8192, /* read passovers */ \ + 16384, /* write passovers */ \ + \ + elevator_linus_merge, /* elevator_merge_fn */ \ + elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + }) + +#endif diff --git a/xen-2.4.16/include/xeno/errno.h b/xen-2.4.16/include/xeno/errno.h new file mode 100644 index 0000000000..7cf599f4de --- /dev/null +++ b/xen-2.4.16/include/xeno/errno.h @@ -0,0 +1,132 @@ +#ifndef _I386_ERRNO_H +#define _I386_ERRNO_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Arg list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define 
ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* 
Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ + +#endif diff --git a/xen-2.4.16/include/xeno/etherdevice.h b/xen-2.4.16/include/xeno/etherdevice.h new file mode 100644 index 0000000000..bac9b4d5ad --- /dev/null +++ b/xen-2.4.16/include/xeno/etherdevice.h @@ -0,0 +1,68 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the Ethernet handlers. + * + * Version: @(#)eth.h 1.0.4 05/13/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * + * Relocated to include/linux where it belongs by Alan Cox + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * WARNING: This move may well be temporary. This file will get merged with others RSN. + * + */ +#ifndef _LINUX_ETHERDEVICE_H +#define _LINUX_ETHERDEVICE_H + +#include + +#ifdef __KERNEL__ +extern int eth_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, + void *saddr, unsigned len); +extern int eth_rebuild_header(struct sk_buff *skb); +extern unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev); +extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, + unsigned char * haddr); +extern int eth_header_cache(struct neighbour *neigh, + struct hh_cache *hh); +extern int eth_header_parse(struct sk_buff *skb, + unsigned char *haddr); +extern struct net_device *init_etherdev(struct net_device *dev, int sizeof_priv); +extern struct net_device *alloc_etherdev(int sizeof_priv); + +static inline void eth_copy_and_sum (struct sk_buff *dest, unsigned char *src, int len, int base) +{ + memcpy (dest->data, src, len); +} + +/** + * is_valid_ether_addr - Determine if the given Ethernet address is valid + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not + * a multicast address, and is not FF:FF:FF:FF:FF:FF. The multicast + * and FF:FF:... tests are combined into the single test "!(addr[0]&1)". + * + * Return true if the address is valid. + */ +static inline int is_valid_ether_addr( u8 *addr ) +{ + const char zaddr[6] = {0,}; + + return !(addr[0]&1) && memcmp( addr, zaddr, 6); +} + +#endif + +#endif /* _LINUX_ETHERDEVICE_H */ diff --git a/xen-2.4.16/include/xeno/ethtool.h b/xen-2.4.16/include/xeno/ethtool.h new file mode 100644 index 0000000000..9b65fe1187 --- /dev/null +++ b/xen-2.4.16/include/xeno/ethtool.h @@ -0,0 +1,165 @@ +/* + * ethtool.h: Defines for Linux ethtool. + * + * Copyright (C) 1998 David S. Miller (davem@redhat.com) + * Copyright 2001 Jeff Garzik + * Portions Copyright 2001 Sun Microsystems (thockin@sun.com) + */ + +#ifndef _LINUX_ETHTOOL_H +#define _LINUX_ETHTOOL_H + + +/* This should work for both 32 and 64 bit userland. 
*/ +struct ethtool_cmd { + u32 cmd; + u32 supported; /* Features this interface supports */ + u32 advertising; /* Features this interface advertises */ + u16 speed; /* The forced speed, 10Mb, 100Mb, gigabit */ + u8 duplex; /* Duplex, half or full */ + u8 port; /* Which connector port */ + u8 phy_address; + u8 transceiver; /* Which transceiver to use */ + u8 autoneg; /* Enable or disable autonegotiation */ + u32 maxtxpkt; /* Tx pkts before generating tx int */ + u32 maxrxpkt; /* Rx pkts before generating rx int */ + u32 reserved[4]; +}; + +#define ETHTOOL_BUSINFO_LEN 32 +/* these strings are set to whatever the driver author decides... */ +struct ethtool_drvinfo { + u32 cmd; + char driver[32]; /* driver short name, "tulip", "eepro100" */ + char version[32]; /* driver version string */ + char fw_version[32]; /* firmware version string, if applicable */ + char bus_info[ETHTOOL_BUSINFO_LEN]; /* Bus info for this IF. */ + /* For PCI devices, use pci_dev->slot_name. */ + char reserved1[32]; + char reserved2[24]; + u32 eedump_len; /* Size of data from ETHTOOL_GEEPROM (bytes) */ + u32 regdump_len; /* Size of data from ETHTOOL_GREGS (bytes) */ +}; + +#define SOPASS_MAX 6 +/* wake-on-lan settings */ +struct ethtool_wolinfo { + u32 cmd; + u32 supported; + u32 wolopts; + u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ +}; + +/* for passing single values */ +struct ethtool_value { + u32 cmd; + u32 data; +}; + +/* for passing big chunks of data */ +struct ethtool_regs { + u32 cmd; + u32 version; /* driver-specific, indicates different chips/revs */ + u32 len; /* bytes */ + u8 data[0]; +}; + +/* for passing EEPROM chunks */ +struct ethtool_eeprom { + u32 cmd; + u32 magic; + u32 offset; /* in bytes */ + u32 len; /* in bytes */ + u8 data[0]; +}; +/* CMDs currently supported */ +#define ETHTOOL_GSET 0x00000001 /* Get settings. */ +#define ETHTOOL_SSET 0x00000002 /* Set settings, privileged. */ +#define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers, privileged. */ +#define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ +#define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options, priv. */ +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv. */ +#define ETHTOOL_GLINK 0x0000000a /* Get link status */ +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ + +/* compatibility with older code */ +#define SPARC_ETH_GSET ETHTOOL_GSET +#define SPARC_ETH_SSET ETHTOOL_SSET + +/* Indicates what features are supported by the interface. */ +#define SUPPORTED_10baseT_Half (1 << 0) +#define SUPPORTED_10baseT_Full (1 << 1) +#define SUPPORTED_100baseT_Half (1 << 2) +#define SUPPORTED_100baseT_Full (1 << 3) +#define SUPPORTED_1000baseT_Half (1 << 4) +#define SUPPORTED_1000baseT_Full (1 << 5) +#define SUPPORTED_Autoneg (1 << 6) +#define SUPPORTED_TP (1 << 7) +#define SUPPORTED_AUI (1 << 8) +#define SUPPORTED_MII (1 << 9) +#define SUPPORTED_FIBRE (1 << 10) +#define SUPPORTED_BNC (1 << 11) +
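Drivers report capabilities by ORing SUPPORTED_* bits into ethtool_cmd.supported, and a GSET/SSET command pair moves the whole struct across the ioctl boundary. A minimal sketch of filling and testing the mask (illustrative only; the helper names are invented, and the SPEED_/DUPLEX_/AUTONEG_ constants are defined just below):

    /* Illustrative only: how the bitmask and struct are meant to be used. */
    static void example_fill_cmd(struct ethtool_cmd *cmd)
    {
        cmd->cmd       = ETHTOOL_GSET;
        cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
                         SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
                         SUPPORTED_Autoneg | SUPPORTED_TP;
        cmd->speed     = SPEED_100;      /* defined later in this header */
        cmd->duplex    = DUPLEX_FULL;    /* defined later in this header */
        cmd->autoneg   = AUTONEG_ENABLE; /* defined later in this header */
    }

    static int example_supports_100_full(const struct ethtool_cmd *cmd)
    {
        return (cmd->supported & SUPPORTED_100baseT_Full) != 0;
    }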
+/* Indicates what features are advertised by the interface. */ +#define ADVERTISED_10baseT_Half (1 << 0) +#define ADVERTISED_10baseT_Full (1 << 1) +#define ADVERTISED_100baseT_Half (1 << 2) +#define ADVERTISED_100baseT_Full (1 << 3) +#define ADVERTISED_1000baseT_Half (1 << 4) +#define ADVERTISED_1000baseT_Full (1 << 5) +#define ADVERTISED_Autoneg (1 << 6) +#define ADVERTISED_TP (1 << 7) +#define ADVERTISED_AUI (1 << 8) +#define ADVERTISED_MII (1 << 9) +#define ADVERTISED_FIBRE (1 << 10) +#define ADVERTISED_BNC (1 << 11) + +/* The following are all involved in forcing a particular link + * mode for the device for setting things. When getting the + * device's settings, these indicate the current mode and whether + * it was forced up into this mode or autonegotiated. + */ + +/* The forced speed, 10Mb, 100Mb, gigabit. */ +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 + +/* Duplex, half or full. */ +#define DUPLEX_HALF 0x00 +#define DUPLEX_FULL 0x01 + +/* Which connector port. */ +#define PORT_TP 0x00 +#define PORT_AUI 0x01 +#define PORT_MII 0x02 +#define PORT_FIBRE 0x03 +#define PORT_BNC 0x04 + +/* Which transceiver to use. */ +#define XCVR_INTERNAL 0x00 +#define XCVR_EXTERNAL 0x01 +#define XCVR_DUMMY1 0x02 +#define XCVR_DUMMY2 0x03 +#define XCVR_DUMMY3 0x04 + +/* Enable or disable autonegotiation. If this is set to enable, + * the forced link modes above are completely ignored. + */ +#define AUTONEG_DISABLE 0x00 +#define AUTONEG_ENABLE 0x01 + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ + +#endif /* _LINUX_ETHTOOL_H */ diff --git a/xen-2.4.16/include/xeno/event.h b/xen-2.4.16/include/xeno/event.h new file mode 100644 index 0000000000..fdb9fed24d --- /dev/null +++ b/xen-2.4.16/include/xeno/event.h @@ -0,0 +1,101 @@ +/****************************************************************************** + * event.h + * + * A nice interface for passing asynchronous events to guest OSes. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include + +#ifdef CONFIG_SMP + +/* + * mark_guest_event: + * @p: Domain to which event should be passed + * @event: Event number + * RETURNS: "Bitmask" of CPU on which process is currently running + * + * The idea is that the caller may loop over task_list, looking for domains + * to pass events to (using this function). The caller accumulates the + * bits returned by this function (ORing them together) then calls + * event_notify(). + * + * Guest events are per-domain events passed directly to the guest OS + * in ring 1. + */ +static inline unsigned long mark_guest_event(struct task_struct *p, int event) +{ + set_bit(event, &p->shared_info->events); + + /* + * No need for the runqueue_lock! The check below does not race + * with the setting of has_cpu, because that is set with runqueue_lock + * held. The lock must be released before hypervisor exit (and so + * a write barrier executed). And, just before hypervisor exit, + * outstanding events are checked. So bit is certainly set early enough. + */ + smp_mb(); + if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); + reschedule(p); + return p->has_cpu ? (1 << p->processor) : 0; +} +
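The comment on mark_guest_event() above spells out the calling pattern: walk the domain list, OR together the returned CPU masks, then issue a single notify. A schematic version of that loop (illustrative only; the next_task linkage is assumed, and the real iteration and locking live elsewhere in the tree):

    /* Schematic caller, following the pattern described in the comment. */
    static void send_event_to_all(struct task_struct *task_list, int event)
    {
        unsigned long cpu_mask = 0;
        struct task_struct *p;

        for ( p = task_list; p != NULL; p = p->next_task ) /* assumed link */
            cpu_mask |= mark_guest_event(p, event);

        /* One IPI pass covers every domain found running on other CPUs. */
        guest_event_notify(cpu_mask);
    }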
+/* As above, but hyp_events are handled within the hypervisor. */ +static inline unsigned long mark_hyp_event(struct task_struct *p, int event) +{ + set_bit(event, &p->hyp_events); + smp_mb(); + if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); + reschedule(p); + return p->has_cpu ? (1 << p->processor) : 0; +} + +/* Notify the given set of CPUs that guest events may be outstanding. */ +static inline void guest_event_notify(unsigned long cpu_mask) +{ + cpu_mask &= ~(1 << smp_processor_id()); + if ( cpu_mask != 0 ) smp_send_event_check_mask(cpu_mask); +} + +#else + +static inline unsigned long mark_guest_event(struct task_struct *p, int event) +{ + set_bit(event, &p->shared_info->events); + if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); + reschedule(p); + return 0; +} + +static inline unsigned long mark_hyp_event(struct task_struct *p, int event) +{ + set_bit(event, &p->hyp_events); + if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p); + reschedule(p); + return 0; +} + +#define guest_event_notify(_mask) ((void)0) + +#endif + +/* Notify hypervisor events in the same way as for guest OS events. */ +#define hyp_event_notify(_mask) guest_event_notify(_mask) + +/* Clear a guest-OS event from a per-domain mask. */ +static inline void clear_guest_event(struct task_struct *p, int event) +{ + clear_bit(event, &p->shared_info->events); +} + +/* Clear a hypervisor event from a per-domain mask. */ +static inline void clear_hyp_event(struct task_struct *p, int event) +{ + clear_bit(event, &p->hyp_events); +} + +/* Called on return from (architecture-dependent) entry.S. */ +void do_hyp_events(void); diff --git a/xen-2.4.16/include/xeno/genhd.h b/xen-2.4.16/include/xeno/genhd.h new file mode 100644 index 0000000000..58a1734a56 --- /dev/null +++ b/xen-2.4.16/include/xeno/genhd.h @@ -0,0 +1,313 @@ +#ifndef _LINUX_GENHD_H +#define _LINUX_GENHD_H + +/* + * genhd.h Copyright (C) 1992 Drew Eckhardt + * Generic hard disk header file by + * Drew Eckhardt + * + * + */ + +#include +#include +#include + +enum { +/* These three have identical behaviour; use the second one if DOS fdisk gets + confused about extended/logical partitions starting past cylinder 1023. */ + DOS_EXTENDED_PARTITION = 5, + LINUX_EXTENDED_PARTITION = 0x85, + WIN98_EXTENDED_PARTITION = 0x0f, + + LINUX_SWAP_PARTITION = 0x82, + LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ + + SOLARIS_X86_PARTITION = LINUX_SWAP_PARTITION, + + DM6_PARTITION = 0x54, /* has DDO: use xlated geom & offset */ + EZD_PARTITION = 0x55, /* EZ-DRIVE */ + DM6_AUX1PARTITION = 0x51, /* no DDO: use xlated geom */ + DM6_AUX3PARTITION = 0x53, /* no DDO: use xlated geom */ + + FREEBSD_PARTITION = 0xa5, /* FreeBSD Partition ID */ + OPENBSD_PARTITION = 0xa6, /* OpenBSD Partition ID */ + NETBSD_PARTITION = 0xa9, /* NetBSD Partition ID */ + BSDI_PARTITION = 0xb7, /* BSDI Partition ID */ +/* Ours is not to wonder why.. 
*/ + BSD_PARTITION = FREEBSD_PARTITION, + MINIX_PARTITION = 0x81, /* Minix Partition ID */ + PLAN9_PARTITION = 0x39, /* Plan 9 Partition ID */ + UNIXWARE_PARTITION = 0x63, /* Partition ID, same as */ + /* GNU_HURD and SCO Unix */ +}; + +struct partition { + unsigned char boot_ind; /* 0x80 - active */ + unsigned char head; /* starting head */ + unsigned char sector; /* starting sector */ + unsigned char cyl; /* starting cylinder */ + unsigned char sys_ind; /* What partition type */ + unsigned char end_head; /* end head */ + unsigned char end_sector; /* end sector */ + unsigned char end_cyl; /* end cylinder */ + unsigned int start_sect; /* starting sector counting from 0 */ + unsigned int nr_sects; /* nr of sectors in partition */ +} __attribute__((packed)); + +#ifdef __KERNEL__ +/*# include */ + +struct hd_struct { + unsigned long start_sect; + unsigned long nr_sects; + /*devfs_handle_t de;*/ /* primary (master) devfs entry */ + int number; /* stupid old code wastes space */ + + /* Performance stats: */ + unsigned int ios_in_flight; + unsigned int io_ticks; + unsigned int last_idle_time; + unsigned int last_queue_change; + unsigned int aveq; + + unsigned int rd_ios; + unsigned int rd_merges; + unsigned int rd_ticks; + unsigned int rd_sectors; + unsigned int wr_ios; + unsigned int wr_merges; + unsigned int wr_ticks; + unsigned int wr_sectors; +}; + +#define GENHD_FL_REMOVABLE 1 + +struct gendisk { + int major; /* major number of driver */ + const char *major_name; /* name of major driver */ + int minor_shift; /* number of times minor is shifted to + get real minor */ + int max_p; /* maximum partitions per device */ + + struct hd_struct *part; /* [indexed by minor] */ + int *sizes; /* [idem], device size in blocks */ + int nr_real; /* number of real devices */ + + void *real_devices; /* internal use */ + struct gendisk *next; + struct block_device_operations *fops; + + /*devfs_handle_t *de_arr;*/ /* one per physical disc */ + char *flags; /* one per physical disc */ +}; + +/* drivers/block/genhd.c */ +extern struct gendisk *gendisk_head; + +extern void add_gendisk(struct gendisk *gp); +extern void del_gendisk(struct gendisk *gp); +extern struct gendisk *get_gendisk(kdev_t dev); +extern int walk_gendisk(int (*walk)(struct gendisk *, void *), void *); + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_SOLARIS_X86_PARTITION + +#define SOLARIS_X86_NUMSLICE 8 +#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) + +struct solaris_x86_slice { + ushort s_tag; /* ID tag of partition */ + ushort s_flag; /* permission flags */ + unsigned int s_start; /* start sector no of partition */ + unsigned int s_size; /* # of blocks in partition */ +}; + +struct solaris_x86_vtoc { + unsigned int v_bootinfo[3]; /* info needed by mboot (unsupported) */ + unsigned int v_sanity; /* to verify vtoc sanity */ + unsigned int v_version; /* layout version */ + char v_volume[8]; /* volume name */ + ushort v_sectorsz; /* sector size in bytes */ + ushort v_nparts; /* number of partitions */ + unsigned int v_reserved[10]; /* free space */ + struct solaris_x86_slice + v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ + unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp (unsupported) */ + char v_asciilabel[128]; /* for compatibility */ +}; + +#endif /* CONFIG_SOLARIS_X86_PARTITION */ + +#ifdef CONFIG_BSD_DISKLABEL +/* + * BSD disklabel support by Yossi Gottlieb + * updated by Marc Espie + */ + +/* check against BSD src/sys/sys/disklabel.h for consistency */ + +#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ +#define 
BSD_MAXPARTITIONS 8 +#define OPENBSD_MAXPARTITIONS 16 +#define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ +struct bsd_disklabel { + __u32 d_magic; /* the magic number */ + __s16 d_type; /* drive type */ + __s16 d_subtype; /* controller/d_type specific */ + char d_typename[16]; /* type name, e.g. "eagle" */ + char d_packname[16]; /* pack identifier */ + __u32 d_secsize; /* # of bytes per sector */ + __u32 d_nsectors; /* # of data sectors per track */ + __u32 d_ntracks; /* # of tracks per cylinder */ + __u32 d_ncylinders; /* # of data cylinders per unit */ + __u32 d_secpercyl; /* # of data sectors per cylinder */ + __u32 d_secperunit; /* # of data sectors per unit */ + __u16 d_sparespertrack; /* # of spare sectors per track */ + __u16 d_sparespercyl; /* # of spare sectors per cylinder */ + __u32 d_acylinders; /* # of alt. cylinders per unit */ + __u16 d_rpm; /* rotational speed */ + __u16 d_interleave; /* hardware sector interleave */ + __u16 d_trackskew; /* sector 0 skew, per track */ + __u16 d_cylskew; /* sector 0 skew, per cylinder */ + __u32 d_headswitch; /* head switch time, usec */ + __u32 d_trkseek; /* track-to-track seek, usec */ + __u32 d_flags; /* generic flags */ +#define NDDATA 5 + __u32 d_drivedata[NDDATA]; /* drive-type specific information */ +#define NSPARE 5 + __u32 d_spare[NSPARE]; /* reserved for future use */ + __u32 d_magic2; /* the magic number (again) */ + __u16 d_checksum; /* xor of data incl. partitions */ + + /* filesystem and partition information: */ + __u16 d_npartitions; /* number of partitions in following */ + __u32 d_bbsize; /* size of boot area at sn0, bytes */ + __u32 d_sbsize; /* max size of fs superblock, bytes */ + struct bsd_partition { /* the partition table */ + __u32 p_size; /* number of sectors in partition */ + __u32 p_offset; /* starting sector */ + __u32 p_fsize; /* filesystem basic fragment size */ + __u8 p_fstype; /* filesystem type, see below */ + __u8 p_frag; /* filesystem fragments per block */ + __u16 p_cpg; /* filesystem cylinders per group */ + } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ +}; + +#endif /* CONFIG_BSD_DISKLABEL */ + +#ifdef CONFIG_UNIXWARE_DISKLABEL +/* + * Unixware slices support by Andrzej Krzysztofowicz + * and Krzysztof G. Baranowski + */ + +#define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ +#define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ +#define UNIXWARE_NUMSLICE 16 +#define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ + +struct unixware_slice { + __u16 s_label; /* label */ + __u16 s_flags; /* permission flags */ + __u32 start_sect; /* starting sector */ + __u32 nr_sects; /* number of sectors in slice */ +}; + +struct unixware_disklabel { + __u32 d_type; /* drive type */ + __u32 d_magic; /* the magic number */ + __u32 d_version; /* version number */ + char d_serial[12]; /* serial number of the device */ + __u32 d_ncylinders; /* # of data cylinders per device */ + __u32 d_ntracks; /* # of tracks per cylinder */ + __u32 d_nsectors; /* # of data sectors per track */ + __u32 d_secsize; /* # of bytes per sector */ + __u32 d_part_start; /* # of first sector of this partition */ + __u32 d_unknown1[12]; /* ? 
*/ + __u32 d_alt_tbl; /* byte offset of alternate table */ + __u32 d_alt_len; /* byte length of alternate table */ + __u32 d_phys_cyl; /* # of physical cylinders per device */ + __u32 d_phys_trk; /* # of physical tracks per cylinder */ + __u32 d_phys_sec; /* # of physical sectors per track */ + __u32 d_phys_bytes; /* # of physical bytes per sector */ + __u32 d_unknown2; /* ? */ + __u32 d_unknown3; /* ? */ + __u32 d_pad[8]; /* pad */ + + struct unixware_vtoc { + __u32 v_magic; /* the magic number */ + __u32 v_version; /* version number */ + char v_name[8]; /* volume name */ + __u16 v_nslices; /* # of slices */ + __u16 v_unknown1; /* ? */ + __u32 v_reserved[10]; /* reserved */ + struct unixware_slice + v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ + } vtoc; + +}; /* 408 */ + +#endif /* CONFIG_UNIXWARE_DISKLABEL */ + +#ifdef CONFIG_MINIX_SUBPARTITION +# define MINIX_NR_SUBPARTITIONS 4 +#endif /* CONFIG_MINIX_SUBPARTITION */ + +#ifdef __KERNEL__ + +char *disk_name (struct gendisk *hd, int minor, char *buf); + +/* + * disk_round_stats is used to round off the IO statistics for a disk + * for a complete clock tick. + */ +void disk_round_stats(struct hd_struct *hd); + +/* + * Account for the completion of an IO request (used by drivers which + * bypass the normal end_request processing) + */ +struct request; +void req_finished_io(struct request *); + +#ifdef DEVFS_MUST_DIE +extern void devfs_register_partitions (struct gendisk *dev, int minor, + int unregister); +#endif + + + +/* + * FIXME: this should use genhd->minor_shift, but that is slow to look up. + */ +static inline unsigned int disk_index (kdev_t dev) +{ + int major = MAJOR(dev); + int minor = MINOR(dev); + unsigned int index; + + switch (major) { + case DAC960_MAJOR+0: + index = (minor & 0x00f8) >> 3; + break; + case SCSI_DISK0_MAJOR: + index = (minor & 0x00f0) >> 4; + break; + case IDE0_MAJOR: /* same as HD_MAJOR */ + case XT_DISK_MAJOR: + index = (minor & 0x0040) >> 6; + break; + case IDE1_MAJOR: + index = ((minor & 0x0040) >> 6) + 2; + break; + default: + return 0; + } + return index; +} + +#endif + +#endif diff --git a/xen-2.4.16/include/xeno/hdreg.h b/xen-2.4.16/include/xeno/hdreg.h new file mode 100644 index 0000000000..703b750110 --- /dev/null +++ b/xen-2.4.16/include/xeno/hdreg.h @@ -0,0 +1,662 @@ +#ifndef _LINUX_HDREG_H +#define _LINUX_HDREG_H + +/* + * This file contains some defines for the AT-hd-controller. + * Various sources. + */ + +#define HD_IRQ 14 /* the standard disk interrupt */ + +/* ide.c has its own port definitions in "ide.h" */ + +/* Hd controller regs. 
Ref: IBM AT Bios-listing */ +#define HD_DATA 0x1f0 /* _CTL when writing */ +#define HD_ERROR 0x1f1 /* see err-bits */ +#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */ +#define HD_SECTOR 0x1f3 /* starting sector */ +#define HD_LCYL 0x1f4 /* starting cylinder */ +#define HD_HCYL 0x1f5 /* high byte of starting cyl */ +#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */ +#define HD_STATUS 0x1f7 /* see status-bits */ +#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */ +#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */ +#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */ + +#define HD_CMD 0x3f6 /* used for resets */ +#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */ + +/* remainder is shared between hd.c, ide.c, ide-cd.c, and the hdparm utility */ + +/* Bits of HD_STATUS */ +#define ERR_STAT 0x01 +#define INDEX_STAT 0x02 +#define ECC_STAT 0x04 /* Corrected error */ +#define DRQ_STAT 0x08 +#define SEEK_STAT 0x10 +#define WRERR_STAT 0x20 +#define READY_STAT 0x40 +#define BUSY_STAT 0x80 + +/* Bits for HD_ERROR */ +#define MARK_ERR 0x01 /* Bad address mark */ +#define TRK0_ERR 0x02 /* couldn't find track 0 */ +#define ABRT_ERR 0x04 /* Command aborted */ +#define MCR_ERR 0x08 /* media change request */ +#define ID_ERR 0x10 /* ID field not found */ +#define MC_ERR 0x20 /* media changed */ +#define ECC_ERR 0x40 /* Uncorrectable ECC error */ +#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */ +#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */ + +/* + * Command Header sizes for IOCTL commands + * HDIO_DRIVE_CMD, HDIO_DRIVE_TASK, and HDIO_DRIVE_TASKFILE + */ + +#if 0 +#include +typedef ide_ioreg_t task_ioreg_t; +#else +typedef unsigned char task_ioreg_t; +#endif + +#define HDIO_DRIVE_CMD_HDR_SIZE 4*sizeof(task_ioreg_t) +#define HDIO_DRIVE_TASK_HDR_SIZE 8*sizeof(task_ioreg_t) +#define HDIO_DRIVE_HOB_HDR_SIZE 8*sizeof(task_ioreg_t) + +#define IDE_DRIVE_TASK_INVALID -1 +#define IDE_DRIVE_TASK_NO_DATA 0 +#define IDE_DRIVE_TASK_SET_XFER 1 + +#define IDE_DRIVE_TASK_IN 2 + +#define IDE_DRIVE_TASK_OUT 3 +#define IDE_DRIVE_TASK_RAW_WRITE 4 + +struct hd_drive_cmd_hdr { + task_ioreg_t command; + task_ioreg_t sector_number; + task_ioreg_t feature; + task_ioreg_t sector_count; +}; + +typedef struct hd_drive_task_hdr { + task_ioreg_t data; + task_ioreg_t feature; + task_ioreg_t sector_count; + task_ioreg_t sector_number; + task_ioreg_t low_cylinder; + task_ioreg_t high_cylinder; + task_ioreg_t device_head; + task_ioreg_t command; +} task_struct_t; + +typedef struct hd_drive_hob_hdr { + task_ioreg_t data; + task_ioreg_t feature; + task_ioreg_t sector_count; + task_ioreg_t sector_number; + task_ioreg_t low_cylinder; + task_ioreg_t high_cylinder; + task_ioreg_t device_head; + task_ioreg_t control; +} hob_struct_t; + +typedef union ide_reg_valid_s { + unsigned all : 16; + struct { + unsigned data : 1; + unsigned error_feature : 1; + unsigned sector : 1; + unsigned nsector : 1; + unsigned lcyl : 1; + unsigned hcyl : 1; + unsigned select : 1; + unsigned status_command : 1; + + unsigned data_hob : 1; + unsigned error_feature_hob : 1; + unsigned sector_hob : 1; + unsigned nsector_hob : 1; + unsigned lcyl_hob : 1; + unsigned hcyl_hob : 1; + unsigned select_hob : 1; + unsigned control_hob : 1; + } b; +} ide_reg_valid_t; + +/* + * Define standard taskfile in/out register + */ +#define IDE_TASKFILE_STD_OUT_FLAGS 0xFE +#define IDE_TASKFILE_STD_IN_FLAGS 0xFE +#define IDE_HOB_STD_OUT_FLAGS 
0xC0 +#define IDE_HOB_STD_IN_FLAGS 0xC0 + +typedef struct ide_task_request_s { + task_ioreg_t io_ports[8]; + task_ioreg_t hob_ports[8]; + ide_reg_valid_t out_flags; + ide_reg_valid_t in_flags; + int data_phase; + int req_cmd; + unsigned long out_size; + unsigned long in_size; +} ide_task_request_t; + +typedef struct ide_ioctl_request_s { + ide_task_request_t *task_request; + unsigned char *out_buffer; + unsigned char *in_buffer; +} ide_ioctl_request_t; + +#define TASKFILE_INVALID 0x7fff +#define TASKFILE_48 0x8000 + +#define TASKFILE_NO_DATA 0x0000 + +#define TASKFILE_IN 0x0001 +#define TASKFILE_MULTI_IN 0x0002 + +#define TASKFILE_OUT 0x0004 +#define TASKFILE_MULTI_OUT 0x0008 +#define TASKFILE_IN_OUT 0x0010 + +#define TASKFILE_IN_DMA 0x0020 +#define TASKFILE_OUT_DMA 0x0040 +#define TASKFILE_IN_DMAQ 0x0080 +#define TASKFILE_OUT_DMAQ 0x0100 + +#define TASKFILE_P_IN 0x0200 +#define TASKFILE_P_OUT 0x0400 +#define TASKFILE_P_IN_DMA 0x0800 +#define TASKFILE_P_OUT_DMA 0x1000 +#define TASKFILE_P_IN_DMAQ 0x2000 +#define TASKFILE_P_OUT_DMAQ 0x4000 + +/* ATA/ATAPI Commands pre T13 Spec */ +#define WIN_NOP 0x00 +#define CFA_REQ_EXT_ERROR_CODE 0x03 /* CFA Request Extended Error Code */ +#define WIN_SRST 0x08 /* ATAPI soft reset command */ +#define WIN_DEVICE_RESET 0x08 +#define WIN_RESTORE 0x10 +#define WIN_READ 0x20 /* 28-Bit */ +#define WIN_READ_EXT 0x24 /* 48-Bit */ +#define WIN_READDMA_EXT 0x25 /* 48-Bit */ +#define WIN_READDMA_QUEUED_EXT 0x26 /* 48-Bit */ +#define WIN_READ_NATIVE_MAX_EXT 0x27 /* 48-Bit */ +#define WIN_MULTREAD_EXT 0x29 /* 48-Bit */ +#define WIN_WRITE 0x30 /* 28-Bit */ +#define WIN_WRITE_EXT 0x34 /* 48-Bit */ +#define WIN_WRITEDMA_EXT 0x35 /* 48-Bit */ +#define WIN_WRITEDMA_QUEUED_EXT 0x36 /* 48-Bit */ +#define WIN_SET_MAX_EXT 0x37 /* 48-Bit */ +#define CFA_WRITE_SECT_WO_ERASE 0x38 /* CFA Write Sectors without erase */ +#define WIN_MULTWRITE_EXT 0x39 /* 48-Bit */ +#define WIN_WRITE_VERIFY 0x3C /* 28-Bit */ +#define WIN_VERIFY 0x40 /* 28-Bit - Read Verify Sectors */ +#define WIN_VERIFY_EXT 0x42 /* 48-Bit */ +#define WIN_FORMAT 0x50 +#define WIN_INIT 0x60 +#define WIN_SEEK 0x70 +#define CFA_TRANSLATE_SECTOR 0x87 /* CFA Translate Sector */ +#define WIN_DIAGNOSE 0x90 +#define WIN_SPECIFY 0x91 /* set drive geometry translation */ +#define WIN_DOWNLOAD_MICROCODE 0x92 +#define WIN_STANDBYNOW2 0x94 +#define WIN_SETIDLE2 0x97 +#define WIN_CHECKPOWERMODE2 0x98 +#define WIN_SLEEPNOW2 0x99 +#define WIN_PACKETCMD 0xA0 /* Send a packet command. 
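 */

/*
 * Hedged sketch (not from the imported header): filling in the
 * ide_task_request_t defined above for a command that moves no data.
 * The register images follow the hd_drive_task_hdr layout, so the
 * command byte lands in slot 7; memset() is assumed available.
 */
static void setup_nodata_task (ide_task_request_t *req, task_ioreg_t cmd)
{
	memset(req, 0, sizeof(*req));
	req->io_ports[7]   = cmd;	/* command register image */
	req->out_flags.all = IDE_TASKFILE_STD_OUT_FLAGS;
	req->data_phase    = TASKFILE_NO_DATA;
	req->req_cmd       = IDE_DRIVE_TASK_NO_DATA;
	req->out_size      = 0;
	req->in_size       = 0;
}

/*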
*/ +#define WIN_PIDENTIFY 0xA1 /* identify ATAPI device */ +#define WIN_QUEUED_SERVICE 0xA2 +#define WIN_SMART 0xB0 /* self-monitoring and reporting */ +#define CFA_ERASE_SECTORS 0xC0 +#define WIN_MULTREAD 0xC4 /* read sectors using multiple mode*/ +#define WIN_MULTWRITE 0xC5 /* write sectors using multiple mode */ +#define WIN_SETMULT 0xC6 /* enable/disable multiple mode */ +#define WIN_READDMA_QUEUED 0xC7 /* read sectors using Queued DMA transfers */ +#define WIN_READDMA 0xC8 /* read sectors using DMA transfers */ +#define WIN_WRITEDMA 0xCA /* write sectors using DMA transfers */ +#define WIN_WRITEDMA_QUEUED 0xCC /* write sectors using Queued DMA transfers */ +#define CFA_WRITE_MULTI_WO_ERASE 0xCD /* CFA Write multiple without erase */ +#define WIN_GETMEDIASTATUS 0xDA +#define WIN_DOORLOCK 0xDE /* lock door on removable drives */ +#define WIN_DOORUNLOCK 0xDF /* unlock door on removable drives */ +#define WIN_STANDBYNOW1 0xE0 +#define WIN_IDLEIMMEDIATE 0xE1 /* force drive to become "ready" */ +#define WIN_STANDBY 0xE2 /* Set device in Standby Mode */ +#define WIN_SETIDLE1 0xE3 +#define WIN_READ_BUFFER 0xE4 /* force read only 1 sector */ +#define WIN_CHECKPOWERMODE1 0xE5 +#define WIN_SLEEPNOW1 0xE6 +#define WIN_FLUSH_CACHE 0xE7 +#define WIN_WRITE_BUFFER 0xE8 /* force write only 1 sector */ +#define WIN_FLUSH_CACHE_EXT 0xEA /* 48-Bit */ +#define WIN_IDENTIFY 0xEC /* ask drive to identify itself */ +#define WIN_MEDIAEJECT 0xED +#define WIN_IDENTIFY_DMA 0xEE /* same as WIN_IDENTIFY, but DMA */ +#define WIN_SETFEATURES 0xEF /* set special drive features */ +#define EXABYTE_ENABLE_NEST 0xF0 +#define WIN_SECURITY_SET_PASS 0xF1 +#define WIN_SECURITY_UNLOCK 0xF2 +#define WIN_SECURITY_ERASE_PREPARE 0xF3 +#define WIN_SECURITY_ERASE_UNIT 0xF4 +#define WIN_SECURITY_FREEZE_LOCK 0xF5 +#define WIN_SECURITY_DISABLE 0xF6 +#define WIN_READ_NATIVE_MAX 0xF8 /* return the native maximum address */ +#define WIN_SET_MAX 0xF9 +#define DISABLE_SEAGATE 0xFB + +/* WIN_SMART sub-commands */ + +#define SMART_READ_VALUES 0xD0 +#define SMART_READ_THRESHOLDS 0xD1 +#define SMART_AUTOSAVE 0xD2 +#define SMART_SAVE 0xD3 +#define SMART_IMMEDIATE_OFFLINE 0xD4 +#define SMART_READ_LOG_SECTOR 0xD5 +#define SMART_WRITE_LOG_SECTOR 0xD6 +#define SMART_WRITE_THRESHOLDS 0xD7 +#define SMART_ENABLE 0xD8 +#define SMART_DISABLE 0xD9 +#define SMART_STATUS 0xDA +#define SMART_AUTO_OFFLINE 0xDB + +/* Password used in TF4 & TF5 executing SMART commands */ + +#define SMART_LCYL_PASS 0x4F +#define SMART_HCYL_PASS 0xC2 + +/* WIN_SETFEATURES sub-commands */ + +#define SETFEATURES_EN_WCACHE 0x02 /* Enable write cache */ +#define SETFEATURES_XFER 0x03 /* Set transfer mode */ +# define XFER_UDMA_7 0x47 /* 0100|0111 */ +# define XFER_UDMA_6 0x46 /* 0100|0110 */ +# define XFER_UDMA_5 0x45 /* 0100|0101 */ +# define XFER_UDMA_4 0x44 /* 0100|0100 */ +# define XFER_UDMA_3 0x43 /* 0100|0011 */ +# define XFER_UDMA_2 0x42 /* 0100|0010 */ +# define XFER_UDMA_1 0x41 /* 0100|0001 */ +# define XFER_UDMA_0 0x40 /* 0100|0000 */ +# define XFER_MW_DMA_2 0x22 /* 0010|0010 */ +# define XFER_MW_DMA_1 0x21 /* 0010|0001 */ +# define XFER_MW_DMA_0 0x20 /* 0010|0000 */ +# define XFER_SW_DMA_2 0x12 /* 0001|0010 */ +# define XFER_SW_DMA_1 0x11 /* 0001|0001 */ +# define XFER_SW_DMA_0 0x10 /* 0001|0000 */ +# define XFER_PIO_4 0x0C /* 0000|1100 */ +# define XFER_PIO_3 0x0B /* 0000|1011 */ +# define XFER_PIO_2 0x0A /* 0000|1010 */ +# define XFER_PIO_1 0x09 /* 0000|1001 */ +# define XFER_PIO_0 0x08 /* 0000|1000 */ +# define XFER_PIO_SLOW 0x00 /* 0000|0000 */ +#define 
SETFEATURES_DIS_DEFECT 0x04 /* Disable Defect Management */ +#define SETFEATURES_EN_APM 0x05 /* Enable advanced power management */ +#define SETFEATURES_DIS_MSN 0x31 /* Disable Media Status Notification */ +#define SETFEATURES_EN_AAM 0x42 /* Enable Automatic Acoustic Management */ +#define SETFEATURES_DIS_RLA 0x55 /* Disable read look-ahead feature */ +#define SETFEATURES_EN_RI 0x5D /* Enable release interrupt */ +#define SETFEATURES_EN_SI 0x5E /* Enable SERVICE interrupt */ +#define SETFEATURES_DIS_RPOD 0x66 /* Disable reverting to power on defaults */ +#define SETFEATURES_DIS_WCACHE 0x82 /* Disable write cache */ +#define SETFEATURES_EN_DEFECT 0x84 /* Enable Defect Management */ +#define SETFEATURES_DIS_APM 0x85 /* Disable advanced power management */ +#define SETFEATURES_EN_MSN 0x95 /* Enable Media Status Notification */ +#define SETFEATURES_EN_RLA 0xAA /* Enable read look-ahead feature */ +#define SETFEATURES_PREFETCH 0xAB /* Sets drive prefetch value */ +#define SETFEATURES_DIS_AAM 0xC2 /* Disable Automatic Acoustic Management */ +#define SETFEATURES_EN_RPOD 0xCC /* Enable reverting to power on defaults */ +#define SETFEATURES_DIS_RI 0xDD /* Disable release interrupt */ +#define SETFEATURES_DIS_SI 0xDE /* Disable SERVICE interrupt */ + +/* WIN_SECURITY sub-commands */ + +#define SECURITY_SET_PASSWORD 0xBA +#define SECURITY_UNLOCK 0xBB +#define SECURITY_ERASE_PREPARE 0xBC +#define SECURITY_ERASE_UNIT 0xBD +#define SECURITY_FREEZE_LOCK 0xBE +#define SECURITY_DISABLE_PASSWORD 0xBF + +struct hd_geometry { + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + unsigned long start; +}; + +/* BIG GEOMETRY */ +struct hd_big_geometry { + unsigned char heads; + unsigned char sectors; + unsigned int cylinders; + unsigned long start; +}; + +/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x030n/0x031n */ +#define HDIO_GETGEO 0x0301 /* get device geometry */ +#define HDIO_GET_UNMASKINTR 0x0302 /* get current unmask setting */ +#define HDIO_GET_MULTCOUNT 0x0304 /* get current IDE blockmode setting */ +#define HDIO_GET_QDMA 0x0305 /* get use-qdma flag */ +#define HDIO_OBSOLETE_IDENTITY 0x0307 /* OBSOLETE, DO NOT USE: returns 142 bytes */ +#define HDIO_GET_KEEPSETTINGS 0x0308 /* get keep-settings-on-reset flag */ +#define HDIO_GET_32BIT 0x0309 /* get current io_32bit setting */ +#define HDIO_GET_NOWERR 0x030a /* get ignore-write-error flag */ +#define HDIO_GET_DMA 0x030b /* get use-dma flag */ +#define HDIO_GET_NICE 0x030c /* get nice flags */ +#define HDIO_GET_IDENTITY 0x030d /* get IDE identification info */ +#define HDIO_GET_WCACHE 0x030e /* get write cache mode on|off */ +#define HDIO_GET_ACOUSTIC 0x030f /* get acoustic value */ +#define HDIO_GET_ADDRESS 0x0310 /* */ + +#define HDIO_GET_BUSSTATE 0x031a /* get the bus state of the hwif */ +#define HDIO_TRISTATE_HWIF 0x031b /* execute a channel tristate */ +#define HDIO_DRIVE_RESET 0x031c /* execute a device reset */ +#define HDIO_DRIVE_TASKFILE 0x031d /* execute raw taskfile */ +#define HDIO_DRIVE_TASK 0x031e /* execute task and special drive command */ +#define HDIO_DRIVE_CMD 0x031f /* execute a special drive command */ + +#define HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK + +/* hd/ide ctl's that pass (arg) non-ptr values are numbered 0x032n/0x033n */ +#define HDIO_SET_MULTCOUNT 0x0321 /* change IDE blockmode */ +#define HDIO_SET_UNMASKINTR 0x0322 /* permit other irqs during I/O */ +#define HDIO_SET_KEEPSETTINGS 0x0323 /* keep ioctl settings on reset */ +#define HDIO_SET_32BIT 0x0324 /* change io_32bit flags */ 
+#define HDIO_SET_NOWERR 0x0325 /* change ignore-write-error flag */ +#define HDIO_SET_DMA 0x0326 /* change use-dma flag */ +#define HDIO_SET_PIO_MODE 0x0327 /* reconfig interface to new speed */ +#define HDIO_SCAN_HWIF 0x0328 /* register and (re)scan interface */ +#define HDIO_SET_NICE 0x0329 /* set nice flags */ +#define HDIO_UNREGISTER_HWIF 0x032a /* unregister interface */ +#define HDIO_SET_WCACHE 0x032b /* change write cache enable-disable */ +#define HDIO_SET_ACOUSTIC 0x032c /* change acoustic behavior */ +#define HDIO_SET_BUSSTATE 0x032d /* set the bus state of the hwif */ +#define HDIO_SET_QDMA 0x032e /* change use-qdma flag */ +#define HDIO_SET_ADDRESS 0x032f /* change lba addressing modes */ + +/* bus states */ +enum { + BUSSTATE_OFF = 0, + BUSSTATE_ON, + BUSSTATE_TRISTATE +}; + +/* hd/ide ctl's that pass (arg) ptrs to user space are numbered 0x033n/0x033n */ +#define HDIO_GETGEO_BIG 0x0330 /* */ +#define HDIO_GETGEO_BIG_RAW 0x0331 /* */ + +#define __NEW_HD_DRIVE_ID +/* structure returned by HDIO_GET_IDENTITY, + * as per ANSI NCITS ATA6 rev.1b spec + */ +struct hd_driveid { + unsigned short config; /* lots of obsolete bit flags */ + unsigned short cyls; /* Obsolete, "physical" cyls */ + unsigned short reserved2; /* reserved (word 2) */ + unsigned short heads; /* Obsolete, "physical" heads */ + unsigned short track_bytes; /* unformatted bytes per track */ + unsigned short sector_bytes; /* unformatted bytes per sector */ + unsigned short sectors; /* Obsolete, "physical" sectors per track */ + unsigned short vendor0; /* vendor unique */ + unsigned short vendor1; /* vendor unique */ + unsigned short vendor2; /* Retired vendor unique */ + unsigned char serial_no[20]; /* 0 = not_specified */ + unsigned short buf_type; /* Retired */ + unsigned short buf_size; /* Retired, 512 byte increments + * 0 = not_specified + */ + unsigned short ecc_bytes; /* for r/w long cmds; 0 = not_specified */ + unsigned char fw_rev[8]; /* 0 = not_specified */ + unsigned char model[40]; /* 0 = not_specified */ + unsigned char max_multsect; /* 0=not_implemented */ + unsigned char vendor3; /* vendor unique */ + unsigned short dword_io; /* 0=not_implemented; 1=implemented */ + unsigned char vendor4; /* vendor unique */ + unsigned char capability; /* (upper byte of word 49) + * 3: IORDYsup + * 2: IORDYsw + * 1: LBA + * 0: DMA + */ + unsigned short reserved50; /* reserved (word 50) */ + unsigned char vendor5; /* Obsolete, vendor unique */ + unsigned char tPIO; /* Obsolete, 0=slow, 1=medium, 2=fast */ + unsigned char vendor6; /* Obsolete, vendor unique */ + unsigned char tDMA; /* Obsolete, 0=slow, 1=medium, 2=fast */ + unsigned short field_valid; /* (word 53) + * 2: ultra_ok word 88 + * 1: eide_ok words 64-70 + * 0: cur_ok words 54-58 + */ + unsigned short cur_cyls; /* Obsolete, logical cylinders */ + unsigned short cur_heads; /* Obsolete, l heads */ + unsigned short cur_sectors; /* Obsolete, l sectors per track */ + unsigned short cur_capacity0; /* Obsolete, l total sectors on drive */ + unsigned short cur_capacity1; /* Obsolete, (2 words, misaligned int) */ + unsigned char multsect; /* current multiple sector count */ + unsigned char multsect_valid; /* when (bit0==1) multsect is ok */ + unsigned int lba_capacity; /* Obsolete, total number of sectors */ + unsigned short dma_1word; /* Obsolete, single-word dma info */ + unsigned short dma_mword; /* multiple-word dma info */ + unsigned short eide_pio_modes; /* bits 0:mode3 1:mode4 */ + unsigned short eide_dma_min; /* min mword dma cycle time (ns) */ + unsigned short 
eide_dma_time; /* recommended mword dma cycle time (ns) */ + unsigned short eide_pio; /* min cycle time (ns), no IORDY */ + unsigned short eide_pio_iordy; /* min cycle time (ns), with IORDY */ + unsigned short words69_70[2]; /* reserved words 69-70 + * future command overlap and queuing + */ + /* HDIO_GET_IDENTITY currently returns only words 0 through 70 */ + unsigned short words71_74[4]; /* reserved words 71-74 + * for IDENTIFY PACKET DEVICE command + */ + unsigned short queue_depth; /* (word 75) + * 15:5 reserved + * 4:0 Maximum queue depth -1 + */ + unsigned short words76_79[4]; /* reserved words 76-79 */ + unsigned short major_rev_num; /* (word 80) */ + unsigned short minor_rev_num; /* (word 81) */ + unsigned short command_set_1; /* (word 82) supported + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + unsigned short command_set_2; /* (word 83) + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + unsigned short cfsse; /* (word 84) + * cmd set-feature supported extensions + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:3 reserved + * 2: Media Serial Number Valid + * 1: SMART selt-test supported + * 0: SMART error logging + */ + unsigned short cfs_enable_1; /* (word 85) + * command set-feature enabled + * 15: Obsolete + * 14: NOP command + * 13: READ_BUFFER + * 12: WRITE_BUFFER + * 11: Obsolete + * 10: Host Protected Area + * 9: DEVICE Reset + * 8: SERVICE Interrupt + * 7: Release Interrupt + * 6: look-ahead + * 5: write cache + * 4: PACKET Command + * 3: Power Management Feature Set + * 2: Removable Feature Set + * 1: Security Feature Set + * 0: SMART Feature Set + */ + unsigned short cfs_enable_2; /* (word 86) + * command set-feature enabled + * 15: Shall be ZERO + * 14: Shall be ONE + * 13: FLUSH CACHE EXT + * 12: FLUSH CACHE + * 11: Device Configuration Overlay + * 10: 48-bit Address Feature Set + * 9: Automatic Acoustic Management + * 8: SET MAX security + * 7: reserved 1407DT PARTIES + * 6: SetF sub-command Power-Up + * 5: Power-Up in Standby Feature Set + * 4: Removable Media Notification + * 3: APM Feature Set + * 2: CFA Feature Set + * 1: READ/WRITE DMA QUEUED + * 0: Download MicroCode + */ + unsigned short csf_default; /* (word 87) + * command set-feature default + * 15: Shall be ZERO + * 14: Shall be ONE + * 13:3 reserved + * 2: Media Serial Number Valid + * 1: SMART selt-test supported + * 0: SMART error logging + */ + unsigned short dma_ultra; /* (word 88) */ + unsigned short word89; /* reserved (word 89) */ + unsigned short word90; /* reserved (word 90) */ + unsigned short CurAPMvalues; /* current APM values */ + unsigned short word92; /* reserved (word 92) */ + unsigned short hw_config; /* hardware config (word 93) + * 15: + * 14: + * 13: + * 12: + * 11: + * 10: + * 9: + * 8: + * 7: + * 6: + * 5: + * 4: + * 3: + * 2: + * 1: + * 0: + */ + 
unsigned short acoustic; /* (word 94) + * 15:8 Vendor's recommended value + * 7:0 current value + */ + unsigned short words95_99[5]; /* reserved words 95-99 */ +#if 0 + unsigned short words100_103[4] ;/* reserved words 100-103 */ +#else + unsigned long long lba_capacity_2;/* 48-bit total number of sectors */ +#endif + unsigned short words104_125[22];/* reserved words 104-125 */ + unsigned short last_lun; /* (word 126) */ + unsigned short word127; /* (word 127) Feature Set + * Removable Media Notification + * 15:2 reserved + * 1:0 00 = not supported + * 01 = supported + * 10 = reserved + * 11 = reserved + */ + unsigned short dlf; /* (word 128) + * device lock function + * 15:9 reserved + * 8 security level 1:max 0:high + * 7:6 reserved + * 5 enhanced erase + * 4 expire + * 3 frozen + * 2 locked + * 1 en/disabled + * 0 capability + */ + unsigned short csfo; /* (word 129) + * current set features options + * 15:4 reserved + * 3: auto reassign + * 2: reverting + * 1: read-look-ahead + * 0: write cache + */ + unsigned short words130_155[26];/* reserved vendor words 130-155 */ + unsigned short word156; /* reserved vendor word 156 */ + unsigned short words157_159[3];/* reserved vendor words 157-159 */ + unsigned short cfa_power; /* (word 160) CFA Power Mode + * 15 word 160 supported + * 14 reserved + * 13 + * 12 + * 11:0 + */ + unsigned short words161_175[14];/* Reserved for CFA */ + unsigned short words176_205[31];/* Current Media Serial Number */ + unsigned short words206_254[48];/* reserved words 206-254 */ + unsigned short integrity_word; /* (word 255) + * 15:8 Checksum + * 7:0 Signature + */ +}; + +/* + * IDE "nice" flags. These are used on a per drive basis to determine + * when to be nice and give more bandwidth to the other devices which + * share the same IDE bus. + */ +#define IDE_NICE_DSC_OVERLAP (0) /* per the DSC overlap protocol */ +#define IDE_NICE_ATAPI_OVERLAP (1) /* not supported yet */ +#define IDE_NICE_0 (2) /* when sure that it won't affect us */ +#define IDE_NICE_1 (3) /* when probably won't affect us much */ +#define IDE_NICE_2 (4) /* when we know it's on our expense */ + +#ifdef __KERNEL__ +/* + * These routines are used for kernel command line parameters from main.c: + */ +#include + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +int ide_register(int io_port, int ctl_port, int irq); +void ide_unregister(unsigned int); +#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_HDREG_H */ diff --git a/xen-2.4.16/include/xeno/hdsmart.h b/xen-2.4.16/include/xeno/hdsmart.h new file mode 100644 index 0000000000..7974a47fe5 --- /dev/null +++ b/xen-2.4.16/include/xeno/hdsmart.h @@ -0,0 +1,124 @@ +/* + * linux/include/linux/hdsmart.h + * + * Copyright (C) 1999-2000 Michael Cornwell + * Copyright (C) 2000 Andre Hedrick + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
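 */

/*
 * Hedged sketch (not from the imported header): reading a drive's
 * capacity out of the hd_driveid structure defined in hdreg.h above.
 * Word 83 bit 10 advertises the 48-bit Address Feature Set, in which
 * case lba_capacity_2 holds the real sector count; otherwise the
 * 28-bit lba_capacity field applies.
 */
static unsigned long long id_capacity (struct hd_driveid *id)
{
	if (id->command_set_2 & (1 << 10))	/* 48-bit addressing */
		return id->lba_capacity_2;
	return id->lba_capacity;
}

/*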
+ */ + +#ifndef _LINUX_HDSMART_H +#define _LINUX_HDSMART_H + +#define OFFLINE_FULL_SCAN 0 +#define SHORT_SELF_TEST 1 +#define EXTEND_SELF_TEST 2 +#define SHORT_CAPTIVE_SELF_TEST 129 +#define EXTEND_CAPTIVE_SELF_TEST 130 + +/* smart_attribute is the vendor specific in SFF-8035 spec */ +typedef struct ata_smart_attribute_s { + unsigned char id; + unsigned short status_flag; + unsigned char normalized; + unsigned char worse_normal; + unsigned char raw[6]; + unsigned char reserv; +} __attribute__ ((packed)) ata_smart_attribute_t; + +/* smart_values is format of the read drive Atrribute command */ +typedef struct ata_smart_values_s { + unsigned short revnumber; + ata_smart_attribute_t vendor_attributes [30]; + unsigned char offline_data_collection_status; + unsigned char self_test_exec_status; + unsigned short total_time_to_complete_off_line; + unsigned char vendor_specific_366; + unsigned char offline_data_collection_capability; + unsigned short smart_capability; + unsigned char errorlog_capability; + unsigned char vendor_specific_371; + unsigned char short_test_completion_time; + unsigned char extend_test_completion_time; + unsigned char reserved_374_385 [12]; + unsigned char vendor_specific_386_509 [125]; + unsigned char chksum; +} __attribute__ ((packed)) ata_smart_values_t; + +/* Smart Threshold data structures */ +/* Vendor attribute of SMART Threshold */ +typedef struct ata_smart_threshold_entry_s { + unsigned char id; + unsigned char normalized_threshold; + unsigned char reserved[10]; +} __attribute__ ((packed)) ata_smart_threshold_entry_t; + +/* Format of Read SMART THreshold Command */ +typedef struct ata_smart_thresholds_s { + unsigned short revnumber; + ata_smart_threshold_entry_t thres_entries[30]; + unsigned char reserved[149]; + unsigned char chksum; +} __attribute__ ((packed)) ata_smart_thresholds_t; + +typedef struct ata_smart_errorlog_command_struct_s { + unsigned char devicecontrolreg; + unsigned char featuresreg; + unsigned char sector_count; + unsigned char sector_number; + unsigned char cylinder_low; + unsigned char cylinder_high; + unsigned char drive_head; + unsigned char commandreg; + unsigned int timestamp; +} __attribute__ ((packed)) ata_smart_errorlog_command_struct_t; + +typedef struct ata_smart_errorlog_error_struct_s { + unsigned char error_condition; + unsigned char extended_error[14]; + unsigned char state; + unsigned short timestamp; +} __attribute__ ((packed)) ata_smart_errorlog_error_struct_t; + +typedef struct ata_smart_errorlog_struct_s { + ata_smart_errorlog_command_struct_t commands[6]; + ata_smart_errorlog_error_struct_t error_struct; +} __attribute__ ((packed)) ata_smart_errorlog_struct_t; + +typedef struct ata_smart_errorlog_s { + unsigned char revnumber; + unsigned char error_log_pointer; + ata_smart_errorlog_struct_t errorlog_struct[5]; + unsigned short ata_error_count; + unsigned short non_fatal_count; + unsigned short drive_timeout_count; + unsigned char reserved[53]; + unsigned char chksum; +} __attribute__ ((packed)) ata_smart_errorlog_t; + +typedef struct ata_smart_selftestlog_struct_s { + unsigned char selftestnumber; + unsigned char selfteststatus; + unsigned short timestamp; + unsigned char selftestfailurecheckpoint; + unsigned int lbafirstfailure; + unsigned char vendorspecific[15]; +} __attribute__ ((packed)) ata_smart_selftestlog_struct_t; + +typedef struct ata_smart_selftestlog_s { + unsigned short revnumber; + ata_smart_selftestlog_struct_t selftest_struct[21]; + unsigned char vendorspecific[2]; + unsigned char mostrecenttest; + unsigned 
char resevered[2]; + unsigned char chksum; +} __attribute__ ((packed)) ata_smart_selftestlog_t; + +#endif /* _LINUX_HDSMART_H */ diff --git a/xen-2.4.16/include/xeno/ide.h b/xen-2.4.16/include/xeno/ide.h new file mode 100644 index 0000000000..dacfd89842 --- /dev/null +++ b/xen-2.4.16/include/xeno/ide.h @@ -0,0 +1,1105 @@ +#ifndef _IDE_H +#define _IDE_H +/* + * linux/include/linux/ide.h + * + * Copyright (C) 1994-1998 Linus Torvalds & authors + */ + +#include +#include +#include +#include +#include +#include +#include +/*#include */ +/*#include */ +#include + +/* + * This is the multiple IDE interface driver, as evolved from hd.c. + * It supports up to four IDE interfaces, on one or more IRQs (usually 14 & 15). + * There can be up to two drives per interface, as per the ATA-2 spec. + * + * Primary i/f: ide0: major=3; (hda) minor=0; (hdb) minor=64 + * Secondary i/f: ide1: major=22; (hdc or hd1a) minor=0; (hdd or hd1b) minor=64 + * Tertiary i/f: ide2: major=33; (hde) minor=0; (hdf) minor=64 + * Quaternary i/f: ide3: major=34; (hdg) minor=0; (hdh) minor=64 + */ + +/****************************************************************************** + * IDE driver configuration options (play with these as desired): + * + * REALLY_SLOW_IO can be defined in ide.c and ide-cd.c, if necessary + */ +#undef REALLY_FAST_IO /* define if ide ports are perfect */ +#define INITIAL_MULT_COUNT 0 /* off=0; on=2,4,8,16,32, etc.. */ + +#ifndef SUPPORT_SLOW_DATA_PORTS /* 1 to support slow data ports */ +#define SUPPORT_SLOW_DATA_PORTS 1 /* 0 to reduce kernel size */ +#endif +#ifndef SUPPORT_VLB_SYNC /* 1 to support weird 32-bit chips */ +#define SUPPORT_VLB_SYNC 1 /* 0 to reduce kernel size */ +#endif +#ifndef DISK_RECOVERY_TIME /* off=0; on=access_delay_time */ +#define DISK_RECOVERY_TIME 0 /* for hardware that needs it */ +#endif +#ifndef OK_TO_RESET_CONTROLLER /* 1 needed for good error recovery */ +#define OK_TO_RESET_CONTROLLER 1 /* 0 for use with AH2372A/B interface */ +#endif +#ifndef FANCY_STATUS_DUMPS /* 1 for human-readable drive errors */ +#define FANCY_STATUS_DUMPS 1 /* 0 to reduce kernel size */ +#endif + +#ifdef CONFIG_BLK_DEV_CMD640 +#if 0 /* change to 1 when debugging cmd640 problems */ +void cmd640_dump_regs (void); +#define CMD640_DUMP_REGS cmd640_dump_regs() /* for debugging cmd640 chipset */ +#endif +#endif /* CONFIG_BLK_DEV_CMD640 */ + +#ifndef DISABLE_IRQ_NOSYNC +#define DISABLE_IRQ_NOSYNC 0 +#endif + +/* + * IDE_DRIVE_CMD is used to implement many features of the hdparm utility + */ +#define IDE_DRIVE_CMD 99 /* (magic) undef to reduce kernel size*/ + +#define IDE_DRIVE_TASK 98 + +/* + * IDE_DRIVE_TASKFILE is used to implement many features needed for raw tasks + */ +#define IDE_DRIVE_TASKFILE 97 + +/* + * "No user-serviceable parts" beyond this point :) + *****************************************************************************/ + +typedef unsigned char byte; /* used everywhere */ + +/* + * Probably not wise to fiddle with these + */ +#define ERROR_MAX 8 /* Max read/write errors per sector */ +#define ERROR_RESET 3 /* Reset controller every 4th retry */ +#define ERROR_RECAL 1 /* Recalibrate every 2nd retry */ + +/* + * state flags + */ +#define DMA_PIO_RETRY 1 /* retrying in PIO */ + +/* + * Ensure that various configuration flags have compatible settings + */ +#ifdef REALLY_SLOW_IO +#undef REALLY_FAST_IO +#endif + +#define HWIF(drive) ((ide_hwif_t *)((drive)->hwif)) +#define HWGROUP(drive) ((ide_hwgroup_t *)(HWIF(drive)->hwgroup)) + +/* + * Definitions for accessing IDE controller registers 
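 */

/*
 * Hedged sketch (not from the imported header) of the escalation policy
 * the ERROR_* thresholds above encode: recalibrate on every 2nd retry,
 * reset the controller on every 4th, and give up at ERROR_MAX.  The
 * action names are hypothetical stand-ins.
 */
enum hd_err_action { ACT_RETRY, ACT_RECAL, ACT_RESET, ACT_FAIL };

static enum hd_err_action next_error_action (int errors)
{
	if (errors >= ERROR_MAX)
		return ACT_FAIL;		/* too many: fail request */
	if ((errors & ERROR_RESET) == ERROR_RESET)
		return ACT_RESET;		/* every 4th retry */
	if ((errors & ERROR_RECAL) == ERROR_RECAL)
		return ACT_RECAL;		/* every 2nd retry */
	return ACT_RETRY;
}

/*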
+ */ +#define IDE_NR_PORTS (10) + +#define IDE_DATA_OFFSET (0) +#define IDE_ERROR_OFFSET (1) +#define IDE_NSECTOR_OFFSET (2) +#define IDE_SECTOR_OFFSET (3) +#define IDE_LCYL_OFFSET (4) +#define IDE_HCYL_OFFSET (5) +#define IDE_SELECT_OFFSET (6) +#define IDE_STATUS_OFFSET (7) +#define IDE_CONTROL_OFFSET (8) +#define IDE_IRQ_OFFSET (9) + +#define IDE_FEATURE_OFFSET IDE_ERROR_OFFSET +#define IDE_COMMAND_OFFSET IDE_STATUS_OFFSET + +#define IDE_DATA_OFFSET_HOB (0) +#define IDE_ERROR_OFFSET_HOB (1) +#define IDE_NSECTOR_OFFSET_HOB (2) +#define IDE_SECTOR_OFFSET_HOB (3) +#define IDE_LCYL_OFFSET_HOB (4) +#define IDE_HCYL_OFFSET_HOB (5) +#define IDE_SELECT_OFFSET_HOB (6) +#define IDE_CONTROL_OFFSET_HOB (7) + +#define IDE_FEATURE_OFFSET_HOB IDE_ERROR_OFFSET_HOB + +#define IDE_DATA_REG (HWIF(drive)->io_ports[IDE_DATA_OFFSET]) +#define IDE_ERROR_REG (HWIF(drive)->io_ports[IDE_ERROR_OFFSET]) +#define IDE_NSECTOR_REG (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET]) +#define IDE_SECTOR_REG (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET]) +#define IDE_LCYL_REG (HWIF(drive)->io_ports[IDE_LCYL_OFFSET]) +#define IDE_HCYL_REG (HWIF(drive)->io_ports[IDE_HCYL_OFFSET]) +#define IDE_SELECT_REG (HWIF(drive)->io_ports[IDE_SELECT_OFFSET]) +#define IDE_STATUS_REG (HWIF(drive)->io_ports[IDE_STATUS_OFFSET]) +#define IDE_CONTROL_REG (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]) +#define IDE_IRQ_REG (HWIF(drive)->io_ports[IDE_IRQ_OFFSET]) + +#define IDE_DATA_REG_HOB (HWIF(drive)->io_ports[IDE_DATA_OFFSET]) +#define IDE_ERROR_REG_HOB (HWIF(drive)->io_ports[IDE_ERROR_OFFSET]) +#define IDE_NSECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET]) +#define IDE_SECTOR_REG_HOB (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET]) +#define IDE_LCYL_REG_HOB (HWIF(drive)->io_ports[IDE_LCYL_OFFSET]) +#define IDE_HCYL_REG_HOB (HWIF(drive)->io_ports[IDE_HCYL_OFFSET]) +#define IDE_SELECT_REG_HOB (HWIF(drive)->io_ports[IDE_SELECT_OFFSET]) +#define IDE_STATUS_REG_HOB (HWIF(drive)->io_ports[IDE_STATUS_OFFSET]) +#define IDE_CONTROL_REG_HOB (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET]) + +#define IDE_FEATURE_REG IDE_ERROR_REG +#define IDE_COMMAND_REG IDE_STATUS_REG +#define IDE_ALTSTATUS_REG IDE_CONTROL_REG +#define IDE_IREASON_REG IDE_NSECTOR_REG +#define IDE_BCOUNTL_REG IDE_LCYL_REG +#define IDE_BCOUNTH_REG IDE_HCYL_REG + +#define GET_ERR() IN_BYTE(IDE_ERROR_REG) +#define GET_STAT() IN_BYTE(IDE_STATUS_REG) +#define GET_ALTSTAT() IN_BYTE(IDE_CONTROL_REG) +#define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) +#define BAD_R_STAT (BUSY_STAT | ERR_STAT) +#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT) +#define BAD_STAT (BAD_R_STAT | DRQ_STAT) +#define DRIVE_READY (READY_STAT | SEEK_STAT) +#define DATA_READY (DRQ_STAT) + +/* + * Some more useful definitions + */ +#define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */ +#define MAJOR_NAME IDE_MAJOR_NAME +#define PARTN_BITS 6 /* number of minor dev bits for partitions */ +#define PARTN_MASK ((1< (b2) + (t)) || ((b2) > (b1) + (t))) +#define IDE_MIN(a,b) ((a)<(b) ? (a):(b)) +#define IDE_MAX(a,b) ((a)>(b) ? 
(a):(b)) + +#ifndef SPLIT_WORD +# define SPLIT_WORD(W,HB,LB) ((HB)=(W>>8), (LB)=(W-((W>>8)<<8))) +#endif +#ifndef MAKE_WORD +# define MAKE_WORD(W,HB,LB) ((W)=((HB<<8)+LB)) +#endif + + +/* + * Timeouts for various operations: + */ +#define WAIT_DRQ (5*HZ/100) /* 50msec - spec allows up to 20ms */ +#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) +#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */ +#else +#define WAIT_READY (3*HZ/100) /* 30msec - should be instantaneous */ +#endif /* CONFIG_APM || CONFIG_APM_MODULE */ +#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ +#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ +#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ +#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ + +#define SELECT_DRIVE(hwif,drive) \ +{ \ + if (hwif->selectproc) \ + hwif->selectproc(drive); \ + OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \ +} + +#define SELECT_INTERRUPT(hwif,drive) \ +{ \ + if (hwif->intrproc) \ + hwif->intrproc(drive); \ + else \ + OUT_BYTE((drive)->ctl|2, hwif->io_ports[IDE_CONTROL_OFFSET]); \ +} + +#define SELECT_MASK(hwif,drive,mask) \ +{ \ + if (hwif->maskproc) \ + hwif->maskproc(drive,mask); \ +} + +#define SELECT_READ_WRITE(hwif,drive,func) \ +{ \ + if (hwif->rwproc) \ + hwif->rwproc(drive,func); \ +} + +#define QUIRK_LIST(hwif,drive) \ +{ \ + if (hwif->quirkproc) \ + (drive)->quirk_list = hwif->quirkproc(drive); \ +} + +#define HOST(hwif,chipset) \ +{ \ + return ((hwif)->chipset == chipset) ? 1 : 0; \ +} + +#define IDE_DEBUG(lineno) \ + printk("%s,%s,line=%d\n", __FILE__, __FUNCTION__, (lineno)) + +/* + * Check for an interrupt and acknowledge the interrupt status + */ +struct hwif_s; +typedef int (ide_ack_intr_t)(struct hwif_s *); + +#ifndef NO_DMA +#define NO_DMA 255 +#endif + +/* + * hwif_chipset_t is used to keep track of the specific hardware + * chipset used by each IDE interface, if known. + */ +typedef enum { ide_unknown, ide_generic, ide_pci, + ide_cmd640, ide_dtc2278, ide_ali14xx, + ide_qd65xx, ide_umc8672, ide_ht6560b, + ide_pdc4030, ide_rz1000, ide_trm290, + ide_cmd646, ide_cy82c693, ide_4drives, + ide_pmac, ide_etrax100 +} hwif_chipset_t; + +/* + * Structure to hold all information about the location of this port + */ +typedef struct hw_regs_s { + ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */ + int irq; /* our irq number */ + int dma; /* our dma entry */ + ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ + void *priv; /* interface specific data */ + hwif_chipset_t chipset; +} hw_regs_t; + +/* + * Register new hardware with ide + */ +int ide_register_hw(hw_regs_t *hw, struct hwif_s **hwifp); + +/* + * Set up hw_regs_t structure before calling ide_register_hw (optional) + */ +void ide_setup_ports( hw_regs_t *hw, + ide_ioreg_t base, + int *offsets, + ide_ioreg_t ctrl, + ide_ioreg_t intr, + ide_ack_intr_t *ack_intr, + int irq); + +#include + +/* + * If the arch-dependant ide.h did not declare/define any OUT_BYTE + * or IN_BYTE functions, we make some defaults here. 
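 */

/*
 * Hedged sketch (not from the imported header): the registration dance
 * the hw_regs_t declarations above imply.  Every concrete value here --
 * the legacy primary-channel ports, the per-register offsets (assumed
 * to be added to the base), the IRQ -- is an assumption used purely
 * for illustration.
 */
static int register_sample_hwif (void)
{
	hw_regs_t hw;
	int offsets[IDE_NR_PORTS] = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 0 };

	ide_setup_ports(&hw,
			0x1f0,		/* base: taskfile registers     */
			offsets,	/* per-register offsets         */
			0x3f6,		/* ctrl: alt-status/dev-control */
			0,		/* intr: no irq-status port     */
			NULL,		/* ack_intr: none needed        */
			14);		/* irq                          */
	return ide_register_hw(&hw, NULL);
}

/*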
+ */ + +#ifndef HAVE_ARCH_OUT_BYTE +#ifdef REALLY_FAST_IO +#define OUT_BYTE(b,p) outb((b),(p)) +#define OUT_WORD(w,p) outw((w),(p)) +#else +#define OUT_BYTE(b,p) outb_p((b),(p)) +#define OUT_WORD(w,p) outw_p((w),(p)) +#endif +#endif + +#ifndef HAVE_ARCH_IN_BYTE +#ifdef REALLY_FAST_IO +#define IN_BYTE(p) (byte)inb(p) +#define IN_WORD(p) (short)inw(p) +#else +#define IN_BYTE(p) (byte)inb_p(p) +#define IN_WORD(p) (short)inw_p(p) +#endif +#endif + +/* + * Now for the data we need to maintain per-drive: ide_drive_t + */ + +#define ide_scsi 0x21 +#define ide_disk 0x20 +#define ide_optical 0x7 +#define ide_cdrom 0x5 +#define ide_tape 0x1 +#define ide_floppy 0x0 + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned set_geometry : 1; /* respecify drive geometry */ + unsigned recalibrate : 1; /* seek to cyl 0 */ + unsigned set_multmode : 1; /* set multmode count */ + unsigned set_tune : 1; /* tune interface for drive */ + unsigned reserved : 4; /* unused */ + } b; +} special_t; + +typedef struct ide_drive_s { + request_queue_t queue; /* request queue */ + struct ide_drive_s *next; /* circular list of hwgroup drives */ + unsigned long sleep; /* sleep until this time */ + unsigned long service_start; /* time we started last request */ + unsigned long service_time; /* service time of last request */ + unsigned long timeout; /* max time to wait for irq */ + special_t special; /* special action flags */ + byte keep_settings; /* restore settings after drive reset */ + byte using_dma; /* disk is using dma for read/write */ + byte retry_pio; /* retrying dma capable host in pio */ + byte state; /* retry state */ + byte waiting_for_dma; /* dma currently in progress */ + byte unmask; /* flag: okay to unmask other irqs */ + byte slow; /* flag: slow data port */ + byte bswap; /* flag: byte swap data */ + byte dsc_overlap; /* flag: DSC overlap */ + byte nice1; /* flag: give potential excess bandwidth */ + unsigned present : 1; /* drive is physically present */ + unsigned noprobe : 1; /* from: hdx=noprobe */ + unsigned busy : 1; /* currently doing revalidate_disk() */ + unsigned removable : 1; /* 1 if need to do check_media_change */ + unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */ + unsigned no_unmask : 1; /* disallow setting unmask bit */ + unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */ + unsigned nobios : 1; /* flag: do not probe bios for drive */ + unsigned revalidate : 1; /* request revalidation */ + unsigned atapi_overlap : 1; /* flag: ATAPI overlap (not supported) */ + unsigned nice0 : 1; /* flag: give obvious excess bandwidth */ + unsigned nice2 : 1; /* flag: give a share in our own bandwidth */ + unsigned doorlocking : 1; /* flag: for removable only: door lock/unlock works */ + unsigned autotune : 2; /* 1=autotune, 2=noautotune, 0=default */ + unsigned remap_0_to_1 : 2; /* 0=remap if ezdrive, 1=remap, 2=noremap */ + unsigned ata_flash : 1; /* 1=present, 0=default */ + unsigned addressing; /* : 2; 0=28-bit, 1=48-bit, 2=64-bit */ + byte scsi; /* 0=default, 1=skip current ide-subdriver for ide-scsi emulation */ + byte media; /* disk, cdrom, tape, floppy, ... 
*/ + select_t select; /* basic drive/head select reg value */ + byte ctl; /* "normal" value for IDE_CONTROL_REG */ + byte ready_stat; /* min status value for drive ready */ + byte mult_count; /* current multiple sector setting */ + byte mult_req; /* requested multiple sector setting */ + byte tune_req; /* requested drive tuning setting */ + byte io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ + byte bad_wstat; /* used for ignoring WRERR_STAT */ + byte nowerr; /* used for ignoring WRERR_STAT */ + byte sect0; /* offset of first sector for DM6:DDO */ + unsigned int usage; /* current "open()" count for drive */ + byte head; /* "real" number of heads */ + byte sect; /* "real" sectors per track */ + byte bios_head; /* BIOS/fdisk/LILO number of heads */ + byte bios_sect; /* BIOS/fdisk/LILO sectors per track */ + unsigned int bios_cyl; /* BIOS/fdisk/LILO number of cyls */ + unsigned int cyl; /* "real" number of cyls */ + unsigned long capacity; /* total number of sectors */ + unsigned long long capacity48; /* total number of sectors */ + unsigned int drive_data; /* for use by tuneproc/selectproc as needed */ + void *hwif; /* actually (ide_hwif_t *) */ + /*wait_queue_head_t wqueue;*/ /* used to wait for drive in open() */ + struct hd_driveid *id; /* drive model identification info */ + struct hd_struct *part; /* drive partition table */ + char name[4]; /* drive name, such as "hda" */ + void *driver; /* (ide_driver_t *) */ + void *driver_data; /* extra driver data */ + /*devfs_handle_t de; */ /* directory for device */ + struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ + void *settings; /* /proc/ide/ drive settings */ + char driver_req[10]; /* requests specific driver */ + int last_lun; /* last logical unit */ + int forced_lun; /* if hdxlun was given at boot */ + int lun; /* logical unit */ + int crc_count; /* crc counter to reduce drive speed */ + byte quirk_list; /* drive is considered quirky if set for a specific host */ + byte suspend_reset; /* drive suspend mode flag, soft-reset recovers */ + byte init_speed; /* transfer rate set at boot */ + byte current_speed; /* current transfer rate set */ + byte dn; /* now wide spread use */ + byte wcache; /* status of write cache */ + byte acoustic; /* acoustic management */ + unsigned int failures; /* current failure count */ + unsigned int max_failures; /* maximum allowed failure count */ +} ide_drive_t; + +/* + * An ide_dmaproc_t() initiates/aborts DMA read/write operations on a drive. + * + * The caller is assumed to have selected the drive and programmed the drive's + * sector address using CHS or LBA. All that remains is to prepare for DMA + * and then issue the actual read/write DMA/PIO command to the drive. + * + * Returns 0 if all went well. + * Returns 1 if DMA read/write could not be started, in which case the caller + * should either try again later, or revert to PIO for the current request. + */ +typedef enum { ide_dma_read, ide_dma_write, ide_dma_begin, + ide_dma_end, ide_dma_check, ide_dma_on, + ide_dma_off, ide_dma_off_quietly, ide_dma_test_irq, + ide_dma_bad_drive, ide_dma_good_drive, + ide_dma_verbose, ide_dma_retune, + ide_dma_lostirq, ide_dma_timeout +} ide_dma_action_t; + +typedef int (ide_dmaproc_t)(ide_dma_action_t, ide_drive_t *); + +/* + * An ide_ideproc_t() performs CPU-polled transfers to/from a drive. + * Arguments are: the drive, the buffer pointer, and the length (in bytes or + * words depending on if it's an IDE or ATAPI call). + * + * If it is not defined for a controller, standard-code is used from ide.c. 
+ * + * Controllers which are not memory-mapped in the standard way need to + * override that mechanism using this function to work. + * + */ +typedef enum { ideproc_ide_input_data, ideproc_ide_output_data, + ideproc_atapi_input_bytes, ideproc_atapi_output_bytes +} ide_ide_action_t; + +typedef void (ide_ideproc_t)(ide_ide_action_t, ide_drive_t *, void *, unsigned int); + +/* + * An ide_tuneproc_t() is used to set the speed of an IDE interface + * to a particular PIO mode. The "byte" parameter is used + * to select the PIO mode by number (0,1,2,3,4,5), and a value of 255 + * indicates that the interface driver should "auto-tune" the PIO mode + * according to the drive capabilities in drive->id; + * + * Not all interface types support tuning, and not all of those + * support all possible PIO settings. They may silently ignore + * or round values as they see fit. + */ +typedef void (ide_tuneproc_t) (ide_drive_t *, byte); +typedef int (ide_speedproc_t) (ide_drive_t *, byte); + +/* + * This is used to provide support for strange interfaces + */ +typedef void (ide_selectproc_t) (ide_drive_t *); +typedef void (ide_resetproc_t) (ide_drive_t *); +typedef int (ide_quirkproc_t) (ide_drive_t *); +typedef void (ide_intrproc_t) (ide_drive_t *); +typedef void (ide_maskproc_t) (ide_drive_t *, int); +typedef void (ide_rw_proc_t) (ide_drive_t *, ide_dma_action_t); + +/* + * ide soft-power support + */ +typedef int (ide_busproc_t) (ide_drive_t *, int); + +#define IDE_CHIPSET_PCI_MASK \ + ((1<> (c)) & 1) + +#ifdef CONFIG_BLK_DEV_IDEPCI +typedef struct ide_pci_devid_s { + unsigned short vid; + unsigned short did; +} ide_pci_devid_t; + +#define IDE_PCI_DEVID_NULL ((ide_pci_devid_t){0,0}) +#define IDE_PCI_DEVID_EQ(a,b) (a.vid == b.vid && a.did == b.did) +#endif /* CONFIG_BLK_DEV_IDEPCI */ + +typedef struct hwif_s { + struct hwif_s *next; /* for linked-list in ide_hwgroup_t */ + void *hwgroup; /* actually (ide_hwgroup_t *) */ + ide_ioreg_t io_ports[IDE_NR_PORTS]; /* task file registers */ + hw_regs_t hw; /* Hardware info */ + ide_drive_t drives[MAX_DRIVES]; /* drive info */ + struct gendisk *gd; /* gendisk structure */ + ide_tuneproc_t *tuneproc; /* routine to tune PIO mode for drives */ + ide_speedproc_t *speedproc; /* routine to retune DMA modes for drives */ + ide_selectproc_t *selectproc; /* tweaks hardware to select drive */ + ide_resetproc_t *resetproc; /* routine to reset controller after a disk reset */ + ide_intrproc_t *intrproc; /* special interrupt handling for shared pci interrupts */ + ide_maskproc_t *maskproc; /* special host masking for drive selection */ + ide_quirkproc_t *quirkproc; /* check host's drive quirk list */ + ide_rw_proc_t *rwproc; /* adjust timing based upon rq->cmd direction */ + ide_ideproc_t *ideproc; /* CPU-polled transfer routine */ + ide_dmaproc_t *dmaproc; /* dma read/write/abort routine */ + unsigned int *dmatable_cpu; /* dma physical region descriptor table (cpu view) */ + dma_addr_t dmatable_dma; /* dma physical region descriptor table (dma view) */ + struct scatterlist *sg_table; /* Scatter-gather list used to build the above */ + int sg_nents; /* Current number of entries in it */ + int sg_dma_direction; /* dma transfer direction */ + int sg_dma_active; /* is it in use */ + struct hwif_s *mate; /* other hwif from same PCI chip */ + unsigned long dma_base; /* base addr for dma ports */ + unsigned dma_extra; /* extra addr for dma ports */ + unsigned long config_data; /* for use by chipset-specific code */ + unsigned long select_data; /* for use by chipset-specific code */ + 
struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ + int irq; /* our irq number */ + byte major; /* our major number */ + char name[6]; /* name of interface, eg. "ide0" */ + byte index; /* 0 for ide0; 1 for ide1; ... */ + hwif_chipset_t chipset; /* sub-module for tuning.. */ + unsigned noprobe : 1; /* don't probe for this interface */ + unsigned present : 1; /* this interface exists */ + unsigned serialized : 1; /* serialized operation with mate hwif */ + unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ + unsigned reset : 1; /* reset after probe */ + unsigned autodma : 1; /* automatically try to enable DMA at boot */ + unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + byte channel; /* for dual-port chips: 0=primary, 1=secondary */ +#ifdef CONFIG_BLK_DEV_IDEPCI + struct pci_dev *pci_dev; /* for pci chipsets */ + ide_pci_devid_t pci_devid; /* for pci chipsets: {VID,DID} */ +#endif /* CONFIG_BLK_DEV_IDEPCI */ +#if (DISK_RECOVERY_TIME > 0) + unsigned long last_time; /* time when previous rq was done */ +#endif + byte straight8; /* Alan's straight 8 check */ + void *hwif_data; /* extra hwif data */ + ide_busproc_t *busproc; /* driver soft-power interface */ + byte bus_state; /* power state of the IDE bus */ +} ide_hwif_t; + +/* + * Status returned from various ide_ functions + */ +typedef enum { + ide_stopped, /* no drive operation was started */ + ide_started /* a drive operation was started, and a handler was set */ +} ide_startstop_t; + +/* + * internal ide interrupt handler type + */ +typedef ide_startstop_t (ide_pre_handler_t)(ide_drive_t *, struct request *); +typedef ide_startstop_t (ide_handler_t)(ide_drive_t *); +typedef ide_startstop_t (ide_post_handler_t)(ide_drive_t *); + +/* + * when ide_timer_expiry fires, invoke a handler of this type + * to decide what to do. 
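 */

/*
 * Hedged sketch (not from the imported header): the shape of an
 * interrupt-service step under this model.  A handler inspects status,
 * bails out through ide_error() on trouble, and either re-arms itself
 * via ide_set_handler() (both declared later in this header) or
 * declares the operation finished.  more_data_expected() is a purely
 * hypothetical predicate.
 */
static int more_data_expected (ide_drive_t *drive);	/* hypothetical */

static ide_startstop_t sample_intr (ide_drive_t *drive)
{
	byte stat = GET_STAT();

	if (!OK_STAT(stat, DRIVE_READY, BAD_STAT))
		return ide_error(drive, "sample_intr", stat);

	if (more_data_expected(drive)) {
		ide_set_handler(drive, &sample_intr, WAIT_CMD, NULL);
		return ide_started;	/* another irq is coming */
	}
	return ide_stopped;		/* request complete */
}

/*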
+ */ +typedef int (ide_expiry_t)(ide_drive_t *); + +typedef struct hwgroup_s { + ide_handler_t *handler;/* irq handler, if active */ + volatile int busy; /* BOOL: protects all fields below */ + int sleeping; /* BOOL: wake us up on timer expiry */ + ide_drive_t *drive; /* current drive */ + ide_hwif_t *hwif; /* ptr to current hwif in linked-list */ + struct request *rq; /* current request */ + struct timer_list timer; /* failsafe timer */ + struct request wrq; /* local copy of current write rq */ + unsigned long poll_timeout; /* timeout value during long polls */ + ide_expiry_t *expiry; /* queried upon timeouts */ +} ide_hwgroup_t; + +/* structure attached to the request for IDE_TASK_CMDS */ + +/* + * configurable drive settings + */ + +#define TYPE_INT 0 +#define TYPE_INTA 1 +#define TYPE_BYTE 2 +#define TYPE_SHORT 3 + +#define SETTING_READ (1 << 0) +#define SETTING_WRITE (1 << 1) +#define SETTING_RW (SETTING_READ | SETTING_WRITE) + +typedef int (ide_procset_t)(ide_drive_t *, int); +typedef struct ide_settings_s { + char *name; + int rw; + int read_ioctl; + int write_ioctl; + int data_type; + int min; + int max; + int mul_factor; + int div_factor; + void *data; + ide_procset_t *set; + int auto_remove; + struct ide_settings_s *next; +} ide_settings_t; + +void ide_add_setting(ide_drive_t *drive, const char *name, int rw, int read_ioctl, int write_ioctl, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set); +void ide_remove_setting(ide_drive_t *drive, char *name); +ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name); +int ide_read_setting(ide_drive_t *t, ide_settings_t *setting); +int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val); +void ide_add_generic_settings(ide_drive_t *drive); + +#if 0 +/* + * /proc/ide interface + */ +typedef struct { + const char *name; + mode_t mode; + read_proc_t *read_proc; + write_proc_t *write_proc; +} ide_proc_entry_t; +#endif + +#ifdef CONFIG_PROC_FS +void proc_ide_create(void); +void proc_ide_destroy(void); +void recreate_proc_ide_device(ide_hwif_t *, ide_drive_t *); +void destroy_proc_ide_device(ide_hwif_t *, ide_drive_t *); +void destroy_proc_ide_drives(ide_hwif_t *); +void create_proc_ide_interfaces(void); +void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data); +void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p); +read_proc_t proc_ide_read_capacity; +read_proc_t proc_ide_read_geometry; + +/* + * Standard exit stuff: + */ +#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \ +{ \ + len -= off; \ + if (len < count) { \ + *eof = 1; \ + if (len <= 0) \ + return 0; \ + } else \ + len = count; \ + *start = page + off; \ + return len; \ +} +#else +#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; +#endif + +/* + * Subdrivers support. 
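 */

/*
 * Hedged sketch (not from the imported header): wiring one per-drive
 * setting into the table above.  This exposes drive->nowerr as a 0/1
 * byte, readable and writable, mapped onto the matching
 * HDIO_GET_NOWERR/HDIO_SET_NOWERR ioctls.
 */
static void add_nowerr_setting (ide_drive_t *drive)
{
	ide_add_setting(drive, "nowerr", SETTING_RW,
			HDIO_GET_NOWERR, HDIO_SET_NOWERR,
			TYPE_BYTE,
			0, 1,			/* min, max         */
			1, 1,			/* mul, div factors */
			&drive->nowerr,		/* backing data     */
			NULL);			/* no custom setter */
}

/*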
+ */ +#define IDE_SUBDRIVER_VERSION 1 + +typedef int (ide_cleanup_proc)(ide_drive_t *); +typedef int (ide_standby_proc)(ide_drive_t *); +typedef int (ide_flushcache_proc)(ide_drive_t *); +typedef ide_startstop_t (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long); +typedef void (ide_end_request_proc)(byte, ide_hwgroup_t *); +typedef int (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long); +typedef int (ide_open_proc)(struct inode *, struct file *, ide_drive_t *); +typedef void (ide_release_proc)(struct inode *, struct file *, ide_drive_t *); +typedef int (ide_check_media_change_proc)(ide_drive_t *); +typedef void (ide_revalidate_proc)(ide_drive_t *); +typedef void (ide_pre_reset_proc)(ide_drive_t *); +typedef unsigned long (ide_capacity_proc)(ide_drive_t *); +typedef ide_startstop_t (ide_special_proc)(ide_drive_t *); +typedef void (ide_setting_proc)(ide_drive_t *); +typedef int (ide_reinit_proc)(ide_drive_t *); +typedef void (ata_prebuilder_proc)(ide_drive_t *); +typedef void (atapi_prebuilder_proc)(ide_drive_t *); + +typedef struct ide_driver_s { + const char *name; + const char *version; + byte media; + unsigned busy : 1; + unsigned supports_dma : 1; + unsigned supports_dsc_overlap : 1; + ide_cleanup_proc *cleanup; + ide_standby_proc *standby; + ide_flushcache_proc *flushcache; + ide_do_request_proc *do_request; + ide_end_request_proc *end_request; + ide_ioctl_proc *ioctl; + ide_open_proc *open; + ide_release_proc *release; + ide_check_media_change_proc *media_change; + ide_revalidate_proc *revalidate; + ide_pre_reset_proc *pre_reset; + ide_capacity_proc *capacity; + ide_special_proc *special; + /*ide_proc_entry_t *proc;*/ + ide_reinit_proc *reinit; + ata_prebuilder_proc *ata_prebuilder; + atapi_prebuilder_proc *atapi_prebuilder; +} ide_driver_t; + +#define DRIVER(drive) ((ide_driver_t *)((drive)->driver)) + +/* + * IDE modules. + */ +#define IDE_CHIPSET_MODULE 0 /* not supported yet */ +#define IDE_PROBE_MODULE 1 +#define IDE_DRIVER_MODULE 2 + +typedef int (ide_module_init_proc)(void); + +typedef struct ide_module_s { + int type; + ide_module_init_proc *init; + void *info; + struct ide_module_s *next; +} ide_module_t; + +/* + * ide_hwifs[] is the master data structure used to keep track + * of just about everything in ide.c. Whenever possible, routines + * should be using pointers to a drive (ide_drive_t *) or + * pointers to a hwif (ide_hwif_t *), rather than indexing this + * structure directly (the allocation/layout may change!). + * + */ +#ifndef _IDE_C +extern ide_hwif_t ide_hwifs[]; /* master data repository */ +extern ide_module_t *ide_modules; +extern ide_module_t *ide_probe; +#endif +extern int noautodma; + +/* + * We need blk.h, but we replace its end_request by our own version. + */ +#define IDE_DRIVER /* Toggle some magic bits in blk.h */ +#define LOCAL_END_REQUEST /* Don't generate end_request in blk.h */ +#include + +void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup); + +/* + * This is used for (nearly) all data transfers from/to the IDE interface + * FIXME for 2.5, to a pointer pass verses memcpy........ + */ +void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount); +void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount); + +/* + * This is used for (nearly) all ATAPI data transfers from/to the IDE interface + * FIXME for 2.5, to a pointer pass verses memcpy........ 
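 */

/*
 * Hedged sketch (not from the imported header): the minimum a subdriver
 * might fill into the ide_driver_t descriptor above.  sample_do_request()
 * is hypothetical, and members left out stay NULL on the assumption that
 * the core checks each hook before calling it.
 */
static ide_startstop_t sample_do_request (ide_drive_t *, struct request *, unsigned long);

static ide_driver_t sample_driver = {
	.name			= "sample",
	.version		= "0.1",
	.media			= ide_disk,
	.supports_dma		= 1,
	.supports_dsc_overlap	= 0,
	.do_request		= sample_do_request,
};

/*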
+ */ +void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); +void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount); + +int drive_is_ready (ide_drive_t *drive); + +/* + * This is used on exit from the driver, to designate the next irq handler + * and also to start the safety timer. + */ +void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry); + +/* + * Error reporting, in human readable form (luxurious, but a memory hog). + */ +byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat); + +/* + * ide_error() takes action based on the error returned by the controller. + * The caller should return immediately after invoking this. + */ +ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); + +/* + * Issue a simple drive command + * The drive must be selected beforehand. + */ +void ide_cmd (ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler); + +/* + * ide_fixstring() cleans up and (optionally) byte-swaps a text string, + * removing leading/trailing blanks and compressing internal blanks. + * It is primarily used to tidy up the model name/number fields as + * returned by the WIN_[P]IDENTIFY commands. + */ +void ide_fixstring (byte *s, const int bytecount, const int byteswap); + +/* + * This routine busy-waits for the drive status to be not "busy". + * It then checks the status for all of the "good" bits and none + * of the "bad" bits, and if all is okay it returns 0. All other + * cases return 1 after doing "*startstop = ide_error()", and the + * caller should return the updated value of "startstop" in this case. + * "startstop" is unchanged when the function returns 0; + */ +int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, byte good, byte bad, unsigned long timeout); + +int ide_wait_noerr (ide_drive_t *drive, byte good, byte bad, unsigned long timeout); + +/* + * This routine is called from the partition-table code in genhd.c + * to "convert" a drive to a logical geometry with fewer than 1024 cyls. + */ +int ide_xlate_1024 (kdev_t, int, int, const char *); + +/* + * Convert kdev_t structure into ide_drive_t * one. + */ +ide_drive_t *get_info_ptr (kdev_t i_rdev); + +/* + * Return the current idea about the total capacity of this drive. + */ +unsigned long current_capacity (ide_drive_t *drive); + +/* + * Start a reset operation for an IDE interface. + * The caller should return immediately after invoking this. + */ +ide_startstop_t ide_do_reset (ide_drive_t *); + +/* + * Re-Start an operation for an IDE interface. + * The caller should return immediately after invoking this. + */ +ide_startstop_t restart_request (ide_drive_t *); + +/* + * This function is intended to be used prior to invoking ide_do_drive_cmd(). + */ +void ide_init_drive_cmd (struct request *rq); + +/* + * "action" parameter type for ide_do_drive_cmd() below. + */ +typedef enum { + ide_wait, /* insert rq at end of list, and wait for it */ + ide_next, /* insert rq immediately after current request */ + ide_preempt, /* insert rq in front of current request */ + ide_end /* insert rq at end of list, but don't wait for it */ +} ide_action_t; + +/* + * This function issues a special IDE device request + * onto the request queue. + * + * If action is ide_wait, then the rq is queued at the end of the + * request queue, and the function sleeps until it has been processed. + * This is for use when invoked from an ioctl handler. 
+ *
+ * If action is ide_preempt, then the rq is queued at the head of
+ * the request queue, displacing the currently-being-processed
+ * request, and this function returns immediately without waiting
+ * for the new rq to be completed. This is VERY DANGEROUS, and is
+ * intended for careful use by the ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_next, then the rq is queued immediately after
+ * the currently-being-processed request (if any), and the function
+ * returns without waiting for the new rq to be completed. As above,
+ * this is VERY DANGEROUS, and is intended for careful use by the
+ * ATAPI tape/cdrom driver code.
+ *
+ * If action is ide_end, then the rq is queued at the end of the
+ * request queue, and the function returns immediately without waiting
+ * for the new rq to be completed. This is again intended for careful
+ * use by the ATAPI tape/cdrom driver code.
+ */
+int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t action);
+
+/*
+ * Clean up after success/failure of an explicit drive cmd.
+ * stat/err are used only when HWGROUP(drive)->rq->cmd is
+ * IDE_DRIVE_CMD or IDE_DRIVE_TASK_MASK.
+ */
+void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err);
+
+/*
+ * Issue an ATA command and wait for its completion.
+ * Use for implementing commands in the kernel.
+ */
+int ide_wait_cmd (ide_drive_t *drive, int cmd, int nsect, int feature, int sectors, byte *buf);
+
+int ide_wait_cmd_task (ide_drive_t *drive, byte *buf);
+
+typedef struct ide_task_s {
+ task_ioreg_t tfRegister[8];
+ task_ioreg_t hobRegister[8];
+ ide_reg_valid_t tf_out_flags;
+ ide_reg_valid_t tf_in_flags;
+ int data_phase;
+ int command_type;
+ ide_pre_handler_t *prehandler;
+ ide_handler_t *handler;
+ ide_post_handler_t *posthandler;
+ void *special; /* valid_t generally */
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} ide_task_t;
+
+typedef struct pkt_task_s {
+ task_ioreg_t tfRegister[8];
+ int data_phase;
+ int command_type;
+ ide_handler_t *handler;
+ void *special;
+ struct request *rq; /* copy of request */
+ unsigned long block; /* copy of block */
+} pkt_task_t;
+
+/*
+ * taskfile io for disks for now...
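+ *
+ * A rough sketch of how the taskfile entry points below are meant to
+ * be driven (hypothetical command choice; WIN_IDENTIFY and
+ * IDE_COMMAND_OFFSET come from the hdreg/ide headers):
+ *
+ *     ide_task_t args;
+ *
+ *     memset(&args, 0, sizeof(args));
+ *     args.tfRegister[IDE_COMMAND_OFFSET] = WIN_IDENTIFY;
+ *     args.command_type = ide_cmd_type_parser(&args);
+ *     ide_raw_taskfile(drive, &args, buf);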
+ */
+ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+/*
+ * Builds a request from ide_ioctl.
+ */
+void do_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, ide_handler_t *handler);
+
+/*
+ * Special Flagged Register Validation Caller
+ */
+// ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task);
+
+ide_startstop_t set_multmode_intr (ide_drive_t *drive);
+ide_startstop_t set_geometry_intr (ide_drive_t *drive);
+ide_startstop_t recal_intr (ide_drive_t *drive);
+ide_startstop_t task_no_data_intr (ide_drive_t *drive);
+ide_startstop_t task_in_intr (ide_drive_t *drive);
+ide_startstop_t task_mulin_intr (ide_drive_t *drive);
+ide_startstop_t pre_task_out_intr (ide_drive_t *drive, struct request *rq);
+ide_startstop_t task_out_intr (ide_drive_t *drive);
+ide_startstop_t task_mulout_intr (ide_drive_t *drive);
+void ide_init_drive_taskfile (struct request *rq);
+
+int ide_wait_taskfile (ide_drive_t *drive, struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile, byte *buf);
+
+int ide_raw_taskfile (ide_drive_t *drive, ide_task_t *cmd, byte *buf);
+
+ide_pre_handler_t * ide_pre_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+ide_handler_t * ide_handler_parser (struct hd_drive_task_hdr *taskfile, struct hd_drive_hob_hdr *hobfile);
+/* Expects args to be a full set of TF registers; parses the command type. */
+int ide_cmd_type_parser (ide_task_t *args);
+
+int ide_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+
+#ifdef CONFIG_PKT_TASK_IOCTL
+int pkt_taskfile_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
+#endif /* CONFIG_PKT_TASK_IOCTL */
+
+void ide_delay_50ms (void);
+int system_bus_clock(void);
+
+byte ide_auto_reduce_xfer (ide_drive_t *drive);
+int ide_driveid_update (ide_drive_t *drive);
+int ide_ata66_check (ide_drive_t *drive, ide_task_t *args);
+int ide_config_drive_speed (ide_drive_t *drive, byte speed);
+byte eighty_ninty_three (ide_drive_t *drive);
+int set_transfer (ide_drive_t *drive, ide_task_t *args);
+
+/*
+ * ide_system_bus_speed() returns what we think is the system VESA/PCI
+ * bus speed (in MHz). This is used for calculating interface PIO timings.
+ * The default is 40 for known PCI systems, 50 otherwise.
+ * The "idebus=xx" parameter can be used to override this value.
+ */
+int ide_system_bus_speed (void);
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multwrite operation.
+ */
+int ide_multwrite (ide_drive_t *drive, unsigned int mcount);
+
+/*
+ * ide_stall_queue() can be used by a drive to give excess bandwidth back
+ * to the hwgroup by sleeping for timeout jiffies.
+ */
+void ide_stall_queue (ide_drive_t *drive, unsigned long timeout);
+
+/*
+ * ide_get_queue() returns the queue which corresponds to a given device.
+ */
+request_queue_t *ide_get_queue (kdev_t dev);
+
+/*
+ * CompactFlash cards and their brethren pretend to be removable hard disks,
+ * but they never have a slave unit, and they don't have doorlock mechanisms.
+ * This test catches them, and is invoked elsewhere when setting appropriate config bits.
+ */ +int drive_is_flashcard (ide_drive_t *drive); + +int ide_spin_wait_hwgroup (ide_drive_t *drive); +void ide_timer_expiry (unsigned long data); +void ide_intr (int irq, void *dev_id, struct pt_regs *regs); +void do_ide_request (request_queue_t * q); +void ide_init_subdrivers (void); + +#ifndef _IDE_C +extern struct block_device_operations ide_fops[]; +/*extern ide_proc_entry_t generic_subdriver_entries[];*/ +#endif + +int ide_reinit_drive (ide_drive_t *drive); + +#ifdef _IDE_C +#ifdef CONFIG_BLK_DEV_IDE +int ideprobe_init (void); +#endif /* CONFIG_BLK_DEV_IDE */ +#ifdef CONFIG_BLK_DEV_IDEDISK +int idedisk_reinit (ide_drive_t *drive); +int idedisk_init (void); +#endif /* CONFIG_BLK_DEV_IDEDISK */ +#ifdef CONFIG_BLK_DEV_IDECD +int ide_cdrom_reinit (ide_drive_t *drive); +int ide_cdrom_init (void); +#endif /* CONFIG_BLK_DEV_IDECD */ +#ifdef CONFIG_BLK_DEV_IDETAPE +int idetape_reinit (ide_drive_t *drive); +int idetape_init (void); +#endif /* CONFIG_BLK_DEV_IDETAPE */ +#ifdef CONFIG_BLK_DEV_IDEFLOPPY +int idefloppy_reinit (ide_drive_t *drive); +int idefloppy_init (void); +#endif /* CONFIG_BLK_DEV_IDEFLOPPY */ +#ifdef CONFIG_BLK_DEV_IDESCSI +int idescsi_reinit (ide_drive_t *drive); +int idescsi_init (void); +#endif /* CONFIG_BLK_DEV_IDESCSI */ +#endif /* _IDE_C */ + +int ide_register_module (ide_module_t *module); +void ide_unregister_module (ide_module_t *module); +ide_drive_t *ide_scan_devices (byte media, const char *name, ide_driver_t *driver, int n); +int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version); +int ide_unregister_subdriver (ide_drive_t *drive); +int ide_replace_subdriver(ide_drive_t *drive, const char *driver); + +#ifdef CONFIG_BLK_DEV_IDEPCI +#define ON_BOARD 1 +#define NEVER_BOARD 0 +#ifdef CONFIG_BLK_DEV_OFFBOARD +# define OFF_BOARD ON_BOARD +#else /* CONFIG_BLK_DEV_OFFBOARD */ +# define OFF_BOARD NEVER_BOARD +#endif /* CONFIG_BLK_DEV_OFFBOARD */ + +unsigned long ide_find_free_region (unsigned short size) __init; +void ide_scan_pcibus (int scan_direction) __init; +#endif +#ifdef CONFIG_BLK_DEV_IDEDMA +#define BAD_DMA_DRIVE 0 +#define GOOD_DMA_DRIVE 1 +int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func); +void ide_destroy_dmatable (ide_drive_t *drive); +ide_startstop_t ide_dma_intr (ide_drive_t *drive); +int check_drive_lists (ide_drive_t *drive, int good_bad); +int report_drive_dmaing (ide_drive_t *drive); +int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive); +int ide_release_dma (ide_hwif_t *hwif); +void ide_setup_dma (ide_hwif_t *hwif, unsigned long dmabase, unsigned int num_ports) __init; +unsigned long ide_get_or_set_dma_base (ide_hwif_t *hwif, int extra, const char *name) __init; +#endif + +void hwif_unregister (ide_hwif_t *hwif); + +void export_ide_init_queue (ide_drive_t *drive); +byte export_probe_for_drive (ide_drive_t *drive); + +#endif /* _IDE_H */ diff --git a/xen-2.4.16/include/xeno/if.h b/xen-2.4.16/include/xeno/if.h new file mode 100644 index 0000000000..8d3fc2b7fc --- /dev/null +++ b/xen-2.4.16/include/xeno/if.h @@ -0,0 +1,141 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the INET interface module. + * + * Version: @(#)if.h 1.0.2 04/18/93 + * + * Authors: Original taken from Berkeley UNIX 4.3, (c) UCB 1982-1988 + * Ross Biro, + * Fred N. 
van Kempen,
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_IF_H
+#define _LINUX_IF_H
+
+#include <xeno/types.h> /* for "__kernel_caddr_t" et al */
+#include <xeno/socket.h> /* for "struct sockaddr" et al */
+
+/* Standard interface flags (netdevice->flags). */
+#define IFF_UP 0x1 /* interface is up */
+#define IFF_BROADCAST 0x2 /* broadcast address valid */
+#define IFF_DEBUG 0x4 /* turn on debugging */
+#define IFF_LOOPBACK 0x8 /* is a loopback net */
+#define IFF_POINTOPOINT 0x10 /* interface has p-p link */
+#define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
+#define IFF_RUNNING 0x40 /* resources allocated */
+#define IFF_NOARP 0x80 /* no ARP protocol */
+#define IFF_PROMISC 0x100 /* receive all packets */
+#define IFF_ALLMULTI 0x200 /* receive all multicast packets*/
+
+#define IFF_MASTER 0x400 /* master of a load balancer */
+#define IFF_SLAVE 0x800 /* slave of a load balancer */
+
+#define IFF_MULTICAST 0x1000 /* Supports multicast */
+
+#define IFF_VOLATILE (IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_MASTER|IFF_SLAVE|IFF_RUNNING)
+
+#define IFF_PORTSEL 0x2000 /* can set media type */
+#define IFF_AUTOMEDIA 0x4000 /* auto media select active */
+#define IFF_DYNAMIC 0x8000 /* dialup device with changing addresses*/
+
+/* Private (from user) interface flags (netdevice->priv_flags). */
+#define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */
+
+/*
+ * Device mapping structure. I'd just gone off and designed a
+ * beautiful scheme using only loadable modules with arguments
+ * for driver options and along come the PCMCIA people 8)
+ *
+ * Ah well. The get() side of this is good for WDSETUP, and it'll
+ * be handy for debugging things. The set side is fine for now and
+ * being very small might be worth keeping for clean configuration.
+ */
+
+struct ifmap
+{
+ unsigned long mem_start;
+ unsigned long mem_end;
+ unsigned short base_addr;
+ unsigned char irq;
+ unsigned char dma;
+ unsigned char port;
+ /* 3 bytes spare */
+};
+
+/*
+ * Interface request structure used for socket
+ * ioctl's. All interface ioctl's must have parameter
+ * definitions which begin with ifr_name. The
+ * remainder may be interface specific.
+ */
+
+struct ifreq
+{
+#define IFHWADDRLEN 6
+#define IFNAMSIZ 16
+ union
+ {
+ char ifrn_name[IFNAMSIZ]; /* if name, e.g.
"en0" */ + } ifr_ifrn; + + union { + struct sockaddr ifru_addr; + struct sockaddr ifru_dstaddr; + struct sockaddr ifru_broadaddr; + struct sockaddr ifru_netmask; + struct sockaddr ifru_hwaddr; + short ifru_flags; + int ifru_ivalue; + int ifru_mtu; + struct ifmap ifru_map; + char ifru_slave[IFNAMSIZ]; /* Just fits the size */ + char ifru_newname[IFNAMSIZ]; + char * ifru_data; + } ifr_ifru; +}; + +#define ifr_name ifr_ifrn.ifrn_name /* interface name */ +#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ +#define ifr_addr ifr_ifru.ifru_addr /* address */ +#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-p lnk */ +#define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ +#define ifr_netmask ifr_ifru.ifru_netmask /* interface net mask */ +#define ifr_flags ifr_ifru.ifru_flags /* flags */ +#define ifr_metric ifr_ifru.ifru_ivalue /* metric */ +#define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ +#define ifr_map ifr_ifru.ifru_map /* device map */ +#define ifr_slave ifr_ifru.ifru_slave /* slave device */ +#define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#define ifr_ifindex ifr_ifru.ifru_ivalue /* interface index */ +#define ifr_bandwidth ifr_ifru.ifru_ivalue /* link bandwidth */ +#define ifr_qlen ifr_ifru.ifru_ivalue /* Queue length */ +#define ifr_newname ifr_ifru.ifru_newname /* New name */ + +/* + * Structure used in SIOCGIFCONF request. + * Used to retrieve interface configuration + * for machine (useful for programs which + * must know all networks accessible). + */ + +struct ifconf +{ + int ifc_len; /* size of buffer */ + union + { + char * ifcu_buf; + struct ifreq *ifcu_req; + } ifc_ifcu; +}; +#define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ +#define ifc_req ifc_ifcu.ifcu_req /* array of structures */ + + +#endif /* _LINUX_IF_H */ diff --git a/xen-2.4.16/include/xeno/if_ether.h b/xen-2.4.16/include/xeno/if_ether.h new file mode 100644 index 0000000000..b64559d713 --- /dev/null +++ b/xen-2.4.16/include/xeno/if_ether.h @@ -0,0 +1,100 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Global definitions for the Ethernet IEEE 802.3 interface. + * + * Version: @(#)if_ether.h 1.0.1a 02/08/94 + * + * Author: Fred N. van Kempen, + * Donald Becker, + * Alan Cox, + * Steve Whitehouse, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_IF_ETHER_H +#define _LINUX_IF_ETHER_H + +/* + * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble + * and FCS/CRC (frame check sequence). + */ + +#define ETH_ALEN 6 /* Octets in one ethernet addr */ +#define ETH_HLEN 14 /* Total octets in header. */ +#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */ +#define ETH_DATA_LEN 1500 /* Max. octets in payload */ +#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */ + +/* + * These are the defined Ethernet Protocol ID's. 
+ */ + +#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */ +#define ETH_P_PUP 0x0200 /* Xerox PUP packet */ +#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_X25 0x0805 /* CCITT X.25 */ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ +#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ +#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_DEC 0x6000 /* DEC Assigned proto */ +#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ +#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ +#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */ +#define ETH_P_LAT 0x6004 /* DEC LAT */ +#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ +#define ETH_P_CUST 0x6006 /* DEC Customer use */ +#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ +#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ +#define ETH_P_ATALK 0x809B /* Appletalk DDP */ +#define ETH_P_AARP 0x80F3 /* Appletalk AARP */ +#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_IPX 0x8137 /* IPX over DIX */ +#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ +#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */ +#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */ +#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */ +#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport + * over Ethernet + */ + +/* + * Non DIX types. Won't clash for 1500 types. + */ + +#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */ +#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */ +#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */ +#define ETH_P_802_2 0x0004 /* 802.2 frames */ +#define ETH_P_SNAP 0x0005 /* Internal only */ +#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */ +#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/ +#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */ +#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */ +#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/ +#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */ +#define ETH_P_MOBITEX 0x0015 /* Mobitex (kaz@cafe.net) */ +#define ETH_P_CONTROL 0x0016 /* Card specific control frames */ +#define ETH_P_IRDA 0x0017 /* Linux-IrDA */ +#define ETH_P_ECONET 0x0018 /* Acorn Econet */ + +/* + * This is an Ethernet frame header. 
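+ *
+ * A sketch of how a receive path might dispatch on it (illustrative
+ * only; assumes the frame starts at a byte pointer named data):
+ *
+ *     struct ethhdr *eth = (struct ethhdr *)data;
+ *
+ *     if (eth->h_proto == htons(ETH_P_IP))
+ *         ... hand the payload to the IP layer ...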
+ */ + +struct ethhdr +{ + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; + +#endif /* _LINUX_IF_ETHER_H */ diff --git a/xen-2.4.16/include/xeno/if_packet.h b/xen-2.4.16/include/xeno/if_packet.h new file mode 100644 index 0000000000..b92558549d --- /dev/null +++ b/xen-2.4.16/include/xeno/if_packet.h @@ -0,0 +1,102 @@ +#ifndef __LINUX_IF_PACKET_H +#define __LINUX_IF_PACKET_H + +struct sockaddr_pkt +{ + unsigned short spkt_family; + unsigned char spkt_device[14]; + unsigned short spkt_protocol; +}; + +struct sockaddr_ll +{ + unsigned short sll_family; + unsigned short sll_protocol; + int sll_ifindex; + unsigned short sll_hatype; + unsigned char sll_pkttype; + unsigned char sll_halen; + unsigned char sll_addr[8]; +}; + +/* Packet types */ + +#define PACKET_HOST 0 /* To us */ +#define PACKET_BROADCAST 1 /* To all */ +#define PACKET_MULTICAST 2 /* To group */ +#define PACKET_OTHERHOST 3 /* To someone else */ +#define PACKET_OUTGOING 4 /* Outgoing of any type */ +/* These ones are invisible by user level */ +#define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */ +#define PACKET_FASTROUTE 6 /* Fastrouted frame */ + +/* Packet socket options */ + +#define PACKET_ADD_MEMBERSHIP 1 +#define PACKET_DROP_MEMBERSHIP 2 +#define PACKET_RECV_OUTPUT 3 +/* Value 4 is still used by obsolete turbo-packet. */ +#define PACKET_RX_RING 5 +#define PACKET_STATISTICS 6 +#define PACKET_COPY_THRESH 7 + +struct tpacket_stats +{ + unsigned int tp_packets; + unsigned int tp_drops; +}; + +struct tpacket_hdr +{ + unsigned long tp_status; +#define TP_STATUS_KERNEL 0 +#define TP_STATUS_USER 1 +#define TP_STATUS_COPY 2 +#define TP_STATUS_LOSING 4 +#define TP_STATUS_CSUMNOTREADY 8 + unsigned int tp_len; + unsigned int tp_snaplen; + unsigned short tp_mac; + unsigned short tp_net; + unsigned int tp_sec; + unsigned int tp_usec; +}; + +#define TPACKET_ALIGNMENT 16 +#define TPACKET_ALIGN(x) (((x)+TPACKET_ALIGNMENT-1)&~(TPACKET_ALIGNMENT-1)) +#define TPACKET_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll)) + +/* + Frame structure: + + - Start. Frame must be aligned to TPACKET_ALIGNMENT=16 + - struct tpacket_hdr + - pad to TPACKET_ALIGNMENT=16 + - struct sockaddr_ll + - Gap, chosen so that packet data (Start+tp_net) alignes to TPACKET_ALIGNMENT=16 + - Start+tp_mac: [ Optional MAC header ] + - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16. + - Pad to align to TPACKET_ALIGNMENT=16 + */ + +struct tpacket_req +{ + unsigned int tp_block_size; /* Minimal size of contiguous block */ + unsigned int tp_block_nr; /* Number of blocks */ + unsigned int tp_frame_size; /* Size of frame */ + unsigned int tp_frame_nr; /* Total number of frames */ +}; + +struct packet_mreq +{ + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[8]; +}; + +#define PACKET_MR_MULTICAST 0 +#define PACKET_MR_PROMISC 1 +#define PACKET_MR_ALLMULTI 2 + +#endif diff --git a/xen-2.4.16/include/xeno/init.h b/xen-2.4.16/include/xeno/init.h new file mode 100644 index 0000000000..5c4477f18c --- /dev/null +++ b/xen-2.4.16/include/xeno/init.h @@ -0,0 +1,170 @@ +#ifndef _LINUX_INIT_H +#define _LINUX_INIT_H + +#include + +/* These macros are used to mark some functions or + * initialized data (doesn't apply to uninitialized data) + * as `initialization' functions. 
The kernel can take this
+ * as a hint that the function is used only during the initialization
+ * phase, so that the memory it uses can be freed up afterwards.
+ *
+ * Usage:
+ * For functions:
+ *
+ * You should add __init immediately before the function name, like:
+ *
+ * static void __init initme(int x, int y)
+ * {
+ * extern int z; z = x * y;
+ * }
+ *
+ * If the function has a prototype somewhere, you can also add
+ * __init between the closing parenthesis of the prototype and the
+ * semicolon:
+ *
+ * extern int initialize_foobar_device(int, int, int) __init;
+ *
+ * For initialized data:
+ * You should insert __initdata between the variable name and equal
+ * sign followed by value, e.g.:
+ *
+ * static int init_variable __initdata = 0;
+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... };
+ *
+ * Don't forget to initialize data not at file scope, i.e. within a function,
+ * as gcc otherwise puts the data into the bss section and not into the init
+ * section.
+ *
+ * Also note that this data cannot be "const".
+ */
+
+#ifndef MODULE
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Used for initialization calls.
+ */
+typedef int (*initcall_t)(void);
+typedef void (*exitcall_t)(void);
+
+extern initcall_t __initcall_start, __initcall_end;
+
+#define __initcall(fn) \
+ static initcall_t __initcall_##fn __init_call = fn
+#define __exitcall(fn) \
+ static exitcall_t __exitcall_##fn __exit_call = fn
+
+/*
+ * Used for kernel command line parameter setup
+ */
+struct kernel_param {
+ const char *str;
+ int (*setup_func)(char *);
+};
+
+extern struct kernel_param __setup_start, __setup_end;
+
+#define __setup(str, fn) \
+ static char __setup_str_##fn[] __initdata = str; \
+ static struct kernel_param __setup_##fn __attribute__((unused)) __initsetup = { __setup_str_##fn, fn }
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Mark functions and data as being only used at initialization
+ * or exit time.
+ */
+#define __init __attribute__ ((__section__ (".text.init")))
+#define __exit __attribute__ ((unused, __section__(".text.exit")))
+#define __initdata __attribute__ ((__section__ (".data.init")))
+#define __exitdata __attribute__ ((unused, __section__ (".data.exit")))
+#define __initsetup __attribute__ ((unused,__section__ (".setup.init")))
+#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
+#define __exit_call __attribute__ ((unused,__section__ (".exitcall.exit")))
+
+/* For assembly routines */
+#define __INIT .section ".text.init","ax"
+#define __FINIT .previous
+#define __INITDATA .section ".data.init","aw"
+
+/**
+ * module_init() - driver initialization entry point
+ * @x: function to be run at kernel boot time or module insertion
+ *
+ * module_init() will add the driver initialization routine to
+ * the ".initcall.init" code section if the driver is built in
+ * ("y" or static), or else it will wrap the driver initialization
+ * routine with init_module(), which is used by insmod and
+ * modprobe when the driver is used as a module.
+ */
+#define module_init(x) __initcall(x);
+
+/**
+ * module_exit() - driver exit entry point
+ * @x: function to be run when driver is removed
+ *
+ * module_exit() will wrap the driver clean-up code
+ * with cleanup_module() when used with rmmod when
+ * the driver is a module. If the driver is statically
+ * compiled into the kernel, module_exit() has no effect.
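+ *
+ * A typical pairing, sketched with a hypothetical driver:
+ *
+ *     static int __init mydrv_init(void) { ... }
+ *     static void __exit mydrv_exit(void) { ... }
+ *
+ *     module_init(mydrv_init);
+ *     module_exit(mydrv_exit);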
+ */
+#define module_exit(x) __exitcall(x);
+
+#else
+
+#define __init
+#define __exit
+#define __initdata
+#define __exitdata
+#define __initcall(fn)
+/* For assembly routines */
+#define __INIT
+#define __FINIT
+#define __INITDATA
+
+/* These macros create a dummy inline: gcc 2.9x does not count alias
+ as usage, hence the `unused function' warning when __init functions
+ are declared static. We use the dummy __*_module_inline functions
+ both to kill the warning and check the type of the init/cleanup
+ function. */
+typedef int (*__init_module_func_t)(void);
+typedef void (*__cleanup_module_func_t)(void);
+#define module_init(x) \
+ int init_module(void) __attribute__((alias(#x))); \
+ static inline __init_module_func_t __init_module_inline(void) \
+ { return x; }
+#define module_exit(x) \
+ void cleanup_module(void) __attribute__((alias(#x))); \
+ static inline __cleanup_module_func_t __cleanup_module_inline(void) \
+ { return x; }
+
+#define __setup(str,func) /* nothing */
+
+#endif
+
+#ifdef CONFIG_HOTPLUG
+#define __devinit
+#define __devinitdata
+#define __devexit
+#define __devexitdata
+#else
+#define __devinit __init
+#define __devinitdata __initdata
+#define __devexit __exit
+#define __devexitdata __exitdata
+#endif
+
+/* Functions marked as __devexit may be discarded at kernel link time, depending
+ on config options. Newer versions of binutils detect references from
+ retained sections to discarded sections and flag an error. Pointers to
+ __devexit functions must use __devexit_p(function_name); the wrapper will
+ insert either the function_name or NULL, depending on the config options.
+ */
+#if defined(MODULE) || defined(CONFIG_HOTPLUG)
+#define __devexit_p(x) x
+#else
+#define __devexit_p(x) NULL
+#endif
+
+#endif /* _LINUX_INIT_H */
diff --git a/xen-2.4.16/include/xeno/interrupt.h b/xen-2.4.16/include/xeno/interrupt.h
new file mode 100644
index 0000000000..0102f3e205
--- /dev/null
+++ b/xen-2.4.16/include/xeno/interrupt.h
@@ -0,0 +1,267 @@
+/* interrupt.h */
+#ifndef _LINUX_INTERRUPT_H
+#define _LINUX_INTERRUPT_H
+
+#include
+//#include
+#include
+#include
+
+#include
+#include
+#include
+
+struct irqaction {
+ void (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags;
+ unsigned long mask;
+ const char *name;
+ void *dev_id;
+ struct irqaction *next;
+};
+
+
+/* Who gets which entry in bh_base. Things which will occur most often
+ should come first */
+
+enum {
+ TIMER_BH = 0,
+ TQUEUE_BH,
+ DIGI_BH,
+ SERIAL_BH,
+ RISCOM8_BH,
+ SPECIALIX_BH,
+ AURORA_BH,
+ ESP_BH,
+ SCSI_BH,
+ IMMEDIATE_BH,
+ CYCLADES_BH,
+ CM206_BH,
+ JS_BH,
+ MACSERIAL_BH,
+ ISICOM_BH
+};
+
+#include
+#include
+
+
+
+/* PLEASE avoid allocating new softirqs unless you _really_ need
+ high-frequency threaded job scheduling. For almost all purposes
+ tasklets are more than enough. E.g. all serial device BHs et
+ al. should be converted to tasklets, not to softirqs.
+ */
+
+enum
+{
+ HI_SOFTIRQ=0,
+ NET_TX_SOFTIRQ,
+ NET_RX_SOFTIRQ,
+ TASKLET_SOFTIRQ
+};
+
+/* softirq mask and active fields moved to irq_cpustat_t in
+ * asm/hardirq.h to get better cache usage.
KAO
+ */
+
+struct softirq_action
+{
+ void (*action)(struct softirq_action *);
+ void *data;
+};
+
+asmlinkage void do_softirq(void);
+extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
+extern void softirq_init(void);
+#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0)
+extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr));
+extern void FASTCALL(raise_softirq(unsigned int nr));
+
+
+
+/* Tasklets --- multithreaded analogue of BHs.
+
+ Main feature distinguishing them from generic softirqs: a tasklet
+ runs on only one CPU at a time.
+
+ Main feature distinguishing them from BHs: different tasklets
+ may be run simultaneously on different CPUs.
+
+ Properties:
+ * If tasklet_schedule() is called, then tasklet is guaranteed
+ to be executed on some cpu at least once after this.
+ * If the tasklet is already scheduled, but its execution has not
+ yet started, it will be executed only once.
+ * If this tasklet is already running on another CPU (or schedule is called
+ from tasklet itself), it is rescheduled for later.
+ * A tasklet is strictly serialized wrt itself, but not
+ wrt other tasklets. If a client needs intertask synchronization,
+ it must provide it with spinlocks.
+ */
+
+struct tasklet_struct
+{
+ struct tasklet_struct *next;
+ unsigned long state;
+ atomic_t count;
+ void (*func)(unsigned long);
+ unsigned long data;
+};
+
+#define DECLARE_TASKLET(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data }
+
+#define DECLARE_TASKLET_DISABLED(name, func, data) \
+struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }
+
+
+enum
+{
+ TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */
+ TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
+};
+
+struct tasklet_head
+{
+ struct tasklet_struct *list;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+extern struct tasklet_head tasklet_vec[NR_CPUS];
+extern struct tasklet_head tasklet_hi_vec[NR_CPUS];
+
+#ifdef CONFIG_SMP
+static inline int tasklet_trylock(struct tasklet_struct *t)
+{
+ return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock(struct tasklet_struct *t)
+{
+ smp_mb__before_clear_bit();
+ clear_bit(TASKLET_STATE_RUN, &(t)->state);
+}
+
+static inline void tasklet_unlock_wait(struct tasklet_struct *t)
+{
+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+}
+#else
+#define tasklet_trylock(t) 1
+#define tasklet_unlock_wait(t) do { } while (0)
+#define tasklet_unlock(t) do { } while (0)
+#endif
+
+extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_schedule(t);
+}
+
+extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t));
+
+static inline void tasklet_hi_schedule(struct tasklet_struct *t)
+{
+ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+ __tasklet_hi_schedule(t);
+}
+
+
+static inline void tasklet_disable_nosync(struct tasklet_struct *t)
+{
+ atomic_inc(&t->count);
+ smp_mb__after_atomic_inc();
+}
+
+static inline void tasklet_disable(struct tasklet_struct *t)
+{
+ tasklet_disable_nosync(t);
+ tasklet_unlock_wait(t);
+ smp_mb();
+}
+
+static inline void tasklet_enable(struct tasklet_struct *t)
+{
+ smp_mb__before_atomic_dec();
+ atomic_dec(&t->count);
+}
+
+static inline void tasklet_hi_enable(struct tasklet_struct *t)
+{
+ 
smp_mb__before_atomic_dec(); + atomic_dec(&t->count); +} + +extern void tasklet_kill(struct tasklet_struct *t); +extern void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data); + +#ifdef CONFIG_SMP + +#define SMP_TIMER_NAME(name) name##__thr + +#define SMP_TIMER_DEFINE(name, task) \ +DECLARE_TASKLET(task, name##__thr, 0); \ +static void name (unsigned long dummy) \ +{ \ + tasklet_schedule(&(task)); \ +} + +#else /* CONFIG_SMP */ + +#define SMP_TIMER_NAME(name) name +#define SMP_TIMER_DEFINE(name, task) + +#endif /* CONFIG_SMP */ + + +/* Old BH definitions */ + +extern struct tasklet_struct bh_task_vec[]; + +/* It is exported _ONLY_ for wait_on_irq(). */ +extern spinlock_t global_bh_lock; + +static inline void mark_bh(int nr) +{ + tasklet_hi_schedule(bh_task_vec+nr); +} + +extern void init_bh(int nr, void (*routine)(void)); +extern void remove_bh(int nr); + + +/* + * Autoprobing for irqs: + * + * probe_irq_on() and probe_irq_off() provide robust primitives + * for accurate IRQ probing during kernel initialization. They are + * reasonably simple to use, are not "fooled" by spurious interrupts, + * and, unlike other attempts at IRQ probing, they do not get hung on + * stuck interrupts (such as unused PS2 mouse interfaces on ASUS boards). + * + * For reasonably foolproof probing, use them as follows: + * + * 1. clear and/or mask the device's internal interrupt. + * 2. sti(); + * 3. irqs = probe_irq_on(); // "take over" all unassigned idle IRQs + * 4. enable the device and cause it to trigger an interrupt. + * 5. wait for the device to interrupt, using non-intrusive polling or a delay. + * 6. irq = probe_irq_off(irqs); // get IRQ number, 0=none, negative=multiple + * 7. service the device to clear its pending interrupt. + * 8. loop again if paranoia is required. + * + * probe_irq_on() returns a mask of allocated irq's. + * + * probe_irq_off() takes the mask as a parameter, + * and returns the irq number which occurred, + * or zero if none occurred, or a negative irq number + * if more than one irq occurred. + */ +extern unsigned long probe_irq_on(void); /* returns 0 on failure */ +extern int probe_irq_off(unsigned long); /* returns 0 or negative on failure */ +extern unsigned int probe_irq_mask(unsigned long); /* returns mask of ISA interrupts */ + +#endif diff --git a/xen-2.4.16/include/xeno/ioctl.h b/xen-2.4.16/include/xeno/ioctl.h new file mode 100644 index 0000000000..aa91eb3951 --- /dev/null +++ b/xen-2.4.16/include/xeno/ioctl.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_IOCTL_H +#define _LINUX_IOCTL_H + +#include + +#endif /* _LINUX_IOCTL_H */ + diff --git a/xen-2.4.16/include/xeno/ioport.h b/xen-2.4.16/include/xeno/ioport.h new file mode 100644 index 0000000000..7c35699496 --- /dev/null +++ b/xen-2.4.16/include/xeno/ioport.h @@ -0,0 +1,120 @@ +/* + * ioport.h Definitions of routines for detecting, reserving and + * allocating system resources. + * + * Authors: Linus Torvalds + */ + +#ifndef _LINUX_IOPORT_H +#define _LINUX_IOPORT_H + +/* + * Resources are tree-like, allowing + * nesting etc.. + */ +struct resource { + const char *name; + unsigned long start, end; + unsigned long flags; + struct resource *parent, *sibling, *child; +}; + +struct resource_list { + struct resource_list *next; + struct resource *res; + struct pci_dev *dev; +}; + +/* + * IO resources have these defined flags. 
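+ *
+ * For illustration, a driver claiming ports with the request_region()
+ * shorthand defined further below ends up with an IORESOURCE_BUSY
+ * entry in the ioport_resource tree (hypothetical name and range):
+ *
+ *     if (!request_region(io_base, 8, "mydrv"))
+ *         return -EBUSY;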
+ */ +#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ + +#define IORESOURCE_IO 0x00000100 /* Resource type */ +#define IORESOURCE_MEM 0x00000200 +#define IORESOURCE_IRQ 0x00000400 +#define IORESOURCE_DMA 0x00000800 + +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ +#define IORESOURCE_READONLY 0x00002000 +#define IORESOURCE_CACHEABLE 0x00004000 +#define IORESOURCE_RANGELENGTH 0x00008000 +#define IORESOURCE_SHADOWABLE 0x00010000 +#define IORESOURCE_BUS_HAS_VGA 0x00080000 + +#define IORESOURCE_UNSET 0x20000000 +#define IORESOURCE_AUTO 0x40000000 +#define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ + +/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_IRQ_HIGHEDGE (1<<0) +#define IORESOURCE_IRQ_LOWEDGE (1<<1) +#define IORESOURCE_IRQ_HIGHLEVEL (1<<2) +#define IORESOURCE_IRQ_LOWLEVEL (1<<3) + +/* ISA PnP DMA specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_DMA_TYPE_MASK (3<<0) +#define IORESOURCE_DMA_8BIT (0<<0) +#define IORESOURCE_DMA_8AND16BIT (1<<0) +#define IORESOURCE_DMA_16BIT (2<<0) + +#define IORESOURCE_DMA_MASTER (1<<2) +#define IORESOURCE_DMA_BYTE (1<<3) +#define IORESOURCE_DMA_WORD (1<<4) + +#define IORESOURCE_DMA_SPEED_MASK (3<<6) +#define IORESOURCE_DMA_COMPATIBLE (0<<6) +#define IORESOURCE_DMA_TYPEA (1<<6) +#define IORESOURCE_DMA_TYPEB (2<<6) +#define IORESOURCE_DMA_TYPEF (3<<6) + +/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */ +#define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */ +#define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */ +#define IORESOURCE_MEM_TYPE_MASK (3<<3) +#define IORESOURCE_MEM_8BIT (0<<3) +#define IORESOURCE_MEM_16BIT (1<<3) +#define IORESOURCE_MEM_8AND16BIT (2<<3) +#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ +#define IORESOURCE_MEM_EXPANSIONROM (1<<6) + +/* PC/ISA/whatever - the normal PC address spaces: IO and memory */ +extern struct resource ioport_resource; +extern struct resource iomem_resource; + +extern int get_resource_list(struct resource *, char *buf, int size); + +extern int check_resource(struct resource *root, unsigned long, unsigned long); +extern int request_resource(struct resource *root, struct resource *new); +extern int release_resource(struct resource *new); +extern int allocate_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align, + void (*alignf)(void *, struct resource *, unsigned long), + void *alignf_data); + +/* Convenience shorthand with allocation */ +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) + +extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name); + +/* Compatibility cruft */ +#define check_region(start,n) __check_region(&ioport_resource, (start), (n)) +#define release_region(start,n) __release_region(&ioport_resource, (start), (n)) +#define check_mem_region(start,n) __check_region(&iomem_resource, (start), (n)) +#define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) + +extern int __check_region(struct resource *, unsigned long, unsigned long); +extern void __release_region(struct resource *, unsigned long, unsigned long); + +#define get_ioport_list(buf) 
get_resource_list(&ioport_resource, buf, PAGE_SIZE)
+#define get_mem_list(buf) get_resource_list(&iomem_resource, buf, PAGE_SIZE)
+
+#define HAVE_AUTOIRQ
+extern void autoirq_setup(int waittime);
+extern int autoirq_report(int waittime);
+
+#endif /* _LINUX_IOPORT_H */
diff --git a/xen-2.4.16/include/xeno/irq.h b/xen-2.4.16/include/xeno/irq.h
new file mode 100644
index 0000000000..7342491345
--- /dev/null
+++ b/xen-2.4.16/include/xeno/irq.h
@@ -0,0 +1,63 @@
+#ifndef __irq_h
+#define __irq_h
+
+#include
+#include
+#include
+
+/*
+ * IRQ line status.
+ */
+#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */
+#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */
+#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
+#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
+#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */
+#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL 64 /* IRQ level triggered */
+#define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */
+#define IRQ_PER_CPU 256 /* IRQ is per CPU */
+
+/*
+ * Interrupt controller descriptor. This is all we need
+ * to describe the low-level hardware.
+ */
+struct hw_interrupt_type {
+ const char * typename;
+ unsigned int (*startup)(unsigned int irq);
+ void (*shutdown)(unsigned int irq);
+ void (*enable)(unsigned int irq);
+ void (*disable)(unsigned int irq);
+ void (*ack)(unsigned int irq);
+ void (*end)(unsigned int irq);
+ void (*set_affinity)(unsigned int irq, unsigned long mask);
+};
+
+typedef struct hw_interrupt_type hw_irq_controller;
+
+#include
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ */
+typedef struct {
+ unsigned int status; /* IRQ status */
+ hw_irq_controller *handler;
+ struct irqaction *action; /* IRQ action list */
+ unsigned int depth; /* nested irq disables */
+ spinlock_t lock;
+} ____cacheline_aligned irq_desc_t;
+
+extern irq_desc_t irq_desc [NR_IRQS];
+
+extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_irq(unsigned int , struct irqaction * );
+
+extern hw_irq_controller no_irq_type; /* needed in every arch ? */
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+
+#endif /* __irq_h */
diff --git a/xen-2.4.16/include/xeno/irq_cpustat.h b/xen-2.4.16/include/xeno/irq_cpustat.h
new file mode 100644
index 0000000000..646655403a
--- /dev/null
+++ b/xen-2.4.16/include/xeno/irq_cpustat.h
@@ -0,0 +1,34 @@
+#ifndef __irq_cpustat_h
+#define __irq_cpustat_h
+
+/*
+ * Contains default mappings for irq_cpustat_t, used by almost every
+ * architecture. Some architectures (like s390) have per cpu hardware
+ * pages and they define their own mappings for irq_stat.
+ *
+ * Keith Owens July 2000.
+ */
+
+#include
+
+/*
+ * Simple wrappers reducing source bloat. Define all irq_stat fields
+ * here, even ones that are arch dependent. That way we get common
+ * definitions instead of differing sets for each arch.
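+ *
+ * For example, __cpu_raise_softirq(cpu, nr) in interrupt.h ORs into
+ * softirq_pending(cpu), which maps via __IRQ_STAT() below to
+ * irq_stat[cpu].__softirq_pending (irq_stat[0] on non-SMP builds).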
+ */ + +extern irq_cpustat_t irq_stat[]; /* defined in asm/hardirq.h */ + +#ifdef CONFIG_SMP +#define __IRQ_STAT(cpu, member) (irq_stat[cpu].member) +#else +#define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member) +#endif + + /* arch independent irq_stat fields */ +#define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) +#define local_irq_count(cpu) __IRQ_STAT((cpu), __local_irq_count) +#define local_bh_count(cpu) __IRQ_STAT((cpu), __local_bh_count) +#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count) + +#endif /* __irq_cpustat_h */ diff --git a/xen-2.4.16/include/xeno/kdev_t.h b/xen-2.4.16/include/xeno/kdev_t.h new file mode 100644 index 0000000000..9d85cba3e5 --- /dev/null +++ b/xen-2.4.16/include/xeno/kdev_t.h @@ -0,0 +1,123 @@ +#ifndef _LINUX_KDEV_T_H +#define _LINUX_KDEV_T_H +#if defined(__KERNEL__) || defined(_LVM_H_INCLUDE) +/* +As a preparation for the introduction of larger device numbers, +we introduce a type kdev_t to hold them. No information about +this type is known outside of this include file. + +Objects of type kdev_t designate a device. Outside of the kernel +the corresponding things are objects of type dev_t - usually an +integral type with the device major and minor in the high and low +bits, respectively. Conversion is done by + +extern kdev_t to_kdev_t(int); + +It is up to the various file systems to decide how objects of type +dev_t are stored on disk. +The only other point of contact between kernel and outside world +are the system calls stat and mknod, new versions of which will +eventually have to be used in libc. + +[Unfortunately, the floppy control ioctls fail to hide the internal +kernel structures, and the fd_device field of a struct floppy_drive_struct +is user-visible. So, it remains a dev_t for the moment, with some ugly +conversions in floppy.c.] + +Inside the kernel, we aim for a kdev_t type that is a pointer +to a structure with information about the device (like major, +minor, size, blocksize, sectorsize, name, read-only flag, +struct file_operations etc.). + +However, for the time being we let kdev_t be almost the same as dev_t: + +typedef struct { unsigned short major, minor; } kdev_t; + +Admissible operations on an object of type kdev_t: +- passing it along +- comparing it for equality with another such object +- storing it in ROOT_DEV, inode->i_dev, inode->i_rdev, sb->s_dev, + bh->b_dev, req->rq_dev, de->dc_dev, tty->device +- using its bit pattern as argument in a hash function +- finding its major and minor +- complaining about it + +An object of type kdev_t is created only by the function MKDEV(), +with the single exception of the constant 0 (no device). + +Right now the other information mentioned above is usually found +in static arrays indexed by major or major,minor. + +An obstacle to immediately using + typedef struct { ... (* lots of information *) } *kdev_t +is the case of mknod used to create a block device that the +kernel doesn't know about at present (but first learns about +when some module is inserted). + +aeb - 950811 +*/ + +/* Since MINOR(dev) is used as index in static arrays, + the kernel is not quite ready yet for larger minors. + However, everything runs fine with an arbitrary kdev_t type. 
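+
+For illustration, with the 8-bit minor layout defined below:
+
+	kdev_t dev = MKDEV(3, 64);	(IDE0 major, minor 64, i.e. hdb)
+	MAJOR(dev) == 3, MINOR(dev) == 64, kdev_t_to_nr(dev) == 0x0340.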
*/ + +#define MINORBITS 8 +#define MINORMASK ((1U << MINORBITS) - 1) + +typedef unsigned short kdev_t; + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define HASHDEV(dev) ((unsigned int) (dev)) +#define NODEV 0 +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) +#define B_FREE 0xffff /* yuk */ + +extern const char * kdevname(kdev_t); /* note: returns pointer to static data! */ + +/* 2.5.x compatibility */ +#define mk_kdev(a,b) MKDEV(a,b) +#define major(d) MAJOR(d) +#define minor(d) MINOR(d) +#define kdev_same(a,b) ((a) == (b)) +#define kdev_none(d) (!(d)) +#define kdev_val(d) ((unsigned int)(d)) +#define val_to_kdev(d) ((kdev_t)(d)) + +/* +As long as device numbers in the outside world have 16 bits only, +we use these conversions. +*/ + +static inline unsigned int kdev_t_to_nr(kdev_t dev) { + return (MAJOR(dev)<<8) | MINOR(dev); +} + +static inline kdev_t to_kdev_t(int dev) +{ + int major, minor; +#if 0 + major = (dev >> 16); + if (!major) { + major = (dev >> 8); + minor = (dev & 0xff); + } else + minor = (dev & 0xffff); +#else + major = (dev >> 8); + minor = (dev & 0xff); +#endif + return MKDEV(major, minor); +} + +#else /* __KERNEL__ || _LVM_H_INCLUDE */ + +/* +Some programs want their definitions of MAJOR and MINOR and MKDEV +from the kernel sources. These must be the externally visible ones. +*/ +#define MAJOR(dev) ((dev)>>8) +#define MINOR(dev) ((dev) & 0xff) +#define MKDEV(ma,mi) ((ma)<<8 | (mi)) +#endif /* __KERNEL__ || _LVM_H_INCLUDE */ +#endif diff --git a/xen-2.4.16/include/xeno/lib.h b/xen-2.4.16/include/xeno/lib.h new file mode 100644 index 0000000000..cd40d119f0 --- /dev/null +++ b/xen-2.4.16/include/xeno/lib.h @@ -0,0 +1,51 @@ +#ifndef __LIB_H__ +#define __LIB_H__ + +#include +#include + +#ifndef NDEBUG +#define ASSERT(_p) if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +#define reserve_bootmem(_p,_l) \ +printk("Memory Reservation 0x%lx, %lu bytes\n", (_p), (_l)) + +/* lib.c */ +int memcmp(const void * cs,const void * ct,size_t count); +void * memcpy(void * dest,const void *src,size_t count); +int strncmp(const char * cs,const char * ct,size_t count); +int strcmp(const char * cs,const char * ct); +char * strcpy(char * dest,const char *src); +char * strncpy(char * dest,const char *src,size_t count); +void * memset(void * s,int c,size_t count); +size_t strnlen(const char * s, size_t count); +size_t strlen(const char * s); +char * strchr(const char *,int); +char * strstr(const char * s1,const char * s2); +unsigned long str_to_quad(unsigned char *s); +unsigned char *quad_to_str(unsigned long q, unsigned char *s); + +/* kernel.c */ +#define printk printf +void printf (const char *format, ...); +void cls(void); +void panic(const char *format, ...); + +/* vsprintf.c */ +extern int sprintf(char * buf, const char * fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int vsprintf(char *buf, const char *, va_list); +extern int snprintf(char * buf, size_t size, const char * fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); + +extern int sscanf(const char *, const char *, ...) 
+ __attribute__ ((format (scanf,2,3))); +extern int vsscanf(const char *, const char *, va_list); +long simple_strtol(const char *cp,char **endp,unsigned int base); +long long simple_strtoll(const char *cp,char **endp,unsigned int base); + +#endif /* __LIB_H__ */ diff --git a/xen-2.4.16/include/xeno/list.h b/xen-2.4.16/include/xeno/list.h new file mode 100644 index 0000000000..4124a9a037 --- /dev/null +++ b/xen-2.4.16/include/xeno/list.h @@ -0,0 +1,160 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_add(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static __inline__ void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static __inline__ void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static __inline__ void __list_del(struct list_head * prev, + struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static __inline__ void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static __inline__ void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static __inline__ int list_empty(struct list_head *head) +{ + return head->next == head; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. 
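+ *
+ * Sketch (hypothetical names): move everything on @list (here tmp) to
+ * the front of @head's list (here queue), then re-init the source
+ * head, since splicing leaves it stale:
+ *
+ *	list_splice(&tmp, &queue);
+ *	INIT_LIST_HEAD(&tmp);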
+ */ +static __inline__ void list_splice(struct list_head *list, struct list_head *head) +{ + struct list_head *first = list->next; + + if (first != list) { + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +#endif diff --git a/xen-2.4.16/include/xeno/major.h b/xen-2.4.16/include/xeno/major.h new file mode 100644 index 0000000000..b30f88baf8 --- /dev/null +++ b/xen-2.4.16/include/xeno/major.h @@ -0,0 +1,199 @@ +#ifndef _LINUX_MAJOR_H +#define _LINUX_MAJOR_H + +#include + +/* + * This file has definitions for major device numbers. + * For the device number assignments, see Documentation/devices.txt. + */ + +/* limits */ + +/* + * Important: Don't change this to 256. Major number 255 is and must be + * reserved for future expansion into a larger dev_t space. 
+ */ +#define MAX_CHRDEV 255 +#define MAX_BLKDEV 255 + +#define UNNAMED_MAJOR 0 +#define MEM_MAJOR 1 +#define RAMDISK_MAJOR 1 +#define FLOPPY_MAJOR 2 +#define PTY_MASTER_MAJOR 2 +#define IDE0_MAJOR 3 +#define PTY_SLAVE_MAJOR 3 +#define HD_MAJOR IDE0_MAJOR +#define TTY_MAJOR 4 +#define TTYAUX_MAJOR 5 +#define LP_MAJOR 6 +#define VCS_MAJOR 7 +#define LOOP_MAJOR 7 +#define SCSI_DISK0_MAJOR 8 +#define SCSI_TAPE_MAJOR 9 +#define MD_MAJOR 9 +#define MISC_MAJOR 10 +#define SCSI_CDROM_MAJOR 11 +#define QIC02_TAPE_MAJOR 12 +#define XT_DISK_MAJOR 13 +#define SOUND_MAJOR 14 +#define CDU31A_CDROM_MAJOR 15 +#define JOYSTICK_MAJOR 15 +#define GOLDSTAR_CDROM_MAJOR 16 +#define OPTICS_CDROM_MAJOR 17 +#define SANYO_CDROM_MAJOR 18 +#define CYCLADES_MAJOR 19 +#define CYCLADESAUX_MAJOR 20 +#define MITSUMI_X_CDROM_MAJOR 20 +#define MFM_ACORN_MAJOR 21 /* ARM Linux /dev/mfm */ +#define SCSI_GENERIC_MAJOR 21 +#define Z8530_MAJOR 34 +#define DIGI_MAJOR 23 +#define IDE1_MAJOR 22 +#define DIGICU_MAJOR 22 +#define MITSUMI_CDROM_MAJOR 23 +#define CDU535_CDROM_MAJOR 24 +#define STL_SERIALMAJOR 24 +#define MATSUSHITA_CDROM_MAJOR 25 +#define STL_CALLOUTMAJOR 25 +#define MATSUSHITA_CDROM2_MAJOR 26 +#define QIC117_TAPE_MAJOR 27 +#define MATSUSHITA_CDROM3_MAJOR 27 +#define MATSUSHITA_CDROM4_MAJOR 28 +#define STL_SIOMEMMAJOR 28 +#define ACSI_MAJOR 28 +#define AZTECH_CDROM_MAJOR 29 +#define GRAPHDEV_MAJOR 29 /* SparcLinux & Linux/68k /dev/fb */ +#define SHMIQ_MAJOR 85 /* Linux/mips, SGI /dev/shmiq */ +#define CM206_CDROM_MAJOR 32 +#define IDE2_MAJOR 33 +#define IDE3_MAJOR 34 +#define XPRAM_MAJOR 35 /* expanded storage on S/390 = "slow ram" */ + /* proposed by Peter */ +#define NETLINK_MAJOR 36 +#define PS2ESDI_MAJOR 36 +#define IDETAPE_MAJOR 37 +#define Z2RAM_MAJOR 37 +#define APBLOCK_MAJOR 38 /* AP1000 Block device */ +#define DDV_MAJOR 39 /* AP1000 DDV block device */ +#define NBD_MAJOR 43 /* Network block device */ +#define RISCOM8_NORMAL_MAJOR 48 +#define DAC960_MAJOR 48 /* 48..55 */ +#define RISCOM8_CALLOUT_MAJOR 49 +#define MKISS_MAJOR 55 +#define DSP56K_MAJOR 55 /* DSP56001 processor device */ + +#define IDE4_MAJOR 56 +#define IDE5_MAJOR 57 + +#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */ + +#define SCSI_DISK1_MAJOR 65 +#define SCSI_DISK2_MAJOR 66 +#define SCSI_DISK3_MAJOR 67 +#define SCSI_DISK4_MAJOR 68 +#define SCSI_DISK5_MAJOR 69 +#define SCSI_DISK6_MAJOR 70 +#define SCSI_DISK7_MAJOR 71 + + +#define COMPAQ_SMART2_MAJOR 72 +#define COMPAQ_SMART2_MAJOR1 73 +#define COMPAQ_SMART2_MAJOR2 74 +#define COMPAQ_SMART2_MAJOR3 75 +#define COMPAQ_SMART2_MAJOR4 76 +#define COMPAQ_SMART2_MAJOR5 77 +#define COMPAQ_SMART2_MAJOR6 78 +#define COMPAQ_SMART2_MAJOR7 79 + +#define SPECIALIX_NORMAL_MAJOR 75 +#define SPECIALIX_CALLOUT_MAJOR 76 + +#define COMPAQ_CISS_MAJOR 104 +#define COMPAQ_CISS_MAJOR1 105 +#define COMPAQ_CISS_MAJOR2 106 +#define COMPAQ_CISS_MAJOR3 107 +#define COMPAQ_CISS_MAJOR4 108 +#define COMPAQ_CISS_MAJOR5 109 +#define COMPAQ_CISS_MAJOR6 110 +#define COMPAQ_CISS_MAJOR7 111 + +#define ATARAID_MAJOR 114 + +#define DASD_MAJOR 94 /* Official assignations from Peter */ + +#define MDISK_MAJOR 95 /* Official assignations from Peter */ + +#define I2O_MAJOR 80 /* 80->87 */ + +#define IDE6_MAJOR 88 +#define IDE7_MAJOR 89 +#define IDE8_MAJOR 90 +#define IDE9_MAJOR 91 + +#define UBD_MAJOR 98 + +#define AURORA_MAJOR 79 + +#define JSFD_MAJOR 99 + +#define PHONE_MAJOR 100 + +#define LVM_CHAR_MAJOR 109 /* Logical Volume Manager */ + +#define UMEM_MAJOR 116 /* http://www.umem.com/ Battery Backed RAM */ + +#define RTF_MAJOR 150 
+#define RAW_MAJOR 162 + +#define USB_ACM_MAJOR 166 +#define USB_ACM_AUX_MAJOR 167 +#define USB_CHAR_MAJOR 180 + +#define UNIX98_PTY_MASTER_MAJOR 128 +#define UNIX98_PTY_MAJOR_COUNT 8 +#define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) + +#define VXVM_MAJOR 199 /* VERITAS volume i/o driver */ +#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ +#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ + +#define MSR_MAJOR 202 +#define CPUID_MAJOR 203 + +#define OSST_MAJOR 206 /* OnStream-SCx0 SCSI tape */ + +#define IBM_TTY3270_MAJOR 227 /* Official allocations now */ +#define IBM_FS3270_MAJOR 228 + +/* + * Tests for SCSI devices. + */ + +#define SCSI_DISK_MAJOR(M) ((M) == SCSI_DISK0_MAJOR || \ + ((M) >= SCSI_DISK1_MAJOR && (M) <= SCSI_DISK7_MAJOR)) + +#define SCSI_BLK_MAJOR(M) \ + (SCSI_DISK_MAJOR(M) \ + || (M) == SCSI_CDROM_MAJOR) + +static __inline__ int scsi_blk_major(int m) { + return SCSI_BLK_MAJOR(m); +} + +/* + * Tests for IDE devices + */ +#define IDE_DISK_MAJOR(M) ((M) == IDE0_MAJOR || (M) == IDE1_MAJOR || \ + (M) == IDE2_MAJOR || (M) == IDE3_MAJOR || \ + (M) == IDE4_MAJOR || (M) == IDE5_MAJOR || \ + (M) == IDE6_MAJOR || (M) == IDE7_MAJOR || \ + (M) == IDE8_MAJOR || (M) == IDE9_MAJOR) + +static __inline__ int ide_blk_major(int m) +{ + return IDE_DISK_MAJOR(m); +} + +#endif diff --git a/xen-2.4.16/include/xeno/mii.h b/xen-2.4.16/include/xeno/mii.h new file mode 100644 index 0000000000..943913583d --- /dev/null +++ b/xen-2.4.16/include/xeno/mii.h @@ -0,0 +1,165 @@ +/* + * linux/mii.h: definitions for MII-compatible transceivers + * Originally drivers/net/sunhme.h. + * + * Copyright (C) 1996, 1999, 2001 David S. Miller (davem@redhat.com) + */ + +#ifndef __LINUX_MII_H__ +#define __LINUX_MII_H__ + +#include + +/* Generic MII registers. */ + +#define MII_BMCR 0x00 /* Basic mode control register */ +#define MII_BMSR 0x01 /* Basic mode status register */ +#define MII_PHYSID1 0x02 /* PHYS ID 1 */ +#define MII_PHYSID2 0x03 /* PHYS ID 2 */ +#define MII_ADVERTISE 0x04 /* Advertisement control reg */ +#define MII_LPA 0x05 /* Link partner ability reg */ +#define MII_EXPANSION 0x06 /* Expansion register */ +#define MII_DCOUNTER 0x12 /* Disconnect counter */ +#define MII_FCSCOUNTER 0x13 /* False carrier counter */ +#define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */ +#define MII_RERRCOUNTER 0x15 /* Receive error counter */ +#define MII_SREVISION 0x16 /* Silicon revision */ +#define MII_RESV1 0x17 /* Reserved... */ +#define MII_LBRERROR 0x18 /* Lpback, rx, bypass error */ +#define MII_PHYADDR 0x19 /* PHY address */ +#define MII_RESV2 0x1a /* Reserved... */ +#define MII_TPISTATUS 0x1b /* TPI status for 10mbps */ +#define MII_NCONFIG 0x1c /* Network interface config */ + +/* Basic mode control register. */ +#define BMCR_RESV 0x007f /* Unused... */ +#define BMCR_CTST 0x0080 /* Collision test */ +#define BMCR_FULLDPLX 0x0100 /* Full duplex */ +#define BMCR_ANRESTART 0x0200 /* Auto negotiation restart */ +#define BMCR_ISOLATE 0x0400 /* Disconnect DP83840 from MII */ +#define BMCR_PDOWN 0x0800 /* Powerdown the DP83840 */ +#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ +#define BMCR_SPEED100 0x2000 /* Select 100Mbps */ +#define BMCR_LOOPBACK 0x4000 /* TXD loopback bits */ +#define BMCR_RESET 0x8000 /* Reset the DP83840 */ + +/* Basic mode status register. 
*/ +#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ +#define BMSR_JCD 0x0002 /* Jabber detected */ +#define BMSR_LSTATUS 0x0004 /* Link status */ +#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ +#define BMSR_RFAULT 0x0010 /* Remote fault detected */ +#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ +#define BMSR_RESV 0x07c0 /* Unused... */ +#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ +#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ +#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ +#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ +#define BMSR_100BASE4 0x8000 /* Can do 100mbps, 4k packets */ + +/* Advertisement control register. */ +#define ADVERTISE_SLCT 0x001f /* Selector bits */ +#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_100BASE4 0x0200 /* Try for 100mbps 4k packets */ +#define ADVERTISE_RESV 0x1c00 /* Unused... */ +#define ADVERTISE_RFAULT 0x2000 /* Say we can detect faults */ +#define ADVERTISE_LPACK 0x4000 /* Ack link partners response */ +#define ADVERTISE_NPAGE 0x8000 /* Next page bit */ + +#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ + ADVERTISE_100HALF | ADVERTISE_100FULL) + +/* Link partner ability register. */ +#define LPA_SLCT 0x001f /* Same as advertise selector */ +#define LPA_10HALF 0x0020 /* Can do 10mbps half-duplex */ +#define LPA_10FULL 0x0040 /* Can do 10mbps full-duplex */ +#define LPA_100HALF 0x0080 /* Can do 100mbps half-duplex */ +#define LPA_100FULL 0x0100 /* Can do 100mbps full-duplex */ +#define LPA_100BASE4 0x0200 /* Can do 100mbps 4k packets */ +#define LPA_RESV 0x1c00 /* Unused... */ +#define LPA_RFAULT 0x2000 /* Link partner faulted */ +#define LPA_LPACK 0x4000 /* Link partner acked us */ +#define LPA_NPAGE 0x8000 /* Next page bit */ + +#define LPA_DUPLEX (LPA_10FULL | LPA_100FULL) +#define LPA_100 (LPA_100FULL | LPA_100HALF | LPA_100BASE4) + +/* Expansion register for auto-negotiation. */ +#define EXPANSION_NWAY 0x0001 /* Can do N-way auto-nego */ +#define EXPANSION_LCWP 0x0002 /* Got new RX page code word */ +#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ +#define EXPANSION_NPCAPABLE 0x0008 /* Link partner supports npage */ +#define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */ +#define EXPANSION_RESV 0xffe0 /* Unused... */ + +/* N-way test register. */ +#define NWAYTEST_RESV1 0x00ff /* Unused... */ +#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */ +#define NWAYTEST_RESV2 0xfe00 /* Unused... */ + +/* This structure is used in all SIOCxMIIxxx ioctl calls */ +struct mii_ioctl_data { + u16 phy_id; + u16 reg_num; + u16 val_in; + u16 val_out; +}; + + +/** + * mii_nway_result + * @negotiated: value of MII ANAR and'd with ANLPAR + * + * Given a set of MII abilities, check each bit and returns the + * currently supported media, in the priority order defined by + * IEEE 802.3u. We use LPA_xxx constants but note this is not the + * value of LPA solely, as described above. + * + * The one exception to IEEE 802.3u is that 100baseT4 is placed + * between 100T-full and 100T-half. If your phy does not support + * 100T4 this is fine. 
If your phy places 100T4 elsewhere in the + * priority order, you will need to roll your own function. + */ +static inline unsigned int mii_nway_result (unsigned int negotiated) +{ + unsigned int ret; + + if (negotiated & LPA_100FULL) + ret = LPA_100FULL; + else if (negotiated & LPA_100BASE4) + ret = LPA_100BASE4; + else if (negotiated & LPA_100HALF) + ret = LPA_100HALF; + else if (negotiated & LPA_10FULL) + ret = LPA_10FULL; + else + ret = LPA_10HALF; + + return ret; +} + +/** + * mii_duplex + * @duplex_lock: Non-zero if duplex is locked at full + * @negotiated: value of MII ANAR and'd with ANLPAR + * + * A small helper function for a common case. Returns one + * if the media is operating or locked at full duplex, and + * returns zero otherwise. + */ +static inline unsigned int mii_duplex (unsigned int duplex_lock, + unsigned int negotiated) +{ + if (duplex_lock) + return 1; + if (mii_nway_result(negotiated) & LPA_DUPLEX) + return 1; + return 0; +} + + +#endif /* __LINUX_MII_H__ */ diff --git a/xen-2.4.16/include/xeno/mm.h b/xen-2.4.16/include/xeno/mm.h new file mode 100644 index 0000000000..8a547ea9cc --- /dev/null +++ b/xen-2.4.16/include/xeno/mm.h @@ -0,0 +1,109 @@ + +#ifndef __XENO_MM_H__ +#define __XENO_MM_H__ + +#include +#include +#include +#include + +/* XXX KAF: These may die eventually, but so many refs in slab.c :((( */ + +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */ +#define __GFP_DMA 0x01 + +/* Action modifiers - doesn't change the zoning */ +#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ +#define __GFP_HIGH 0x20 /* Should access emergency pools? */ +#define __GFP_IO 0x40 /* Can start low memory physical IO? */ +#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ +#define __GFP_FS 0x100 /* Can call down to low-level FS? */ + +#define GFP_ATOMIC (__GFP_HIGH) +#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) + +/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some + platforms, used as appropriate on others */ + +#define GFP_DMA __GFP_DMA + + +/****************************************************************************** + * The following is for page_alloc.c. + */ + +void init_page_allocator(unsigned long min, unsigned long max); +unsigned long __get_free_pages(int mask, int order); +void __free_pages(unsigned long p, int order); +#define get_free_page(_m) (__get_free_pages((_m),0)) +#define __get_free_page(_m) (__get_free_pages((_m),0)) +#define free_pages(_p,_o) (__free_pages(_p,_o)) +#define free_page(_p) (__free_pages(_p,0)) + + +/****************************************************************************** + * The following is the array of page info. One entry per page owned + * by the hypervisor, indexed from `mem_map', just like Linux. + * + * 12.11.02. We no longer use struct page or mem_map, these are replaced + * with struct pfn_info and frame_table respectively. Boris Dragovic + */ + +/* + * This is still fatter than I'd like. Do we need the count? + * Do we need the flags? The list at least seems req'd by slab.c. + */ +typedef struct pfn_info { + struct list_head list; /* ->mapping has some page lists. */ + unsigned long next; /* used for threading pages belonging */ + unsigned long prev; /* to same domain */ + unsigned long flags; /* atomic flags. */ + unsigned long tot_count; /* Total domain usage count. */ + unsigned long type_count; /* pagetable/dir, or domain-writeable refs. 
*/ +} frame_table_t; + +#define get_page_tot(p) ((p)->tot_count++) +#define put_page_tot(p) (--(p)->tot_count) +#define page_tot_count(p) ((p)->tot_count) +#define set_page_tot_count(p,v) ((p)->tot_count = v) + +#define get_page_type(p) ((p)->type_count++) +#define put_page_type(p) (--(p)->type_count) +#define page_type_count(p) ((p)->type_count) +#define set_page_type_count(p,v) ((p)->type_count = v) + +#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */ +/* hypervisor flags (domain == 0) */ +#define PG_slab 24 +/* domain flags (domain != 0) */ +/* + * NB. The following three flags are MUTUALLY EXCLUSIVE! + * At most one can be true at any point, and 'type_count' counts how many + * references exist of the current type. A change in type can only occur + * when type_count == 0. + */ +#define PG_type_mask (7<<25) /* bits 25-27 */ +#define PGT_none (0<<25) /* no special uses of this page */ +#define PGT_l1_page_table (1<<25) /* using this page as an L1 page table? */ +#define PGT_l2_page_table (2<<25) /* using this page as an L2 page table? */ +#define PGT_l3_page_table (3<<25) /* using this page as an L3 page table? */ +#define PGT_l4_page_table (4<<25) /* using this page as an L4 page table? */ +#define PGT_writeable_page (7<<25) /* has writable mappings of this page? */ + +#define PageSlab(page) test_bit(PG_slab, &(page)->flags) +#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) +#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) + +/* The array of struct pfn_info, + * free pfn list and number of free pfns in the free list + */ +extern frame_table_t * frame_table; +extern unsigned long frame_table_size; +extern struct list_head free_list; +extern unsigned int free_pfns; +unsigned long init_frametable(unsigned long nr_pages); + +/* Part of the domain API. */ +int do_process_page_updates(page_update_request_t *updates, int count); + +#endif /* __XENO_MM_H__ */ diff --git a/xen-2.4.16/include/xeno/module.h b/xen-2.4.16/include/xeno/module.h new file mode 100644 index 0000000000..5e8ce698d6 --- /dev/null +++ b/xen-2.4.16/include/xeno/module.h @@ -0,0 +1,417 @@ +/* + * Dynamic loading of modules into the kernel. + * + * Rewritten by Richard Henderson Dec 1996 + */ + +#ifndef _LINUX_MODULE_H +#define _LINUX_MODULE_H + +#include +#include +#include + +#ifdef __GENKSYMS__ +# define _set_ver(sym) sym +# undef MODVERSIONS +# define MODVERSIONS +#else /* ! __GENKSYMS__ */ +# if !defined(MODVERSIONS) && defined(EXPORT_SYMTAB) +# define _set_ver(sym) sym +# include +# endif +#endif /* __GENKSYMS__ */ + +#include + +/* Don't need to bring in all of uaccess.h just for this decl. */ +struct exception_table_entry; + +/* Used by get_kernel_syms, which is obsolete.
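To make the mutual-exclusion rule above concrete, a sketch (not code from this changeset; locking and the PG_domain_mask ownership check are omitted) of claiming a page as an L1 page table only while its type is free to change:

    static int try_claim_as_l1(struct pfn_info *page)
    {
        unsigned long type = page->flags & PG_type_mask;

        if (type == PGT_l1_page_table) {
            get_page_type(page);   /* one more reference to the same type */
            return 0;
        }
        if (page_type_count(page) != 0)
            return -1;             /* in use; the type may not change yet */
        page->flags = (page->flags & ~PG_type_mask) | PGT_l1_page_table;
        get_page_type(page);
        return 0;
    }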
*/ +struct kernel_sym +{ + unsigned long value; + char name[60]; /* should have been 64-sizeof(long); oh well */ +}; + +struct module_symbol +{ + unsigned long value; + const char *name; +}; + +struct module_ref +{ + struct module *dep; /* "parent" pointer */ + struct module *ref; /* "child" pointer */ + struct module_ref *next_ref; +}; + +/* TBD */ +struct module_persist; + +struct module +{ + unsigned long size_of_struct; /* == sizeof(module) */ + struct module *next; + const char *name; + unsigned long size; + + union + { + atomic_t usecount; + long pad; + } uc; /* Needs to keep its size - so says rth */ + + unsigned long flags; /* AUTOCLEAN et al */ + + unsigned nsyms; + unsigned ndeps; + + struct module_symbol *syms; + struct module_ref *deps; + struct module_ref *refs; + int (*init)(void); + void (*cleanup)(void); + const struct exception_table_entry *ex_table_start; + const struct exception_table_entry *ex_table_end; +#ifdef __alpha__ + unsigned long gp; +#endif + /* Members past this point are extensions to the basic + module support and are optional. Use mod_member_present() + to examine them. */ + const struct module_persist *persist_start; + const struct module_persist *persist_end; + int (*can_unload)(void); + int runsize; /* In modutils, not currently used */ + const char *kallsyms_start; /* All symbols for kernel debugging */ + const char *kallsyms_end; + const char *archdata_start; /* arch specific data for module */ + const char *archdata_end; + const char *kernel_data; /* Reserved for kernel internal use */ +}; + +struct module_info +{ + unsigned long addr; + unsigned long size; + unsigned long flags; + long usecount; +}; + +/* Bits of module.flags. */ + +#define MOD_UNINITIALIZED 0 +#define MOD_RUNNING 1 +#define MOD_DELETED 2 +#define MOD_AUTOCLEAN 4 +#define MOD_VISITED 8 +#define MOD_USED_ONCE 16 +#define MOD_JUST_FREED 32 +#define MOD_INITIALIZING 64 + +/* Values for query_module's which. */ + +#define QM_MODULES 1 +#define QM_DEPS 2 +#define QM_REFS 3 +#define QM_SYMBOLS 4 +#define QM_INFO 5 + +/* Can the module be queried? */ +#define MOD_CAN_QUERY(mod) (((mod)->flags & (MOD_RUNNING | MOD_INITIALIZING)) && !((mod)->flags & MOD_DELETED)) + +/* When struct module is extended, we must test whether the new member + is present in the header received from insmod before we can use it. + This function returns true if the member is present. */ + +#define mod_member_present(mod,member) \ + ((unsigned long)(&((struct module *)0L)->member + 1) \ + <= (mod)->size_of_struct) + +/* + * Ditto for archdata. Assumes mod->archdata_start and mod->archdata_end + * are validated elsewhere. + */ +#define mod_archdata_member_present(mod, type, member) \ + (((unsigned long)(&((type *)0L)->member) + \ + sizeof(((type *)0L)->member)) <= \ + ((mod)->archdata_end - (mod)->archdata_start)) + + +/* Check if an address p with number of entries n is within the body of module m */ +#define mod_bound(p, n, m) ((unsigned long)(p) >= ((unsigned long)(m) + ((m)->size_of_struct)) && \ + (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size) + +/* Backwards compatibility definition. */ + +#define GET_USE_COUNT(module) (atomic_read(&(module)->uc.usecount)) + +/* Poke the use count of a module. 
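As an aside, a hedged sketch of how these guards compose (not from this changeset): module_list is declared further down in this header for the non-modular build, and printk is assumed from the hypervisor's lib code.

    static void dump_queryable_modules(void)
    {
        struct module *mod;

        for (mod = module_list; mod; mod = mod->next) {
            if (!MOD_CAN_QUERY(mod))
                continue;          /* skip deleted or never-started modules */
            printk("%s: %u syms, use count %d\n",
                   mod->name, mod->nsyms, GET_USE_COUNT(mod));
        }
    }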
*/ + +#define __MOD_INC_USE_COUNT(mod) \ + (atomic_inc(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED|MOD_USED_ONCE) +#define __MOD_DEC_USE_COUNT(mod) \ + (atomic_dec(&(mod)->uc.usecount), (mod)->flags |= MOD_VISITED) +#define __MOD_IN_USE(mod) \ + (mod_member_present((mod), can_unload) && (mod)->can_unload \ + ? (mod)->can_unload() : atomic_read(&(mod)->uc.usecount)) + +/* Indirect stringification. */ + +#define __MODULE_STRING_1(x) #x +#define __MODULE_STRING(x) __MODULE_STRING_1(x) + +/* Generic inter module communication. + * + * NOTE: This interface is intended for small amounts of data that are + * passed between two objects and either or both of the objects + * might be compiled as modules. Do not over use this interface. + * + * If more than two objects need to communicate then you probably + * need a specific interface instead of abusing this generic + * interface. If both objects are *always* built into the kernel + * then a global extern variable is good enough, you do not need + * this interface. + * + * Keith Owens 28 Oct 2000. + */ + +#ifdef __KERNEL__ +#define HAVE_INTER_MODULE +extern void inter_module_register(const char *, struct module *, const void *); +extern void inter_module_unregister(const char *); +extern const void *inter_module_get(const char *); +extern const void *inter_module_get_request(const char *, const char *); +extern void inter_module_put(const char *); + +struct inter_module_entry { + struct list_head list; + const char *im_name; + struct module *owner; + const void *userdata; +}; + +#if 0 +extern int try_inc_mod_count(struct module *mod); +#else +static inline int try_inc_mod_count(struct module * mod) +{ + if ( mod ) __MOD_INC_USE_COUNT(mod); + return 1; +} +#endif +#endif /* __KERNEL__ */ + +#if defined(MODULE) && !defined(__GENKSYMS__) + +/* Embedded module documentation macros. */ + +/* For documentation purposes only. */ + +#define MODULE_AUTHOR(name) \ +const char __module_author[] __attribute__((section(".modinfo"))) = \ +"author=" name + +#define MODULE_DESCRIPTION(desc) \ +const char __module_description[] __attribute__((section(".modinfo"))) = \ +"description=" desc + +/* Could potentially be used by kmod... */ + +#define MODULE_SUPPORTED_DEVICE(dev) \ +const char __module_device[] __attribute__((section(".modinfo"))) = \ +"device=" dev + +/* Used to verify parameters given to the module. The TYPE arg should + be a string in the following format: + [min[-max]]{b,h,i,l,s} + The MIN and MAX specifiers delimit the length of the array. If MAX + is omitted, it defaults to MIN; if both are omitted, the default is 1. + The final character is a type specifier: + b byte + h short + i int + l long + s string +*/ + +#define MODULE_PARM(var,type) \ +const char __module_parm_##var[] \ +__attribute__((section(".modinfo"))) = \ +"parm_" __MODULE_STRING(var) "=" type + +#define MODULE_PARM_DESC(var,desc) \ +const char __module_parm_desc_##var[] \ +__attribute__((section(".modinfo"))) = \ +"parm_desc_" __MODULE_STRING(var) "=" desc + +/* + * MODULE_DEVICE_TABLE exports information about devices + * currently supported by this module. A device type, such as PCI, + * is a C-like identifier passed as the first arg to this macro. + * The second macro arg is the variable containing the device + * information being made public. + * + * The following is a list of known device types (arg 1), + * and the C types which are to be passed as arg 2. 
+ * pci - struct pci_device_id - List of PCI ids supported by this module + * isapnp - struct isapnp_device_id - List of ISA PnP ids supported by this module + * usb - struct usb_device_id - List of USB ids supported by this module + */ +#define MODULE_GENERIC_TABLE(gtype,name) \ +static const unsigned long __module_##gtype##_size \ + __attribute__ ((unused)) = sizeof(struct gtype##_id); \ +static const struct gtype##_id * __module_##gtype##_table \ + __attribute__ ((unused)) = name + +/* + * The following license idents are currently accepted as indicating free + * software modules + * + * "GPL" [GNU Public License v2 or later] + * "GPL and additional rights" [GNU Public License v2 rights and more] + * "Dual BSD/GPL" [GNU Public License v2 or BSD license choice] + * "Dual MPL/GPL" [GNU Public License v2 or Mozilla license choice] + * + * The following other idents are available + * + * "Proprietary" [Non free products] + * + * There are dual licensed components, but when running with Linux it is the + * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL + * is a GPL combined work. + * + * This exists for several reasons + * 1. So modinfo can show license info for users wanting to vet their setup + * is free + * 2. So the community can ignore bug reports including proprietary modules + * 3. So vendors can do likewise based on their own policies + */ + +#define MODULE_LICENSE(license) \ +static const char __module_license[] __attribute__((section(".modinfo"))) = \ +"license=" license + +/* Define the module variable, and usage macros. */ +extern struct module __this_module; + +#define THIS_MODULE (&__this_module) +#define MOD_INC_USE_COUNT __MOD_INC_USE_COUNT(THIS_MODULE) +#define MOD_DEC_USE_COUNT __MOD_DEC_USE_COUNT(THIS_MODULE) +#define MOD_IN_USE __MOD_IN_USE(THIS_MODULE) + +#if 0 +#include +static const char __module_kernel_version[] __attribute__((section(".modinfo"))) = +"kernel_version=" UTS_RELEASE; +#ifdef MODVERSIONS +static const char __module_using_checksums[] __attribute__((section(".modinfo"))) = +"using_checksums=1"; +#endif +#endif + +#else /* MODULE */ + +#define MODULE_AUTHOR(name) +#define MODULE_LICENSE(license) +#define MODULE_DESCRIPTION(desc) +#define MODULE_SUPPORTED_DEVICE(name) +#define MODULE_PARM(var,type) +#define MODULE_PARM_DESC(var,desc) + +/* Create a dummy reference to the table to suppress gcc unused warnings. Put + * the reference in the .data.exit section which is discarded when code is built + * in, so the reference does not bloat the running kernel. Note: cannot be + * const, other exit data may be writable. + */ +#define MODULE_GENERIC_TABLE(gtype,name) \ +static const struct gtype##_id * __module_##gtype##_table \ + __attribute__ ((unused, __section__(".data.exit"))) = name + +#ifndef __GENKSYMS__ + +#define THIS_MODULE NULL +#define MOD_INC_USE_COUNT do { } while (0) +#define MOD_DEC_USE_COUNT do { } while (0) +#define MOD_IN_USE 1 + +extern struct module *module_list; + +#endif /* !__GENKSYMS__ */ + +#endif /* MODULE */ + +#define MODULE_DEVICE_TABLE(type,name) \ + MODULE_GENERIC_TABLE(type##_device,name) + +/* Export a symbol either from the kernel or a module. + + In the kernel, the symbol is added to the kernel's global symbol table. + + In a module, it controls which variables are exported. If no + variables are explicitly exported, the action is controlled by the + insmod -[xX] flags. Otherwise, only the variables listed are exported. + This obviates the need for the old register_symtab() function.
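A hedged example of the mechanism just described (illustrative, not part of this import; the include path and symbol name are made up). With EXPORT_SYMTAB defined before the header is pulled in, only the symbols explicitly listed are exported:

    #define EXPORT_SYMTAB
    #include <xeno/module.h>         /* assumed include path */

    int xeno_debug_level = 0;
    EXPORT_SYMBOL(xeno_debug_level); /* emits __ksymtab/.kstrtab entries */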
*/ + +#if defined(__GENKSYMS__) + +/* We want the EXPORT_SYMBOL tag left intact for recognition. */ + +#elif !defined(CONFIG_MODULES) + +#define __EXPORT_SYMBOL(sym,str) +#define EXPORT_SYMBOL(var) +#define EXPORT_SYMBOL_NOVERS(var) +#define EXPORT_SYMBOL_GPL(var) + +#elif !defined(EXPORT_SYMTAB) + +#define __EXPORT_SYMBOL(sym,str) error this_object_must_be_defined_as_export_objs_in_the_Makefile +#define EXPORT_SYMBOL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile +#define EXPORT_SYMBOL_NOVERS(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile +#define EXPORT_SYMBOL_GPL(var) error this_object_must_be_defined_as_export_objs_in_the_Makefile + +#else + +#define __EXPORT_SYMBOL(sym, str) \ +const char __kstrtab_##sym[] \ +__attribute__((section(".kstrtab"))) = str; \ +const struct module_symbol __ksymtab_##sym \ +__attribute__((section("__ksymtab"))) = \ +{ (unsigned long)&sym, __kstrtab_##sym } + +#define __EXPORT_SYMBOL_GPL(sym, str) \ +const char __kstrtab_##sym[] \ +__attribute__((section(".kstrtab"))) = "GPLONLY_" str; \ +const struct module_symbol __ksymtab_##sym \ +__attribute__((section("__ksymtab"))) = \ +{ (unsigned long)&sym, __kstrtab_##sym } + +#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS) +#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) +#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL_GPL(var, __MODULE_STRING(var)) +#else +#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) +#define EXPORT_SYMBOL_GPL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) +#endif + +#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) + +#endif /* __GENKSYMS__ */ + +#ifdef MODULE +/* Force a module to export no symbols. */ +#define EXPORT_NO_SYMBOLS __asm__(".section __ksymtab\n.previous") +#else +#define EXPORT_NO_SYMBOLS +#endif /* MODULE */ + +#ifdef CONFIG_MODULES +#define SET_MODULE_OWNER(some_struct) do { (some_struct)->owner = THIS_MODULE; } while (0) +#else +#define SET_MODULE_OWNER(some_struct) do { } while (0) +#endif + +#endif /* _LINUX_MODULE_H */ diff --git a/xen-2.4.16/include/xeno/multiboot.h b/xen-2.4.16/include/xeno/multiboot.h new file mode 100644 index 0000000000..a61117bbbd --- /dev/null +++ b/xen-2.4.16/include/xeno/multiboot.h @@ -0,0 +1,81 @@ +/* multiboot.h - the header for Multiboot */ +/* Copyright (C) 1999, 2001 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef __ELF__ +#error "Build on a 32-bit ELF system" +#endif + +/* The magic number passed by a Multiboot-compliant boot loader. */ +#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002 + +/* The symbol table for a.out. 
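Before the info structures below, a brief sketch of how the magic value is meant to be used (the entry function is hypothetical; on x86 a Multiboot loader passes the magic in EAX and the info pointer in EBX):

    void cmain(unsigned long magic, unsigned long addr)
    {
        if (magic != MULTIBOOT_BOOTLOADER_MAGIC)
            return;   /* not loaded by a Multiboot-compliant boot loader */
        /* 'addr' points at the multiboot_info structure defined below. */
    }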
*/ +typedef struct aout_symbol_table +{ + unsigned long tabsize; + unsigned long strsize; + unsigned long addr; + unsigned long reserved; +} aout_symbol_table_t; + +/* The section header table for ELF. */ +typedef struct elf_section_header_table +{ + unsigned long num; + unsigned long size; + unsigned long addr; + unsigned long shndx; +} elf_section_header_table_t; + +/* The Multiboot information. */ +typedef struct multiboot_info +{ + unsigned long flags; + unsigned long mem_lower; + unsigned long mem_upper; + unsigned long boot_device; + unsigned long cmdline; + unsigned long mods_count; + unsigned long mods_addr; + union + { + aout_symbol_table_t aout_sym; + elf_section_header_table_t elf_sec; + } u; + unsigned long mmap_length; + unsigned long mmap_addr; +} multiboot_info_t; + +/* The module structure. */ +typedef struct module +{ + unsigned long mod_start; + unsigned long mod_end; + unsigned long string; + unsigned long reserved; +} module_t; + +/* The memory map. Be careful that the offset 0 is base_addr_low + but no size. */ +typedef struct memory_map +{ + unsigned long size; + unsigned long base_addr_low; + unsigned long base_addr_high; + unsigned long length_low; + unsigned long length_high; + unsigned long type; +} memory_map_t; diff --git a/xen-2.4.16/include/xeno/netdevice.h b/xen-2.4.16/include/xeno/netdevice.h new file mode 100644 index 0000000000..51b063a726 --- /dev/null +++ b/xen-2.4.16/include/xeno/netdevice.h @@ -0,0 +1,688 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the Interfaces handler. + * + * Version: @(#)dev.h 1.0.10 08/12/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * Corey Minyard + * Donald J. Becker, + * Alan Cox, + * Bjorn Ekwall. + * Pekka Riikonen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Moved to /usr/include/linux for NET3 + */ +#ifndef _LINUX_NETDEVICE_H +#define _LINUX_NETDEVICE_H + +#include +#include +#include +#include + +#include +#include +#include + +#ifdef __KERNEL__ +#include + +struct divert_blk; + +#define HAVE_ALLOC_NETDEV /* feature macro: alloc_xxxdev + functions are available. */ + +#define NET_XMIT_SUCCESS 0 +#define NET_XMIT_DROP 1 /* skb dropped */ +#define NET_XMIT_CN 2 /* congestion notification */ +#define NET_XMIT_POLICED 3 /* skb is shot by police */ +#define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; + (TC use only - dev_queue_xmit + returns this as NET_XMIT_SUCCESS) */ + +/* Backlog congestion levels */ +#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ +#define NET_RX_DROP 1 /* packet dropped */ +#define NET_RX_CN_LOW 2 /* storm alert, just in case */ +#define NET_RX_CN_MOD 3 /* Storm on its way! */ +#define NET_RX_CN_HIGH 4 /* The storm is here */ +#define NET_RX_BAD 5 /* packet dropped due to kernel error */ + +#define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) + +#endif + +#define MAX_ADDR_LEN 8 /* Largest hardware address length */ + +/* + * Compute the worst case header length according to the protocols + * used. 
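The quirk in the memory_map comment above -- the 'size' field does not count itself -- is easiest to see in the canonical traversal loop, sketched here after the style of the Multiboot specification's example. mbi is assumed to be a multiboot_info_t pointer and use_region() an invented consumer; a real caller first checks the mmap validity bit in mbi->flags.

    memory_map_t *mmap;

    for (mmap = (memory_map_t *) mbi->mmap_addr;
         (unsigned long) mmap < mbi->mmap_addr + mbi->mmap_length;
         mmap = (memory_map_t *) ((unsigned long) mmap
                                  + mmap->size + sizeof(mmap->size)))
        use_region(mmap->base_addr_low, mmap->length_low, mmap->type);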
+ */ + +#if !defined(CONFIG_AX25) && !defined(CONFIG_AX25_MODULE) && !defined(CONFIG_TR) +#define LL_MAX_HEADER 32 +#else +#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#define LL_MAX_HEADER 96 +#else +#define LL_MAX_HEADER 48 +#endif +#endif + +#if !defined(CONFIG_NET_IPIP) && \ + !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) +#define MAX_HEADER LL_MAX_HEADER +#else +#define MAX_HEADER (LL_MAX_HEADER + 48) +#endif + +/* + * Network device statistics. Akin to the 2.0 ether stats but + * with byte counters. + */ + +struct net_device_stats +{ + unsigned long rx_packets; /* total packets received */ + unsigned long tx_packets; /* total packets transmitted */ + unsigned long rx_bytes; /* total bytes received */ + unsigned long tx_bytes; /* total bytes transmitted */ + unsigned long rx_errors; /* bad packets received */ + unsigned long tx_errors; /* packet transmit problems */ + unsigned long rx_dropped; /* no space in linux buffers */ + unsigned long tx_dropped; /* no space available in linux */ + unsigned long multicast; /* multicast packets received */ + unsigned long collisions; + + /* detailed rx_errors: */ + unsigned long rx_length_errors; + unsigned long rx_over_errors; /* receiver ring buff overflow */ + unsigned long rx_crc_errors; /* recved pkt with crc error */ + unsigned long rx_frame_errors; /* recv'd frame alignment error */ + unsigned long rx_fifo_errors; /* recv'r fifo overrun */ + unsigned long rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + unsigned long tx_aborted_errors; + unsigned long tx_carrier_errors; + unsigned long tx_fifo_errors; + unsigned long tx_heartbeat_errors; + unsigned long tx_window_errors; + + /* for cslip etc */ + unsigned long rx_compressed; + unsigned long tx_compressed; +}; + + +/* Media selection options. */ +enum { + IF_PORT_UNKNOWN = 0, + IF_PORT_10BASE2, + IF_PORT_10BASET, + IF_PORT_AUI, + IF_PORT_100BASET, + IF_PORT_100BASETX, + IF_PORT_100BASEFX +}; + +#ifdef __KERNEL__ + +extern const char *if_port_text[]; + +#include +#include + +struct neighbour; +struct neigh_parms; +struct sk_buff; + +struct netif_rx_stats +{ + unsigned total; + unsigned dropped; + unsigned time_squeeze; + unsigned throttled; + unsigned fastroute_hit; + unsigned fastroute_success; + unsigned fastroute_defer; + unsigned fastroute_deferred_out; + unsigned fastroute_latency_reduction; + unsigned cpu_collision; +} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); + +extern struct netif_rx_stats netdev_rx_stat[]; + + +/* + * We tag multicasts with these structures. + */ + +struct dev_mc_list +{ + struct dev_mc_list *next; + __u8 dmi_addr[MAX_ADDR_LEN]; + unsigned char dmi_addrlen; + int dmi_users; + int dmi_gusers; +}; + +struct hh_cache +{ + struct hh_cache *hh_next; /* Next entry */ + atomic_t hh_refcnt; /* number of users */ + unsigned short hh_type; /* protocol identifier, f.e ETH_P_IP + * NOTE: For VLANs, this will be the + * encapsulated type. --BLG + */ + int hh_len; /* length of header */ + int (*hh_output)(struct sk_buff *skb); + rwlock_t hh_lock; + /* cached hardware header; allow for machine alignment needs. */ + unsigned long hh_data[16/sizeof(unsigned long)]; +}; + +/* These flag bits are private to the generic network queueing + * layer, they may not be explicitly referenced by any other + * code. + */ + +enum netdev_state_t +{ + __LINK_STATE_XOFF=0, + __LINK_STATE_START, + __LINK_STATE_PRESENT, + __LINK_STATE_SCHED, + __LINK_STATE_NOCARRIER +}; + + +/* + * The DEVICE structure.
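A sketch of how a driver feeds the statistics block above (the private struct is hypothetical; the get_stats hook this plugs into appears in struct net_device just below):

    struct my_priv { struct net_device_stats stats; };

    static struct net_device_stats *my_get_stats(struct net_device *dev)
    {
        return &((struct my_priv *) dev->priv)->stats;
    }
    /* On receive, the driver bumps the counters itself, e.g.:
       priv->stats.rx_packets++; priv->stats.rx_bytes += skb->len; */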
+ * Actually, this whole structure is a big mistake. It mixes I/O + * data with strictly "high-level" data, and it has to know about + * almost every data structure used in the INET module. + * + * FIXME: cleanup struct net_device such that network protocol info + * moves out. + */ + +struct net_device +{ + /* + * This is the first field of the "visible" part of this structure + * (i.e. as seen by users in the "Space.c" file). It is the name of + * the interface. + */ + char name[IFNAMSIZ]; + + /* + * I/O specific fields + * FIXME: Merge these and struct ifmap into one + */ + unsigned long rmem_end; /* shmem "recv" end */ + unsigned long rmem_start; /* shmem "recv" start */ + unsigned long mem_end; /* shared mem end */ + unsigned long mem_start; /* shared mem start */ + unsigned long base_addr; /* device I/O address */ + unsigned int irq; /* device IRQ number */ + + /* + * Some hardware also needs these fields, but they are not + * part of the usual set specified in Space.c. + */ + + unsigned char if_port; /* Selectable AUI, TP,..*/ + unsigned char dma; /* DMA channel */ + + unsigned long state; + + struct net_device *next; + + /* The device initialization function. Called only once. */ + int (*init)(struct net_device *dev); + + /* ------- Fields preinitialized in Space.c finish here ------- */ + + struct net_device *next_sched; + + /* Interface index. Unique device identifier */ + int ifindex; + int iflink; + + + struct net_device_stats* (*get_stats)(struct net_device *dev); + struct iw_statistics* (*get_wireless_stats)(struct net_device *dev); + + /* + * This marks the end of the "visible" part of the structure. All + * fields hereafter are internal to the system, and may change at + * will (read: may be cleaned up at will). + */ + + /* These may be needed for future network-power-down code. */ + unsigned long trans_start; /* Time (in jiffies) of last Tx */ + unsigned long last_rx; /* Time of last Rx */ + + unsigned short flags; /* interface flags (a la BSD) */ + unsigned short gflags; + unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned short unused_alignment_fixer; /* Because we need priv_flags, + * and we want to be 32-bit aligned. + */ + + unsigned mtu; /* interface MTU value */ + unsigned short type; /* interface hardware type */ + unsigned short hard_header_len; /* hardware hdr length */ + void *priv; /* pointer to private data */ + + struct net_device *master; /* Pointer to master device of a group, + * which this device is a member of. + */ + + /* Interface address info. */ + unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast addr */ + unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ + unsigned char addr_len; /* hardware address length */ + + struct dev_mc_list *mc_list; /* Multicast mac addresses */ + int mc_count; /* Number of installed mcasts */ + int promiscuity; + int allmulti; + + int watchdog_timeo; + struct timer_list watchdog_timer; + + /* Protocol specific pointers */ + + void *atalk_ptr; /* AppleTalk link */ + void *ip_ptr; /* IPv4 specific data */ + void *dn_ptr; /* DECnet specific data */ + void *ip6_ptr; /* IPv6 specific data */ + void *ec_ptr; /* Econet specific data */ + + struct Qdisc *qdisc; + struct Qdisc *qdisc_sleeping; + struct Qdisc *qdisc_list; + struct Qdisc *qdisc_ingress; + unsigned long tx_queue_len; /* Max frames per queue allowed */ + + /* hard_start_xmit synchronizer */ + spinlock_t xmit_lock; + /* cpu id of processor entered to hard_start_xmit or -1, + if nobody entered there.
+ */ + int xmit_lock_owner; + /* device queue lock */ + spinlock_t queue_lock; + /* Number of references to this device */ + atomic_t refcnt; + /* The flag marking that device is unregistered, but held by a user */ + int deadbeaf; + + /* Net device features */ + int features; +#define NETIF_F_SG 1 /* Scatter/gather IO. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopback. */ +#define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_DYNALLOC 16 /* Self-destructing device. */ +#define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ +#define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ + + /* Called after device is detached from network. */ + void (*uninit)(struct net_device *dev); + /* Called after last user reference disappears. */ + void (*destructor)(struct net_device *dev); + + /* Pointers to interface service routines. */ + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + int (*hard_header) (struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + void *daddr, + void *saddr, + unsigned len); + int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_MULTICAST + void (*set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_PRIVATE_IOCTL + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_HEADER_CACHE + int (*hard_header_cache)(struct neighbour *neigh, + struct hh_cache *hh); + void (*header_cache_update)(struct hh_cache *hh, + struct net_device *dev, + unsigned char * haddr); +#define HAVE_CHANGE_MTU + int (*change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*tx_timeout) (struct net_device *dev); + + int (*hard_header_parse)(struct sk_buff *skb, + unsigned char *haddr); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); +// int (*accept_fastpath)(struct net_device *, struct dst_entry*); + + /* open/release and usage marking */ + struct module *owner; + + /* bridge stuff */ + struct net_bridge_port *br_port; +}; + + +struct packet_type +{ + unsigned short type; /* This is really htons(ether_type).
*/ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, struct net_device *, + struct packet_type *); + void *data; /* Private to the packet type */ + struct packet_type *next; +}; + + +#include +//#include + +extern struct net_device loopback_dev; /* The loopback */ +extern struct net_device *dev_base; /* All devices */ +extern rwlock_t dev_base_lock; /* Device list lock */ + +extern int netdev_boot_setup_add(char *name, struct ifmap *map); +extern int netdev_boot_setup_check(struct net_device *dev); +extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); +extern void dev_add_pack(struct packet_type *pt); +extern void dev_remove_pack(struct packet_type *pt); +extern int dev_get(const char *name); +extern struct net_device *dev_get_by_name(const char *name); +extern struct net_device *__dev_get_by_name(const char *name); +extern struct net_device *dev_alloc(const char *name, int *err); +extern int dev_alloc_name(struct net_device *dev, const char *name); +extern int dev_open(struct net_device *dev); +extern int dev_close(struct net_device *dev); +extern int dev_queue_xmit(struct sk_buff *skb); +extern int register_netdevice(struct net_device *dev); +extern int unregister_netdevice(struct net_device *dev); +//extern int register_netdevice_notifier(struct notifier_block *nb); +//extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int dev_new_index(void); +extern struct net_device *dev_get_by_index(int ifindex); +extern struct net_device *__dev_get_by_index(int ifindex); +extern int dev_restart(struct net_device *dev); + +typedef int gifconf_func_t(struct net_device * dev, char * bufptr, int len); +extern int register_gifconf(unsigned int family, gifconf_func_t * gifconf); +static inline int unregister_gifconf(unsigned int family) +{ + return register_gifconf(family, 0); +} + +/* + * Incoming packets are placed on per-cpu queues so that + * no locking is needed. + */ + +struct softnet_data +{ + int throttle; + int cng_level; + int avg_blog; + struct sk_buff_head input_pkt_queue; + struct net_device *output_queue; + struct sk_buff *completion_queue; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + + +extern struct softnet_data softnet_data[NR_CPUS]; + +#define HAVE_NETIF_QUEUE + +static inline void __netif_schedule(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { + unsigned long flags; + int cpu = smp_processor_id(); + + local_irq_save(flags); + dev->next_sched = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = dev; + cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + local_irq_restore(flags); + } +} + +static inline void netif_schedule(struct net_device *dev) +{ + if (!test_bit(__LINK_STATE_XOFF, &dev->state)) + __netif_schedule(dev); +} + +static inline void netif_start_queue(struct net_device *dev) +{ + clear_bit(__LINK_STATE_XOFF, &dev->state); +} + +static inline void netif_wake_queue(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) + __netif_schedule(dev); +} + +static inline void netif_stop_queue(struct net_device *dev) +{ + set_bit(__LINK_STATE_XOFF, &dev->state); +} + +static inline int netif_queue_stopped(struct net_device *dev) +{ + return test_bit(__LINK_STATE_XOFF, &dev->state); +} + +static inline int netif_running(struct net_device *dev) +{ + return test_bit(__LINK_STATE_START, &dev->state); +} + +/* Use this variant when it is known for sure that it + * is executing from interrupt context. 
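Tying the queue helpers above together, a hedged sketch of the usual transmit pattern (hw_tx() and hw_tx_ring_full() are invented stand-ins for driver hardware access):

    static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        hw_tx(dev, skb);             /* hand the frame to the hardware */
        if (hw_tx_ring_full(dev))
            netif_stop_queue(dev);   /* throttle until the TX IRQ frees space */
        return 0;
    }
    /* The TX-completion interrupt then calls dev_kfree_skb_irq(skb),
       defined just below, followed by netif_wake_queue(dev). */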
+ */ +static inline void dev_kfree_skb_irq(struct sk_buff *skb) +{ + if (atomic_dec_and_test(&skb->users)) { + int cpu = smp_processor_id(); + unsigned long flags; + + local_irq_save(flags); + skb->next = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = skb; + cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + local_irq_restore(flags); + } +} + +/* Use this variant in places where it could be invoked + * either from interrupt or non-interrupt context. + */ +static inline void dev_kfree_skb_any(struct sk_buff *skb) +{ + if (in_irq()) + dev_kfree_skb_irq(skb); + else + dev_kfree_skb(skb); +} + +extern void net_call_rx_atomic(void (*fn)(void)); +#define HAVE_NETIF_RX 1 +extern int netif_rx(struct sk_buff *skb); +extern int dev_ioctl(unsigned int cmd, void *); +extern int dev_change_flags(struct net_device *, unsigned); +extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); + +extern void dev_init(void); + +extern int netdev_nit; + +/* Post buffer to the network code from _non interrupt_ context. + * see net/core/dev.c for netif_rx description. + */ +static inline int netif_rx_ni(struct sk_buff *skb) +{ + int err = netif_rx(skb); + if (softirq_pending(smp_processor_id())) + do_softirq(); + return err; +} + +static inline void dev_init_buffers(struct net_device *dev) +{ + /* WILL BE REMOVED IN 2.5.0 */ +} + +extern int netdev_finish_unregister(struct net_device *dev); + +static inline void dev_put(struct net_device *dev) +{ + if (atomic_dec_and_test(&dev->refcnt)) + netdev_finish_unregister(dev); +} + +#define __dev_put(dev) atomic_dec(&(dev)->refcnt) +#define dev_hold(dev) atomic_inc(&(dev)->refcnt) + +/* Carrier loss detection, dial on demand. The functions netif_carrier_on + * and _off may be called from IRQ context, but it is the caller + * who is responsible for serialization of these calls. + */ + +static inline int netif_carrier_ok(struct net_device *dev) +{ + return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); +} + +extern void __netdev_watchdog_up(struct net_device *dev); + +static inline void netif_carrier_on(struct net_device *dev) +{ + clear_bit(__LINK_STATE_NOCARRIER, &dev->state); + if (netif_running(dev)) + __netdev_watchdog_up(dev); +} + +static inline void netif_carrier_off(struct net_device *dev) +{ + set_bit(__LINK_STATE_NOCARRIER, &dev->state); +} + +/* Hot-plugging.
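A short sketch of driving the carrier helpers above from a PHY link-change event; mdio_read() is the same hypothetical accessor used in the mii.h note earlier, and MII_BMSR/BMSR_LSTATUS come from that header:

    static void my_link_change(struct net_device *dev, long ioaddr, int phy_id)
    {
        if (mdio_read(ioaddr, phy_id, MII_BMSR) & BMSR_LSTATUS)
            netif_carrier_on(dev);   /* also rearms the watchdog if running */
        else
            netif_carrier_off(dev);
    }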
*/ +static inline int netif_device_present(struct net_device *dev) +{ + return test_bit(__LINK_STATE_PRESENT, &dev->state); +} + +static inline void netif_device_detach(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_stop_queue(dev); + } +} + +static inline void netif_device_attach(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_wake_queue(dev); + __netdev_watchdog_up(dev); + } +} + +/* + * Network interface message level settings + */ +#define HAVE_NETIF_MSG 1 + +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, +}; + +#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) +#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) +#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) +#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) +#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) +#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) +#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) +#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) +#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) +#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) +#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) +#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) +#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) + +/* These functions live elsewhere (drivers/net/net_init.c, but related) */ + +extern void ether_setup(struct net_device *dev); +extern void fddi_setup(struct net_device *dev); +extern void tr_setup(struct net_device *dev); +extern void fc_setup(struct net_device *dev); +extern void fc_freedev(struct net_device *dev); +/* Support for loadable net-drivers */ +extern int register_netdev(struct net_device *dev); +extern void unregister_netdev(struct net_device *dev); +/* Functions used for multicast support */ +extern void dev_mc_upload(struct net_device *dev); +extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); +extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); +extern void dev_mc_discard(struct net_device *dev); +extern void dev_set_promiscuity(struct net_device *dev, int inc); +extern void dev_set_allmulti(struct net_device *dev, int inc); +extern void netdev_state_change(struct net_device *dev); +/* Load a device via the kmod */ +extern void dev_load(const char *name); +extern void dev_mcast_init(void); +extern int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)); +extern void netdev_unregister_fc(int bit); +extern int netdev_max_backlog; +extern unsigned long netdev_fc_xoff; +extern atomic_t netdev_dropping; +extern int netdev_set_master(struct net_device *dev, struct net_device *master); +extern struct sk_buff * skb_checksum_help(struct sk_buff *skb); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_DEV_H */ diff --git a/xen-2.4.16/include/xeno/pci.h b/xen-2.4.16/include/xeno/pci.h new file mode 100644 index 0000000000..15e51d2cdb --- /dev/null +++ 
b/xen-2.4.16/include/xeno/pci.h @@ -0,0 +1,761 @@ +/* + * $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $ + * + * PCI defines and function prototypes + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + */ + +#ifndef LINUX_PCI_H +#define LINUX_PCI_H + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ + +#define PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 + revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. 
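The sizing rule just stated, as a sketch (the config-space accessors are assumed from the PCI core declared elsewhere in this header, as is struct pci_dev; the BASE_ADDRESS constants and masks follow below):

    static u32 bar0_region_size(struct pci_dev *dev)
    {
        u32 bar, size;

        pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &bar);
        pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0xffffffff);
        pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &size);
        pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, bar);  /* restore */
        size &= (bar & PCI_BASE_ADDRESS_SPACE_IO)
                ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
        return ~size + 1;  /* lowest decoded bit gives the region size */
    }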
+ */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus 
bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ +#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??)
*/ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) */ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupts registers */ + +#define PCI_MSI_FLAGS 2 /* Various flags */ +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ + +/* Include the ID list */ + +#include + +/* + * The PCI interface treats multi-function devices as independent + * devices. The slot/function address of each device is encoded + * in a single byte as follows: + * + * 7:3 = slot + * 2:0 = function + */ +#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) +#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) +#define PCI_FUNC(devfn) ((devfn) & 0x07) + +/* Ioctls for /proc/bus/pci/X/Y nodes. */ +#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8) +#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */ +#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */ +#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */ +#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. 
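For concreteness, the devfn encoding above round-trips like this (the values are illustrative):

    unsigned int devfn = PCI_DEVFN(3, 1);  /* slot 3, function 1 -> 0x19 */
    int slot = PCI_SLOT(devfn);            /* == 3 */
    int func = PCI_FUNC(devfn);            /* == 1 */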
+
+/* Ioctls for /proc/bus/pci/X/Y nodes. */
+#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
+#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */
+#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */
+#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */
+#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */
+
+#ifdef __KERNEL__
+
+#include
+#include
+#include
+#include
+#include
+
+/* File state for mmap()s on /proc/bus/pci/X/Y */
+enum pci_mmap_state {
+    pci_mmap_io,
+    pci_mmap_mem
+};
+
+/* This defines the direction arg to the DMA mapping routines. */
+#define PCI_DMA_BIDIRECTIONAL 0
+#define PCI_DMA_TODEVICE 1
+#define PCI_DMA_FROMDEVICE 2
+#define PCI_DMA_NONE 3
+
+#define DEVICE_COUNT_COMPATIBLE 4
+#define DEVICE_COUNT_IRQ 2
+#define DEVICE_COUNT_DMA 2
+#define DEVICE_COUNT_RESOURCE 12
+
+#define PCI_ANY_ID (~0)
+
+#define pci_present pcibios_present
+
+#define pci_for_each_dev_reverse(dev) \
+    for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev))
+
+#define pci_for_each_bus(bus) \
+    for(bus = pci_bus_b(pci_root_buses.next); bus != pci_bus_b(&pci_root_buses); bus = pci_bus_b(bus->node.next))
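[Editor's note: a sketch of how these list-walking macros combine with the pci_bus_b()/pci_dev_b() casts defined below; dump_pci_tree is a hypothetical name, and printk is assumed available as in the rest of this tree.]

    static void dump_pci_tree(void)
    {
        struct pci_bus *bus;
        struct list_head *ln;

        /* Outer loop: every known root/child bus. Inner loop: the
         * per-bus device list embedded in struct pci_bus. */
        pci_for_each_bus(bus) {
            printk("bus %02x\n", bus->number);
            for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
                struct pci_dev *dev = pci_dev_b(ln);
                printk("  %s: %04x:%04x\n", dev->slot_name,
                       dev->vendor, dev->device);
            }
        }
    }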
+
+/*
+ * The pci_dev structure is used to describe both PCI and ISAPnP devices.
+ */
+struct pci_dev {
+    struct list_head global_list;   /* node in list of all PCI devices */
+    struct list_head bus_list;      /* node in per-bus list */
+    struct pci_bus *bus;            /* bus this device is on */
+    struct pci_bus *subordinate;    /* bus this device bridges to */
+
+    void *sysdata;                  /* hook for sys-specific extension */
+    struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+
+    unsigned int devfn;             /* encoded device & function index */
+    unsigned short vendor;
+    unsigned short device;
+    unsigned short subsystem_vendor;
+    unsigned short subsystem_device;
+    unsigned int class;             /* 3 bytes: (base,sub,prog-if) */
+    u8 hdr_type;                    /* PCI header type (`multi' flag masked out) */
+    u8 rom_base_reg;                /* which config register controls the ROM */
+
+    struct pci_driver *driver;      /* which driver has allocated this device */
+    void *driver_data;              /* data private to the driver */
+    u64 dma_mask;                   /* Mask of the bits of bus address this
+                                       device implements. Normally this is
+                                       0xffffffff. You only need to change
+                                       this if your device has broken DMA
+                                       or supports 64-bit transfers. */
+
+    u32 current_state;              /* Current operating state. In ACPI-speak,
+                                       this is D0-D3, D0 being fully functional,
+                                       and D3 being off. */
+
+    /* device is compatible with these IDs */
+    unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE];
+    unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE];
+
+    /*
+     * Instead of touching interrupt line and base address registers
+     * directly, use the values stored here. They might be different!
+     */
+    unsigned int irq;
+    struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+    struct resource dma_resource[DEVICE_COUNT_DMA];
+    struct resource irq_resource[DEVICE_COUNT_IRQ];
+
+    char name[80];                  /* device name */
+    char slot_name[8];              /* slot name */
+    int active;                     /* ISAPnP: device is active */
+    int ro;                         /* ISAPnP: read only */
+    unsigned short regs;            /* ISAPnP: supported registers */
+
+    int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */
+    int (*activate)(struct pci_dev *dev);
+    int (*deactivate)(struct pci_dev *dev);
+};
+
+#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
+#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
+
+/*
+ * For PCI devices, the region numbers are assigned this way:
+ *
+ * 0-5 standard PCI regions
+ * 6 expansion ROM
+ * 7-10 bridges: address space assigned to buses behind the bridge
+ */
+
+#define PCI_ROM_RESOURCE 6
+#define PCI_BRIDGE_RESOURCES 7
+#define PCI_NUM_RESOURCES 11
+
+#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
+
+struct pci_bus {
+    struct list_head node;          /* node in list of buses */
+    struct pci_bus *parent;         /* parent bus this bridge is on */
+    struct list_head children;      /* list of child buses */
+    struct list_head devices;       /* list of devices on this bus */
+    struct pci_dev *self;           /* bridge device as seen by parent */
+    struct resource *resource[4];   /* address space routed to this bus */
+
+    struct pci_ops *ops;            /* configuration access functions */
+    void *sysdata;                  /* hook for sys-specific extension */
+    struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
+
+    unsigned char number;           /* bus number */
+    unsigned char primary;          /* number of primary bridge */
+    unsigned char secondary;        /* number of secondary bridge */
+    unsigned char subordinate;      /* max number of subordinate buses */
+
+    char name[48];
+    unsigned short vendor;
+    unsigned short device;
+    unsigned int serial;            /* serial number */
+    unsigned char pnpver;           /* Plug & Play version */
+    unsigned char productver;      /* product version */
+    unsigned char checksum;         /* if zero - checksum passed */
+    unsigned char pad1;
+};
+
+#define pci_bus_b(n) list_entry(n, struct pci_bus, node)
+
+extern struct list_head pci_root_buses; /* list of all known PCI buses */
+extern struct list_head pci_devices;    /* list of all devices */
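[Editor's note: a sketch of how the resource[] slots line up with the region numbering above; indices 0-5 are the six BARs, PCI_ROM_RESOURCE (6) the expansion ROM, 7-10 the bridge windows. dump_regions is a hypothetical helper.]

    static void dump_regions(struct pci_dev *dev)
    {
        int i;

        for (i = 0; i < PCI_NUM_RESOURCES; i++) {
            struct resource *r = &dev->resource[i];
            if (r->start == 0 && r->end == 0)
                continue; /* unused slot */
            printk("%s region %d: %08lx-%08lx flags %lx%s\n",
                   dev->slot_name, i, r->start, r->end, r->flags,
                   i == PCI_ROM_RESOURCE ? " (ROM)" : "");
        }
    }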
+
+/*
+ * Error values that may be returned by PCI functions.
+ */
+#define PCIBIOS_SUCCESSFUL 0x00
+#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81
+#define PCIBIOS_BAD_VENDOR_ID 0x83
+#define PCIBIOS_DEVICE_NOT_FOUND 0x86
+#define PCIBIOS_BAD_REGISTER_NUMBER 0x87
+#define PCIBIOS_SET_FAILED 0x88
+#define PCIBIOS_BUFFER_TOO_SMALL 0x89
+
+/* Low-level architecture-dependent routines */
+
+struct pci_ops {
+    int (*read_byte)(struct pci_dev *, int where, u8 *val);
+    int (*read_word)(struct pci_dev *, int where, u16 *val);
+    int (*read_dword)(struct pci_dev *, int where, u32 *val);
+    int (*write_byte)(struct pci_dev *, int where, u8 val);
+    int (*write_word)(struct pci_dev *, int where, u16 val);
+    int (*write_dword)(struct pci_dev *, int where, u32 val);
+};
+
+struct pbus_set_ranges_data
+{
+    int found_vga;
+    unsigned long io_start, io_end;
+    unsigned long mem_start, mem_end;
+};
+
+struct pci_device_id {
+    unsigned int vendor, device;       /* Vendor and device ID or PCI_ANY_ID */
+    unsigned int subvendor, subdevice; /* Subsystem IDs or PCI_ANY_ID */
+    unsigned int class, class_mask;    /* (class,subclass,prog-if) triplet */
+    unsigned long driver_data;         /* Data private to the driver */
+};
+
+struct pci_driver {
+    struct list_head node;
+    char *name;
+    const struct pci_device_id *id_table; /* NULL if wants all devices */
+    int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
+    void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
+    int (*save_state) (struct pci_dev *dev, u32 state); /* Save Device Context */
+    int (*suspend)(struct pci_dev *dev, u32 state); /* Device suspended */
+    int (*resume) (struct pci_dev *dev); /* Device woken up */
+    int (*enable_wake) (struct pci_dev *dev, u32 state, int enable); /* Enable wake event */
+};
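[Editor's note: a minimal sketch of a driver built on struct pci_driver and struct pci_device_id. The RTL8139 IDs from the pci_ids.h added by this commit serve only as an example device; the probe/remove bodies and all "example_*" names are hypothetical. An all-zero entry terminates the ID table, and PCI_ANY_ID wildcards the subsystem fields.]

    static struct pci_device_id example_ids[] = {
        { PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
        { 0, }
    };

    static int example_probe(struct pci_dev *dev, const struct pci_device_id *id)
    {
        return pci_enable_device(dev); /* wake the device, assign resources */
    }

    static void example_remove(struct pci_dev *dev)
    {
        pci_disable_device(dev);
    }

    static struct pci_driver example_driver = {
        name:     "example",
        id_table: example_ids,
        probe:    example_probe,
        remove:   example_remove,
    };

Such a driver would be registered with pci_register_driver(), or via the pci_module_init() helper defined further down in this header.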
+
+/* these external functions are only available when PCI support is enabled */
+#ifdef CONFIG_PCI
+
+#define pci_for_each_dev(dev) \
+    for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next))
+
+void pcibios_init(void);
+void pcibios_fixup_bus(struct pci_bus *);
+int pcibios_enable_device(struct pci_dev *);
+char *pcibios_setup (char *str);
+
+/* Used only when drivers/pci/setup.c is used */
+void pcibios_align_resource(void *, struct resource *, unsigned long);
+void pcibios_update_resource(struct pci_dev *, struct resource *,
+                             struct resource *, int);
+void pcibios_update_irq(struct pci_dev *, int irq);
+void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *);
+
+/* Backward compatibility, don't use in new code! */
+
+int pcibios_present(void);
+int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn,
+                              unsigned char where, unsigned char *val);
+int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn,
+                              unsigned char where, unsigned short *val);
+int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn,
+                               unsigned char where, unsigned int *val);
+int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn,
+                               unsigned char where, unsigned char val);
+int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn,
+                               unsigned char where, unsigned short val);
+int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn,
+                                unsigned char where, unsigned int val);
+int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn);
+int pcibios_find_device (unsigned short vendor, unsigned short dev_id,
+                         unsigned short index, unsigned char *bus,
+                         unsigned char *dev_fn);
+
+/* Generic PCI functions used internally */
+
+void pci_init(void);
+int pci_bus_exists(const struct list_head *list, int nr);
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
+struct pci_bus *pci_alloc_primary_bus(int bus);
+struct pci_dev *pci_scan_slot(struct pci_dev *temp);
+int pci_proc_attach_device(struct pci_dev *dev);
+int pci_proc_detach_device(struct pci_dev *dev);
+int pci_proc_attach_bus(struct pci_bus *bus);
+int pci_proc_detach_bus(struct pci_bus *bus);
+void pci_name_device(struct pci_dev *dev);
+char *pci_class_name(u32 class);
+void pci_read_bridge_bases(struct pci_bus *child);
+struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res);
+int pci_setup_device(struct pci_dev *dev);
+int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+
+/* Generic PCI functions exported to card drivers */
+
+struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from);
+struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device,
+                                 unsigned int ss_vendor, unsigned int ss_device,
+                                 const struct pci_dev *from);
+struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from);
+struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn);
+int pci_find_capability (struct pci_dev *dev, int cap);
+
+int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val);
+int pci_read_config_word(struct pci_dev *dev, int where, u16 *val);
+int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val);
+int pci_write_config_byte(struct pci_dev *dev, int where, u8 val);
+int pci_write_config_word(struct pci_dev *dev, int where, u16 val);
+int pci_write_config_dword(struct pci_dev *dev, int where, u32 val);
+
+int pci_enable_device(struct pci_dev *dev);
+void pci_disable_device(struct pci_dev *dev);
+void pci_set_master(struct pci_dev *dev);
+int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask);
+int pci_assign_resource(struct pci_dev *dev, int i);
+
+/* Power management related routines */
+int pci_save_state(struct pci_dev *dev, u32 *buffer);
+int pci_restore_state(struct pci_dev *dev, u32 *buffer);
+int pci_set_power_state(struct pci_dev *dev, int state);
+int pci_enable_wake(struct pci_dev *dev, u32 state, int enable);
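[Editor's note: a sketch combining the typed config-space accessors with the capability-list constants from earlier in this header: locate the power-management capability, then read the current D-state out of its control/status register. read_pm_state is a hypothetical helper.]

    static int read_pm_state(struct pci_dev *dev)
    {
        u16 pmcsr;
        int pm = pci_find_capability(dev, PCI_CAP_ID_PM);

        if (!pm)
            return -1; /* no PM capability on this device */
        /* PCI_PM_CTRL is an offset relative to the capability base. */
        if (pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr) != PCIBIOS_SUCCESSFUL)
            return -1;
        return pmcsr & PCI_PM_CTRL_STATE_MASK; /* current D-state, 0-3 */
    }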
+
+/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
+
+int pci_claim_resource(struct pci_dev *, int);
+void pci_assign_unassigned_resources(void);
+void pdev_enable_device(struct pci_dev *);
+void pdev_sort_resources(struct pci_dev *, struct resource_list *, u32);
+unsigned long pci_bridge_check_io(struct pci_dev *);
+void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
+                    int (*)(struct pci_dev *, u8, u8));
+#define HAVE_PCI_REQ_REGIONS
+int pci_request_regions(struct pci_dev *, char *);
+void pci_release_regions(struct pci_dev *);
+
+/* New-style probing supporting hot-pluggable devices */
+int pci_register_driver(struct pci_driver *);
+void pci_unregister_driver(struct pci_driver *);
+void pci_insert_device(struct pci_dev *, struct pci_bus *);
+void pci_remove_device(struct pci_dev *);
+struct pci_driver *pci_dev_driver(const struct pci_dev *);
+const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev);
+void pci_announce_device_to_drivers(struct pci_dev *);
+unsigned int pci_do_scan_bus(struct pci_bus *bus);
+struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr);
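[Editor's note: a sketch of the canonical setup sequence a probe routine builds from the calls above: enable the device, claim its regions, then allow bus mastering, unwinding in reverse on error. "example" is a hypothetical driver name.]

    static int example_setup(struct pci_dev *dev)
    {
        int rc = pci_enable_device(dev);
        if (rc)
            return rc;
        rc = pci_request_regions(dev, "example"); /* reserve all BARs */
        if (rc) {
            pci_disable_device(dev);
            return rc;
        }
        pci_set_master(dev); /* let the device initiate DMA */
        return 0;
    }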
+
+#if 0
+/* kmem_cache style wrapper around pci_alloc_consistent() */
+struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev,
+                                  size_t size, size_t align, size_t allocation, int flags);
+void pci_pool_destroy (struct pci_pool *pool);
+
+void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle);
+void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr);
+#endif
+
+#endif /* CONFIG_PCI */
+
+/* Include architecture-dependent settings and functions */
+
+#include <asm/pci.h>
+
+/*
+ * If the system does not have PCI, clearly these return errors. Define
+ * these as simple inline functions to avoid hair in drivers.
+ */
+
+#ifndef CONFIG_PCI
+static inline int pcibios_present(void) { return 0; }
+static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn)
+{ return PCIBIOS_DEVICE_NOT_FOUND; }
+
+#define _PCI_NOP(o,s,t) \
+    static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \
+    { return PCIBIOS_FUNC_NOT_SUPPORTED; } \
+    static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \
+    { return PCIBIOS_FUNC_NOT_SUPPORTED; }
+#define _PCI_NOP_ALL(o,x) _PCI_NOP(o,byte,u8 x) \
+    _PCI_NOP(o,word,u16 x) \
+    _PCI_NOP(o,dword,u32 x)
+_PCI_NOP_ALL(read, *)
+_PCI_NOP_ALL(write,)
+
+static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
+{ return NULL; }
+
+static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device,
+                                              unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from)
+{ return NULL; }
+
+static inline void pci_set_master(struct pci_dev *dev) { }
+static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; }
+static inline void pci_disable_device(struct pci_dev *dev) { }
+static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; }
+static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; }
+static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; }
+static inline int pci_register_driver(struct pci_driver *drv) { return 0; }
+static inline void pci_unregister_driver(struct pci_driver *drv) { }
+static inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; }
+static inline int pci_find_capability (struct pci_dev *dev, int cap) { return 0; }
+static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; }
+
+/* Power management related routines */
+static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; }
+static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; }
+static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; }
+
+#define pci_for_each_dev(dev) \
+    for(dev = NULL; 0; )
+
+#else
+
+/*
+ * a helper function which helps ensure correct pci_driver
+ * setup and cleanup for commonly-encountered hotplug/modular cases
+ *
+ * This MUST stay in a header, as it checks for -DMODULE
+ */
+static inline int pci_module_init(struct pci_driver *drv)
+{
+    int rc = pci_register_driver (drv);
+
+    if (rc > 0)
+        return 0;
+
+    /* iff CONFIG_HOTPLUG and built into kernel, we should
+     * leave the driver around for future hotplug events.
+     * For the module case, a hotplug daemon of some sort
+     * should load a module in response to an insert event. */
+#if defined(CONFIG_HOTPLUG) && !defined(MODULE)
+    if (rc == 0)
+        return 0;
+#else
+    if (rc == 0)
+        rc = -ENODEV;
+#endif
+
+    /* if we get here, we need to clean up pci driver instance
+     * and return some sort of error */
+    pci_unregister_driver (drv);
+
+    return rc;
+}
+
+#endif /* !CONFIG_PCI */
+
+/* these helpers provide future and backwards compatibility
+ * for accessing popular PCI BAR info */
+#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start)
+#define pci_resource_end(dev,bar) ((dev)->resource[(bar)].end)
+#define pci_resource_flags(dev,bar) ((dev)->resource[(bar)].flags)
+#define pci_resource_len(dev,bar) \
+    ((pci_resource_start((dev),(bar)) == 0 && \
+      pci_resource_end((dev),(bar)) == \
+      pci_resource_start((dev),(bar))) ? 0 : \
+     \
+     (pci_resource_end((dev),(bar)) - \
+      pci_resource_start((dev),(bar)) + 1))
+
+/* Similar to the helpers above, these manipulate per-pci_dev
+ * driver-specific data. Currently stored as pci_dev::driver_data,
+ * a void pointer, but it is not present on older kernels.
+ */
+static inline void *pci_get_drvdata (struct pci_dev *pdev)
+{
+    return pdev->driver_data;
+}
+
+static inline void pci_set_drvdata (struct pci_dev *pdev, void *data)
+{
+    pdev->driver_data = data;
+}
+
+/*
+ * The world is not perfect and supplies us with broken PCI devices.
+ * For at least a part of these bugs we need a work-around, so both
+ * generic (drivers/pci/quirks.c) and per-architecture code can define
+ * fixup hooks to be called for particular buggy devices.
+ */
+
+struct pci_fixup {
+    int pass;
+    u16 vendor, device; /* You can use PCI_ANY_ID here of course */
+    void (*hook)(struct pci_dev *dev);
+};
+
+extern struct pci_fixup pcibios_fixups[];
+
+#define PCI_FIXUP_HEADER 1 /* Called immediately after reading configuration header */
+#define PCI_FIXUP_FINAL 2 /* Final phase of device fixups */
+
+void pci_fixup_device(int pass, struct pci_dev *dev);
+
+extern int pci_pci_problems;
+#define PCIPCI_FAIL 1
+#define PCIPCI_TRITON 2
+#define PCIPCI_NATOMA 4
+#define PCIPCI_VIAETBF 8
+#define PCIPCI_VSFX 16
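[Editor's note: a sketch of what an entry in an architecture's pcibios_fixups[] table could look like, using a VIA device ID from the pci_ids.h added by this commit purely as an example; the hook body is hypothetical, and real fixups live in drivers/pci/quirks.c.]

    static void example_quirk(struct pci_dev *dev)
    {
        printk("PCI: applying quirk to %s\n", dev->slot_name);
    }

    /* Run once per matching device, right after its config header is read. */
    struct pci_fixup pcibios_fixups[] = {
        { PCI_FIXUP_HEADER, PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0,
          example_quirk },
        { 0 }
    };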
+
+#endif /* __KERNEL__ */
+#endif /* LINUX_PCI_H */
diff --git a/xen-2.4.16/include/xeno/pci_ids.h b/xen-2.4.16/include/xeno/pci_ids.h
new file mode 100644
index 0000000000..4149c50a2d
--- /dev/null
+++ b/xen-2.4.16/include/xeno/pci_ids.h
@@ -0,0 +1,1803 @@
+/*
+ * PCI Class, Vendor and Device IDs
+ *
+ * Please keep sorted.
+ */
+
+/* Device classes and subclasses */
+
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+
+#define PCI_BASE_CLASS_STORAGE 0x01
+#define PCI_CLASS_STORAGE_SCSI 0x0100
+#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
+#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_OTHER 0x0180
+
+#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_OTHER 0x0280
+
+#define PCI_BASE_CLASS_DISPLAY 0x03
+#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
+#define PCI_CLASS_DISPLAY_OTHER 0x0380
+
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
+#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+
+#define PCI_BASE_CLASS_MEMORY 0x05
+#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
+
+#define PCI_BASE_CLASS_BRIDGE 0x06
+#define PCI_CLASS_BRIDGE_HOST 0x0600
+#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_OTHER 0x0680
+
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
+#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
+#define PCI_CLASS_INPUT_KEYBOARD 0x0900
+#define PCI_CLASS_INPUT_PEN 0x0901
+#define PCI_CLASS_INPUT_MOUSE 0x0902
+#define PCI_CLASS_INPUT_SCANNER 0x0903
+#define PCI_CLASS_INPUT_GAMEPORT 0x0904
+#define PCI_CLASS_INPUT_OTHER 0x0980
+
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_386 0x0b00
+#define
PCI_CLASS_PROCESSOR_486 0x0b01 +#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02 +#define PCI_CLASS_PROCESSOR_ALPHA 0x0b10 +#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20 +#define PCI_CLASS_PROCESSOR_MIPS 0x0b30 +#define PCI_CLASS_PROCESSOR_CO 0x0b40 + +#define PCI_BASE_CLASS_SERIAL 0x0c +#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00 +#define PCI_CLASS_SERIAL_ACCESS 0x0c01 +#define PCI_CLASS_SERIAL_SSA 0x0c02 +#define PCI_CLASS_SERIAL_USB 0x0c03 +#define PCI_CLASS_SERIAL_FIBER 0x0c04 +#define PCI_CLASS_SERIAL_SMBUS 0x0c05 + +#define PCI_BASE_CLASS_INTELLIGENT 0x0e +#define PCI_CLASS_INTELLIGENT_I2O 0x0e00 + +#define PCI_BASE_CLASS_SATELLITE 0x0f +#define PCI_CLASS_SATELLITE_TV 0x0f00 +#define PCI_CLASS_SATELLITE_AUDIO 0x0f01 +#define PCI_CLASS_SATELLITE_VOICE 0x0f03 +#define PCI_CLASS_SATELLITE_DATA 0x0f04 + +#define PCI_BASE_CLASS_CRYPT 0x10 +#define PCI_CLASS_CRYPT_NETWORK 0x1000 +#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001 +#define PCI_CLASS_CRYPT_OTHER 0x1080 + +#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11 +#define PCI_CLASS_SP_DPIO 0x1100 +#define PCI_CLASS_SP_OTHER 0x1180 + +#define PCI_CLASS_OTHERS 0xff + +/* Vendors and devices. Sort key: vendor first, device next. */ + +#define PCI_VENDOR_ID_DYNALINK 0x0675 +#define PCI_DEVICE_ID_DYNALINK_IS64PH 0x1702 + +#define PCI_VENDOR_ID_BERKOM 0x0871 +#define PCI_DEVICE_ID_BERKOM_A1T 0xffa1 +#define PCI_DEVICE_ID_BERKOM_T_CONCEPT 0xffa2 +#define PCI_DEVICE_ID_BERKOM_A4T 0xffa4 +#define PCI_DEVICE_ID_BERKOM_SCITEL_QUADRO 0xffa8 + +#define PCI_VENDOR_ID_COMPAQ 0x0e11 +#define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 +#define PCI_DEVICE_ID_COMPAQ_1280 0x3033 +#define PCI_DEVICE_ID_COMPAQ_TRIFLEX 0x4000 +#define PCI_DEVICE_ID_COMPAQ_6010 0x6010 +#define PCI_DEVICE_ID_COMPAQ_TACHYON 0xa0fc +#define PCI_DEVICE_ID_COMPAQ_SMART2P 0xae10 +#define PCI_DEVICE_ID_COMPAQ_NETEL100 0xae32 +#define PCI_DEVICE_ID_COMPAQ_NETEL10 0xae34 +#define PCI_DEVICE_ID_COMPAQ_NETFLEX3I 0xae35 +#define PCI_DEVICE_ID_COMPAQ_NETEL100D 0xae40 +#define PCI_DEVICE_ID_COMPAQ_NETEL100PI 0xae43 +#define PCI_DEVICE_ID_COMPAQ_NETEL100I 0xb011 +#define PCI_DEVICE_ID_COMPAQ_CISS 0xb060 +#define PCI_DEVICE_ID_COMPAQ_CISSB 0xb178 +#define PCI_DEVICE_ID_COMPAQ_THUNDER 0xf130 +#define PCI_DEVICE_ID_COMPAQ_NETFLEX3B 0xf150 + +#define PCI_VENDOR_ID_NCR 0x1000 +#define PCI_VENDOR_ID_LSI_LOGIC 0x1000 +#define PCI_DEVICE_ID_NCR_53C810 0x0001 +#define PCI_DEVICE_ID_NCR_53C820 0x0002 +#define PCI_DEVICE_ID_NCR_53C825 0x0003 +#define PCI_DEVICE_ID_NCR_53C815 0x0004 +#define PCI_DEVICE_ID_LSI_53C810AP 0x0005 +#define PCI_DEVICE_ID_NCR_53C860 0x0006 +#define PCI_DEVICE_ID_LSI_53C1510 0x000a +#define PCI_DEVICE_ID_NCR_53C896 0x000b +#define PCI_DEVICE_ID_NCR_53C895 0x000c +#define PCI_DEVICE_ID_NCR_53C885 0x000d +#define PCI_DEVICE_ID_NCR_53C875 0x000f +#define PCI_DEVICE_ID_NCR_53C1510 0x0010 +#define PCI_DEVICE_ID_LSI_53C895A 0x0012 +#define PCI_DEVICE_ID_LSI_53C875A 0x0013 +#define PCI_DEVICE_ID_LSI_53C1010_33 0x0020 +#define PCI_DEVICE_ID_LSI_53C1010_66 0x0021 +#define PCI_DEVICE_ID_LSI_53C1030 0x0030 +#define PCI_DEVICE_ID_LSI_53C1035 0x0040 +#define PCI_DEVICE_ID_NCR_53C875J 0x008f +#define PCI_DEVICE_ID_LSI_FC909 0x0621 +#define PCI_DEVICE_ID_LSI_FC929 0x0622 +#define PCI_DEVICE_ID_LSI_FC929_LAN 0x0623 +#define PCI_DEVICE_ID_LSI_FC919 0x0624 +#define PCI_DEVICE_ID_LSI_FC919_LAN 0x0625 +#define PCI_DEVICE_ID_NCR_YELLOWFIN 0x0701 +#define PCI_DEVICE_ID_LSI_61C102 0x0901 +#define PCI_DEVICE_ID_LSI_63C815 0x1000 + +#define PCI_VENDOR_ID_ATI 0x1002 +/* Mach64 */ +#define PCI_DEVICE_ID_ATI_68800 0x4158 +#define 
PCI_DEVICE_ID_ATI_215CT222 0x4354 +#define PCI_DEVICE_ID_ATI_210888CX 0x4358 +#define PCI_DEVICE_ID_ATI_215ET222 0x4554 +/* Mach64 / Rage */ +#define PCI_DEVICE_ID_ATI_215GB 0x4742 +#define PCI_DEVICE_ID_ATI_215GD 0x4744 +#define PCI_DEVICE_ID_ATI_215GI 0x4749 +#define PCI_DEVICE_ID_ATI_215GP 0x4750 +#define PCI_DEVICE_ID_ATI_215GQ 0x4751 +#define PCI_DEVICE_ID_ATI_215XL 0x4752 +#define PCI_DEVICE_ID_ATI_215GT 0x4754 +#define PCI_DEVICE_ID_ATI_215GTB 0x4755 +#define PCI_DEVICE_ID_ATI_215_IV 0x4756 +#define PCI_DEVICE_ID_ATI_215_IW 0x4757 +#define PCI_DEVICE_ID_ATI_215_IZ 0x475A +#define PCI_DEVICE_ID_ATI_210888GX 0x4758 +#define PCI_DEVICE_ID_ATI_215_LB 0x4c42 +#define PCI_DEVICE_ID_ATI_215_LD 0x4c44 +#define PCI_DEVICE_ID_ATI_215_LG 0x4c47 +#define PCI_DEVICE_ID_ATI_215_LI 0x4c49 +#define PCI_DEVICE_ID_ATI_215_LM 0x4c4D +#define PCI_DEVICE_ID_ATI_215_LN 0x4c4E +#define PCI_DEVICE_ID_ATI_215_LR 0x4c52 +#define PCI_DEVICE_ID_ATI_215_LS 0x4c53 +#define PCI_DEVICE_ID_ATI_264_LT 0x4c54 +/* Mach64 VT */ +#define PCI_DEVICE_ID_ATI_264VT 0x5654 +#define PCI_DEVICE_ID_ATI_264VU 0x5655 +#define PCI_DEVICE_ID_ATI_264VV 0x5656 +/* Rage128 Pro GL */ +#define PCI_DEVICE_ID_ATI_Rage128_PA 0x5041 +#define PCI_DEVICE_ID_ATI_Rage128_PB 0x5042 +#define PCI_DEVICE_ID_ATI_Rage128_PC 0x5043 +#define PCI_DEVICE_ID_ATI_Rage128_PD 0x5044 +#define PCI_DEVICE_ID_ATI_Rage128_PE 0x5045 +#define PCI_DEVICE_ID_ATI_RAGE128_PF 0x5046 +/* Rage128 Pro VR */ +#define PCI_DEVICE_ID_ATI_RAGE128_PG 0x5047 +#define PCI_DEVICE_ID_ATI_RAGE128_PH 0x5048 +#define PCI_DEVICE_ID_ATI_RAGE128_PI 0x5049 +#define PCI_DEVICE_ID_ATI_RAGE128_PJ 0x504A +#define PCI_DEVICE_ID_ATI_RAGE128_PK 0x504B +#define PCI_DEVICE_ID_ATI_RAGE128_PL 0x504C +#define PCI_DEVICE_ID_ATI_RAGE128_PM 0x504D +#define PCI_DEVICE_ID_ATI_RAGE128_PN 0x504E +#define PCI_DEVICE_ID_ATI_RAGE128_PO 0x504F +#define PCI_DEVICE_ID_ATI_RAGE128_PP 0x5050 +#define PCI_DEVICE_ID_ATI_RAGE128_PQ 0x5051 +#define PCI_DEVICE_ID_ATI_RAGE128_PR 0x5052 +#define PCI_DEVICE_ID_ATI_RAGE128_TR 0x5452 +#define PCI_DEVICE_ID_ATI_RAGE128_PS 0x5053 +#define PCI_DEVICE_ID_ATI_RAGE128_PT 0x5054 +#define PCI_DEVICE_ID_ATI_RAGE128_PU 0x5055 +#define PCI_DEVICE_ID_ATI_RAGE128_PV 0x5056 +#define PCI_DEVICE_ID_ATI_RAGE128_PW 0x5057 +#define PCI_DEVICE_ID_ATI_RAGE128_PX 0x5058 +/* Rage128 GL */ +#define PCI_DEVICE_ID_ATI_RAGE128_RE 0x5245 +#define PCI_DEVICE_ID_ATI_RAGE128_RF 0x5246 +#define PCI_DEVICE_ID_ATI_RAGE128_RG 0x534b +#define PCI_DEVICE_ID_ATI_RAGE128_RH 0x534c +#define PCI_DEVICE_ID_ATI_RAGE128_RI 0x534d +/* Rage128 VR */ +#define PCI_DEVICE_ID_ATI_RAGE128_RK 0x524b +#define PCI_DEVICE_ID_ATI_RAGE128_RL 0x524c +#define PCI_DEVICE_ID_ATI_RAGE128_RM 0x5345 +#define PCI_DEVICE_ID_ATI_RAGE128_RN 0x5346 +#define PCI_DEVICE_ID_ATI_RAGE128_RO 0x5347 +/* Rage128 M3 */ +#define PCI_DEVICE_ID_ATI_RAGE128_LE 0x4c45 +#define PCI_DEVICE_ID_ATI_RAGE128_LF 0x4c46 +/* Rage128 Pro Ultra */ +#define PCI_DEVICE_ID_ATI_RAGE128_U1 0x5446 +#define PCI_DEVICE_ID_ATI_RAGE128_U2 0x544C +#define PCI_DEVICE_ID_ATI_RAGE128_U3 0x5452 +/* Radeon M4 */ +#define PCI_DEVICE_ID_ATI_RADEON_LE 0x4d45 +#define PCI_DEVICE_ID_ATI_RADEON_LF 0x4d46 +/* Radeon NV-100 */ +#define PCI_DEVICE_ID_ATI_RADEON_N1 0x5159 +#define PCI_DEVICE_ID_ATI_RADEON_N2 0x515a +/* Radeon */ +#define PCI_DEVICE_ID_ATI_RADEON_RA 0x5144 +#define PCI_DEVICE_ID_ATI_RADEON_RB 0x5145 +#define PCI_DEVICE_ID_ATI_RADEON_RC 0x5146 +#define PCI_DEVICE_ID_ATI_RADEON_RD 0x5147 + +#define PCI_VENDOR_ID_VLSI 0x1004 +#define PCI_DEVICE_ID_VLSI_82C592 0x0005 +#define 
PCI_DEVICE_ID_VLSI_82C593 0x0006 +#define PCI_DEVICE_ID_VLSI_82C594 0x0007 +#define PCI_DEVICE_ID_VLSI_82C597 0x0009 +#define PCI_DEVICE_ID_VLSI_82C541 0x000c +#define PCI_DEVICE_ID_VLSI_82C543 0x000d +#define PCI_DEVICE_ID_VLSI_82C532 0x0101 +#define PCI_DEVICE_ID_VLSI_82C534 0x0102 +#define PCI_DEVICE_ID_VLSI_82C535 0x0104 +#define PCI_DEVICE_ID_VLSI_82C147 0x0105 +#define PCI_DEVICE_ID_VLSI_VAS96011 0x0702 + +#define PCI_VENDOR_ID_ADL 0x1005 +#define PCI_DEVICE_ID_ADL_2301 0x2301 + +#define PCI_VENDOR_ID_NS 0x100b +#define PCI_DEVICE_ID_NS_87415 0x0002 +#define PCI_DEVICE_ID_NS_87560_LIO 0x000e +#define PCI_DEVICE_ID_NS_87560_USB 0x0012 +#define PCI_DEVICE_ID_NS_83815 0x0020 +#define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_87410 0xd001 + +#define PCI_VENDOR_ID_TSENG 0x100c +#define PCI_DEVICE_ID_TSENG_W32P_2 0x3202 +#define PCI_DEVICE_ID_TSENG_W32P_b 0x3205 +#define PCI_DEVICE_ID_TSENG_W32P_c 0x3206 +#define PCI_DEVICE_ID_TSENG_W32P_d 0x3207 +#define PCI_DEVICE_ID_TSENG_ET6000 0x3208 + +#define PCI_VENDOR_ID_WEITEK 0x100e +#define PCI_DEVICE_ID_WEITEK_P9000 0x9001 +#define PCI_DEVICE_ID_WEITEK_P9100 0x9100 + +#define PCI_VENDOR_ID_DEC 0x1011 +#define PCI_DEVICE_ID_DEC_BRD 0x0001 +#define PCI_DEVICE_ID_DEC_TULIP 0x0002 +#define PCI_DEVICE_ID_DEC_TGA 0x0004 +#define PCI_DEVICE_ID_DEC_TULIP_FAST 0x0009 +#define PCI_DEVICE_ID_DEC_TGA2 0x000D +#define PCI_DEVICE_ID_DEC_FDDI 0x000F +#define PCI_DEVICE_ID_DEC_TULIP_PLUS 0x0014 +#define PCI_DEVICE_ID_DEC_21142 0x0019 +#define PCI_DEVICE_ID_DEC_21052 0x0021 +#define PCI_DEVICE_ID_DEC_21150 0x0022 +#define PCI_DEVICE_ID_DEC_21152 0x0024 +#define PCI_DEVICE_ID_DEC_21153 0x0025 +#define PCI_DEVICE_ID_DEC_21154 0x0026 +#define PCI_DEVICE_ID_DEC_21285 0x1065 +#define PCI_DEVICE_ID_COMPAQ_42XX 0x0046 + +#define PCI_VENDOR_ID_CIRRUS 0x1013 +#define PCI_DEVICE_ID_CIRRUS_7548 0x0038 +#define PCI_DEVICE_ID_CIRRUS_5430 0x00a0 +#define PCI_DEVICE_ID_CIRRUS_5434_4 0x00a4 +#define PCI_DEVICE_ID_CIRRUS_5434_8 0x00a8 +#define PCI_DEVICE_ID_CIRRUS_5436 0x00ac +#define PCI_DEVICE_ID_CIRRUS_5446 0x00b8 +#define PCI_DEVICE_ID_CIRRUS_5480 0x00bc +#define PCI_DEVICE_ID_CIRRUS_5462 0x00d0 +#define PCI_DEVICE_ID_CIRRUS_5464 0x00d4 +#define PCI_DEVICE_ID_CIRRUS_5465 0x00d6 +#define PCI_DEVICE_ID_CIRRUS_6729 0x1100 +#define PCI_DEVICE_ID_CIRRUS_6832 0x1110 +#define PCI_DEVICE_ID_CIRRUS_7542 0x1200 +#define PCI_DEVICE_ID_CIRRUS_7543 0x1202 +#define PCI_DEVICE_ID_CIRRUS_7541 0x1204 + +#define PCI_VENDOR_ID_IBM 0x1014 +#define PCI_DEVICE_ID_IBM_FIRE_CORAL 0x000a +#define PCI_DEVICE_ID_IBM_TR 0x0018 +#define PCI_DEVICE_ID_IBM_82G2675 0x001d +#define PCI_DEVICE_ID_IBM_MCA 0x0020 +#define PCI_DEVICE_ID_IBM_82351 0x0022 +#define PCI_DEVICE_ID_IBM_PYTHON 0x002d +#define PCI_DEVICE_ID_IBM_SERVERAID 0x002e +#define PCI_DEVICE_ID_IBM_TR_WAKE 0x003e +#define PCI_DEVICE_ID_IBM_MPIC 0x0046 +#define PCI_DEVICE_ID_IBM_3780IDSP 0x007d +#define PCI_DEVICE_ID_IBM_CHUKAR 0x0096 +#define PCI_DEVICE_ID_IBM_CPC710_PCI64 0x00fc +#define PCI_DEVICE_ID_IBM_CPC710_PCI32 0x0105 +#define PCI_DEVICE_ID_IBM_405GP 0x0156 +#define PCI_DEVICE_ID_IBM_SERVERAIDI960 0x01bd +#define PCI_DEVICE_ID_IBM_MPIC_2 0xffff + +#define PCI_VENDOR_ID_COMPEX2 0x101a // pci.ids says "AT&T GIS (NCR)" +#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005 + +#define PCI_VENDOR_ID_WD 0x101c +#define PCI_DEVICE_ID_WD_7197 0x3296 +#define PCI_DEVICE_ID_WD_90C 0xc24a + +#define PCI_VENDOR_ID_AMI 0x101e +#define PCI_DEVICE_ID_AMI_MEGARAID3 0x1960 +#define PCI_DEVICE_ID_AMI_MEGARAID 0x9010 +#define 
PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 + +#define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_LANCE 0x2000 +#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 +#define PCI_DEVICE_ID_AMD_SCSI 0x2020 +#define PCI_DEVICE_ID_AMD_FE_GATE_7006 0x7006 +#define PCI_DEVICE_ID_AMD_FE_GATE_7007 0x7007 +#define PCI_DEVICE_ID_AMD_FE_GATE_700C 0x700C +#define PCI_DEVIDE_ID_AMD_FE_GATE_700D 0x700D +#define PCI_DEVICE_ID_AMD_FE_GATE_700E 0x700E +#define PCI_DEVICE_ID_AMD_FE_GATE_700F 0x700F +#define PCI_DEVICE_ID_AMD_COBRA_7400 0x7400 +#define PCI_DEVICE_ID_AMD_COBRA_7401 0x7401 +#define PCI_DEVICE_ID_AMD_COBRA_7403 0x7403 +#define PCI_DEVICE_ID_AMD_COBRA_7404 0x7404 +#define PCI_DEVICE_ID_AMD_VIPER_7408 0x7408 +#define PCI_DEVICE_ID_AMD_VIPER_7409 0x7409 +#define PCI_DEVICE_ID_AMD_VIPER_740B 0x740B +#define PCI_DEVICE_ID_AMD_VIPER_740C 0x740C +#define PCI_DEVICE_ID_AMD_VIPER_7410 0x7410 +#define PCI_DEVICE_ID_AMD_VIPER_7411 0x7411 +#define PCI_DEVICE_ID_AMD_VIPER_7413 0x7413 +#define PCI_DEVICE_ID_AMD_VIPER_7414 0x7414 +#define PCI_DEVICE_ID_AMD_VIPER_7440 0x7440 +#define PCI_DEVICE_ID_AMD_VIPER_7441 0x7441 +#define PCI_DEVICE_ID_AMD_VIPER_7443 0x7443 +#define PCI_DEVICE_ID_AMD_VIPER_7448 0x7448 +#define PCI_DEVICE_ID_AMD_VIPER_7449 0x7449 + +#define PCI_VENDOR_ID_TRIDENT 0x1023 +#define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000 +#define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX 0x2001 +#define PCI_DEVICE_ID_TRIDENT_9320 0x9320 +#define PCI_DEVICE_ID_TRIDENT_9388 0x9388 +#define PCI_DEVICE_ID_TRIDENT_9397 0x9397 +#define PCI_DEVICE_ID_TRIDENT_939A 0x939A +#define PCI_DEVICE_ID_TRIDENT_9520 0x9520 +#define PCI_DEVICE_ID_TRIDENT_9525 0x9525 +#define PCI_DEVICE_ID_TRIDENT_9420 0x9420 +#define PCI_DEVICE_ID_TRIDENT_9440 0x9440 +#define PCI_DEVICE_ID_TRIDENT_9660 0x9660 +#define PCI_DEVICE_ID_TRIDENT_9750 0x9750 +#define PCI_DEVICE_ID_TRIDENT_9850 0x9850 +#define PCI_DEVICE_ID_TRIDENT_9880 0x9880 +#define PCI_DEVICE_ID_TRIDENT_8400 0x8400 +#define PCI_DEVICE_ID_TRIDENT_8420 0x8420 +#define PCI_DEVICE_ID_TRIDENT_8500 0x8500 + +#define PCI_VENDOR_ID_AI 0x1025 +#define PCI_DEVICE_ID_AI_M1435 0x1435 + +#define PCI_VENDOR_ID_DELL 0x1028 + +#define PCI_VENDOR_ID_MATROX 0x102B +#define PCI_DEVICE_ID_MATROX_MGA_2 0x0518 +#define PCI_DEVICE_ID_MATROX_MIL 0x0519 +#define PCI_DEVICE_ID_MATROX_MYS 0x051A +#define PCI_DEVICE_ID_MATROX_MIL_2 0x051b +#define PCI_DEVICE_ID_MATROX_MIL_2_AGP 0x051f +#define PCI_DEVICE_ID_MATROX_MGA_IMP 0x0d10 +#define PCI_DEVICE_ID_MATROX_G100_MM 0x1000 +#define PCI_DEVICE_ID_MATROX_G100_AGP 0x1001 +#define PCI_DEVICE_ID_MATROX_G200_PCI 0x0520 +#define PCI_DEVICE_ID_MATROX_G200_AGP 0x0521 +#define PCI_DEVICE_ID_MATROX_G400 0x0525 +#define PCI_DEVICE_ID_MATROX_G550 0x2527 +#define PCI_DEVICE_ID_MATROX_VIA 0x4536 + +#define PCI_VENDOR_ID_CT 0x102c +#define PCI_DEVICE_ID_CT_65545 0x00d8 +#define PCI_DEVICE_ID_CT_65548 0x00dc +#define PCI_DEVICE_ID_CT_65550 0x00e0 +#define PCI_DEVICE_ID_CT_65554 0x00e4 +#define PCI_DEVICE_ID_CT_65555 0x00e5 + +#define PCI_VENDOR_ID_MIRO 0x1031 +#define PCI_DEVICE_ID_MIRO_36050 0x5601 + +#define PCI_VENDOR_ID_NEC 0x1033 +#define PCI_DEVICE_ID_NEC_PCX2 0x0046 +#define PCI_DEVICE_ID_NEC_NILE4 0x005a +#define PCI_DEVICE_ID_NEC_VRC5476 0x009b +#define PCI_DEVICE_ID_NEC_VRC5477_AC97 0x00a6 + +#define PCI_VENDOR_ID_FD 0x1036 +#define PCI_DEVICE_ID_FD_36C70 0x0000 + +#define PCI_VENDOR_ID_SI 0x1039 +#define PCI_DEVICE_ID_SI_5591_AGP 0x0001 +#define PCI_DEVICE_ID_SI_6202 0x0002 +#define PCI_DEVICE_ID_SI_503 0x0008 +#define PCI_DEVICE_ID_SI_ACPI 0x0009 +#define PCI_DEVICE_ID_SI_5597_VGA 
0x0200 +#define PCI_DEVICE_ID_SI_6205 0x0205 +#define PCI_DEVICE_ID_SI_501 0x0406 +#define PCI_DEVICE_ID_SI_496 0x0496 +#define PCI_DEVICE_ID_SI_300 0x0300 +#define PCI_DEVICE_ID_SI_315H 0x0310 +#define PCI_DEVICE_ID_SI_315 0x0315 +#define PCI_DEVICE_ID_SI_315PRO 0x0325 +#define PCI_DEVICE_ID_SI_530 0x0530 +#define PCI_DEVICE_ID_SI_540 0x0540 +#define PCI_DEVICE_ID_SI_550 0x0550 +#define PCI_DEVICE_ID_SI_540_VGA 0x5300 +#define PCI_DEVICE_ID_SI_550_VGA 0x5315 +#define PCI_DEVICE_ID_SI_601 0x0601 +#define PCI_DEVICE_ID_SI_620 0x0620 +#define PCI_DEVICE_ID_SI_630 0x0630 +#define PCI_DEVICE_ID_SI_635 0x0635 +#define PCI_DEVICE_ID_SI_640 0x0640 +#define PCI_DEVICE_ID_SI_645 0x0645 +#define PCI_DEVICE_ID_SI_650 0x0650 +#define PCI_DEVICE_ID_SI_730 0x0730 +#define PCI_DEVICE_ID_SI_630_VGA 0x6300 +#define PCI_DEVICE_ID_SI_730_VGA 0x7300 +#define PCI_DEVICE_ID_SI_735 0x0735 +#define PCI_DEVICE_ID_SI_740 0x0740 +#define PCI_DEVICE_ID_SI_745 0x0745 +#define PCI_DEVICE_ID_SI_750 0x0750 +#define PCI_DEVICE_ID_SI_900 0x0900 +#define PCI_DEVICE_ID_SI_5107 0x5107 +#define PCI_DEVICE_ID_SI_5300 0x5300 +#define PCI_DEVICE_ID_SI_5511 0x5511 +#define PCI_DEVICE_ID_SI_5513 0x5513 +#define PCI_DEVICE_ID_SI_5571 0x5571 +#define PCI_DEVICE_ID_SI_5591 0x5591 +#define PCI_DEVICE_ID_SI_5597 0x5597 +#define PCI_DEVICE_ID_SI_5598 0x5598 +#define PCI_DEVICE_ID_SI_5600 0x5600 +#define PCI_DEVICE_ID_SI_6300 0x6300 +#define PCI_DEVICE_ID_SI_6306 0x6306 +#define PCI_DEVICE_ID_SI_6326 0x6326 +#define PCI_DEVICE_ID_SI_7001 0x7001 +#define PCI_DEVICE_ID_SI_7016 0x7016 + +#define PCI_VENDOR_ID_HP 0x103c +#define PCI_DEVICE_ID_HP_DONNER_GFX 0x1008 +#define PCI_DEVICE_ID_HP_TACHYON 0x1028 +#define PCI_DEVICE_ID_HP_TACHLITE 0x1029 +#define PCI_DEVICE_ID_HP_J2585A 0x1030 +#define PCI_DEVICE_ID_HP_J2585B 0x1031 +#define PCI_DEVICE_ID_HP_SAS 0x1048 +#define PCI_DEVICE_ID_HP_DIVA1 0x1049 +#define PCI_DEVICE_ID_HP_DIVA2 0x104A +#define PCI_DEVICE_ID_HP_SP2_0 0x104B +#define PCI_DEVICE_ID_HP_ZX1_SBA 0x1229 +#define PCI_DEVICE_ID_HP_ZX1_IOC 0x122a +#define PCI_DEVICE_ID_HP_ZX1_LBA 0x122e + +#define PCI_VENDOR_ID_PCTECH 0x1042 +#define PCI_DEVICE_ID_PCTECH_RZ1000 0x1000 +#define PCI_DEVICE_ID_PCTECH_RZ1001 0x1001 +#define PCI_DEVICE_ID_PCTECH_SAMURAI_0 0x3000 +#define PCI_DEVICE_ID_PCTECH_SAMURAI_1 0x3010 +#define PCI_DEVICE_ID_PCTECH_SAMURAI_IDE 0x3020 + +#define PCI_VENDOR_ID_ASUSTEK 0x1043 +#define PCI_DEVICE_ID_ASUSTEK_0675 0x0675 + +#define PCI_VENDOR_ID_DPT 0x1044 +#define PCI_DEVICE_ID_DPT 0xa400 + +#define PCI_VENDOR_ID_OPTI 0x1045 +#define PCI_DEVICE_ID_OPTI_92C178 0xc178 +#define PCI_DEVICE_ID_OPTI_82C557 0xc557 +#define PCI_DEVICE_ID_OPTI_82C558 0xc558 +#define PCI_DEVICE_ID_OPTI_82C621 0xc621 +#define PCI_DEVICE_ID_OPTI_82C700 0xc700 +#define PCI_DEVICE_ID_OPTI_82C701 0xc701 +#define PCI_DEVICE_ID_OPTI_82C814 0xc814 +#define PCI_DEVICE_ID_OPTI_82C822 0xc822 +#define PCI_DEVICE_ID_OPTI_82C861 0xc861 +#define PCI_DEVICE_ID_OPTI_82C825 0xd568 + +#define PCI_VENDOR_ID_ELSA 0x1048 +#define PCI_DEVICE_ID_ELSA_MICROLINK 0x1000 +#define PCI_DEVICE_ID_ELSA_QS3000 0x3000 + +#define PCI_VENDOR_ID_SGS 0x104a +#define PCI_DEVICE_ID_SGS_2000 0x0008 +#define PCI_DEVICE_ID_SGS_1764 0x0009 + +#define PCI_VENDOR_ID_BUSLOGIC 0x104B +#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140 +#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040 +#define PCI_DEVICE_ID_BUSLOGIC_FLASHPOINT 0x8130 + +#define PCI_VENDOR_ID_TI 0x104c +#define PCI_DEVICE_ID_TI_TVP4010 0x3d04 +#define PCI_DEVICE_ID_TI_TVP4020 0x3d07 +#define PCI_DEVICE_ID_TI_1130 0xac12 
+#define PCI_DEVICE_ID_TI_1031 0xac13 +#define PCI_DEVICE_ID_TI_1131 0xac15 +#define PCI_DEVICE_ID_TI_1250 0xac16 +#define PCI_DEVICE_ID_TI_1220 0xac17 +#define PCI_DEVICE_ID_TI_1221 0xac19 +#define PCI_DEVICE_ID_TI_1210 0xac1a +#define PCI_DEVICE_ID_TI_1410 0xac50 +#define PCI_DEVICE_ID_TI_1450 0xac1b +#define PCI_DEVICE_ID_TI_1225 0xac1c +#define PCI_DEVICE_ID_TI_1251A 0xac1d +#define PCI_DEVICE_ID_TI_1211 0xac1e +#define PCI_DEVICE_ID_TI_1251B 0xac1f +#define PCI_DEVICE_ID_TI_4410 0xac41 +#define PCI_DEVICE_ID_TI_4451 0xac42 +#define PCI_DEVICE_ID_TI_1420 0xac51 + +#define PCI_VENDOR_ID_SONY 0x104d +#define PCI_DEVICE_ID_SONY_CXD3222 0x8039 + +#define PCI_VENDOR_ID_OAK 0x104e +#define PCI_DEVICE_ID_OAK_OTI107 0x0107 + +/* Winbond have two vendor IDs! See 0x10ad as well */ +#define PCI_VENDOR_ID_WINBOND2 0x1050 +#define PCI_DEVICE_ID_WINBOND2_89C940 0x0940 +#define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a +#define PCI_DEVICE_ID_WINBOND2_6692 0x6692 + +#define PCI_VENDOR_ID_ANIGMA 0x1051 +#define PCI_DEVICE_ID_ANIGMA_MC145575 0x0100 + +#define PCI_VENDOR_ID_EFAR 0x1055 +#define PCI_DEVICE_ID_EFAR_SLC90E66_1 0x9130 +#define PCI_DEVICE_ID_EFAR_SLC90E66_0 0x9460 +#define PCI_DEVICE_ID_EFAR_SLC90E66_2 0x9462 +#define PCI_DEVICE_ID_EFAR_SLC90E66_3 0x9463 + +#define PCI_VENDOR_ID_MOTOROLA 0x1057 +#define PCI_VENDOR_ID_MOTOROLA_OOPS 0x1507 +#define PCI_DEVICE_ID_MOTOROLA_MPC105 0x0001 +#define PCI_DEVICE_ID_MOTOROLA_MPC106 0x0002 +#define PCI_DEVICE_ID_MOTOROLA_RAVEN 0x4801 +#define PCI_DEVICE_ID_MOTOROLA_FALCON 0x4802 +#define PCI_DEVICE_ID_MOTOROLA_HAWK 0x4803 +#define PCI_DEVICE_ID_MOTOROLA_CPX8216 0x4806 + +#define PCI_VENDOR_ID_PROMISE 0x105a +#define PCI_DEVICE_ID_PROMISE_20265 0x0d30 +#define PCI_DEVICE_ID_PROMISE_20267 0x4d30 +#define PCI_DEVICE_ID_PROMISE_20246 0x4d33 +#define PCI_DEVICE_ID_PROMISE_20262 0x4d38 +#define PCI_DEVICE_ID_PROMISE_20263 0x0D38 +#define PCI_DEVICE_ID_PROMISE_20268 0x4d68 +#define PCI_DEVICE_ID_PROMISE_20270 0x6268 +#define PCI_DEVICE_ID_PROMISE_20269 0x4d69 +#define PCI_DEVICE_ID_PROMISE_20271 0x6269 +#define PCI_DEVICE_ID_PROMISE_20275 0x1275 +#define PCI_DEVICE_ID_PROMISE_20276 0x5275 +#define PCI_DEVICE_ID_PROMISE_20277 0x7275 +#define PCI_DEVICE_ID_PROMISE_5300 0x5300 + +#define PCI_VENDOR_ID_N9 0x105d +#define PCI_DEVICE_ID_N9_I128 0x2309 +#define PCI_DEVICE_ID_N9_I128_2 0x2339 +#define PCI_DEVICE_ID_N9_I128_T2R 0x493d + +#define PCI_VENDOR_ID_UMC 0x1060 +#define PCI_DEVICE_ID_UMC_UM8673F 0x0101 +#define PCI_DEVICE_ID_UMC_UM8891A 0x0891 +#define PCI_DEVICE_ID_UMC_UM8886BF 0x673a +#define PCI_DEVICE_ID_UMC_UM8886A 0x886a +#define PCI_DEVICE_ID_UMC_UM8881F 0x8881 +#define PCI_DEVICE_ID_UMC_UM8886F 0x8886 +#define PCI_DEVICE_ID_UMC_UM9017F 0x9017 +#define PCI_DEVICE_ID_UMC_UM8886N 0xe886 +#define PCI_DEVICE_ID_UMC_UM8891N 0xe891 + +#define PCI_VENDOR_ID_X 0x1061 +#define PCI_DEVICE_ID_X_AGX016 0x0001 + +#define PCI_VENDOR_ID_MYLEX 0x1069 +#define PCI_DEVICE_ID_MYLEX_DAC960_P 0x0001 +#define PCI_DEVICE_ID_MYLEX_DAC960_PD 0x0002 +#define PCI_DEVICE_ID_MYLEX_DAC960_PG 0x0010 +#define PCI_DEVICE_ID_MYLEX_DAC960_LA 0x0020 +#define PCI_DEVICE_ID_MYLEX_DAC960_LP 0x0050 +#define PCI_DEVICE_ID_MYLEX_DAC960_BA 0xBA56 + +#define PCI_VENDOR_ID_PICOP 0x1066 +#define PCI_DEVICE_ID_PICOP_PT86C52X 0x0001 +#define PCI_DEVICE_ID_PICOP_PT80C524 0x8002 + +#define PCI_VENDOR_ID_APPLE 0x106b +#define PCI_DEVICE_ID_APPLE_BANDIT 0x0001 +#define PCI_DEVICE_ID_APPLE_GC 0x0002 +#define PCI_DEVICE_ID_APPLE_HYDRA 0x000e +#define PCI_DEVICE_ID_APPLE_UNI_N_FW 0x0018 +#define 
PCI_DEVICE_ID_APPLE_KL_USB 0x0019 +#define PCI_DEVICE_ID_APPLE_UNI_N_AGP 0x0020 +#define PCI_DEVICE_ID_APPLE_UNI_N_GMAC 0x0021 +#define PCI_DEVICE_ID_APPLE_KEYLARGO 0x0022 +#define PCI_DEVICE_ID_APPLE_UNI_N_GMACP 0x0024 +#define PCI_DEVICE_ID_APPLE_KEYLARGO_P 0x0025 +#define PCI_DEVICE_ID_APPLE_KL_USB_P 0x0026 +#define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P 0x0027 +#define PCI_DEVICE_ID_APPLE_UNI_N_AGP15 0x002d +#define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030 + +#define PCI_VENDOR_ID_YAMAHA 0x1073 +#define PCI_DEVICE_ID_YAMAHA_724 0x0004 +#define PCI_DEVICE_ID_YAMAHA_724F 0x000d +#define PCI_DEVICE_ID_YAMAHA_740 0x000a +#define PCI_DEVICE_ID_YAMAHA_740C 0x000c +#define PCI_DEVICE_ID_YAMAHA_744 0x0010 +#define PCI_DEVICE_ID_YAMAHA_754 0x0012 + +#define PCI_VENDOR_ID_NEXGEN 0x1074 +#define PCI_DEVICE_ID_NEXGEN_82C501 0x4e78 + +#define PCI_VENDOR_ID_QLOGIC 0x1077 +#define PCI_DEVICE_ID_QLOGIC_ISP1020 0x1020 +#define PCI_DEVICE_ID_QLOGIC_ISP1022 0x1022 +#define PCI_DEVICE_ID_QLOGIC_ISP2100 0x2100 +#define PCI_DEVICE_ID_QLOGIC_ISP2200 0x2200 + +#define PCI_VENDOR_ID_CYRIX 0x1078 +#define PCI_DEVICE_ID_CYRIX_5510 0x0000 +#define PCI_DEVICE_ID_CYRIX_PCI_MASTER 0x0001 +#define PCI_DEVICE_ID_CYRIX_5520 0x0002 +#define PCI_DEVICE_ID_CYRIX_5530_LEGACY 0x0100 +#define PCI_DEVICE_ID_CYRIX_5530_SMI 0x0101 +#define PCI_DEVICE_ID_CYRIX_5530_IDE 0x0102 +#define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103 +#define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104 + +#define PCI_VENDOR_ID_LEADTEK 0x107d +#define PCI_DEVICE_ID_LEADTEK_805 0x0000 + +#define PCI_VENDOR_ID_INTERPHASE 0x107e +#define PCI_DEVICE_ID_INTERPHASE_5526 0x0004 +#define PCI_DEVICE_ID_INTERPHASE_55x6 0x0005 +#define PCI_DEVICE_ID_INTERPHASE_5575 0x0008 + +#define PCI_VENDOR_ID_CONTAQ 0x1080 +#define PCI_DEVICE_ID_CONTAQ_82C599 0x0600 +#define PCI_DEVICE_ID_CONTAQ_82C693 0xc693 + +#define PCI_VENDOR_ID_FOREX 0x1083 + +#define PCI_VENDOR_ID_OLICOM 0x108d +#define PCI_DEVICE_ID_OLICOM_OC3136 0x0001 +#define PCI_DEVICE_ID_OLICOM_OC2315 0x0011 +#define PCI_DEVICE_ID_OLICOM_OC2325 0x0012 +#define PCI_DEVICE_ID_OLICOM_OC2183 0x0013 +#define PCI_DEVICE_ID_OLICOM_OC2326 0x0014 +#define PCI_DEVICE_ID_OLICOM_OC6151 0x0021 + +#define PCI_VENDOR_ID_SUN 0x108e +#define PCI_DEVICE_ID_SUN_EBUS 0x1000 +#define PCI_DEVICE_ID_SUN_HAPPYMEAL 0x1001 +#define PCI_DEVICE_ID_SUN_RIO_EBUS 0x1100 +#define PCI_DEVICE_ID_SUN_RIO_GEM 0x1101 +#define PCI_DEVICE_ID_SUN_RIO_1394 0x1102 +#define PCI_DEVICE_ID_SUN_RIO_USB 0x1103 +#define PCI_DEVICE_ID_SUN_GEM 0x2bad +#define PCI_DEVICE_ID_SUN_SIMBA 0x5000 +#define PCI_DEVICE_ID_SUN_PBM 0x8000 +#define PCI_DEVICE_ID_SUN_SCHIZO 0x8001 +#define PCI_DEVICE_ID_SUN_SABRE 0xa000 +#define PCI_DEVICE_ID_SUN_HUMMINGBIRD 0xa001 + +#define PCI_VENDOR_ID_CMD 0x1095 +#define PCI_DEVICE_ID_CMD_640 0x0640 +#define PCI_DEVICE_ID_CMD_643 0x0643 +#define PCI_DEVICE_ID_CMD_646 0x0646 +#define PCI_DEVICE_ID_CMD_647 0x0647 +#define PCI_DEVICE_ID_CMD_648 0x0648 +#define PCI_DEVICE_ID_CMD_649 0x0649 +#define PCI_DEVICE_ID_CMD_670 0x0670 +#define PCI_DEVICE_ID_CMD_680 0x0680 + +#define PCI_VENDOR_ID_VISION 0x1098 +#define PCI_DEVICE_ID_VISION_QD8500 0x0001 +#define PCI_DEVICE_ID_VISION_QD8580 0x0002 + +#define PCI_VENDOR_ID_BROOKTREE 0x109e +#define PCI_DEVICE_ID_BROOKTREE_848 0x0350 +#define PCI_DEVICE_ID_BROOKTREE_849A 0x0351 +#define PCI_DEVICE_ID_BROOKTREE_878_1 0x036e +#define PCI_DEVICE_ID_BROOKTREE_878 0x0878 +#define PCI_DEVICE_ID_BROOKTREE_8474 0x8474 + +#define PCI_VENDOR_ID_SIERRA 0x10a8 +#define PCI_DEVICE_ID_SIERRA_STB 0x0000 + +#define PCI_VENDOR_ID_SGI 0x10a9 
+#define PCI_DEVICE_ID_SGI_IOC3 0x0003 + +#define PCI_VENDOR_ID_ACC 0x10aa +#define PCI_DEVICE_ID_ACC_2056 0x0000 + +#define PCI_VENDOR_ID_WINBOND 0x10ad +#define PCI_DEVICE_ID_WINBOND_83769 0x0001 +#define PCI_DEVICE_ID_WINBOND_82C105 0x0105 +#define PCI_DEVICE_ID_WINBOND_83C553 0x0565 + +#define PCI_VENDOR_ID_DATABOOK 0x10b3 +#define PCI_DEVICE_ID_DATABOOK_87144 0xb106 + +#define PCI_VENDOR_ID_PLX 0x10b5 +#define PCI_DEVICE_ID_PLX_R685 0x1030 +#define PCI_DEVICE_ID_PLX_ROMULUS 0x106a +#define PCI_DEVICE_ID_PLX_SPCOM800 0x1076 +#define PCI_DEVICE_ID_PLX_1077 0x1077 +#define PCI_DEVICE_ID_PLX_SPCOM200 0x1103 +#define PCI_DEVICE_ID_PLX_DJINN_ITOO 0x1151 +#define PCI_DEVICE_ID_PLX_R753 0x1152 +#define PCI_DEVICE_ID_PLX_9050 0x9050 +#define PCI_DEVICE_ID_PLX_9060 0x9060 +#define PCI_DEVICE_ID_PLX_9060ES 0x906E +#define PCI_DEVICE_ID_PLX_9060SD 0x906D +#define PCI_DEVICE_ID_PLX_9080 0x9080 +#define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001 + +#define PCI_VENDOR_ID_MADGE 0x10b6 +#define PCI_DEVICE_ID_MADGE_MK2 0x0002 +#define PCI_DEVICE_ID_MADGE_C155S 0x1001 + +#define PCI_VENDOR_ID_3COM 0x10b7 +#define PCI_DEVICE_ID_3COM_3C985 0x0001 +#define PCI_DEVICE_ID_3COM_3C339 0x3390 +#define PCI_DEVICE_ID_3COM_3C590 0x5900 +#define PCI_DEVICE_ID_3COM_3C595TX 0x5950 +#define PCI_DEVICE_ID_3COM_3C595T4 0x5951 +#define PCI_DEVICE_ID_3COM_3C595MII 0x5952 +#define PCI_DEVICE_ID_3COM_3C900TPO 0x9000 +#define PCI_DEVICE_ID_3COM_3C900COMBO 0x9001 +#define PCI_DEVICE_ID_3COM_3C905TX 0x9050 +#define PCI_DEVICE_ID_3COM_3C905T4 0x9051 +#define PCI_DEVICE_ID_3COM_3C905B_TX 0x9055 + +#define PCI_VENDOR_ID_SMC 0x10b8 +#define PCI_DEVICE_ID_SMC_EPIC100 0x0005 + +#define PCI_VENDOR_ID_AL 0x10b9 +#define PCI_DEVICE_ID_AL_M1445 0x1445 +#define PCI_DEVICE_ID_AL_M1449 0x1449 +#define PCI_DEVICE_ID_AL_M1451 0x1451 +#define PCI_DEVICE_ID_AL_M1461 0x1461 +#define PCI_DEVICE_ID_AL_M1489 0x1489 +#define PCI_DEVICE_ID_AL_M1511 0x1511 +#define PCI_DEVICE_ID_AL_M1513 0x1513 +#define PCI_DEVICE_ID_AL_M1521 0x1521 +#define PCI_DEVICE_ID_AL_M1523 0x1523 +#define PCI_DEVICE_ID_AL_M1531 0x1531 +#define PCI_DEVICE_ID_AL_M1533 0x1533 +#define PCI_DEVICE_ID_AL_M1541 0x1541 +#define PCI_DEVICE_ID_AL_M1621 0x1621 +#define PCI_DEVICE_ID_AL_M1631 0x1631 +#define PCI_DEVICE_ID_AL_M1641 0x1641 +#define PCI_DEVICE_ID_AL_M1644 0x1644 +#define PCI_DEVICE_ID_AL_M1647 0x1647 +#define PCI_DEVICE_ID_AL_M1651 0x1651 +#define PCI_DEVICE_ID_AL_M1543 0x1543 +#define PCI_DEVICE_ID_AL_M3307 0x3307 +#define PCI_DEVICE_ID_AL_M4803 0x5215 +#define PCI_DEVICE_ID_AL_M5219 0x5219 +#define PCI_DEVICE_ID_AL_M5229 0x5229 +#define PCI_DEVICE_ID_AL_M5237 0x5237 +#define PCI_DEVICE_ID_AL_M5243 0x5243 +#define PCI_DEVICE_ID_AL_M5451 0x5451 +#define PCI_DEVICE_ID_AL_M7101 0x7101 + +#define PCI_VENDOR_ID_MITSUBISHI 0x10ba + +#define PCI_VENDOR_ID_SURECOM 0x10bd +#define PCI_DEVICE_ID_SURECOM_NE34 0x0e34 + +#define PCI_VENDOR_ID_NEOMAGIC 0x10c8 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2070 0x0001 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128V 0x0002 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZV 0x0003 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_NM2160 0x0004 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICMEDIA_256AV 0x0005 +#define PCI_DEVICE_ID_NEOMAGIC_MAGICGRAPH_128ZVPLUS 0x0083 + +#define PCI_VENDOR_ID_ASP 0x10cd +#define PCI_DEVICE_ID_ASP_ABP940 0x1200 +#define PCI_DEVICE_ID_ASP_ABP940U 0x1300 +#define PCI_DEVICE_ID_ASP_ABP940UW 0x2300 + +#define PCI_VENDOR_ID_MACRONIX 0x10d9 +#define PCI_DEVICE_ID_MACRONIX_MX98713 0x0512 +#define PCI_DEVICE_ID_MACRONIX_MX987x5 0x0531 + 
+#define PCI_VENDOR_ID_TCONRAD 0x10da +#define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508 + +#define PCI_VENDOR_ID_CERN 0x10dc +#define PCI_DEVICE_ID_CERN_SPSB_PMC 0x0001 +#define PCI_DEVICE_ID_CERN_SPSB_PCI 0x0002 +#define PCI_DEVICE_ID_CERN_HIPPI_DST 0x0021 +#define PCI_DEVICE_ID_CERN_HIPPI_SRC 0x0022 + +#define PCI_VENDOR_ID_NVIDIA 0x10de +#define PCI_DEVICE_ID_NVIDIA_TNT 0x0020 +#define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028 +#define PCI_DEVICE_ID_NVIDIA_UTNT2 0x0029 +#define PCI_DEVICE_ID_NVIDIA_VTNT2 0x002C +#define PCI_DEVICE_ID_NVIDIA_UVTNT2 0x002D +#define PCI_DEVICE_ID_NVIDIA_ITNT2 0x00A0 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101 +#define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX 0x0110 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_MX2 0x0111 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GO 0x0112 +#define PCI_DEVICE_ID_NVIDIA_QUADRO2_MXR 0x0113 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS 0x0150 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_GTS2 0x0151 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE2_ULTRA 0x0152 +#define PCI_DEVICE_ID_NVIDIA_QUADRO2_PRO 0x0153 +#define PCI_DEVICE_ID_NVIDIA_IGEFORCE2 0x01a0 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE3 0x0200 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_1 0x0201 +#define PCI_DEVICE_ID_NVIDIA_GEFORCE3_2 0x0202 +#define PCI_DEVICE_ID_NVIDIA_QUADRO_DDC 0x0203 + +#define PCI_VENDOR_ID_IMS 0x10e0 +#define PCI_DEVICE_ID_IMS_8849 0x8849 +#define PCI_DEVICE_ID_IMS_TT128 0x9128 +#define PCI_DEVICE_ID_IMS_TT3D 0x9135 + +#define PCI_VENDOR_ID_TEKRAM2 0x10e1 +#define PCI_DEVICE_ID_TEKRAM2_690c 0x690c + +#define PCI_VENDOR_ID_TUNDRA 0x10e3 +#define PCI_DEVICE_ID_TUNDRA_CA91C042 0x0000 + +#define PCI_VENDOR_ID_AMCC 0x10e8 +#define PCI_DEVICE_ID_AMCC_MYRINET 0x8043 +#define PCI_DEVICE_ID_AMCC_PARASTATION 0x8062 +#define PCI_DEVICE_ID_AMCC_S5933 0x807d +#define PCI_DEVICE_ID_AMCC_S5933_HEPC3 0x809c + +#define PCI_VENDOR_ID_INTERG 0x10ea +#define PCI_DEVICE_ID_INTERG_1680 0x1680 +#define PCI_DEVICE_ID_INTERG_1682 0x1682 +#define PCI_DEVICE_ID_INTERG_2000 0x2000 +#define PCI_DEVICE_ID_INTERG_2010 0x2010 +#define PCI_DEVICE_ID_INTERG_5000 0x5000 +#define PCI_DEVICE_ID_INTERG_5050 0x5050 + +#define PCI_VENDOR_ID_REALTEK 0x10ec +#define PCI_DEVICE_ID_REALTEK_8029 0x8029 +#define PCI_DEVICE_ID_REALTEK_8129 0x8129 +#define PCI_DEVICE_ID_REALTEK_8139 0x8139 + +#define PCI_VENDOR_ID_XILINX 0x10ee +#define PCI_DEVICE_ID_TURBOPAM 0x4020 + +#define PCI_VENDOR_ID_TRUEVISION 0x10fa +#define PCI_DEVICE_ID_TRUEVISION_T1000 0x000c + +#define PCI_VENDOR_ID_INIT 0x1101 +#define PCI_DEVICE_ID_INIT_320P 0x9100 +#define PCI_DEVICE_ID_INIT_360P 0x9500 + +#define PCI_VENDOR_ID_CREATIVE 0x1102 // duplicate: ECTIVA +#define PCI_DEVICE_ID_CREATIVE_EMU10K1 0x0002 + +#define PCI_VENDOR_ID_ECTIVA 0x1102 // duplicate: CREATIVE +#define PCI_DEVICE_ID_ECTIVA_EV1938 0x8938 + +#define PCI_VENDOR_ID_TTI 0x1103 +#define PCI_DEVICE_ID_TTI_HPT343 0x0003 +#define PCI_DEVICE_ID_TTI_HPT366 0x0004 +#define PCI_DEVICE_ID_TTI_HPT372 0x0005 +#define PCI_DEVICE_ID_TTI_HPT302 0x0006 +#define PCI_DEVICE_ID_TTI_HPT371 0x0007 +#define PCI_DEVICE_ID_TTI_HPT374 0x0008 + +#define PCI_VENDOR_ID_VIA 0x1106 +#define PCI_DEVICE_ID_VIA_8363_0 0x0305 +#define PCI_DEVICE_ID_VIA_8371_0 0x0391 +#define PCI_DEVICE_ID_VIA_8501_0 0x0501 +#define PCI_DEVICE_ID_VIA_82C505 0x0505 +#define PCI_DEVICE_ID_VIA_82C561 0x0561 +#define PCI_DEVICE_ID_VIA_82C586_1 0x0571 +#define PCI_DEVICE_ID_VIA_82C576 0x0576 +#define PCI_DEVICE_ID_VIA_82C585 0x0585 +#define PCI_DEVICE_ID_VIA_82C586_0 
0x0586 +#define PCI_DEVICE_ID_VIA_82C595 0x0595 +#define PCI_DEVICE_ID_VIA_82C596 0x0596 +#define PCI_DEVICE_ID_VIA_82C597_0 0x0597 +#define PCI_DEVICE_ID_VIA_82C598_0 0x0598 +#define PCI_DEVICE_ID_VIA_8601_0 0x0601 +#define PCI_DEVICE_ID_VIA_8605_0 0x0605 +#define PCI_DEVICE_ID_VIA_82C680 0x0680 +#define PCI_DEVICE_ID_VIA_82C686 0x0686 +#define PCI_DEVICE_ID_VIA_82C691 0x0691 +#define PCI_DEVICE_ID_VIA_82C693 0x0693 +#define PCI_DEVICE_ID_VIA_82C693_1 0x0698 +#define PCI_DEVICE_ID_VIA_82C926 0x0926 +#define PCI_DEVICE_ID_VIA_82C576_1 0x1571 +#define PCI_DEVICE_ID_VIA_82C595_97 0x1595 +#define PCI_DEVICE_ID_VIA_82C586_2 0x3038 +#define PCI_DEVICE_ID_VIA_82C586_3 0x3040 +#define PCI_DEVICE_ID_VIA_6305 0x3044 +#define PCI_DEVICE_ID_VIA_82C596_3 0x3050 +#define PCI_DEVICE_ID_VIA_82C596B_3 0x3051 +#define PCI_DEVICE_ID_VIA_82C686_4 0x3057 +#define PCI_DEVICE_ID_VIA_82C686_5 0x3058 +#define PCI_DEVICE_ID_VIA_8233_5 0x3059 +#define PCI_DEVICE_ID_VIA_8233_7 0x3065 +#define PCI_DEVICE_ID_VIA_82C686_6 0x3068 +#define PCI_DEVICE_ID_VIA_8233_0 0x3074 +#define PCI_DEVICE_ID_VIA_8633_0 0x3091 +#define PCI_DEVICE_ID_VIA_8367_0 0x3099 +#define PCI_DEVICE_ID_VIA_8622 0x3102 +#define PCI_DEVICE_ID_VIA_8233C_0 0x3109 +#define PCI_DEVICE_ID_VIA_8361 0x3112 +#define PCI_DEVICE_ID_VIA_8233A 0x3147 +#define PCI_DEVICE_ID_VIA_86C100A 0x6100 +#define PCI_DEVICE_ID_VIA_8231 0x8231 +#define PCI_DEVICE_ID_VIA_8231_4 0x8235 +#define PCI_DEVICE_ID_VIA_8365_1 0x8305 +#define PCI_DEVICE_ID_VIA_8371_1 0x8391 +#define PCI_DEVICE_ID_VIA_8501_1 0x8501 +#define PCI_DEVICE_ID_VIA_82C597_1 0x8597 +#define PCI_DEVICE_ID_VIA_82C598_1 0x8598 +#define PCI_DEVICE_ID_VIA_8601_1 0x8601 +#define PCI_DEVICE_ID_VIA_8505_1 0x8605 +#define PCI_DEVICE_ID_VIA_8633_1 0xB091 +#define PCI_DEVICE_ID_VIA_8367_1 0xB099 + +#define PCI_VENDOR_ID_SIEMENS 0x110A +#define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102 + +#define PCI_VENDOR_ID_SMC2 0x1113 +#define PCI_DEVICE_ID_SMC2_1211TX 0x1211 + +#define PCI_VENDOR_ID_VORTEX 0x1119 +#define PCI_DEVICE_ID_VORTEX_GDT60x0 0x0000 +#define PCI_DEVICE_ID_VORTEX_GDT6000B 0x0001 +#define PCI_DEVICE_ID_VORTEX_GDT6x10 0x0002 +#define PCI_DEVICE_ID_VORTEX_GDT6x20 0x0003 +#define PCI_DEVICE_ID_VORTEX_GDT6530 0x0004 +#define PCI_DEVICE_ID_VORTEX_GDT6550 0x0005 +#define PCI_DEVICE_ID_VORTEX_GDT6x17 0x0006 +#define PCI_DEVICE_ID_VORTEX_GDT6x27 0x0007 +#define PCI_DEVICE_ID_VORTEX_GDT6537 0x0008 +#define PCI_DEVICE_ID_VORTEX_GDT6557 0x0009 +#define PCI_DEVICE_ID_VORTEX_GDT6x15 0x000a +#define PCI_DEVICE_ID_VORTEX_GDT6x25 0x000b +#define PCI_DEVICE_ID_VORTEX_GDT6535 0x000c +#define PCI_DEVICE_ID_VORTEX_GDT6555 0x000d +#define PCI_DEVICE_ID_VORTEX_GDT6x17RP 0x0100 +#define PCI_DEVICE_ID_VORTEX_GDT6x27RP 0x0101 +#define PCI_DEVICE_ID_VORTEX_GDT6537RP 0x0102 +#define PCI_DEVICE_ID_VORTEX_GDT6557RP 0x0103 +#define PCI_DEVICE_ID_VORTEX_GDT6x11RP 0x0104 +#define PCI_DEVICE_ID_VORTEX_GDT6x21RP 0x0105 +#define PCI_DEVICE_ID_VORTEX_GDT6x17RP1 0x0110 +#define PCI_DEVICE_ID_VORTEX_GDT6x27RP1 0x0111 +#define PCI_DEVICE_ID_VORTEX_GDT6537RP1 0x0112 +#define PCI_DEVICE_ID_VORTEX_GDT6557RP1 0x0113 +#define PCI_DEVICE_ID_VORTEX_GDT6x11RP1 0x0114 +#define PCI_DEVICE_ID_VORTEX_GDT6x21RP1 0x0115 +#define PCI_DEVICE_ID_VORTEX_GDT6x17RP2 0x0120 +#define PCI_DEVICE_ID_VORTEX_GDT6x27RP2 0x0121 +#define PCI_DEVICE_ID_VORTEX_GDT6537RP2 0x0122 +#define PCI_DEVICE_ID_VORTEX_GDT6557RP2 0x0123 +#define PCI_DEVICE_ID_VORTEX_GDT6x11RP2 0x0124 +#define PCI_DEVICE_ID_VORTEX_GDT6x21RP2 0x0125 + +#define PCI_VENDOR_ID_EF 0x111a +#define PCI_DEVICE_ID_EF_ATM_FPGA 
0x0000 +#define PCI_DEVICE_ID_EF_ATM_ASIC 0x0002 + +#define PCI_VENDOR_ID_IDT 0x111d +#define PCI_DEVICE_ID_IDT_IDT77201 0x0001 + +#define PCI_VENDOR_ID_FORE 0x1127 +#define PCI_DEVICE_ID_FORE_PCA200PC 0x0210 +#define PCI_DEVICE_ID_FORE_PCA200E 0x0300 + +#define PCI_VENDOR_ID_IMAGINGTECH 0x112f +#define PCI_DEVICE_ID_IMAGINGTECH_ICPCI 0x0000 + +#define PCI_VENDOR_ID_PHILIPS 0x1131 +#define PCI_DEVICE_ID_PHILIPS_SAA7145 0x7145 +#define PCI_DEVICE_ID_PHILIPS_SAA7146 0x7146 +#define PCI_DEVICE_ID_PHILIPS_SAA9730 0x9730 + +#define PCI_VENDOR_ID_EICON 0x1133 +#define PCI_DEVICE_ID_EICON_DIVA20PRO 0xe001 +#define PCI_DEVICE_ID_EICON_DIVA20 0xe002 +#define PCI_DEVICE_ID_EICON_DIVA20PRO_U 0xe003 +#define PCI_DEVICE_ID_EICON_DIVA20_U 0xe004 +#define PCI_DEVICE_ID_EICON_DIVA201 0xe005 +#define PCI_DEVICE_ID_EICON_MAESTRA 0xe010 +#define PCI_DEVICE_ID_EICON_MAESTRAQ 0xe012 +#define PCI_DEVICE_ID_EICON_MAESTRAQ_U 0xe013 +#define PCI_DEVICE_ID_EICON_MAESTRAP 0xe014 + +#define PCI_VENDOR_ID_CYCLONE 0x113c +#define PCI_DEVICE_ID_CYCLONE_SDK 0x0001 + +#define PCI_VENDOR_ID_ALLIANCE 0x1142 +#define PCI_DEVICE_ID_ALLIANCE_PROMOTIO 0x3210 +#define PCI_DEVICE_ID_ALLIANCE_PROVIDEO 0x6422 +#define PCI_DEVICE_ID_ALLIANCE_AT24 0x6424 +#define PCI_DEVICE_ID_ALLIANCE_AT3D 0x643d + +#define PCI_VENDOR_ID_SYSKONNECT 0x1148 +#define PCI_DEVICE_ID_SYSKONNECT_FP 0x4000 +#define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 +#define PCI_DEVICE_ID_SYSKONNECT_GE 0x4300 + +#define PCI_VENDOR_ID_VMIC 0x114a +#define PCI_DEVICE_ID_VMIC_VME 0x7587 + +#define PCI_VENDOR_ID_DIGI 0x114f +#define PCI_DEVICE_ID_DIGI_EPC 0x0002 +#define PCI_DEVICE_ID_DIGI_RIGHTSWITCH 0x0003 +#define PCI_DEVICE_ID_DIGI_XEM 0x0004 +#define PCI_DEVICE_ID_DIGI_XR 0x0005 +#define PCI_DEVICE_ID_DIGI_CX 0x0006 +#define PCI_DEVICE_ID_DIGI_XRJ 0x0009 +#define PCI_DEVICE_ID_DIGI_EPCJ 0x000a +#define PCI_DEVICE_ID_DIGI_XR_920 0x0027 +#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070 +#define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071 +#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072 +#define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073 + +#define PCI_VENDOR_ID_MUTECH 0x1159 +#define PCI_DEVICE_ID_MUTECH_MV1000 0x0001 + +#define PCI_VENDOR_ID_XIRCOM 0x115d +#define PCI_DEVICE_ID_XIRCOM_X3201_ETH 0x0003 +#define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103 + +#define PCI_VENDOR_ID_RENDITION 0x1163 +#define PCI_DEVICE_ID_RENDITION_VERITE 0x0001 +#define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000 + +#define PCI_VENDOR_ID_SERVERWORKS 0x1166 +#define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008 +#define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009 +#define PCI_DEVICE_ID_SERVERWORKS_CIOB30 0x0010 +#define PCI_DEVICE_ID_SERVERWORKS_CMIC_HE 0x0011 +#define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017 +#define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200 +#define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201 +#define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203 +#define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211 +#define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212 +#define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213 +#define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220 +#define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB +#define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221 +#define PCI_DEVICE_ID_SERVERWORKS_GCLE 0x0225 +#define PCI_DEVICE_ID_SERVERWORKS_GCLE2 0x0227 +#define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230 + +#define PCI_VENDOR_ID_SBE 0x1176 +#define PCI_DEVICE_ID_SBE_WANXL100 0x0301 +#define PCI_DEVICE_ID_SBE_WANXL200 0x0302 +#define PCI_DEVICE_ID_SBE_WANXL400 0x0104 + +#define PCI_VENDOR_ID_TOSHIBA 0x1179 +#define PCI_DEVICE_ID_TOSHIBA_601 
0x0601 +#define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a +#define PCI_DEVICE_ID_TOSHIBA_TOPIC97 0x060f + +#define PCI_VENDOR_ID_TOSHIBA_2 0x102f +#define PCI_DEVICE_ID_TOSHIBA_TX3927 0x000a +#define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030 +#define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180 + +#define PCI_VENDOR_ID_RICOH 0x1180 +#define PCI_DEVICE_ID_RICOH_RL5C465 0x0465 +#define PCI_DEVICE_ID_RICOH_RL5C466 0x0466 +#define PCI_DEVICE_ID_RICOH_RL5C475 0x0475 +#define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 +#define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 + +#define PCI_VENDOR_ID_ARTOP 0x1191 +#define PCI_DEVICE_ID_ARTOP_ATP8400 0x0004 +#define PCI_DEVICE_ID_ARTOP_ATP850UF 0x0005 +#define PCI_DEVICE_ID_ARTOP_ATP860 0x0006 +#define PCI_DEVICE_ID_ARTOP_ATP860R 0x0007 +#define PCI_DEVICE_ID_ARTOP_ATP865 0x0008 +#define PCI_DEVICE_ID_ARTOP_ATP865R 0x0009 +#define PCI_DEVICE_ID_ARTOP_AEC7610 0x8002 +#define PCI_DEVICE_ID_ARTOP_AEC7612UW 0x8010 +#define PCI_DEVICE_ID_ARTOP_AEC7612U 0x8020 +#define PCI_DEVICE_ID_ARTOP_AEC7612S 0x8030 +#define PCI_DEVICE_ID_ARTOP_AEC7612D 0x8040 +#define PCI_DEVICE_ID_ARTOP_AEC7612SUW 0x8050 +#define PCI_DEVICE_ID_ARTOP_8060 0x8060 + +#define PCI_VENDOR_ID_ZEITNET 0x1193 +#define PCI_DEVICE_ID_ZEITNET_1221 0x0001 +#define PCI_DEVICE_ID_ZEITNET_1225 0x0002 + +#define PCI_VENDOR_ID_OMEGA 0x119b +#define PCI_DEVICE_ID_OMEGA_82C092G 0x1221 + +#define PCI_VENDOR_ID_FUJITSU_ME 0x119e +#define PCI_DEVICE_ID_FUJITSU_FS155 0x0001 +#define PCI_DEVICE_ID_FUJITSU_FS50 0x0003 + +#define PCI_SUBVENDOR_ID_KEYSPAN 0x11a9 +#define PCI_SUBDEVICE_ID_KEYSPAN_SX2 0x5334 + +#define PCI_VENDOR_ID_GALILEO 0x11ab +#define PCI_DEVICE_ID_GALILEO_GT64011 0x4146 +#define PCI_DEVICE_ID_GALILEO_GT64111 0x4146 +#define PCI_DEVICE_ID_GALILEO_GT96100 0x9652 +#define PCI_DEVICE_ID_GALILEO_GT96100A 0x9653 + +#define PCI_VENDOR_ID_LITEON 0x11ad +#define PCI_DEVICE_ID_LITEON_LNE100TX 0x0002 + +#define PCI_VENDOR_ID_V3 0x11b0 +#define PCI_DEVICE_ID_V3_V960 0x0001 +#define PCI_DEVICE_ID_V3_V350 0x0001 +#define PCI_DEVICE_ID_V3_V961 0x0002 +#define PCI_DEVICE_ID_V3_V351 0x0002 + +#define PCI_VENDOR_ID_NP 0x11bc +#define PCI_DEVICE_ID_NP_PCI_FDDI 0x0001 + +#define PCI_VENDOR_ID_ATT 0x11c1 +#define PCI_DEVICE_ID_ATT_L56XMF 0x0440 +#define PCI_DEVICE_ID_ATT_VENUS_MODEM 0x480 + +#define PCI_VENDOR_ID_SPECIALIX 0x11cb +#define PCI_DEVICE_ID_SPECIALIX_IO8 0x2000 +#define PCI_DEVICE_ID_SPECIALIX_XIO 0x4000 +#define PCI_DEVICE_ID_SPECIALIX_RIO 0x8000 +#define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004 + +#define PCI_VENDOR_ID_AURAVISION 0x11d1 +#define PCI_DEVICE_ID_AURAVISION_VXP524 0x01f7 + +#define PCI_VENDOR_ID_ANALOG_DEVICES 0x11d4 +#define PCI_DEVICE_ID_AD1889JS 0x1889 + +#define PCI_VENDOR_ID_IKON 0x11d5 +#define PCI_DEVICE_ID_IKON_10115 0x0115 +#define PCI_DEVICE_ID_IKON_10117 0x0117 + +#define PCI_VENDOR_ID_ZORAN 0x11de +#define PCI_DEVICE_ID_ZORAN_36057 0x6057 +#define PCI_DEVICE_ID_ZORAN_36120 0x6120 + +#define PCI_VENDOR_ID_KINETIC 0x11f4 +#define PCI_DEVICE_ID_KINETIC_2915 0x2915 + +#define PCI_VENDOR_ID_COMPEX 0x11f6 +#define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112 +#define PCI_DEVICE_ID_COMPEX_RL2000 0x1401 + +#define PCI_VENDOR_ID_RP 0x11fe +#define PCI_DEVICE_ID_RP32INTF 0x0001 +#define PCI_DEVICE_ID_RP8INTF 0x0002 +#define PCI_DEVICE_ID_RP16INTF 0x0003 +#define PCI_DEVICE_ID_RP4QUAD 0x0004 +#define PCI_DEVICE_ID_RP8OCTA 0x0005 +#define PCI_DEVICE_ID_RP8J 0x0006 +#define PCI_DEVICE_ID_RPP4 0x000A +#define PCI_DEVICE_ID_RPP8 0x000B +#define PCI_DEVICE_ID_RP8M 0x000C + +#define PCI_VENDOR_ID_CYCLADES 0x120e +#define 
PCI_DEVICE_ID_CYCLOM_Y_Lo 0x0100 +#define PCI_DEVICE_ID_CYCLOM_Y_Hi 0x0101 +#define PCI_DEVICE_ID_CYCLOM_4Y_Lo 0x0102 +#define PCI_DEVICE_ID_CYCLOM_4Y_Hi 0x0103 +#define PCI_DEVICE_ID_CYCLOM_8Y_Lo 0x0104 +#define PCI_DEVICE_ID_CYCLOM_8Y_Hi 0x0105 +#define PCI_DEVICE_ID_CYCLOM_Z_Lo 0x0200 +#define PCI_DEVICE_ID_CYCLOM_Z_Hi 0x0201 +#define PCI_DEVICE_ID_PC300_RX_2 0x0300 +#define PCI_DEVICE_ID_PC300_RX_1 0x0301 +#define PCI_DEVICE_ID_PC300_TE_2 0x0310 +#define PCI_DEVICE_ID_PC300_TE_1 0x0311 + +#define PCI_VENDOR_ID_ESSENTIAL 0x120f +#define PCI_DEVICE_ID_ESSENTIAL_ROADRUNNER 0x0001 + +#define PCI_VENDOR_ID_O2 0x1217 +#define PCI_DEVICE_ID_O2_6729 0x6729 +#define PCI_DEVICE_ID_O2_6730 0x673a +#define PCI_DEVICE_ID_O2_6832 0x6832 +#define PCI_DEVICE_ID_O2_6836 0x6836 + +#define PCI_VENDOR_ID_3DFX 0x121a +#define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 +#define PCI_DEVICE_ID_3DFX_VOODOO2 0x0002 +#define PCI_DEVICE_ID_3DFX_BANSHEE 0x0003 +#define PCI_DEVICE_ID_3DFX_VOODOO3 0x0005 + +#define PCI_VENDOR_ID_SIGMADES 0x1236 +#define PCI_DEVICE_ID_SIGMADES_6425 0x6401 + +#define PCI_VENDOR_ID_CCUBE 0x123f + +#define PCI_VENDOR_ID_AVM 0x1244 +#define PCI_DEVICE_ID_AVM_B1 0x0700 +#define PCI_DEVICE_ID_AVM_C4 0x0800 +#define PCI_DEVICE_ID_AVM_A1 0x0a00 +#define PCI_DEVICE_ID_AVM_A1_V2 0x0e00 +#define PCI_DEVICE_ID_AVM_C2 0x1100 +#define PCI_DEVICE_ID_AVM_T1 0x1200 + +#define PCI_VENDOR_ID_DIPIX 0x1246 + +#define PCI_VENDOR_ID_STALLION 0x124d +#define PCI_DEVICE_ID_STALLION_ECHPCI832 0x0000 +#define PCI_DEVICE_ID_STALLION_ECHPCI864 0x0002 +#define PCI_DEVICE_ID_STALLION_EIOPCI 0x0003 + +#define PCI_VENDOR_ID_OPTIBASE 0x1255 +#define PCI_DEVICE_ID_OPTIBASE_FORGE 0x1110 +#define PCI_DEVICE_ID_OPTIBASE_FUSION 0x1210 +#define PCI_DEVICE_ID_OPTIBASE_VPLEX 0x2110 +#define PCI_DEVICE_ID_OPTIBASE_VPLEXCC 0x2120 +#define PCI_DEVICE_ID_OPTIBASE_VQUEST 0x2130 + +#define PCI_VENDOR_ID_ESS 0x125d +#define PCI_DEVICE_ID_ESS_ESS1968 0x1968 +#define PCI_DEVICE_ID_ESS_AUDIOPCI 0x1969 +#define PCI_DEVICE_ID_ESS_ESS1978 0x1978 + +#define PCI_VENDOR_ID_SATSAGEM 0x1267 +#define PCI_DEVICE_ID_SATSAGEM_NICCY 0x1016 +#define PCI_DEVICE_ID_SATSAGEM_PCR2101 0x5352 +#define PCI_DEVICE_ID_SATSAGEM_TELSATTURBO 0x5a4b + +#define PCI_VENDOR_ID_HUGHES 0x1273 +#define PCI_DEVICE_ID_HUGHES_DIRECPC 0x0002 + +#define PCI_VENDOR_ID_ENSONIQ 0x1274 +#define PCI_DEVICE_ID_ENSONIQ_CT5880 0x5880 +#define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 +#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371 + +#define PCI_VENDOR_ID_ROCKWELL 0x127A + +#define PCI_VENDOR_ID_ITE 0x1283 +#define PCI_DEVICE_ID_ITE_IT8172G 0x8172 +#define PCI_DEVICE_ID_ITE_IT8172G_AUDIO 0x0801 +#define PCI_DEVICE_ID_ITE_8872 0x8872 + +#define PCI_DEVICE_ID_ITE_IT8330G_0 0xe886 + +/* formerly Platform Tech */ +#define PCI_VENDOR_ID_ESS_OLD 0x1285 +#define PCI_DEVICE_ID_ESS_ESS0100 0x0100 + +#define PCI_VENDOR_ID_ALTEON 0x12ae +#define PCI_DEVICE_ID_ALTEON_ACENIC 0x0001 + +#define PCI_VENDOR_ID_USR 0x12B9 + +#define PCI_SUBVENDOR_ID_CONNECT_TECH 0x12c4 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232 0x0001 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232 0x0002 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232 0x0003 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485 0x0004 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4 0x0005 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485 0x0006 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2 0x0007 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485 0x0008 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6 0x0009 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1 0x000A 
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1 0x000B + +#define PCI_VENDOR_ID_PICTUREL 0x12c5 +#define PCI_DEVICE_ID_PICTUREL_PCIVST 0x0081 + +#define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2 +#define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018 + +#define PCI_SUBVENDOR_ID_CHASE_PCIFAST 0x12E0 +#define PCI_SUBDEVICE_ID_CHASE_PCIFAST4 0x0031 +#define PCI_SUBDEVICE_ID_CHASE_PCIFAST8 0x0021 +#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16 0x0011 +#define PCI_SUBDEVICE_ID_CHASE_PCIFAST16FMC 0x0041 +#define PCI_SUBVENDOR_ID_CHASE_PCIRAS 0x124D +#define PCI_SUBDEVICE_ID_CHASE_PCIRAS4 0xF001 +#define PCI_SUBDEVICE_ID_CHASE_PCIRAS8 0xF010 + +#define PCI_VENDOR_ID_AUREAL 0x12eb +#define PCI_DEVICE_ID_AUREAL_VORTEX_1 0x0001 +#define PCI_DEVICE_ID_AUREAL_VORTEX_2 0x0002 + +#define PCI_VENDOR_ID_CBOARDS 0x1307 +#define PCI_DEVICE_ID_CBOARDS_DAS1602_16 0x0001 + +#define PCI_VENDOR_ID_SIIG 0x131f +#define PCI_DEVICE_ID_SIIG_1S_10x_550 0x1000 +#define PCI_DEVICE_ID_SIIG_1S_10x_650 0x1001 +#define PCI_DEVICE_ID_SIIG_1S_10x_850 0x1002 +#define PCI_DEVICE_ID_SIIG_1S1P_10x_550 0x1010 +#define PCI_DEVICE_ID_SIIG_1S1P_10x_650 0x1011 +#define PCI_DEVICE_ID_SIIG_1S1P_10x_850 0x1012 +#define PCI_DEVICE_ID_SIIG_1P_10x 0x1020 +#define PCI_DEVICE_ID_SIIG_2P_10x 0x1021 +#define PCI_DEVICE_ID_SIIG_2S_10x_550 0x1030 +#define PCI_DEVICE_ID_SIIG_2S_10x_650 0x1031 +#define PCI_DEVICE_ID_SIIG_2S_10x_850 0x1032 +#define PCI_DEVICE_ID_SIIG_2S1P_10x_550 0x1034 +#define PCI_DEVICE_ID_SIIG_2S1P_10x_650 0x1035 +#define PCI_DEVICE_ID_SIIG_2S1P_10x_850 0x1036 +#define PCI_DEVICE_ID_SIIG_4S_10x_550 0x1050 +#define PCI_DEVICE_ID_SIIG_4S_10x_650 0x1051 +#define PCI_DEVICE_ID_SIIG_4S_10x_850 0x1052 +#define PCI_DEVICE_ID_SIIG_1S_20x_550 0x2000 +#define PCI_DEVICE_ID_SIIG_1S_20x_650 0x2001 +#define PCI_DEVICE_ID_SIIG_1S_20x_850 0x2002 +#define PCI_DEVICE_ID_SIIG_1P_20x 0x2020 +#define PCI_DEVICE_ID_SIIG_2P_20x 0x2021 +#define PCI_DEVICE_ID_SIIG_2S_20x_550 0x2030 +#define PCI_DEVICE_ID_SIIG_2S_20x_650 0x2031 +#define PCI_DEVICE_ID_SIIG_2S_20x_850 0x2032 +#define PCI_DEVICE_ID_SIIG_2P1S_20x_550 0x2040 +#define PCI_DEVICE_ID_SIIG_2P1S_20x_650 0x2041 +#define PCI_DEVICE_ID_SIIG_2P1S_20x_850 0x2042 +#define PCI_DEVICE_ID_SIIG_1S1P_20x_550 0x2010 +#define PCI_DEVICE_ID_SIIG_1S1P_20x_650 0x2011 +#define PCI_DEVICE_ID_SIIG_1S1P_20x_850 0x2012 +#define PCI_DEVICE_ID_SIIG_4S_20x_550 0x2050 +#define PCI_DEVICE_ID_SIIG_4S_20x_650 0x2051 +#define PCI_DEVICE_ID_SIIG_4S_20x_850 0x2052 +#define PCI_DEVICE_ID_SIIG_2S1P_20x_550 0x2060 +#define PCI_DEVICE_ID_SIIG_2S1P_20x_650 0x2061 +#define PCI_DEVICE_ID_SIIG_2S1P_20x_850 0x2062 + +#define PCI_VENDOR_ID_DOMEX 0x134a +#define PCI_DEVICE_ID_DOMEX_DMX3191D 0x0001 + +#define PCI_VENDOR_ID_QUATECH 0x135C +#define PCI_DEVICE_ID_QUATECH_QSC100 0x0010 +#define PCI_DEVICE_ID_QUATECH_DSC100 0x0020 +#define PCI_DEVICE_ID_QUATECH_DSC200 0x0030 +#define PCI_DEVICE_ID_QUATECH_QSC200 0x0040 +#define PCI_DEVICE_ID_QUATECH_ESC100D 0x0050 +#define PCI_DEVICE_ID_QUATECH_ESC100M 0x0060 + +#define PCI_VENDOR_ID_SEALEVEL 0x135e +#define PCI_DEVICE_ID_SEALEVEL_U530 0x7101 +#define PCI_DEVICE_ID_SEALEVEL_UCOMM2 0x7201 +#define PCI_DEVICE_ID_SEALEVEL_UCOMM422 0x7402 +#define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202 +#define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401 +#define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801 + +#define PCI_VENDOR_ID_HYPERCOPE 0x1365 +#define PCI_DEVICE_ID_HYPERCOPE_PLX 0x9050 +#define PCI_SUBDEVICE_ID_HYPERCOPE_OLD_ERGO 0x0104 +#define PCI_SUBDEVICE_ID_HYPERCOPE_ERGO 0x0106 +#define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107 +#define 
PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108 +#define PCI_SUBDEVICE_ID_HYPERCOPE_PLEXUS 0x0109 + +#define PCI_VENDOR_ID_KAWASAKI 0x136b +#define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01 + +#define PCI_VENDOR_ID_LMC 0x1376 +#define PCI_DEVICE_ID_LMC_HSSI 0x0003 +#define PCI_DEVICE_ID_LMC_DS3 0x0004 +#define PCI_DEVICE_ID_LMC_SSI 0x0005 +#define PCI_DEVICE_ID_LMC_T1 0x0006 + +#define PCI_VENDOR_ID_NETGEAR 0x1385 +#define PCI_DEVICE_ID_NETGEAR_GA620 0x620a +#define PCI_DEVICE_ID_NETGEAR_GA622 0x622a + +#define PCI_VENDOR_ID_APPLICOM 0x1389 +#define PCI_DEVICE_ID_APPLICOM_PCIGENERIC 0x0001 +#define PCI_DEVICE_ID_APPLICOM_PCI2000IBS_CAN 0x0002 +#define PCI_DEVICE_ID_APPLICOM_PCI2000PFB 0x0003 + +#define PCI_VENDOR_ID_MOXA 0x1393 +#define PCI_DEVICE_ID_MOXA_C104 0x1040 +#define PCI_DEVICE_ID_MOXA_C168 0x1680 +#define PCI_DEVICE_ID_MOXA_CP204J 0x2040 +#define PCI_DEVICE_ID_MOXA_C218 0x2180 +#define PCI_DEVICE_ID_MOXA_C320 0x3200 + +#define PCI_VENDOR_ID_CCD 0x1397 +#define PCI_DEVICE_ID_CCD_2BD0 0x2bd0 +#define PCI_DEVICE_ID_CCD_B000 0xb000 +#define PCI_DEVICE_ID_CCD_B006 0xb006 +#define PCI_DEVICE_ID_CCD_B007 0xb007 +#define PCI_DEVICE_ID_CCD_B008 0xb008 +#define PCI_DEVICE_ID_CCD_B009 0xb009 +#define PCI_DEVICE_ID_CCD_B00A 0xb00a +#define PCI_DEVICE_ID_CCD_B00B 0xb00b +#define PCI_DEVICE_ID_CCD_B00C 0xb00c +#define PCI_DEVICE_ID_CCD_B100 0xb100 + +#define PCI_VENDOR_ID_3WARE 0x13C1 +#define PCI_DEVICE_ID_3WARE_1000 0x1000 + +#define PCI_VENDOR_ID_ABOCOM 0x13D1 +#define PCI_DEVICE_ID_ABOCOM_2BD1 0x2BD1 + +#define PCI_VENDOR_ID_CMEDIA 0x13f6 +#define PCI_DEVICE_ID_CMEDIA_CM8338A 0x0100 +#define PCI_DEVICE_ID_CMEDIA_CM8338B 0x0101 +#define PCI_DEVICE_ID_CMEDIA_CM8738 0x0111 +#define PCI_DEVICE_ID_CMEDIA_CM8738B 0x0112 + +#define PCI_VENDOR_ID_LAVA 0x1407 +#define PCI_DEVICE_ID_LAVA_DSERIAL 0x0100 /* 2x 16550 */ +#define PCI_DEVICE_ID_LAVA_QUATRO_A 0x0101 /* 2x 16550, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_QUATRO_B 0x0102 /* 2x 16550, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_OCTO_A 0x0180 /* 4x 16550A, half of 8 port */ +#define PCI_DEVICE_ID_LAVA_OCTO_B 0x0181 /* 4x 16550A, half of 8 port */ +#define PCI_DEVICE_ID_LAVA_PORT_PLUS 0x0200 /* 2x 16650 */ +#define PCI_DEVICE_ID_LAVA_QUAD_A 0x0201 /* 2x 16650, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_QUAD_B 0x0202 /* 2x 16650, half of 4 port */ +#define PCI_DEVICE_ID_LAVA_SSERIAL 0x0500 /* 1x 16550 */ +#define PCI_DEVICE_ID_LAVA_PORT_650 0x0600 /* 1x 16650 */ +#define PCI_DEVICE_ID_LAVA_PARALLEL 0x8000 +#define PCI_DEVICE_ID_LAVA_DUAL_PAR_A 0x8002 /* The Lava Dual Parallel is */ +#define PCI_DEVICE_ID_LAVA_DUAL_PAR_B 0x8003 /* two PCI devices on a card */ +#define PCI_DEVICE_ID_LAVA_BOCA_IOPPAR 0x8800 + +#define PCI_VENDOR_ID_TIMEDIA 0x1409 +#define PCI_DEVICE_ID_TIMEDIA_1889 0x7168 + +#define PCI_VENDOR_ID_OXSEMI 0x1415 +#define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403 +#define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 +#define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 +#define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 + +#define PCI_VENDOR_ID_AIRONET 0x14b9 +#define PCI_DEVICE_ID_AIRONET_4800_1 0x0001 +#define PCI_DEVICE_ID_AIRONET_4800 0x4500 // values switched? 
see +#define PCI_DEVICE_ID_AIRONET_4500 0x4800 // drivers/net/aironet4500_card.c + +#define PCI_VENDOR_ID_TITAN 0x14D2 +#define PCI_DEVICE_ID_TITAN_010L 0x8001 +#define PCI_DEVICE_ID_TITAN_100L 0x8010 +#define PCI_DEVICE_ID_TITAN_110L 0x8011 +#define PCI_DEVICE_ID_TITAN_200L 0x8020 +#define PCI_DEVICE_ID_TITAN_210L 0x8021 +#define PCI_DEVICE_ID_TITAN_400L 0x8040 +#define PCI_DEVICE_ID_TITAN_800L 0x8080 +#define PCI_DEVICE_ID_TITAN_100 0xA001 +#define PCI_DEVICE_ID_TITAN_200 0xA005 +#define PCI_DEVICE_ID_TITAN_400 0xA003 +#define PCI_DEVICE_ID_TITAN_800B 0xA004 + +#define PCI_VENDOR_ID_PANACOM 0x14d4 +#define PCI_DEVICE_ID_PANACOM_QUADMODEM 0x0400 +#define PCI_DEVICE_ID_PANACOM_DUALMODEM 0x0402 + +#define PCI_VENDOR_ID_AFAVLAB 0x14db +#define PCI_DEVICE_ID_AFAVLAB_P028 0x2180 + +#define PCI_VENDOR_ID_BROADCOM 0x14e4 +#define PCI_DEVICE_ID_TIGON3_5700 0x1644 +#define PCI_DEVICE_ID_TIGON3_5701 0x1645 +#define PCI_DEVICE_ID_TIGON3_5702 0x1646 +#define PCI_DEVICE_ID_TIGON3_5703 0x1647 +#define PCI_DEVICE_ID_TIGON3_5702FE 0x164d +#define PCI_DEVICE_ID_TIGON3_5702X 0x16a6 +#define PCI_DEVICE_ID_TIGON3_5703X 0x16a7 + +#define PCI_VENDOR_ID_SYBA 0x1592 +#define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782 +#define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783 + +#define PCI_VENDOR_ID_MORETON 0x15aa +#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000 + +#define PCI_VENDOR_ID_ZOLTRIX 0x15b0 +#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0 + +#define PCI_VENDOR_ID_PDC 0x15e9 +#define PCI_DEVICE_ID_PDC_1841 0x1841 + +#define PCI_VENDOR_ID_ALTIMA 0x173b +#define PCI_DEVICE_ID_ALTIMA_AC1000 0x03e8 +#define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea + +#define PCI_VENDOR_ID_SYMPHONY 0x1c1c +#define PCI_DEVICE_ID_SYMPHONY_101 0x0001 + +#define PCI_VENDOR_ID_TEKRAM 0x1de1 +#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29 + +#define PCI_VENDOR_ID_3DLABS 0x3d3d +#define PCI_DEVICE_ID_3DLABS_300SX 0x0001 +#define PCI_DEVICE_ID_3DLABS_500TX 0x0002 +#define PCI_DEVICE_ID_3DLABS_DELTA 0x0003 +#define PCI_DEVICE_ID_3DLABS_PERMEDIA 0x0004 +#define PCI_DEVICE_ID_3DLABS_MX 0x0006 +#define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007 +#define PCI_DEVICE_ID_3DLABS_GAMMA 0x0008 +#define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009 + +#define PCI_VENDOR_ID_AVANCE 0x4005 +#define PCI_DEVICE_ID_AVANCE_ALG2064 0x2064 +#define PCI_DEVICE_ID_AVANCE_2302 0x2302 + +#define PCI_VENDOR_ID_AKS 0x416c +#define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 +#define PCI_DEVICE_ID_AKS_CPC 0x0200 + +#define PCI_VENDOR_ID_NETVIN 0x4a14 +#define PCI_DEVICE_ID_NETVIN_NV5000SC 0x5000 + +#define PCI_VENDOR_ID_S3 0x5333 +#define PCI_DEVICE_ID_S3_PLATO_PXS 0x0551 +#define PCI_DEVICE_ID_S3_ViRGE 0x5631 +#define PCI_DEVICE_ID_S3_TRIO 0x8811 +#define PCI_DEVICE_ID_S3_AURORA64VP 0x8812 +#define PCI_DEVICE_ID_S3_TRIO64UVP 0x8814 +#define PCI_DEVICE_ID_S3_ViRGE_VX 0x883d +#define PCI_DEVICE_ID_S3_868 0x8880 +#define PCI_DEVICE_ID_S3_928 0x88b0 +#define PCI_DEVICE_ID_S3_864_1 0x88c0 +#define PCI_DEVICE_ID_S3_864_2 0x88c1 +#define PCI_DEVICE_ID_S3_964_1 0x88d0 +#define PCI_DEVICE_ID_S3_964_2 0x88d1 +#define PCI_DEVICE_ID_S3_968 0x88f0 +#define PCI_DEVICE_ID_S3_TRIO64V2 0x8901 +#define PCI_DEVICE_ID_S3_PLATO_PXG 0x8902 +#define PCI_DEVICE_ID_S3_ViRGE_DXGX 0x8a01 +#define PCI_DEVICE_ID_S3_ViRGE_GX2 0x8a10 +#define PCI_DEVICE_ID_S3_ViRGE_MX 0x8c01 +#define PCI_DEVICE_ID_S3_ViRGE_MXP 0x8c02 +#define PCI_DEVICE_ID_S3_ViRGE_MXPMV 0x8c03 +#define PCI_DEVICE_ID_S3_SONICVIBES 0xca00 + +#define PCI_VENDOR_ID_DUNORD 0x5544 +#define PCI_DEVICE_ID_DUNORD_I3000 0x0001 +#define PCI_VENDOR_ID_GENROCO 0x5555 +#define 
PCI_DEVICE_ID_GENROCO_HFP832 0x0003 + +#define PCI_VENDOR_ID_DCI 0x6666 +#define PCI_DEVICE_ID_DCI_PCCOM4 0x0001 +#define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 + +#define PCI_VENDOR_ID_INTEL 0x8086 +#define PCI_DEVICE_ID_INTEL_21145 0x0039 +#define PCI_DEVICE_ID_INTEL_82375 0x0482 +#define PCI_DEVICE_ID_INTEL_82424 0x0483 +#define PCI_DEVICE_ID_INTEL_82378 0x0484 +#define PCI_DEVICE_ID_INTEL_82430 0x0486 +#define PCI_DEVICE_ID_INTEL_82434 0x04a3 +#define PCI_DEVICE_ID_INTEL_I960 0x0960 +#define PCI_DEVICE_ID_INTEL_82562ET 0x1031 +#define PCI_DEVICE_ID_INTEL_82559ER 0x1209 +#define PCI_DEVICE_ID_INTEL_82092AA_0 0x1221 +#define PCI_DEVICE_ID_INTEL_82092AA_1 0x1222 +#define PCI_DEVICE_ID_INTEL_7116 0x1223 +#define PCI_DEVICE_ID_INTEL_82596 0x1226 +#define PCI_DEVICE_ID_INTEL_82865 0x1227 +#define PCI_DEVICE_ID_INTEL_82557 0x1229 +#define PCI_DEVICE_ID_INTEL_82437 0x122d +#define PCI_DEVICE_ID_INTEL_82371FB_0 0x122e +#define PCI_DEVICE_ID_INTEL_82371FB_1 0x1230 +#define PCI_DEVICE_ID_INTEL_82371MX 0x1234 +#define PCI_DEVICE_ID_INTEL_82437MX 0x1235 +#define PCI_DEVICE_ID_INTEL_82441 0x1237 +#define PCI_DEVICE_ID_INTEL_82380FB 0x124b +#define PCI_DEVICE_ID_INTEL_82439 0x1250 +#define PCI_DEVICE_ID_INTEL_80960_RP 0x1960 +#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 +#define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 +#define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 +#define PCI_DEVICE_ID_INTEL_82437VX 0x7030 +#define PCI_DEVICE_ID_INTEL_82439TX 0x7100 +#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110 +#define PCI_DEVICE_ID_INTEL_82371AB 0x7111 +#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112 +#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113 +#define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 +#define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 +#define PCI_DEVICE_ID_INTEL_82801AA_2 0x2412 +#define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 +#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415 +#define PCI_DEVICE_ID_INTEL_82801AA_6 0x2416 +#define PCI_DEVICE_ID_INTEL_82801AA_8 0x2418 +#define PCI_DEVICE_ID_INTEL_82801AB_0 0x2420 +#define PCI_DEVICE_ID_INTEL_82801AB_1 0x2421 +#define PCI_DEVICE_ID_INTEL_82801AB_2 0x2422 +#define PCI_DEVICE_ID_INTEL_82801AB_3 0x2423 +#define PCI_DEVICE_ID_INTEL_82801AB_5 0x2425 +#define PCI_DEVICE_ID_INTEL_82801AB_6 0x2426 +#define PCI_DEVICE_ID_INTEL_82801AB_8 0x2428 +#define PCI_DEVICE_ID_INTEL_82801BA_0 0x2440 +#define PCI_DEVICE_ID_INTEL_82801BA_1 0x2442 +#define PCI_DEVICE_ID_INTEL_82801BA_2 0x2443 +#define PCI_DEVICE_ID_INTEL_82801BA_3 0x2444 +#define PCI_DEVICE_ID_INTEL_82801BA_4 0x2445 +#define PCI_DEVICE_ID_INTEL_82801BA_5 0x2446 +#define PCI_DEVICE_ID_INTEL_82801BA_6 0x2448 +#define PCI_DEVICE_ID_INTEL_82801BA_7 0x2449 +#define PCI_DEVICE_ID_INTEL_82801BA_8 0x244a +#define PCI_DEVICE_ID_INTEL_82801BA_9 0x244b +#define PCI_DEVICE_ID_INTEL_82801BA_10 0x244c +#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e +#define PCI_DEVICE_ID_INTEL_82801E_0 0x2450 +#define PCI_DEVICE_ID_INTEL_82801E_2 0x2452 +#define PCI_DEVICE_ID_INTEL_82801E_3 0x2453 +#define PCI_DEVICE_ID_INTEL_82801E_9 0x2459 +#define PCI_DEVICE_ID_INTEL_82801E_11 0x245b +#define PCI_DEVICE_ID_INTEL_82801E_13 0x245d +#define PCI_DEVICE_ID_INTEL_82801E_14 0x245e +#define PCI_DEVICE_ID_INTEL_82801CA_0 0x2480 +#define PCI_DEVICE_ID_INTEL_82801CA_2 0x2482 +#define PCI_DEVICE_ID_INTEL_82801CA_3 0x2483 +#define PCI_DEVICE_ID_INTEL_82801CA_4 0x2484 +#define PCI_DEVICE_ID_INTEL_82801CA_5 0x2485 +#define PCI_DEVICE_ID_INTEL_82801CA_6 0x2486 +#define PCI_DEVICE_ID_INTEL_82801CA_7 0x2487 +#define PCI_DEVICE_ID_INTEL_82801CA_10 0x248a +#define 
PCI_DEVICE_ID_INTEL_82801CA_11 0x248b +#define PCI_DEVICE_ID_INTEL_82801CA_12 0x248c +#define PCI_DEVICE_ID_INTEL_82801DB_0 0x24c0 +#define PCI_DEVICE_ID_INTEL_82801DB_2 0x24c2 +#define PCI_DEVICE_ID_INTEL_82801DB_3 0x24c3 +#define PCI_DEVICE_ID_INTEL_82801DB_4 0x24c4 +#define PCI_DEVICE_ID_INTEL_82801DB_5 0x24c5 +#define PCI_DEVICE_ID_INTEL_82801DB_6 0x24c6 +#define PCI_DEVICE_ID_INTEL_82801DB_7 0x24c7 +#define PCI_DEVICE_ID_INTEL_82801DB_11 0x24cb +#define PCI_DEVICE_ID_INTEL_82801DB_13 0x24cd +#define PCI_DEVICE_ID_INTEL_80310 0x530d +#define PCI_DEVICE_ID_INTEL_82810_MC1 0x7120 +#define PCI_DEVICE_ID_INTEL_82810_IG1 0x7121 +#define PCI_DEVICE_ID_INTEL_82810_MC3 0x7122 +#define PCI_DEVICE_ID_INTEL_82810_IG3 0x7123 +#define PCI_DEVICE_ID_INTEL_82443LX_0 0x7180 +#define PCI_DEVICE_ID_INTEL_82443LX_1 0x7181 +#define PCI_DEVICE_ID_INTEL_82443BX_0 0x7190 +#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191 +#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192 +#define PCI_DEVICE_ID_INTEL_82443MX_0 0x7198 +#define PCI_DEVICE_ID_INTEL_82443MX_1 0x7199 +#define PCI_DEVICE_ID_INTEL_82443MX_2 0x719a +#define PCI_DEVICE_ID_INTEL_82443MX_3 0x719b +#define PCI_DEVICE_ID_INTEL_82372FB_0 0x7600 +#define PCI_DEVICE_ID_INTEL_82372FB_1 0x7601 +#define PCI_DEVICE_ID_INTEL_82372FB_2 0x7602 +#define PCI_DEVICE_ID_INTEL_82372FB_3 0x7603 +#define PCI_DEVICE_ID_INTEL_82454GX 0x84c4 +#define PCI_DEVICE_ID_INTEL_82450GX 0x84c5 +#define PCI_DEVICE_ID_INTEL_82451NX 0x84ca + +#define PCI_VENDOR_ID_COMPUTONE 0x8e0e +#define PCI_DEVICE_ID_COMPUTONE_IP2EX 0x0291 +#define PCI_DEVICE_ID_COMPUTONE_PG 0x0302 +#define PCI_SUBVENDOR_ID_COMPUTONE 0x8e0e +#define PCI_SUBDEVICE_ID_COMPUTONE_PG4 0x0001 +#define PCI_SUBDEVICE_ID_COMPUTONE_PG8 0x0002 +#define PCI_SUBDEVICE_ID_COMPUTONE_PG6 0x0003 + +#define PCI_VENDOR_ID_KTI 0x8e2e +#define PCI_DEVICE_ID_KTI_ET32P2 0x3000 + +#define PCI_VENDOR_ID_ADAPTEC 0x9004 +#define PCI_DEVICE_ID_ADAPTEC_7810 0x1078 +#define PCI_DEVICE_ID_ADAPTEC_7821 0x2178 +#define PCI_DEVICE_ID_ADAPTEC_38602 0x3860 +#define PCI_DEVICE_ID_ADAPTEC_7850 0x5078 +#define PCI_DEVICE_ID_ADAPTEC_7855 0x5578 +#define PCI_DEVICE_ID_ADAPTEC_5800 0x5800 +#define PCI_DEVICE_ID_ADAPTEC_3860 0x6038 +#define PCI_DEVICE_ID_ADAPTEC_1480A 0x6075 +#define PCI_DEVICE_ID_ADAPTEC_7860 0x6078 +#define PCI_DEVICE_ID_ADAPTEC_7861 0x6178 +#define PCI_DEVICE_ID_ADAPTEC_7870 0x7078 +#define PCI_DEVICE_ID_ADAPTEC_7871 0x7178 +#define PCI_DEVICE_ID_ADAPTEC_7872 0x7278 +#define PCI_DEVICE_ID_ADAPTEC_7873 0x7378 +#define PCI_DEVICE_ID_ADAPTEC_7874 0x7478 +#define PCI_DEVICE_ID_ADAPTEC_7895 0x7895 +#define PCI_DEVICE_ID_ADAPTEC_7880 0x8078 +#define PCI_DEVICE_ID_ADAPTEC_7881 0x8178 +#define PCI_DEVICE_ID_ADAPTEC_7882 0x8278 +#define PCI_DEVICE_ID_ADAPTEC_7883 0x8378 +#define PCI_DEVICE_ID_ADAPTEC_7884 0x8478 +#define PCI_DEVICE_ID_ADAPTEC_7885 0x8578 +#define PCI_DEVICE_ID_ADAPTEC_7886 0x8678 +#define PCI_DEVICE_ID_ADAPTEC_7887 0x8778 +#define PCI_DEVICE_ID_ADAPTEC_7888 0x8878 +#define PCI_DEVICE_ID_ADAPTEC_1030 0x8b78 + +#define PCI_VENDOR_ID_ADAPTEC2 0x9005 +#define PCI_DEVICE_ID_ADAPTEC2_2940U2 0x0010 +#define PCI_DEVICE_ID_ADAPTEC2_2930U2 0x0011 +#define PCI_DEVICE_ID_ADAPTEC2_7890B 0x0013 +#define PCI_DEVICE_ID_ADAPTEC2_7890 0x001f +#define PCI_DEVICE_ID_ADAPTEC2_3940U2 0x0050 +#define PCI_DEVICE_ID_ADAPTEC2_3950U2D 0x0051 +#define PCI_DEVICE_ID_ADAPTEC2_7896 0x005f +#define PCI_DEVICE_ID_ADAPTEC2_7892A 0x0080 +#define PCI_DEVICE_ID_ADAPTEC2_7892B 0x0081 +#define PCI_DEVICE_ID_ADAPTEC2_7892D 0x0083 +#define PCI_DEVICE_ID_ADAPTEC2_7892P 0x008f 
+#define PCI_DEVICE_ID_ADAPTEC2_7899A 0x00c0 +#define PCI_DEVICE_ID_ADAPTEC2_7899B 0x00c1 +#define PCI_DEVICE_ID_ADAPTEC2_7899D 0x00c3 +#define PCI_DEVICE_ID_ADAPTEC2_7899P 0x00cf + +#define PCI_VENDOR_ID_ATRONICS 0x907f +#define PCI_DEVICE_ID_ATRONICS_2015 0x2015 + +#define PCI_VENDOR_ID_HOLTEK 0x9412 +#define PCI_DEVICE_ID_HOLTEK_6565 0x6565 + +#define PCI_VENDOR_ID_NETMOS 0x9710 +#define PCI_DEVICE_ID_NETMOS_9735 0x9735 +#define PCI_DEVICE_ID_NETMOS_9835 0x9835 + +#define PCI_SUBVENDOR_ID_EXSYS 0xd84d +#define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014 + +#define PCI_VENDOR_ID_TIGERJET 0xe159 +#define PCI_DEVICE_ID_TIGERJET_300 0x0001 +#define PCI_DEVICE_ID_TIGERJET_100 0x0002 + +#define PCI_VENDOR_ID_ARK 0xedd8 +#define PCI_DEVICE_ID_ARK_STING 0xa091 +#define PCI_DEVICE_ID_ARK_STINGARK 0xa099 +#define PCI_DEVICE_ID_ARK_2000MT 0xa0a1 + +#define PCI_VENDOR_ID_MICROGATE 0x13c0 +#define PCI_DEVICE_ID_MICROGATE_USC 0x0010 +#define PCI_DEVICE_ID_MICROGATE_SCC 0x0020 +#define PCI_DEVICE_ID_MICROGATE_SCA 0x0030 diff --git a/xen-2.4.16/include/xeno/pkt_sched.h b/xen-2.4.16/include/xeno/pkt_sched.h new file mode 100644 index 0000000000..87b1a08e52 --- /dev/null +++ b/xen-2.4.16/include/xeno/pkt_sched.h @@ -0,0 +1,816 @@ +#ifndef __NET_PKT_SCHED_H +#define __NET_PKT_SCHED_H + +#define PSCHED_GETTIMEOFDAY 1 +#define PSCHED_JIFFIES 2 +#define PSCHED_CPU 3 + +#define PSCHED_CLOCK_SOURCE PSCHED_JIFFIES + +#include +#include +//#include + +#ifdef CONFIG_X86_TSC +#include +#endif + +struct rtattr; +struct Qdisc; + +struct qdisc_walker +{ + int stop; + int skip; + int count; + int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *); +}; + +struct Qdisc_class_ops +{ + /* Child qdisc manipulation */ + int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); + struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); + + /* Class manipulation routines */ + unsigned long (*get)(struct Qdisc *, u32 classid); + void (*put)(struct Qdisc *, unsigned long); + int (*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *); + int (*delete)(struct Qdisc *, unsigned long); + void (*walk)(struct Qdisc *, struct qdisc_walker * arg); + +#if 0 + /* Filter manipulation */ + struct tcf_proto ** (*tcf_chain)(struct Qdisc *, unsigned long); + unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); + void (*unbind_tcf)(struct Qdisc *, unsigned long); +#endif +}; + +struct Qdisc_ops +{ + struct Qdisc_ops *next; + struct Qdisc_class_ops *cl_ops; + char id[IFNAMSIZ]; + int priv_size; + + int (*enqueue)(struct sk_buff *, struct Qdisc *); + struct sk_buff * (*dequeue)(struct Qdisc *); + int (*requeue)(struct sk_buff *, struct Qdisc *); + int (*drop)(struct Qdisc *); + + int (*init)(struct Qdisc *, struct rtattr *arg); + void (*reset)(struct Qdisc *); + void (*destroy)(struct Qdisc *); + int (*change)(struct Qdisc *, struct rtattr *arg); + + int (*dump)(struct Qdisc *, struct sk_buff *); +}; + +extern rwlock_t qdisc_tree_lock; + +struct Qdisc +{ + int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev); + struct sk_buff * (*dequeue)(struct Qdisc *dev); + unsigned flags; +#define TCQ_F_BUILTIN 1 +#define TCQ_F_THROTTLED 2 +#define TCQ_F_INGRES 4 + struct Qdisc_ops *ops; + struct Qdisc *next; + u32 handle; + atomic_t refcnt; + struct sk_buff_head q; + struct net_device *dev; + + //struct tc_stats stats; + int (*reshape_fail)(struct sk_buff *skb, struct Qdisc *q); + + /* This field is deprecated, but it is still used by CBQ + * and it will live until better solution will be 
invented. + */ + struct Qdisc *__parent; + + char data[0]; +}; + +struct qdisc_rate_table +{ + //struct tc_ratespec rate; + u32 data[256]; + struct qdisc_rate_table *next; + int refcnt; +}; + +static inline void sch_tree_lock(struct Qdisc *q) +{ + write_lock(&qdisc_tree_lock); + spin_lock_bh(&q->dev->queue_lock); +} + +static inline void sch_tree_unlock(struct Qdisc *q) +{ + spin_unlock_bh(&q->dev->queue_lock); + write_unlock(&qdisc_tree_lock); +} + +#if 0 +static inline void tcf_tree_lock(struct tcf_proto *tp) +{ + write_lock(&qdisc_tree_lock); + spin_lock_bh(&tp->q->dev->queue_lock); +} + +static inline void tcf_tree_unlock(struct tcf_proto *tp) +{ + spin_unlock_bh(&tp->q->dev->queue_lock); + write_unlock(&qdisc_tree_lock); +} + +static inline unsigned long +cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl) +{ + unsigned long old_cl; + + tcf_tree_lock(tp); + old_cl = *clp; + *clp = cl; + tcf_tree_unlock(tp); + return old_cl; +} + +static inline unsigned long +__cls_set_class(unsigned long *clp, unsigned long cl) +{ + unsigned long old_cl; + + old_cl = *clp; + *clp = cl; + return old_cl; +} +#endif + + +/* + Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth + + Normal IP packet size ~ 512byte, hence: + + 0.5Kbyte/1Mbyte/sec = 0.5msec, so we need a 50usec timer for + 10Mbit ethernet. + + 10msec resolution -> <50Kbit/sec. + + The result: [34]86 is not a good choice for a QoS router :-( + + Things are not so bad, because we may use an artificial + clock evaluated by integration of network data flow + in the most critical places. + + Note: we do not use fastgettimeofday. + The reason is that, when it is not the same thing as + gettimeofday, it returns an invalid timestamp which is + not updated while net_bh is active. + + So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums + with rdtsc. And PSCHED_JIFFIES on all other architectures, including [34]86 + and pentiums without rdtsc. + You can use PSCHED_GETTIMEOFDAY on other architectures, + which have a fast and precise clock source, but it is too expensive. + */ + +/* General note about internal clock. + + Any clock source returns time intervals, measured in units + close to 1usec. With source PSCHED_GETTIMEOFDAY it is precisely + microseconds, otherwise it is something close but different, chosen to minimize + arithmetic cost. The usec/internal-units ratio, in numerator/denominator + form, may be read from /proc/net/psched.
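The ratio mentioned just above is exported to userspace, so it can be checked directly. A minimal standalone sketch, assuming a Linux host where /proc/net/psched exists and prints four hex words (the variable names here are illustrative, not part of this header):

#include <stdio.h>

int main(void)
{
    /* The first two words give the usec-to-internal-units ratio
     * discussed in the comment above; the meaning of the remaining
     * words is kernel-version dependent. */
    unsigned int num, den, w3, w4;
    FILE *f = fopen("/proc/net/psched", "r");

    if (!f) { perror("/proc/net/psched"); return 1; }
    if (fscanf(f, "%x %x %x %x", &num, &den, &w3, &w4) == 4)
        printf("usec/internal-units ratio: %u/%u\n", num, den);
    fclose(f);
    return 0;
}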
+ */ + + +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY + +typedef struct timeval psched_time_t; +typedef long psched_tdiff_t; + +#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) +#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ)) + +#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff); + +#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */ + +#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2 + +typedef u64 psched_time_t; +typedef long psched_tdiff_t; + +extern psched_time_t psched_time_base; + +#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES + +#if HZ == 100 +#define PSCHED_JSCALE 13 +#elif HZ == 1024 +#define PSCHED_JSCALE 10 +#else +#define PSCHED_JSCALE 0 +#endif + +#define PSCHED_EXPORTLIST_2 + +#if ~0UL == 0xFFFFFFFF + +#define PSCHED_WATCHER unsigned long + +extern PSCHED_WATCHER psched_time_mark; + +#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE)) + +#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \ + EXPORT_SYMBOL(psched_time_mark); + +#else + +#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies<<PSCHED_JSCALE)) + +#define PSCHED_EXPORTLIST_1 + +#endif + +#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE) + +#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU + +extern psched_tdiff_t psched_clock_per_hz; +extern int psched_clock_scale; + +#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \ + EXPORT_SYMBOL(psched_clock_scale); + +#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) + +#ifdef CONFIG_X86_TSC + +#define PSCHED_GET_TIME(stamp) \ +({ u64 __cur; \ + rdtscll(__cur); \ + (stamp) = __cur>>psched_clock_scale; \ +}) + +#define PSCHED_EXPORTLIST_1 + +#elif defined (__alpha__) + +#define PSCHED_WATCHER u32 + +extern PSCHED_WATCHER psched_time_mark; + +#define PSCHED_GET_TIME(stamp) \ +({ u32 __res; \ + __asm__ __volatile__ ("rpcc %0" : "=r"(__res)); \ + if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \ + psched_time_mark = __res; \ + (stamp) = (psched_time_base + __res)>>psched_clock_scale; \ +}) + +#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \ + EXPORT_SYMBOL(psched_time_mark); + +#else + +#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.
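To make the scaled-jiffies arithmetic concrete, here is a minimal userspace model of the PSCHED_JIFFIES branch above (assuming HZ=100, hence PSCHED_JSCALE=13; jiffies is simulated and nothing below is kernel API):

#include <stdio.h>

#define HZ 100
#define PSCHED_JSCALE 13   /* as selected above for HZ == 100 */

typedef unsigned long long psched_time_t;

static unsigned long jiffies;            /* simulated tick counter      */
static psched_time_t psched_time_base;   /* accumulated base time       */
static unsigned long psched_time_mark;   /* jiffies value folded so far */

#define PSCHED_GET_TIME(stamp) \
    ((stamp) = psched_time_base + \
     (((unsigned long)(jiffies - psched_time_mark)) << PSCHED_JSCALE))
#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
#define PSCHED_US2JIFFIE(delay) \
    (((delay) + (1 << PSCHED_JSCALE) - 1) >> PSCHED_JSCALE)

int main(void)
{
    psched_time_t t0, t1;

    PSCHED_GET_TIME(t0);
    jiffies += 5;            /* pretend 5 ticks (50ms at HZ=100) elapse */
    PSCHED_GET_TIME(t1);

    /* One jiffy becomes 2^13 = 8192 internal units, so each unit is
     * 10000us/8192, roughly 1.2us: "close to 1usec" as noted above. */
    printf("elapsed: %ld internal units\n", PSCHED_TDIFF(t1, t0));
    printf("round-trip to ticks: %lu\n",
           (unsigned long)PSCHED_US2JIFFIE(PSCHED_TDIFF(t1, t0)));
    return 0;
}

Run as-is this prints 40960 units and 5 ticks, showing that the rounding in PSCHED_US2JIFFIE inverts the scaling exactly on whole-tick intervals.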
+ +#endif /* ARCH */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */ + +#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */ + +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY +#define PSCHED_TDIFF(tv1, tv2) \ +({ \ + int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ + int __delta = (tv1).tv_usec - (tv2).tv_usec; \ + if (__delta_sec) { \ + switch (__delta_sec) { \ + default: \ + __delta = 0; \ + case 2: \ + __delta += 1000000; \ + case 1: \ + __delta += 1000000; \ + } \ + } \ + __delta; \ +}) + +extern int psched_tod_diff(int delta_sec, int bound); + +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ +({ \ + int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ + int __delta = (tv1).tv_usec - (tv2).tv_usec; \ + switch (__delta_sec) { \ + default: \ + __delta = psched_tod_diff(__delta_sec, bound); guard; break; \ + case 2: \ + __delta += 1000000; \ + case 1: \ + __delta += 1000000; \ + case 0: ; \ + } \ + __delta; \ +}) + +#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ + (tv1).tv_sec <= (tv2).tv_sec) || \ + (tv1).tv_sec < (tv2).tv_sec) + +#define PSCHED_TADD2(tv, delta, tv_res) \ +({ \ + int __delta = (tv).tv_usec + (delta); \ + (tv_res).tv_sec = (tv).tv_sec; \ + if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \ + (tv_res).tv_usec = __delta; \ +}) + +#define PSCHED_TADD(tv, delta) \ +({ \ + (tv).tv_usec += (delta); \ + if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \ + (tv).tv_usec -= 1000000; } \ +}) + +/* Set/check that time is in the "past perfect"; + it depends on concrete representation of system time + */ + +#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) +#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) + +#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) + +#else + +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ +({ \ + long __delta = (tv1) - (tv2); \ + if ( __delta > (bound)) { __delta = (bound); guard; } \ + __delta; \ +}) + + +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) +#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) +#define PSCHED_TADD(tv, delta) ((tv) += (delta)) +#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) +#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) +#define PSCHED_AUDIT_TDIFF(t) + +#endif + +struct tcf_police +{ + struct tcf_police *next; + int refcnt; + u32 index; + + int action; + int result; + u32 ewma_rate; + u32 burst; + u32 mtu; + + u32 toks; + u32 ptoks; + psched_time_t t_c; + spinlock_t lock; + struct qdisc_rate_table *R_tab; + struct qdisc_rate_table *P_tab; + + //struct tc_stats stats; +}; + +//extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st); +extern void tcf_police_destroy(struct tcf_police *p); +extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est); +extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); +extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); + +static inline void tcf_police_release(struct tcf_police *p) +{ + if (p && --p->refcnt == 0) + tcf_police_destroy(p); +} + +extern struct Qdisc noop_qdisc; +extern struct Qdisc_ops noop_qdisc_ops; +extern struct Qdisc_ops pfifo_qdisc_ops; +extern struct Qdisc_ops bfifo_qdisc_ops; + +int register_qdisc(struct Qdisc_ops *qops); +int unregister_qdisc(struct Qdisc_ops *qops); +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); +void dev_init_scheduler(struct net_device *dev); +void dev_shutdown(struct 
net_device *dev); +void dev_activate(struct net_device *dev); +void dev_deactivate(struct net_device *dev); +void qdisc_reset(struct Qdisc *qdisc); +void qdisc_destroy(struct Qdisc *qdisc); +struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops); +//int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt); +//void qdisc_kill_estimator(struct tc_stats *stats); +//struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); +void qdisc_put_rtab(struct qdisc_rate_table *tab); +int teql_init(void); +int tc_filter_init(void); +int pktsched_init(void); + +extern int qdisc_restart(struct net_device *dev); + +static inline void qdisc_run(struct net_device *dev) +{ + while (!netif_queue_stopped(dev) && + qdisc_restart(dev)<0) + /* NOTHING */; +} + +/* Calculate maximal size of packet seen by hard_start_xmit + routine of this device. + */ +static inline unsigned psched_mtu(struct net_device *dev) +{ + unsigned mtu = dev->mtu; + return dev->hard_header ? mtu + dev->hard_header_len : mtu; +} + + +/****************************************************************************** + * XXXXXXXXX Rest of this file is real linux/pkt_sched.h XXXXX + */ + + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class; actually, as a rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may also have their own private records. + */ + +struct tc_stats +{ + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +#ifdef __KERNEL__ + spinlock_t *lock; +#endif +}; + +struct tc_estimator +{ + char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from the user API viewpoint, + but they actually always consist of two fields: a major and a + minor number, which are interpreted specially by the kernel; + applications may rely on this split, though it is not recommended. + + E.g. qdisc handles always have a minor number equal to zero, + classes (or flows) have a major equal to the parent qdisc's major, and + a minor uniquely identifying the class inside its qdisc.
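To make the major/minor split concrete, a standalone worked example (pure integer arithmetic; the TC_H_* macros are restated here from the definitions that follow, so the snippet compiles on its own):

#include <stdio.h>

#define TC_H_MAJ_MASK (0xFFFF0000U)
#define TC_H_MIN_MASK (0x0000FFFFU)
#define TC_H_MAJ(h)   ((h) & TC_H_MAJ_MASK)
#define TC_H_MIN(h)   ((h) & TC_H_MIN_MASK)
#define TC_H_MAKE(maj, min) (((maj) & TC_H_MAJ_MASK) | ((min) & TC_H_MIN_MASK))

int main(void)
{
    unsigned int qdisc = TC_H_MAKE(1U << 16, 0);  /* qdisc "1:", minor 0 */
    unsigned int cls   = TC_H_MAKE(qdisc, 5);     /* class "1:5"         */

    printf("qdisc handle 0x%08x\n", qdisc);       /* 0x00010000 */
    printf("class 0x%08x: major 0x%08x, minor %u\n",
           cls, TC_H_MAJ(cls), TC_H_MIN(cls));    /* 0x00010005, 0x00010000, 5 */
    return 0;
}

So "1:0" is the qdisc itself and "1:5" is a class within it, mirroring the major:minor notation used by the tc command-line tool.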
+ + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) + +struct tc_ratespec +{ + unsigned char cell_log; + unsigned char __reserved; + unsigned short feature; + short addend; + unsigned short mpu; + __u32 rate; +}; + +/* FIFO section */ + +struct tc_fifo_qopt +{ + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 + +struct tc_prio_qopt +{ + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* CSZ section */ + +struct tc_csz_qopt +{ + int flows; /* Maximal number of guaranteed flows */ + unsigned char R_log; /* Fixed point position for round number */ + unsigned char delta_log; /* Log of maximal managed time interval */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> CSZ band */ +}; + +struct tc_csz_copt +{ + struct tc_ratespec slice; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum +{ + TCA_CSZ_UNSPEC, + TCA_CSZ_PARMS, + TCA_CSZ_RTAB, + TCA_CSZ_PTAB, +}; + +/* TBF section */ + +struct tc_tbf_qopt +{ + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum +{ + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, +}; + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt +{ + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +/* + * NOTE: limit, divisor and flows are hard-wired in the code at the moment. + * + * limit=flows=128, divisor=1024; + * + * The only reason for this is efficiency: it is possible + * to change these parameters at compile time. + */
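Tying the PRIO section back to the logical bands defined earlier: a minimal sketch of filling a tc_prio_qopt. The struct is restated so the snippet stands alone, and the priomap values are only an assumption modeled on the default priority-to-band map of Linux's pfifo_fast scheduler, not something this header mandates:

#include <stdio.h>

#define TC_PRIO_MAX 15
#define TC_PRIO_INTERACTIVE 6

struct tc_prio_qopt {
    int bands;                              /* Number of bands */
    unsigned char priomap[TC_PRIO_MAX + 1]; /* logical priority -> band */
};

int main(void)
{
    /* Band 0 is serviced first; interactive traffic (prio 6, 7) maps
     * there, bulk and best-effort land in the lower bands. */
    struct tc_prio_qopt opt = {
        .bands   = 3,
        .priomap = { 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
    };

    printf("TC_PRIO_INTERACTIVE -> band %d\n",
           opt.priomap[TC_PRIO_INTERACTIVE]);  /* prints band 0 */
    return 0;
}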
+ +/* RED section */ + +enum +{ + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, +}; + +struct tc_red_qopt +{ + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +}; + +struct tc_red_xstats +{ + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum +{ + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, +}; + +#define TCA_SET_OFF TCA_GRED_PARMS +struct tc_gred_qopt +{ + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; +/* gred setup */ +struct tc_gred_sopt +{ + __u32 DPs; + __u32 def_DP; + __u8 grio; +}; + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt +{ + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt +{ + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl +{ + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u32 penalty; +}; + +struct tc_cbq_police +{ + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt +{ + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats +{ + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum +{ + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, +}; + +#define TCA_CBQ_MAX TCA_CBQ_POLICE + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE +}; + +#define TCA_DSMARK_MAX TCA_DSMARK_VALUE + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE /* VC state (ATM_VS_*; for output only) */ +}; + +#define TCA_ATM_MAX TCA_ATM_STATE + +#endif diff
--git a/xen-2.4.16/include/xeno/prefetch.h b/xen-2.4.16/include/xeno/prefetch.h new file mode 100644 index 0000000000..8d7d3ffeb4 --- /dev/null +++ b/xen-2.4.16/include/xeno/prefetch.h @@ -0,0 +1,60 @@ +/* + * Generic cache management functions. Everything is arch-specific, + * but this header exists to make sure the defines/functions can be + * used in a generic way. + * + * 2000-11-13 Arjan van de Ven + * + */ + +#ifndef _LINUX_PREFETCH_H +#define _LINUX_PREFETCH_H + +#include +#include + +/* + prefetch(x) attempts to pre-emptively get the memory pointed to + by address "x" into the CPU L1 cache. + prefetch(x) should not cause any kind of exception, prefetch(0) is + specifically ok. + + prefetch() should be defined by the architecture, if not, the + #define below provides a no-op define. + + There are 3 prefetch() macros: + + prefetch(x) - prefetches the cacheline at "x" for read + prefetchw(x) - prefetches the cacheline at "x" for write + spin_lock_prefetch(x) - prefetches the spinlock *x for taking + + There is also PREFETCH_STRIDE, which is the architecture-preferred + "lookahead" size for prefetching streamed operations. + +*/ + +/* + * These cannot be do{}while(0) macros. See the mental gymnastics in + * the loop macro. + */ + +#ifndef ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCH +static inline void prefetch(const void *x) {;} +#endif + +#ifndef ARCH_HAS_PREFETCHW +#define ARCH_HAS_PREFETCHW +static inline void prefetchw(const void *x) {;} +#endif + +#ifndef ARCH_HAS_SPINLOCK_PREFETCH +#define ARCH_HAS_SPINLOCK_PREFETCH +#define spin_lock_prefetch(x) prefetchw(x) +#endif + +#ifndef PREFETCH_STRIDE +#define PREFETCH_STRIDE (4*L1_CACHE_BYTES) +#endif + +#endif diff --git a/xen-2.4.16/include/xeno/sched.h b/xen-2.4.16/include/xeno/sched.h new file mode 100644 index 0000000000..ae3457ce3f --- /dev/null +++ b/xen-2.4.16/include/xeno/sched.h @@ -0,0 +1,222 @@ +#ifndef _LINUX_SCHED_H +#define _LINUX_SCHED_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern unsigned long volatile jiffies; +extern rwlock_t tasklist_lock; + +#include + +struct mm_struct { + unsigned long cpu_vm_mask; + pagetable_t pagetable; +}; + +extern struct mm_struct init_mm; +#define IDLE0_MM \
{ \ + cpu_vm_mask: 0, \ + pagetable: mk_pagetable((unsigned long)idle0_pg_table) \ +} + +#define _HYP_EVENT_NEED_RESCHED 0 +#define _HYP_EVENT_NET_RX 1 +#define _HYP_EVENT_DIE 2 + +#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */ +#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */ +#define PF_GUEST_STTS 0x4 /* Has the guest OS requested 'stts'? */ + +#include +#include + +struct task_struct { + int processor; + int state, hyp_events; + unsigned int domain; + + /* An unsafe pointer into a shared data area. */ + shared_info_t *shared_info; + + /* Pointer to this guest's virtual interfaces. */ + /* network */ + net_ring_t *net_ring_base; + net_vif_t *net_vif_list[MAX_GUEST_VIFS]; + int num_net_vifs; + /* block io */ + blk_ring_t *blk_ring_base; + + int has_cpu, policy, counter; + + struct list_head run_list; + + struct mm_struct mm; + + mm_segment_t addr_limit; /* thread address space: + 0-0xBFFFFFFF for user-thread + 0-0xFFFFFFFF for kernel-thread + */ + + /* + * active_mm stays for now. It's entangled in the tricky TLB flushing + * stuff which I haven't addressed yet. It stays until I'm man enough + * to venture in.
+ */ + struct mm_struct *active_mm; + struct thread_struct thread; + struct task_struct *prev_task, *next_task; + + /* index into frame_table threading pages belonging to this + * domain together + */ + unsigned long pg_head; + unsigned long pg_tail; + unsigned int tot_pages; + + unsigned long flags; +}; + +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define TASK_STOPPED 4 +#define TASK_DYING 8 + +#define SCHED_YIELD 0x10 + +#include /* for KERNEL_DS */ + +#define IDLE0_TASK(_t) \ +{ \ + processor: 0, \ + domain: IDLE_DOMAIN_ID, \ + state: TASK_RUNNING, \ + has_cpu: 0, \ + mm: IDLE0_MM, \ + addr_limit: KERNEL_DS, \ + active_mm: &idle0_task.mm, \ + thread: INIT_THREAD, \ + prev_task: &(_t), \ + next_task: &(_t) \ +} + +#define IDLE_DOMAIN_ID (~0) +#define is_idle_task(_p) ((_p)->domain == IDLE_DOMAIN_ID) + +#ifndef IDLE0_TASK_SIZE +#define IDLE0_TASK_SIZE 2048*sizeof(long) +#endif + +union task_union { + struct task_struct task; + unsigned long stack[IDLE0_TASK_SIZE/sizeof(long)]; +}; + +extern union task_union idle0_task_union; +extern struct task_struct first_task_struct; + +extern struct task_struct *do_newdomain(void); +extern int setup_guestos(struct task_struct *p, dom0_newdomain_t *params); + +struct task_struct *find_domain_by_id(unsigned int dom); +extern void release_task(struct task_struct *); +extern void kill_domain(void); +extern void kill_domain_with_errmsg(const char *err); +extern long kill_other_domain(unsigned int dom); + +/* arch/process.c */ +void new_thread(struct task_struct *p, + unsigned long start_pc, + unsigned long start_stack, + unsigned long start_info); +extern void flush_thread(void); +extern void exit_thread(void); + +/* Linux puts these here for some reason! */ +extern int request_irq(unsigned int, + void (*handler)(int, void *, struct pt_regs *), + unsigned long, const char *, void *); +extern void free_irq(unsigned int, void *); + +extern unsigned long wait_init_idle; +#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle); + +#define set_current_state(_s) do { current->state = (_s); } while (0) +#define MAX_SCHEDULE_TIMEOUT LONG_MAX +long schedule_timeout(long timeout); +asmlinkage void schedule(void); + +void reschedule(struct task_struct *p); + +typedef struct schedule_data_st +{ + spinlock_t lock; + struct list_head runqueue; + struct task_struct *prev, *curr; +} __cacheline_aligned schedule_data_t; +extern schedule_data_t schedule_data[NR_CPUS]; + +static inline void __add_to_runqueue(struct task_struct * p) +{ + list_add(&p->run_list, &schedule_data[p->processor].runqueue); +} + + +static inline void __move_last_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); +} + + +static inline void __move_first_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + list_add(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __del_from_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + p->run_list.next = NULL; +} + +static inline int __task_on_runqueue(struct task_struct *p) +{ + return (p->run_list.next != NULL); +} + +int wake_up(struct task_struct *p); + +#define signal_pending(_p) ((_p)->hyp_events || \ + (_p)->shared_info->events) + +void domain_init(void); + +void cpu_idle(void); + +#define REMOVE_LINKS(p) do { \ + (p)->next_task->prev_task = (p)->prev_task; \ + (p)->prev_task->next_task = (p)->next_task; \ + } while (0) + +#define SET_LINKS(p) do { \ + (p)->next_task = 
&idle0_task; \ + (p)->prev_task = idle0_task.prev_task; \ + idle0_task.prev_task->next_task = (p); \ + idle0_task.prev_task = (p); \ + } while (0) + +extern void update_process_times(int user); + +#endif diff --git a/xen-2.4.16/include/xeno/skbuff.h b/xen-2.4.16/include/xeno/skbuff.h new file mode 100644 index 0000000000..4a540d8e0b --- /dev/null +++ b/xen-2.4.16/include/xeno/skbuff.h @@ -0,0 +1,1139 @@ +/* + * Definitions for the 'struct sk_buff' memory handlers. + * + * Authors: + * Alan Cox, + * Florian La Roche, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_SKBUFF_H +#define _LINUX_SKBUFF_H + +#include +#include +//#include +//#include +#include +#include +#include + +#include +#include +#include +#include +//#include + +#define HAVE_ALLOC_SKB /* For the drivers to know */ +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +#define SLAB_SKB /* Slabified skbuffs */ + +#define CHECKSUM_NONE 0 +#define CHECKSUM_HW 1 +#define CHECKSUM_UNNECESSARY 2 + +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) + +/* A. Checksumming of received packets by device. + * + * NONE: device failed to checksum this packet. + * skb->csum is undefined. + * + * UNNECESSARY: device parsed packet and verified checksum. + * skb->csum is undefined. + * It is a bad option, but, unfortunately, many vendors do this. + * Apparently with the secret goal of selling you a new device + * when you add a new protocol to your host, e.g. IPv6. 8) + * + * HW: the most generic way. Device supplied checksum of the _whole_ + * packet as seen by netif_rx in skb->csum. + * NOTE: Even if a device supports only some protocols, but + * is able to produce some skb->csum, it MUST use HW, + * not UNNECESSARY. + * + * B. Checksumming on output. + * + * NONE: skb is checksummed by protocol or csum is not required. + * + * HW: device is required to csum the packet as seen by hard_start_xmit + * from skb->h.raw to the end and to record the checksum + * at skb->h.raw+skb->csum. + * + * Device must show its capabilities in dev->features, set + * at device setup time. + * NETIF_F_HW_CSUM - a clever device, able to checksum + * everything. + * NETIF_F_NO_CSUM - loopback or reliable single hop media. + * NETIF_F_IP_CSUM - device is dumb, able to csum only + * TCP/UDP over IPv4. Sigh. Vendors like this + * way for an unknown reason. Though, see comment above + * about CHECKSUM_UNNECESSARY. 8) + * + * Any questions? No questions, good. --ANK + */ + +#ifdef __i386__ +#define NET_CALLER(arg) (*(((void**)&arg)-1)) +#else +#define NET_CALLER(arg) __builtin_return_address(0) +#endif + +#ifdef CONFIG_NETFILTER +struct nf_conntrack { + atomic_t use; + void (*destroy)(struct nf_conntrack *); +}; + +struct nf_ct_info { + struct nf_conntrack *master; +}; +#endif + +struct sk_buff_head { + /* These two members must be first.
*/ + struct sk_buff * next; + struct sk_buff * prev; + + __u32 qlen; + spinlock_t lock; +}; + +struct sk_buff; + +#define MAX_SKB_FRAGS 6 + +typedef struct skb_frag_struct skb_frag_t; + +struct skb_frag_struct +{ + struct pfn_info *page; + __u16 page_offset; + __u16 size; +}; + +/* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +struct skb_shared_info { + atomic_t dataref; + unsigned int nr_frags; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct sk_buff { + /* These two members must be first. */ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct sock *sk; /* Socket we are owned by */ + struct timeval stamp; /* Time we arrived */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + +// struct dst_entry *dst; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char __unused, /* Dead field, may be reused */ + cloned, /* head may be cloned (check refcnt to be sure). */ + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + __u32 priority; /* Packet queueing priority */ + atomic_t users; /* User count - see datagram.c,tcp.c */ + unsigned short protocol; /* Packet protocol from driver. */ + unsigned short security; /* Security level of packet */ + unsigned int truesize; /* Buffer size */ + + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NETFILTER + /* Can be used for communication between hooks. 
*/
+ unsigned long nfmark;
+ /* Cache info */
+ __u32 nfcache;
+ /* Associated connection, if any */
+ struct nf_ct_info *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+ unsigned int nf_debug;
+#endif
+#endif /*CONFIG_NETFILTER*/
+
+#if defined(CONFIG_HIPPI)
+ union{
+ __u32 ifield;
+ } private;
+#endif
+
+#ifdef CONFIG_NET_SCHED
+ __u32 tc_index; /* traffic control index */
+#endif
+};
+
+#define SK_WMEM_MAX 65535
+#define SK_RMEM_MAX 65535
+
+#ifdef __KERNEL__
+/*
+ * Handling routines are only of interest to the kernel
+ */
+#include
+
+#include
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff * alloc_skb(unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
+extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
+extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb,
+ int newheadroom,
+ int newtailroom,
+ int priority);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len, void *here);
+
+/* Internal */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
+
+/**
+ * skb_queue_empty - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ */
+
+static inline int skb_queue_empty(struct sk_buff_head *list)
+{
+ return (list->next == (struct sk_buff *) list);
+}
+
+/**
+ * skb_get - reference buffer
+ * @skb: buffer to reference
+ *
+ * Makes another reference to a socket buffer and returns a pointer
+ * to the buffer.
+ */
+
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+ atomic_inc(&skb->users);
+ return skb;
+}
+
+/*
+ * If users==1, we are the only owner and can avoid a redundant
+ * atomic change.
+ */
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ __kfree_skb(skb);
+}
+
+/* Use this if you didn't touch the skb state [for fast switching] */
+static inline void kfree_skb_fast(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ kfree_skbmem(skb);
+}
+
+/**
+ * skb_cloned - is the buffer a clone
+ * @skb: buffer to check
+ *
+ * Returns true if the buffer was generated with skb_clone() and is
+ * one of multiple shared copies of the buffer. Cloned buffers are
+ * shared data so must not be written to under normal circumstances.
+ */
+
+static inline int skb_cloned(struct sk_buff *skb)
+{
+ return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ * skb_shared - is the buffer shared
+ * @skb: buffer to check
+ *
+ * Returns true if more than one person has a reference to this
+ * buffer.
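+ *
+ * Reference counting in brief (sketch):
+ *
+ * skb_get(skb); -- users: n -> n+1, before handing the buffer out
+ * kfree_skb(skb); -- drop one reference; the buffer is freed only
+ * -- when the count reaches zero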
+ */
+
+static inline int skb_shared(struct sk_buff *skb)
+{
+ return (atomic_read(&skb->users) != 1);
+}
+
+/**
+ * skb_share_check - check if buffer is shared and if so clone it
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the buffer is shared the buffer is cloned and the old copy
+ * drops a reference. A new clone with a single reference is returned.
+ * If the buffer is not shared the original buffer is returned. When
+ * being called from interrupt state or with spinlocks held @pri must
+ * be %GFP_ATOMIC.
+ *
+ * %NULL is returned on a memory allocation failure.
+ */
+
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb;
+ nskb = skb_clone(skb, pri);
+ kfree_skb(skb);
+ return nskb;
+ }
+ return skb;
+}
+
+
+/*
+ * Copy shared buffers into a new sk_buff. We effectively do COW on
+ * packets to handle cases where we have a local reader and forwarding,
+ * and a couple of other messy ones. The normal one is tcpdumping
+ * a packet that's being forwarded.
+ */
+
+/**
+ * skb_unshare - make a copy of a shared buffer
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the socket buffer is a clone then this function creates a new
+ * copy of the data, drops a reference count on the old copy and returns
+ * the new copy with the reference count at 1. If the buffer is not a clone
+ * the original buffer is returned. When called with a spinlock held or
+ * from interrupt state @pri must be %GFP_ATOMIC.
+ *
+ * %NULL is returned on a memory allocation failure.
+ */
+
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+ struct sk_buff *nskb;
+ if(!skb_cloned(skb))
+ return skb;
+ nskb=skb_copy(skb, pri);
+ kfree_skb(skb); /* Free our shared copy */
+ return nskb;
+}
+
+/**
+ * skb_peek
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the head element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->next;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_peek_tail
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the tail element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
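+ *
+ * For example (sketch; MAX_BACKLOG is hypothetical), a receive path
+ * can bound its backlog:
+ *
+ * if (skb_queue_len(&rxq) > MAX_BACKLOG)
+ * kfree_skb(skb); -- drop
+ * else
+ * skb_queue_tail(&rxq, skb);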
+ */
+
+static inline __u32 skb_queue_len(struct sk_buff_head *list_)
+{
+ return(list_->qlen);
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = (struct sk_buff *)list;
+ list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/*
+ * Insert an sk_buff at the start of a list.
+ *
+ * The "__skb_xxxx()" functions are the non-atomic ones that
+ * can only be called with interrupts disabled.
+ */
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = newsk;
+ prev->next = newsk;
+}
+
+
+/**
+ * skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff functions.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_queue_head(list, newsk);
+ spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ * __skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the end of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+
+static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ next = (struct sk_buff *)list;
+ prev = next->prev;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = newsk;
+ prev->next = newsk;
+}
+
+/**
+ * skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the tail of the list. This function takes the
+ * list lock and can be used safely with other locking &sk_buff functions.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ __skb_queue_tail(list, newsk);
+ spin_unlock_irqrestore(&list->lock, flags);
+}
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
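+ *
+ * For contrast, the locked variants combine into a simple FIFO
+ * (sketch; rx_queue is hypothetical):
+ *
+ * skb_queue_head_init(&rx_queue); -- once, at setup
+ * skb_queue_tail(&rx_queue, skb); -- producer
+ * skb = skb_dequeue(&rx_queue); -- consumer; NULL when empty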
*/
+
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = NULL;
+ result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
+
+/**
+ * skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The head item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
+{
+ long flags;
+ struct sk_buff *result;
+
+ spin_lock_irqsave(&list->lock, flags);
+ result = __skb_dequeue(list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ return result;
+}
+
+/*
+ * Insert a packet on a list.
+ */
+
+static inline void __skb_insert(struct sk_buff *newsk,
+ struct sk_buff * prev, struct sk_buff *next,
+ struct sk_buff_head * list)
+{
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = newsk;
+ prev->next = newsk;
+ newsk->list = list;
+ list->qlen++;
+}
+
+/**
+ * skb_insert - insert a buffer
+ * @old: buffer to insert before
+ * @newsk: buffer to insert
+ *
+ * Place a packet before a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list-locked calls.
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_insert(newsk, old->prev, old, old->list);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * Place a packet after a given packet in a list.
+ */
+
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ __skb_insert(newsk, old, old->next, old->list);
+}
+
+/**
+ * skb_append - append a buffer
+ * @old: buffer to insert after
+ * @newsk: buffer to insert
+ *
+ * Place a packet after a given packet in a list. The list locks are taken
+ * and this function is atomic with respect to other list-locked calls.
+ * A buffer cannot be placed on two lists at the same time.
+ */
+
+
+static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&old->list->lock, flags);
+ __skb_append(old, newsk);
+ spin_unlock_irqrestore(&old->list->lock, flags);
+}
+
+/*
+ * remove sk_buff from list. _Must_ be called atomically, and with
+ * the list known..
+ */
+
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ struct sk_buff * next, * prev;
+
+ list->qlen--;
+ next = skb->next;
+ prev = skb->prev;
+ skb->next = NULL;
+ skb->prev = NULL;
+ skb->list = NULL;
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * skb_unlink - remove a buffer from a list
+ * @skb: buffer to remove
+ *
+ * Remove a packet from a list. The list lock is taken and the removal
+ * is atomic with respect to other list-locked calls.
+ *
+ * Works even without knowing the list it is sitting on, which can be
+ * handy at times. It also means that THE LIST MUST EXIST when you
+ * unlink. Thus a list must have its contents unlinked before it is
+ * destroyed.
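+ *
+ * For example, a retransmit or timeout handler can simply call
+ * skb_unlink(skb) on a buffer it owns; if the buffer is queued
+ * nowhere, the call is a no-op.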
*/
+
+static inline void skb_unlink(struct sk_buff *skb)
+{
+ struct sk_buff_head *list = skb->list;
+
+ if(list) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&list->lock, flags);
+ if(skb->list == list)
+ __skb_unlink(skb, skb->list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ }
+}
+
+/* XXX: more streamlined implementation */
+
+/**
+ * __skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+ struct sk_buff *skb = skb_peek_tail(list);
+ if (skb)
+ __skb_unlink(skb, list);
+ return skb;
+}
+
+/**
+ * skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. The list lock is taken so the function
+ * may be used safely with other locking list functions. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+
+static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
+{
+ long flags;
+ struct sk_buff *result;
+
+ spin_lock_irqsave(&list->lock, flags);
+ result = __skb_dequeue_tail(list);
+ spin_unlock_irqrestore(&list->lock, flags);
+ return result;
+}
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+static inline int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
+#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
+#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
+
+/*
+ * Add data to an sk_buff
+ */
+
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp=skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail+=len;
+ skb->len+=len;
+ if(skb->tail>skb->end) {
+ skb_over_panic(skb, len, current_text_addr());
+ }
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
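+ *
+ * Typical transmit-path construction (sketch; hdr_room, payload and
+ * struct myhdr are illustrative):
+ *
+ * skb = alloc_skb(hdr_room + len, GFP_ATOMIC);
+ * skb_reserve(skb, hdr_room); -- leave headroom
+ * memcpy(skb_put(skb, len), payload, len); -- append payload
+ * h = (struct myhdr *)skb_push(skb, sizeof(*h)); -- prepend header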
*/
+
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data-=len;
+ skb->len+=len;
+ if(skb->data < skb->head) {
+ skb_under_panic(skb, len, current_text_addr());
+ }
+ return skb->data;
+}
+
+static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len-=len;
+ if (skb->len < skb->data_len)
+ BUG();
+ return skb->data+=len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+
+static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb->len)
+ return NULL;
+ return __skb_pull(skb,len);
+}
+
+extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb_headlen(skb) &&
+ __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
+ return NULL;
+ skb->len -= len;
+ return skb->data += len;
+}
+
+static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb->len)
+ return NULL;
+ return __pskb_pull(skb,len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len <= skb_headlen(skb))
+ return 1;
+ if (len > skb->len)
+ return 0;
+ return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
+}
+
+/**
+ * skb_headroom - bytes at buffer head
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the head of an &sk_buff.
+ */
+
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data-skb->head;
+}
+
+/**
+ * skb_tailroom - bytes at buffer end
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the tail of an &sk_buff.
+ */
+
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+ return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data+=len;
+ skb->tail+=len;
+}
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ } else {
+ ___pskb_trim(skb, len, 0);
+ }
+}
+
+/**
+ * skb_trim - remove end from a buffer
+ * @skb: buffer to alter
+ * @len: new length
+ *
+ * Cut the length of a buffer down by removing data from the tail. If
+ * the buffer is already under the length specified it is not modified.
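+ *
+ * For example, a NIC driver that always copies a fixed-size receive
+ * buffer can call skb_trim(skb, pkt_len) afterwards to drop the
+ * trailing padding (pkt_len taken from the hardware descriptor).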
*/
+
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->len > len) {
+ __skb_trim(skb, len);
+ }
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ } else {
+ return ___pskb_trim(skb, len, 1);
+ }
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (len < skb->len)
+ return __pskb_trim(skb, len);
+ return 0;
+}
+
+/**
+ * skb_orphan - orphan a buffer
+ * @skb: buffer to orphan
+ *
+ * If a buffer currently has an owner then we call the owner's
+ * destructor function and make the @skb unowned. The buffer continues
+ * to exist but is no longer charged to its former owner.
+ */
+
+
+static inline void skb_orphan(struct sk_buff *skb)
+{
+ if (skb->destructor)
+ skb->destructor(skb);
+ skb->destructor = NULL;
+ skb->sk = NULL;
+}
+
+/**
+ * skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function takes the list
+ * lock and is atomic with respect to other list locking functions.
+ */
+
+
+static inline void skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb=skb_dequeue(list))!=NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function does not take the
+ * list lock and the caller must hold the relevant locks to use it.
+ */
+
+
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb=__skb_dequeue(list))!=NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb(length+16, gfp_mask);
+ if (skb)
+ skb_reserve(skb,16);
+ return skb;
+}
+
+/**
+ * dev_alloc_skb - allocate an skbuff for sending
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory it can be called from an interrupt.
+ */
+
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+
+/**
+ * skb_cow - copy header of skb when it is required
+ * @skb: buffer to cow
+ * @headroom: needed headroom
+ *
+ * If the skb passed lacks sufficient headroom or its data part
+ * is shared, data is reallocated. If reallocation fails, an error
+ * is returned and the original skb is not changed.
+ *
+ * The result is an skb with a writable area skb->head...skb->tail
+ * and at least @headroom of space at head.
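+ *
+ * For example (sketch), before rewriting a header in place:
+ *
+ * if (skb_cow(skb, headroom_needed))
+ * goto drop; -- reallocation failed, skb unchanged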
*/
+
+static inline int
+skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+ if (delta < 0)
+ delta = 0;
+
+ if (delta || skb_cloned(skb))
+ return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
+ return 0;
+}
+
+/**
+ * skb_linearize - convert paged skb to linear one
+ * @skb: buffer to linearize
+ * @gfp: allocation mode
+ *
+ * If there is no free memory -ENOMEM is returned, otherwise zero
+ * is returned and the old skb data released. */
+int skb_linearize(struct sk_buff *skb, int gfp);
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+ return page_address(frag->page);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+}
+
+#define skb_queue_walk(queue, skb) \
+ for (skb = (queue)->next; \
+ (skb != (struct sk_buff *)(queue)); \
+ skb=skb->next)
+
+
+extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
+extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
+extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
+extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb);
+
+extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
+extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
+extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+#ifdef CONFIG_NETFILTER
+static inline void
+nf_conntrack_put(struct nf_ct_info *nfct)
+{
+ if (nfct && atomic_dec_and_test(&nfct->master->use))
+ nfct->master->destroy(nfct->master);
+}
+static inline void
+nf_conntrack_get(struct nf_ct_info *nfct)
+{
+ if (nfct)
+ atomic_inc(&nfct->master->use);
+}
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SKBUFF_H */
diff --git a/xen-2.4.16/include/xeno/slab.h b/xen-2.4.16/include/xeno/slab.h
new file mode 100644
index 0000000000..c7aadff3ea
--- /dev/null
+++ b/xen-2.4.16/include/xeno/slab.h
@@ -0,0 +1,81 @@
+/*
+ * linux/mm/slab.h
+ * Written by Mark Hemment, 1996.
+ * (markhe@nextd.demon.co.uk)
+ */
+
+#if !defined(_LINUX_SLAB_H)
+#define _LINUX_SLAB_H
+
+typedef struct kmem_cache_s kmem_cache_t;
+
+#include
+#include
+
+/* flags for kmem_cache_alloc() */
+#define SLAB_NOFS GFP_NOFS
+#define SLAB_NOIO GFP_NOIO
+#define SLAB_NOHIGHIO GFP_NOHIGHIO
+#define SLAB_ATOMIC GFP_ATOMIC
+#define SLAB_USER GFP_USER
+#define SLAB_KERNEL GFP_KERNEL
+#define SLAB_NFS GFP_NFS
+#define SLAB_DMA GFP_DMA
+
+#define SLAB_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_HIGHIO|__GFP_FS)
+#define SLAB_NO_GROW 0x00001000UL /* don't grow a cache */
+
+/* flags to pass to kmem_cache_create().
+ * The first 3 are only valid when the allocator has been built
+ * with SLAB_DEBUG_SUPPORT.
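+ *
+ * Typical cache usage (sketch; the names are illustrative):
+ *
+ * kmem_cache_t *cp = kmem_cache_create("my_objs",
+ * sizeof(struct my_obj), 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ * struct my_obj *o = kmem_cache_alloc(cp, SLAB_KERNEL);
+ * ...
+ * kmem_cache_free(cp, o);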
*/
+#define SLAB_DEBUG_FREE 0x00000100UL /* Perform (expensive) checks on free */
+#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */
+#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
+#define SLAB_POISON 0x00000800UL /* Poison objects */
+#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
+#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on h/w cache lines */
+#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then destructor */
+#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */
+#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
+
+/* prototypes */
+extern void kmem_cache_init(void);
+extern void kmem_cache_sizes_init(unsigned long);
+
+extern kmem_cache_t *kmem_find_general_cachep(size_t, int gfpflags);
+extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
+ void (*)(void *, kmem_cache_t *, unsigned long),
+ void (*)(void *, kmem_cache_t *, unsigned long));
+extern int kmem_cache_destroy(kmem_cache_t *);
+extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc(kmem_cache_t *, int);
+extern void kmem_cache_free(kmem_cache_t *, void *);
+
+extern void *kmalloc(size_t, int);
+extern void kfree(const void *);
+
+extern int FASTCALL(kmem_cache_reap(int));
+#if 0
+extern int slabinfo_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+extern int slabinfo_write_proc(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+#endif
+
+/* System wide caches */
+extern kmem_cache_t *vm_area_cachep;
+extern kmem_cache_t *mm_cachep;
+extern kmem_cache_t *names_cachep;
+extern kmem_cache_t *files_cachep;
+extern kmem_cache_t *filp_cachep;
+extern kmem_cache_t *dquot_cachep;
+extern kmem_cache_t *bh_cachep;
+extern kmem_cache_t *fs_cachep;
+extern kmem_cache_t *sigact_cachep;
+
+
+#endif /* _LINUX_SLAB_H */
diff --git a/xen-2.4.16/include/xeno/smp.h b/xen-2.4.16/include/xeno/smp.h
new file mode 100644
index 0000000000..786026649e
--- /dev/null
+++ b/xen-2.4.16/include/xeno/smp.h
@@ -0,0 +1,88 @@
+#ifndef __LINUX_SMP_H
+#define __LINUX_SMP_H
+
+/*
+ * Generic SMP support
+ * Alan Cox.
+ */
+
+#include
+
+#ifdef CONFIG_SMP
+
+#include
+
+/*
+ * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc.
+ * (defined in asm header):
+ */
+
+/*
+ * stops all CPUs but the current one:
+ */
+extern void smp_send_stop(void);
+
+extern void FASTCALL(smp_send_event_check_mask(unsigned long cpu_mask));
+#define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
+
+
+/*
+ * Boot processor call to load the other CPUs
+ */
+extern void smp_boot_cpus(void);
+
+/*
+ * Processor call in. Must hold processors until ..
+ */ +extern void smp_callin(void); + +/* + * Multiprocessors may now schedule + */ +extern void smp_commence(void); + +/* + * Call a function on all other processors + */ +extern int smp_call_function (void (*func) (void *info), void *info, + int retry, int wait); + +/* + * True once the per process idle is forked + */ +extern int smp_threads_ready; + +extern int smp_num_cpus; + +extern volatile unsigned long smp_msg_data; +extern volatile int smp_src_cpu; +extern volatile int smp_msg_id; + +#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ +#define MSG_ALL 0x8001 + +#define MSG_INVALIDATE_TLB 0x0001 /* Remote processor TLB invalidate */ +#define MSG_STOP_CPU 0x0002 /* Sent to shut down slave CPU's + * when rebooting + */ +#define MSG_RESCHEDULE 0x0003 /* Reschedule request from master CPU*/ +#define MSG_CALL_FUNCTION 0x0004 /* Call function on all other CPUs */ + +#else + +/* + * These macros fold the SMP functionality into a single CPU system + */ + +#define smp_num_cpus 1 +#define smp_processor_id() 0 +#define hard_smp_processor_id() 0 +#define smp_threads_ready 1 +#define kernel_lock() +#define cpu_logical_map(cpu) 0 +#define cpu_number_map(cpu) 0 +#define smp_call_function(func,info,retry,wait) ({ 0; }) +#define cpu_online_map 1 + +#endif +#endif diff --git a/xen-2.4.16/include/xeno/socket.h b/xen-2.4.16/include/xeno/socket.h new file mode 100644 index 0000000000..92e7b93ae5 --- /dev/null +++ b/xen-2.4.16/include/xeno/socket.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_SOCKET_H +#define _LINUX_SOCKET_H + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +//#include /* arch-dependent defines */ +#include /* the SIOCxxx I/O controls */ +//#include /* iovec support */ +#include /* pid_t */ + +typedef unsigned short sa_family_t; + +/* + * 1003.1g requires sa_family_t and that sa_data is char. + */ + +struct sockaddr { + sa_family_t sa_family; /* address family, AF_xxx */ + char sa_data[14]; /* 14 bytes of protocol address */ +}; + +/* Supported address families. */ +#define AF_UNSPEC 0 +#define AF_UNIX 1 /* Unix domain sockets */ +#define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +#define AF_INET 2 /* Internet IP Protocol */ +#define AF_AX25 3 /* Amateur Radio AX.25 */ +#define AF_IPX 4 /* Novell IPX */ +#define AF_APPLETALK 5 /* AppleTalk DDP */ +#define AF_NETROM 6 /* Amateur Radio NET/ROM */ +#define AF_BRIDGE 7 /* Multiprotocol bridge */ +#define AF_ATMPVC 8 /* ATM PVCs */ +#define AF_X25 9 /* Reserved for X.25 project */ +#define AF_INET6 10 /* IP version 6 */ +#define AF_ROSE 11 /* Amateur Radio X.25 PLP */ +#define AF_DECnet 12 /* Reserved for DECnet project */ +#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/ +#define AF_SECURITY 14 /* Security callback pseudo AF */ +#define AF_KEY 15 /* PF_KEY key management API */ +#define AF_NETLINK 16 +#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */ +#define AF_PACKET 17 /* Packet family */ +#define AF_ASH 18 /* Ash */ +#define AF_ECONET 19 /* Acorn Econet */ +#define AF_ATMSVC 20 /* ATM SVCs */ +#define AF_SNA 22 /* Linux SNA Project (nutters!) */ +#define AF_IRDA 23 /* IRDA sockets */ +#define AF_PPPOX 24 /* PPPoX sockets */ +#define AF_WANPIPE 25 /* Wanpipe API Sockets */ +#define AF_BLUETOOTH 31 /* Bluetooth sockets */ +#define AF_MAX 32 /* For now.. */ + +/* Protocol families, same as address families. 
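+ * (Each PF_* constant below is defined to its AF_* counterpart, so
+ * the two namespaces are interchangeable.)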
*/ +#define PF_UNSPEC AF_UNSPEC +#define PF_UNIX AF_UNIX +#define PF_LOCAL AF_LOCAL +#define PF_INET AF_INET +#define PF_AX25 AF_AX25 +#define PF_IPX AF_IPX +#define PF_APPLETALK AF_APPLETALK +#define PF_NETROM AF_NETROM +#define PF_BRIDGE AF_BRIDGE +#define PF_ATMPVC AF_ATMPVC +#define PF_X25 AF_X25 +#define PF_INET6 AF_INET6 +#define PF_ROSE AF_ROSE +#define PF_DECnet AF_DECnet +#define PF_NETBEUI AF_NETBEUI +#define PF_SECURITY AF_SECURITY +#define PF_KEY AF_KEY +#define PF_NETLINK AF_NETLINK +#define PF_ROUTE AF_ROUTE +#define PF_PACKET AF_PACKET +#define PF_ASH AF_ASH +#define PF_ECONET AF_ECONET +#define PF_ATMSVC AF_ATMSVC +#define PF_SNA AF_SNA +#define PF_IRDA AF_IRDA +#define PF_PPPOX AF_PPPOX +#define PF_WANPIPE AF_WANPIPE +#define PF_BLUETOOTH AF_BLUETOOTH +#define PF_MAX AF_MAX + +/* Maximum queue length specifiable by listen. */ +#define SOMAXCONN 128 + +/* Flags we can use with send/ and recv. + Added those for 1003.1g not all are supported yet + */ + +#define MSG_OOB 1 +#define MSG_PEEK 2 +#define MSG_DONTROUTE 4 +#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC 8 +#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC 0x20 +#define MSG_DONTWAIT 0x40 /* Nonblocking io */ +#define MSG_EOR 0x80 /* End of record */ +#define MSG_WAITALL 0x100 /* Wait for a full request */ +#define MSG_FIN 0x200 +#define MSG_SYN 0x400 +#define MSG_CONFIRM 0x800 /* Confirm path validity */ +#define MSG_RST 0x1000 +#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ +#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ +#define MSG_MORE 0x8000 /* Sender will send more */ + +#define MSG_EOF MSG_FIN + + +/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */ +#define SOL_IP 0 +/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */ +#define SOL_TCP 6 +#define SOL_UDP 17 +#define SOL_IPV6 41 +#define SOL_ICMPV6 58 +#define SOL_RAW 255 +#define SOL_IPX 256 +#define SOL_AX25 257 +#define SOL_ATALK 258 +#define SOL_NETROM 259 +#define SOL_ROSE 260 +#define SOL_DECNET 261 +#define SOL_X25 262 +#define SOL_PACKET 263 +#define SOL_ATM 264 /* ATM layer (cell level) */ +#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */ +#define SOL_IRDA 266 + +/* IPX options */ +#define IPX_TYPE 1 + +#endif /* not kernel and not glibc */ +#endif /* _LINUX_SOCKET_H */ diff --git a/xen-2.4.16/include/xeno/sockios.h b/xen-2.4.16/include/xeno/sockios.h new file mode 100644 index 0000000000..81f99a124c --- /dev/null +++ b/xen-2.4.16/include/xeno/sockios.h @@ -0,0 +1,135 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions of the socket-level I/O control calls. + * + * Version: @(#)sockios.h 1.0.2 03/09/93 + * + * Authors: Ross Biro, + * Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_SOCKIOS_H +#define _LINUX_SOCKIOS_H + +//#include + +/* Linux-specific socket ioctls */ +#define SIOCINQ FIONREAD +#define SIOCOUTQ TIOCOUTQ + +/* Routing table calls. 
*/ +#define SIOCADDRT 0x890B /* add routing table entry */ +#define SIOCDELRT 0x890C /* delete routing table entry */ +#define SIOCRTMSG 0x890D /* call to routing system */ + +/* Socket configuration controls. */ +#define SIOCGIFNAME 0x8910 /* get iface name */ +#define SIOCSIFLINK 0x8911 /* set iface channel */ +#define SIOCGIFCONF 0x8912 /* get iface list */ +#define SIOCGIFFLAGS 0x8913 /* get flags */ +#define SIOCSIFFLAGS 0x8914 /* set flags */ +#define SIOCGIFADDR 0x8915 /* get PA address */ +#define SIOCSIFADDR 0x8916 /* set PA address */ +#define SIOCGIFDSTADDR 0x8917 /* get remote PA address */ +#define SIOCSIFDSTADDR 0x8918 /* set remote PA address */ +#define SIOCGIFBRDADDR 0x8919 /* get broadcast PA address */ +#define SIOCSIFBRDADDR 0x891a /* set broadcast PA address */ +#define SIOCGIFNETMASK 0x891b /* get network PA mask */ +#define SIOCSIFNETMASK 0x891c /* set network PA mask */ +#define SIOCGIFMETRIC 0x891d /* get metric */ +#define SIOCSIFMETRIC 0x891e /* set metric */ +#define SIOCGIFMEM 0x891f /* get memory address (BSD) */ +#define SIOCSIFMEM 0x8920 /* set memory address (BSD) */ +#define SIOCGIFMTU 0x8921 /* get MTU size */ +#define SIOCSIFMTU 0x8922 /* set MTU size */ +#define SIOCSIFNAME 0x8923 /* set interface name */ +#define SIOCSIFHWADDR 0x8924 /* set hardware address */ +#define SIOCGIFENCAP 0x8925 /* get/set encapsulations */ +#define SIOCSIFENCAP 0x8926 +#define SIOCGIFHWADDR 0x8927 /* Get hardware address */ +#define SIOCGIFSLAVE 0x8929 /* Driver slaving support */ +#define SIOCSIFSLAVE 0x8930 +#define SIOCADDMULTI 0x8931 /* Multicast address lists */ +#define SIOCDELMULTI 0x8932 +#define SIOCGIFINDEX 0x8933 /* name -> if_index mapping */ +#define SIOGIFINDEX SIOCGIFINDEX /* misprint compatibility :-) */ +#define SIOCSIFPFLAGS 0x8934 /* set/get extended flags set */ +#define SIOCGIFPFLAGS 0x8935 +#define SIOCDIFADDR 0x8936 /* delete PA address */ +#define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */ +#define SIOCGIFCOUNT 0x8938 /* get number of devices */ + +#define SIOCGIFBR 0x8940 /* Bridging support */ +#define SIOCSIFBR 0x8941 /* Set bridging options */ + +#define SIOCGIFTXQLEN 0x8942 /* Get the tx queue length */ +#define SIOCSIFTXQLEN 0x8943 /* Set the tx queue length */ + +#define SIOCGIFDIVERT 0x8944 /* Frame diversion support */ +#define SIOCSIFDIVERT 0x8945 /* Set frame diversion options */ + +#define SIOCETHTOOL 0x8946 /* Ethtool interface */ + +#define SIOCGMIIPHY 0x8947 /* Get address of MII PHY in use. */ +#define SIOCGMIIREG 0x8948 /* Read MII PHY register. */ +#define SIOCSMIIREG 0x8949 /* Write MII PHY register. */ + +/* ARP cache control calls. */ + /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ +#define SIOCDARP 0x8953 /* delete ARP table entry */ +#define SIOCGARP 0x8954 /* get ARP table entry */ +#define SIOCSARP 0x8955 /* set ARP table entry */ + +/* RARP cache control calls. 
*/ +#define SIOCDRARP 0x8960 /* delete RARP table entry */ +#define SIOCGRARP 0x8961 /* get RARP table entry */ +#define SIOCSRARP 0x8962 /* set RARP table entry */ + +/* Driver configuration calls */ + +#define SIOCGIFMAP 0x8970 /* Get device parameters */ +#define SIOCSIFMAP 0x8971 /* Set device parameters */ + +/* DLCI configuration calls */ + +#define SIOCADDDLCI 0x8980 /* Create new DLCI device */ +#define SIOCDELDLCI 0x8981 /* Delete DLCI device */ + +#define SIOCGIFVLAN 0x8982 /* 802.1Q VLAN support */ +#define SIOCSIFVLAN 0x8983 /* Set 802.1Q VLAN options */ + +/* bonding calls */ + +#define SIOCBONDENSLAVE 0x8990 /* enslave a device to the bond */ +#define SIOCBONDRELEASE 0x8991 /* release a slave from the bond*/ +#define SIOCBONDSETHWADDR 0x8992 /* set the hw addr of the bond */ +#define SIOCBONDSLAVEINFOQUERY 0x8993 /* rtn info about slave state */ +#define SIOCBONDINFOQUERY 0x8994 /* rtn info about bond state */ +#define SIOCBONDCHANGEACTIVE 0x8995 /* update to a new active slave */ + +/* Device private ioctl calls */ + +/* + * These 16 ioctls are available to devices via the do_ioctl() device + * vector. Each device should include this file and redefine these names + * as their own. Because these are device dependent it is a good idea + * _NOT_ to issue them to random objects and hope. + * + * THESE IOCTLS ARE _DEPRECATED_ AND WILL DISAPPEAR IN 2.5.X -DaveM + */ + +#define SIOCDEVPRIVATE 0x89F0 /* to 89FF */ + +/* + * These 16 ioctl calls are protocol private + */ + +#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */ +#endif /* _LINUX_SOCKIOS_H */ diff --git a/xen-2.4.16/include/xeno/spinlock.h b/xen-2.4.16/include/xeno/spinlock.h new file mode 100644 index 0000000000..08f2eb6098 --- /dev/null +++ b/xen-2.4.16/include/xeno/spinlock.h @@ -0,0 +1,142 @@ +#ifndef __LINUX_SPINLOCK_H +#define __LINUX_SPINLOCK_H + +#include +#include + +/* + * These are the generic versions of the spinlocks and read-write + * locks.. 
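+ *
+ * The canonical pattern (sketch):
+ *
+ * static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ * unsigned long flags;
+ *
+ * spin_lock_irqsave(&lock, flags); -- also masks local interrupts
+ * ... critical section ...
+ * spin_unlock_irqrestore(&lock, flags);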
+ */ +#define spin_lock_irqsave(lock, flags) do { local_irq_save(flags); spin_lock(lock); } while (0) +#define spin_lock_irq(lock) do { local_irq_disable(); spin_lock(lock); } while (0) +#define spin_lock_bh(lock) do { local_bh_disable(); spin_lock(lock); } while (0) + +#define read_lock_irqsave(lock, flags) do { local_irq_save(flags); read_lock(lock); } while (0) +#define read_lock_irq(lock) do { local_irq_disable(); read_lock(lock); } while (0) +#define read_lock_bh(lock) do { local_bh_disable(); read_lock(lock); } while (0) + +#define write_lock_irqsave(lock, flags) do { local_irq_save(flags); write_lock(lock); } while (0) +#define write_lock_irq(lock) do { local_irq_disable(); write_lock(lock); } while (0) +#define write_lock_bh(lock) do { local_bh_disable(); write_lock(lock); } while (0) + +#define spin_unlock_irqrestore(lock, flags) do { spin_unlock(lock); local_irq_restore(flags); } while (0) +#define spin_unlock_irq(lock) do { spin_unlock(lock); local_irq_enable(); } while (0) +#define spin_unlock_bh(lock) do { spin_unlock(lock); local_bh_enable(); } while (0) + +#define read_unlock_irqrestore(lock, flags) do { read_unlock(lock); local_irq_restore(flags); } while (0) +#define read_unlock_irq(lock) do { read_unlock(lock); local_irq_enable(); } while (0) +#define read_unlock_bh(lock) do { read_unlock(lock); local_bh_enable(); } while (0) + +#define write_unlock_irqrestore(lock, flags) do { write_unlock(lock); local_irq_restore(flags); } while (0) +#define write_unlock_irq(lock) do { write_unlock(lock); local_irq_enable(); } while (0) +#define write_unlock_bh(lock) do { write_unlock(lock); local_bh_enable(); } while (0) +#define spin_trylock_bh(lock) ({ int __r; local_bh_disable();\ + __r = spin_trylock(lock); \ + if (!__r) local_bh_enable(); \ + __r; }) + +#ifdef CONFIG_SMP +#include + +#elif !defined(spin_lock_init) /* !SMP and spin_lock_init not previously + defined (e.g. by including asm/spinlock.h */ + +#define DEBUG_SPINLOCKS 0 /* 0 == no debugging, 1 == maintain lock state, 2 == full debug */ + +#if (DEBUG_SPINLOCKS < 1) + +#define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic) +#define ATOMIC_DEC_AND_LOCK + +/* + * Your basic spinlocks, allowing only a single CPU anywhere + * + * Most gcc versions have a nasty bug with empty initializers. + */ +#if (__GNUC__ > 2) + typedef struct { } spinlock_t; + #define SPIN_LOCK_UNLOCKED (spinlock_t) { } +#else + typedef struct { int gcc_is_buggy; } spinlock_t; + #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#endif + +#define spin_lock_init(lock) do { } while(0) +#define spin_lock(lock) (void)(lock) /* Not "unused variable". 
*/ +#define spin_is_locked(lock) (0) +#define spin_trylock(lock) ({1; }) +#define spin_unlock_wait(lock) do { } while(0) +#define spin_unlock(lock) do { } while(0) + +#elif (DEBUG_SPINLOCKS < 2) + +typedef struct { + volatile unsigned long lock; +} spinlock_t; +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } + +#define spin_lock_init(x) do { (x)->lock = 0; } while (0) +#define spin_is_locked(lock) (test_bit(0,(lock))) +#define spin_trylock(lock) (!test_and_set_bit(0,(lock))) + +#define spin_lock(x) do { (x)->lock = 1; } while (0) +#define spin_unlock_wait(x) do { } while (0) +#define spin_unlock(x) do { (x)->lock = 0; } while (0) + +#else /* (DEBUG_SPINLOCKS >= 2) */ + +typedef struct { + volatile unsigned long lock; + volatile unsigned int babble; + const char *module; +} spinlock_t; +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0, 25, __BASE_FILE__ } + +/*#include */ + +#define spin_lock_init(x) do { (x)->lock = 0; } while (0) +#define spin_is_locked(lock) (test_bit(0,(lock))) +#define spin_trylock(lock) (!test_and_set_bit(0,(lock))) + +#define spin_lock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_lock(%s:%p) already locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 1; restore_flags(__spinflags);} while (0) +#define spin_unlock_wait(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if ((x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock_wait(%s:%p) deadlock\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} restore_flags(__spinflags);} while (0) +#define spin_unlock(x) do {unsigned long __spinflags; save_flags(__spinflags); cli(); if (!(x)->lock&&(x)->babble) {printk("%s:%d: spin_unlock(%s:%p) not locked\n", __BASE_FILE__,__LINE__, (x)->module, (x));(x)->babble--;} (x)->lock = 0; restore_flags(__spinflags);} while (0) + +#endif /* DEBUG_SPINLOCKS */ + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + * + * Most gcc versions have a nasty bug with empty initializers. + */ +#if (__GNUC__ > 2) + typedef struct { } rwlock_t; + #define RW_LOCK_UNLOCKED (rwlock_t) { } +#else + typedef struct { int gcc_is_buggy; } rwlock_t; + #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif + +#define rwlock_init(lock) do { } while(0) +#define read_lock(lock) (void)(lock) /* Not "unused variable". */ +#define read_unlock(lock) do { } while(0) +#define write_lock(lock) (void)(lock) /* Not "unused variable". 
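+ * For example, an interrupt handler may take a plain
+ * read_lock(&map_lock) as long as every writer uses
+ * write_lock_irqsave(&map_lock, flags); the writer can then never
+ * be interrupted by a reader of the same lock on its own CPU.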
*/
+#define write_unlock(lock) do { } while(0)
+
+#endif /* !SMP */
+
+/* "lock on reference count zero" */
+#ifndef ATOMIC_DEC_AND_LOCK
+#include
+extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+#endif
+
+#endif /* __LINUX_SPINLOCK_H */
diff --git a/xen-2.4.16/include/xeno/time.h b/xen-2.4.16/include/xeno/time.h
new file mode 100644
index 0000000000..33837c5009
--- /dev/null
+++ b/xen-2.4.16/include/xeno/time.h
@@ -0,0 +1,30 @@
+/******************************************************************************
+ * time.h
+ */
+
+#ifndef __XENO_TIME_H__
+#define __XENO_TIME_H__
+
+#include
+#include
+
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* microseconds */
+};
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+#ifdef __KERNEL__
+extern void do_gettimeofday(struct timeval *tv);
+extern void do_settimeofday(struct timeval *tv);
+extern void get_fast_time(struct timeval *tv);
+extern void (*do_get_fast_time)(struct timeval *);
+#endif
+
+extern void do_timer(struct pt_regs *regs);
+
+#endif /* __XENO_TIME_H__ */
diff --git a/xen-2.4.16/include/xeno/timer.h b/xen-2.4.16/include/xeno/timer.h
new file mode 100644
index 0000000000..c4f01ada59
--- /dev/null
+++ b/xen-2.4.16/include/xeno/timer.h
@@ -0,0 +1,74 @@
+#ifndef _LINUX_TIMER_H
+#define _LINUX_TIMER_H
+
+#include
+#include
+
+/*
+ * In Linux 2.4, static timers have been removed from the kernel.
+ * Timers may be dynamically created and destroyed, and should be initialized
+ * by a call to init_timer() upon creation.
+ *
+ * The "data" field enables use of a common timeout function for several
+ * timeouts. You can use this field to distinguish between the different
+ * invocations.
+ */
+struct timer_list {
+ struct list_head list;
+ unsigned long expires;
+ unsigned long data;
+ void (*function)(unsigned long);
+};
+
+extern void add_timer(struct timer_list * timer);
+extern int del_timer(struct timer_list * timer);
+
+#ifdef CONFIG_SMP
+extern int del_timer_sync(struct timer_list * timer);
+extern void sync_timers(void);
+#else
+#define del_timer_sync(t) del_timer(t)
+#define sync_timers() do { } while (0)
+#endif
+
+/*
+ * mod_timer is a more efficient way to update the expires field of an
+ * active timer (if the timer is inactive it will be activated).
+ * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
+ * If the timer is known to be not pending (i.e., in the handler), mod_timer
+ * is less efficient than a->expires = b; add_timer(a).
+ */
+int mod_timer(struct timer_list *timer, unsigned long expires);
+
+extern void it_real_fn(unsigned long);
+
+static inline void init_timer(struct timer_list * timer)
+{
+ timer->list.next = timer->list.prev = NULL;
+}
+
+static inline int timer_pending (const struct timer_list * timer)
+{
+ return timer->list.next != NULL;
+}
+
+/*
+ * These inlines deal with timer wrapping correctly. You are
+ * strongly encouraged to use them:
+ * 1. Because people otherwise forget
+ * 2. Because if the timer wrap changes in future you won't have to
+ * alter your driver code.
+ *
+ * time_after(a,b) returns true if the time a is after time b.
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care); gcc is currently neither.
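+ *
+ * Putting the timer API above together (sketch; my_timeout and dev
+ * are hypothetical):
+ *
+ * struct timer_list t;
+ * init_timer(&t);
+ * t.function = my_timeout;
+ * t.data = (unsigned long)dev;
+ * t.expires = jiffies + HZ; -- roughly one second from now
+ * add_timer(&t);
+ *
+ * and time_after(jiffies, deadline) below gives a wrap-safe test
+ * for whether that moment has passed.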
+ */ +#define time_after(a,b) ((long)(b) - (long)(a) < 0) +#define time_before(a,b) time_after(b,a) + +#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) +#define time_before_eq(a,b) time_after_eq(b,a) + +#endif diff --git a/xen-2.4.16/include/xeno/timex.h b/xen-2.4.16/include/xeno/timex.h new file mode 100644 index 0000000000..3a00a26e2d --- /dev/null +++ b/xen-2.4.16/include/xeno/timex.h @@ -0,0 +1,291 @@ +/***************************************************************************** + * * + * Copyright (c) David L. Mills 1993 * + * * + * Permission to use, copy, modify, and distribute this software and its * + * documentation for any purpose and without fee is hereby granted, provided * + * that the above copyright notice appears in all copies and that both the * + * copyright notice and this permission notice appear in supporting * + * documentation, and that the name University of Delaware not be used in * + * advertising or publicity pertaining to distribution of the software * + * without specific, written prior permission. The University of Delaware * + * makes no representations about the suitability this software for any * + * purpose. It is provided "as is" without express or implied warranty. * + * * + *****************************************************************************/ + +/* + * Modification history timex.h + * + * 29 Dec 97 Russell King + * Moved CLOCK_TICK_RATE, CLOCK_TICK_FACTOR and FINETUNE to asm/timex.h + * for ARM machines + * + * 9 Jan 97 Adrian Sun + * Shifted LATCH define to allow access to alpha machines. + * + * 26 Sep 94 David L. Mills + * Added defines for hybrid phase/frequency-lock loop. + * + * 19 Mar 94 David L. Mills + * Moved defines from kernel routines to header file and added new + * defines for PPS phase-lock loop. + * + * 20 Feb 94 David L. Mills + * Revised status codes and structures for external clock and PPS + * signal discipline. + * + * 28 Nov 93 David L. Mills + * Adjusted parameters to improve stability and increase poll + * interval. + * + * 17 Sep 93 David L. Mills + * Created file $NTP/include/sys/timex.h + * 07 Oct 93 Torsten Duwe + * Derived linux/timex.h + * 1995-08-13 Torsten Duwe + * kernel PLL updated to 1994-12-13 specs (rfc-1589) + * 1997-08-30 Ulrich Windl + * Added new constant NTP_PHASE_LIMIT + */ +#ifndef _LINUX_TIMEX_H +#define _LINUX_TIMEX_H + +#include + +/* + * The following defines establish the engineering parameters of the PLL + * model. The HZ variable establishes the timer interrupt frequency, 100 Hz + * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the + * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the + * nearest power of two in order to avoid hardware multiply operations. + */ +#if HZ >= 12 && HZ < 24 +# define SHIFT_HZ 4 +#elif HZ >= 24 && HZ < 48 +# define SHIFT_HZ 5 +#elif HZ >= 48 && HZ < 96 +# define SHIFT_HZ 6 +#elif HZ >= 96 && HZ < 192 +# define SHIFT_HZ 7 +#elif HZ >= 192 && HZ < 384 +# define SHIFT_HZ 8 +#elif HZ >= 384 && HZ < 768 +# define SHIFT_HZ 9 +#elif HZ >= 768 && HZ < 1536 +# define SHIFT_HZ 10 +#else +# error You lose. +#endif + +/* + * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen + * for a slightly underdamped convergence characteristic. SHIFT_KH + * establishes the damping of the FLL and is chosen by wisdom and black + * art. + * + * MAXTC establishes the maximum time constant of the PLL. 
With the + * SHIFT_KG and SHIFT_KF values given and a time constant range from + * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, + * respectively. + */ +#define SHIFT_KG 6 /* phase factor (shift) */ +#define SHIFT_KF 16 /* PLL frequency factor (shift) */ +#define SHIFT_KH 2 /* FLL frequency factor (shift) */ +#define MAXTC 6 /* maximum time constant (shift) */ + +/* + * The SHIFT_SCALE define establishes the decimal point of the time_phase + * variable which serves as an extension to the low-order bits of the + * system clock variable. The SHIFT_UPDATE define establishes the decimal + * point of the time_offset variable which represents the current offset + * with respect to standard time. The FINEUSEC define represents 1 usec in + * scaled units. + * + * SHIFT_USEC defines the scaling (shift) of the time_freq and + * time_tolerance variables, which represent the current frequency + * offset and maximum frequency tolerance. + * + * FINEUSEC is 1 us in SHIFT_UPDATE units of the time_phase variable. + */ +#define SHIFT_SCALE 22 /* phase scale (shift) */ +#define SHIFT_UPDATE (SHIFT_KG + MAXTC) /* time offset scale (shift) */ +#define SHIFT_USEC 16 /* frequency offset scale (shift) */ +#define FINEUSEC (1L << SHIFT_SCALE) /* 1 us in phase units */ + +#define MAXPHASE 512000L /* max phase error (us) */ +#define MAXFREQ (512L << SHIFT_USEC) /* max frequency error (ppm) */ +#define MAXTIME (200L << PPS_AVG) /* max PPS error (jitter) (200 us) */ +#define MINSEC 16L /* min interval between updates (s) */ +#define MAXSEC 1200L /* max interval between updates (s) */ +#define NTP_PHASE_LIMIT (MAXPHASE << 5) /* beyond max. dispersion */ + +/* + * The following defines are used only if a pulse-per-second (PPS) + * signal is available and connected via a modem control lead, such as + * produced by the optional ppsclock feature incorporated in the Sun + * asynch driver. They establish the design parameters of the frequency- + * lock loop used to discipline the CPU clock oscillator to the PPS + * signal. + * + * PPS_AVG is the averaging factor for the frequency loop, as well as + * the time and frequency dispersion. + * + * PPS_SHIFT and PPS_SHIFTMAX specify the minimum and maximum + * calibration intervals, respectively, in seconds as a power of two. + * + * PPS_VALID is the maximum interval before the PPS signal is considered + * invalid and protocol updates used directly instead. + * + * MAXGLITCH is the maximum interval before a time offset of more than + * MAXTIME is believed. + */ +#define PPS_AVG 2 /* pps averaging constant (shift) */ +#define PPS_SHIFT 2 /* min interval duration (s) (shift) */ +#define PPS_SHIFTMAX 8 /* max interval duration (s) (shift) */ +#define PPS_VALID 120 /* pps signal watchdog max (s) */ +#define MAXGLITCH 30 /* pps signal glitch max (s) */ + +/* + * Pick up the architecture specific timex specifications + */ +#include + +/* LATCH is used in the interval timer and ftape setup. 
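+ * For example, on i386 the i8254 PIT runs at CLOCK_TICK_RATE =
+ * 1193180 Hz, so with HZ = 100 the definition below yields
+ * LATCH = (1193180 + 50) / 100 = 11932 (rounding to nearest).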
*/ +#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ + +/* + * syscall interface - used (mainly by NTP daemon) + * to discipline kernel clock oscillator + */ +struct timex { + unsigned int modes; /* mode selector */ + long offset; /* time offset (usec) */ + long freq; /* frequency offset (scaled ppm) */ + long maxerror; /* maximum error (usec) */ + long esterror; /* estimated error (usec) */ + int status; /* clock command/status */ + long constant; /* pll time constant */ + long precision; /* clock precision (usec) (read only) */ + long tolerance; /* clock frequency tolerance (ppm) + * (read only) + */ + struct timeval time; /* (read only) */ + long tick; /* (modified) usecs between clock ticks */ + + long ppsfreq; /* pps frequency (scaled ppm) (ro) */ + long jitter; /* pps jitter (us) (ro) */ + int shift; /* interval duration (s) (shift) (ro) */ + long stabil; /* pps stability (scaled ppm) (ro) */ + long jitcnt; /* jitter limit exceeded (ro) */ + long calcnt; /* calibration intervals (ro) */ + long errcnt; /* calibration errors (ro) */ + long stbcnt; /* stability limit exceeded (ro) */ + + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; + int :32; int :32; int :32; int :32; +}; + +/* + * Mode codes (timex.mode) + */ +#define ADJ_OFFSET 0x0001 /* time offset */ +#define ADJ_FREQUENCY 0x0002 /* frequency offset */ +#define ADJ_MAXERROR 0x0004 /* maximum time error */ +#define ADJ_ESTERROR 0x0008 /* estimated time error */ +#define ADJ_STATUS 0x0010 /* clock status */ +#define ADJ_TIMECONST 0x0020 /* pll time constant */ +#define ADJ_TICK 0x4000 /* tick value */ +#define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ + +/* xntp 3.4 compatibility names */ +#define MOD_OFFSET ADJ_OFFSET +#define MOD_FREQUENCY ADJ_FREQUENCY +#define MOD_MAXERROR ADJ_MAXERROR +#define MOD_ESTERROR ADJ_ESTERROR +#define MOD_STATUS ADJ_STATUS +#define MOD_TIMECONST ADJ_TIMECONST +#define MOD_CLKB ADJ_TICK +#define MOD_CLKA ADJ_OFFSET_SINGLESHOT /* 0x8000 in original */ + + +/* + * Status codes (timex.status) + */ +#define STA_PLL 0x0001 /* enable PLL updates (rw) */ +#define STA_PPSFREQ 0x0002 /* enable PPS freq discipline (rw) */ +#define STA_PPSTIME 0x0004 /* enable PPS time discipline (rw) */ +#define STA_FLL 0x0008 /* select frequency-lock mode (rw) */ + +#define STA_INS 0x0010 /* insert leap (rw) */ +#define STA_DEL 0x0020 /* delete leap (rw) */ +#define STA_UNSYNC 0x0040 /* clock unsynchronized (rw) */ +#define STA_FREQHOLD 0x0080 /* hold frequency (rw) */ + +#define STA_PPSSIGNAL 0x0100 /* PPS signal present (ro) */ +#define STA_PPSJITTER 0x0200 /* PPS signal jitter exceeded (ro) */ +#define STA_PPSWANDER 0x0400 /* PPS signal wander exceeded (ro) */ +#define STA_PPSERROR 0x0800 /* PPS signal calibration error (ro) */ + +#define STA_CLOCKERR 0x1000 /* clock hardware fault (ro) */ + +#define STA_RONLY (STA_PPSSIGNAL | STA_PPSJITTER | STA_PPSWANDER | \ + STA_PPSERROR | STA_CLOCKERR) /* read-only bits */ + +/* + * Clock states (time_state) + */ +#define TIME_OK 0 /* clock synchronized, no leap second */ +#define TIME_INS 1 /* insert leap second */ +#define TIME_DEL 2 /* delete leap second */ +#define TIME_OOP 3 /* leap second in progress */ +#define TIME_WAIT 4 /* leap second has occurred */ +#define TIME_ERROR 5 /* clock not synchronized */ +#define TIME_BAD TIME_ERROR /* bw compat */ + +#ifdef __KERNEL__ +/* + * kernel variables + * Note: maximum error = NTP synch distance = dispersion + delay / 2; + * estimated error = NTP dispersion. 
+ */ +extern long tick; /* timer interrupt period */ +extern int tickadj; /* amount of adjustment per tick */ + +/* + * phase-lock loop variables + */ +extern int time_state; /* clock status */ +extern int time_status; /* clock synchronization status bits */ +extern long time_offset; /* time adjustment (us) */ +extern long time_constant; /* pll time constant */ +extern long time_tolerance; /* frequency tolerance (ppm) */ +extern long time_precision; /* clock precision (us) */ +extern long time_maxerror; /* maximum error */ +extern long time_esterror; /* estimated error */ + +extern long time_phase; /* phase offset (scaled us) */ +extern long time_freq; /* frequency offset (scaled ppm) */ +extern long time_adj; /* tick adjust (scaled 1 / HZ) */ +extern long time_reftime; /* time at last adjustment (s) */ + +extern long time_adjust; /* The amount of adjtime left */ + +/* interface variables pps->timer interrupt */ +extern long pps_offset; /* pps time offset (us) */ +extern long pps_jitter; /* time dispersion (jitter) (us) */ +extern long pps_freq; /* frequency offset (scaled ppm) */ +extern long pps_stabil; /* frequency dispersion (scaled ppm) */ +extern long pps_valid; /* pps signal watchdog counter */ + +/* interface variables pps->adjtimex */ +extern int pps_shift; /* interval duration (s) (shift) */ +extern long pps_jitcnt; /* jitter limit exceeded */ +extern long pps_calcnt; /* calibration intervals */ +extern long pps_errcnt; /* calibration errors */ +extern long pps_stbcnt; /* stability limit exceeded */ + +#endif /* KERNEL */ + +#endif /* LINUX_TIMEX_H */ diff --git a/xen-2.4.16/include/xeno/tqueue.h b/xen-2.4.16/include/xeno/tqueue.h new file mode 100644 index 0000000000..4a730f0ad9 --- /dev/null +++ b/xen-2.4.16/include/xeno/tqueue.h @@ -0,0 +1,125 @@ +/* + * tqueue.h --- task queue handling for Linux. + * + * Mostly based on a proposed bottom-half replacement code written by + * Kai Petzke, wpp@marie.physik.tu-berlin.de. + * + * Modified for use in the Linux kernel by Theodore Ts'o, + * tytso@mit.edu. Any bugs are my fault, not Kai's. + * + * The original comment follows below. + */ + +#ifndef _LINUX_TQUEUE_H +#define _LINUX_TQUEUE_H + +#include +#include +#include +#include + +/* + * New proposed "bottom half" handlers: + * (C) 1994 Kai Petzke, wpp@marie.physik.tu-berlin.de + * + * Advantages: + * - Bottom halfs are implemented as a linked list. You can have as many + * of them, as you want. + * - No more scanning of a bit field is required upon call of a bottom half. + * - Support for chained bottom half lists. The run_task_queue() function can be + * used as a bottom half handler. This is for example useful for bottom + * halfs, which want to be delayed until the next clock tick. + * + * Notes: + * - Bottom halfs are called in the reverse order that they were linked into + * the list. 
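+ *
+ * (Sketch using the initialisation macros defined below, added purely for
+ * illustration; my_routine and my_data are hypothetical names:
+ *
+ *     static struct tq_struct my_task;
+ *     INIT_TQUEUE(&my_task, my_routine, &my_data);
+ *     queue_task(&my_task, &tq_immediate);
+ * )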
+ */ + +struct tq_struct { + struct list_head list; /* linked list of active bh's */ + unsigned long sync; /* must be initialized to zero */ + void (*routine)(void *); /* function to call */ + void *data; /* argument to function */ +}; + +/* + * Emit code to initialise a tq_struct's routine and data pointers + */ +#define PREPARE_TQUEUE(_tq, _routine, _data) \ + do { \ + (_tq)->routine = _routine; \ + (_tq)->data = _data; \ + } while (0) + +/* + * Emit code to initialise all of a tq_struct + */ +#define INIT_TQUEUE(_tq, _routine, _data) \ + do { \ + INIT_LIST_HEAD(&(_tq)->list); \ + (_tq)->sync = 0; \ + PREPARE_TQUEUE((_tq), (_routine), (_data)); \ + } while (0) + +typedef struct list_head task_queue; + +#define DECLARE_TASK_QUEUE(q) LIST_HEAD(q) +#define TQ_ACTIVE(q) (!list_empty(&q)) + +extern task_queue tq_timer, tq_immediate, tq_disk; + +/* + * To implement your own list of active bottom halfs, use the following + * two definitions: + * + * DECLARE_TASK_QUEUE(my_tqueue); + * struct tq_struct my_task = { + * routine: (void (*)(void *)) my_routine, + * data: &my_data + * }; + * + * To activate a bottom half on a list, use: + * + * queue_task(&my_task, &my_tqueue); + * + * To later run the queued tasks use + * + * run_task_queue(&my_tqueue); + * + * This allows you to do deferred processing. For example, you could + * have a task queue called tq_timer, which is executed within the timer + * interrupt. + */ + +extern spinlock_t tqueue_lock; + +/* + * Queue a task on a tq. Return non-zero if it was successfully + * added. + */ +static inline int queue_task(struct tq_struct *bh_pointer, task_queue *bh_list) +{ + int ret = 0; + if (!test_and_set_bit(0,&bh_pointer->sync)) { + unsigned long flags; + spin_lock_irqsave(&tqueue_lock, flags); + list_add_tail(&bh_pointer->list, bh_list); + spin_unlock_irqrestore(&tqueue_lock, flags); + ret = 1; + } + return ret; +} + +/* + * Call all "bottom halfs" on a given list. + */ + +extern void __run_task_queue(task_queue *list); + +static inline void run_task_queue(task_queue *list) +{ + if (TQ_ACTIVE(*list)) + __run_task_queue(list); +} + +#endif /* _LINUX_TQUEUE_H */ diff --git a/xen-2.4.16/include/xeno/types.h b/xen-2.4.16/include/xeno/types.h new file mode 100644 index 0000000000..6a270f3f0c --- /dev/null +++ b/xen-2.4.16/include/xeno/types.h @@ -0,0 +1,31 @@ +#ifndef __TYPES_H__ +#define __TYPES_H__ + +#include + +#ifndef NULL +#define NULL ((void*)0) +#endif + +#define INT_MAX ((int)(~0U>>1)) +#define INT_MIN (-INT_MAX - 1) +#define UINT_MAX (~0U) +#define LONG_MAX ((long)(~0UL>>1)) +#define LONG_MIN (-LONG_MAX - 1) +#define ULONG_MAX (~0UL) + +typedef unsigned int size_t; + +/* bsd */ +typedef unsigned char u_char; +typedef unsigned short u_short; +typedef unsigned int u_int; +typedef unsigned long u_long; + +/* sysv */ +typedef unsigned char unchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#endif /* __TYPES_H__ */ diff --git a/xen-2.4.16/include/xeno/vif.h b/xen-2.4.16/include/xeno/vif.h new file mode 100644 index 0000000000..3ef0f1f991 --- /dev/null +++ b/xen-2.4.16/include/xeno/vif.h @@ -0,0 +1,36 @@ +/* vif.h + * + * this is the hypervisor end of the network code. The net_ring structure + * stored in each vif is placed on a shared page to interact with the guest VM. + */ + +/* virtual network interface struct and associated defines. 
*/ +/* net_vif_st is the larger struct that describes a virtual network interface + * it contains a pointer to the net_ring_t structure that needs to be on a + * shared page between the hypervisor and guest. The vif struct is private + * to the hypervisor and is used primarily as a container to allow routing + * and interface administration. This define should eventually be moved to + * a non-shared interface file, as it is of no relevance to the guest. + */ + +#include +#include + +typedef struct net_vif_st { + net_ring_t *net_ring; + int id; + struct sk_buff_head skb_list; + // rules table goes here in next revision. +} net_vif_t; + +/* VIF-related defines. */ +#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct +#define MAX_SYSTEM_VIFS 256 // trying to avoid dynamic allocation + +/* vif globals */ +extern int sys_vif_count; + +/* vif prototypes */ +net_ring_t *create_net_vif(int domain); +void destroy_net_vif(struct task_struct *p); + diff --git a/xen-2.4.16/net/Makefile b/xen-2.4.16/net/Makefile new file mode 100644 index 0000000000..e9a8eba3d7 --- /dev/null +++ b/xen-2.4.16/net/Makefile @@ -0,0 +1,8 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) -r -o network.o $(OBJS) + +clean: + rm -f *.o *~ core diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c new file mode 100644 index 0000000000..160aedc191 --- /dev/null +++ b/xen-2.4.16/net/dev.c @@ -0,0 +1,2007 @@ +/* + * NET3 Protocol independent device support routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define BUG_TRAP ASSERT +#define notifier_call_chain(_a,_b,_c) ((void)0) +#define rtmsg_ifinfo(_a,_b,_c) ((void)0) +#define rtnl_lock() ((void)0) +#define rtnl_unlock() ((void)0) +#define dst_init() ((void)0) + +struct net_device *the_dev = NULL; + +/* + * Device drivers call our routines to queue packets here. We empty the + * queue in the local softnet handler. + */ +struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; + + +/***************************************************************************************** + + Device Interface Subroutines + +******************************************************************************************/ + +/** + * __dev_get_by_name - find a device by its name + * @name: name to find + * + * Find an interface by name. Must be called under RTNL semaphore + * or @dev_base_lock. If the name is found a pointer to the device + * is returned. If the name is not found then %NULL is returned. The + * reference counters are not incremented so the caller must be + * careful with locks. + */ + + +struct net_device *__dev_get_by_name(const char *name) +{ + struct net_device *dev; + + for (dev = dev_base; dev != NULL; dev = dev->next) { + if (strncmp(dev->name, name, IFNAMSIZ) == 0) + return dev; + } + return NULL; +} + +/** + * dev_get_by_name - find a device by its name + * @name: name to find + * + * Find an interface by name. This can be called from any + * context and does its own locking. 
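+ * (Usage sketch, added for illustration with a hypothetical device name:
+ *
+ *     struct net_device *d = dev_get_by_name("eth0");
+ *     if (d) {
+ *         ... use d ...
+ *         dev_put(d);
+ *     }
+ * )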
The returned handle has
+ * the usage count incremented and the caller must use dev_put() to
+ * release it when it is no longer needed. %NULL is returned if no
+ * matching device is found.
+ */
+
+struct net_device *dev_get_by_name(const char *name)
+{
+    struct net_device *dev;
+
+    read_lock(&dev_base_lock);
+    dev = __dev_get_by_name(name);
+    if (dev)
+        dev_hold(dev);
+    read_unlock(&dev_base_lock);
+    return dev;
+}
+
+/*
+   The return value was changed to int to prevent illegal usage in the
+   future. It is still legal to use it to check for device existence.
+
+   Users should understand that the result returned by this function
+   is meaningless unless it was issued under the rtnl semaphore.
+ */
+
+/**
+ * dev_get - test if a device exists
+ * @name: name to test for
+ *
+ * Test if a name exists. Returns true if the name is found. In order
+ * to be sure the name is not allocated or removed during the test the
+ * caller must hold the rtnl semaphore.
+ *
+ * This function primarily exists for backward compatibility with older
+ * drivers.
+ */
+
+int dev_get(const char *name)
+{
+    struct net_device *dev;
+
+    read_lock(&dev_base_lock);
+    dev = __dev_get_by_name(name);
+    read_unlock(&dev_base_lock);
+    return dev != NULL;
+}
+
+/**
+ * __dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns %NULL if the device
+ * is not found, or a pointer to the device. The device has not
+ * had its reference counter increased so the caller must be careful
+ * about locking. The caller must hold either the RTNL semaphore
+ * or @dev_base_lock.
+ */
+
+struct net_device * __dev_get_by_index(int ifindex)
+{
+    struct net_device *dev;
+
+    for (dev = dev_base; dev != NULL; dev = dev->next) {
+        if (dev->ifindex == ifindex)
+            return dev;
+    }
+    return NULL;
+}
+
+
+/**
+ * dev_get_by_index - find a device by its ifindex
+ * @ifindex: index of device
+ *
+ * Search for an interface by index. Returns NULL if the device
+ * is not found, or a pointer to the device. The device returned has
+ * had a reference added and the pointer is safe until the user calls
+ * dev_put to indicate they have finished with it.
+ */
+
+struct net_device * dev_get_by_index(int ifindex)
+{
+    struct net_device *dev;
+
+    read_lock(&dev_base_lock);
+    dev = __dev_get_by_index(ifindex);
+    if (dev)
+        dev_hold(dev);
+    read_unlock(&dev_base_lock);
+    return dev;
+}
+
+/**
+ * dev_getbyhwaddr - find a device by its hardware address
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Search for an interface by MAC address. Returns NULL if the device
+ * is not found, or a pointer to the device. The caller must hold the
+ * rtnl semaphore. The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking.
+ *
+ * BUGS:
+ * If the API was consistent this would be __dev_get_by_hwaddr.
+ */
+
+struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+{
+    struct net_device *dev;
+
+    for (dev = dev_base; dev != NULL; dev = dev->next) {
+        if (dev->type == type &&
+            memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
+            return dev;
+    }
+    return NULL;
+}
+
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string, e.g. "lt%d", it will try to find a suitable
+ * id. Not efficient for many devices, not called a lot. The caller
+ * must hold the dev_base or rtnl lock while allocating the name and
+ * adding the device in order to avoid duplicates.
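+ * (For example, and purely as an illustration: requesting the format
+ * "eth%d" when eth0 and eth1 already exist assigns "eth2" and returns 2.)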
Returns the number + * of the unit assigned or a negative errno code. + */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + int i; + char buf[32]; + char *p; + + /* + * Verify the string as this thing may have come from + * the user. There must be either one "%d" and no other "%" + * characters, or no "%" characters at all. + */ + p = strchr(name, '%'); + if (p && (p[1] != 'd' || strchr(p+2, '%'))) + return -EINVAL; + + /* + * If you need over 100 please also fix the algorithm... + */ + for (i = 0; i < 100; i++) { + snprintf(buf,sizeof(buf),name,i); + if (__dev_get_by_name(buf) == NULL) { + strcpy(dev->name, buf); + return i; + } + } + return -ENFILE; /* Over 100 of the things .. bail out! */ +} + +/** + * dev_alloc - allocate a network device and name + * @name: name format string + * @err: error return pointer + * + * Passed a format string, eg. "lt%d", it will allocate a network device + * and space for the name. %NULL is returned if no memory is available. + * If the allocation succeeds then the name is assigned and the + * device pointer returned. %NULL is returned if the name allocation + * failed. The cause of an error is returned as a negative errno code + * in the variable @err points to. + * + * The caller must hold the @dev_base or RTNL locks when doing this in + * order to avoid duplicate name allocations. + */ + +struct net_device *dev_alloc(const char *name, int *err) +{ + struct net_device *dev=kmalloc(sizeof(struct net_device), GFP_KERNEL); + if (dev == NULL) { + *err = -ENOBUFS; + return NULL; + } + memset(dev, 0, sizeof(struct net_device)); + *err = dev_alloc_name(dev, name); + if (*err < 0) { + kfree(dev); + return NULL; + } + return dev; +} + +/** + * netdev_state_change - device changes state + * @dev: device to cause notification + * + * Called to indicate a device has changed state. This function calls + * the notifier chains for netdev_chain and sends a NEWLINK message + * to the routing socket. + */ + +void netdev_state_change(struct net_device *dev) +{ + if (dev->flags&IFF_UP) { + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); + } +} + + +#ifdef CONFIG_KMOD + +/** + * dev_load - load a network module + * @name: name of interface + * + * If a network interface is not present and the process has suitable + * privileges this function loads the module. If module loading is not + * available in this kernel then it becomes a nop. + */ + +void dev_load(const char *name) +{ + if (!dev_get(name) && capable(CAP_SYS_MODULE)) + request_module(name); +} + +#else + +extern inline void dev_load(const char *unused){;} + +#endif + +static int default_rebuild_header(struct sk_buff *skb) +{ + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", skb->dev ? skb->dev->name : "NULL!!!"); + kfree_skb(skb); + return 1; +} + +/** + * dev_open - prepare an interface for use. + * @dev: device to open + * + * Takes a device from down to up state. The device's private open + * function is invoked and then the multicast lists are loaded. Finally + * the device is moved into the up state and a %NETDEV_UP message is + * sent to the netdev notifier chain. + * + * Calling this function on an active interface is a nop. On a failure + * a negative errno code is returned. + */ + +int dev_open(struct net_device *dev) +{ + int ret = 0; + + /* + * Is it already up? + */ + + if (dev->flags&IFF_UP) + return 0; + + /* + * Is it even present? 
+ */
+    if (!netif_device_present(dev))
+        return -ENODEV;
+
+    /*
+     * Call device private open method
+     */
+    if (try_inc_mod_count(dev->owner)) {
+        if (dev->open) {
+            ret = dev->open(dev);
+            if (ret != 0 && dev->owner)
+                __MOD_DEC_USE_COUNT(dev->owner);
+        }
+    } else {
+        ret = -ENODEV;
+    }
+
+    /*
+     * If it went open OK then:
+     */
+
+    if (ret == 0)
+    {
+        /*
+         * Set the flags.
+         */
+        dev->flags |= IFF_UP;
+
+        set_bit(__LINK_STATE_START, &dev->state);
+
+        /*
+         * Initialize multicasting status
+         */
+        dev_mc_upload(dev);
+
+        /*
+         * Wakeup transmit queue engine
+         */
+        dev_activate(dev);
+
+        /*
+         * ... and announce new interface.
+         */
+        notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+    }
+    return(ret);
+}
+
+
+/**
+ * dev_close - shutdown an interface.
+ * @dev: device to shutdown
+ *
+ * This function moves an active device into down state. A
+ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
+ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
+ * chain.
+ */
+
+int dev_close(struct net_device *dev)
+{
+    if (!(dev->flags&IFF_UP))
+        return 0;
+
+    /*
+     * Tell people we are going down, so that they can
+     * prepare for its death while the device is still operating.
+     */
+    notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+
+    dev_deactivate(dev);
+
+    clear_bit(__LINK_STATE_START, &dev->state);
+
+    /*
+     * Call the device-specific close. This cannot fail, and is
+     * only done if the device is UP.
+     *
+     * We allow it to be called even after a DETACH hot-plug
+     * event.
+     */
+
+    if (dev->stop)
+        dev->stop(dev);
+
+    /*
+     * Device is now down.
+     */
+
+    dev->flags &= ~IFF_UP;
+
+    /*
+     * Tell people we are down
+     */
+    notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+
+    /*
+     * Drop the module refcount
+     */
+    if (dev->owner)
+        __MOD_DEC_USE_COUNT(dev->owner);
+
+    return(0);
+}
+
+
+#ifdef CONFIG_HIGHMEM
+/* Actually, we should eliminate this check as soon as we know that:
+ * 1. An IOMMU is present and allows us to map all the memory.
+ * 2. No high memory really exists on this machine.
+ */
+
+static inline int
+illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+{
+    int i;
+
+    if (dev->features&NETIF_F_HIGHDMA)
+        return 0;
+
+    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+        if (skb_shinfo(skb)->frags[i].page >= highmem_start_page)
+            return 1;
+
+    return 0;
+}
+#else
+#define illegal_highdma(dev, skb) (0)
+#endif
+
+/**
+ * dev_queue_xmit - transmit a buffer
+ * @skb: buffer to transmit
+ *
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling this
+ * function. The function can be called from an interrupt.
+ *
+ * A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
+ */
+
+int dev_queue_xmit(struct sk_buff *skb)
+{
+    struct net_device *dev = skb->dev;
+    struct Qdisc *q;
+
+    if (skb_shinfo(skb)->frag_list &&
+        !(dev->features&NETIF_F_FRAGLIST) &&
+        skb_linearize(skb, GFP_ATOMIC) != 0) {
+        kfree_skb(skb);
+        return -ENOMEM;
+    }
+
+    /* A fragmented skb is linearized if the device does not support SG,
+     * or if at least one of the fragments is in highmem and the device
+     * does not support DMA from it.
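+     * (Concrete illustration, not from the original comment: a
+     * three-fragment skb headed for a NIC without NETIF_F_SG is copied
+     * into a single linear buffer by skb_linearize() below, before
+     * hard_start_xmit() ever sees it.)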
+     */
+    if (skb_shinfo(skb)->nr_frags &&
+        (!(dev->features&NETIF_F_SG) || illegal_highdma(dev, skb)) &&
+        skb_linearize(skb, GFP_ATOMIC) != 0) {
+        kfree_skb(skb);
+        return -ENOMEM;
+    }
+
+    /* Grab device queue */
+    spin_lock_bh(&dev->queue_lock);
+    q = dev->qdisc;
+    if (q->enqueue) {
+        int ret = q->enqueue(skb, q);
+
+        qdisc_run(dev);
+
+        spin_unlock_bh(&dev->queue_lock);
+        return ret == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : ret;
+    }
+
+    /* The device has no queue. This is the common case for software
+       devices: loopback and all sorts of tunnels...
+
+       Really, it is unlikely that xmit_lock protection is necessary here.
+       (E.g. loopback and IP tunnels are clean, ignoring statistics
+       counters.) However, it is possible that they rely on the protection
+       made by us here.
+
+       Check this and take the lock anyway; it is not prone to deadlocks.
+       Alternatively, shortcut the noqueue qdisc case, which is even
+       simpler 8)
+     */
+    if (dev->flags&IFF_UP) {
+        int cpu = smp_processor_id();
+
+        if (dev->xmit_lock_owner != cpu) {
+            spin_unlock(&dev->queue_lock);
+            spin_lock(&dev->xmit_lock);
+            dev->xmit_lock_owner = cpu;
+
+            if (!netif_queue_stopped(dev)) {
+                if (dev->hard_start_xmit(skb, dev) == 0) {
+                    dev->xmit_lock_owner = -1;
+                    spin_unlock_bh(&dev->xmit_lock);
+                    return 0;
+                }
+            }
+            dev->xmit_lock_owner = -1;
+            spin_unlock_bh(&dev->xmit_lock);
+            kfree_skb(skb);
+            return -ENETDOWN;
+        } else {
+            /* Recursion was detected! It is possible, unfortunately. */
+        }
+    }
+    spin_unlock_bh(&dev->queue_lock);
+
+    kfree_skb(skb);
+    return -ENETDOWN;
+}
+
+
+/*=======================================================================
+                        Receiver routines
+  =======================================================================*/
+
+int netdev_max_backlog = 300;
+/* These numbers are selected based on intuition and some
+ * experimentation; if you have a more scientific way of doing this,
+ * please go ahead and fix things.
+ */
+int no_cong_thresh = 10;
+int no_cong = 20;
+int lo_cong = 100;
+int mod_cong = 290;
+
+struct netif_rx_stats netdev_rx_stat[NR_CPUS];
+
+
+#ifdef CONFIG_NET_HW_FLOWCONTROL
+atomic_t netdev_dropping = ATOMIC_INIT(0);
+static unsigned long netdev_fc_mask = 1;
+unsigned long netdev_fc_xoff = 0;
+spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
+
+static struct
+{
+    void (*stimul)(struct net_device *);
+    struct net_device *dev;
+} netdev_fc_slots[BITS_PER_LONG];
+
+int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev))
+{
+    int bit = 0;
+    unsigned long flags;
+
+    spin_lock_irqsave(&netdev_fc_lock, flags);
+    if (netdev_fc_mask != ~0UL) {
+        bit = ffz(netdev_fc_mask);
+        netdev_fc_slots[bit].stimul = stimul;
+        netdev_fc_slots[bit].dev = dev;
+        set_bit(bit, &netdev_fc_mask);
+        clear_bit(bit, &netdev_fc_xoff);
+    }
+    spin_unlock_irqrestore(&netdev_fc_lock, flags);
+    return bit;
+}
+
+void netdev_unregister_fc(int bit)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&netdev_fc_lock, flags);
+    if (bit > 0) {
+        netdev_fc_slots[bit].stimul = NULL;
+        netdev_fc_slots[bit].dev = NULL;
+        clear_bit(bit, &netdev_fc_mask);
+        clear_bit(bit, &netdev_fc_xoff);
+    }
+    spin_unlock_irqrestore(&netdev_fc_lock, flags);
+}
+
+static void netdev_wakeup(void)
+{
+    unsigned long xoff;
+
+    spin_lock(&netdev_fc_lock);
+    xoff = netdev_fc_xoff;
+    netdev_fc_xoff = 0;
+    while (xoff) {
+        int i = ffz(~xoff);
+        xoff &= ~(1<<i);
+        netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
+    }
+    spin_unlock(&netdev_fc_lock);
+}
+#endif
+
+static void get_sample_stats(int cpu)
+{
+    int blog = softnet_data[cpu].input_pkt_queue.qlen;
+    int avg_blog = softnet_data[cpu].avg_blog;
+
+    avg_blog = (avg_blog >> 1) + (blog >> 1);
+
+    if (avg_blog > mod_cong) {
+        /* Above moderate congestion levels.
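+         * (Worked example for the averaging step just above, added for
+         * illustration: with avg_blog = 120 and a current backlog of 40,
+         * the new average is (120 >> 1) + (40 >> 1) = 60 + 20 = 80, i.e.
+         * an equal-weight exponential moving average.)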
*/ + softnet_data[cpu].cng_level = NET_RX_CN_HIGH; + } else if (avg_blog > lo_cong) { + softnet_data[cpu].cng_level = NET_RX_CN_MOD; + } else if (avg_blog > no_cong) + softnet_data[cpu].cng_level = NET_RX_CN_LOW; + else /* no congestion */ + softnet_data[cpu].cng_level = NET_RX_SUCCESS; + + softnet_data[cpu].avg_blog = avg_blog; +} + + +/** + * netif_rx - post buffer to the network code + * @skb: buffer to post + * + * This function receives a packet from a device driver and queues it for + * the upper (protocol) levels to process. It always succeeds. The buffer + * may be dropped during processing for congestion control or by the + * protocol layers. + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_CN_LOW (low congestion) + * NET_RX_CN_MOD (moderate congestion) + * NET_RX_CN_HIGH (high congestion) + * NET_RX_DROP (packet was dropped) + * + * + */ + +int netif_rx(struct sk_buff *skb) +{ +#ifdef CONFIG_SMP + unsigned long cpu_mask; +#endif + struct task_struct *p; + unsigned int dest_dom; + int this_cpu = smp_processor_id(); + struct softnet_data *queue; + unsigned long flags; + + if (skb->stamp.tv_sec == 0) + get_fast_time(&skb->stamp); + + /* The code is rearranged so that the path is the most + short when CPU is congested, but is still operating. + */ + queue = &softnet_data[this_cpu]; + + local_irq_save(flags); + + netdev_rx_stat[this_cpu].total++; + + skb->h.raw = skb->nh.raw = skb->data; + + if ( skb->len < 2 ) goto drop; + switch ( ntohs(skb->mac.ethernet->h_proto) ) + { + case ETH_P_ARP: + if ( skb->len < 28 ) goto drop; + dest_dom = ntohl(*(unsigned long *) + (skb->nh.raw + 24)); + break; + case ETH_P_IP: + if ( skb->len < 20 ) goto drop; + dest_dom = ntohl(*(unsigned long *) + (skb->nh.raw + 16)); + break; + default: + goto drop; + } + + if ( (dest_dom < opt_ipbase) || + (dest_dom > (opt_ipbase + 16)) ) + goto drop; + + dest_dom -= opt_ipbase; + + read_lock(&tasklist_lock); + p = &idle0_task; + do { + if ( p->domain != dest_dom ) continue; + skb_queue_tail(&p->net_vif_list[0]->skb_list, skb); // vfr will fix. + cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX); + read_unlock(&tasklist_lock); + goto found; + } + while ( (p = p->next_task) != &idle0_task ); + read_unlock(&tasklist_lock); + goto drop; + + found: +#if 0 + __skb_queue_tail(&queue->input_pkt_queue,skb); + /* Runs from irqs or BH's, no need to wake BH */ + cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + local_irq_restore(flags); + get_sample_stats(this_cpu); + return softnet_data[this_cpu].cng_level; +#else + hyp_event_notify(cpu_mask); + local_irq_restore(flags); + return 0; +#endif + +drop: + netdev_rx_stat[this_cpu].dropped++; + local_irq_restore(flags); + + kfree_skb(skb); + return NET_RX_DROP; +} + +/* Deliver skb to an old protocol, which is not threaded well + or which do not understand shared skbs. + */ +static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) +{ + static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; + int ret = NET_RX_DROP; + + + if (!last) { + skb = skb_clone(skb, GFP_ATOMIC); + if (skb == NULL) + return ret; + } + if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { + kfree_skb(skb); + return ret; + } + + /* The assumption (correct one) is that old protocols + did not depened on BHs different of NET_BH and TIMER_BH. 
+ */ + + /* Emulate NET_BH with special spinlock */ + spin_lock(&net_bh_lock); + + /* Disable timers and wait for all timers completion */ + tasklet_disable(bh_task_vec+TIMER_BH); + + ret = pt->func(skb, skb->dev, pt); + + tasklet_hi_enable(bh_task_vec+TIMER_BH); + spin_unlock(&net_bh_lock); + return ret; +} + +static void net_tx_action(struct softirq_action *h) +{ + int cpu = smp_processor_id(); + + if (softnet_data[cpu].completion_queue) { + struct sk_buff *clist; + + local_irq_disable(); + clist = softnet_data[cpu].completion_queue; + softnet_data[cpu].completion_queue = NULL; + local_irq_enable(); + + while (clist != NULL) { + struct sk_buff *skb = clist; + clist = clist->next; + + BUG_TRAP(atomic_read(&skb->users) == 0); + __kfree_skb(skb); + } + } + + if (softnet_data[cpu].output_queue) { + struct net_device *head; + + local_irq_disable(); + head = softnet_data[cpu].output_queue; + softnet_data[cpu].output_queue = NULL; + local_irq_enable(); + + while (head != NULL) { + struct net_device *dev = head; + head = head->next_sched; + + smp_mb__before_clear_bit(); + clear_bit(__LINK_STATE_SCHED, &dev->state); + + if (spin_trylock(&dev->queue_lock)) { + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + } else { + netif_schedule(dev); + } + } + } +} + + +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL; +#endif + +static __inline__ int handle_bridge(struct sk_buff *skb, + struct packet_type *pt_prev) +{ + int ret = NET_RX_DROP; + + if (pt_prev) { + if (!pt_prev->data) + ret = deliver_to_old_ones(pt_prev, skb, 0); + else { + atomic_inc(&skb->users); + ret = pt_prev->func(skb, skb->dev, pt_prev); + } + } + +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) + br_handle_frame_hook(skb); +#endif + return ret; +} + + +#ifdef CONFIG_NET_DIVERT +static inline void handle_diverter(struct sk_buff *skb) +{ + /* if diversion is supported on device, then divert */ + if (skb->dev->divert && skb->dev->divert->divert) + divert_frame(skb); +} +#endif /* CONFIG_NET_DIVERT */ + + +void flush_rx_queue(void) +{ + struct sk_buff *skb; + shared_info_t *s = current->shared_info; + net_ring_t *net_ring; + unsigned int i, nvif; + rx_entry_t rx; + + clear_bit(_HYP_EVENT_NET_RX, ¤t->hyp_events); + + for (nvif = 0; nvif < current->num_net_vifs; nvif++) + { + net_ring = current->net_vif_list[nvif]->net_ring; + while ( (skb = skb_dequeue(¤t->net_vif_list[nvif]->skb_list)) + != NULL ) + { + /* + * Write the virtual MAC address into the destination field + * of the ethernet packet. Furthermore, do the same for ARP + * reply packets. This is easy because the virtual MAC address + * is always 00-00-00-00-00-00. + */ + memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN); + if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) + memset(skb->nh.raw + 18, 0, ETH_ALEN); + + i = net_ring->rx_cons; + if ( i != net_ring->rx_prod ) + { + if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) ) + { + if ( (skb->len + ETH_HLEN) < rx.size ) + rx.size = skb->len + ETH_HLEN; + copy_to_user((void *)rx.addr, skb->mac.raw, rx.size); + copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx)); + } + net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1); + if ( net_ring->rx_cons == net_ring->rx_event ) + set_bit(_EVENT_NET_RX, &s->events); + } + kfree_skb(skb); + } + } +} + + +/* + * Map an interface index to its name (SIOCGIFNAME) + */ + +/* + * We need this ioctl for efficient implementation of the + * if_indextoname() function required by the IPv6 API. 
Without + * it, we would have to search all the interfaces to find a + * match. --pb + */ + +static int dev_ifname(struct ifreq *arg) +{ + struct net_device *dev; + struct ifreq ifr; + + /* + * Fetch the caller's info block. + */ + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + read_lock(&dev_base_lock); + dev = __dev_get_by_index(ifr.ifr_ifindex); + if (!dev) { + read_unlock(&dev_base_lock); + return -ENODEV; + } + + strcpy(ifr.ifr_name, dev->name); + read_unlock(&dev_base_lock); + + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return 0; +} + + +/** + * netdev_set_master - set up master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success the reference counts + * are adjusted, %RTM_NEWLINK is sent to the routing socket and the + * function returns zero. + */ + +int netdev_set_master(struct net_device *slave, struct net_device *master) +{ + struct net_device *old = slave->master; + + if (master) { + if (old) + return -EBUSY; + dev_hold(master); + } + + br_write_lock_bh(BR_NETPROTO_LOCK); + slave->master = master; + br_write_unlock_bh(BR_NETPROTO_LOCK); + + if (old) + dev_put(old); + + if (master) + slave->flags |= IFF_SLAVE; + else + slave->flags &= ~IFF_SLAVE; + + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); + return 0; +} + +/** + * dev_set_promiscuity - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promsicuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + */ + +void dev_set_promiscuity(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + dev->flags |= IFF_PROMISC; + if ((dev->promiscuity += inc) == 0) + dev->flags &= ~IFF_PROMISC; + if (dev->flags^old_flags) { +#ifdef CONFIG_NET_FASTROUTE + if (dev->flags&IFF_PROMISC) { + netdev_fastroute_obstacles++; + dev_clear_fastroute(dev); + } else + netdev_fastroute_obstacles--; +#endif + dev_mc_upload(dev); + printk(KERN_INFO "device %s %s promiscuous mode\n", + dev->name, (dev->flags&IFF_PROMISC) ? "entered" : "left"); + } +} + +/** + * dev_set_allmulti - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + */ + +void dev_set_allmulti(struct net_device *dev, int inc) +{ + unsigned short old_flags = dev->flags; + + dev->flags |= IFF_ALLMULTI; + if ((dev->allmulti += inc) == 0) + dev->flags &= ~IFF_ALLMULTI; + if (dev->flags^old_flags) + dev_mc_upload(dev); +} + +int dev_change_flags(struct net_device *dev, unsigned flags) +{ + int ret; + int old_flags = dev->flags; + + /* + * Set the flags on our device. 
+ */ + + dev->flags = (flags & (IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|IFF_DYNAMIC| + IFF_MULTICAST|IFF_PORTSEL|IFF_AUTOMEDIA)) | + (dev->flags & (IFF_UP|IFF_VOLATILE|IFF_PROMISC|IFF_ALLMULTI)); + + /* + * Load in the correct multicast list now the flags have changed. + */ + + dev_mc_upload(dev); + + /* + * Have we downed the interface. We handle IFF_UP ourselves + * according to user attempts to set it, rather than blindly + * setting it. + */ + + ret = 0; + if ((old_flags^flags)&IFF_UP) /* Bit is different ? */ + { + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); + + if (ret == 0) + dev_mc_upload(dev); + } + + if (dev->flags&IFF_UP && + ((old_flags^dev->flags)&~(IFF_UP|IFF_PROMISC|IFF_ALLMULTI|IFF_VOLATILE))) + notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); + + if ((flags^dev->gflags)&IFF_PROMISC) { + int inc = (flags&IFF_PROMISC) ? +1 : -1; + dev->gflags ^= IFF_PROMISC; + dev_set_promiscuity(dev, inc); + } + + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI + is important. Some (broken) drivers set IFF_PROMISC, when + IFF_ALLMULTI is requested not asking us and not reporting. + */ + if ((flags^dev->gflags)&IFF_ALLMULTI) { + int inc = (flags&IFF_ALLMULTI) ? +1 : -1; + dev->gflags ^= IFF_ALLMULTI; + dev_set_allmulti(dev, inc); + } + + if (old_flags^dev->flags) + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags^dev->flags); + + return ret; +} + +/* + * Perform the SIOCxIFxxx calls. + */ + +static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +{ + struct net_device *dev; + int err; + + if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + return -ENODEV; + + switch(cmd) + { + case SIOCGIFFLAGS: /* Get interface flags */ + ifr->ifr_flags = (dev->flags&~(IFF_PROMISC|IFF_ALLMULTI|IFF_RUNNING)) + |(dev->gflags&(IFF_PROMISC|IFF_ALLMULTI)); + if (netif_running(dev) && netif_carrier_ok(dev)) + ifr->ifr_flags |= IFF_RUNNING; + return 0; + + case SIOCSIFFLAGS: /* Set interface flags */ + return dev_change_flags(dev, ifr->ifr_flags); + + case SIOCGIFMETRIC: /* Get the metric on the interface (currently unused) */ + ifr->ifr_metric = 0; + return 0; + + case SIOCSIFMETRIC: /* Set the metric on the interface (currently unused) */ + return -EOPNOTSUPP; + + case SIOCGIFMTU: /* Get the MTU of a device */ + ifr->ifr_mtu = dev->mtu; + return 0; + + case SIOCSIFMTU: /* Set the MTU of a device */ + if (ifr->ifr_mtu == dev->mtu) + return 0; + + /* + * MTU must be positive. 
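+ * (Strictly speaking, the test below only rejects negative values; an MTU
+ * of zero is passed through to the driver's change_mtu hook. Annotation
+ * added for clarity, not part of the original comment.)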
+ */ + + if (ifr->ifr_mtu<0) + return -EINVAL; + + if (!netif_device_present(dev)) + return -ENODEV; + + if (dev->change_mtu) + err = dev->change_mtu(dev, ifr->ifr_mtu); + else { + dev->mtu = ifr->ifr_mtu; + err = 0; + } + if (!err && dev->flags&IFF_UP) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); + return err; + + case SIOCGIFHWADDR: + memcpy(ifr->ifr_hwaddr.sa_data,dev->dev_addr, MAX_ADDR_LEN); + ifr->ifr_hwaddr.sa_family=dev->type; + return 0; + + case SIOCSIFHWADDR: + if (dev->set_mac_address == NULL) + return -EOPNOTSUPP; + if (ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + err = dev->set_mac_address(dev, &ifr->ifr_hwaddr); + if (!err) + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return err; + + case SIOCSIFHWBROADCAST: + if (ifr->ifr_hwaddr.sa_family!=dev->type) + return -EINVAL; + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, MAX_ADDR_LEN); + notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); + return 0; + + case SIOCGIFMAP: + ifr->ifr_map.mem_start=dev->mem_start; + ifr->ifr_map.mem_end=dev->mem_end; + ifr->ifr_map.base_addr=dev->base_addr; + ifr->ifr_map.irq=dev->irq; + ifr->ifr_map.dma=dev->dma; + ifr->ifr_map.port=dev->if_port; + return 0; + + case SIOCSIFMAP: + if (dev->set_config) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->set_config(dev,&ifr->ifr_map); + } + return -EOPNOTSUPP; + + case SIOCADDMULTI: + if (dev->set_multicast_list == NULL || + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + dev_mc_add(dev,ifr->ifr_hwaddr.sa_data, dev->addr_len, 1); + return 0; + + case SIOCDELMULTI: + if (dev->set_multicast_list == NULL || + ifr->ifr_hwaddr.sa_family!=AF_UNSPEC) + return -EINVAL; + if (!netif_device_present(dev)) + return -ENODEV; + dev_mc_delete(dev,ifr->ifr_hwaddr.sa_data,dev->addr_len, 1); + return 0; + + case SIOCGIFINDEX: + ifr->ifr_ifindex = dev->ifindex; + return 0; + + case SIOCGIFTXQLEN: + ifr->ifr_qlen = dev->tx_queue_len; + return 0; + + case SIOCSIFTXQLEN: + if (ifr->ifr_qlen<0) + return -EINVAL; + dev->tx_queue_len = ifr->ifr_qlen; + return 0; + + case SIOCSIFNAME: + if (dev->flags&IFF_UP) + return -EBUSY; + if (__dev_get_by_name(ifr->ifr_newname)) + return -EEXIST; + memcpy(dev->name, ifr->ifr_newname, IFNAMSIZ); + dev->name[IFNAMSIZ-1] = 0; + notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + return 0; + +#ifdef WIRELESS_EXT + case SIOCGIWSTATS: + return dev_iwstats(dev, ifr); +#endif /* WIRELESS_EXT */ + + /* + * Unknown or private ioctl + */ + + default: + if ((cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) || + cmd == SIOCBONDENSLAVE || + cmd == SIOCBONDRELEASE || + cmd == SIOCBONDSETHWADDR || + cmd == SIOCBONDSLAVEINFOQUERY || + cmd == SIOCBONDINFOQUERY || + cmd == SIOCBONDCHANGEACTIVE || + cmd == SIOCETHTOOL || + cmd == SIOCGMIIPHY || + cmd == SIOCGMIIREG || + cmd == SIOCSMIIREG) { + if (dev->do_ioctl) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->do_ioctl(dev, ifr, cmd); + } + return -EOPNOTSUPP; + } + +#ifdef WIRELESS_EXT + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + if (dev->do_ioctl) { + if (!netif_device_present(dev)) + return -ENODEV; + return dev->do_ioctl(dev, ifr, cmd); + } + return -EOPNOTSUPP; + } +#endif /* WIRELESS_EXT */ + + } + return -EINVAL; +} + +/* + * This function handles all "interface"-type I/O control requests. The actual + * 'doing' part of this is dev_ifsioc above. 
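+ * (Illustrative sketch of that division of labour, using a hypothetical
+ * interface name; on success ifr.ifr_mtu holds the MTU:
+ *
+ *     struct ifreq ifr;
+ *     int err;
+ *     strcpy(ifr.ifr_name, "eth0");
+ *     read_lock(&dev_base_lock);
+ *     err = dev_ifsioc(&ifr, SIOCGIFMTU);
+ *     read_unlock(&dev_base_lock);
+ * )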
+ */ + +/** + * dev_ioctl - network device ioctl + * @cmd: command to issue + * @arg: pointer to a struct ifreq in user space + * + * Issue ioctl functions to devices. This is normally called by the + * user space syscall interfaces but can sometimes be useful for + * other purposes. The return value is the return from the syscall if + * positive or a negative errno code on error. + */ + +int dev_ioctl(unsigned int cmd, void *arg) +{ + struct ifreq ifr; + int ret; + char *colon; + + /* One special case: SIOCGIFCONF takes ifconf argument + and requires shared lock, because it sleeps writing + to user space. + */ + + if (cmd == SIOCGIFCONF) { + return -ENOSYS; + } + if (cmd == SIOCGIFNAME) { + return dev_ifname((struct ifreq *)arg); + } + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + + ifr.ifr_name[IFNAMSIZ-1] = 0; + + colon = strchr(ifr.ifr_name, ':'); + if (colon) + *colon = 0; + + /* + * See which interface the caller is talking about. + */ + + switch(cmd) + { + /* + * These ioctl calls: + * - can be done by all. + * - atomic and do not require locking. + * - return a value + */ + + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFHWADDR: + case SIOCGIFSLAVE: + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: + dev_load(ifr.ifr_name); + read_lock(&dev_base_lock); + ret = dev_ifsioc(&ifr, cmd); + read_unlock(&dev_base_lock); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - return a value + */ + + case SIOCETHTOOL: + case SIOCGMIIPHY: + case SIOCGMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + if (!ret) { + if (colon) + *colon = ':'; + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return ret; + + /* + * These ioctl calls: + * - require superuser power. + * - require strict serialization. + * - do not return a value + */ + + case SIOCSIFFLAGS: + case SIOCSIFMETRIC: + case SIOCSIFMTU: + case SIOCSIFMAP: + case SIOCSIFHWADDR: + case SIOCSIFSLAVE: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCSIFHWBROADCAST: + case SIOCSIFTXQLEN: + case SIOCSIFNAME: + case SIOCSMIIREG: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + case SIOCBONDCHANGEACTIVE: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + return ret; + + case SIOCGIFMEM: + /* Get the per device memory space. We can add this but currently + do not support it */ + case SIOCSIFMEM: + /* Set the per device memory buffer space. Not applicable in our case */ + case SIOCSIFLINK: + return -EINVAL; + + /* + * Unknown or private ioctl. 
+ */ + + default: + if (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15) { + dev_load(ifr.ifr_name); + dev_probe_lock(); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + dev_probe_unlock(); + if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; + } +#ifdef WIRELESS_EXT + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + /* If command is `set a parameter', or + * `get the encoding parameters', check if + * the user has the right to do it */ + if (IW_IS_SET(cmd) || (cmd == SIOCGIWENCODE)) { + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + } + dev_load(ifr.ifr_name); + rtnl_lock(); + ret = dev_ifsioc(&ifr, cmd); + rtnl_unlock(); + if (!ret && IW_IS_GET(cmd) && + copy_to_user(arg, &ifr, sizeof(struct ifreq))) + return -EFAULT; + return ret; + } +#endif /* WIRELESS_EXT */ + return -EINVAL; + } +} + + +/** + * dev_new_index - allocate an ifindex + * + * Returns a suitable unique value for a new device interface + * number. The caller must hold the rtnl semaphore or the + * dev_base_lock to be sure it remains unique. + */ + +int dev_new_index(void) +{ + static int ifindex; + for (;;) { + if (++ifindex <= 0) + ifindex=1; + if (__dev_get_by_index(ifindex) == NULL) + return ifindex; + } +} + +static int dev_boot_phase = 1; + +/** + * register_netdevice - register a network device + * @dev: device to register + * + * Take a completed network device structure and add it to the kernel + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier + * chain. 0 is returned on success. A negative errno code is returned + * on a failure to set up the device, or if the name is a duplicate. + * + * Callers must hold the rtnl semaphore. See the comment at the + * end of Space.c for details about the locking. You may want + * register_netdev() instead of this. + * + * BUGS: + * The locking appears insufficient to guarantee two parallel registers + * will not get the same name. + */ + +int net_dev_init(void); + +int register_netdevice(struct net_device *dev) +{ + struct net_device *d, **dp; +#ifdef CONFIG_NET_DIVERT + int ret; +#endif + + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->xmit_lock); + dev->xmit_lock_owner = -1; +#ifdef CONFIG_NET_FASTROUTE + dev->fastpath_lock=RW_LOCK_UNLOCKED; +#endif + + if (dev_boot_phase) + net_dev_init(); + +#ifdef CONFIG_NET_DIVERT + ret = alloc_divert_blk(dev); + if (ret) + return ret; +#endif /* CONFIG_NET_DIVERT */ + + dev->iflink = -1; + + /* Init, if this function is available */ + if (dev->init && dev->init(dev) != 0) { +#ifdef CONFIG_NET_DIVERT + free_divert_blk(dev); +#endif + return -EIO; + } + + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + + /* Check for existence, and append to tail of chain */ + for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev || strcmp(d->name, dev->name) == 0) { +#ifdef CONFIG_NET_DIVERT + free_divert_blk(dev); +#endif + return -EEXIST; + } + } + /* + * nil rebuild_header routine, + * that should be never called and used as just bug trap. + */ + + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + + /* + * Default initial state at registry is that the + * device is present. 
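+ * (Annotation added for clarity: the __LINK_STATE_PRESENT bit set just
+ * below is the flag that netif_device_present() tests in the ioctl
+ * handlers above.)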
+ */ + + set_bit(__LINK_STATE_PRESENT, &dev->state); + + dev->next = NULL; + dev_init_scheduler(dev); + write_lock_bh(&dev_base_lock); + *dp = dev; + dev_hold(dev); + dev->deadbeaf = 0; + write_unlock_bh(&dev_base_lock); + + /* Notify protocols, that a new device appeared. */ + notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + + return 0; +} + +/** + * netdev_finish_unregister - complete unregistration + * @dev: device + * + * Destroy and free a dead device. A value of zero is returned on + * success. + */ + +int netdev_finish_unregister(struct net_device *dev) +{ + BUG_TRAP(dev->ip_ptr==NULL); + BUG_TRAP(dev->ip6_ptr==NULL); + BUG_TRAP(dev->dn_ptr==NULL); + + if (!dev->deadbeaf) { + printk(KERN_ERR "Freeing alive device %p, %s\n", dev, dev->name); + return 0; + } +#ifdef NET_REFCNT_DEBUG + printk(KERN_DEBUG "netdev_finish_unregister: %s%s.\n", dev->name, + (dev->features & NETIF_F_DYNALLOC)?"":", old style"); +#endif + if (dev->destructor) + dev->destructor(dev); + if (dev->features & NETIF_F_DYNALLOC) + kfree(dev); + return 0; +} + +/** + * unregister_netdevice - remove device from the kernel + * @dev: device + * + * This function shuts down a device interface and removes it + * from the kernel tables. On success 0 is returned, on a failure + * a negative errno code is returned. + * + * Callers must hold the rtnl semaphore. See the comment at the + * end of Space.c for details about the locking. You may want + * unregister_netdev() instead of this. + */ + +int unregister_netdevice(struct net_device *dev) +{ + unsigned long now, warning_time; + struct net_device *d, **dp; + + /* If device is running, close it first. */ + if (dev->flags & IFF_UP) + dev_close(dev); + + BUG_TRAP(dev->deadbeaf==0); + dev->deadbeaf = 1; + + /* And unlink it from device chain. */ + for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) { + if (d == dev) { + write_lock_bh(&dev_base_lock); + *dp = d->next; + write_unlock_bh(&dev_base_lock); + break; + } + } + if (d == NULL) { + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never was registered\n", dev->name, dev); + return -ENODEV; + } + + /* Synchronize to net_rx_action. */ + br_write_lock_bh(BR_NETPROTO_LOCK); + br_write_unlock_bh(BR_NETPROTO_LOCK); + + if (dev_boot_phase == 0) { + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + } + + if (dev->uninit) + dev->uninit(dev); + + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(dev->master==NULL); + +#ifdef CONFIG_NET_DIVERT + free_divert_blk(dev); +#endif + + if (dev->features & NETIF_F_DYNALLOC) { +#ifdef NET_REFCNT_DEBUG + if (atomic_read(&dev->refcnt) != 1) + printk(KERN_DEBUG "unregister_netdevice: holding %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)-1); +#endif + dev_put(dev); + return 0; + } + + /* Last reference is our one */ + if (atomic_read(&dev->refcnt) == 1) { + dev_put(dev); + return 0; + } + +#ifdef NET_REFCNT_DEBUG + printk("unregister_netdevice: waiting %s refcnt=%d\n", dev->name, atomic_read(&dev->refcnt)); +#endif + + /* EXPLANATION. If dev->refcnt is not now 1 (our own reference) + it means that someone in the kernel still has a reference + to this device and we cannot release it. + + "New style" devices have destructors, hence we can return from this + function and destructor will do all the work later. 
As of kernel 2.4.0
+   there are very few "new style" devices.
+
+   "Old style" devices expect that the device is free of any references
+   upon exit from this function.
+   We cannot return from this function until all such references have
+   fallen away. This is because the caller of this function will probably
+   immediately kfree(*dev) and then be unloaded via sys_delete_module.
+
+   So, we linger until all references fall away. The duration of the
+   linger is basically unbounded! It is driven by, for example, the
+   current setting of sysctl_ipfrag_time.
+
+   After 1 second, we start to rebroadcast unregister notifications
+   in the hope that careless clients will release the device.
+ */
+
+    now = warning_time = jiffies;
+    while (atomic_read(&dev->refcnt) != 1) {
+        if ((jiffies - now) > 1*HZ) {
+            /* Rebroadcast unregister notification */
+            notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+        }
+        mdelay(250);
+        if ((jiffies - warning_time) > 10*HZ) {
+            printk(KERN_EMERG "unregister_netdevice: waiting for %s to "
+                   "become free. Usage count = %d\n",
+                   dev->name, atomic_read(&dev->refcnt));
+            warning_time = jiffies;
+        }
+    }
+    dev_put(dev);
+    return 0;
+}
+
+
+/*
+ * Initialize the DEV module. At boot time this walks the device list and
+ * unhooks any devices that fail to initialise (normally hardware not
+ * present) and leaves us with a valid list of present and active devices.
+ */
+
+extern void net_device_init(void);
+extern void ip_auto_config(void);
+#ifdef CONFIG_NET_DIVERT
+extern void dv_init(void);
+#endif /* CONFIG_NET_DIVERT */
+
+
+/*
+ * Callers must hold the rtnl semaphore. See the comment at the
+ * end of Space.c for details about the locking.
+ */
+int __init net_dev_init(void)
+{
+    struct net_device *dev, **dp;
+    int i;
+
+    if (!dev_boot_phase)
+        return 0;
+
+    /*
+     * KAF: this was done in socket_init, but that top-half stuff is gone.
+     */
+    skb_init();
+
+    /*
+     * Initialise the packet receive queues.
+     */
+
+    for (i = 0; i < NR_CPUS; i++) {
+        struct softnet_data *queue;
+
+        queue = &softnet_data[i];
+        skb_queue_head_init(&queue->input_pkt_queue);
+        queue->throttle = 0;
+        queue->cng_level = 0;
+        queue->avg_blog = 10; /* arbitrary non-zero */
+        queue->completion_queue = NULL;
+    }
+
+    /*
+     * Add the devices.
+     * If the call to dev->init fails, the dev is removed
+     * from the chain, disconnecting the device until the
+     * next reboot.
+     *
+     * NB At boot phase networking is dead. No locking is required.
+     * But we still preserve dev_base_lock for sanity.
+     */
+
+    dp = &dev_base;
+    while ((dev = *dp) != NULL) {
+        spin_lock_init(&dev->queue_lock);
+        spin_lock_init(&dev->xmit_lock);
+
+        dev->xmit_lock_owner = -1;
+        dev->iflink = -1;
+        dev_hold(dev);
+
+        /*
+         * Allocate the name. If init() fails
+         * the name will be reissued correctly.
+         */
+        if (strchr(dev->name, '%'))
+            dev_alloc_name(dev, dev->name);
+
+        if (dev->init && dev->init(dev)) {
+            /*
+             * It failed to come up. It will be unhooked later.
+             * dev_alloc_name can now advance to the next suitable
+             * name, which is checked next.
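+             * (Annotation added for clarity: the device is only marked
+             * dev->deadbeaf = 1 here; the second pass over the list below
+             * actually unlinks such devices and drops their references.)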
+ */ + dev->deadbeaf = 1; + dp = &dev->next; + } else { + dp = &dev->next; + dev->ifindex = dev_new_index(); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + if (dev->rebuild_header == NULL) + dev->rebuild_header = default_rebuild_header; + dev_init_scheduler(dev); + set_bit(__LINK_STATE_PRESENT, &dev->state); + } + } + + /* + * Unhook devices that failed to come up + */ + dp = &dev_base; + while ((dev = *dp) != NULL) { + if (dev->deadbeaf) { + write_lock_bh(&dev_base_lock); + *dp = dev->next; + write_unlock_bh(&dev_base_lock); + dev_put(dev); + } else { + dp = &dev->next; + } + } + + dev_boot_phase = 0; + + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); + //open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); + + dst_init(); + dev_mcast_init(); + +#ifdef CONFIG_NET_SCHED + pktsched_init(); +#endif + + /* + * Initialise network devices + */ + + net_device_init(); + + return 0; +} + + +/* + * do_net_update: + * + * Called from guest OS to notify updates to its transmit and/or receive + * descriptor rings. + */ +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) +long do_net_update(void) +{ + shared_info_t *shared = current->shared_info; + net_ring_t *net_ring = current->net_ring_base; + unsigned int i; + struct sk_buff *skb; + tx_entry_t tx; + + for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) ) + { + if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) ) + continue; + + if ( TX_RING_INC(i) == net_ring->tx_event ) + set_bit(_EVENT_NET_TX, &shared->events); + + skb = alloc_skb(tx.size, GFP_KERNEL); + if ( skb == NULL ) continue; + skb_put(skb, tx.size); + if ( copy_from_user(skb->data, (void *)tx.addr, tx.size) ) + { + kfree_skb(skb); + continue; + } + skb->dev = the_dev; + + if ( skb->len < 16 ) + { + kfree_skb(skb); + continue; + } + + memcpy(skb->data + ETH_ALEN, skb->dev->dev_addr, ETH_ALEN); + + switch ( ntohs(*(unsigned short *)(skb->data + 12)) ) + { + case ETH_P_ARP: + skb->protocol = __constant_htons(ETH_P_ARP); + if ( skb->len < 42 ) break; + memcpy(skb->data + 22, skb->dev->dev_addr, 6); + break; + case ETH_P_IP: + skb->protocol = __constant_htons(ETH_P_IP); + break; + default: + kfree_skb(skb); + skb = NULL; + break; + } + + if ( skb != NULL ) + { + skb_get(skb); /* get a reference for non-local delivery */ + skb->protocol = eth_type_trans(skb, skb->dev); + if ( netif_rx(skb) == 0 ) + { + /* Give up non-local reference. Packet delivered locally. */ + kfree_skb(skb); + } + else + { + /* Pass the non-local reference to the net device. */ + skb_push(skb, skb->dev->hard_header_len); + dev_queue_xmit(skb); + } + } + } + + net_ring->tx_cons = i; + + return 0; +} + + +int setup_network_devices(void) +{ + int ret; + struct net_device *dev = dev_get_by_name("eth0"); + + if ( dev == NULL ) + { + printk("Could not find device eth0\n"); + return 0; + } + + ret = dev_open(dev); + if ( ret != 0 ) + { + printk("Error opening device eth0 for use (%d)\n", ret); + return 0; + } + printk("Device eth0 opened and ready for use\n"); + the_dev = dev; + + return 1; +} + diff --git a/xen-2.4.16/net/dev_mcast.c b/xen-2.4.16/net/dev_mcast.c new file mode 100644 index 0000000000..d7d2ae338d --- /dev/null +++ b/xen-2.4.16/net/dev_mcast.c @@ -0,0 +1,276 @@ +/* + * Linux NET3: Multicast List maintenance. 
+ * + * Authors: + * Tim Kordas + * Richard Underwood + * + * Stir fried together from the IP multicast and CAP patches above + * Alan Cox + * + * Fixes: + * Alan Cox : Update the device on a real delete + * rather than any time but... + * Alan Cox : IFF_ALLMULTI support. + * Alan Cox : New format set_multicast_list() calls. + * Gleb Natapov : Remove dev_mc_lock. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +//#include +#include +//#include +#include +#include +#include +//#include +#include +#include +#include +//#include +#include +#include +//#include +#include +//#include +//#include +#include +//#include +//#include + + +/* + * Device multicast list maintenance. + * + * This is used both by IP and by the user level maintenance functions. + * Unlike BSD we maintain a usage count on a given multicast address so + * that a casual user application can add/delete multicasts used by + * protocols without doing damage to the protocols when it deletes the + * entries. It also helps IP as it tracks overlapping maps. + * + * Device mc lists are changed by bh at least if IPv6 is enabled, + * so that it must be bh protected. + * + * We block accesses to device mc filters with dev->xmit_lock. + */ + +/* + * Update the multicast list into the physical NIC controller. + */ + +static void __dev_mc_upload(struct net_device *dev) +{ + /* Don't do anything till we up the interface + * [dev_open will call this function so the list will + * stay sane] + */ + + if (!(dev->flags&IFF_UP)) + return; + + /* + * Devices with no set multicast or which have been + * detached don't get set. + */ + + if (dev->set_multicast_list == NULL || + !netif_device_present(dev)) + return; + + dev->set_multicast_list(dev); +} + +void dev_mc_upload(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + __dev_mc_upload(dev); + spin_unlock_bh(&dev->xmit_lock); +} + +/* + * Delete a device level multicast + */ + +int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) +{ + int err = 0; + struct dev_mc_list *dmi, **dmip; + + spin_lock_bh(&dev->xmit_lock); + + for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { + /* + * Find the entry we want to delete. The device could + * have variable length entries so check these too. + */ + if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && + alen == dmi->dmi_addrlen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 0; + if (old_glbl == 0) + break; + } + if (--dmi->dmi_users) + goto done; + + /* + * Last user. So delete the entry. + */ + *dmip = dmi->next; + dev->mc_count--; + + kfree(dmi); + + /* + * We have altered the list, so the card + * loaded filter is now wrong. 
Fix it + */ + __dev_mc_upload(dev); + + spin_unlock_bh(&dev->xmit_lock); + return 0; + } + } + err = -ENOENT; +done: + spin_unlock_bh(&dev->xmit_lock); + return err; +} + +/* + * Add a device level multicast + */ + +int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) +{ + int err = 0; + struct dev_mc_list *dmi, *dmi1; + + dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC); + + spin_lock_bh(&dev->xmit_lock); + for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { + if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && + dmi->dmi_addrlen == alen) { + if (glbl) { + int old_glbl = dmi->dmi_gusers; + dmi->dmi_gusers = 1; + if (old_glbl) + goto done; + } + dmi->dmi_users++; + goto done; + } + } + + if ((dmi = dmi1) == NULL) { + spin_unlock_bh(&dev->xmit_lock); + return -ENOMEM; + } + memcpy(dmi->dmi_addr, addr, alen); + dmi->dmi_addrlen = alen; + dmi->next = dev->mc_list; + dmi->dmi_users = 1; + dmi->dmi_gusers = glbl ? 1 : 0; + dev->mc_list = dmi; + dev->mc_count++; + + __dev_mc_upload(dev); + + spin_unlock_bh(&dev->xmit_lock); + return 0; + +done: + spin_unlock_bh(&dev->xmit_lock); + if (dmi1) + kfree(dmi1); + return err; +} + +/* + * Discard multicast list when a device is downed + */ + +void dev_mc_discard(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + + while (dev->mc_list != NULL) { + struct dev_mc_list *tmp = dev->mc_list; + dev->mc_list = tmp->next; + if (tmp->dmi_users > tmp->dmi_gusers) + printk("dev_mc_discard: multicast leakage! dmi_users=%d\n", tmp->dmi_users); + kfree(tmp); + } + dev->mc_count = 0; + + spin_unlock_bh(&dev->xmit_lock); +} + +#ifdef CONFIG_PROC_FS +static int dev_mc_read_proc(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + off_t pos = 0, begin = 0; + struct dev_mc_list *m; + int len = 0; + struct net_device *dev; + + read_lock(&dev_base_lock); + for (dev = dev_base; dev; dev = dev->next) { + spin_lock_bh(&dev->xmit_lock); + for (m = dev->mc_list; m; m = m->next) { + int i; + + len += sprintf(buffer+len,"%-4d %-15s %-5d %-5d ", dev->ifindex, + dev->name, m->dmi_users, m->dmi_gusers); + + for (i = 0; i < m->dmi_addrlen; i++) + len += sprintf(buffer+len, "%02x", m->dmi_addr[i]); + + len += sprintf(buffer+len, "\n"); + + pos = begin + len; + if (pos < offset) { + len = 0; + begin = pos; + } + if (pos > offset + length) { + spin_unlock_bh(&dev->xmit_lock); + goto done; + } + } + spin_unlock_bh(&dev->xmit_lock); + } + *eof = 1; + +done: + read_unlock(&dev_base_lock); + *start = buffer + (offset - begin); + len -= (offset - begin); + if (len > length) + len = length; + if (len < 0) + len = 0; + return len; +} +#endif + +void __init dev_mcast_init(void) +{ +#ifdef CONFIG_PROC_FS + create_proc_read_entry("net/dev_mcast", 0, 0, dev_mc_read_proc, NULL); +#endif +} + diff --git a/xen-2.4.16/net/eth.c b/xen-2.4.16/net/eth.c new file mode 100644 index 0000000000..d982eef39b --- /dev/null +++ b/xen-2.4.16/net/eth.c @@ -0,0 +1,242 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Ethernet-type device handling. + * + * Version: @(#)eth.c 1.0.7 05/25/93 + * + * Authors: Ross Biro, + * Fred N. 
van Kempen, + * Mark Evans, + * Florian La Roche, + * Alan Cox, + * + * Fixes: + * Mr Linux : Arp problems + * Alan Cox : Generic queue tidyup (very tiny here) + * Alan Cox : eth_header ntohs should be htons + * Alan Cox : eth_rebuild_header missing an htons and + * minor other things. + * Tegge : Arp bug fixes. + * Florian : Removed many unnecessary functions, code cleanup + * and changes for new arp and skbuff. + * Alan Cox : Redid header building to reflect new format. + * Alan Cox : ARP only when compiled with CONFIG_INET + * Greg Page : 802.2 and SNAP stuff. + * Alan Cox : MAC layer pointers/new format. + * Paul Gortmaker : eth_copy_and_sum shouldn't csum padding. + * Alan Cox : Protect against forwarding explosions with + * older network drivers and IFF_ALLMULTI. + * Christer Weinigel : Better rebuild header message. + * Andrew Morton : 26Feb01: kill ether_setup() - use netdev_boot_setup(). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +//#include +#include +//#include +#include +#include +//#include +//#include +//#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +//#include +//#include +//#include +#include +#include +//#include + +//extern int __init netdev_boot_setup(char *str); + +//__setup("ether=", netdev_boot_setup); + +/* + * Create the Ethernet MAC header for an arbitrary protocol layer + * + * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) + */ + +int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) +{ + struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); + + /* + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. + */ + + if(type!=ETH_P_802_3) + eth->h_proto = htons(type); + else + eth->h_proto = htons(len); + + /* + * Set the source hardware address. + */ + + if(saddr) + memcpy(eth->h_source,saddr,dev->addr_len); + else + memcpy(eth->h_source,dev->dev_addr,dev->addr_len); + + /* + * Anyway, the loopback-device should never use this function... + */ + + if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) + { + memset(eth->h_dest, 0, dev->addr_len); + return(dev->hard_header_len); + } + + if(daddr) + { + memcpy(eth->h_dest,daddr,dev->addr_len); + return dev->hard_header_len; + } + + return -dev->hard_header_len; +} + + +/* + * Rebuild the Ethernet MAC header. This is called after an ARP + * (or in future other address resolution) has completed on this + * sk_buff. We now let ARP fill in the other fields. + * + * This routine CANNOT use cached dst->neigh! + * Really, it is used only when dst->neigh is wrong. + */ + +int eth_rebuild_header(struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct net_device *dev = skb->dev; + + switch (eth->h_proto) + { +#ifdef CONFIG_INET + case __constant_htons(ETH_P_IP): + return arp_find(eth->h_dest, skb); +#endif + default: + printk(KERN_DEBUG + "%s: unable to resolve type %X addresses.\n", + dev->name, (int)eth->h_proto); + + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + break; + } + + return 0; +} + + +/* + * Determine the packet's protocol ID. 
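+ * A driver receive path invokes it along these lines (a sketch, not
+ * a quotation from any driver in this tree):
+ *
+ *	skb->protocol = eth_type_trans(skb, dev);
+ *	netif_rx(skb);
+ *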
The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. + */ + +unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev) +{ + struct ethhdr *eth; + unsigned char *rawp; + + skb->mac.raw=skb->data; + skb_pull(skb,dev->hard_header_len); + eth= skb->mac.ethernet; + + if(*eth->h_dest&1) + { + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) + skb->pkt_type=PACKET_BROADCAST; + else + skb->pkt_type=PACKET_MULTICAST; + } + + /* + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. + * + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. + */ + + else if(1 /*dev->flags&IFF_PROMISC*/) + { + if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) + skb->pkt_type=PACKET_OTHERHOST; + } + + if (ntohs(eth->h_proto) >= 1536) + return eth->h_proto; + + rawp = skb->data; + + /* + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. + */ + if (*(unsigned short *)rawp == 0xFFFF) + return htons(ETH_P_802_3); + + /* + * Real 802.2 LLC + */ + return htons(ETH_P_802_2); +} + +int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) +{ + struct ethhdr *eth = skb->mac.ethernet; + memcpy(haddr, eth->h_source, ETH_ALEN); + return ETH_ALEN; +} + +int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) +{ +#if 0 + unsigned short type = hh->hh_type; + struct ethhdr *eth = (struct ethhdr*)(((u8*)hh->hh_data) + 2); + struct net_device *dev = neigh->dev; + + if (type == __constant_htons(ETH_P_802_3)) + return -1; + + eth->h_proto = type; + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, neigh->ha, dev->addr_len); + hh->hh_len = ETH_HLEN; +#endif + return 0; +} + +/* + * Called by Address Resolution module to notify changes in address. + */ + +void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) +{ + memcpy(((u8*)hh->hh_data) + 2, haddr, dev->addr_len); +} diff --git a/xen-2.4.16/net/sch_generic.c b/xen-2.4.16/net/sch_generic.c new file mode 100644 index 0000000000..135142fe52 --- /dev/null +++ b/xen-2.4.16/net/sch_generic.c @@ -0,0 +1,525 @@ +/* + * net/sched/sch_generic.c Generic packet scheduler routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * Jamal Hadi Salim, 990601 + * - Ingress support + */ + +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include +#include +//#include +#include +//#include +#include + +extern int net_ratelimit(void); +#define BUG_TRAP ASSERT + +/* Main transmission queue. */ + +/* Main qdisc structure lock. + + However, modifications + to data, participating in scheduling must be additionally + protected with dev->queue_lock spinlock. + + The idea is the following: + - enqueue, dequeue are serialized via top level device + spinlock dev->queue_lock. + - tree walking is protected by read_lock(qdisc_tree_lock) + and this lock is used only in process context. 
+ - updates to tree are made only under rtnl semaphore, + hence this lock may be made without local bh disabling. + + qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! + */ +rwlock_t qdisc_tree_lock = RW_LOCK_UNLOCKED; + +/* + dev->queue_lock serializes queue accesses for this device + AND dev->qdisc pointer itself. + + dev->xmit_lock serializes accesses to device driver. + + dev->queue_lock and dev->xmit_lock are mutually exclusive, + if one is grabbed, another must be free. + */ + + +/* Kick device. + Note, that this procedure can be called by a watchdog timer, so that + we do not check dev->tbusy flag here. + + Returns: 0 - queue is empty. + >0 - queue is not empty, but throttled. + <0 - queue is not empty. Device is throttled, if dev->tbusy != 0. + + NOTE: Called under dev->queue_lock with locally disabled BH. +*/ + +int qdisc_restart(struct net_device *dev) +{ + struct Qdisc *q = dev->qdisc; + struct sk_buff *skb; + + /* Dequeue packet */ + if ((skb = q->dequeue(q)) != NULL) { + if (spin_trylock(&dev->xmit_lock)) { + /* Remember that the driver is grabbed by us. */ + dev->xmit_lock_owner = smp_processor_id(); + + /* And release queue */ + spin_unlock(&dev->queue_lock); + + if (!netif_queue_stopped(dev)) { +#if 0 + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); +#endif + + if (dev->hard_start_xmit(skb, dev) == 0) { + dev->xmit_lock_owner = -1; + spin_unlock(&dev->xmit_lock); + + spin_lock(&dev->queue_lock); + return -1; + } + } + + /* Release the driver */ + dev->xmit_lock_owner = -1; + spin_unlock(&dev->xmit_lock); + spin_lock(&dev->queue_lock); + q = dev->qdisc; + } else { + /* So, someone grabbed the driver. */ + + /* It may be transient configuration error, + when hard_start_xmit() recurses. We detect + it by checking xmit owner and drop the + packet when deadloop is detected. + */ + if (dev->xmit_lock_owner == smp_processor_id()) { + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); + return -1; + } + netdev_rx_stat[smp_processor_id()].cpu_collision++; + } + + /* Device kicked us out :( + This is possible in three cases: + + 0. driver is locked + 1. fastroute is enabled + 2. device cannot determine busy state + before start of transmission (f.e. dialout) + 3. 
device is buggy (ppp) + */ + + q->ops->requeue(skb, q); + netif_schedule(dev); + return 1; + } + return q->q.qlen; +} + +static void dev_watchdog(unsigned long arg) +{ + struct net_device *dev = (struct net_device *)arg; + + spin_lock(&dev->xmit_lock); + if (dev->qdisc != &noop_qdisc) { + if (netif_device_present(dev) && + netif_running(dev) && + netif_carrier_ok(dev)) { + if (netif_queue_stopped(dev) && + (jiffies - dev->trans_start) > dev->watchdog_timeo) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name); + dev->tx_timeout(dev); + } + if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + dev_hold(dev); + } + } + spin_unlock(&dev->xmit_lock); + + dev_put(dev); +} + +static void dev_watchdog_init(struct net_device *dev) +{ + init_timer(&dev->watchdog_timer); + dev->watchdog_timer.data = (unsigned long)dev; + dev->watchdog_timer.function = dev_watchdog; +} + +void __netdev_watchdog_up(struct net_device *dev) +{ + if (dev->tx_timeout) { + if (dev->watchdog_timeo <= 0) + dev->watchdog_timeo = 5*HZ; + if (!mod_timer(&dev->watchdog_timer, jiffies + dev->watchdog_timeo)) + dev_hold(dev); + } +} + +static void dev_watchdog_up(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + __netdev_watchdog_up(dev); + spin_unlock_bh(&dev->xmit_lock); +} + +static void dev_watchdog_down(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + if (del_timer(&dev->watchdog_timer)) + __dev_put(dev); + spin_unlock_bh(&dev->xmit_lock); +} + +/* "NOOP" scheduler: the best scheduler, recommended for all interfaces + under all circumstances. It is difficult to invent anything faster or + cheaper. + */ + +static int +noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc) +{ + kfree_skb(skb); + return NET_XMIT_CN; +} + +static struct sk_buff * +noop_dequeue(struct Qdisc * qdisc) +{ + return NULL; +} + +static int +noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + if (net_ratelimit()) + printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); + kfree_skb(skb); + return NET_XMIT_CN; +} + +struct Qdisc_ops noop_qdisc_ops = +{ + NULL, + NULL, + "noop", + 0, + + noop_enqueue, + noop_dequeue, + noop_requeue, +}; + +struct Qdisc noop_qdisc = +{ + noop_enqueue, + noop_dequeue, + TCQ_F_BUILTIN, + &noop_qdisc_ops, +}; + + +struct Qdisc_ops noqueue_qdisc_ops = +{ + NULL, + NULL, + "noqueue", + 0, + + noop_enqueue, + noop_dequeue, + noop_requeue, + +}; + +struct Qdisc noqueue_qdisc = +{ + NULL, + noop_dequeue, + TCQ_F_BUILTIN, + &noqueue_qdisc_ops, +}; + + +static const u8 prio2band[TC_PRIO_MAX+1] = +{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; + +/* 3-band FIFO queue: old style, but should be a bit faster than + generic prio+fifo combination. 
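+ *
+ * Worked example (a sketch read off the prio2band[] table above):
+ * skb->priority 6 or 7 maps to band 0 and is dequeued first,
+ * priority 0 (best effort) maps to band 1, and priorities such as
+ * 2 map to band 2, which runs only when the other bands are empty.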
+ */ + +static int +pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + struct sk_buff_head *list; + + list = ((struct sk_buff_head*)qdisc->data) + + prio2band[skb->priority&TC_PRIO_MAX]; + + if (list->qlen <= skb->dev->tx_queue_len) { + __skb_queue_tail(list, skb); + qdisc->q.qlen++; + return 0; + } + //qdisc->stats.drops++; + kfree_skb(skb); + return NET_XMIT_DROP; +} + +static struct sk_buff * +pfifo_fast_dequeue(struct Qdisc* qdisc) +{ + int prio; + struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); + struct sk_buff *skb; + + for (prio = 0; prio < 3; prio++, list++) { + skb = __skb_dequeue(list); + if (skb) { + qdisc->q.qlen--; + return skb; + } + } + return NULL; +} + +static int +pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +{ + struct sk_buff_head *list; + + list = ((struct sk_buff_head*)qdisc->data) + + prio2band[skb->priority&TC_PRIO_MAX]; + + __skb_queue_head(list, skb); + qdisc->q.qlen++; + return 0; +} + +static void +pfifo_fast_reset(struct Qdisc* qdisc) +{ + int prio; + struct sk_buff_head *list = ((struct sk_buff_head*)qdisc->data); + + for (prio=0; prio < 3; prio++) + skb_queue_purge(list+prio); + qdisc->q.qlen = 0; +} + +static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) +{ + int i; + struct sk_buff_head *list; + + list = ((struct sk_buff_head*)qdisc->data); + + for (i=0; i<3; i++) + skb_queue_head_init(list+i); + + return 0; +} + +static struct Qdisc_ops pfifo_fast_ops = +{ + NULL, + NULL, + "pfifo_fast", + 3 * sizeof(struct sk_buff_head), + + pfifo_fast_enqueue, + pfifo_fast_dequeue, + pfifo_fast_requeue, + NULL, + + pfifo_fast_init, + pfifo_fast_reset, +}; + +struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) +{ + struct Qdisc *sch; + int size = sizeof(*sch) + ops->priv_size; + + sch = kmalloc(size, GFP_KERNEL); + if (!sch) + return NULL; + memset(sch, 0, size); + + skb_queue_head_init(&sch->q); + sch->ops = ops; + sch->enqueue = ops->enqueue; + sch->dequeue = ops->dequeue; + sch->dev = dev; + //sch->stats.lock = &dev->queue_lock; + atomic_set(&sch->refcnt, 1); + if (!ops->init || ops->init(sch, NULL) == 0) + return sch; + + kfree(sch); + return NULL; +} + +/* Under dev->queue_lock and BH! */ + +void qdisc_reset(struct Qdisc *qdisc) +{ + struct Qdisc_ops *ops = qdisc->ops; + + if (ops->reset) + ops->reset(qdisc); +} + +/* Under dev->queue_lock and BH! */ + +void qdisc_destroy(struct Qdisc *qdisc) +{ + struct Qdisc_ops *ops = qdisc->ops; + struct net_device *dev; + + if (!atomic_dec_and_test(&qdisc->refcnt)) + return; + + dev = qdisc->dev; + +#ifdef CONFIG_NET_SCHED + if (dev) { + struct Qdisc *q, **qp; + for (qp = &qdisc->dev->qdisc_list; (q=*qp) != NULL; qp = &q->next) { + if (q == qdisc) { + *qp = q->next; + break; + } + } + } +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&qdisc->stats); +#endif +#endif + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + if (!(qdisc->flags&TCQ_F_BUILTIN)) + kfree(qdisc); +} + + +void dev_activate(struct net_device *dev) +{ + /* No queueing discipline is attached to device; + create default one i.e. 
pfifo_fast for devices, + which need queueing and noqueue_qdisc for + virtual interfaces + */ + + if (dev->qdisc_sleeping == &noop_qdisc) { + struct Qdisc *qdisc; + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops); + if (qdisc == NULL) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + } else { + qdisc = &noqueue_qdisc; + } + write_lock(&qdisc_tree_lock); + dev->qdisc_sleeping = qdisc; + write_unlock(&qdisc_tree_lock); + } + + spin_lock_bh(&dev->queue_lock); + if ((dev->qdisc = dev->qdisc_sleeping) != &noqueue_qdisc) { + dev->trans_start = jiffies; + dev_watchdog_up(dev); + } + spin_unlock_bh(&dev->queue_lock); +} + +void dev_deactivate(struct net_device *dev) +{ + struct Qdisc *qdisc; + + spin_lock_bh(&dev->queue_lock); + qdisc = dev->qdisc; + dev->qdisc = &noop_qdisc; + + qdisc_reset(qdisc); + + spin_unlock_bh(&dev->queue_lock); + + dev_watchdog_down(dev); + + while (test_bit(__LINK_STATE_SCHED, &dev->state)) { + current->policy |= SCHED_YIELD; + schedule(); + } + + spin_unlock_wait(&dev->xmit_lock); +} + +void dev_init_scheduler(struct net_device *dev) +{ + write_lock(&qdisc_tree_lock); + spin_lock_bh(&dev->queue_lock); + dev->qdisc = &noop_qdisc; + spin_unlock_bh(&dev->queue_lock); + dev->qdisc_sleeping = &noop_qdisc; + dev->qdisc_list = NULL; + write_unlock(&qdisc_tree_lock); + + dev_watchdog_init(dev); +} + +void dev_shutdown(struct net_device *dev) +{ + struct Qdisc *qdisc; + + write_lock(&qdisc_tree_lock); + spin_lock_bh(&dev->queue_lock); + qdisc = dev->qdisc_sleeping; + dev->qdisc = &noop_qdisc; + dev->qdisc_sleeping = &noop_qdisc; + qdisc_destroy(qdisc); +#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) + if ((qdisc = dev->qdisc_ingress) != NULL) { + dev->qdisc_ingress = NULL; + qdisc_destroy(qdisc); + } +#endif + BUG_TRAP(dev->qdisc_list == NULL); + BUG_TRAP(!timer_pending(&dev->watchdog_timer)); + dev->qdisc_list = NULL; + spin_unlock_bh(&dev->queue_lock); + write_unlock(&qdisc_tree_lock); +} diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c new file mode 100644 index 0000000000..7ed619c5ee --- /dev/null +++ b/xen-2.4.16/net/skbuff.c @@ -0,0 +1,1217 @@ +/* + * Routines having to do with the 'struct sk_buff' memory handlers. + * + * Authors: Alan Cox + * Florian La Roche + * + * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $ + * + * Fixes: + * Alan Cox : Fixed the worst of the load balancer bugs. + * Dave Platt : Interrupt stacking fix. + * Richard Kooijman : Timestamp fixes. + * Alan Cox : Changed buffer format. + * Alan Cox : destructor hook for AF_UNIX etc. + * Linus Torvalds : Better skb_clone. + * Alan Cox : Added skb_copy. + * Alan Cox : Added all the changed routines Linus + * only put in the headers + * Ray VanTassle : Fixed --skb->lock in free + * Alan Cox : skb_copy copy arp field + * Andi Kleen : slabified it. + * + * NOTE: + * The __skb_ routines should be called with interrupts + * disabled, or you better be *real* sure that the operation is atomic + * with respect to whatever list is being frobbed (e.g. via lock_sock() + * or via disabling bottom half handlers, etc). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
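+ *
+ * Locking example for the NOTE above (an illustrative sketch only):
+ * on a list shared with interrupt context, either take the lock
+ * explicitly,
+ *
+ *	unsigned long flags;
+ *	local_irq_save(flags);
+ *	__skb_queue_tail(&list, skb);
+ *	local_irq_restore(flags);
+ *
+ * or call the locked wrapper skb_queue_tail(&list, skb) instead.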
+ */ + +/* + * The functions in this file will not compile correctly with gcc 2.4.x + */ + +#include +#include +#include +#include +//#include +#include +#include +#include +//#include +//#include +#include +#include +//#include +#include +#include +#include +//#include + +//#include +//#include +//#include +//#include +//#include +//#include + +#include +#include + +#define BUG_TRAP ASSERT + +#define put_page(_p) ((void)0) /* XXXX KAF */ +#define get_page(_p) ((void)0) + +int sysctl_hot_list_len = 128; + +static kmem_cache_t *skbuff_head_cache; + +static union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; +} skb_head_pool[NR_CPUS]; + +/* + * Keep out-of-line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always + * reliable. + */ + +/** + * skb_over_panic - private function + * @skb: buffer + * @sz: size + * @here: address + * + * Out of line support code for skb_put(). Not user callable. + */ + +void skb_over_panic(struct sk_buff *skb, int sz, void *here) +{ + printk("skput:over: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : ""); + BUG(); +} + +/** + * skb_under_panic - private function + * @skb: buffer + * @sz: size + * @here: address + * + * Out of line support code for skb_push(). Not user callable. + */ + + +void skb_under_panic(struct sk_buff *skb, int sz, void *here) +{ + printk("skput:under: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : ""); + BUG(); +} + +static __inline__ struct sk_buff *skb_head_from_pool(void) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; +} + +static __inline__ void skb_head_to_pool(struct sk_buff *skb) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; + + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); + + return; + } + kmem_cache_free(skbuff_head_cache, skb); +} + + +/* Allocate a new skbuff. We do this ourselves so we can fill in a few + * 'private' fields and also do memory statistics to find all the + * [BEEP] leaks. + * + */ + +/** + * alloc_skb - allocate a network buffer + * @size: size to allocate + * @gfp_mask: allocation mask + * + * Allocate a new &sk_buff. The returned buffer has no headroom and a + * tail room of size bytes. The object has a reference count of one. + * The return is the buffer. On a failure the return is %NULL. + * + * Buffers may only be allocated from interrupts using a @gfp_mask of + * %GFP_ATOMIC. + */ + +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + static int count = 0; + if (++count < 5) { + printk(KERN_ERR "alloc_skb called nonatomically " + "from interrupt %p\n", NET_CALLER(size)); + BUG(); + } + gfp_mask &= ~__GFP_WAIT; + } + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). 
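+ * SKB_DATA_ALIGN() rounds the request up to a cacheline boundary:
+ * with SMP_CACHE_BYTES == 32, for instance, a 1500 byte request
+ * becomes 1504 bytes before the shared info area is appended
+ * (a worked sketch, not a normative figure).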
*/ + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + goto nodata; + + /* XXX: does not include slab overhead */ + skb->truesize = size + sizeof(struct sk_buff); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->cloned = 0; + skb->data_len = 0; + + atomic_set(&skb->users, 1); + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + return skb; + +nodata: + skb_head_to_pool(skb); +nohead: + return NULL; +} + + +/* + * Slab constructor for a skb head. + */ +static inline void skb_headerinit(void *p, kmem_cache_t *cache, + unsigned long flags) +{ + struct sk_buff *skb = p; + + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + skb->sk = NULL; + skb->stamp.tv_sec=0; /* No idea about time */ + skb->dev = NULL; +// skb->dst = NULL; + memset(skb->cb, 0, sizeof(skb->cb)); + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->ip_summed = 0; + skb->priority = 0; + skb->security = 0; /* By default packets are insecure */ + skb->destructor = NULL; + +#ifdef CONFIG_NETFILTER + skb->nfmark = skb->nfcache = 0; + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +#endif +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif +} + +static void skb_drop_fraglist(struct sk_buff *skb) +{ + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + skb_shinfo(skb)->frag_list = NULL; + + do { + struct sk_buff *this = list; + list = list->next; + kfree_skb(this); + } while (list); +} + +static void skb_clone_fraglist(struct sk_buff *skb) +{ + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) + skb_get(list); +} + +static void skb_release_data(struct sk_buff *skb) +{ + if (!skb->cloned || + atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { + if (skb_shinfo(skb)->nr_frags) { + int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + } + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + + kfree(skb->head); + } +} + +/* + * Free an skbuff by memory without cleaning the state. + */ +void kfree_skbmem(struct sk_buff *skb) +{ + skb_release_data(skb); + skb_head_to_pool(skb); +} + +/** + * __kfree_skb - private function + * @skb: buffer + * + * Free an sk_buff. Release anything attached to the buffer. + * Clean the state. This is an internal helper function. Users should + * always call kfree_skb + */ + +void __kfree_skb(struct sk_buff *skb) +{ + if (skb->list) { + printk(KERN_WARNING "Warning: kfree_skb passed an skb still " + "on a list (from %p).\n", NET_CALLER(skb)); + BUG(); + } + +// dst_release(skb->dst); + if(skb->destructor) { + if (in_irq()) { + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", + NET_CALLER(skb)); + } + skb->destructor(skb); + } +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); +#endif + skb_headerinit(skb, NULL, 0); /* clean state */ + kfree_skbmem(skb); +} + +/** + * skb_clone - duplicate an sk_buff + * @skb: buffer to clone + * @gfp_mask: allocation priority + * + * Duplicate an &sk_buff. The new one is not owned by a socket. Both + * copies share the same packet data but not structure. The new + * buffer has a reference count of 1. If the allocation fails the + * function returns %NULL otherwise the new buffer is returned. 
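+ * A typical non-destructive tap does (sketch; deliver_copy() is a
+ * hypothetical consumer, not a function in this tree):
+ *
+ *	struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
+ *	if (n != NULL)
+ *		deliver_copy(n);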
+ * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + */ + +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + + n = skb_head_from_pool(); + if (!n) { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + } + +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->list = NULL; + n->sk = NULL; + C(stamp); + C(dev); + C(h); + C(nh); + C(mac); +// C(dst); +// dst_clone(n->dst); + memcpy(n->cb, skb->cb, sizeof(skb->cb)); + C(len); + C(data_len); + C(csum); + n->cloned = 1; + C(pkt_type); + C(ip_summed); + C(priority); + atomic_set(&n->users, 1); + C(protocol); + C(security); + C(truesize); + C(head); + C(data); + C(tail); + C(end); + n->destructor = NULL; +#ifdef CONFIG_NETFILTER + C(nfmark); + C(nfcache); + C(nfct); +#ifdef CONFIG_NETFILTER_DEBUG + C(nf_debug); +#endif +#endif /*CONFIG_NETFILTER*/ +#if defined(CONFIG_HIPPI) + C(private); +#endif +#ifdef CONFIG_NET_SCHED + C(tc_index); +#endif + + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; +#ifdef CONFIG_NETFILTER + nf_conntrack_get(skb->nfct); +#endif + return n; +} + +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +{ + /* + * Shift between the two data areas in bytes + */ + unsigned long offset = new->data - old->data; + + new->list=NULL; + new->sk=NULL; + new->dev=old->dev; + new->priority=old->priority; + new->protocol=old->protocol; +// new->dst=dst_clone(old->dst); + new->h.raw=old->h.raw+offset; + new->nh.raw=old->nh.raw+offset; + new->mac.raw=old->mac.raw+offset; + memcpy(new->cb, old->cb, sizeof(old->cb)); + atomic_set(&new->users, 1); + new->pkt_type=old->pkt_type; + new->stamp=old->stamp; + new->destructor = NULL; + new->security=old->security; +#ifdef CONFIG_NETFILTER + new->nfmark=old->nfmark; + new->nfcache=old->nfcache; + new->nfct=old->nfct; + nf_conntrack_get(new->nfct); +#ifdef CONFIG_NETFILTER_DEBUG + new->nf_debug=old->nf_debug; +#endif +#endif +#ifdef CONFIG_NET_SCHED + new->tc_index = old->tc_index; +#endif +} + +/** + * skb_copy - create private copy of an sk_buff + * @skb: buffer to copy + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and its data. This is used when the + * caller wishes to modify the data and needs a private copy of the + * data to alter. Returns %NULL on failure or the pointer to the buffer + * on success. The returned buffer has a reference count of 1. + * + * As by-product this function converts non-linear &sk_buff to linear + * one, so that &sk_buff becomes completely private and caller is allowed + * to modify all the data of returned buffer. This means that this + * function is not recommended for use in circumstances when only + * header is going to be modified. Use pskb_copy() instead. 
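+ *
+ * Rule of thumb (sketch): use skb_clone() when nothing will be
+ * modified, pskb_copy() when only headers will be rewritten, and
+ * skb_copy() when payload bytes themselves must change.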
+ */ + +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + int headerlen = skb->data-skb->head; + + /* + * Allocate the copy buffer + */ + n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); + if(n==NULL) + return NULL; + + /* Set the data pointer */ + skb_reserve(n,headerlen); + /* Set the tail pointer and length */ + skb_put(n,skb->len); + n->csum = skb->csum; + n->ip_summed = skb->ip_summed; + + if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) + BUG(); + + copy_skb_header(n, skb); + + return n; +} + +/* Keep head the same: replace data */ +int skb_linearize(struct sk_buff *skb, int gfp_mask) +{ + unsigned int size; + u8 *data; + long offset; + int headerlen = skb->data - skb->head; + int expand = (skb->tail+skb->data_len) - skb->end; + + if (skb_shared(skb)) + BUG(); + + if (expand <= 0) + expand = 0; + + size = (skb->end - skb->head + expand); + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + return -ENOMEM; + + /* Copy entire thing */ + if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) + BUG(); + + /* Offset between the two in bytes */ + offset = data - skb->head; + + /* Free old data. */ + skb_release_data(skb); + + skb->head = data; + skb->end = data + size; + + /* Set up new pointers */ + skb->h.raw += offset; + skb->nh.raw += offset; + skb->mac.raw += offset; + skb->tail += offset; + skb->data += offset; + + /* Set up shinfo */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + /* We are no longer a clone, even if we were. */ + skb->cloned = 0; + + skb->tail += skb->data_len; + skb->data_len = 0; + return 0; +} + + +/** + * pskb_copy - create copy of an sk_buff with private head. + * @skb: buffer to copy + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and part of its data, located + * in header. Fragmented data remain shared. This is used when + * the caller wishes to modify only header of &sk_buff and needs + * private copy of the header to alter. Returns %NULL on failure + * or the pointer to the buffer on success. + * The returned buffer has a reference count of 1. + */ + +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + + /* + * Allocate the copy buffer + */ + n=alloc_skb(skb->end - skb->head, gfp_mask); + if(n==NULL) + return NULL; + + /* Set the data pointer */ + skb_reserve(n,skb->data-skb->head); + /* Set the tail pointer and length */ + skb_put(n,skb_headlen(skb)); + /* Copy the bytes */ + memcpy(n->data, skb->data, n->len); + n->csum = skb->csum; + n->ip_summed = skb->ip_summed; + + n->data_len = skb->data_len; + n->len = skb->len; + + if (skb_shinfo(skb)->nr_frags) { + int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; + get_page(skb_shinfo(n)->frags[i].page); + } + skb_shinfo(n)->nr_frags = i; + } + + if (skb_shinfo(skb)->frag_list) { + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; + skb_clone_fraglist(n); + } + + copy_skb_header(n, skb); + + return n; +} + +/** + * pskb_expand_head - reallocate header of &sk_buff + * @skb: buffer to reallocate + * @nhead: room to add at head + * @ntail: room to add at tail + * @gfp_mask: allocation priority + * + * Expands (or creates identical copy, if &nhead and &ntail are zero) + * header of skb. &sk_buff itself is not changed. &sk_buff MUST have + * reference count of 1. 
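+ * (A shared buffer is normally skb_clone()d first and the clone then
+ * expanded; skb_realloc_headroom() below wraps exactly that pattern.)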
Returns zero in the case of success or error, + * if expansion failed. In the last case, &sk_buff is not changed. + * + * All the pointers pointing into skb header may change and must be + * reloaded after call to this function. + */ + +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +{ + int i; + u8 *data; + int size = nhead + (skb->end - skb->head) + ntail; + long off; + + if (skb_shared(skb)) + BUG(); + + size = SKB_DATA_ALIGN(size); + + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + goto nodata; + + /* Copy only real data... and, alas, header. This should be + * optimized for the cases when header is void. */ + memcpy(data+nhead, skb->head, skb->tail-skb->head); + memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); + + for (i=0; inr_frags; i++) + get_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_clone_fraglist(skb); + + skb_release_data(skb); + + off = (data+nhead) - skb->head; + + skb->head = data; + skb->end = data+size; + + skb->data += off; + skb->tail += off; + skb->mac.raw += off; + skb->h.raw += off; + skb->nh.raw += off; + skb->cloned = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; + +nodata: + return -ENOMEM; +} + +/* Make private copy of skb with writable head and some headroom */ + +struct sk_buff * +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) +{ + struct sk_buff *skb2; + int delta = headroom - skb_headroom(skb); + + if (delta <= 0) + return pskb_copy(skb, GFP_ATOMIC); + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL || + !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) + return skb2; + + kfree_skb(skb2); + return NULL; +} + + +/** + * skb_copy_expand - copy and expand sk_buff + * @skb: buffer to copy + * @newheadroom: new free bytes at head + * @newtailroom: new free bytes at tail + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and its data and while doing so + * allocate additional space. + * + * This is used when the caller wishes to modify the data and needs a + * private copy of the data to alter as well as more space for new fields. + * Returns %NULL on failure or the pointer to the buffer + * on success. The returned buffer has a reference count of 1. + * + * You must pass %GFP_ATOMIC as the allocation priority if this function + * is called from an interrupt. + */ + + +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int gfp_mask) +{ + struct sk_buff *n; + + /* + * Allocate the copy buffer + */ + + n=alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask); + if(n==NULL) + return NULL; + + skb_reserve(n,newheadroom); + + /* Set the tail pointer and length */ + skb_put(n,skb->len); + + /* Copy the data only. */ + if (skb_copy_bits(skb, 0, n->data, skb->len)) + BUG(); + + copy_skb_header(n, skb); + return n; +} + +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. + * If realloc==0 and trimming is impossible without change of data, + * it is BUG(). 
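+ * For example (sketch): trimming a 1500 byte frame to 60 bytes frees
+ * every page fragment lying wholly beyond offset 60 and clips the
+ * fragment that straddles it.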
+ */
+
+int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+{
+	int offset = skb_headlen(skb);
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int i;
+
+	for (i=0; i<nfrags; i++) {
+		int end = offset + skb_shinfo(skb)->frags[i].size;
+		if (end > len) {
+			if (skb_cloned(skb)) {
+				if (!realloc)
+					BUG();
+				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+					return -ENOMEM;
+			}
+			if (len <= offset) {
+				put_page(skb_shinfo(skb)->frags[i].page);
+				skb_shinfo(skb)->nr_frags--;
+			} else {
+				skb_shinfo(skb)->frags[i].size = len-offset;
+			}
+		}
+		offset = end;
+	}
+
+	if (offset < len) {
+		skb->data_len -= skb->len - len;
+		skb->len = len;
+	} else {
+		if (len <= skb_headlen(skb)) {
+			skb->len = len;
+			skb->data_len = 0;
+			skb->tail = skb->data + len;
+			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
+				skb_drop_fraglist(skb);
+		} else {
+			skb->data_len -= skb->len - len;
+			skb->len = len;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	__pskb_pull_tail - advance tail of skb header
+ *	@skb: buffer to reallocate
+ *	@delta: number of bytes to advance tail
+ *
+ *	The function makes sense only on a fragmented &sk_buff,
+ *	it expands header moving its tail forward and copying necessary
+ *	data from fragmented part.
+ *
+ *	&sk_buff MUST have reference count of 1.
+ *
+ *	Returns %NULL (and &sk_buff does not change) if pull failed
+ *	or value of new tail of skb in the case of success.
+ *
+ *	All the pointers pointing into skb header may change and must be
+ *	reloaded after call to this function.
+ */
+
+/* Moves tail of skb head forward, copying data from fragmented part,
+ * when it is necessary.
+ * 1. It may fail due to malloc failure.
+ * 2. It may change skb pointers.
+ *
+ * It is pretty complicated. Luckily, it is called only in exceptional cases.
+ */
+unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
+{
+	int i, k, eat;
+
+	/* If skb has not enough free space at tail, get new one
+	 * plus 128 bytes for future expansions. If we have enough
+	 * room at tail, reallocate without expansion only if skb is cloned.
+	 */
+	eat = (skb->tail+delta) - skb->end;
+
+	if (eat > 0 || skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
+			return NULL;
+	}
+
+	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+		BUG();
+
+	/* Optimization: no fragments, no reasons to preestimate
+	 * size of pulled pages. Superb.
+	 */
+	if (skb_shinfo(skb)->frag_list == NULL)
+		goto pull_pages;
+
+	/* Estimate size of pulled pages. */
+	eat = delta;
+	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+		if (skb_shinfo(skb)->frags[i].size >= eat)
+			goto pull_pages;
+		eat -= skb_shinfo(skb)->frags[i].size;
+	}
+
+	/* If we need to update the frag list, we are in trouble.
+	 * Certainly, it is possible to add an offset to skb data,
+	 * but taking into account that pulling is expected to
+	 * be a very rare operation, it is worth fighting against
+	 * further bloating of the skb head and crucifying ourselves
+	 * here instead. Pure masochism, indeed. 8)8)
+	 */
+	if (eat) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+		struct sk_buff *clone = NULL;
+		struct sk_buff *insp = NULL;
+
+		do {
+			if (list == NULL)
+				BUG();
+
+			if (list->len <= eat) {
+				/* Eaten as whole. */
+				eat -= list->len;
+				list = list->next;
+				insp = list;
+			} else {
+				/* Eaten partially. */
+
+				if (skb_shared(list)) {
+					/* Sucks! We need to fork list. :-( */
+					clone = skb_clone(list, GFP_ATOMIC);
+					if (clone == NULL)
+						return NULL;
+					insp = list->next;
+					list = clone;
+				} else {
+					/* This may be pulled without
+					 * problems.
*/ + insp = list; + } + if (pskb_pull(list, eat) == NULL) { + if (clone) + kfree_skb(clone); + return NULL; + } + break; + } + } while (eat); + + /* Free pulled out fragments. */ + while ((list = skb_shinfo(skb)->frag_list) != insp) { + skb_shinfo(skb)->frag_list = list->next; + kfree_skb(list); + } + /* And insert new clone at head. */ + if (clone) { + clone->next = list; + skb_shinfo(skb)->frag_list = clone; + } + } + /* Success! Now we may commit changes to skb data. */ + +pull_pages: + eat = delta; + k = 0; + for (i=0; inr_frags; i++) { + if (skb_shinfo(skb)->frags[i].size <= eat) { + put_page(skb_shinfo(skb)->frags[i].page); + eat -= skb_shinfo(skb)->frags[i].size; + } else { + skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + if (eat) { + skb_shinfo(skb)->frags[k].page_offset += eat; + skb_shinfo(skb)->frags[k].size -= eat; + eat = 0; + } + k++; + } + } + skb_shinfo(skb)->nr_frags = k; + + skb->tail += delta; + skb->data_len -= delta; + + return skb->tail; +} + +/* Copy some data bits from skb to kernel buffer. */ + +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) +{ + int i, copy; + int start = skb->len - skb->data_len; + + if (offset > (int)skb->len-len) + goto fault; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + memcpy(to, skb->data + offset, copy); + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + u8 *vaddr; + + if (copy > len) + copy = len; + + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ + offset-start, copy); + kunmap_skb_frag(vaddr); + + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_bits(list, offset-start, to, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + start = end; + } + } + if (len == 0) + return 0; + +fault: + return -EFAULT; +} + +/* Checksum skb data. */ + +#if 0 + +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) +{ + int i, copy; + int start = skb->len - skb->data_len; + int pos = 0; + + /* Checksum header. 
*/ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + csum = csum_partial(skb->data+offset, copy, csum); + if ((len -= copy) == 0) + return csum; + offset += copy; + pos = copy; + } + + for (i=0; inr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + unsigned int csum2; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + vaddr = kmap_skb_frag(frag); + csum2 = csum_partial(vaddr + frag->page_offset + + offset-start, copy, 0); + kunmap_skb_frag(vaddr); + csum = csum_block_add(csum, csum2, pos); + if (!(len -= copy)) + return csum; + offset += copy; + pos += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + unsigned int csum2; + if (copy > len) + copy = len; + csum2 = skb_checksum(list, offset-start, copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + pos += copy; + } + start = end; + } + } + if (len == 0) + return csum; + + BUG(); + return csum; +} + +/* Both of above in one bottle. */ + +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) +{ + int i, copy; + int start = skb->len - skb->data_len; + int pos = 0; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); + if ((len -= copy) == 0) + return csum; + offset += copy; + to += copy; + pos = copy; + } + + for (i=0; inr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + unsigned int csum2; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + vaddr = kmap_skb_frag(frag); + csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + + offset-start, to, copy, 0); + kunmap_skb_frag(vaddr); + csum = csum_block_add(csum, csum2, pos); + if (!(len -= copy)) + return csum; + offset += copy; + to += copy; + pos += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + unsigned int csum2; + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + if (copy > len) + copy = len; + csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + to += copy; + pos += copy; + } + start = end; + } + } + if (len == 0) + return csum; + + BUG(); + return csum; +} + +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) +{ + unsigned int csum; + long csstart; + + if (skb->ip_summed == CHECKSUM_HW) + csstart = skb->h.raw - skb->data; + else + csstart = skb->len - skb->data_len; + + if (csstart > skb->len - skb->data_len) + BUG(); + + memcpy(to, skb->data, csstart); + + csum = 0; + if (csstart != skb->len) + csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, + skb->len-csstart, 0); + + if (skb->ip_summed == CHECKSUM_HW) { + long csstuff = csstart + skb->csum; + + *((unsigned short *)(to + csstuff)) = csum_fold(csum); + } +} + +#endif + +#if 0 +/* + * Tune the 
memory allocator for a new MTU size. + */ +void skb_add_mtu(int mtu) +{ + /* Must match allocation in alloc_skb */ + mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); + + kmem_add_cache_size(mtu); +} +#endif + +void __init skb_init(void) +{ + int i; + + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + sizeof(struct sk_buff), + 0, + SLAB_HWCACHE_ALIGN, + skb_headerinit, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); + + for (i=0; i + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +//#include +#include +//#include +#include + +static unsigned long net_rand_seed = 152L; + +unsigned long net_random(void) +{ + net_rand_seed=net_rand_seed*69069L+1; + return net_rand_seed^jiffies; +} + +void net_srandom(unsigned long entropy) +{ + net_rand_seed ^= entropy; + net_random(); +} + +int net_msg_cost = 5*HZ; +int net_msg_burst = 10*5*HZ; + +/* + * This enforces a rate limit: not more than one kernel message + * every 5secs to make a denial-of-service attack impossible. + * + * All warning printk()s should be guarded by this function. + */ +int net_ratelimit(void) +{ + static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; + static unsigned long toks = 10*5*HZ; + static unsigned long last_msg; + static int missed; + unsigned long flags; + unsigned long now = jiffies; + + spin_lock_irqsave(&ratelimit_lock, flags); + toks += now - last_msg; + last_msg = now; + if (toks > net_msg_burst) + toks = net_msg_burst; + if (toks >= net_msg_cost) { + int lost = missed; + missed = 0; + toks -= net_msg_cost; + spin_unlock_irqrestore(&ratelimit_lock, flags); + if (lost) + printk(KERN_WARNING "NET: %d messages suppressed.\n", lost); + return 1; + } + missed++; + spin_unlock_irqrestore(&ratelimit_lock, flags); + return 0; +} diff --git a/xen-2.4.16/tools/Makefile b/xen-2.4.16/tools/Makefile new file mode 100644 index 0000000000..ccf535aa49 --- /dev/null +++ b/xen-2.4.16/tools/Makefile @@ -0,0 +1,6 @@ + +elf-reloc: elf-reloc.c + gcc -O2 -Wall -o $@ $< + +clean: + rm -f elf-reloc *~ core diff --git a/xen-2.4.16/tools/elf-reloc.c b/xen-2.4.16/tools/elf-reloc.c new file mode 100644 index 0000000000..19a839ee84 --- /dev/null +++ b/xen-2.4.16/tools/elf-reloc.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * elf-reloc.c + * + * Usage: elf-reloc + * + * Relocates from address to address by + * frobbing the Elf headers. Segment contents are unmodified! 
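+ *
+ * Example invocation (the addresses and file name are illustrative):
+ *
+ *	elf-reloc 0xfc500000 0xfc400000 image
+ *
+ * which shifts e_entry and each program header's p_vaddr/p_paddr by
+ * the difference, leaving segment payloads untouched.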
+ */ + +#include +#include +#include + +typedef unsigned long Elf32_Addr; +typedef unsigned short Elf32_Half; +typedef unsigned long Elf32_Off; +typedef unsigned long Elf32_Word; + +typedef struct { + unsigned char e_ident[16]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +#define offsetof(_f,_p) ((unsigned long)&(((_p *)0)->_f)) + + +/* Add @reloc_distance to address at offset @off in file @fp. */ +void reloc(FILE *fp, long off, unsigned long reloc_distance) +{ + unsigned long base; + fseek(fp, off, SEEK_SET); + fread(&base, sizeof(base), 1, fp); + base += reloc_distance; + fseek(fp, off, SEEK_SET); + fwrite(&base, sizeof(base), 1, fp); + +} + + +int main(int argc, char **argv) +{ + unsigned long old_base, new_base, reloc_distance; + long virt_section, phys_section; + char *image_name; + FILE *fp; + Elf32_Off phoff; + Elf32_Half phnum, phentsz; + int i; + + if ( argc != 4 ) + { + fprintf(stderr, "Usage: elf-reloc \n"); + return(1); + } + + old_base = strtoul(argv[1], NULL, 16); + new_base = strtoul(argv[2], NULL, 16); + image_name = argv[3]; + + printf("Relocating `%s' from 0x%08lX to 0x%08lX\n", + image_name, old_base, new_base); + + fp = fopen(image_name, "rb+"); + if ( !fp ) + { + fprintf(stderr, "Failed to load image!\n"); + return(1); + } + + reloc_distance = new_base - old_base; + + /* First frob the entry address. */ + reloc(fp, offsetof(e_entry, Elf32_Ehdr), reloc_distance); + + fseek(fp, offsetof(e_phoff, Elf32_Ehdr), SEEK_SET); + fread(&phoff, sizeof(phoff), 1, fp); + fseek(fp, offsetof(e_phnum, Elf32_Ehdr), SEEK_SET); + fread(&phnum, sizeof(phnum), 1, fp); + fseek(fp, offsetof(e_phentsize, Elf32_Ehdr), SEEK_SET); + fread(&phentsz, sizeof(phentsz), 1, fp); + + virt_section = (long)phoff + offsetof(p_vaddr, Elf32_Phdr); + phys_section = (long)phoff + offsetof(p_paddr, Elf32_Phdr); + for ( i = 0; i < phnum; i++ ) + { + reloc(fp, phys_section, reloc_distance); + reloc(fp, virt_section, reloc_distance); + phys_section += phentsz; + virt_section += phentsz; + } + + fclose(fp); + + return(0); +} diff --git a/xenolinux-2.4.16-sparse/Makefile b/xenolinux-2.4.16-sparse/Makefile new file mode 100644 index 0000000000..bf7463b5b0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/Makefile @@ -0,0 +1,580 @@ +VERSION = 2 +PATCHLEVEL = 4 +SUBLEVEL = 16 +EXTRAVERSION = -kdb-xeno + +KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) + +HW_ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) +ifndef ARCH +ARCH := $(HW_ARCH) +endif +KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//") + +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ + else if [ -x /bin/bash ]; then echo /bin/bash; \ + else echo sh; fi ; fi) +TOPDIR := $(shell /bin/pwd) + +HPATH = $(TOPDIR)/include +FINDHPATH = $(HPATH)/asm $(HPATH)/linux $(HPATH)/scsi $(HPATH)/net + +HOSTCC = gcc +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer + +CROSS_COMPILE = + +# +# Include the make variables (CC, etc...) 
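+# (CROSS_COMPILE is left empty for a native build; setting e.g.
+# CROSS_COMPILE=arm-linux- would select arm-linux-gcc, arm-linux-ld
+# and so on below -- illustrative only.)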
+# + +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +CC = $(CROSS_COMPILE)gcc +CPP = $(CC) -E +AR = $(CROSS_COMPILE)ar +NM = $(CROSS_COMPILE)nm +STRIP = $(CROSS_COMPILE)strip +OBJCOPY = $(CROSS_COMPILE)objcopy +OBJDUMP = $(CROSS_COMPILE)objdump +MAKEFILES = $(TOPDIR)/.config +GENKSYMS = /sbin/genksyms +DEPMOD = /sbin/depmod +KALLSYMS = /sbin/kallsyms +MODFLAGS = -DMODULE +CFLAGS_KERNEL = +PERL = perl +AWK = awk +TMPPREFIX = + +export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ + CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \ + CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL AWK + +all: do-it-all + +# +# Make "config" the default target if there is no configuration file or +# "depend" the target if there is no top-level dependency information. +# + +ifeq (.config,$(wildcard .config)) +include .config +ifeq (.depend,$(wildcard .depend)) +include .depend +do-it-all: Version vmlinux +else +CONFIGURATION = depend +do-it-all: depend +endif +else +CONFIGURATION = config +do-it-all: config +endif + +# +# INSTALL_PATH specifies where to place the updated kernel and system map +# images. Uncomment if you want to place them anywhere other than root. +# + +#export INSTALL_PATH=/boot + +# +# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +# relocations required by build roots. This is not defined in the +# makefile but the arguement can be passed to make if needed. +# + +MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) +export MODLIB + +# +# standard CFLAGS +# + +CPPFLAGS := -D__KERNEL__ -I$(HPATH) +CPPFLAGS += $(patsubst %,-I%,$(CROSS_COMPILE_INC)) + +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ + -fno-strict-aliasing -fno-common +ifndef CONFIG_FRAME_POINTER +CFLAGS += -fomit-frame-pointer +endif +AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) + +# +# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. +# This is i386 specific. +# + +export ROOT_DEV = CURRENT + +# +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. +# This is i386 specific. +# + +export SVGA_MODE = -DSVGA_MODE=NORMAL_VGA + +# +# If you want the RAM disk device, define this to be the size in blocks. +# This is i386 specific. 
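+# (For example, -DRAMDISK=512 below requests a 512-block ramdisk,
+# i.e. 512 KiB at the usual 1 KiB block size -- a sketch, not a
+# requirement.)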
+# + +#export RAMDISK = -DRAMDISK=512 + +CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o +NETWORKS =net/network.o + +LIBS =$(TOPDIR)/lib/lib.a +SUBDIRS =kernel drivers mm fs net ipc lib + +ifeq ($(CONFIG_KDB),y) +CORE_FILES += kdb/kdb.o +SUBDIRS += kdb +endif + +DRIVERS-n := +DRIVERS-y := +DRIVERS-m := +DRIVERS- := + +DRIVERS-$(CONFIG_ACPI) += drivers/acpi/acpi.o +DRIVERS-$(CONFIG_PARPORT) += drivers/parport/driver.o +DRIVERS-y += drivers/char/char.o \ + drivers/block/block.o \ + drivers/misc/misc.o \ + drivers/net/net.o \ + drivers/media/media.o +DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o +DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o +DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a +DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a +DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o +DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.o +DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.o +DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o +DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnetdrv.o +DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o +DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o +DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o +DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o + +ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),) +DRIVERS-y += drivers/cdrom/driver.o +endif + +DRIVERS-$(CONFIG_SOUND) += drivers/sound/sounddrivers.o +DRIVERS-$(CONFIG_PCI) += drivers/pci/driver.o +DRIVERS-$(CONFIG_MTD) += drivers/mtd/mtdlink.o +DRIVERS-$(CONFIG_PCMCIA) += drivers/pcmcia/pcmcia.o +DRIVERS-$(CONFIG_NET_PCMCIA) += drivers/net/pcmcia/pcmcia_net.o +DRIVERS-$(CONFIG_NET_WIRELESS) += drivers/net/wireless/wireless_net.o +DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o +DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a +DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o +DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o +DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a +DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o +DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o +DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a +DRIVERS-$(CONFIG_VT) += drivers/video/video.o +DRIVERS-$(CONFIG_PARIDE) += drivers/block/paride/paride.a +DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o +DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a +DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o +DRIVERS-$(CONFIG_INPUT) += drivers/input/inputdrv.o +DRIVERS-$(CONFIG_I2O) += drivers/message/i2o/i2o.o +DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o +DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o +DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.o +DRIVERS-$(CONFIG_MD) += drivers/md/mddev.o +DRIVERS-$(CONFIG_BLUEZ) += drivers/bluetooth/bluetooth.o +DRIVERS-$(CONFIG_HOTPLUG_PCI) += drivers/hotplug/vmlinux-obj.o + +DRIVERS := $(DRIVERS-y) + + +# files removed with 'make clean' +CLEAN_FILES = \ + kernel/ksyms.lst include/linux/compile.h \ + vmlinux System.map \ + $(TMPPREFIX).tmp* \ + drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \ + drivers/char/conmakehash \ + drivers/char/drm/*-mod.c \ + drivers/pci/devlist.h drivers/pci/classlist.h drivers/pci/gen-devlist \ + drivers/zorro/devlist.h drivers/zorro/gen-devlist \ + drivers/sound/bin2hex drivers/sound/hex2hex \ + drivers/atm/fore200e_mkfirm drivers/atm/{pca,sba}*{.bin,.bin1,.bin2} \ + 
drivers/scsi/aic7xxx/aicasm/aicasm_gram.c \ + drivers/scsi/aic7xxx/aicasm/aicasm_scan.c \ + drivers/scsi/aic7xxx/aicasm/y.tab.h \ + drivers/scsi/aic7xxx/aicasm/aicasm \ + drivers/scsi/53c700-mem.c \ + net/khttpd/make_times_h \ + net/khttpd/times.h \ + submenu* +# directories removed with 'make clean' +CLEAN_DIRS = \ + modules + +# files removed with 'make mrproper' +MRPROPER_FILES = \ + include/linux/autoconf.h include/linux/version.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk1200,afsk2666,fsk9600}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{hapn4800,psk4800}.h \ + drivers/net/hamradio/soundmodem/sm_tbl_{afsk2400_7,afsk2400_8}.h \ + drivers/net/hamradio/soundmodem/gentbl \ + drivers/sound/*_boot.h drivers/sound/.*.boot \ + drivers/sound/msndinit.c \ + drivers/sound/msndperm.c \ + drivers/sound/pndsperm.c \ + drivers/sound/pndspini.c \ + drivers/atm/fore200e_*_fw.c drivers/atm/.fore200e_*.fw \ + .version .config* config.in config.old \ + scripts/tkparse scripts/kconfig.tk scripts/kconfig.tmp \ + scripts/lxdialog/*.o scripts/lxdialog/lxdialog \ + .menuconfig.log \ + include/asm \ + kdb/gen-kdb_cmds.c \ + .hdepend scripts/mkdep scripts/split-include scripts/docproc \ + $(TOPDIR)/include/linux/modversions.h \ + kernel.spec + +# directories removed with 'make mrproper' +MRPROPER_DIRS = \ + include/config \ + $(TOPDIR)/include/linux/modules + + +include arch/$(ARCH)/Makefile + +export CPPFLAGS CFLAGS CFLAGS_KERNEL AFLAGS AFLAGS_KERNEL + +export NETWORKS DRIVERS LIBS HEAD LDFLAGS LINKFLAGS MAKEBOOT ASFLAGS + +.S.s: + $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -o $*.s $< +.S.o: + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -traditional -c -o $*.o $< + +Version: dummy + @rm -f include/linux/compile.h + +boot: vmlinux + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot + +LD_VMLINUX := $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o \ + --start-group \ + $(CORE_FILES) \ + $(DRIVERS) \ + $(NETWORKS) \ + $(LIBS) \ + --end-group +ifeq ($(CONFIG_KALLSYMS),y) +LD_VMLINUX_KALLSYMS := $(TMPPREFIX).tmp_kallsyms3.o +else +LD_VMLINUX_KALLSYMS := +endif + +vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o linuxsubdirs + @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms + +.PHONY: kallsyms + +kallsyms: +ifeq ($(CONFIG_KALLSYMS),y) + @echo kallsyms pass 1 + $(LD_VMLINUX) -o $(TMPPREFIX).tmp_vmlinux1 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux1 > $(TMPPREFIX).tmp_kallsyms1.o + @echo kallsyms pass 2 + @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms1.o -o $(TMPPREFIX).tmp_vmlinux2 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux2 > $(TMPPREFIX).tmp_kallsyms2.o + @echo kallsyms pass 3 + @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms2.o -o $(TMPPREFIX).tmp_vmlinux3 + @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux3 > $(TMPPREFIX).tmp_kallsyms3.o +endif + $(LD_VMLINUX) $(LD_VMLINUX_KALLSYMS) -o $(TMPPREFIX)vmlinux +ifneq ($(TMPPREFIX),) + mv $(TMPPREFIX)vmlinux vmlinux +endif + $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map + @rm -f $(TMPPREFIX).tmp_vmlinux* $(TMPPREFIX).tmp_kallsyms* + +symlinks: + rm -f include/asm + ( cd include ; ln -sf asm-$(ARCH) asm) + @if [ ! 
-d include/linux/modules ]; then \
+ mkdir include/linux/modules; \
+ fi
+
+oldconfig: symlinks
+ $(CONFIG_SHELL) scripts/Configure -d arch/$(ARCH)/config.in
+
+xconfig: symlinks
+ $(MAKE) -C scripts kconfig.tk
+ wish -f scripts/kconfig.tk
+
+menuconfig: include/linux/version.h symlinks
+ $(MAKE) -C scripts/lxdialog all
+ $(CONFIG_SHELL) scripts/Menuconfig arch/$(ARCH)/config.in
+
+config: symlinks
+ $(CONFIG_SHELL) scripts/Configure arch/$(ARCH)/config.in
+
+include/config/MARKER: scripts/split-include include/linux/autoconf.h
+ scripts/split-include include/linux/autoconf.h include/config
+ @ touch include/config/MARKER
+
+linuxsubdirs: $(patsubst %, _dir_%, $(SUBDIRS))
+
+$(patsubst %, _dir_%, $(SUBDIRS)) : dummy include/linux/version.h include/config/MARKER
+ $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C $(patsubst _dir_%, %, $@)
+
+$(TOPDIR)/include/linux/version.h: include/linux/version.h
+$(TOPDIR)/include/linux/compile.h: include/linux/compile.h
+
+newversion:
+ . scripts/mkversion > .tmpversion
+ @mv -f .tmpversion .version
+
+include/linux/compile.h: $(CONFIGURATION) include/linux/version.h newversion
+ @echo -n \#define UTS_VERSION \"\#`cat .version` > .ver
+ @if [ -n "$(CONFIG_SMP)" ] ; then echo -n " SMP" >> .ver; fi
+ @if [ -f .name ]; then echo -n \-`cat .name` >> .ver; fi
+ @echo ' '`date`'"' >> .ver
+ @echo \#define LINUX_COMPILE_TIME \"`date +%T`\" >> .ver
+ @echo \#define LINUX_COMPILE_BY \"`whoami`\" >> .ver
+ @echo \#define LINUX_COMPILE_HOST \"`hostname`\" >> .ver
+ @if [ -x /bin/dnsdomainname ]; then \
+ echo \#define LINUX_COMPILE_DOMAIN \"`dnsdomainname`\"; \
+ elif [ -x /bin/domainname ]; then \
+ echo \#define LINUX_COMPILE_DOMAIN \"`domainname`\"; \
+ else \
+ echo \#define LINUX_COMPILE_DOMAIN ; \
+ fi >> .ver
+ @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver
+ @mv -f .ver $@
+
+include/linux/version.h: ./Makefile
+ @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver
+ @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver
+ @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver
+ @mv -f .ver $@
+
+init/version.o: init/version.c include/linux/compile.h include/config/MARKER
+ $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -DUTS_MACHINE='"$(HW_ARCH)"' -c -o init/version.o init/version.c
+
+init/main.o: init/main.c include/config/MARKER
+ $(CC) $(CFLAGS) $(CFLAGS_KERNEL) $(PROFILING) -c -o $*.o $<
+
+fs lib mm ipc kernel drivers net: dummy
+ $(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" $(subst $@, _dir_$@, $@)
+
+TAGS: dummy
+ etags `find include/asm-$(ARCH) -name '*.h'`
+ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs etags -a
+ find $(SUBDIRS) init -name '*.[ch]' | xargs etags -a
+
+# Exuberant ctags works better with -I
+tags: dummy
+ CTAGSF=`ctags --version | grep -i exuberant >/dev/null && echo "-I __initdata,__exitdata,EXPORT_SYMBOL,EXPORT_SYMBOL_NOVERS"`; \
+ ctags $$CTAGSF `find include/asm-$(ARCH) -name '*.h'` && \
+ find include -type d \( -name "asm-*" -o -name config \) -prune -o -name '*.h' -print | xargs ctags $$CTAGSF -a && \
+ find $(SUBDIRS) init -name '*.[ch]' | xargs ctags $$CTAGSF -a
+
+ifdef CONFIG_MODULES
+ifdef CONFIG_MODVERSIONS
+MODFLAGS += -DMODVERSIONS -include $(HPATH)/linux/modversions.h
+endif
+
+.PHONY: modules
+modules: $(patsubst %, _mod_%, $(SUBDIRS))
+
+.PHONY: $(patsubst %, _mod_%, $(SUBDIRS))
+$(patsubst %, _mod_%, $(SUBDIRS)) : include/linux/version.h include/config/MARKER
+ $(MAKE) -C $(patsubst _mod_%, %, $@) CFLAGS="$(CFLAGS) $(MODFLAGS)" MAKING_MODULES=1 modules
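The include/linux/version.h rule above packs the three kernel version components into the single integer LINUX_VERSION_CODE, using the same arithmetic as the KERNEL_VERSION(a,b,c) macro it emits. A quick standalone check of that packing (an illustrative program, not part of this changeset):

    #include <stdio.h>

    /* Same packing as the generated include/linux/version.h. */
    #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))

    int main(void)
    {
        /* For the 2.4.16 tree imported here: 2*65536 + 4*256 + 16. */
        printf("%d\n", KERNEL_VERSION(2, 4, 16)); /* prints 132112 */
        return 0;
    }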
+
+.PHONY: modules_install
+modules_install: _modinst_ $(patsubst %, _modinst_%, $(SUBDIRS)) _modinst_post
+
+.PHONY: _modinst_
+_modinst_:
+ @rm -rf $(MODLIB)/kernel
+ @rm -f $(MODLIB)/build
+ @mkdir -p $(MODLIB)/kernel
+ @ln -s $(TOPDIR) $(MODLIB)/build
+
+# If System.map exists, run depmod. This deliberately does not have a
+# dependency on System.map since that would run the dependency tree on
+# vmlinux. This depmod is only for convenience to give the initial
+# boot a modules.dep even before / is mounted read-write. However the
+# boot script depmod is the master version.
+ifeq "$(strip $(INSTALL_MOD_PATH))" ""
+depmod_opts :=
+else
+depmod_opts := -b $(INSTALL_MOD_PATH) -r
+endif
+.PHONY: _modinst_post
+_modinst_post: _modinst_post_pcmcia
+# if [ -r System.map ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+
+# Backwards compatibility symlinks for people still using old versions
+# of pcmcia-cs with hard-coded pathnames on insmod. Remove
+# _modinst_post_pcmcia for kernel 2.4.1.
+.PHONY: _modinst_post_pcmcia
+_modinst_post_pcmcia:
+ cd $(MODLIB); \
+ mkdir -p pcmcia; \
+ find kernel -path '*/pcmcia/*' -name '*.o' | xargs -i -r ln -sf ../{} pcmcia
+
+.PHONY: $(patsubst %, _modinst_%, $(SUBDIRS))
+$(patsubst %, _modinst_%, $(SUBDIRS)) :
+ $(MAKE) -C $(patsubst _modinst_%, %, $@) modules_install
+
+# modules disabled....
+
+else
+modules modules_install: dummy
+ @echo
+ @echo "The present kernel configuration has modules disabled."
+ @echo "Type 'make config' and enable loadable module support."
+ @echo "Then build a kernel with module support enabled."
+ @echo
+ @exit 1
+endif
+
+clean: archclean
+ find . \( -name '*.[oas]' -o -name core -o -name '.*.flags' \) -type f -print \
+ | grep -v lxdialog/ | xargs rm -f
+ rm -f $(CLEAN_FILES)
+ rm -rf $(CLEAN_DIRS)
+ $(MAKE) -C Documentation/DocBook clean
+
+mrproper: clean archmrproper
+ find . \( -size 0 -o -name .depend \) -type f -print | xargs rm -f
+ rm -f $(MRPROPER_FILES)
+ rm -rf $(MRPROPER_DIRS)
+ $(MAKE) -C Documentation/DocBook mrproper
+
+distclean: mrproper
+ rm -f core `find . \( -not -type d \) -and \
+ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \
+ -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \
+ -o -name '.*.rej' -o -name '.SUMS' -o -size 0 \) -type f -print` TAGS tags
+
+backup: mrproper
+ cd .. && tar cf - linux/ | gzip -9 > backup.gz
+ sync
+
+sgmldocs:
+ chmod 755 $(TOPDIR)/scripts/docgen
+ chmod 755 $(TOPDIR)/scripts/gen-all-syms
+ chmod 755 $(TOPDIR)/scripts/kernel-doc
+ $(MAKE) -C $(TOPDIR)/Documentation/DocBook books
+
+psdocs: sgmldocs
+ $(MAKE) -C Documentation/DocBook ps
+
+pdfdocs: sgmldocs
+ $(MAKE) -C Documentation/DocBook pdf
+
+htmldocs: sgmldocs
+ $(MAKE) -C Documentation/DocBook html
+
+sums:
+ find . -type f -print | sort | xargs sum > .SUMS
+
+dep-files: scripts/mkdep archdep include/linux/version.h
+ scripts/mkdep -- init/*.c > .depend
+ scripts/mkdep -- `find $(FINDHPATH) -name SCCS -prune -o -follow -name \*.h ! -name modversions.h -print` > .hdepend
+ $(MAKE) $(patsubst %,_sfdep_%,$(SUBDIRS)) _FASTDEP_ALL_SUB_DIRS="$(SUBDIRS)"
+ifdef CONFIG_MODVERSIONS
+ $(MAKE) update-modverfile
+endif
+
+ifdef CONFIG_MODVERSIONS
+MODVERFILE := $(TOPDIR)/include/linux/modversions.h
+else
+MODVERFILE :=
+endif
+export MODVERFILE
+
+depend dep: dep-files
+
+checkconfig:
+ find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl
+
+checkhelp:
+ find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl
+
+checkincludes:
+ find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkincludes.pl
+
+ifdef CONFIGURATION
+..$(CONFIGURATION):
+ @echo
+ @echo "You have a bad or nonexistent" .$(CONFIGURATION) ": running 'make" $(CONFIGURATION)"'"
+ @echo
+ $(MAKE) $(CONFIGURATION)
+ @echo
+ @echo "Successful. Try re-making (ignore the error that follows)"
+ @echo
+ exit 1
+
+#dummy: ..$(CONFIGURATION)
+dummy:
+
+else
+
+dummy:
+
+endif
+
+include Rules.make
+
+#
+# This generates dependencies for the .h files.
+#
+
+scripts/mkdep: scripts/mkdep.c
+ $(HOSTCC) $(HOSTCFLAGS) -o scripts/mkdep scripts/mkdep.c
+
+scripts/split-include: scripts/split-include.c
+ $(HOSTCC) $(HOSTCFLAGS) -o scripts/split-include scripts/split-include.c
+
+#
+# RPM target
+#
+# If you do a make spec before packing the tarball you can rpm -ta it
+#
+spec:
+ . scripts/mkspec >kernel.spec
+
+#
+# Build a tar ball, generate an rpm from it and pack the result
+# There are two bits of magic here
+# 1) The use of /. to avoid tar packing just the symlink
+# 2) Removing the .dep files as they have source paths in them that
+#    will become invalid
+#
+rpm: clean spec
+ find . \( -size 0 -o -name .depend -o -name .hdepend \) -type f -print | xargs rm -f
+ set -e; \
+ cd $(TOPDIR)/.. ; \
+ ln -sf $(TOPDIR) $(KERNELPATH) ; \
+ tar -cvz --exclude CVS -f $(KERNELPATH).tar.gz $(KERNELPATH)/. ; \
+ rm $(KERNELPATH) ; \
+ cd $(TOPDIR) ; \
+ . scripts/mkversion > .version ; \
+ rpm -ta $(TOPDIR)/../$(KERNELPATH).tar.gz ; \
+ rm $(TOPDIR)/../$(KERNELPATH).tar.gz
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/Makefile
new file mode 100644
index 0000000000..ffc378eda9
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/Makefile
@@ -0,0 +1,98 @@
+#
+# xeno/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina +# Added '-march' and '-mpreferred-stack-boundary' support +# + +LD=$(CROSS_COMPILE)ld -m elf_i386 +OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S +LDFLAGS=-e stext +LINKFLAGS =-T $(TOPDIR)/arch/xeno/vmlinux.lds $(LDFLAGS) + +CFLAGS += -pipe + +# prevent gcc from keeping the stack 16 byte aligned +CFLAGS += $(shell if $(CC) -mpreferred-stack-boundary=2 -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mpreferred-stack-boundary=2"; fi) + +ifdef CONFIG_M686 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUMIII +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MPENTIUM4 +CFLAGS += -march=i686 +endif + +ifdef CONFIG_MK7 +CFLAGS += $(shell if $(CC) -march=athlon -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-march=athlon"; else echo "-march=i686 -malign-functions=4"; fi) +endif + +HEAD := arch/xeno/kernel/head.o arch/xeno/kernel/init_task.o + +SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib +SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network +SUBDIRS += arch/xeno/drivers/dom0 arch/xeno/drivers/block + +CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o +CORE_FILES += arch/xeno/drivers/console/con.o +CORE_FILES += arch/xeno/drivers/block/blk.o +CORE_FILES += arch/xeno/drivers/network/net.o +CORE_FILES += arch/xeno/drivers/dom0/dom0.o +LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a + +arch/xeno/kernel: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kernel + +arch/xeno/mm: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/mm + +arch/xeno/drivers/console: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/console + +arch/xeno/drivers/network: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/network + +arch/xeno/drivers/block: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/block + +arch/xeno/drivers/dom0: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/drivers/dom0 + +ifdef CONFIG_KDB +arch/xeno/kdb: dummy + $(MAKE) linuxsubdirs SUBDIRS=arch/xeno/kdb +endif + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +vmlinux: arch/xeno/vmlinux.lds + +FORCE: ; + +.PHONY: bzImage compressed clean archclean archmrproper archdep + +bzImage: vmlinux + @$(MAKEBOOT) image.gz + +archclean: + @$(MAKEBOOT) clean + +archmrproper: + +archdep: + @$(MAKEBOOT) dep diff --git a/xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile new file mode 100644 index 0000000000..252daf50bf --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/boot/Makefile @@ -0,0 +1,22 @@ +# +# arch/xeno/boot/Makefile +# + +image.gz: image + gzip -f -9 < $< > $@ + +image: $(TOPDIR)/vmlinux + # Guest OS header -- first 8 bytes are identifier 'XenoGues'. + echo -e -n 'XenoGues' >$@ + # Guest OS header -- next 4 bytes are load address (0xC0000000). + echo -e -n '\000\000\000\300' >>$@ + $(OBJCOPY) $< image.body + # Guest OS header is immediately followed by raw OS image. + # Start address must be at byte 0. + cat image.body >>$@ + rm -f image.body + +dep: + +clean: + rm -f image image.gz diff --git a/xenolinux-2.4.16-sparse/arch/xeno/config.in b/xenolinux-2.4.16-sparse/arch/xeno/config.in new file mode 100644 index 0000000000..f6e48b4b81 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/config.in @@ -0,0 +1,135 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/config-language.txt. 
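A note on the arch/xeno/boot/Makefile above: the bootable image it assembles is simply a 12-byte guest OS header (the 8-byte identifier 'XenoGues', then the load address 0xC0000000 as four little-endian bytes, which is what the octal escape '\000\000\000\300' spells out) followed immediately by the raw objcopy'd kernel. A minimal C sketch of the same recipe (file names are illustrative):

    #include <stdio.h>

    int main(void)
    {
        /* 8-byte identifier; deliberately not NUL-terminated in the image. */
        static const char magic[8] = { 'X','e','n','o','G','u','e','s' };
        /* 0xC0000000 emitted byte by byte, little-endian (0300 octal == 0xC0). */
        static const unsigned char load_addr[4] = { 0x00, 0x00, 0x00, 0xC0 };

        FILE *out = fopen("image", "wb");
        if ( out == NULL ) return 1;
        fwrite(magic, 1, sizeof(magic), out);
        fwrite(load_addr, 1, sizeof(load_addr), out);
        /* The raw kernel body ('cat image.body' in the Makefile) is then
         * appended directly after the header, so it starts at byte 12. */
        fclose(out);
        return 0;
    }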
+# +mainmenu_name "Linux Kernel Configuration" + +define_bool CONFIG_XENO y + +define_bool CONFIG_X86 y +define_bool CONFIG_ISA y +define_bool CONFIG_SBUS n + +define_bool CONFIG_UID16 y + +mainmenu_option next_comment +comment 'Code maturity level options' +bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL +endmenu + +mainmenu_option next_comment +comment 'Loadable module support' +bool 'Enable loadable module support' CONFIG_MODULES +if [ "$CONFIG_MODULES" = "y" ]; then + bool ' Set version information on all module symbols' CONFIG_MODVERSIONS + bool ' Kernel module loader' CONFIG_KMOD +fi +endmenu + +mainmenu_option next_comment +comment 'Processor type and features' +choice 'Processor family' \ + "Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ + Pentium-III/Celeron(Coppermine) CONFIG_MPENTIUMIII \ + Pentium-4 CONFIG_MPENTIUM4 \ + Athlon/Duron/K7 CONFIG_MK7" Pentium-Pro + + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y + define_bool CONFIG_X86_CMPXCHG y + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y + define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n + define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + define_bool CONFIG_X86_GOOD_APIC y + define_bool CONFIG_X86_PGE y + define_bool CONFIG_X86_USE_PPRO_CHECKSUM y + define_bool CONFIG_X86_TSC y + +if [ "$CONFIG_M686" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 5 +fi +if [ "$CONFIG_MPENTIUM4" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 7 +fi +if [ "$CONFIG_MK7" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 6 + define_bool CONFIG_X86_USE_3DNOW y +fi + +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + +#bool 'Symmetric multi-processing support' CONFIG_SMP +#if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then +# define_bool CONFIG_HAVE_DEC_LOCK y +#fi +endmenu + +mainmenu_option next_comment +comment 'General setup' + +bool 'Networking support' CONFIG_NET + +bool 'System V IPC' CONFIG_SYSVIPC +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT +bool 'Sysctl support' CONFIG_SYSCTL +if [ "$CONFIG_PROC_FS" = "y" ]; then + choice 'Kernel core (/proc/kcore) format' \ + "ELF CONFIG_KCORE_ELF \ + A.OUT CONFIG_KCORE_AOUT" ELF +fi +tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC + +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +source drivers/block/Config.in +define_bool CONFIG_BLK_DEV_IDE_MODES n +define_bool CONFIG_BLK_DEV_HD n + +source fs/Config.in + +mainmenu_option next_comment +comment 'Kernel hacking' + +bool 'Kernel debugging' CONFIG_DEBUG_KERNEL +if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then + bool ' Debug high memory support' CONFIG_DEBUG_HIGHMEM + bool ' Debug memory allocations' CONFIG_DEBUG_SLAB + bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE + bool ' Built-in Kernel Debugger support' CONFIG_KDB + dep_tristate ' KDB modules' 
CONFIG_KDB_MODULES $CONFIG_KDB + if [ "$CONFIG_KDB" = "y" ]; then + bool ' KDB off by default' CONFIG_KDB_OFF + comment ' Load all symbols for debugging is required for KDB' + define_bool CONFIG_KALLSYMS y + else + bool ' Load all symbols for debugging' CONFIG_KALLSYMS + fi + bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER +fi + +endmenu diff --git a/xenolinux-2.4.16-sparse/arch/xeno/defconfig b/xenolinux-2.4.16-sparse/arch/xeno/defconfig new file mode 100644 index 0000000000..b278dec50f --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/defconfig @@ -0,0 +1,391 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +CONFIG_ISA=y +# CONFIG_SBUS is not set +CONFIG_UID16=y + +# +# Code maturity level options +# +# CONFIG_EXPERIMENTAL is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# Processor type and features +# +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +CONFIG_M686=y +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +# CONFIG_RWSEM_GENERIC_SPINLOCK is not set +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_PGE=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +# CONFIG_X86_MSR is not set +# CONFIG_X86_CPUID is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +# CONFIG_SMP is not set +# CONFIG_X86_UP_APIC is not set + +# +# General setup +# +CONFIG_NET=y +# CONFIG_PCI is not set +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_HOTPLUG is not set +# CONFIG_PCMCIA is not set +# CONFIG_HOTPLUG_PCI is not set +CONFIG_SYSVIPC=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +CONFIG_KCORE_ELF=y +# CONFIG_KCORE_AOUT is not set +CONFIG_BINFMT_AOUT=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +# CONFIG_PM is not set +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +# CONFIG_APM_DO_ENABLE is not set +# CONFIG_APM_CPU_IDLE is not set +# CONFIG_APM_DISPLAY_BLANK is not set +# CONFIG_APM_RTC_IS_GMT is not set +# CONFIG_APM_ALLOW_INTS is not set +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_NBD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_INITRD=y + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK=y +CONFIG_RTNETLINK=y +# CONFIG_NETLINK_DEV is not set +# CONFIG_NETFILTER is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# 
CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_INET_ECN is not set +# CONFIG_SYN_COOKIES is not set + +# +# +# +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_DECNET is not set +# CONFIG_BRIDGE is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# ATA/IDE/MFM/RLL support +# +# CONFIG_IDE is not set +# CONFIG_BLK_DEV_IDE_MODES is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION_BOOT is not set +# CONFIG_FUSION_ISENSE is not set +# CONFIG_FUSION_CTL is not set +# CONFIG_FUSION_LAN is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Network device support +# +# CONFIG_NETDEVICES is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) support +# +# CONFIG_IRDA is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_UNIX98_PTYS is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +# CONFIG_MOUSE is not set + +# +# Joysticks +# +# CONFIG_INPUT_GAMEPORT is not set + +# +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks +# +# CONFIG_QIC02_TAPE is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +# CONFIG_QUOTA is not set +CONFIG_AUTOFS_FS=y +CONFIG_AUTOFS4_FS=y +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_FAT_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_TMPFS is not set +# CONFIG_RAMFS is not set +# CONFIG_ISO9660_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_ROMFS_FS is not set +CONFIG_EXT2_FS=y +# CONFIG_SYSV_FS is not set +# CONFIG_UDF_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +# CONFIG_CODA_FS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +CONFIG_SUNRPC=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +# CONFIG_SMB_FS is not set +# CONFIG_NCP_FS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_SMB_NLS is not set +# CONFIG_NLS is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# + +# +# USB Controllers +# + +# +# USB Device Class drivers +# + +# +# USB Human Interface Devices (HID) +# + +# +# Input core support is needed for USB HID +# + +# +# USB Imaging devices +# + +# +# USB Multimedia devices +# + +# +# Video4Linux support is needed for USB Multimedia device 
support +# + +# +# USB Network adaptors +# + +# +# USB port drivers +# + +# +# USB Serial Converter support +# + +# +# USB Miscellaneous drivers +# + +# +# Kernel hacking +# +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_IOVIRT=y +# CONFIG_MAGIC_SYSRQ is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_KDB is not set +CONFIG_KALLSYMS=y +CONFIG_FRAME_POINTER=y diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile new file mode 100644 index 0000000000..9361a01ec7 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/Makefile @@ -0,0 +1,3 @@ +O_TARGET := blk.o +obj-y := block.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c new file mode 100644 index 0000000000..bf7d416dff --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/block.c @@ -0,0 +1,392 @@ +/****************************************************************************** + * block.c + * + * Virtual block driver for XenoLinux. + * + * adapted from network.c + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#ifdef UNDEFINED + +#include +#include +#include +#include +#include + +#include + +#define BLK_TX_IRQ _EVENT_BLK_TX +#define BLK_RX_IRQ _EVENT_BLK_RX + +#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) +#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) + +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) + +#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! 
*/ + + + +int network_probe(struct net_device *dev); +static int network_open(struct net_device *dev); +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int network_close(struct net_device *dev); +static struct net_device_stats *network_get_stats(struct net_device *dev); +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void network_free_rx_buffers(struct net_device *dev); + +static struct net_device dev_net_xeno; + +/* + * RX RING: RX_IDX <= rx_cons <= rx_prod + * TX RING: TX_IDX <= tx_cons <= tx_prod + * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor) + */ +struct net_private +{ + struct net_device_stats stats; + struct sk_buff **tx_skb_ring; + struct sk_buff **rx_skb_ring; + atomic_t tx_entries; + unsigned int rx_idx, tx_idx, tx_full; + net_ring_t *net_ring; + spinlock_t tx_lock; +}; + + +int __init network_probe(struct net_device *dev) +{ + SET_MODULE_OWNER(dev); + + memcpy(dev->dev_addr, "\xFE\xFD\x00\x00\x00\x00", 6); + + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + ether_setup(dev); + + return 0; +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np; + int error; + + np = kmalloc(sizeof(struct net_private), GFP_KERNEL); + if ( np == NULL ) + { + printk(KERN_WARNING "%s: No memory for private data\n", dev->name); + return -ENOMEM; + } + memset(np, 0, sizeof(struct net_private)); + dev->priv = np; + + spin_lock_init(&np->tx_lock); + + atomic_set(&np->tx_entries, 0); + + np->net_ring = start_info.net_rings; + np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0; + np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0; + np->net_ring->tx_ring = NULL; + np->net_ring->rx_ring = NULL; + + np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *), + GFP_KERNEL); + np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), + GFP_KERNEL); + np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), + GFP_KERNEL); + if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) || + (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) ) + { + printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name); + error = -ENOBUFS; + goto fail; + } + + network_alloc_rx_buffers(dev); + + error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate receive interrupt\n", + dev->name); + goto fail; + } + + error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev); + if ( error ) + { + printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n", + dev->name); + free_irq(NET_RX_IRQ, dev); + goto fail; + } + + printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; + + fail: + if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); + if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); + if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); + if ( np->tx_skb_ring ) kfree(np->tx_skb_ring); + kfree(np); + return error; +} + + +static void network_tx_buf_gc(struct 
net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) ) + { + skb = np->tx_skb_ring[i]; + dev_kfree_skb_any(skb); + atomic_dec(&np->tx_entries); + } + + np->tx_idx = i; + + if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) + { + np->tx_full = 0; + netif_wake_queue(dev); + } + + spin_unlock_irqrestore(&np->tx_lock, flags); +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + + for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) + { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( skb == NULL ) break; + skb->dev = dev; + skb_reserve(skb, 2); /* word align the IP header */ + np->rx_skb_ring[i] = skb; + np->net_ring->rx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ + } + + np->net_ring->rx_prod = i; + + np->net_ring->rx_event = RX_RING_INC(np->rx_idx); + + HYPERVISOR_net_update(); +} + + +static void network_free_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + dev_kfree_skb(skb); + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + struct net_private *np = (struct net_private *)dev->priv; + + if ( np->tx_full ) + { + printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + i = np->net_ring->tx_prod; + np->tx_skb_ring[i] = skb; + np->net_ring->tx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->tx_ring[i].size = skb->len; + np->net_ring->tx_prod = TX_RING_INC(i); + atomic_inc(&np->tx_entries); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + spin_lock_irq(&np->tx_lock); + if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) + { + np->tx_full = 1; + netif_stop_queue(dev); + np->net_ring->tx_event = TX_RING_ADD(np->tx_idx, + atomic_read(&np->tx_entries) >> 1); + } + else + { + /* Avoid unnecessary tx interrupts. */ + np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod); + } + spin_unlock_irq(&np->tx_lock); + + /* Must do this after setting tx_event: race with updates of tx_cons. */ + network_tx_buf_gc(dev); + + HYPERVISOR_net_update(); + + return 0; +} + + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned int i; + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + again: + for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + skb_put(skb, np->net_ring->rx_ring[i].size); + skb->protocol = eth_type_trans(skb, dev); + np->stats.rx_packets++; + np->stats.rx_bytes += np->net_ring->rx_ring[i].size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_idx = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. 
*/
+    smp_mb();
+    if ( np->net_ring->rx_cons != i ) goto again;
+}
+
+
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+    struct net_device *dev = (struct net_device *)dev_id;
+    network_tx_buf_gc(dev);
+}
+
+
+static int network_close(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+
+    netif_stop_queue(dev);
+    free_irq(NET_RX_IRQ, dev);
+    free_irq(NET_TX_IRQ, dev);
+    network_free_rx_buffers(dev);
+    kfree(np->net_ring->rx_ring);
+    kfree(np->net_ring->tx_ring);
+    kfree(np->rx_skb_ring);
+    kfree(np->tx_skb_ring);
+    kfree(np);
+    MOD_DEC_USE_COUNT;
+    return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+    struct net_private *np = (struct net_private *)dev->priv;
+    return &np->stats;
+}
+
+
+static int __init init_module(void)
+{
+    memset(&dev_net_xeno, 0, sizeof(dev_net_xeno));
+    strcpy(dev_net_xeno.name, "eth%d");
+    dev_net_xeno.init = network_probe;
+    return (register_netdev(&dev_net_xeno) != 0) ? -EIO : 0;
+}
+
+
+static void __exit cleanup_module(void)
+{
+    unregister_netdev(&dev_net_xeno);
+}
+
+#endif /* UNDEFINED */
+
+
+static void block_initialize(void)
+{
+    blk_ring_t *blk_ring = start_info.blk_ring;
+
+    if ( blk_ring == NULL ) return;
+
+    blk_ring->tx_prod = blk_ring->tx_cons = blk_ring->tx_event = 0;
+    blk_ring->rx_prod = blk_ring->rx_cons = blk_ring->rx_event = 0;
+    blk_ring->tx_ring = NULL;
+    blk_ring->rx_ring = NULL;
+}
+
+
+/*
+ * init_module initializes the xeno block device driver
+ */
+
+static int __init init_module(void)
+{
+    block_initialize();
+    printk("XenoLinux Virtual Block Device Driver installed\n");
+    return 0;
+}
+
+static void __exit cleanup_module(void)
+{
+    printk("XenoLinux Virtual Block Device Driver uninstalled\n");
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile
new file mode 100644
index 0000000000..5a0e7b36b1
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := con.o
+obj-y := console.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c
new file mode 100644
index 0000000000..11548f877e
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/console/console.c
@@ -0,0 +1,204 @@
+/******************************************************************************
+ * console.c
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/******************** Kernel console driver ********************************/
+
+static void kconsole_write(struct console *co, const char *s, unsigned count)
+{
+#define STRLEN 256
+    static char str[STRLEN];
+    static int pos = 0;
+    int len;
+
+    /* We buffer output until we see a newline, or until the buffer is full. */
+    while ( count != 0 )
+    {
+        len = ((STRLEN - pos) > count) ? count : STRLEN - pos;
+        memcpy(str + pos, s, len);
+        pos += len;
+        s += len;
+        count -= len;
+        if ( (pos == STRLEN) || (str[pos-1] == '\n') )
+        {
+            (void)HYPERVISOR_console_write(str, pos);
+            pos = 0;
+        }
+    }
+}
+
+static kdev_t kconsole_device(struct console *c)
+{
+    /*
+     * This is the magic that binds our "struct console" to our
+     * "tty_struct", defined below.
+ */ + return MKDEV(TTY_MAJOR, 0); +} + +static struct console kconsole_info = { + name: "xenocon", + write: kconsole_write, + device: kconsole_device, + flags: CON_PRINTBUFFER, + index: -1, +}; + +void xeno_console_init(void) +{ + register_console(&kconsole_info); +} + + +/******************** Initial /dev/console *********************************/ + + +static struct tty_driver console_driver; +static int console_refcount; +static struct tty_struct *console_table[1]; +static struct termios *console_termios[1]; +static struct termios *console_termios_locked[1]; + +static int console_write_room(struct tty_struct *tty) +{ + return INT_MAX; +} + +static int console_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static inline int console_xmit(int ch) +{ + char _ch = ch; + kconsole_write(NULL, &_ch, 1); + return 1; +} + +static int console_write(struct tty_struct *tty, int from_user, + const u_char * buf, int count) +{ + int i; + + if ( from_user && verify_area(VERIFY_READ, buf, count) ) + { + return -EINVAL; + } + + for ( i = 0; i < count; i++ ) + { + char ch; + if ( from_user ) + { + __get_user(ch, buf + i); + } + else + { + ch = buf[i]; + } + console_xmit(ch); + } + return i; +} + +static void console_put_char(struct tty_struct *tty, u_char ch) +{ + console_xmit(ch); +} + +static int console_open(struct tty_struct *tty, struct file *filp) +{ + int line; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ( line ) + { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + tty->driver_data = NULL; + + return 0; +} + +static void console_close(struct tty_struct *tty, struct file *filp) +{ + MOD_DEC_USE_COUNT; +} + +static int __init console_ini(void) +{ + memset(&console_driver, 0, sizeof(struct tty_driver)); + console_driver.magic = TTY_DRIVER_MAGIC; + console_driver.driver_name = "xeno_console"; + console_driver.name = "console"; + console_driver.major = TTY_MAJOR; + console_driver.minor_start = 0; + console_driver.num = 1; + console_driver.type = TTY_DRIVER_TYPE_SERIAL; + console_driver.subtype = SERIAL_TYPE_NORMAL; + console_driver.init_termios = tty_std_termios; + console_driver.flags = TTY_DRIVER_REAL_RAW; + console_driver.refcount = &console_refcount; + console_driver.table = console_table; + console_driver.termios = console_termios; + console_driver.termios_locked = console_termios_locked; + /* Functions */ + console_driver.open = console_open; + console_driver.close = console_close; + console_driver.write = console_write; + console_driver.write_room = console_write_room; + console_driver.put_char = console_put_char; + console_driver.chars_in_buffer = console_chars_in_buffer; + + if ( tty_register_driver(&console_driver) ) + { + printk(KERN_ERR "Couldn't register Xeno console driver\n"); + } + else + { + printk("Xeno console successfully installed\n"); + } + + return 0; +} + +static void __exit console_fin(void) +{ + int ret; + + ret = tty_unregister_driver(&console_driver); + if ( ret != 0 ) + { + printk(KERN_ERR "Unable to unregister Xeno console driver: %d\n", ret); + } +} + +module_init(console_ini); +module_exit(console_fin); + diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile new file mode 100644 index 0000000000..38ba61772a --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/Makefile @@ -0,0 +1,3 @@ +O_TARGET := dom0.o +obj-y := dom0_core.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c 
b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c
new file mode 100644
index 0000000000..7121f89a09
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_core.c
@@ -0,0 +1,129 @@
+/******************************************************************************
+ * dom0_core.c
+ *
+ * Interface to privileged domain-0 commands.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "dom0_ops.h"
+
+static struct proc_dir_entry *proc_dom0;
+
+static unsigned char readbuf[1204];
+
+static int dom0_read_proc(char *page, char **start, off_t off,
+                          int count, int *eof, void *data)
+{
+    strcpy(page, readbuf);
+    *readbuf = '\0';
+    *eof = 1;
+    *start = page;
+    return strlen(page);
+}
+
+
+static int dom0_write_proc(struct file *file, const char *buffer,
+                           u_long count, void *data)
+{
+    dom0_op_t op;
+    unsigned char c;
+    unsigned int val;
+    unsigned char result[20];
+    int len = count, ret;
+
+    while ( count )
+    {
+        c = *buffer++;
+        count--;
+        val = 0;
+        if ( c == 'N' )
+        {
+            op.cmd = DOM0_NEWDOMAIN;
+            while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+            {
+                val *= 10;
+                val += c - '0';
+                buffer++; count--;
+            }
+            op.u.newdomain.memory_kb = val;
+            val = 0;
+            if (count && (*buffer == ','))
+            {
+                buffer++; count--;
+                while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+                {
+                    val *= 10;
+                    val += c - '0';
+                    buffer++; count--;
+                }
+            }
+            else
+            {
+                val = 1; // default to 1 vif.
+            }
+            op.u.newdomain.num_vifs = val;
+            ret = HYPERVISOR_dom0_op(&op);
+        }
+        else if ( c == 'K' )
+        {
+            op.cmd = DOM0_KILLDOMAIN;
+            while ( count && ((c = *buffer) >= '0') && (c <= '9') )
+            {
+                val *= 10;
+                val += c - '0';
+                buffer++; count--;
+            }
+            op.u.killdomain.domain = val;
+            ret = HYPERVISOR_dom0_op(&op);
+        }
+        else
+        {
+            ret = -ENOSYS;
+        }
+
+        sprintf(result, "%d\n", ret);
+        strcat(readbuf, result);
+
+        while ( count-- && (*buffer++ != '\n') ) continue;
+    }
+
+    return len;
+}
+
+
+static int __init init_module(void)
+{
+    *readbuf = '\0';
+    proc_dom0 = create_proc_entry ("dom0", 0600, &proc_root);
+    if ( proc_dom0 != NULL )
+    {
+        proc_dom0->owner = THIS_MODULE;
+        proc_dom0->nlink = 1;
+        proc_dom0->read_proc = dom0_read_proc;
+        proc_dom0->write_proc = dom0_write_proc;
+        printk("Successfully installed domain-0 control interface\n");
+    }
+    return 0;
+}
+
+
+static void __exit cleanup_module(void)
+{
+    if ( proc_dom0 == NULL ) return;
+    remove_proc_entry("dom0", &proc_root);
+    proc_dom0 = NULL;
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h
new file mode 100644
index 0000000000..118202aa2f
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/dom0/dom0_ops.h
@@ -0,0 +1,32 @@
+/******************************************************************************
+ * dom0_ops.h
+ *
+ * Process command requests from domain-0 guest OS.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
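As the dom0_core.c write handler above shows, domain 0 drives the hypervisor through plain text commands written to /proc/dom0: "N<memory_kb>[,<num_vifs>]" creates a new domain (the VIF count defaults to 1 when omitted), and "K<domain>" kills one; each command's HYPERVISOR_dom0_op() return code can then be read back from the same file. A hypothetical user-space sketch:

    #include <stdio.h>

    int main(void)
    {
        char result[32];
        FILE *f = fopen("/proc/dom0", "r+");   /* requires root in domain 0 */
        if ( f == NULL ) return 1;

        /* Create a domain with 16MB (16384kB) of memory and 2 VIFs. */
        fprintf(f, "N16384,2\n");
        fflush(f);

        /* Read back the result code queued by the write handler. */
        if ( fgets(result, sizeof(result), f) != NULL )
            printf("dom0_op returned: %s", result);

        fclose(f);
        return 0;
    }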
+
+#define DOM0_NEWDOMAIN 0
+#define DOM0_KILLDOMAIN 1
+
+typedef struct dom0_newdomain_st
+{
+    unsigned int memory_kb;
+    unsigned int num_vifs;
+} dom0_newdomain_t;
+
+typedef struct dom0_killdomain_st
+{
+    unsigned int domain;
+} dom0_killdomain_t;
+
+typedef struct dom0_op_st
+{
+    unsigned long cmd;
+    union
+    {
+        dom0_newdomain_t newdomain;
+        dom0_killdomain_t killdomain;
+    }
+    u;
+} dom0_op_t;
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile
new file mode 100644
index 0000000000..b44a288a5b
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := net.o
+obj-y := network.o
+include $(TOPDIR)/Rules.make
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
new file mode 100644
index 0000000000..7e1825f873
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c
@@ -0,0 +1,374 @@
+/******************************************************************************
+ * network.c
+ *
+ * Virtual network driver for XenoLinux.
+ *
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define NET_TX_IRQ _EVENT_NET_TX
+#define NET_RX_IRQ _EVENT_NET_RX
+
+#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
+#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
+
+#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
+#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
+#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
+#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
+
+#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */
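+
+/*
+ * Note: the ring-index macros above wrap by masking, so they rely on
+ * TX_RING_SIZE and RX_RING_SIZE being powers of two. For example, if
+ * TX_RING_SIZE were 256, then TX_RING_INC(255) == (256 & 255) == 0:
+ * the index wraps cleanly back to the start of the ring.
+ */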
+
+static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void network_free_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+static struct list_head dev_list;
+
+/*
+ * RX RING: RX_IDX <= rx_cons <= rx_prod
+ * TX RING: TX_IDX <= tx_cons <= tx_prod
+ * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
+ */
+struct net_private
+{
+    struct list_head list;
+    struct net_device *dev;
+
+    struct net_device_stats stats;
+    struct sk_buff **tx_skb_ring;
+    struct sk_buff **rx_skb_ring;
+    atomic_t tx_entries;
+    unsigned int rx_idx, tx_idx, tx_full;
+    net_ring_t *net_ring;
+    spinlock_t tx_lock;
+};
+
+
+static int network_open(struct net_device *dev)
+{
+    struct net_private *np = dev->priv;
+    int error;
+
+    np->rx_idx = np->tx_idx = np->tx_full = 0;
+
+    memset(&np->stats, 0, sizeof(np->stats));
+
+    spin_lock_init(&np->tx_lock);
+
+    atomic_set(&np->tx_entries, 0);
+
+    np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
+    np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
+    np->net_ring->tx_ring = NULL;
+    np->net_ring->rx_ring = NULL;
+
+    np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
+                              GFP_KERNEL);
+    np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
+                                    GFP_KERNEL);
+    np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
+                                    GFP_KERNEL);
+    if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
+         (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+    {
+        printk(KERN_WARNING "%s: Could not allocate ring memory\n", dev->name);
+        error = -ENOBUFS;
+        goto fail;
+    }
+
+    network_alloc_rx_buffers(dev);
+
+    error = request_irq(NET_RX_IRQ, network_rx_int, 0, "net-rx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate receive interrupt\n",
+               dev->name);
+        goto fail;
+    }
+
+    error = request_irq(NET_TX_IRQ, network_tx_int, 0, "net-tx", dev);
+    if ( error )
+    {
+        printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n",
+               dev->name);
+        free_irq(NET_RX_IRQ, dev);
+        goto fail;
+    }
+
+    printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);
+
+    netif_start_queue(dev);
+
+    MOD_INC_USE_COUNT;
+
+    return 0;
+
+ fail:
+    if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
+    if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
+    if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
+    if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
+    kfree(np);
+    return error;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned long flags;
+
+    spin_lock_irqsave(&np->tx_lock, flags);
+
+    for ( i = np->tx_idx; i != np->net_ring->tx_cons; i = TX_RING_INC(i) )
+    {
+        skb = np->tx_skb_ring[i];
+        dev_kfree_skb_any(skb);
+        atomic_dec(&np->tx_entries);
+    }
+
+    np->tx_idx = i;
+
+    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
+    {
+        np->tx_full = 0;
+        netif_wake_queue(dev);
+    }
+
+    spin_unlock_irqrestore(&np->tx_lock, flags);
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+    unsigned int i;
+    struct net_private *np = dev->priv;
+    struct sk_buff *skb;
+    unsigned int end =
RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + + for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) + { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( skb == NULL ) break; + skb->dev = dev; + skb_reserve(skb, 2); /* word align the IP header */ + np->rx_skb_ring[i] = skb; + np->net_ring->rx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ + } + + np->net_ring->rx_prod = i; + + np->net_ring->rx_event = RX_RING_INC(np->rx_idx); + + HYPERVISOR_net_update(); +} + + +static void network_free_rx_buffers(struct net_device *dev) +{ + unsigned int i; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + dev_kfree_skb(skb); + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned int i; + struct net_private *np = (struct net_private *)dev->priv; + + if ( np->tx_full ) + { + printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + i = np->net_ring->tx_prod; + np->tx_skb_ring[i] = skb; + np->net_ring->tx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->tx_ring[i].size = skb->len; + np->net_ring->tx_prod = TX_RING_INC(i); + atomic_inc(&np->tx_entries); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + spin_lock_irq(&np->tx_lock); + if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) + { + np->tx_full = 1; + netif_stop_queue(dev); + np->net_ring->tx_event = TX_RING_ADD(np->tx_idx, + atomic_read(&np->tx_entries) >> 1); + } + else + { + /* Avoid unnecessary tx interrupts. */ + np->net_ring->tx_event = TX_RING_INC(np->net_ring->tx_prod); + } + spin_unlock_irq(&np->tx_lock); + + /* Must do this after setting tx_event: race with updates of tx_cons. */ + network_tx_buf_gc(dev); + + HYPERVISOR_net_update(); + + return 0; +} + + +static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + unsigned int i; + struct net_device *dev = (struct net_device *)dev_id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + again: + for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) + { + skb = np->rx_skb_ring[i]; + skb_put(skb, np->net_ring->rx_ring[i].size); + skb->protocol = eth_type_trans(skb, dev); + np->stats.rx_packets++; + np->stats.rx_bytes += np->net_ring->rx_ring[i].size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_idx = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. 
*/ + smp_mb(); + if ( np->net_ring->rx_cons != i ) goto again; +} + + +static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + struct net_device *dev = (struct net_device *)dev_id; + network_tx_buf_gc(dev); +} + + +static int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + netif_stop_queue(dev); + free_irq(NET_RX_IRQ, dev); + free_irq(NET_TX_IRQ, dev); + network_free_rx_buffers(dev); + kfree(np->net_ring->rx_ring); + kfree(np->net_ring->tx_ring); + kfree(np->rx_skb_ring); + kfree(np->tx_skb_ring); + MOD_DEC_USE_COUNT; + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +static int __init init_module(void) +{ + int i, err; + struct net_device *dev; + struct net_private *np; + + INIT_LIST_HEAD(&dev_list); + + for ( i = 0; i < start_info.num_net_rings; i++ ) + { + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->net_ring = start_info.net_rings + i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile new file mode 100644 index 0000000000..ea830e82b6 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -traditional -c $< -o $*.o + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o + +export-objs := i386_ksyms.o + +obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ + ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \ + i386_ksyms.o i387.o hypervisor.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S new file mode 100644 index 0000000000..b888ae7747 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/entry.S @@ -0,0 +1,717 @@ +/* + * linux/arch/i386/entry.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * entry.S contains the system-call and fault low-level handling routines. + * This also contains the timer-interrupt handler, as well as all interrupts + * and faults that can result in a task-switch. + * + * NOTE: This code handles signal-recognition, which happens every time + * after a timer-interrupt and after each system call. + * + * I changed all the .align's to 4 (16 byte alignment), as that's faster + * on a 486. + * + * Stack layout in 'ret_from_system_call': + * ptrace needs to have all regs on the stack. 
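+ * (Roughly, the stack layout listed below corresponds to this C view,
+ *  an illustrative sketch rather than a declaration from this tree:
+ *      struct pt_regs { long ebx, ecx, edx, esi, edi, ebp, eax;
+ *                       int xds, xes; long orig_eax, eip; int xcs;
+ *                       long eflags, esp; int xss; };)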
+ * if the order here is changed, it needs to be + * updated in fork.c:copy_process, signal.c:do_signal, + * ptrace.c and ptrace.h + * + * 0(%esp) - %ebx + * 4(%esp) - %ecx + * 8(%esp) - %edx + * C(%esp) - %esi + * 10(%esp) - %edi + * 14(%esp) - %ebp + * 18(%esp) - %eax + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - orig_eax + * 28(%esp) - %eip + * 2C(%esp) - %cs + * 30(%esp) - %eflags + * 34(%esp) - %oldesp + * 38(%esp) - %oldss + * + * "current" is in register %ebx during any slow entries. + */ + +#include +#include +#include +#include +#include + +EBX = 0x00 +ECX = 0x04 +EDX = 0x08 +ESI = 0x0C +EDI = 0x10 +EBP = 0x14 +EAX = 0x18 +DS = 0x1C +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C +EFLAGS = 0x30 +OLDESP = 0x34 +OLDSS = 0x38 + +CF_MASK = 0x00000001 +IF_MASK = 0x00000200 +NT_MASK = 0x00004000 + +/* + * these are offsets into the task-struct. + */ +state = 0 +flags = 4 +sigpending = 8 +addr_limit = 12 +exec_domain = 16 +need_resched = 20 +tsk_ptrace = 24 +processor = 52 + +ENOSYS = 38 + + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(__KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ +1: popl %ds; \ +2: popl %es; \ + addl $4,%esp; \ +3: iret; \ +.section .fixup,"ax"; \ +4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ + jmp 2b; \ +6: pushl %ss; \ + popl %ds; \ + pushl %ss; \ + popl %es; \ + pushl $11; \ + call do_exit; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.previous + +#define GET_CURRENT(reg) \ + movl $-8192, reg; \ + andl %esp, reg + + +ENTRY(ret_from_fork) + pushl %ebx + call SYMBOL_NAME(schedule_tail) + addl $4, %esp + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys_exit + jmp ret_from_sys_call + +#if defined(CONFIG_KDB) +ENTRY(kdb_call) + pushl %eax # save orig EAX + SAVE_ALL + pushl %esp # struct pt_regs + pushl $0 # error_code + pushl $7 # KDB_REASON_ENTRY + call SYMBOL_NAME(kdb) + addl $12,%esp # remove args + RESTORE_ALL +#endif + +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. 
+ */ + +ENTRY(system_call) + pushl %eax # save orig_eax + SAVE_ALL + GET_CURRENT(%ebx) + testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS + jne tracesys + cmpl $(NR_syscalls),%eax + jae badsys + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +ENTRY(ret_from_sys_call) + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movl %eax,4(%esi) # need_resched and signals atomic test +ret_syscall_tests: + cmpl $0,need_resched(%ebx) + jne reschedule + cmpl $0,sigpending(%ebx) + je safesti # ensure need_resched updates are seen +signal_return: + btsl %eax,4(%esi) # reenable event callbacks + movl %esp,%eax + xorl %edx,%edx + call SYMBOL_NAME(do_signal) + jmp ret_from_sys_call + + ALIGN +restore_all: + RESTORE_ALL + + ALIGN +tracesys: + movl $-ENOSYS,EAX(%esp) + call SYMBOL_NAME(syscall_trace) + movl ORIG_EAX(%esp),%eax + cmpl $(NR_syscalls),%eax + jae tracesys_exit + call *SYMBOL_NAME(sys_call_table)(,%eax,4) + movl %eax,EAX(%esp) # save the return value +tracesys_exit: + call SYMBOL_NAME(syscall_trace) + jmp ret_from_sys_call +badsys: + movl $-ENOSYS,EAX(%esp) + jmp ret_from_sys_call + + ALIGN +ENTRY(ret_from_intr) + GET_CURRENT(%ebx) +ret_from_exception: + movb CS(%esp),%al + testl $2,%eax + jne ret_from_sys_call + jmp restore_all + + ALIGN +reschedule: + btsl %eax,4(%esi) # reenable event callbacks + call SYMBOL_NAME(schedule) # test + jmp ret_from_sys_call + +ENTRY(divide_error) + pushl $0 # no error code + pushl $ SYMBOL_NAME(do_divide_error) + ALIGN +error_code: + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the function address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call *%edi + addl $8,%esp + jmp ret_from_exception + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + GET_CURRENT(%ebx) + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne ret_syscall_tests +safesti:btsl %eax,4(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + cmpl %eax,(%esi) + jne 14f # process more events if necessary... + RESTORE_ALL +14: btrl %eax,4(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. 
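+#
+# (A concrete illustration of the merge, based only on the code below:
+# suppose the event fires just after RESTORE_ALL's 'pop %edx'. The user's
+# %ebx, %ecx and %edx have already left the interrupted frame, but the
+# SAVE_ALL in the new activation has just re-saved them, so the bottom
+# 0x0c bytes of the new frame plus the remainder of the old frame add up
+# to exactly one complete frame again.)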
+# How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+        addl $critical_fixup_table-scrit,%eax
+        movzbl (%eax),%eax      # %eax contains num bytes popped
+        mov  %esp,%esi
+        add  %eax,%esi          # %esi points at end of src region
+        mov  %esp,%edi
+        add  $0x34,%edi         # %edi points at end of dst region
+        mov  %eax,%ecx
+        shr  $2,%ecx            # convert bytes to words
+        je   16f                # skip loop if nothing to copy
+15:     subl $4,%esi            # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b
+16:     movl %edi,%esp          # final %edi is top of merged stack
+        jmp  11b
+
+critical_fixup_table:
+        .byte 0x00,0x00                 # cmpl %eax,(%esi)
+        .byte 0x00,0x00                 # jne 14f
+        .byte 0x00                      # pop %ebx
+        .byte 0x04                      # pop %ecx
+        .byte 0x08                      # pop %edx
+        .byte 0x0c                      # pop %esi
+        .byte 0x10                      # pop %edi
+        .byte 0x14                      # pop %ebp
+        .byte 0x18                      # pop %eax
+        .byte 0x1c                      # pop %ds
+        .byte 0x20                      # pop %es
+        .byte 0x24,0x24,0x24            # add $4,%esp
+        .byte 0x28                      # iret
+        .byte 0x00,0x00,0x00,0x00       # btrl %eax,4(%esi)
+        .byte 0x00,0x00                 # jmp 11b
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
+1:      pop  %ds
+2:      pop  %es
+3:      iret
+.section .fixup,"ax";   \
+4:      movl $0,(%esp); \
+        jmp 1b;         \
+5:      movl $0,(%esp); \
+        jmp 2b;         \
+6:      pushl %ss;      \
+        popl %ds;       \
+        pushl %ss;      \
+        popl %es;       \
+        pushl $11;      \
+        call do_exit;   \
+.previous;              \
+.section __ex_table,"a";\
+        .align 4;       \
+        .long 1b,4b;    \
+        .long 2b,5b;    \
+        .long 3b,6b;    \
+.previous
+
+ENTRY(coprocessor_error)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_coprocessor_error)
+        jmp error_code
+
+ENTRY(simd_coprocessor_error)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+        jmp error_code
+
+ENTRY(device_not_available)
+        pushl $-1               # mark this as an int
+        SAVE_ALL
+        GET_CURRENT(%ebx)
+        call SYMBOL_NAME(math_state_restore)
+        jmp ret_from_exception
+
+ENTRY(debug)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_debug)
+        jmp error_code
+
+ENTRY(int3)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_int3)
+        jmp error_code
+
+ENTRY(overflow)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_overflow)
+        jmp error_code
+
+ENTRY(bounds)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_bounds)
+        jmp error_code
+
+ENTRY(invalid_op)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_invalid_op)
+        jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+        pushl $0
+        pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+        jmp error_code
+
+ENTRY(double_fault)
+        pushl $ SYMBOL_NAME(do_double_fault)
+        jmp error_code
+
+ENTRY(invalid_TSS)
+        pushl $ SYMBOL_NAME(do_invalid_TSS)
+        jmp error_code
+
+ENTRY(segment_not_present)
+        pushl $ SYMBOL_NAME(do_segment_not_present)
+        jmp error_code
+
+ENTRY(stack_segment)
+        pushl $ SYMBOL_NAME(do_stack_segment)
+        jmp error_code
+
+ENTRY(general_protection)
+        pushl $ SYMBOL_NAME(do_general_protection)
+        jmp error_code
+
+ENTRY(alignment_check)
+        pushl $ SYMBOL_NAME(do_alignment_check)
+        jmp error_code
+
+#if defined(CONFIG_KDB)
+ENTRY(page_fault_mca)
+        pushl %ecx
+        pushl %edx
+        pushl %eax
+        movl $473,%ecx
+        rdmsr
+        andl $0xfffffffe,%eax           /* Disable last branch recording */
+        wrmsr
+        popl %eax
+        popl %edx
+        popl %ecx
+        pushl $ SYMBOL_NAME(do_page_fault)
+        jmp error_code
+#endif
+
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
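+#
+# (So, reading the pushes below, the C-level handler is assumed to have
+# the signature
+#	void do_page_fault(struct pt_regs *regs,
+#	                   unsigned long error_code,
+#	                   unsigned long address);
+# with the pt_regs pointer pushed last, i.e. as the first argument.)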
+ENTRY(page_fault) + pushl %ds + pushl %eax + xorl %eax,%eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + GET_CURRENT(%ebx) + cld + movl %es,%ecx + movl ORIG_EAX(%esp), %esi # get the error code + movl ES(%esp), %edi # get the faulting address + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl %esp,%edx + pushl %edi # push the faulting address + pushl %esi # push the error code + pushl %edx # push the pt_regs pointer + movl $(__KERNEL_DS),%edx + movl %edx,%ds + movl %edx,%es + call SYMBOL_NAME(do_page_fault) + addl $12,%esp + jmp ret_from_exception + +ENTRY(machine_check) + pushl $0 + pushl $ SYMBOL_NAME(do_machine_check) + jmp error_code + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) + jmp error_code + +.data +ENTRY(sys_call_table) + .long SYMBOL_NAME(sys_ni_syscall) /* 0 - old "setup()" system call*/ + .long SYMBOL_NAME(sys_exit) + .long SYMBOL_NAME(sys_fork) + .long SYMBOL_NAME(sys_read) + .long SYMBOL_NAME(sys_write) + .long SYMBOL_NAME(sys_open) /* 5 */ + .long SYMBOL_NAME(sys_close) + .long SYMBOL_NAME(sys_waitpid) + .long SYMBOL_NAME(sys_creat) + .long SYMBOL_NAME(sys_link) + .long SYMBOL_NAME(sys_unlink) /* 10 */ + .long SYMBOL_NAME(sys_execve) + .long SYMBOL_NAME(sys_chdir) + .long SYMBOL_NAME(sys_time) + .long SYMBOL_NAME(sys_mknod) + .long SYMBOL_NAME(sys_chmod) /* 15 */ + .long SYMBOL_NAME(sys_lchown16) + .long SYMBOL_NAME(sys_ni_syscall) /* old break syscall holder */ + .long SYMBOL_NAME(sys_stat) + .long SYMBOL_NAME(sys_lseek) + .long SYMBOL_NAME(sys_getpid) /* 20 */ + .long SYMBOL_NAME(sys_mount) + .long SYMBOL_NAME(sys_oldumount) + .long SYMBOL_NAME(sys_setuid16) + .long SYMBOL_NAME(sys_getuid16) + .long SYMBOL_NAME(sys_stime) /* 25 */ + .long SYMBOL_NAME(sys_ptrace) + .long SYMBOL_NAME(sys_alarm) + .long SYMBOL_NAME(sys_fstat) + .long SYMBOL_NAME(sys_pause) + .long SYMBOL_NAME(sys_utime) /* 30 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old stty syscall holder */ + .long SYMBOL_NAME(sys_ni_syscall) /* old gtty syscall holder */ + .long SYMBOL_NAME(sys_access) + .long SYMBOL_NAME(sys_nice) + .long SYMBOL_NAME(sys_ni_syscall) /* 35 */ /* old ftime syscall holder */ + .long SYMBOL_NAME(sys_sync) + .long SYMBOL_NAME(sys_kill) + .long SYMBOL_NAME(sys_rename) + .long SYMBOL_NAME(sys_mkdir) + .long SYMBOL_NAME(sys_rmdir) /* 40 */ + .long SYMBOL_NAME(sys_dup) + .long SYMBOL_NAME(sys_pipe) + .long SYMBOL_NAME(sys_times) + .long SYMBOL_NAME(sys_ni_syscall) /* old prof syscall holder */ + .long SYMBOL_NAME(sys_brk) /* 45 */ + .long SYMBOL_NAME(sys_setgid16) + .long SYMBOL_NAME(sys_getgid16) + .long SYMBOL_NAME(sys_signal) + .long SYMBOL_NAME(sys_geteuid16) + .long SYMBOL_NAME(sys_getegid16) /* 50 */ + .long SYMBOL_NAME(sys_acct) + .long SYMBOL_NAME(sys_umount) /* recycled never used phys() */ + .long SYMBOL_NAME(sys_ni_syscall) /* old lock syscall holder */ + .long SYMBOL_NAME(sys_ioctl) + .long SYMBOL_NAME(sys_fcntl) /* 55 */ + .long SYMBOL_NAME(sys_ni_syscall) /* old mpx syscall holder */ + .long SYMBOL_NAME(sys_setpgid) + .long SYMBOL_NAME(sys_ni_syscall) /* old ulimit syscall holder */ + .long SYMBOL_NAME(sys_olduname) + .long SYMBOL_NAME(sys_umask) /* 60 */ + .long SYMBOL_NAME(sys_chroot) + .long SYMBOL_NAME(sys_ustat) + .long SYMBOL_NAME(sys_dup2) + .long SYMBOL_NAME(sys_getppid) + .long SYMBOL_NAME(sys_getpgrp) /* 65 */ + .long SYMBOL_NAME(sys_setsid) + .long SYMBOL_NAME(sys_sigaction) + .long SYMBOL_NAME(sys_sgetmask) + .long SYMBOL_NAME(sys_ssetmask) + .long 
SYMBOL_NAME(sys_setreuid16) /* 70 */ + .long SYMBOL_NAME(sys_setregid16) + .long SYMBOL_NAME(sys_sigsuspend) + .long SYMBOL_NAME(sys_sigpending) + .long SYMBOL_NAME(sys_sethostname) + .long SYMBOL_NAME(sys_setrlimit) /* 75 */ + .long SYMBOL_NAME(sys_old_getrlimit) + .long SYMBOL_NAME(sys_getrusage) + .long SYMBOL_NAME(sys_gettimeofday) + .long SYMBOL_NAME(sys_settimeofday) + .long SYMBOL_NAME(sys_getgroups16) /* 80 */ + .long SYMBOL_NAME(sys_setgroups16) + .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_symlink) + .long SYMBOL_NAME(sys_lstat) + .long SYMBOL_NAME(sys_readlink) /* 85 */ + .long SYMBOL_NAME(sys_uselib) + .long SYMBOL_NAME(sys_swapon) + .long SYMBOL_NAME(sys_reboot) + .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_munmap) + .long SYMBOL_NAME(sys_truncate) + .long SYMBOL_NAME(sys_ftruncate) + .long SYMBOL_NAME(sys_fchmod) + .long SYMBOL_NAME(sys_fchown16) /* 95 */ + .long SYMBOL_NAME(sys_getpriority) + .long SYMBOL_NAME(sys_setpriority) + .long SYMBOL_NAME(sys_ni_syscall) /* old profil syscall holder */ + .long SYMBOL_NAME(sys_statfs) + .long SYMBOL_NAME(sys_fstatfs) /* 100 */ + .long SYMBOL_NAME(sys_ioperm) + .long SYMBOL_NAME(sys_socketcall) + .long SYMBOL_NAME(sys_syslog) + .long SYMBOL_NAME(sys_setitimer) + .long SYMBOL_NAME(sys_getitimer) /* 105 */ + .long SYMBOL_NAME(sys_newstat) + .long SYMBOL_NAME(sys_newlstat) + .long SYMBOL_NAME(sys_newfstat) + .long SYMBOL_NAME(sys_uname) + .long SYMBOL_NAME(sys_iopl) /* 110 */ + .long SYMBOL_NAME(sys_vhangup) + .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_wait4) + .long SYMBOL_NAME(sys_swapoff) /* 115 */ + .long SYMBOL_NAME(sys_sysinfo) + .long SYMBOL_NAME(sys_ipc) + .long SYMBOL_NAME(sys_fsync) + .long SYMBOL_NAME(sys_sigreturn) + .long SYMBOL_NAME(sys_clone) /* 120 */ + .long SYMBOL_NAME(sys_setdomainname) + .long SYMBOL_NAME(sys_newuname) + .long SYMBOL_NAME(sys_modify_ldt) + .long SYMBOL_NAME(sys_adjtimex) + .long SYMBOL_NAME(sys_mprotect) /* 125 */ + .long SYMBOL_NAME(sys_sigprocmask) + .long SYMBOL_NAME(sys_create_module) + .long SYMBOL_NAME(sys_init_module) + .long SYMBOL_NAME(sys_delete_module) + .long SYMBOL_NAME(sys_get_kernel_syms) /* 130 */ + .long SYMBOL_NAME(sys_quotactl) + .long SYMBOL_NAME(sys_getpgid) + .long SYMBOL_NAME(sys_fchdir) + .long SYMBOL_NAME(sys_bdflush) + .long SYMBOL_NAME(sys_sysfs) /* 135 */ + .long SYMBOL_NAME(sys_personality) + .long SYMBOL_NAME(sys_ni_syscall) /* for afs_syscall */ + .long SYMBOL_NAME(sys_setfsuid16) + .long SYMBOL_NAME(sys_setfsgid16) + .long SYMBOL_NAME(sys_llseek) /* 140 */ + .long SYMBOL_NAME(sys_getdents) + .long SYMBOL_NAME(sys_select) + .long SYMBOL_NAME(sys_flock) + .long SYMBOL_NAME(sys_msync) + .long SYMBOL_NAME(sys_readv) /* 145 */ + .long SYMBOL_NAME(sys_writev) + .long SYMBOL_NAME(sys_getsid) + .long SYMBOL_NAME(sys_fdatasync) + .long SYMBOL_NAME(sys_sysctl) + .long SYMBOL_NAME(sys_mlock) /* 150 */ + .long SYMBOL_NAME(sys_munlock) + .long SYMBOL_NAME(sys_mlockall) + .long SYMBOL_NAME(sys_munlockall) + .long SYMBOL_NAME(sys_sched_setparam) + .long SYMBOL_NAME(sys_sched_getparam) /* 155 */ + .long SYMBOL_NAME(sys_sched_setscheduler) + .long SYMBOL_NAME(sys_sched_getscheduler) + .long SYMBOL_NAME(sys_sched_yield) + .long SYMBOL_NAME(sys_sched_get_priority_max) + .long SYMBOL_NAME(sys_sched_get_priority_min) /* 160 */ + .long SYMBOL_NAME(sys_sched_rr_get_interval) + .long SYMBOL_NAME(sys_nanosleep) + .long SYMBOL_NAME(sys_mremap) 
+ .long SYMBOL_NAME(sys_setresuid16) + .long SYMBOL_NAME(sys_getresuid16) /* 165 */ + .long SYMBOL_NAME(sys_ni_syscall) /* was VM86 */ + .long SYMBOL_NAME(sys_query_module) + .long SYMBOL_NAME(sys_poll) + .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_setresgid16) /* 170 */ + .long SYMBOL_NAME(sys_getresgid16) + .long SYMBOL_NAME(sys_prctl) + .long SYMBOL_NAME(sys_rt_sigreturn) + .long SYMBOL_NAME(sys_rt_sigaction) + .long SYMBOL_NAME(sys_rt_sigprocmask) /* 175 */ + .long SYMBOL_NAME(sys_rt_sigpending) + .long SYMBOL_NAME(sys_rt_sigtimedwait) + .long SYMBOL_NAME(sys_rt_sigqueueinfo) + .long SYMBOL_NAME(sys_rt_sigsuspend) + .long SYMBOL_NAME(sys_pread) /* 180 */ + .long SYMBOL_NAME(sys_pwrite) + .long SYMBOL_NAME(sys_chown16) + .long SYMBOL_NAME(sys_getcwd) + .long SYMBOL_NAME(sys_capget) + .long SYMBOL_NAME(sys_capset) /* 185 */ + .long SYMBOL_NAME(sys_sigaltstack) + .long SYMBOL_NAME(sys_sendfile) + .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ + .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ + .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_getrlimit) + .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + .long SYMBOL_NAME(sys_stat64) /* 195 */ + .long SYMBOL_NAME(sys_lstat64) + .long SYMBOL_NAME(sys_fstat64) + .long SYMBOL_NAME(sys_lchown) + .long SYMBOL_NAME(sys_getuid) + .long SYMBOL_NAME(sys_getgid) /* 200 */ + .long SYMBOL_NAME(sys_geteuid) + .long SYMBOL_NAME(sys_getegid) + .long SYMBOL_NAME(sys_setreuid) + .long SYMBOL_NAME(sys_setregid) + .long SYMBOL_NAME(sys_getgroups) /* 205 */ + .long SYMBOL_NAME(sys_setgroups) + .long SYMBOL_NAME(sys_fchown) + .long SYMBOL_NAME(sys_setresuid) + .long SYMBOL_NAME(sys_getresuid) + .long SYMBOL_NAME(sys_setresgid) /* 210 */ + .long SYMBOL_NAME(sys_getresgid) + .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_setuid) + .long SYMBOL_NAME(sys_setgid) + .long SYMBOL_NAME(sys_setfsuid) /* 215 */ + .long SYMBOL_NAME(sys_setfsgid) + .long SYMBOL_NAME(sys_pivot_root) + .long SYMBOL_NAME(sys_mincore) + .long SYMBOL_NAME(sys_madvise) + .long SYMBOL_NAME(sys_getdents64) /* 220 */ + .long SYMBOL_NAME(sys_fcntl64) + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ + .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ + .long SYMBOL_NAME(sys_gettid) + .long SYMBOL_NAME(sys_readahead) /* 225 */ + + .rept NR_syscalls-(.-sys_call_table)/4 + .long SYMBOL_NAME(sys_ni_syscall) + .endr diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S new file mode 100644 index 0000000000..b1a4e3eb21 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/head.S @@ -0,0 +1,61 @@ + +.text +#include +#include +#include +#include +#include +#include +#include + +/* Offsets in start_info structure */ +#define SHARED_INFO 4 +#define MOD_START 16 +#define MOD_LEN 20 + +startup_32: + cld + + lss stack_start,%esp + + /* Copy initrd somewhere safe before it's clobbered by BSS. */ + mov MOD_LEN(%esi),%ecx + shr $2,%ecx + jz 2f /* bail from copy loop if no initrd */ + mov $SYMBOL_NAME(_end),%edi + add MOD_LEN(%esi),%edi + mov MOD_START(%esi),%eax + add MOD_LEN(%esi),%eax +1: sub $4,%eax + sub $4,%edi + mov (%eax),%ebx + mov %ebx,(%edi) + loop 1b + mov %edi,MOD_START(%esi) + + /* Clear BSS first so that there are no surprises... */ +2: xorl %eax,%eax + movl $SYMBOL_NAME(__bss_start),%edi + movl $SYMBOL_NAME(_end),%ecx + subl %edi,%ecx + rep stosb + + /* Copy the necessary stuff from start_info structure. 
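+	 * (Arithmetic note: per the #defines above, shared_info sits at
+	 *  offset 4 of start_info and the initrd module at offsets 16/20;
+	 *  the 'rep movsl' below copies %ecx = 128 longs, i.e. the first
+	 *  512 bytes of start_info, into start_info_union, which must
+	 *  therefore be at least 512 bytes.)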
*/ + /* We need to copy shared_info early, so that sti/cli work */ + mov SHARED_INFO(%esi),%eax + mov %eax,SYMBOL_NAME(HYPERVISOR_shared_info) + mov $SYMBOL_NAME(start_info_union),%edi + mov $128,%ecx + rep movsl + + jmp SYMBOL_NAME(start_kernel) + +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192, __KERNEL_DS + +.org 0x1000 +ENTRY(empty_zero_page) + +.org 0x2000 +ENTRY(stext) +ENTRY(_stext) diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c new file mode 100644 index 0000000000..c49087173f --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/hypervisor.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * hypervisor.c + * + * Communication to/from hypervisor. + * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include +#include +#include + +static unsigned long event_mask = 0; + +void frobb(void) {} + +void do_hypervisor_callback(struct pt_regs *regs) +{ + unsigned long events, flags; + shared_info_t *shared = HYPERVISOR_shared_info; + + do { + /* Specialised local_irq_save(). */ + flags = shared->events_enable; + shared->events_enable = 0; + barrier(); + + events = xchg(&shared->events, 0); + events &= event_mask; + + __asm__ __volatile__ ( + " push %1 ;" + " sub $4,%%esp ;" + " jmp 2f ;" + "1: btrl %%eax,%0 ;" /* clear bit */ + " mov %%eax,(%%esp) ;" + " call do_IRQ ;" /* do_IRQ(event) */ + "2: bsfl %0,%%eax ;" /* %eax == bit # */ + " jnz 1b ;" + " add $8,%%esp ;" + /* we use %ebx because it is callee-saved */ + : : "b" (events), "r" (regs) + /* clobbered by callback function calls */ + : "eax", "ecx", "edx", "memory" ); + + /* Specialised local_irq_restore(). */ + shared->events_enable = flags; + barrier(); + } + while ( shared->events ); +} + + + +/* + * Define interface to generic handling in irq.c + */ + +static unsigned int startup_hypervisor_event(unsigned int irq) +{ + set_bit(irq, &event_mask); + return 0; +} + +static void shutdown_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); +} + +static void enable_hypervisor_event(unsigned int irq) +{ + set_bit(irq, &event_mask); +} + +static void disable_hypervisor_event(unsigned int irq) +{ + clear_bit(irq, &event_mask); +} + +static void ack_hypervisor_event(unsigned int irq) +{ + if ( !(event_mask & (1< +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void dump_thread(struct pt_regs *, struct user *); +extern spinlock_t rtc_lock; + +#ifdef CONFIG_SMP +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); +#endif + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; +EXPORT_SYMBOL(drive_info); +#endif + +extern unsigned long get_cmos_time(void); + +/* platform dependent support */ +EXPORT_SYMBOL(boot_cpu_data); +EXPORT_SYMBOL(MCA_bus); +EXPORT_SYMBOL(__verify_write); +EXPORT_SYMBOL(dump_thread); +EXPORT_SYMBOL(dump_fpu); +EXPORT_SYMBOL(dump_extended_fpu); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(disable_irq_nosync); +EXPORT_SYMBOL(probe_irq_mask); +EXPORT_SYMBOL(kernel_thread); 
+EXPORT_SYMBOL(pm_idle); +EXPORT_SYMBOL(pm_power_off); +EXPORT_SYMBOL(get_cmos_time); +EXPORT_SYMBOL(apm_info); + +#ifdef CONFIG_DEBUG_IOVIRT +EXPORT_SYMBOL(__io_virt_debug); +#endif + +EXPORT_SYMBOL_NOVERS(__down_failed); +EXPORT_SYMBOL_NOVERS(__down_failed_interruptible); +EXPORT_SYMBOL_NOVERS(__down_failed_trylock); +EXPORT_SYMBOL_NOVERS(__up_wakeup); +/* Networking helper routines. */ +EXPORT_SYMBOL(csum_partial_copy_generic); +/* Delay loops */ +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); + +EXPORT_SYMBOL_NOVERS(__get_user_1); +EXPORT_SYMBOL_NOVERS(__get_user_2); +EXPORT_SYMBOL_NOVERS(__get_user_4); + +EXPORT_SYMBOL(strtok); +EXPORT_SYMBOL(strpbrk); +EXPORT_SYMBOL(simple_strtol); +EXPORT_SYMBOL(strstr); + +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__generic_copy_from_user); +EXPORT_SYMBOL(__generic_copy_to_user); +EXPORT_SYMBOL(strnlen_user); + +#ifdef CONFIG_X86_USE_3DNOW +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); +#endif + +#ifdef CONFIG_SMP +EXPORT_SYMBOL(cpu_data); +EXPORT_SYMBOL(kernel_flag); +EXPORT_SYMBOL(smp_num_cpus); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL_NOVERS(__write_lock_failed); +EXPORT_SYMBOL_NOVERS(__read_lock_failed); + +/* Global SMP irq stuff */ +EXPORT_SYMBOL(synchronize_irq); +EXPORT_SYMBOL(global_irq_holder); +EXPORT_SYMBOL(__global_cli); +EXPORT_SYMBOL(__global_sti); +EXPORT_SYMBOL(__global_save_flags); +EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_call_function); + +/* TLB flushing */ +EXPORT_SYMBOL(flush_tlb_page); +#endif + +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif + +EXPORT_SYMBOL(get_wchan); + +EXPORT_SYMBOL(rtc_lock); + +#undef memcpy +#undef memset +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +EXPORT_SYMBOL_NOVERS(memcpy); +EXPORT_SYMBOL_NOVERS(memset); + +#ifdef CONFIG_HAVE_DEC_LOCK +EXPORT_SYMBOL(atomic_dec_and_lock); +#endif + +#ifdef CONFIG_DEBUG_BUGVERBOSE +EXPORT_SYMBOL(do_BUG); +#endif diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c new file mode 100644 index 0000000000..9b8dfb0581 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/i387.c @@ -0,0 +1,518 @@ +/* + * linux/arch/i386/kernel/i387.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HAVE_HWFP 1 + +/* + * The _current_ task is using the FPU for the first time + * so initialize it and set the mxcsr to its default + * value at reset if we support XMM instructions and then + * remeber the current task has used the FPU. + */ +void init_fpu(void) +{ + __asm__("fninit"); + if ( cpu_has_xmm ) + load_mxcsr(0x1f80); + + current->used_math = 1; +} + +/* + * FPU lazy state save handling. 
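+ *
+ * (A sketch of the protocol, using only names from this file and the
+ *  entry code: while a task owns the FPU its PF_USEDFPU flag is set;
+ *  save_init_fpu() spills the state with fsave/fxsave, clears the flag
+ *  and sets TS via stts(), so the task's next FPU instruction traps
+ *  through device_not_available to math_state_restore, which reloads
+ *  the state lazily.)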
+ */ + +static inline void __save_init_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxsave %0 ; fnclex" + : "=m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "fnsave %0 ; fwait" + : "=m" (tsk->thread.i387.fsave) ); + } + tsk->flags &= ~PF_USEDFPU; +} + +void save_init_fpu( struct task_struct *tsk ) +{ + __save_init_fpu(tsk); + stts(); +} + +void kernel_fpu_begin(void) +{ + struct task_struct *tsk = current; + + if (tsk->flags & PF_USEDFPU) { + __save_init_fpu(tsk); + return; + } + clts(); +} + +void restore_fpu( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + asm volatile( "fxrstor %0" + : : "m" (tsk->thread.i387.fxsave) ); + } else { + asm volatile( "frstor %0" + : : "m" (tsk->thread.i387.fsave) ); + } +} + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); + + for ( i = 0 ; i < 8 ; i++ ) { + if ( twd & 0x1 ) { + st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); + + switch ( st->exponent & 0x7fff ) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if ( st->significand[3] & 0x8000 ) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +/* + * FPU state interaction. 
+ */ + +unsigned short get_fpu_cwd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.cwd; + } else { + return (unsigned short)tsk->thread.i387.fsave.cwd; + } +} + +unsigned short get_fpu_swd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.swd; + } else { + return (unsigned short)tsk->thread.i387.fsave.swd; + } +} + +unsigned short get_fpu_twd( struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + return tsk->thread.i387.fxsave.twd; + } else { + return (unsigned short)tsk->thread.i387.fsave.twd; + } +} + +unsigned short get_fpu_mxcsr( struct task_struct *tsk ) +{ + if ( cpu_has_xmm ) { + return tsk->thread.i387.fxsave.mxcsr; + } else { + return 0x1f80; + } +} + +void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.cwd = cwd; + } else { + tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000); + } +} + +void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.swd = swd; + } else { + tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000); + } +} + +void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) +{ + if ( cpu_has_fxsr ) { + tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); + } else { + tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000); + } +} + +void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) +{ + if ( cpu_has_xmm ) { + tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf); + } +} + +/* + * FXSR floating point environment conversions. + */ + +static inline int convert_fxsr_to_user( struct _fpstate *buf, + struct i387_fxsave_struct *fxsave ) +{ + unsigned long env[7]; + struct _fpreg *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000; + env[1] = (unsigned long)fxsave->swd | 0xffff0000; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_to_user( to, from, sizeof(*to) ) ) + return 1; + } + return 0; +} + +static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, + struct _fpstate *buf ) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg *from; + int i; + + if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for ( i = 0 ; i < 8 ; i++, to++, from++ ) { + if ( __copy_from_user( to, from, sizeof(*from) ) ) + return 1; + } + return 0; +} + +/* + * Signal frame handlers. 
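+ *
+ * (Both flavours below hand user space the classic frame: seven
+ *  longwords of i387 environment -- cwd, swd, twd, fip, fcs|fop<<16,
+ *  foo, fos -- followed by the eight stack registers, so a signal
+ *  handler sees the same struct _fpstate whether the state came from
+ *  fsave or fxsave.)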
+ */
+
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+        struct task_struct *tsk = current;
+
+        unlazy_fpu( tsk );
+        tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+        if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+                             sizeof(struct i387_fsave_struct) ) )
+                return -1;
+        return 1;
+}
+
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+        struct task_struct *tsk = current;
+        int err = 0;
+
+        unlazy_fpu( tsk );
+
+        if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+                return -1;
+
+        err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+        err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+        if ( err )
+                return -1;
+
+        if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+                             sizeof(struct i387_fxsave_struct) ) )
+                return -1;
+        return 1;
+}
+
+int save_i387( struct _fpstate *buf )
+{
+        if ( !current->used_math )
+                return 0;
+
+        /* This will cause a "finit" to be triggered by the next
+         * attempted FPU operation by the 'current' process.
+         */
+        current->used_math = 0;
+
+        if ( HAVE_HWFP ) {
+                if ( cpu_has_fxsr ) {
+                        return save_i387_fxsave( buf );
+                } else {
+                        return save_i387_fsave( buf );
+                }
+        } else {
+                return save_i387_soft( &current->thread.i387.soft, buf );
+        }
+}
+
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+        struct task_struct *tsk = current;
+        clear_fpu( tsk );
+        return __copy_from_user( &tsk->thread.i387.fsave, buf,
+                                 sizeof(struct i387_fsave_struct) );
+}
+
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+        struct task_struct *tsk = current;
+        clear_fpu( tsk );
+        if ( __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+                               sizeof(struct i387_fxsave_struct) ) )
+                return 1;
+        /* mxcsr bit 6 and 31-16 must be zero for security reasons */
+        tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+        return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate *buf )
+{
+        int err;
+
+        if ( HAVE_HWFP ) {
+                if ( cpu_has_fxsr ) {
+                        err = restore_i387_fxsave( buf );
+                } else {
+                        err = restore_i387_fsave( buf );
+                }
+        } else {
+                err = restore_i387_soft( &current->thread.i387.soft, buf );
+        }
+        current->used_math = 1;
+        return err;
+}
+
+/*
+ * ptrace request handlers.
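+ *
+ * (Assumed, as in the stock i386 tree, to back the PTRACE_GETFPREGS
+ *  and PTRACE_SETFPREGS requests; either way they present the debugger
+ *  with the legacy user_i387_struct view, converting from the fxsave
+ *  image where necessary.)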
+ */ + +static inline int get_fpregs_fsave( struct user_i387_struct *buf, + struct task_struct *tsk ) +{ + return __copy_to_user( buf, &tsk->thread.i387.fsave, + sizeof(struct user_i387_struct) ); +} + +static inline int get_fpregs_fxsave( struct user_i387_struct *buf, + struct task_struct *tsk ) +{ + return convert_fxsr_to_user( (struct _fpstate *)buf, + &tsk->thread.i387.fxsave ); +} + +int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk ) +{ + if ( HAVE_HWFP ) { + if ( cpu_has_fxsr ) { + return get_fpregs_fxsave( buf, tsk ); + } else { + return get_fpregs_fsave( buf, tsk ); + } + } else { + return save_i387_soft( &tsk->thread.i387.soft, + (struct _fpstate *)buf ); + } +} + +static inline int set_fpregs_fsave( struct task_struct *tsk, + struct user_i387_struct *buf ) +{ + return __copy_from_user( &tsk->thread.i387.fsave, buf, + sizeof(struct user_i387_struct) ); +} + +static inline int set_fpregs_fxsave( struct task_struct *tsk, + struct user_i387_struct *buf ) +{ + return convert_fxsr_from_user( &tsk->thread.i387.fxsave, + (struct _fpstate *)buf ); +} + +int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf ) +{ + if ( HAVE_HWFP ) { + if ( cpu_has_fxsr ) { + return set_fpregs_fxsave( tsk, buf ); + } else { + return set_fpregs_fsave( tsk, buf ); + } + } else { + return restore_i387_soft( &tsk->thread.i387.soft, + (struct _fpstate *)buf ); + } +} + +int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk ) +{ + if ( cpu_has_fxsr ) { + if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave, + sizeof(struct user_fxsr_struct) )) + return -EFAULT; + return 0; + } else { + return -EIO; + } +} + +int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf ) +{ + if ( cpu_has_fxsr ) { + __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf, + sizeof(struct user_fxsr_struct) ); + /* mxcsr bit 6 and 31-16 must be zero for security reasons */ + tsk->thread.i387.fxsave.mxcsr &= 0xffbf; + return 0; + } else { + return -EIO; + } +} + +/* + * FPU state for core dumps. 
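+ *
+ * (dump_fpu() and dump_extended_fpu() are the symbols exported from
+ *  i386_ksyms.c above; they fill in the user-visible FPU structures
+ *  written to core files, and return 0 when the task never used the
+ *  FPU so the dump can be skipped.)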
+ */ + +static inline void copy_fpu_fsave( struct task_struct *tsk, + struct user_i387_struct *fpu ) +{ + memcpy( fpu, &tsk->thread.i387.fsave, + sizeof(struct user_i387_struct) ); +} + +static inline void copy_fpu_fxsave( struct task_struct *tsk, + struct user_i387_struct *fpu ) +{ + unsigned short *to; + unsigned short *from; + int i; + + memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); + + to = (unsigned short *)&fpu->st_space[0]; + from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; + for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { + memcpy( to, from, 5 * sizeof(unsigned short) ); + } +} + +int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) +{ + int fpvalid; + struct task_struct *tsk = current; + + fpvalid = tsk->used_math; + if ( fpvalid ) { + unlazy_fpu( tsk ); + if ( cpu_has_fxsr ) { + copy_fpu_fxsave( tsk, fpu ); + } else { + copy_fpu_fsave( tsk, fpu ); + } + } + + return fpvalid; +} + +int dump_extended_fpu( struct pt_regs *regs, struct user_fxsr_struct *fpu ) +{ + int fpvalid; + struct task_struct *tsk = current; + + fpvalid = tsk->used_math && cpu_has_fxsr; + if ( fpvalid ) { + unlazy_fpu( tsk ); + memcpy( fpu, &tsk->thread.i387.fxsave, + sizeof(struct user_fxsr_struct) ); + } + + return fpvalid; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c new file mode 100644 index 0000000000..7779809ef2 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/init_task.c @@ -0,0 +1,33 @@ +#include +#include +#include + +#include +#include +#include + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is 8192-byte aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union task_union init_task_union + __attribute__((__section__(".data.init_task"))) = + { INIT_TASK(init_task_union.task) }; + +/* + * per-CPU TSS segments. Threads are completely 'soft' on Linux, + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ +struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS }; + diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c new file mode 100644 index 0000000000..6bce25aec0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ioport.c @@ -0,0 +1,19 @@ +#include +#include +#include +#include +#include + + +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + /* No IO permission! */ + return -EPERM; +} + + +asmlinkage int sys_iopl(unsigned long unused) +{ + /* The hypervisor won't allow it! 
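+     * (The kernel itself runs outside ring 0 under the hypervisor and
+     *  has no I/O privilege of its own to hand out; letting a process
+     *  raise its IOPL or claim ports would bypass hypervisor control
+     *  of the devices, hence the unconditional -EPERM in both calls.)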
*/ + return -EPERM; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c new file mode 100644 index 0000000000..7c855904ae --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/irq.c @@ -0,0 +1,1129 @@ +/* + * linux/arch/i386/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the apropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); + +/* + * Special irq handlers. 
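+ *
+ * (Each handler group in this file fills in one 'controller-template',
+ *  i.e. a struct hw_interrupt_type -- in initializer order: typename,
+ *  startup, shutdown, enable, disable, ack, end -- exactly as the
+ *  no_irq_type template just below illustrates.)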
+ */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +int get_irq_list(char *buf) +{ + int i, j; + struct irqaction * action; + char *p = buf; + + p += sprintf(p, " "); + for (j=0; jtypename); + p += sprintf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + p += sprintf(p, ", %s", action->name); + *p++ = '\n'; + } + p += sprintf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + nmi_count(cpu_logical_map(j))); + p += sprintf(p, "\n"); +#if CONFIG_X86_LOCAL_APIC + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + p += sprintf(p, "\n"); +#endif + p += sprintf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG + p += sprintf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif +#endif + return p - buf; +} + + +/* + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + */ + +#ifdef CONFIG_SMP +unsigned char global_irq_holder = NO_PROC_ID; +unsigned volatile long global_irq_lock; /* pendantic: long for set_bit --RR */ + +extern void show_stack(unsigned long* esp); + +static void show(char * str) +{ + int i; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [",irqs_running()); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_irq_count(i)); + printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0); + for(i=0;i < smp_num_cpus;i++) + printk(" %d",local_bh_count(i)); + + printk(" ]\nStack dumps:"); + for(i = 0; i < smp_num_cpus; i++) { + unsigned long esp; + if (i == cpu) + continue; + printk("\nCPU %d:",i); + esp = init_tss[i].esp0; + if (!esp) { + /* tss->esp0 is set to NULL in cpu_init(), + * it's initialized when the cpu returns to user + * space. -- manfreds + */ + printk(" "); + continue; + } + esp &= ~(THREAD_SIZE-1); + esp += sizeof(struct task_struct); + show_stack((void*)esp); + } + printk("\nCPU %d:",cpu); + show_stack(NULL); + printk("\n"); +} + +#define MAXCOUNT 100000000 + +/* + * I had a lockup scenario where a tight loop doing + * spin_unlock()/spin_lock() on CPU#1 was racing with + * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but + * apparently the spin_unlock() information did not make it + * through to CPU#0 ... nasty, is this by design, do we have to limit + * 'memory update oscillation frequency' artificially like here? + * + * Such 'high frequency update' races can be avoided by careful design, but + * some of our major constructs like spinlocks use similar techniques, + * it would be nice to clarify this issue. 
Set this define to 0 if you + * want to check whether your system freezes. I suspect the delay done + * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but + * i thought that such things are guaranteed by design, since we use + * the 'LOCK' prefix. + */ +#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0 + +#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND +# define SYNC_OTHER_CORES(x) udelay(x+1) +#else +/* + * We have to allow irqs to arrive between __sti and __cli + */ +# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop") +#endif + +static inline void wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) + break; + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + clear_bit(0,&global_irq_lock); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + SYNC_OTHER_CORES(cpu); + __cli(); + if (irqs_running()) + continue; + if (global_irq_lock) + continue; + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) + continue; + if (!test_and_set_bit(0,&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. + */ +void synchronize_irq(void) +{ + if (irqs_running()) { + /* Stupid approach */ + cli(); + sti(); + } +} + +static inline void get_irqlock(int cpu) +{ +#ifdef CONFIG_KDB + static int kdb_rate; + if (KDB_IS_RUNNING() && kdb_rate++ < 10) + kdb_printf("Warning: get_irqlock on cpu %d while kdb is running, may hang\n", smp_processor_id()); +#endif /* CONFIG_KDB */ + if (test_and_set_bit(0,&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + do { + do { + rep_nop(); + } while (test_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +void __global_cli(void) +{ + panic("__global_cli"); +} + +void __global_sti(void) +{ + panic("__global_sti"); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long __global_save_flags(void) +{ + panic("__global_save_flags"); +} + +void __global_restore_flags(unsigned long flags) +{ + panic("__global_restore_flags"); +} + +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. 
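+ *
+ * (Two flag interactions below are easy to miss: a handler registered
+ *  without SA_INTERRUPT runs with interrupts re-enabled via __sti()
+ *  for the duration of the chain, and if any action in the chain
+ *  carries SA_SAMPLE_RANDOM the irq's timing feeds the entropy pool
+ *  through add_interrupt_randomness().)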
+ */ +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) +{ + int status; + int cpu = smp_processor_id(); + + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + if (!(action->flags & SA_INTERRUPT)) + __sti(); + + do { + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; +} + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ + +void disable_irq(unsigned int irq) +{ + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + cpu_relax(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } +} + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk("enable_irq(%u) unbalanced from %p\n", irq, + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. 
(or is disabled) + */ + int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; + + kstat.irqs[cpu][irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + spin_unlock(&desc->lock); + handle_IRQ_event(irq, regs, action); + spin_lock(&desc->lock); + + if (!(desc->status & IRQ_PENDING)) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + + if (softirq_pending(cpu)) + do_softirq(); + + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). 
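+     *
+     * For illustration, a well-formed request from this changeset's
+     * xeno network driver would look roughly like this (the handler
+     * name is assumed; only the matching free_irq(NET_RX_IRQ, dev) is
+     * visible in network_close() above):
+     *
+     *	err = request_irq(NET_RX_IRQ, network_rx_int, SA_SAMPLE_RANDOM,
+     *	                  "net-rx", dev);
+     *	if (err)
+     *		goto fail;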
+ */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = (struct irqaction *) + kmalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; +} + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function may be called from interrupt context. + * + * Bugs: Attempting to free an irq in a handler for the same irq hangs + * the machine. + */ + +void free_irq(unsigned int irq, void *dev_id) +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (desc->status & IRQ_INPROGRESS) { + barrier(); + cpu_relax(); + } +#endif + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); + return; + } +} + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static DECLARE_MUTEX(probe_sem); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long delay; + + down(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. 
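+     * (Arithmetic: with the usual HZ=100, jiffies + HZ/50 is two ticks
+     *  ahead, i.e. the ~20ms noted below, and the later HZ/10 wait is
+     *  ten ticks, ~100ms.)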
+         */
+        for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+                /* about 20ms delay */ synchronize_irq();
+
+        /*
+         * enable any unassigned irqs
+         * (we must startup again here because if a longstanding irq
+         * happened in the previous stage, it may have masked itself)
+         */
+        for (i = NR_IRQS-1; i > 0; i--) {
+                desc = irq_desc + i;
+
+                spin_lock_irq(&desc->lock);
+                if (!desc->action) {
+                        desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+                        if (desc->handler->startup(i))
+                                desc->status |= IRQ_PENDING;
+                }
+                spin_unlock_irq(&desc->lock);
+        }
+
+        /*
+         * Wait for spurious interrupts to trigger
+         */
+        for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+                /* about 100ms delay */ synchronize_irq();
+
+        /*
+         * Now filter out any obviously spurious interrupts
+         */
+        val = 0;
+        for (i = 0; i < NR_IRQS; i++) {
+                irq_desc_t *desc = irq_desc + i;
+                unsigned int status;
+
+                spin_lock_irq(&desc->lock);
+                status = desc->status;
+
+                if (status & IRQ_AUTODETECT) {
+                        /* It triggered already - consider it spurious. */
+                        if (!(status & IRQ_WAITING)) {
+                                desc->status = status & ~IRQ_AUTODETECT;
+                                desc->handler->shutdown(i);
+                        } else
+                                if (i < 32)
+                                        val |= 1 << i;
+                }
+                spin_unlock_irq(&desc->lock);
+        }
+
+        return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+
+/**
+ *	probe_irq_mask - scan a bitmap of interrupt lines
+ *	@val: mask of interrupts to consider
+ *
+ *	Scan the ISA bus interrupt lines and return a bitmap of
+ *	active interrupts. The interrupt probe logic state is then
+ *	returned to its previous value.
+ *
+ *	Note: we need to scan all the irq's even though we will
+ *	only return ISA irq numbers - just so that we reset them
+ *	all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+        int i;
+        unsigned int mask;
+
+        mask = 0;
+        for (i = 0; i < NR_IRQS; i++) {
+                irq_desc_t *desc = irq_desc + i;
+                unsigned int status;
+
+                spin_lock_irq(&desc->lock);
+                status = desc->status;
+
+                if (status & IRQ_AUTODETECT) {
+                        if (i < 16 && !(status & IRQ_WAITING))
+                                mask |= 1 << i;
+
+                        desc->status = status & ~IRQ_AUTODETECT;
+                        desc->handler->shutdown(i);
+                }
+                spin_unlock_irq(&desc->lock);
+        }
+        up(&probe_sem);
+
+        return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ *	probe_irq_off - end an interrupt autodetect
+ *	@val: mask of potential interrupts (unused)
+ *
+ *	Scans the unused interrupt lines and returns the line which
+ *	appears to have triggered the interrupt. If no interrupt was
+ *	found then zero is returned. If more than one interrupt is
+ *	found then minus the first candidate is returned to indicate
+ *	there is doubt.
+ *
+ *	The interrupt probe logic state is returned to its previous
+ *	value.
+ *
+ *	BUGS: When used in a module (which arguably shouldn't happen)
+ *	nothing prevents two IRQ probe callers from overlapping. The
+ *	results of this are non-optimal.
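+ *
+ *	Canonical usage, for illustration:
+ *
+ *		unsigned long mask = probe_irq_on();
+ *		...make the device raise its interrupt...
+ *		irq = probe_irq_off(mask);
+ *
+ *	where a return of zero or a negative value means the probe was
+ *	inconclusive and the caller should fall back to a default.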
+ */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +/* this was setup_x86_irq but it seems pretty generic */ +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + desc->depth = 0; + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); + return 0; +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; + +#define HEX_DIGITS 8 + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +#if CONFIG_SMP + +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static unsigned long irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = ~0UL };
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", irq_affinity[(long)data]);
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	int irq = (long) data, full_count = count, err;
+	unsigned long new_value;
+
+	if (!irq_desc[irq].handler->set_affinity)
+		return -EIO;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)	/* don't use new_value if the hex string didn't parse */
+		return err;
+
+	/*
+	 * Do not allow disabling IRQs completely - it's a too easy
+	 * way to make the system unusable accidentally :-) At least
+	 * one online CPU still has to be targeted.
+	 */
+	if (!(new_value & cpu_online_map))
+		return -EINVAL;
+
+	irq_affinity[irq] = new_value;
+	irq_desc[irq].handler->set_affinity(irq, new_value);
+
+	return full_count;
+}
+
+#endif
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	unsigned long *mask = (unsigned long *) data;
+	if (count < HEX_DIGITS+1)
+		return -EINVAL;
+	return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+					unsigned long count, void *data)
+{
+	unsigned long *mask = (unsigned long *) data, full_count = count, err;
+	unsigned long new_value;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err)
+		return err;
+
+	*mask = new_value;
+	return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+	char name [MAX_NAMELEN];
+
+	if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+			irq_dir[irq])
+		return;
+
+	memset(name, 0, MAX_NAMELEN);
+	sprintf(name, "%d", irq);
+
+	/* create /proc/irq/1234 */
+	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#if CONFIG_SMP
+	{
+		struct proc_dir_entry *entry;
+
+		/* create /proc/irq/1234/smp_affinity */
+		entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+		if (entry) {
+			entry->nlink = 1;
+			entry->data = (void *)(long)irq;
+			entry->read_proc = irq_affinity_read_proc;
+			entry->write_proc = irq_affinity_write_proc;
+		}
+
+		smp_affinity_entry[irq] = entry;
+	}
+#endif
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+	struct proc_dir_entry *entry;
+	int i;
+
+	/* create /proc/irq */
+	root_irq_dir = proc_mkdir("irq", 0);
+
+	/* create /proc/irq/prof_cpu_mask */
+	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+	if (!entry)
+		return;
+
+	entry->nlink = 1;
+	entry->data = (void *)&prof_cpu_mask;
+	entry->read_proc = prof_cpu_mask_read_proc;
+	entry->write_proc = prof_cpu_mask_write_proc;
+
+	/*
+	 * Create entries for all existing IRQs.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		register_irq_proc(i);
+}
+
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c
new file mode 100644
index 0000000000..6c93943036
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ldt.c
@@ -0,0 +1,26 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * XXX KAF (28/7/02): This stuff is only used for DOS emulation, and is
+ * the default way of finding the current TCB in linuxthreads. Supporting
+ * table updates via the hypervisor is feasible, but a hassle: for now,
+ * recompiling linuxthreads is the most sensible option.
+ * + * Oh, this may become an issue depending on what JVM we use for + * running the xeno-daemon. + */ + +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + return -ENOSYS; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c new file mode 100644 index 0000000000..e4c236bf98 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/process.c @@ -0,0 +1,462 @@ +/* + * linux/arch/i386/kernel/process.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This file handles the architecture-dependent parts of process handling.. + */ + +#define __KERNEL_SYSCALLS__ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); + +int hlt_counter; + +/* + * Powermanagement idle function, if any.. + */ +void (*pm_idle)(void); + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +/* + * The idle thread. There's no useful work to be + * done, so just try to conserve power and have a + * low exit latency (ie sit in a loop waiting for + * somebody to say that they'd like to reschedule) + */ +void cpu_idle (void) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->nice = 20; + current->counter = -100; + + while (1) { + while (!current->need_resched) + HYPERVISOR_yield(); + schedule(); + check_pgt_cache(); + } +} + +void machine_restart(char * __unused) +{ + HYPERVISOR_exit(); +} + +void machine_halt(void) +{ + HYPERVISOR_exit(); +} + +void machine_power_off(void) +{ + HYPERVISOR_exit(); +} + +extern void show_trace(unsigned long* esp); + +void show_regs(struct pt_regs * regs) +{ + printk("\n"); + printk("Pid: %d, comm: %20s\n", current->pid, current->comm); + printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id()); + if (regs->xcs & 2) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx %s\n",regs->eflags, print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes); + + show_trace(®s->esp); +} + +/* + * No need to lock the MM as we are the last user + */ +void release_segments(struct mm_struct *mm) +{ + void * ldt = mm->context.segments; + + /* + * free the LDT + */ + if (ldt) { + mm->context.segments = NULL; + clear_LDT(); + vfree(ldt); + } +} + +/* + * Create a kernel thread + */ +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + long retval, d0; + + __asm__ __volatile__( + "movl %%esp,%%esi\n\t" + "int $0x80\n\t" /* Linux/i386 system call */ + "cmpl %%esp,%%esi\n\t" /* child or parent? */ + "je 1f\n\t" /* parent - jump */ + /* Load the argument into eax, and push it. That way, it does + * not matter whether the called function is compiled with + * -mregparm or not. 
*/ + "movl %4,%%eax\n\t" + "pushl %%eax\n\t" + "call *%5\n\t" /* call fn */ + "movl %3,%0\n\t" /* exit */ + "int $0x80\n" + "1:\t" + :"=&a" (retval), "=&S" (d0) + :"0" (__NR_clone), "i" (__NR_exit), + "r" (arg), "r" (fn), + "b" (flags | CLONE_VM) + : "memory"); + + return retval; +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + /* nothing to do ... */ +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); + /* + * Forget coprocessor state.. + */ + clear_fpu(tsk); + tsk->used_math = 0; +} + +void release_thread(struct task_struct *dead_task) +{ + if (dead_task->mm) { + void * ldt = dead_task->mm->context.segments; + + // temporary debugging check + if (ldt) { + printk("WARNING: dead process %8s still has LDT? <%p>\n", + dead_task->comm, ldt); + BUG(); + } + } +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. + */ +void copy_segments(struct task_struct *p, struct mm_struct *new_mm) +{ + struct mm_struct * old_mm; + void *old_ldt, *ldt; + + ldt = NULL; + old_mm = current->mm; + if (old_mm && (old_ldt = old_mm->context.segments) != NULL) { + /* + * Completely new LDT, we initialize it from the parent: + */ + ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if (!ldt) + printk(KERN_WARNING "ldt allocation failed\n"); + else + memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + } + new_mm->context.segments = ldt; + new_mm->context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */ +} + +/* + * Save a segment. + */ +#define savesegment(seg,value) \ + asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value))) + +int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, + unsigned long unused, + struct task_struct * p, struct pt_regs * regs) +{ + struct pt_regs * childregs; + + childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1; + struct_cpy(childregs, regs); + childregs->eax = 0; + childregs->esp = esp; + + p->thread.esp = (unsigned long) childregs; + p->thread.esp0 = (unsigned long) (childregs+1); + + p->thread.eip = (unsigned long) ret_from_fork; + + savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); + + unlazy_fpu(current); + struct_cpy(&p->thread.i387, ¤t->thread.i387); + + return 0; +} + +/* + * fill in the user structure for a core dump.. + */ +void dump_thread(struct pt_regs * regs, struct user * dump) +{ + int i; + +/* changed the size calculations - should hopefully work better. 
lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + for (i = 0; i < 8; i++) + dump->u_debugreg[i] = current->thread.debugreg[i]; + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; + + dump->regs.ebx = regs->ebx; + dump->regs.ecx = regs->ecx; + dump->regs.edx = regs->edx; + dump->regs.esi = regs->esi; + dump->regs.edi = regs->edi; + dump->regs.ebp = regs->ebp; + dump->regs.eax = regs->eax; + dump->regs.ds = regs->xds; + dump->regs.es = regs->xes; + savesegment(fs,dump->regs.fs); + savesegment(gs,dump->regs.gs); + dump->regs.orig_eax = regs->orig_eax; + dump->regs.eip = regs->eip; + dump->regs.cs = regs->xcs; + dump->regs.eflags = regs->eflags; + dump->regs.esp = regs->esp; + dump->regs.ss = regs->xss; + + dump->u_fpvalid = dump_fpu (regs, &dump->i387); +} + +/* + * This special macro can be used to load a debugging register + */ +#define loaddebug(thread,register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (thread->debugreg[register])) + +/* + * switch_to(x,yn) should switch tasks from x to y. + * + * We fsave/fwait so that an exception goes off at the right time + * (as a call from the fsave or fwait in effect) rather than to + * the wrong process. Lazy FP saving no longer makes any sense + * with modern CPU's, and this simplifies a lot of things (SMP + * and UP become the same). + * + * NOTE! We used to use the x86 hardware context switching. The + * reason for not using it any more becomes apparent when you + * try to recover gracefully from saved state that is no longer + * valid (stale segment register values in particular). With the + * hardware task-switch, there is no way to fix up bad state in + * a reasonable manner. + * + * The fact that Intel documents the hardware task-switching to + * be slow is a fairly red herring - this code is not noticeably + * faster. However, there _is_ some room for improvement here, + * so the performance issues may eventually be a valid point. + * More important, however, is the fact that this allows us much + * more flexibility. + */ +void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +{ + struct thread_struct *prev = &prev_p->thread, + *next = &next_p->thread; + + unlazy_fpu(prev_p); + + HYPERVISOR_set_guest_stack(__KERNEL_DS, next->esp0); + + /* + * Save away %fs and %gs. No need to save %es and %ds, as + * those are always kernel segments while inside the kernel. + */ + asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); + asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); + + /* + * Restore %fs and %gs. 
+ */ + loadsegment(fs, next->fs); + loadsegment(gs, next->gs); + +#if 0 + /* + * Now maybe reload the debug registers + */ + if (next->debugreg[7]){ + loaddebug(next, 0); + loaddebug(next, 1); + loaddebug(next, 2); + loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + loaddebug(next, 7); + } +#endif +} + +asmlinkage int sys_fork(struct pt_regs regs) +{ + return do_fork(SIGCHLD, regs.esp, ®s, 0); +} + +asmlinkage int sys_clone(struct pt_regs regs) +{ + unsigned long clone_flags; + unsigned long newsp; + + clone_flags = regs.ebx; + newsp = regs.ecx; + if (!newsp) + newsp = regs.esp; + return do_fork(clone_flags, newsp, ®s, 0); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +asmlinkage int sys_vfork(struct pt_regs regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0); +} + +/* + * sys_execve() executes a new program. + */ +asmlinkage int sys_execve(struct pt_regs regs) +{ + int error; + char * filename; + + filename = getname((char *) regs.ebx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, ®s); + if (error == 0) + current->ptrace &= ~PT_DTRACE; + putname(filename); + out: + return error; +} + +/* + * These bracket the sleeping functions.. + */ +extern void scheduling_functions_start_here(void); +extern void scheduling_functions_end_here(void); +#define first_sched ((unsigned long) scheduling_functions_start_here) +#define last_sched ((unsigned long) scheduling_functions_end_here) + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ebp, esp, eip; + unsigned long stack_page; + int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + stack_page = (unsigned long)p; + esp = p->thread.esp; + if (!stack_page || esp < stack_page || esp > 8188+stack_page) + return 0; + /* include/asm-i386/system.h:switch_to() pushes ebp last. */ + ebp = *(unsigned long *) esp; + do { + if (ebp < stack_page || ebp > 8184+stack_page) + return 0; + eip = *(unsigned long *) (ebp+4); + if (eip < first_sched || eip >= last_sched) + return eip; + ebp = *(unsigned long *) ebp; + } while (count++ < 16); + return 0; +} +#undef last_sched +#undef first_sched diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c new file mode 100644 index 0000000000..0fe86897fb --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/ptrace.c @@ -0,0 +1,463 @@ +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +/* set's the trap flag. */ +#define TRAP_FLAG 0x100 + +/* + * Offset of eflags on child stack.. + */ +#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs)) + +/* + * this routine will get a word off of the processes privileged stack. 
+ * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int get_stack_long(struct task_struct *task, int offset) +{ + unsigned char *stack; + + stack = (unsigned char *)task->thread.esp0; + stack += offset; + return (*((int *)stack)); +} + +/* + * this routine will put a word on the processes privileged stack. + * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int put_stack_long(struct task_struct *task, int offset, + unsigned long data) +{ + unsigned char * stack; + + stack = (unsigned char *) task->thread.esp0; + stack += offset; + *(unsigned long *) stack = data; + return 0; +} + +static int putreg(struct task_struct *child, + unsigned long regno, unsigned long value) +{ + switch (regno >> 2) { + case FS: + if (value && (value & 3) != 3) + return -EIO; + child->thread.fs = value; + return 0; + case GS: + if (value && (value & 3) != 3) + return -EIO; + child->thread.gs = value; + return 0; + case DS: + case ES: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case SS: + case CS: + if ((value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case EFL: + value &= FLAG_MASK; + value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; + break; + } + if (regno > GS*4) + regno -= 2*4; + put_stack_long(child, regno - sizeof(struct pt_regs), value); + return 0; +} + +static unsigned long getreg(struct task_struct *child, + unsigned long regno) +{ + unsigned long retval = ~0UL; + + switch (regno >> 2) { + case FS: + retval = child->thread.fs; + break; + case GS: + retval = child->thread.gs; + break; + case DS: + case ES: + case SS: + case CS: + retval = 0xffff; + /* fall through */ + default: + if (regno > GS*4) + regno -= 2*4; + regno = regno - sizeof(struct pt_regs); + retval &= get_stack_long(child, regno); + } + return retval; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void ptrace_disable(struct task_struct *child) +{ + long tmp; + + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); +} + +asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + struct user * dummy = NULL; + int i, ret; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->ptrace & PT_PTRACED) + goto out; + /* set the ptrace bit in the process flags. */ + current->ptrace |= PT_PTRACED; + ret = 0; + goto out; + } + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + goto out; + + ret = -EPERM; + if (pid == 1) /* you may not mess with init */ + goto out_tsk; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out_tsk; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_tsk; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. 
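+	 *
+	 * (From user space, the USER-area reads that getreg() above
+	 * services look like this - a sketch, error handling omitted:
+	 *
+	 *	#include <sys/ptrace.h>
+	 *	#include <sys/reg.h>
+	 *
+	 *	long eip = ptrace(PTRACE_PEEKUSER, pid,
+	 *			  (void *)(EIP * 4), 0);
+	 *
+	 * EIP is the i386 register index, so EIP * 4 is the word-aligned
+	 * offset checked against FRAME_SIZE below.)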
*/ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int copied; + + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + ret = -EIO; + if (copied != sizeof(tmp)) + break; + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: { + unsigned long tmp; + + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; + + tmp = 0; /* Default return condition */ + if(addr < FRAME_SIZE*sizeof(long)) + tmp = getreg(child, addr); + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + addr -= (long) &dummy->u_debugreg[0]; + addr = addr >> 2; + tmp = child->thread.debugreg[addr]; + } + ret = put_user(tmp,(unsigned long *) data); + break; + } + + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + ret = 0; + if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) + break; + ret = -EIO; + break; + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + ret = -EIO; + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + break; + + if (addr < FRAME_SIZE*sizeof(long)) { + ret = putreg(child, addr, data); + break; + } + /* We need to be very careful here. We implicitly + want to modify a portion of the task_struct, and we + have to be selective about what portions we allow someone + to modify. */ + + ret = -EIO; + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + + if(addr == (long) &dummy->u_debugreg[4]) break; + if(addr == (long) &dummy->u_debugreg[5]) break; + if(addr < (long) &dummy->u_debugreg[4] && + ((unsigned long) data) >= TASK_SIZE-3) break; + + if(addr == (long) &dummy->u_debugreg[7]) { + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + goto out_tsk; + } + + addr -= (long) &dummy->u_debugreg; + addr = addr >> 2; + child->thread.debugreg[addr] = data; + ret = 0; + } + break; + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + long tmp; + + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + if (request == PTRACE_SYSCALL) + child->ptrace |= PT_TRACESYS; + else + child->ptrace &= ~PT_TRACESYS; + child->exit_code = data; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET,tmp); + wake_up_process(child); + ret = 0; + break; + } + +/* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + long tmp; + + ret = 0; + if (child->state == TASK_ZOMBIE) /* already dead */ + break; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. */ + long tmp; + + ret = -EIO; + if ((unsigned long) data > _NSIG) + break; + child->ptrace &= ~PT_TRACESYS; + if ((child->ptrace & PT_DTRACE) == 0) { + /* Spurious delayed TF traps may occur */ + child->ptrace |= PT_DTRACE; + } + tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG; + put_stack_long(child, EFL_OFFSET, tmp); + child->exit_code = data; + /* give it a chance to run. 
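+		 *
+		 * (Tracer-side sketch of PTRACE_SINGLESTEP, assuming the
+		 * child already did PTRACE_TRACEME and exec'd; error
+		 * checks omitted:
+		 *
+		 *	int status;
+		 *	waitpid(pid, &status, 0);
+		 *	while (WIFSTOPPED(status)) {
+		 *		ptrace(PTRACE_SINGLESTEP, pid, 0, 0);
+		 *		waitpid(pid, &status, 0);
+		 *	}
+		 *
+		 * Each iteration stops the child with SIGTRAP after one
+		 * instruction, via the TF manipulation above.)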
*/ + wake_up_process(child); + ret = 0; + break; + } + + case PTRACE_DETACH: + /* detach a process that was attached. */ + ret = ptrace_detach(child, data); + break; + + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE*sizeof(long))) { + ret = -EIO; + break; + } + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __put_user(getreg(child, i),(unsigned long *) data); + data += sizeof(long); + } + ret = 0; + break; + } + + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp; + if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) { + ret = -EIO; + break; + } + for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { + __get_user(tmp, (unsigned long *) data); + putreg(child, i, tmp); + data += sizeof(long); + } + ret = 0; + break; + } + + case PTRACE_GETFPREGS: { /* Get the child FPU state. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + ret = 0; + if ( !child->used_math ) { + /* Simulate an empty FPU. */ + set_fpu_cwd(child, 0x037f); + set_fpu_swd(child, 0x0000); + set_fpu_twd(child, 0xffff); + } + get_fpregs((struct user_i387_struct *)data, child); + break; + } + + case PTRACE_SETFPREGS: { /* Set the child FPU state. */ + if (!access_ok(VERIFY_READ, (unsigned *)data, + sizeof(struct user_i387_struct))) { + ret = -EIO; + break; + } + child->used_math = 1; + set_fpregs(child, (struct user_i387_struct *)data); + ret = 0; + break; + } + + case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ + if (!access_ok(VERIFY_WRITE, (unsigned *)data, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + if ( !child->used_math ) { + /* Simulate an empty FPU. */ + set_fpu_cwd(child, 0x037f); + set_fpu_swd(child, 0x0000); + set_fpu_twd(child, 0xffff); + set_fpu_mxcsr(child, 0x1f80); + } + ret = get_fpxregs((struct user_fxsr_struct *)data, child); + break; + } + + case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ + if (!access_ok(VERIFY_READ, (unsigned *)data, + sizeof(struct user_fxsr_struct))) { + ret = -EIO; + break; + } + child->used_math = 1; + ret = set_fpxregs(child, (struct user_fxsr_struct *)data); + break; + } + + case PTRACE_SETOPTIONS: { + if (data & PTRACE_O_TRACESYSGOOD) + child->ptrace |= PT_TRACESYSGOOD; + else + child->ptrace &= ~PT_TRACESYSGOOD; + ret = 0; + break; + } + + default: + ret = -EIO; + break; + } +out_tsk: + free_task_struct(child); +out: + unlock_kernel(); + return ret; +} + +asmlinkage void syscall_trace(void) +{ + if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) != + (PT_PTRACED|PT_TRACESYS)) + return; + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0); + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. 
-brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c new file mode 100644 index 0000000000..08ff686f1e --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/semaphore.c @@ -0,0 +1,292 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise + */ +#include +#include +#include + +/* + * Semaphores are implemented using a two-way counter: + * The "count" variable is decremented for each process + * that tries to acquire the semaphore, while the "sleeping" + * variable is a count of such acquires. + * + * Notably, the inline "up()" and "down()" functions can + * efficiently test if they need to do any extra work (up + * needs to do something only if count was negative before + * the increment operation. + * + * "sleeping" and the contention routine ordering is + * protected by the semaphore spinlock. + * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in + * where we want to avoid any extra jumps and calls. + */ + +/* + * Logic: + * - only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - when we go from a non-negative count to a negative do we + * (a) synchronize with the "sleeper" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void __up(struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED; + +void __down(struct semaphore * sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. 
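+	 *
+	 * (Caller-side contract, as a sketch - the device names are
+	 * hypothetical:
+	 *
+	 *	static DECLARE_MUTEX(mydev_sem);
+	 *
+	 *	if (down_interruptible(&mydev_sem))
+	 *		return -ERESTARTSYS;
+	 *	... critical section ...
+	 *	up(&mydev_sem);
+	 *
+	 * A non-zero return means a signal arrived while sleeping, which
+	 * is exactly the -EINTR path below.)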
+ */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for + * having decremented the count. + * + * We could have done the trylock with a + * single "cmpxchg" without failure cases, + * but then it wouldn't work on a 386. + */ +int __down_trylock(struct semaphore * sem) +{ + int sleepers; + unsigned long flags; + + spin_lock_irqsave(&semaphore_lock, flags); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irqrestore(&semaphore_lock, flags); + return 1; +} + + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %ecx contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax when used as a return + * value.. + */ +asm( +".text\n" +".align 4\n" +".globl __down_failed\n" +"__down_failed:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %eax\n\t" + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" + "popl %eax\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __down_failed_interruptible\n" +"__down_failed_interruptible:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down_interruptible\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __down_failed_trylock\n" +"__down_failed_trylock:\n\t" +#if defined(CONFIG_FRAME_POINTER) + "pushl %ebp\n\t" + "movl %esp,%ebp\n\t" +#endif + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __down_trylock\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" +#if defined(CONFIG_FRAME_POINTER) + "movl %ebp,%esp\n\t" + "popl %ebp\n\t" +#endif + "ret" +); + +asm( +".text\n" +".align 4\n" +".globl __up_wakeup\n" +"__up_wakeup:\n\t" + "pushl %eax\n\t" + "pushl %edx\n\t" + "pushl %ecx\n\t" + "call __up\n\t" + "popl %ecx\n\t" + "popl %edx\n\t" + "popl %eax\n\t" + "ret" +); + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +" +.align 4 +.globl __write_lock_failed +__write_lock_failed: + " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax) +1: rep; nop + cmpl $" RW_LOCK_BIAS_STR ",(%eax) + jne 1b + + " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax) + jnz __write_lock_failed + ret + + +.align 4 +.globl __read_lock_failed +__read_lock_failed: + lock ; incl (%eax) +1: 
rep; nop + cmpl $1,(%eax) + js 1b + + lock ; decl (%eax) + js __read_lock_failed + ret +" +); +#endif diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c new file mode 100644 index 0000000000..3d4325f0e9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/setup.c @@ -0,0 +1,975 @@ +/* + * linux/arch/i386/kernel/setup.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_RAM +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +shared_info_t *HYPERVISOR_shared_info; + +/* + * Machine setup.. + */ + +char ignore_irq13; /* set if exception 16 works */ +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; + +unsigned long mmu_cr4_features; + +/* + * Bus types .. + */ +#ifdef CONFIG_EISA +int EISA_bus; +#endif +int MCA_bus; + +/* for MCA, but anyone else can use it if they want */ +unsigned int machine_id; +unsigned int machine_submodel_id; +unsigned int BIOS_revision; +unsigned int mca_pentium_flag; + +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; + +/* + * Setup options + */ +struct drive_info_struct { char dummy[32]; } drive_info; +struct screen_info screen_info; +struct apm_info apm_info; +struct sys_desc_table_struct { + unsigned short length; + unsigned char table[0]; +}; + +unsigned char aux_device_present; + +extern int root_mountflags; +extern char _text, _etext, _edata, _end; + +int enable_acpi_smp_table; + +/* Raw start-of-day parameters from the hypervisor. */ +union start_info_union start_info_union; + +#define COMMAND_LINE_SIZE 256 +static char command_line[COMMAND_LINE_SIZE]; +char saved_command_line[COMMAND_LINE_SIZE]; + +static void __init parse_mem_cmdline (char ** cmdline_p) +{ + char c = ' ', *to = command_line, *from = saved_command_line; + int len = 0; + + /* Save unparsed command line copy for /proc/cmdline */ + memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; + + for (;;) { + /* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. 
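+ *
+ * For example, "mem=96M" and "mem=64M@16M" are both accepted forms.
+ * Note that this port only parses the option out of the command line:
+ * the memparse() results are discarded, since the memory size really
+ * comes from the hypervisor (start_info.nr_pages below).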
+ */ + if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (to != command_line) + to--; + if (!memcmp(from+4, "nopentium", 9)) { + from += 9+4; + } else if (!memcmp(from+4, "exactmap", 8)) { + from += 8+4; + } else { + (void)memparse(from+4, &from); + if (*from == '@') + (void)memparse(from+1, &from); + } + } + + c = *(from++); + if (!c) + break; + if (COMMAND_LINE_SIZE <= ++len) + break; + *(to++) = c; + } + *to = '\0'; + *cmdline_p = command_line; +} + +void __init setup_arch(char **cmdline_p) +{ + unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long bootmap_size; + char str[256]; int strcnt; + + void hypervisor_callback(void); + void failsafe_callback(void); + + HYPERVISOR_shared_info->event_address = + (unsigned long)hypervisor_callback; + HYPERVISOR_shared_info->failsafe_address = + (unsigned long)failsafe_callback; + + ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); + memset(&drive_info, 0, sizeof(drive_info)); + memset(&screen_info, 0, sizeof(screen_info)); + memset(&apm_info.bios, 0, sizeof(apm_info.bios)); + aux_device_present = 0; + +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = 0; + rd_prompt = 0; + rd_doload = 0; +#endif + + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + parse_mem_cmdline(cmdline_p); + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) + +/* + * 128MB for vmalloc and initrd + */ +#define VMALLOC_RESERVE (unsigned long)(128 << 20) +#define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE) +#define MAXMEM_PFN PFN_DOWN(MAXMEM) +#define MAX_NONPAE_PFN (1 << 20) + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ +#ifdef CONFIG_BLK_DEV_INITRD + if ( start_info.mod_start ) + start_pfn = PFN_UP(__pa(start_info.mod_start + start_info.mod_len)); + else +#endif + start_pfn = PFN_UP(__pa(&_end)); + max_pfn = start_info.nr_pages; + + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) { + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", + MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_X86_PAE + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING "Warning only 4GB will be used.\n"); + printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + } +#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM */ + } + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > MAXMEM_PFN) { + highstart_pfn = MAXMEM_PFN; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + } +#endif + + /* + * Initialize the boot-time allocator, and free up all RAM. + * Then reserve space for OS image, and the bootmem bitmap. + */ + bootmap_size = init_bootmem(start_pfn, max_low_pfn); + free_bootmem(0, PFN_PHYS(max_pfn)); + reserve_bootmem(0, PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1); + + /* Now reserve space for the hypervisor-provided page tables. 
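+	 *
+	 * (For reference, with 4KB pages the helpers above behave as:
+	 * PFN_UP(0x1801) == 2 and PFN_DOWN(0x1801) == 1; and assuming the
+	 * usual 3GB PAGE_OFFSET, MAXMEM works out to 0x38000000, i.e.
+	 * 896MB of directly-mapped memory once the 128MB VMALLOC_RESERVE
+	 * is taken off.)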
*/ + { + unsigned long *pgd = (unsigned long *)start_info.pt_base; + unsigned long pte; + int i; + reserve_bootmem(__pa(pgd), PAGE_SIZE); + for ( i = 0; i < (0xE0000000UL>>22); i++ ) + { + unsigned long pgde = *pgd++; + if ( !(pgde & 1) ) continue; + pte = (pgde & PAGE_MASK) - start_info.phys_base; + reserve_bootmem(pte, PAGE_SIZE); + } + } + cur_pgd = init_mm.pgd = (pgd_t *)start_info.pt_base; + +#ifdef CONFIG_BLK_DEV_INITRD + if (start_info.mod_start) { + if ((__pa(start_info.mod_start) + start_info.mod_len) <= + (max_low_pfn << PAGE_SHIFT)) { + initrd_start = start_info.mod_start; + initrd_end = initrd_start + start_info.mod_len; + initrd_below_start_ok = 1; + } + else { + printk(KERN_ERR "initrd extends beyond end of memory " + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + __pa(start_info.mod_start) + start_info.mod_len, + max_low_pfn << PAGE_SHIFT); + initrd_start = 0; + } + } +#endif + + paging_init(); +} + +static int cachesize_override __initdata = -1; +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + + +static int __init get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +static void __init display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* AMD errata T13 (order #21922) */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + l2size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + l2size = 256; + } + + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && (c->x86 == 6) && + (c->x86_model == 11) && (l2size == 0)) + l2size = 256; + + /* VIA C3 CPUs (670-68F) need further shifting. */ + if (c->x86_vendor == X86_VENDOR_CENTAUR && (c->x86 == 6) && + ((c->x86_model == 7) || (c->x86_model == 8))) { + l2size = l2size >> 8; + } + + /* Allow user to override all this if necessary. 
*/ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + + +static int __init init_amd(struct cpuinfo_x86 *c) +{ + int r; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, &c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 6: /* An Athlon/Duron. We can trust the BIOS probably */ + break; + default: + panic("Unsupported AMD processor\n"); + } + + display_cacheinfo(c); + return r; +} + + +static void __init init_intel(struct cpuinfo_x86 *c) +{ + char *p = NULL; + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + + if (c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char dl, dh; + unsigned int cs; + + dh = des >> 4; + dl = des & 0x0F; + + /* Black magic... */ + + switch ( dh ) + { + case 0: + switch ( dl ) { + case 6: + /* L1 I cache */ + l1i += 8; + break; + case 8: + /* L1 I cache */ + l1i += 16; + break; + case 10: + /* L1 D cache */ + l1d += 8; + break; + case 12: + /* L1 D cache */ + l1d += 16; + break; + default:; + /* TLB, or unknown */ + } + break; + case 2: + if ( dl ) { + /* L3 cache */ + cs = (dl-1) << 9; + l3 += cs; + } + break; + case 4: + if ( c->x86 > 6 && dl ) { + /* P4 family */ + /* L3 cache */ + cs = 128 << (dl-1); + l3 += cs; + break; + } + /* else same as 8 - fall through */ + case 8: + if ( dl ) { + /* L2 cache */ + cs = 128 << (dl-1); + l2 += cs; + } + break; + case 6: + if (dl > 5) { + /* L1 D cache */ + cs = 8<<(dl-6); + l1d += cs; + } + break; + case 7: + if ( dl >= 8 ) + { + /* L2 cache */ + cs = 64<<(dl-8); + l2 += cs; + } else { + /* L0 I cache, count as L1 */ + cs = dl ? (16 << (dl-1)) : 12; + l1i += cs; + } + break; + default: + /* TLB, or something else we don't know about */ + break; + } + } + } + if ( l1i || l1d ) + printk(KERN_INFO "CPU: L1 I cache: %dK, L1 D cache: %dK\n", + l1i, l1d); + if ( l2 ) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + if ( l3 ) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + /* + * This assumes the L3 cache is shared; it typically lives in + * the northbridge. The L1 caches are included by the L2 + * cache, and so should not be included for the purpose of + * SMP switching weights. + */ + c->x86_cache_size = l2 ? l2 : (l1i+l1d); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it */ + if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 ) + clear_bit(X86_FEATURE_SEP, &c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. 
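+	 *
+	 * (Worked example for the descriptor decode above: descriptor
+	 * 0x43 splits into dh == 4, dl == 3; on a P6-family CPU that
+	 * falls through to the dh == 8 arm, giving cs = 128 << (3-1),
+	 * so 512K is added to l2 - matching Intel's published meaning
+	 * of descriptor 0x43.)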
*/
+	if (c->x86 == 6) {
+		switch (c->x86_model) {
+		case 5:
+			if (l2 == 0)
+				p = "Celeron (Covington)";
+			if (l2 == 256)
+				p = "Mobile Pentium II (Dixon)";
+			break;
+
+		case 6:
+			if (l2 == 128)
+				p = "Celeron (Mendocino)";
+			break;
+
+		case 8:
+			if (l2 == 128)
+				p = "Celeron (Coppermine)";
+			break;
+		}
+	}
+
+	if ( p )
+		strcpy(c->x86_model_id, p);
+}
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+	char *v = c->x86_vendor_id;
+
+	if (!strcmp(v, "GenuineIntel"))
+		c->x86_vendor = X86_VENDOR_INTEL;
+	else if (!strcmp(v, "AuthenticAMD"))
+		c->x86_vendor = X86_VENDOR_AMD;
+	else
+		c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+	int vendor;
+	int family;
+	char *model_names[16];
+};
+
+/* Naming convention should be: <Name> [(<Codename>)] */
+/* This table is only used if init_<vendor>() below doesn't set the name; */
+/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+static struct cpu_model_info cpu_models[] __initdata = {
+	{ X86_VENDOR_INTEL,	6,
+	  { "Pentium Pro A-step", "Pentium Pro", NULL, "Pentium II (Klamath)",
+	    NULL, "Pentium II (Deschutes)", "Mobile Pentium II",
+	    "Pentium III (Katmai)", "Pentium III (Coppermine)", NULL,
+	    "Pentium III (Cascades)", NULL, NULL, NULL, NULL }},
+	{ X86_VENDOR_AMD,	6, /* Is this really necessary?? */
+	  { "Athlon", "Athlon",
+	    "Athlon", NULL, "Athlon", NULL,
+	    NULL, NULL, NULL,
+	    NULL, NULL, NULL, NULL, NULL, NULL, NULL }}
+};
+
+/* Look up CPU names by table lookup. */
+static char __init *table_lookup_model(struct cpuinfo_x86 *c)
+{
+	struct cpu_model_info *info = cpu_models;
+	int i;
+
+	if ( c->x86_model >= 16 )
+		return NULL;	/* Range check */
+
+	for ( i = 0 ; i < sizeof(cpu_models)/sizeof(struct cpu_model_info) ; i++ ) {
+		if ( info->vendor == c->x86_vendor &&
+		     info->family == c->x86 ) {
+			return info->model_names[c->x86_model];
+		}
+		info++;
+	}
+	return NULL;		/* Not found */
+}
+
+
+
+/* Standard macro to see if a specific flag is changeable */
+static inline int flag_is_changeable_p(u32 flag)
+{
+	u32 f1, f2;
+
+	asm("pushfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "movl %0,%1\n\t"
+	    "xorl %2,%0\n\t"
+	    "pushl %0\n\t"
+	    "popfl\n\t"
+	    "pushfl\n\t"
+	    "popl %0\n\t"
+	    "popfl\n\t"
+	    : "=&r" (f1), "=&r" (f2)
+	    : "ir" (flag));
+
+	return ((f1^f2) & flag) != 0;
+}
+
+
+/* Probe for the CPUID instruction */
+static int __init have_cpuid_p(void)
+{
+	return flag_is_changeable_p(X86_EFLAGS_ID);
+}
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+	int junk, i;
+	u32 xlvl, tfms;
+
+	c->loops_per_jiffy = loops_per_jiffy;
+	c->x86_cache_size = -1;
+	c->x86_vendor = X86_VENDOR_UNKNOWN;
+	c->cpuid_level = -1;	/* CPUID not detected */
+	c->x86_model = c->x86_mask = 0;	/* So far unknown...
*/ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->hard_math = 1; + + if ( !have_cpuid_p() ) { + panic("Processor must support CPUID\n"); + } else { + /* CPU does have CPUID */ + + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + cpuid(0x00000001, &tfms, &junk, &junk, + &c->x86_capability[0]); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) + c->x86_capability[1] = cpuid_edx(0x80000001); + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + } + + printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_vendor); + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + switch ( c->x86_vendor ) { + case X86_VENDOR_AMD: + init_amd(c); + break; + + case X86_VENDOR_INTEL: + init_intel(c); + break; + + default: + panic("Unsupported CPU vendor\n"); + } + + printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86_vendor, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", + c->x86_capability[0], + c->x86_capability[1], + c->x86_capability[2], + c->x86_capability[3]); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. 
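+	 *
+	 * (Worked example for the signature decode above: a Coppermine
+	 * Pentium III reporting tfms == 0x0686 yields family 6
+	 * ((tfms >> 8) & 15), model 8 ((tfms >> 4) & 15) and stepping 6
+	 * (tfms & 15).)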
+ */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", + boot_cpu_data.x86_capability[0], + boot_cpu_data.x86_capability[1], + boot_cpu_data.x86_capability[2], + boot_cpu_data.x86_capability[3]); +} + + +/* These need to match */ +static char *cpu_vendor_names[] __initdata = { + "Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur", "Rise", "Transmeta" }; + + +void __init print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < sizeof(cpu_vendor_names)/sizeof(char *)) + vendor = cpu_vendor_names[c->x86_vendor]; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ + static char *x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL, + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!(cpu_online_map & (1<x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? 
c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) { + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_irq13 || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, &c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + start: c_start, + next: c_next, + stop: c_stop, + show: show_cpuinfo, +}; + +unsigned long cpu_initialized __initdata = 0; + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __init cpu_init (void) +{ + int nr = smp_processor_id(); + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + for (;;) __sti(); + } + printk(KERN_INFO "Initializing CPU#%d\n", nr); + + /* + * set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if(current->mm) + BUG(); + enter_lazy_tlb(&init_mm, current, nr); + + HYPERVISOR_set_guest_stack(__KERNEL_DS, current->thread.esp0); + + /* Force FPU initialization. */ + current->flags &= ~PF_USEDFPU; + current->used_math = 0; + stts(); +} + + +/****************************************************************************** + * Time-to-die callback handling. 
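+ * + * When the hypervisor wants this domain gone it raises the _EVENT_DIE + * event; the handler below routes that into the regular Ctrl-Alt-Del + * path, so (schematically): + * + * _EVENT_DIE raised -> time_to_die() -> ctrl_alt_del() -> clean reboot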
+ */ + +static void time_to_die(int irq, void *unused, struct pt_regs *regs) +{ + extern void ctrl_alt_del(void); + ctrl_alt_del(); +} + +static int __init setup_death_event(void) +{ + (void)request_irq(_EVENT_DIE, time_to_die, 0, "die", NULL); + return 0; +} + +__initcall(setup_death_event); diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c new file mode 100644 index 0000000000..c0f43e6342 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/signal.c @@ -0,0 +1,712 @@ +/* + * linux/arch/i386/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/personality.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/i387.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); + +int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) +{ + if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) + return -EFAULT; + if (from->si_code < 0) + return __copy_to_user(to, from, sizeof(siginfo_t)); + else { + int err; + + /* If you change siginfo_t structure, please be sure + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + err = __put_user(from->si_signo, &to->si_signo); + err |= __put_user(from->si_errno, &to->si_errno); + err |= __put_user((short)from->si_code, &to->si_code); + /* First 32bits of unions are always present. */ + err |= __put_user(from->si_pid, &to->si_pid); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + err |= __put_user(from->si_utime, &to->si_utime); + err |= __put_user(from->si_stime, &to->si_stime); + err |= __put_user(from->si_status, &to->si_status); + default: + err |= __put_user(from->si_uid, &to->si_uid); + break; + /* case __SI_RT: This is not generated by the kernel as of now. */ + } + return err; + } +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. + */ +asmlinkage int +sys_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + struct pt_regs * regs = (struct pt_regs *) &history0; + sigset_t saveset; + + mask &= _BLOCKABLE; + spin_lock_irq(&current->sigmask_lock); + saveset = current->blocked; + siginitset(&current->blocked, mask); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) +{ + struct pt_regs * regs = (struct pt_regs *) &unewset; + sigset_t saveset, newset; + + /* XXX: Don't preclude handling different sized sigset_t's. 
*/ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + + if (copy_from_user(&newset, unewset, sizeof(newset))) + return -EFAULT; + sigdelsetmask(&newset, ~_BLOCKABLE); + + spin_lock_irq(&current->sigmask_lock); + saveset = current->blocked; + current->blocked = newset; + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(regs, &saveset)) + return -EINTR; + } +} + +asmlinkage int +sys_sigaction(int sig, const struct old_sigaction *act, + struct old_sigaction *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (verify_area(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage int +sys_sigaltstack(const stack_t *uss, stack_t *uoss) +{ + struct pt_regs *regs = (struct pt_regs *) &uss; + return do_sigaltstack(uss, uoss, regs->esp); +} + + +/* + * Do a signal return; undo the signal stack. + */ + +struct sigframe +{ + char *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char *pretcode; + int sig; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +static int +restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, int *peax) +{ + unsigned int err = 0; + +#define COPY(x) err |= __get_user(regs->x, &sc->x) + +#define COPY_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp; } + +#define COPY_SEG_STRICT(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + regs->x##seg = tmp|3; } + +#define GET_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + loadsegment(seg,tmp); } + + GET_SEG(gs); + GET_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(edi); + COPY(esi); + COPY(ebp); + COPY(esp); + COPY(ebx); + COPY(edx); + COPY(ecx); + COPY(eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); + + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } + + err |= __get_user(*peax, &sc->eax); + return err; + +badframe: + return 1; +} + +asmlinkage int sys_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct sigframe *frame = (struct sigframe *)(regs->esp - 8); + sigset_t set; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_NSIG_WORDS > 1 + && 
__copy_from_user(&set.sig[1], &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(&current->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + if (restore_sigcontext(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int sys_rt_sigreturn(unsigned long __unused) +{ + struct pt_regs *regs = (struct pt_regs *) &__unused; + struct rt_sigframe *frame = (struct rt_sigframe *)(regs->esp - 4); + sigset_t set; + stack_t st; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(&current->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack(&st, NULL, regs->esp); + + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +/* + * Set up a signal frame. + */ + +static int +setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int tmp, err = 0; + + tmp = 0; + __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->gs); + __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + err |= __put_user(tmp, (unsigned int *)&sc->fs); + + err |= __put_user(regs->xes, (unsigned int *)&sc->es); + err |= __put_user(regs->xds, (unsigned int *)&sc->ds); + err |= __put_user(regs->edi, &sc->edi); + err |= __put_user(regs->esi, &sc->esi); + err |= __put_user(regs->ebp, &sc->ebp); + err |= __put_user(regs->esp, &sc->esp); + err |= __put_user(regs->ebx, &sc->ebx); + err |= __put_user(regs->edx, &sc->edx); + err |= __put_user(regs->ecx, &sc->ecx); + err |= __put_user(regs->eax, &sc->eax); + err |= __put_user(current->thread.trap_no, &sc->trapno); + err |= __put_user(current->thread.error_code, &sc->err); + err |= __put_user(regs->eip, &sc->eip); + err |= __put_user(regs->xcs, (unsigned int *)&sc->cs); + err |= __put_user(regs->eflags, &sc->eflags); + err |= __put_user(regs->esp, &sc->esp_at_signal); + err |= __put_user(regs->xss, (unsigned int *)&sc->ss); + + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. */ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->thread.cr2, &sc->cr2); + + return err; +} + +/* + * Determine which stack to use.. + */ +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + + /* Default to using normal stack */ + esp = regs->esp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(esp) == 0) + esp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. 
*/ + else if ((regs->xss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + esp = (unsigned long) ka->sa.sa_restorer; + } + + return (void *)((esp - frame_size) & -8ul); +} + +static void setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + if (err) + goto give_sigsegv; + + err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + err |= __put_user(0xb858, (short *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short *)(frame->retcode+6)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto give_sigsegv; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->esp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + } else { + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + err |= __put_user(0xb8, (char *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short *)(frame->retcode+5)); + } + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->esp = (unsigned long) frame; + regs->eip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->xds = __USER_DS; + regs->xes = __USER_DS; + regs->xss = __USER_DS; + regs->xcs = __USER_CS; + regs->eflags &= ~TF_MASK; + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", + current->comm, current->pid, frame, regs->eip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +/* + * OK, we're invoking a handler + */ + +static void +handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +{ + /* Are we from a system call? */ + if (regs->orig_eax >= 0) { + /* If so, check system call restarting.. */ + switch (regs->eax) { + case -ERESTARTNOHAND: + regs->eax = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + regs->eax = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka, info, oldset, regs); + else + setup_frame(sig, ka, oldset, regs); + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(&current->sigmask_lock); + sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigaddset(&current->blocked,sig); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + } +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + */ +int do_signal(struct pt_regs *regs, sigset_t *oldset) +{ + siginfo_t info; + struct k_sigaction *ka; + + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if ((regs->xcs & 2) != 2) + return 1; + + if (!oldset) + oldset = &current->blocked; + + for (;;) { + unsigned long signr; + + spin_lock_irq(&current->sigmask_lock); + signr = dequeue_signal(&current->blocked, &info); + spin_unlock_irq(&current->sigmask_lock); + + if (!signr) + break; + + if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current, SIGCHLD); + schedule(); + + /* We're back. Did the debugger cancel the sig? */ + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? */ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. 
*/ + if (sigismember(&current->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = &current->sig->action[signr-1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: { + struct signal_struct *sig; + current->state = TASK_STOPPED; + current->exit_code = signr; + sig = current->p_pptr->sig; + if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + } + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, regs)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + sigaddset(&current->pending.signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + /* Whee! Actually deliver the signal. */ + handle_signal(signr, ka, &info, oldset, regs); + return 1; + } + + /* Did we come from a system call? */ + if (regs->orig_eax >= 0) { + /* Restart the system call - no handlers present */ + if (regs->eax == -ERESTARTNOHAND || + regs->eax == -ERESTARTSYS || + regs->eax == -ERESTARTNOINTR) { + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + } + return 0; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c new file mode 100644 index 0000000000..5fd6910b9c --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/sys_i386.c @@ -0,0 +1,256 @@ +/* + * linux/arch/i386/kernel/sys_i386.c + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/i386 + * platform. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/utsname.h> + +#include <asm/uaccess.h> +#include <asm/ipc.h> + +/* + * sys_pipe() is the normal C calling standard for creating + * a pipe. It's not the way Unix traditionally does this, though. + */ +asmlinkage int sys_pipe(unsigned long * fildes) +{ + int fd[2]; + int error; + + error = do_pipe(fd); + if (!error) { + if (copy_to_user(fildes, fd, 2*sizeof(int))) + error = -EFAULT; + } + return error; +} + +/* common code for old and new mmaps */ +static inline long do_mmap2( + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + int error = -EBADF; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down_write(&current->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); +out: + return error; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + return do_mmap2(addr, len, prot, flags, fd, pgoff); +} + +/* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. 
Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +asmlinkage int old_mmap(struct mmap_arg_struct *arg) +{ + struct mmap_arg_struct a; + int err = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + + err = -EINVAL; + if (a.offset & ~PAGE_MASK) + goto out; + + err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); +out: + return err; +} + + +extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); + +struct sel_arg_struct { + unsigned long n; + fd_set *inp, *outp, *exp; + struct timeval *tvp; +}; + +asmlinkage int old_select(struct sel_arg_struct *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + /* sys_select() does the appropriate kernel locking */ + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. + */ +asmlinkage int sys_ipc (uint call, int first, int second, + int third, void *ptr, long fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semop (first, (struct sembuf *)ptr, second); + case SEMGET: + return sys_semget (first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void **) ptr)) + return -EFAULT; + return sys_semctl (first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd (first, (struct msgbuf *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + + if (copy_from_user(&tmp, + (struct ipc_kludge *) ptr, + sizeof (tmp))) + return -EFAULT; + return sys_msgrcv (first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + return sys_msgrcv (first, + (struct msgbuf *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget ((key_t) first, second); + case MSGCTL: + return sys_msgctl (first, second, (struct msqid_ds *) ptr); + + case SHMAT: + switch (version) { + default: { + ulong raddr; + ret = sys_shmat (first, (char *) ptr, second, &raddr); + if (ret) + return ret; + return put_user (raddr, (ulong *) third); + } + case 1: /* iBCS2 emulator entry point */ + if (!segment_eq(get_fs(), get_ds())) + return -EINVAL; + return sys_shmat (first, (char *) ptr, second, (ulong *) third); + } + case SHMDT: + return sys_shmdt ((char *)ptr); + case SHMGET: + return sys_shmget (first, second, third); + case SHMCTL: + return sys_shmctl (first, second, + (struct shmid_ds *) ptr); + default: + return -EINVAL; + } +} + +/* + * Old cruft + */ +asmlinkage int sys_uname(struct old_utsname * name) +{ + int err; + if (!name) + return -EFAULT; + down_read(&uts_sem); + err=copy_to_user(name, &system_utsname, sizeof (*name)); + up_read(&uts_sem); + return err?-EFAULT:0; +} + +asmlinkage int sys_olduname(struct oldold_utsname * name) +{ + int error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + error |= __put_user(0,name->sysname+__OLD_UTS_LEN); + error |= 
__copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __put_user(0,name->nodename+__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __put_user(0,name->release+__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __put_user(0,name->version+__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __put_user(0,name->machine+__OLD_UTS_LEN); + + up_read(&uts_sem); + + error = error ? -EFAULT : 0; + + return error; +} + +asmlinkage int sys_pause(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c new file mode 100644 index 0000000000..1e331e553c --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/time.c @@ -0,0 +1,305 @@ +/* + * linux/arch/i386/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * This file contains the PC-specific time handling details: + * reading the RTC at bootup, etc.. + * 1994-07-02 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + * 1995-03-26 Markus Kuhn + * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 + * precision CMOS clock update + * 1996-05-03 Ingo Molnar + * fixed time warps in do_[slow|fast]_gettimeoffset() + * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 + * "A Kernel Model for Precision Timekeeping" by Dave Mills + * 1998-09-05 (Various) + * More robust do_fast_gettimeoffset() algorithm implemented + * (works with APM, Cyrix 6x86MX and Centaur C6), + * monotonic gettimeofday() with fast_get_timeoffset(), + * drift-proof precision TSC calibration on boot + * (C. Scott Ananian , Andrew D. + * Balsa , Philip Gladstone ; + * ported from 2.0.35 Jumbo-9 by Michael Krause ). + * 1998-12-16 Andrea Arcangeli + * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy + * because was not accounting lost_ticks. + * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli + * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to + * serialize accesses to xtime/lost_ticks). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + + +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ + +/* Cached *multiplier* to convert TSC counts to microseconds. + * (see the equation below). + * Equal to 2^32 * (1 / (clocks per usec) ). + * Initialized in time_init. + */ +unsigned long fast_gettimeoffset_quotient; + +extern rwlock_t xtime_lock; +extern unsigned long wall_jiffies; + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; + +static inline unsigned long ticks_to_secs(unsigned long long ticks) +{ + unsigned long lo, hi; + unsigned long little_ticks; + + little_ticks = ticks /* XXX URK! XXX / 1000000ULL */; + + __asm__ __volatile__ ( + "mull %2" + : "=a" (lo), "=d" (hi) + : "rm" (fast_gettimeoffset_quotient), "0" (little_ticks) ); + + return(hi); +} + +/* NB. Only 32 bits of ticks are considered here. 
*/ +static inline unsigned long ticks_to_us(unsigned long ticks) +{ + unsigned long lo, hi; + + __asm__ __volatile__ ( + "mull %2" + : "=a" (lo), "=d" (hi) + : "rm" (fast_gettimeoffset_quotient), "0" (ticks) ); + + return(hi); +} + +static inline unsigned long do_gettimeoffset(void) +{ +#if 0 + register unsigned long eax, edx; + + /* Read the Time Stamp Counter */ + + rdtsc(eax,edx); + + /* .. relative to previous jiffy (32 bits is enough) */ + eax -= last_tsc_low; /* tsc_low delta */ + + /* + * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient + * = (tsc_low delta) * (usecs_per_clock) + * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) + * + * Using a mull instead of a divl saves up to 31 clock cycles + * in the critical path. + */ + + edx = ticks_to_us(eax); + + /* our adjusted time offset in microseconds */ + return delay_at_last_interrupt + edx; +#else + /* + * We should keep a 'last_tsc_low' thing which incorporates + * delay_at_last_interrupt, adjusted in timer_interrupt after + * do_timer_interrupt. It would look at change in xtime, and + * make appropriate adjustment to a last_tsc variable. + * + * We'd be affected by rounding error in ticks_per_usec, and by + * processor clock drift (which should be no more than in an + * external interrupt source anyhow). + * + * Perhaps a bit rough and ready, but never mind! + */ + return 0; +#endif +} + +/* + * This version of gettimeofday has microsecond resolution + * and better than microsecond precision on fast x86 machines with TSC. + */ +void do_gettimeofday(struct timeval *tv) +{ + unsigned long flags; + unsigned long usec, sec, lost; + + read_lock_irqsave(&xtime_lock, flags); + usec = do_gettimeoffset(); + lost = jiffies - wall_jiffies; + if ( lost != 0 ) usec += lost * (1000000 / HZ); + sec = xtime.tv_sec; + usec += xtime.tv_usec; + read_unlock_irqrestore(&xtime_lock, flags); + + while ( usec >= 1000000 ) + { + usec -= 1000000; + sec++; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +void do_settimeofday(struct timeval *tv) +{ + write_lock_irq(&xtime_lock); + /* + * This is revolting. We need to set "xtime" correctly. However, the + * value in this location is the value at the most recent update of + * wall time. Discover what correction gettimeofday() would have + * made, and then undo it! + */ + tv->tv_usec -= do_gettimeoffset(); + tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ); + + while ( tv->tv_usec < 0 ) + { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + write_unlock_irq(&xtime_lock); +} + + +/* + * timer_interrupt() needs to keep up the real-time clock, + * as well as call the "do_timer()" routine every clocktick + */ +static inline void do_timer_interrupt( + int irq, void *dev_id, struct pt_regs *regs) +{ + do_timer(regs); +#if 0 + if (!user_mode(regs)) + x86_do_profile(regs->eip); +#endif +} + + +/* + * This is the same as the above, except we _also_ save the current + * Time Stamp Counter value at the time of the timer interrupt, so that + * we later on can estimate the time of day more exactly. 
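+ * + * A sketch of that estimate (an illustration only -- do_gettimeoffset() + * above currently just returns 0, and last_tsc_low is the variable the + * earlier comment proposes keeping): + * + * rdtscl(now); -- TSC low word at query time + * offset = ticks_to_us(now - last_tsc_low); -- usecs since last tick + * time_of_day = xtime + offset;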
+ */ +static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + write_lock(&xtime_lock); + do_timer_interrupt(irq, NULL, regs); + write_unlock(&xtime_lock); +} + +static struct irqaction irq_timer = { + timer_interrupt, + SA_INTERRUPT, + 0, + "timer", + NULL, + NULL +}; + + +unsigned long get_cmos_time(void) +{ + unsigned long secs = HYPERVISOR_shared_info->rtc_time; + unsigned long diff; + + rdtscl(diff); + diff -= (unsigned long)HYPERVISOR_shared_info->rtc_timestamp; + + secs += ticks_to_us(diff); + + return(secs + ticks_to_secs(diff)); +} + + +/* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). */ +static unsigned long __init calibrate_tsc(void) +{ + unsigned long quo, rem; + + /* quotient == (1000 * 2^32) / ticks_per_ms */ + __asm__ __volatile__ ( + "divl %2" + : "=a" (quo), "=d" (rem) + : "r" (HYPERVISOR_shared_info->ticks_per_ms), "0" (0), "1" (1000) ); + + return(quo); +} + +void __init time_init(void) +{ + unsigned long long alarm; + + fast_gettimeoffset_quotient = calibrate_tsc(); + do_get_fast_time = do_gettimeofday; + + /* report CPU clock rate in Hz. + * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = + * clock/second. Our precision is about 100 ppm. + */ + { + unsigned long eax=0, edx=1000; + __asm__ __volatile__ + ("divl %2" + :"=a" (cpu_khz), "=d" (edx) + :"r" (fast_gettimeoffset_quotient), + "0" (eax), "1" (edx)); + printk("Detected %lu.%03lu MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + } + + setup_irq(TIMER_IRQ, &irq_timer); + + /* + * Start ticker. Note that timing runs off the wall clock, not virtual + * 'domain' time. This means that the clock should run at the correct + * rate. For things like scheduling, it's not clear whether it + * matters which sort of time we use. + */ + rdtscll(alarm); + alarm += (1000/HZ)*HYPERVISOR_shared_info->ticks_per_ms; + HYPERVISOR_shared_info->wall_timeout = alarm; + HYPERVISOR_shared_info->domain_timeout = ~0ULL; + clear_bit(_EVENT_TIMER, &HYPERVISOR_shared_info->events); + + xtime.tv_sec = get_cmos_time(); + xtime.tv_usec = 0; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c new file mode 100644 index 0000000000..6e8f347420 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/kernel/traps.c @@ -0,0 +1,517 @@ +/* + * linux/arch/i386/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. 
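+ * + * In this Xenolinux port the handlers are not installed into a hardware + * IDT; instead trap_init() at the bottom of this file hands a table of + * { vector, invoking privilege, code selector, handler } tuples to the + * hypervisor via HYPERVISOR_set_trap_table(). For example the entry + * + * { 14, 0, __KERNEL_CS, (unsigned long)page_fault } + * + * routes vector 14 (page fault) to page_fault(), while entries with + * privilege 3 (int3, overflow, bounds, the syscall vector) may be + * raised directly from user space.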
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +asmlinkage int system_call(void); + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +asmlinkage void machine_check(void); + +int kstack_depth_to_print = 24; + + +/* + * If the address is either in the .text section of the + * kernel, or in the vmalloc'ed module regions, it *may* + * be the address of a calling routine + */ + +#ifdef CONFIG_MODULES + +extern struct module *module_list; +extern struct module kernel_module; + +static inline int kernel_text_address(unsigned long addr) +{ + int retval = 0; + struct module *mod; + + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + /* mod_bound tests for addr being inside the vmalloc'ed + * module area. Of course it'd be better to test only + * for the .text subset... */ + if (mod_bound(addr, 0, mod)) { + retval = 1; + break; + } + } + + return retval; +} + +#else + +static inline int kernel_text_address(unsigned long addr) +{ + return (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext); +} + +#endif + +void show_trace(unsigned long * stack) +{ + int i; + unsigned long addr; + + if (!stack) + stack = (unsigned long*)&stack; + + printk("Call Trace: "); + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_trace_task(struct task_struct *tsk) +{ + unsigned long esp = tsk->thread.esp; + + /* User space on another CPU? */ + if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) + return; + show_trace((unsigned long *)esp); +} + +void show_stack(unsigned long * esp) +{ + unsigned long *stack; + int i; + + // debugging aid: "show_stack(NULL);" prints the + // back trace for this cpu. 
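+ // For example, calling show_stack(NULL) dumps up to + // kstack_depth_to_print words of this CPU's stack, then show_trace() + // turns the plausible kernel-text addresses among them into the + // call trace.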
+ + if(esp==NULL) + esp=(unsigned long*)&esp; + + stack = esp; + for(i=0; i < kstack_depth_to_print; i++) { + if (((long) stack & (THREAD_SIZE-1)) == 0) + break; + if (i && ((i % 8) == 0)) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + show_trace(esp); +} + +void show_registers(struct pt_regs *regs) +{ + int i; + int in_kernel = 1; + unsigned long esp; + unsigned short ss; + + esp = (unsigned long) (®s->esp); + ss = __KERNEL_DS; + if (regs->xcs & 2) { + in_kernel = 0; + esp = regs->esp; + ss = regs->xss & 0xffff; + } + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk("Process %s (pid: %d, stackpage=%08lx)", + current->comm, current->pid, 4096+(unsigned long)current); + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. + */ + if (in_kernel) { + + printk("\nStack: "); + show_stack((unsigned long*)esp); + +#if 0 + printk("\nCode: "); + if(regs->eip < PAGE_OFFSET) + goto bad; + + for(i=0;i<20;i++) + { + unsigned char c; + if(__get_user(c, &((unsigned char*)regs->eip)[i])) { +bad: + printk(" Bad EIP value."); + break; + } + printk("%02x ", c); + } +#endif + } + printk("\n"); +} + +spinlock_t die_lock = SPIN_LOCK_UNLOCKED; + +void die(const char * str, struct pt_regs * regs, long err) +{ + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04lx\n", str, err & 0xffff); + show_registers(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + do_exit(SIGSEGV); +} + +static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +{ + if (!(2 & regs->xcs)) + die(str, regs, err); +} + + +static void inline do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) +{ + if (!(regs->xcs & 2)) + goto kernel_trap; + + /*trap_signal:*/ { + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (info) + force_sig_info(signr, info, tsk); + else + force_sig(signr, tsk); + return; + } + + kernel_trap: { + unsigned long fixup = search_exception_table(regs->eip); + if (fixup) + regs->eip = fixup; + else + die(str, regs, error_code); + return; + } +} + +#define DO_ERROR(trapnr, signr, str, name) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + do_trap(trapnr, signr, str, regs, error_code, NULL); \ +} + +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + siginfo_t info; \ + info.si_signo = signr; \ + info.si_errno = 0; \ + info.si_code = sicode; \ + info.si_addr = (void *)siaddr; \ + do_trap(trapnr, signr, str, regs, error_code, &info); \ +} + +DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) +DO_ERROR( 3, SIGTRAP, "int3", int3) +DO_ERROR( 4, SIGSEGV, "overflow", overflow) +DO_ERROR( 5, SIGSEGV, "bounds", bounds) +DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) +DO_ERROR( 8, SIGSEGV, "double fault", double_fault) +DO_ERROR( 9, SIGFPE, "coprocessor segment 
overrun", coprocessor_segment_overrun) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) +DO_ERROR(18, SIGBUS, "machine check", machine_check) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + if (!(regs->xcs & 2)) + goto gp_in_kernel; + + current->thread.error_code = error_code; + current->thread.trap_no = 13; + force_sig(SIGSEGV, current); + return; + +gp_in_kernel: + { + unsigned long fixup; + fixup = search_exception_table(regs->eip); + if (fixup) { + regs->eip = fixup; + return; + } + die("general protection fault", regs, error_code); + } +} + + +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + /* + * We don't mess with breakpoints, so the only way this exception + * type can occur is through single-step mode. + */ + regs->eflags &= ~TF_MASK; +} + + +/* + * Note that we play around with the 'TS' bit in an attempt to get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short cwd, swd; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 16; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't syncronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); + switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + case 0x040: /* Stack Fault */ + case 0x240: /* Stack Fault | Direction */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + ignore_irq13 = 1; + math_error((void *)regs->eip); +} + +void simd_math_error(void *eip) +{ + struct task_struct * task; + siginfo_t info; + unsigned short mxcsr; + + /* + * Save the info for the exception handler and clear the error. + */ + task = current; + save_init_fpu(task); + task->thread.trap_no = 19; + task->thread.error_code = 0; + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_code = __SI_FAULT; + info.si_addr = eip; + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. 
+ */ + mxcsr = get_fpu_mxcsr(task); + switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { + case 0x000: + default: + break; + case 0x001: /* Invalid Op */ + info.si_code = FPE_FLTINV; + break; + case 0x002: /* Denormalize */ + case 0x010: /* Underflow */ + info.si_code = FPE_FLTUND; + break; + case 0x004: /* Zero Divide */ + info.si_code = FPE_FLTDIV; + break; + case 0x008: /* Overflow */ + info.si_code = FPE_FLTOVF; + break; + case 0x020: /* Precision */ + info.si_code = FPE_FLTRES; + break; + } + force_sig_info(SIGFPE, &info, task); +} + +asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs, + long error_code) +{ + if (cpu_has_xmm) { + /* Handle SIMD FPU exceptions on PIII+ processors. */ + ignore_irq13 = 1; + simd_math_error((void *)regs->eip); + } else { + die_if_kernel("cache flush denied", regs, error_code); + current->thread.trap_no = 19; + current->thread.error_code = error_code; + force_sig(SIGSEGV, current); + } +} + +asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, + long error_code) +{ +} + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. + */ +asmlinkage void math_state_restore(struct pt_regs regs) +{ + if (current->used_math) { + restore_fpu(current); + } else { + init_fpu(); + } + current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ +} + +static trap_info_t trap_table[] = { + { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, __KERNEL_CS, (unsigned long)debug }, + { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 4, 3, __KERNEL_CS, (unsigned long)overflow }, + { 5, 3, __KERNEL_CS, (unsigned long)bounds }, + { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 8, 0, __KERNEL_CS, (unsigned long)double_fault }, + { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, __KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, __KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, __KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, __KERNEL_CS, (unsigned long)alignment_check }, + { 18, 0, __KERNEL_CS, (unsigned long)machine_check }, + { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { SYSCALL_VECTOR, + 3, __KERNEL_CS, (unsigned long)system_call }, + { 0, 0, 0, 0 } +}; + + + +void __init trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); + cpu_init(); +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile new file mode 100644 index 0000000000..2224f0312c --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/Makefile @@ -0,0 +1,15 @@ + +.S.o: + $(CC) $(AFLAGS) -c $< -o $*.o + +L_TARGET = lib.a + +obj-y = checksum.o old-checksum.o delay.o \ + usercopy.o getuser.o \ + memcpy.o strstr.o + +obj-$(CONFIG_X86_USE_3DNOW) += mmx.o +obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S b/xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S new file mode 100644 index 0000000000..bfc6389531 --- 
/dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/checksum.S @@ -0,0 +1,460 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, + * Arnt Gulbrandsen, + * Tom May, + * Pentium Pro/II routines: + * Alexander Kjeldaas + * Finn Arne Gangstad + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $2, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. 
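+# NB. The adcl chains below implement ones-complement addition by +# folding the carry back into the running sum. Illustrative example: +# with %eax = 0xffffffff, adding a word of 0x00000002 leaves %eax = +# 0x00000001 with CF set, and the closing "adcl $0, %eax" folds the +# carry back in to give 0x00000002, the correct ones-complement sum.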
+ jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $2, %esi + jnz 30f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b + +30: subl $2, %ecx + ja 20b + je 32f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) 
\ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 +.globl csum_partial_copy_generic + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 
1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c new file mode 100644 index 0000000000..ffd4869001 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/dec_and_lock.c @@ -0,0 +1,40 @@ +/* + * x86 version of "atomic_dec_and_lock()" using + * the atomic "cmpxchg" instruction. + * + * (For CPU's lacking cmpxchg, we use the slow + * generic version, and this one never even gets + * compiled). + */ + +#include +#include + +int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + +repeat: + counter = atomic_read(atomic); + newcount = counter-1; + + if (!newcount) + goto slow_path; + + asm volatile("lock; cmpxchgl %1,%2" + :"=a" (newcount) + :"r" (newcount), "m" (atomic->counter), "0" (counter)); + + /* If the above failed, "eax" will have changed */ + if (newcount != counter) + goto repeat; + return 0; + +slow_path: + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c new file mode 100644 index 0000000000..c9180268f8 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/delay.c @@ -0,0 +1,47 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +#include +#endif + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do + { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops),"0" (current_cpu_data.loops_per_jiffy)); + __delay(xloops * HZ); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S b/xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S new file mode 100644 index 0000000000..c244721e70 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/getuser.S @@ -0,0 +1,73 @@ +/* + * __get_user functions. + * + * (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. 
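+ *
+ * A sketch of how the get_user() macro is assumed to invoke these
+ * routines (illustrative, not the literal uaccess.h expansion):
+ *
+ *	movl	ptr, %eax
+ *	call	__get_user_4
+ *	# on return: %eax = 0 or -EFAULT, %edx = the fetched value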
+ */ + +/* + * __get_user_X + * + * Inputs: %eax contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * %edx contains zero-extended value + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +addr_limit = 12 + +.text +.align 4 +.globl __get_user_1 +__get_user_1: + movl %esp,%edx + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +1: movzbl (%eax),%edx + xorl %eax,%eax + ret + +.align 4 +.globl __get_user_2 +__get_user_2: + addl $1,%eax + movl %esp,%edx + jc bad_get_user + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +2: movzwl -1(%eax),%edx + xorl %eax,%eax + ret + +.align 4 +.globl __get_user_4 +__get_user_4: + addl $3,%eax + movl %esp,%edx + jc bad_get_user + andl $0xffffe000,%edx + cmpl addr_limit(%edx),%eax + jae bad_get_user +3: movl -3(%eax),%edx + xorl %eax,%eax + ret + +bad_get_user: + xorl %edx,%edx + movl $-14,%eax + ret + +.section __ex_table,"a" + .long 1b,bad_get_user + .long 2b,bad_get_user + .long 3b,bad_get_user +.previous diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c new file mode 100644 index 0000000000..701a07fe72 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/iodebug.c @@ -0,0 +1,19 @@ +#include + +void * __io_virt_debug(unsigned long x, const char *file, int line) +{ + if (x < PAGE_OFFSET) { + printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line); + return __va(x); + } + return (void *)x; +} + +unsigned long __io_phys_debug(unsigned long x, const char *file, int line) +{ + if (x < PAGE_OFFSET) { + printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line); + return x; + } + return __pa(x); +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c new file mode 100644 index 0000000000..4cb37b6e50 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/memcpy.c @@ -0,0 +1,19 @@ +#include +#include + +#undef memcpy +#undef memset + +void * memcpy(void * to, const void * from, size_t n) +{ +#ifdef CONFIG_X86_USE_3DNOW + return __memcpy3d(to, from, n); +#else + return __memcpy(to, from, n); +#endif +} + +void * memset(void * s, int c, size_t count) +{ + return __memset(s, c, count); +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c new file mode 100644 index 0000000000..0433cfa319 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/mmx.c @@ -0,0 +1,375 @@ +#include +#include +#include +#include + +#include +#include + + +/* + * MMX 3DNow! library helper functions + * + * To do: + * We can use MMX just for prefetch in IRQ's. This may be a win. + * (reported so on K6-III) + * We should use a better code neutral filler for the short jump + * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? + * We also want to clobber the filler register so we dont get any + * register forwarding stalls on the filler. + * + * Add *user handling. Checksums are not a win with MMX on any CPU + * tested so far for any MMX solution figured. 
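+ *
+ * Rough shape of _mmx_memcpy() below (a summary of the code, nothing
+ * new):
+ *	prefetch the first ~320 bytes of the source;
+ *	for (i = len >> 6; i; i--, from += 64, to += 64)
+ *		copy 64 bytes through %mm0-%mm3, prefetching 320(%0);
+ *	__memcpy(to, from, len & 63) handles the tail.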
+ * + * 22/09/2000 - Arjan van de Ven + * Improved for non-egineering-sample Athlons + * + */ + +void *_mmx_memcpy(void *to, const void *from, size_t len) +{ + void *p; + int i; + + if (in_interrupt()) + return __memcpy(to, from, len); + + p = to; + i = len >> 6; /* len/64 */ + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" /* This set is 28 bytes */ + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + + for(; i>0; i--) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* + * Now do the tail of the block + */ + __memcpy(to, from, len&63); + kernel_fpu_end(); + return p; +} + +#ifdef CONFIG_MK7 + +/* + * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and + * other MMX using processors do not. + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/64;i++) + { + __asm__ __volatile__ ( + " movntq %%mm0, (%0)\n" + " movntq %%mm0, 8(%0)\n" + " movntq %%mm0, 16(%0)\n" + " movntq %%mm0, 24(%0)\n" + " movntq %%mm0, 32(%0)\n" + " movntq %%mm0, 40(%0)\n" + " movntq %%mm0, 48(%0)\n" + " movntq %%mm0, 56(%0)\n" + : : "r" (page) : "memory"); + page+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + kernel_fpu_begin(); + + /* maybe the prefetch stuff can go before the expensive fnsave... + * but that is for later. 
-AV + */ + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<(4096-320)/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + for(i=(4096-320)/64; i<4096/64; i++) + { + __asm__ __volatile__ ( + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. 
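+ * (Non-temporal stores bypass the cache and the normal store-ordering
+ * rules, so without the fence a later ordinary write could become
+ * visible before the page data does.)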
+ */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +#else + +/* + * Generic MMX implementation without K7 specific streaming + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/128;i++) + { + __asm__ __volatile__ ( + " movq %%mm0, (%0)\n" + " movq %%mm0, 8(%0)\n" + " movq %%mm0, 16(%0)\n" + " movq %%mm0, 24(%0)\n" + " movq %%mm0, 32(%0)\n" + " movq %%mm0, 40(%0)\n" + " movq %%mm0, 48(%0)\n" + " movq %%mm0, 56(%0)\n" + " movq %%mm0, 64(%0)\n" + " movq %%mm0, 72(%0)\n" + " movq %%mm0, 80(%0)\n" + " movq %%mm0, 88(%0)\n" + " movq %%mm0, 96(%0)\n" + " movq %%mm0, 104(%0)\n" + " movq %%mm0, 112(%0)\n" + " movq %%mm0, 120(%0)\n" + : : "r" (page) : "memory"); + page+=128; + } + + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<4096/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + kernel_fpu_end(); +} + + +#endif + +/* + * Favour MMX for page clear and copy. + */ + +static void slow_zero_page(void * page) +{ + int d0, d1; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; stosl" \ + : "=&c" (d0), "=&D" (d1) + :"a" (0),"1" (page),"0" (1024) + :"memory"); +} + +void mmx_clear_page(void * page) +{ + if(in_interrupt()) + slow_zero_page(page); + else + fast_clear_page(page); +} + +static void slow_copy_page(void *to, void *from) +{ + int d0, d1, d2; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; movsl" \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (1024),"1" ((long) to),"2" ((long) from) \ + : "memory"); +} + + +void mmx_copy_page(void *to, void *from) +{ + if(in_interrupt()) + slow_copy_page(to, from); + else + fast_copy_page(to, from); +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c new file mode 100644 index 0000000000..ae3a38043a --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/old-checksum.c @@ -0,0 +1,19 @@ +/* + * FIXME: old compatibility stuff, will be removed soon. 
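+ *
+ * (The wrapper below only logs src_err/dst_err rather than returning
+ * them, so callers are assumed to have validated the addresses
+ * beforehand.)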
+ */ + +#include + +unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum) +{ + int src_err=0, dst_err=0; + + sum = csum_partial_copy_generic ( src, dst, len, sum, &src_err, &dst_err); + + if (src_err || dst_err) + printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n"); + + return sum; +} + + diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c new file mode 100644 index 0000000000..a3dafbf59d --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/strstr.c @@ -0,0 +1,31 @@ +#include + +char * strstr(const char * cs,const char * ct) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movl %6,%%edi\n\t" + "repne\n\t" + "scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\tmovl %6,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe\n\t" + "cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "xchgl %%eax,%%esi\n\t" + "incl %%esi\n\t" + "cmpb $0,-1(%%eax)\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "2:" + :"=a" (__res), "=&c" (d0), "=&S" (d1) + :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + :"dx", "di"); +return __res; +} + diff --git a/xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c b/xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c new file mode 100644 index 0000000000..d81fa81af8 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/lib/usercopy.c @@ -0,0 +1,190 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen + * Copyright 1997 Linus Torvalds + */ +#include +#include +#include + +#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + { + if(n<512) + __copy_user(to,from,n); + else + mmx_copy_user(to,from,n); + } + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + { + if(n<512) + __copy_user_zeroing(to,from,n); + else + mmx_copy_user_zeroing(to, from, n); + } + else + memset(to, 0, n); + return n; +} + +#else + +unsigned long +__generic_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __copy_user(to,from,n); + return n; +} + +unsigned long +__generic_copy_from_user(void *to, const void *from, unsigned long n) +{ + prefetchw(to); + if (access_ok(VERIFY_READ, from, n)) + __copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +#endif + +/* + * Copy a null terminated string from userspace. 
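+ *
+ * __do_strncpy_from_user() below behaves roughly like this C sketch
+ * (fault_on_src stands in for the exception-table fixup path):
+ *
+ *	long res = 0;
+ *	while (count-- && (dst[res] = src[res]) != '\0')
+ *		res++;
+ *	return fault_on_src ? -EFAULT : res;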
+ */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +long +__strncpy_from_user(char *dst, const char *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} + +long +strncpy_from_user(char *dst, const char *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} + + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +unsigned long +clear_user(void *to, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} + +unsigned long +__clear_user(void *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} + +/* + * Return the size of a string (including the ending 0) + * + * Return 0 on exception, a value greater than N if too long + */ + +long strnlen_user(const char *s, long n) +{ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile b/xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile new file mode 100644 index 0000000000..4628394d47 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/mm/Makefile @@ -0,0 +1,6 @@ + +O_TARGET := mm.o + +obj-y := init.o fault.o extable.o hypervisor.o + +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c new file mode 100644 index 0000000000..4cd9f064c3 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/mm/extable.c @@ -0,0 +1,62 @@ +/* + * linux/arch/i386/mm/extable.c + */ + +#include +#include +#include +#include + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +static inline unsigned long +search_one_table(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + long diff; + + mid = (last - first) / 2 + first; + diff = 
mid->insn - value; + if (diff == 0) + return mid->fixup; + else if (diff < 0) + first = mid+1; + else + last = mid-1; + } + return 0; +} + +extern spinlock_t modlist_lock; + +unsigned long +search_exception_table(unsigned long addr) +{ + unsigned long ret = 0; + +#ifndef CONFIG_MODULES + /* There is only the kernel to search. */ + ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); + return ret; +#else + unsigned long flags; + /* The kernel is the last "module" -- no need to treat it special. */ + struct module *mp; + + spin_lock_irqsave(&modlist_lock, flags); + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) + continue; + ret = search_one_table(mp->ex_table_start, + mp->ex_table_end - 1, addr); + if (ret) + break; + } + spin_unlock_irqrestore(&modlist_lock, flags); + return ret; +#endif +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c new file mode 100644 index 0000000000..18e21674a9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/mm/fault.c @@ -0,0 +1,375 @@ +/* + * linux/arch/i386/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ + +#include +#include +#include +#include + +extern void die(const char *,struct pt_regs *,long); + +extern int console_loglevel; + +pgd_t *cur_pgd; + +/* + * Ugly, ugly, but the goto's result in better assembly.. + */ +int __verify_write(const void * addr, unsigned long size) +{ + struct vm_area_struct * vma; + unsigned long start = (unsigned long) addr; + + if (!size) + return 1; + + vma = find_vma(current->mm, start); + if (!vma) + goto bad_area; + if (vma->vm_start > start) + goto check_stack; + +good_area: + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + size--; + size += start & ~PAGE_MASK; + size >>= PAGE_SHIFT; + start &= PAGE_MASK; + + for (;;) { + survive: + { + int fault = handle_mm_fault(current->mm, vma, start, 1); + if (!fault) + goto bad_area; + if (fault < 0) + goto out_of_memory; + } + if (!size) + break; + size--; + start += PAGE_SIZE; + if (start < vma->vm_end) + continue; + vma = vma->vm_next; + if (!vma || vma->vm_start != start) + goto bad_area; + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area;; + } + return 1; + +check_stack: + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, start) == 0) + goto good_area; + +bad_area: + return 0; + +out_of_memory: + if (current->pid == 1) { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; +} + +extern spinlock_t timerlist_lock; + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out (timerlist_lock is acquired through the + * console unblank code) + */ +void bust_spinlocks(int yes) +{ + spin_lock_init(&timerlist_lock); + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... 
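+ * (console_loglevel is raised to 15 first in case an NMI oopser had
+ * silenced the console; the printk(" ") is the poke itself.)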
+ */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; + } +} + +void do_BUG(const char *file, int line) +{ + bust_spinlocks(1); + printk("kernel BUG at %s:%d!\n", file, line); +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * bit 0 == 0 means no page found, 1 means protection fault + * bit 1 == 0 means read, 1 means write + * bit 2 == 0 means kernel, 1 means user-mode + */ +asmlinkage void do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long page; + unsigned long fixup; + int write; + siginfo_t info; + + /* Set the "privileged fault" bit to something sane. */ + error_code &= ~4; + error_code |= (regs->xcs & 2) << 1; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. + */ + if (address >= TASK_SIZE && !(error_code & 5)) + goto vmalloc_fault; + + mm = tsk->mm; + info.si_code = SEGV_MAPERR; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_interrupt() || !mm) + goto no_context; + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (error_code & 4) { + /* + * accessing the stack below %esp is always a bug. + * The "+ 32" is there due to some instructions (like + * pusha) doing post-decrement on the stack and that + * doesn't show up until later.. + */ + if (address + 32 < regs->esp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ + /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + write++; + break; + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, write)) { + case 1: + tsk->min_flt++; + break; + case 2: + tsk->maj_flt++; + break; + case 0: + goto do_sigbus; + default: + goto out_of_memory; + } + + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. 
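+ * (Bit 2 of error_code, normalised on entry, makes the distinction:
+ * user faults get the SIGSEGV below, kernel faults fall through to
+ * the exception-table fixup in no_context.)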
+ */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void *)address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if ((fixup = search_exception_table(regs->eip)) != 0) { + regs->eip = fixup; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + else + printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(" at virtual address %08lx\n",address); + printk(" printing eip:\n"); + printk("%08lx\n", regs->eip); + page = ((unsigned long *) cur_pgd)[address >> 22]; + printk(KERN_ALERT "*pde = %08lx\n", page); + if (page & 1) { + page &= PAGE_MASK; + address &= 0x003ff000; + page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; + printk(KERN_ALERT "*pte = %08lx\n", page); + } + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (tsk->pid == 1) { + tsk->policy |= SCHED_YIELD; + schedule(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", tsk->comm); + if (error_code & 4) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void *)address; + force_sig_info(SIGBUS, &info, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & 4)) + goto no_context; + return; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = __pgd_offset(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = offset + cur_pgd; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + + pte_k = pte_offset(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c new file mode 100644 index 0000000000..8454c9b36a --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/mm/hypervisor.c @@ -0,0 +1,58 @@ +/****************************************************************************** + * xeno/mm/hypervisor.c + * + * Update page tables via the hypervisor. 
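+ *
+ * The guest cannot write its page tables directly: each update is
+ * queued in update_queue[] and handed to Xen via HYPERVISOR_pt_update().
+ * With QUEUE_SIZE == 1 every queue_* call below flushes immediately,
+ * so, e.g., queue_l1_entry_update(ptr, val) traps into the hypervisor
+ * at once rather than batching.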
+ * + * Copyright (c) 2002, K A Fraser + */ + +#include +#include +#include +#include + +#define QUEUE_SIZE 1 +static page_update_request_t update_queue[QUEUE_SIZE]; + +void queue_l1_entry_update(unsigned long ptr, unsigned long val) +{ + update_queue[0].ptr = ptr + start_info.phys_base; + update_queue[0].val = val; + flush_page_update_queue(); +} + +void queue_l2_entry_update(unsigned long ptr, unsigned long val) +{ + update_queue[0].ptr = ptr + start_info.phys_base; + update_queue[0].val = val; + flush_page_update_queue(); +} + +void queue_baseptr_create(unsigned long ptr) +{ + update_queue[0].ptr = PGREQ_ADD_BASEPTR; + update_queue[0].val = ptr + start_info.phys_base; + flush_page_update_queue(); +} + +void queue_baseptr_remove(unsigned long ptr) +{ + update_queue[0].ptr = PGREQ_REMOVE_BASEPTR; + update_queue[0].val = ptr + start_info.phys_base; + flush_page_update_queue(); +} + +void queue_tlb_flush(void) +{ + /* nothing */ +} + +void queue_tlb_flush_one(unsigned long ptr) +{ + /* nothing */ +} + +void flush_page_update_queue(void) +{ + HYPERVISOR_pt_update(update_queue, 1); +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.16-sparse/arch/xeno/mm/init.c new file mode 100644 index 0000000000..022a511cdd --- /dev/null +++ b/xenolinux-2.4.16-sparse/arch/xeno/mm/init.c @@ -0,0 +1,368 @@ +/* + * linux/arch/i386/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_BLK_DEV_INITRD +#include +#endif +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +mmu_gather_t mmu_gathers[NR_CPUS]; +unsigned long highstart_pfn, highend_pfn; +static unsigned long totalram_pages; +static unsigned long totalhigh_pages; + +int do_check_pgt_cache(int low, int high) +{ + int freed = 0; + if(pgtable_cache_size > high) { + do { + if (pgd_quicklist) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (pmd_quicklist) { + pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); + freed++; + } + if (pte_quicklist) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } + } while(pgtable_cache_size > low); + } + return freed; +} + +/* + * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the + * physical space so we can cache the place of the first one and move + * around without checking the pgd every time. 
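+ *
+ * (In this port most of pagetable_init() below is compiled out with
+ * #if 0: the initial kernel mappings are assumed to be provided by
+ * the hypervisor/domain builder, leaving only the optional HIGHMEM
+ * kmap setup.)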
+ */ + +#if CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +} +#endif /* CONFIG_HIGHMEM */ + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageHighMem(mem_map+i)) + highmem++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); + printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); +} + +/* References to section boundaries */ + +extern char _text, _etext, _edata, __bss_start, _end; +extern char __init_begin, __init_end; + +static inline void set_pte_phys (unsigned long vaddr, + unsigned long phys, pgprot_t flags) +{ + pgprot_t prot; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + pgd = init_mm.pgd + __pgd_offset(vaddr); + if (pgd_none(*pgd)) { + printk("PAE BUG #00!\n"); + return; + } + pmd = pmd_offset(pgd, vaddr); + if (pmd_none(*pmd)) { + printk("PAE BUG #01!\n"); + return; + } + pte = pte_offset(pmd, vaddr); + if (pte_val(*pte)) + pte_ERROR(*pte); + pgprot_val(prot) = pgprot_val(PAGE_KERNEL) | pgprot_val(flags); + set_pte(pte, mk_pte_phys(phys, prot)); + + /* + * It's enough to flush this one mapping. 
+ * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + printk("Invalid __set_fixmap\n"); + return; + } + set_pte_phys(address, phys, flags); +} + +#if 0 +static void __init fixrange_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { +#if CONFIG_X86_PAE + if (pgd_none(*pgd)) { + pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pgd(pgd, __pgd(__pa(pmd) + 0x1)); + if (pmd != pmd_offset(pgd, 0)) + printk("PAE BUG #02!\n"); + } + pmd = pmd_offset(pgd, vaddr); +#else + pmd = (pmd_t *)pgd; +#endif + for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); + if (pte != pte_offset(pmd, 0)) + BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } +} +#endif + +static void __init pagetable_init (void) +{ +#if 0 + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, 0, pgd_base); +#endif + +#if CONFIG_HIGHMEM + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = init_mm.pgd + __pgd_offset(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset(pmd, vaddr); + pkmap_page_table = pte; +#endif +} + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. 
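+ *
+ * Zone sizing in the body below (a summary of the code, not new
+ * behaviour):
+ *	ZONE_DMA     = pages below MAX_DMA_ADDRESS (the ISA DMA limit);
+ *	ZONE_NORMAL  = the remaining lowmem;
+ *	ZONE_HIGHMEM = highend_pfn - max_low_pfn (CONFIG_HIGHMEM only).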
+ */ +void __init paging_init(void) +{ + pagetable_init(); + +#ifdef CONFIG_HIGHMEM + kmap_init(); +#endif + { + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned int max_dma, high, low; + + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + low = max_low_pfn; + high = highend_pfn; + + if (low < max_dma) + zones_size[ZONE_DMA] = low; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = low - max_dma; +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = high - low; +#endif + } + free_area_init(zones_size); + } + return; +} + + +static inline int page_is_ram (unsigned long pagenr) +{ + return 1; +} + +void __init mem_init(void) +{ + int codesize, reservedpages, datasize, initsize; + int tmp; + +#ifdef CONFIG_HIGHMEM + highmem_start_page = mem_map + highstart_pfn; + max_mapnr = num_physpages = highend_pfn; +#else + max_mapnr = num_physpages = max_low_pfn; +#endif + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + for (tmp = 0; tmp < max_low_pfn; tmp++) + /* + * Only count reserved RAM pages + */ + if (page_is_ram(tmp) && PageReserved(mem_map+tmp)) + reservedpages++; +#ifdef CONFIG_HIGHMEM + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = mem_map + tmp; + + if (!page_is_ram(tmp)) { + SetPageReserved(page); + continue; + } + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; +#endif + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + + boot_cpu_data.wp_works_ok = 1; +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)(&__init_begin); + for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +void si_meminfo(struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages(); + val->mem_unit = PAGE_SIZE; + return; +} diff --git a/xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds b/xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds new file mode 100644 index 0000000000..26a59aa642 --- /dev/null +++ 
b/xenolinux-2.4.16-sparse/arch/xeno/vmlinux.lds @@ -0,0 +1,87 @@ +/* ld script to make i386 Linux kernel + * Written by Martin Mares ; + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0xC0000000 + 0x000000; + _text = .; /* Text and read-only data */ + .text : { + *(.text) + *(.fixup) + *(.gnu.warning) + } = 0x9090 + .text.lock : { *(.text.lock) } /* out-of-line lock text */ + + _etext = .; /* End of text section */ + + .rodata : { *(.rodata) *(.rodata.*) } + .kstrtab : { *(.kstrtab) } + + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + + .data : { /* Data */ + *(.data) + CONSTRUCTORS + } + + _edata = .; /* End of data section */ + + . = ALIGN(8192); /* init_task */ + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); /* Init code and data */ + __init_begin = .; + .text.init : { *(.text.init) } + .data.init : { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + .data.page_aligned : { *(.data.idt) } + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + __bss_start = .; /* BSS */ + .bss : { + *(.bss) + } + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + *(.exitcall.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } +} diff --git a/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c new file mode 100644 index 0000000000..a9e973d05f --- /dev/null +++ b/xenolinux-2.4.16-sparse/drivers/block/ll_rw_blk.c @@ -0,0 +1,1243 @@ +/* + * linux/drivers/block/ll_rw_blk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE + * Queue request tables / lock, selectable elevator, Jens Axboe + * kernel-doc documentation started by NeilBrown - July2000 + */ + +/* + * This handles all read/write requests to block devices + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * MAC Floppy IWM hooks + */ + +#ifdef CONFIG_MAC_FLOPPY_IWM +extern int mac_floppy_init(void); +#endif + +/* + * For the allocated request tables + */ +static kmem_cache_t *request_cachep; + +/* + * The "disk" task queue is used to start the actual requests + * after a plug + */ +DECLARE_TASK_QUEUE(tq_disk); + +/* + * Protect the request list against multiple users.. + * + * With this spinlock the Linux block IO subsystem is 100% SMP threaded + * from the IRQ event side, and almost 100% SMP threaded from the syscall + * side (we still have protect against block device array operations, and + * the do_request() side is casually still unsafe. 
The kernel lock protects + * this part currently.). + * + * there is a fair chance that things will work just OK if these functions + * are called with no global kernel lock held ... + */ +spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; + +/* This specifies how many sectors to read ahead on the disk. */ + +int read_ahead[MAX_BLKDEV]; + +/* blk_dev_struct is: + * *request_fn + * *current_request + */ +struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ + +/* + * blk_size contains the size of all block-devices in units of 1024 byte + * sectors: + * + * blk_size[MAJOR][MINOR] + * + * if (!blk_size[MAJOR]) then no minor size checking is done. + */ +int * blk_size[MAX_BLKDEV]; + +/* + * blksize_size contains the size of all block-devices: + * + * blksize_size[MAJOR][MINOR] + * + * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. + */ +int * blksize_size[MAX_BLKDEV]; + +/* + * hardsect_size contains the size of the hardware sector of a device. + * + * hardsect_size[MAJOR][MINOR] + * + * if (!hardsect_size[MAJOR]) + * then 512 bytes is assumed. + * else + * sector_size is hardsect_size[MAJOR][MINOR] + * This is currently set by some scsi devices and read by the msdos fs driver. + * Other uses may appear later. + */ +int * hardsect_size[MAX_BLKDEV]; + +/* + * The following tunes the read-ahead algorithm in mm/filemap.c + */ +int * max_readahead[MAX_BLKDEV]; + +/* + * Max number of sectors per request + */ +int * max_sectors[MAX_BLKDEV]; + +/* + * How many requests do we allocate per queue, + * and how many do we "batch" on freeing them? + */ +static int queue_nr_requests, batch_requests; + +static inline int get_max_sectors(kdev_t dev) +{ + if (!max_sectors[MAJOR(dev)]) + return MAX_SECTORS; + return max_sectors[MAJOR(dev)][MINOR(dev)]; +} + +inline request_queue_t *blk_get_queue(kdev_t dev) +{ + struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); + + if (bdev->queue) + return bdev->queue(dev); + else + return &blk_dev[MAJOR(dev)].request_queue; +} + +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queue); + list_del(&rq->queue); + kmem_cache_free(request_cachep, rq); + i++; + }; + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task is to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... + **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + memset(q, 0, sizeof(*q)); +} + +/** + * blk_queue_headactive - indicate whether head of request queue may be active + * @q: The queue which this applies to. + * @active: A flag indicating whether the head of the queue is active.
+ * + * Description: + * The driver for a block device may choose to leave the currently active + * request on the request queue, removing it only when it has completed. + * The queue handling routines assume this by default for safety reasons + * and will not involve the head of the request queue in any merging or + * reordering of requests when the queue is unplugged (and thus may be + * working on this particular request). + * + * If a driver removes requests from the queue before processing them, then + * it may indicate that it does so, thereby allowing the head of the queue + * to be involved in merging and reordering. This is done by calling + * blk_queue_headactive() with an @active flag of %0. + * + * If a driver processes several requests at once, it must remove them (or + * at least all but one of them) from the request queue. + * + * When a queue is plugged the head will be assumed to be inactive. + **/ + +void blk_queue_headactive(request_queue_t * q, int active) +{ + q->head_active = active; +} + +/** + * blk_queue_make_request - define an alternate make_request function for a device + * @q: the request queue for the device to be affected + * @mfn: the alternate make_request function + * + * Description: + * The normal way for &struct buffer_heads to be passed to a device + * driver is for them to be collected into requests on a request + * queue, and then to allow the device driver to select requests + * off that queue when it is ready. This works well for many block + * devices. However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory", either by calling bh_kmap() to get + * a kernel mapping, or by calling create_bounce() to create a + * buffer in normal memory. + **/ + +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +{ + q->make_request_fn = mfn; +} + +static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +{ + if (req->nr_segments < max_segments) { + req->nr_segments++; + return 1; + } + return 0; +} + +static int ll_back_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_front_merge_fn(request_queue_t *q, struct request *req, + struct buffer_head *bh, int max_segments) +{ + if (bh->b_data + bh->b_size == req->bh->b_data) + return 1; + return ll_new_segment(q, req, max_segments); +} + +static int ll_merge_requests_fn(request_queue_t *q, struct request *req, + struct request *next, int max_segments) +{ + int total_segments = req->nr_segments + next->nr_segments; + + if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + total_segments--; + + if (total_segments > max_segments) + return 0; + + req->nr_segments = total_segments; + return 1; +} + +/* + * "plug" the device if there are no outstanding requests: this will + * force the transfer to start only after we have put all the requests + * on the list. + * + * This is called with interrupts off and no requests on the queue.
+ * (and with the request spinlock acquired) + */ +static void generic_plug_device(request_queue_t *q, kdev_t dev) +{ + /* + * no need to replug device + */ + if (!list_empty(&q->queue_head) || q->plugged) + return; + + q->plugged = 1; + queue_task(&q->plug_tq, &tq_disk); +} + +/* + * remove the plug and let it rip.. + */ +static inline void __generic_unplug_device(request_queue_t *q) +{ + if (q->plugged) { + q->plugged = 0; + if (!list_empty(&q->queue_head)) + q->request_fn(q); + } +} + +void generic_unplug_device(void *data) +{ + request_queue_t *q = (request_queue_t *) data; + unsigned long flags; + + spin_lock_irqsave(&io_request_lock, flags); + __generic_unplug_device(q); + spin_unlock_irqrestore(&io_request_lock, flags); +} + +static void blk_init_free_list(request_queue_t *q) +{ + struct request *rq; + int i; + + INIT_LIST_HEAD(&q->rq[READ].free); + INIT_LIST_HEAD(&q->rq[WRITE].free); + q->rq[READ].count = 0; + q->rq[WRITE].count = 0; + + /* + * Divide requests in half between read and write + */ + for (i = 0; i < queue_nr_requests; i++) { + rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); + if (rq == NULL) { + /* We'll get a `leaked requests' message from blk_cleanup_queue */ + printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); + break; + } + memset(rq, 0, sizeof(struct request)); + rq->rq_status = RQ_INACTIVE; + list_add(&rq->queue, &q->rq[i&1].free); + q->rq[i&1].count++; + } + + init_waitqueue_head(&q->wait_for_request); + spin_lock_init(&q->queue_lock); +} + +static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); + +/** + * blk_init_queue - prepare a request queue for use with a block device + * @q: The &request_queue_t to be initialised + * @rfn: The function to be called to process requests that have been + * placed on the queue. + * + * Description: + * If a block device wishes to use the standard request handling procedures, + * which sorts requests and coalesces adjacent requests, then it must + * call blk_init_queue(). The function @rfn will be called when there + * are requests on the queue that need to be processed. If the device + * supports plugging, then @rfn may not be called immediately when requests + * are available on the queue, but may be called at some time later instead. + * Plugged queues are generally unplugged when a buffer belonging to one + * of the requests on the queue is needed, or due to memory pressure. + * + * @rfn is not required, or even expected, to remove all requests off the + * queue, but only as many as it can handle at a time. If it does leave + * requests on the queue, it is responsible for arranging that the requests + * get dealt with eventually. + * + * A global spin lock $io_request_lock must be held while manipulating the + * requests on the request queue. + * + * The request on the head of the queue is by default assumed to be + * potentially active, and it is not considered for re-ordering or merging + * whenever the given queue is unplugged. This behaviour can be changed with + * blk_queue_headactive(). + * + * Note: + * blk_init_queue() must be paired with a blk_cleanup_queue() call + * when the block device is deactivated (such as at module unload). 
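+ *
+ * A minimal caller might look like the following (a sketch; the
+ * driver names are invented for illustration):
+ *
+ *	static void mydev_request_fn(request_queue_t *q)
+ *	{
+ *		... pull requests off &q->queue_head and service them ...
+ *	}
+ *
+ *	blk_init_queue(BLK_DEFAULT_QUEUE(MYDEV_MAJOR), mydev_request_fn);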
+ **/ +void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +{ + INIT_LIST_HEAD(&q->queue_head); + elevator_init(&q->elevator, ELEVATOR_LINUS); + blk_init_free_list(q); + q->request_fn = rfn; + q->back_merge_fn = ll_back_merge_fn; + q->front_merge_fn = ll_front_merge_fn; + q->merge_requests_fn = ll_merge_requests_fn; + q->make_request_fn = __make_request; + q->plug_tq.sync = 0; + q->plug_tq.routine = &generic_unplug_device; + q->plug_tq.data = q; + q->plugged = 0; + /* + * These booleans describe the queue properties. We set the + * default (and most common) values here. Other drivers can + * use the appropriate functions to alter the queue properties. + * as appropriate. + */ + q->plug_device_fn = generic_plug_device; + q->head_active = 1; +} + +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +/* + * Get a free request. io_request_lock must be held and interrupts + * disabled on the way in. + */ +static inline struct request *get_request(request_queue_t *q, int rw) +{ + struct request *rq = NULL; + struct request_list *rl = q->rq + rw; + + if (!list_empty(&rl->free)) { + rq = blkdev_free_rq(&rl->free); + list_del(&rq->queue); + rl->count--; + rq->rq_status = RQ_ACTIVE; + rq->special = NULL; + rq->q = q; + } + + return rq; +} + +/* + * No available requests for this queue, unplug the device. + */ +static struct request *__get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + DECLARE_WAITQUEUE(wait, current); + + generic_unplug_device(q); + add_wait_queue(&q->wait_for_request, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (q->rq[rw].count < batch_requests) + schedule(); + spin_lock_irq(&io_request_lock); + rq = get_request(q,rw); + spin_unlock_irq(&io_request_lock); + } while (rq == NULL); + remove_wait_queue(&q->wait_for_request, &wait); + current->state = TASK_RUNNING; + return rq; +} + +static inline struct request *get_request_wait(request_queue_t *q, int rw) +{ + register struct request *rq; + + spin_lock_irq(&io_request_lock); + rq = get_request(q, rw); + spin_unlock_irq(&io_request_lock); + if (rq) + return rq; + return __get_request_wait(q, rw); +} + +/* RO fail safe mechanism */ + +static long ro_bits[MAX_BLKDEV][8]; + +int is_read_only(kdev_t dev) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return 0; + return ro_bits[major][minor >> 5] & (1 << (minor & 31)); +} + +void set_device_ro(kdev_t dev,int flag) +{ + int minor,major; + + major = MAJOR(dev); + minor = MINOR(dev); + if (major < 0 || major >= MAX_BLKDEV) return; + if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31); + else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); +} + +inline void drive_stat_acct (kdev_t dev, int rw, + unsigned long nr_sectors, int new_io) +{ + unsigned int major = MAJOR(dev); + unsigned int index; + + index = disk_index(dev); + if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR)) + return; + + kstat.dk_drive[major][index] += new_io; + if (rw == READ) { + kstat.dk_drive_rio[major][index] += new_io; + kstat.dk_drive_rblk[major][index] += nr_sectors; + } else if (rw == WRITE) { + kstat.dk_drive_wio[major][index] += new_io; + kstat.dk_drive_wblk[major][index] += nr_sectors; + } else + printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); +} + +/* + * add-request adds a request to the linked list. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. 
+ * + * By this point, req->cmd is always either READ/WRITE, never READA, + * which is important for drive_stat_acct() above. + */ +static inline void add_request(request_queue_t * q, struct request * req, + struct list_head *insert_here) +{ + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + BUG(); + } + + /* + * elevator indicated where it wants this request to be + * inserted at elevator_merge time + */ + list_add(&req->queue, insert_here); +} + +/* + * Must be called with io_request_lock held and interrupts disabled + */ +inline void blkdev_release_request(struct request *req) +{ + request_queue_t *q = req->q; + int rw = req->cmd; + + req->rq_status = RQ_INACTIVE; + req->q = NULL; + + /* + * Request may not have originated from ll_rw_blk. if not, + * assume it has free buffers and check waiters + */ + if (q) { + list_add(&req->queue, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) + wake_up(&q->wait_for_request); + } +} + +/* + * Has to be called with the request spinlock acquired + */ +static void attempt_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + struct request *next; + + next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) + return; + if (req->cmd != next->cmd + || req->rq_dev != next->rq_dev + || req->nr_sectors + next->nr_sectors > max_sectors + || next->waiting) + return; + /* + * If we are not allowed to merge these requests, then + * return. If we are allowed to merge, then the count + * will have been updated to the appropriate number, + * and we shouldn't do it here too. + */ + if (!q->merge_requests_fn(q, req, next, max_segments)) + return; + + q->elevator.elevator_merge_req_fn(req, next); + req->bhtail->b_reqnext = next->bh; + req->bhtail = next->bhtail; + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + list_del(&next->queue); + blkdev_release_request(next); +} + +static inline void attempt_back_merge(request_queue_t * q, + struct request *req, + int max_sectors, + int max_segments) +{ + if (&req->queue == q->queue_head.prev) + return; + attempt_merge(q, req, max_sectors, max_segments); +} + +static inline void attempt_front_merge(request_queue_t * q, + struct list_head * head, + struct request *req, + int max_sectors, + int max_segments) +{ + struct list_head * prev; + + prev = req->queue.prev; + if (head == prev) + return; + attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); +} + +static int __make_request(request_queue_t * q, int rw, + struct buffer_head * bh) +{ + unsigned int sector, count; + int max_segments = MAX_SEGMENTS; + struct request * req, *freereq = NULL; + int rw_ahead, max_sectors, el_ret; + struct list_head *head, *insert_here; + int latency; + elevator_t *elevator = &q->elevator; + + count = bh->b_size >> 9; + sector = bh->b_rsector; + + rw_ahead = 0; /* normal case; gets changed below for READA */ + switch (rw) { + case READA: + rw_ahead = 1; + rw = READ; /* drop into READ */ + case READ: + case WRITE: + latency = elevator_request_latency(elevator, rw); + break; + default: + BUG(); + goto end_io; + } + + /* We'd better have a real physical mapping! + Check this bit only if the buffer was dirty and just locked + down by us so at this point flushpage will block and + won't clear the mapped bit under us. 
*/ + if (!buffer_mapped(bh)) + BUG(); + + /* + * Temporary solution - in 2.5 this will be done by the lowlevel + * driver. Create a bounce buffer if the buffer data points into + * high memory - keep the original buffer otherwise. + */ +#if CONFIG_HIGHMEM + bh = create_bounce(rw, bh); +#endif + +/* look for a free request. */ + /* + * Try to coalesce the new request with old requests + */ + max_sectors = get_max_sectors(bh->b_rdev); + +again: + req = NULL; + head = &q->queue_head; + /* + * Now we acquire the request spinlock, we have to be mega careful + * not to schedule or do something nonatomic + */ + spin_lock_irq(&io_request_lock); + + insert_here = head->prev; + if (list_empty(head)) { + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + goto get_rq; + } else if (q->head_active && !q->plugged) + head = head->next; + + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + switch (el_ret) { + + case ELEVATOR_BACK_MERGE: + if (!q->back_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + req->bhtail->b_reqnext = bh; + req->bhtail = bh; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_back_merge(q, req, max_sectors, max_segments); + goto out; + + case ELEVATOR_FRONT_MERGE: + if (!q->front_merge_fn(q, req, bh, max_segments)) + break; + elevator->elevator_merge_cleanup_fn(q, req, count); + bh->b_reqnext = req->bh; + req->bh = bh; + req->buffer = bh->b_data; + req->current_nr_sectors = count; + req->sector = req->hard_sector = sector; + req->nr_sectors = req->hard_nr_sectors += count; + blk_started_io(count); + drive_stat_acct(req->rq_dev, req->cmd, count, 0); + attempt_front_merge(q, head, req, max_sectors, max_segments); + goto out; + + /* + * elevator says don't/can't merge. get new request + */ + case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; + break; + + default: + printk("elevator returned crap (%d)\n", el_ret); + BUG(); + } + + /* + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. + */ +get_rq: + if (freereq) { + req = freereq; + freereq = NULL; + } else if ((req = get_request(q, rw)) == NULL) { + spin_unlock_irq(&io_request_lock); + if (rw_ahead) + goto end_io; + + freereq = __get_request_wait(q, rw); + goto again; + } + +/* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; + req->cmd = rw; + req->errors = 0; + req->hard_sector = req->sector = sector; + req->hard_nr_sectors = req->nr_sectors = count; + req->current_nr_sectors = count; + req->nr_segments = 1; /* Always 1 for a new request. */ + req->nr_hw_segments = 1; /* Always 1 for a new request. */ + req->buffer = bh->b_data; + req->waiting = NULL; + req->bh = bh; + req->bhtail = bh; + req->rq_dev = bh->b_rdev; + blk_started_io(count); + add_request(q, req, insert_here); +out: + if (freereq) + blkdev_release_request(freereq); + spin_unlock_irq(&io_request_lock); + return 0; +end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + return 0; +} + +/** + * generic_make_request: hand a buffer head to it's device driver for I/O + * @rw: READ, WRITE, or READA - what sort of I/O is desired. + * @bh: The buffer head describing the location in memory and on the device. 
+ * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct buffer_head and a &rw value. The + * %READ and %WRITE options are (hopefully) obvious in meaning. The + * %READA value means that a read is required, but that the driver is + * free to fail the request if, for example, it cannot get needed + * resources immediately. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bh->b_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that b_page, + * b_addr, b_size are set to describe the memory buffer, that b_rdev + * and b_rsector are set to describe the device address, and the + * b_end_io and optionally b_private are set to describe how + * completion notification should be signaled. BH_Mapped should also + * be set (to confirm that b_dev and b_blocknr are valid). + * + * generic_make_request and the drivers it calls may use b_reqnext, + * and may change b_rdev and b_rsector. So the values of these fields + * should NOT be depended on after the call to generic_make_request. + * Because of this, the caller should record the device address + * information in b_dev and b_blocknr. + * + * Apart from those fields mentioned above, no other fields, and in + * particular, no other flags, are changed by generic_make_request or + * any lower level drivers. + * */ +void generic_make_request (int rw, struct buffer_head * bh) +{ + int major = MAJOR(bh->b_rdev); + int minorsize = 0; + request_queue_t *q; + + if (!bh->b_end_io) + BUG(); + + /* Test device size, when known. */ + if (blk_size[major]) + minorsize = blk_size[major][MINOR(bh->b_rdev)]; + if (minorsize) { + unsigned long maxsector = (minorsize << 1) + 1; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; + + if (maxsector < count || maxsector - count < sector) { + /* Yecch */ + bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); + + /* This may well happen - the kernel calls bread() + without checking the size of the device, e.g., + when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", + kdevname(bh->b_rdev), rw, + (sector + count)>>1, minorsize); + + /* Yecch again */ + bh->b_end_io(bh, 0); + return; + } + } + + /* + * Resolve the mapping until finished. (drivers are + * still free to implement/resolve their own stacking + * by explicitly returning 0) + */ + /* NOTE: we don't repeat the blk_size check for each new device. + * Stacking drivers are expected to know what they are doing. + */ + do { + q = blk_get_queue(bh->b_rdev); + if (!q) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%ld)\n", + kdevname(bh->b_rdev), bh->b_rsector); + buffer_IO_error(bh); + break; + } + } while (q->make_request_fn(q, rw, bh)); +} + + +/** + * submit_bh: submit a buffer_head to the block device later for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + * submit_bh() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. + * + * The extra functionality provided by submit_bh is to determine + * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. 
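+ *
+ * The classic buffer-cache read path uses this machinery roughly as
+ * follows (a sketch of bread()-style code, going via ll_rw_block()
+ * below, which wraps submit_bh(); error handling omitted):
+ *
+ *	struct buffer_head *bh = getblk(dev, block, size);
+ *
+ *	if (!buffer_uptodate(bh)) {
+ *		ll_rw_block(READ, 1, &bh);
+ *		wait_on_buffer(bh);
+ *	}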
+ * This is appropriate for IO requests that come from the buffer
+ * cache and page cache which (currently) always use aligned blocks.
+ */
+void submit_bh(int rw, struct buffer_head * bh)
+{
+	int count = bh->b_size >> 9;
+
+	if (!test_bit(BH_Lock, &bh->b_state))
+		BUG();
+
+	set_bit(BH_Req, &bh->b_state);
+
+	/*
+	 * First step, 'identity mapping' - RAID or LVM might
+	 * further remap this.
+	 */
+	bh->b_rdev = bh->b_dev;
+	bh->b_rsector = bh->b_blocknr * count;
+
+	generic_make_request(rw, bh);
+
+	switch (rw) {
+		case WRITE:
+			kstat.pgpgout += count;
+			break;
+		default:
+			kstat.pgpgin += count;
+			break;
+	}
+}
+
+/**
+ * ll_rw_block: low-level access to block devices
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @nr: number of &struct buffer_heads in the array
+ * @bhs: array of pointers to &struct buffer_head
+ *
+ * ll_rw_block() takes an array of pointers to &struct buffer_heads,
+ * and requests an I/O operation on them, either a %READ or a %WRITE.
+ * The third %READA option is described in the documentation for
+ * generic_make_request() which ll_rw_block() calls.
+ *
+ * This function provides extra functionality that is not in
+ * generic_make_request() that is relevant to buffers in the buffer
+ * cache or page cache. In particular it drops any buffer that it
+ * cannot get a lock on (with the BH_Lock state bit), any buffer that
+ * appears to be clean when doing a write request, and any buffer that
+ * appears to be up-to-date when doing a read request. Further it marks
+ * as clean buffers that are processed for writing (the buffer cache
+ * won't assume that they are actually clean until the buffer gets
+ * unlocked).
+ *
+ * ll_rw_block sets b_end_io to a simple completion handler that marks
+ * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
+ * any waiters. Any client that needs a more interesting completion
+ * routine should call submit_bh() (or generic_make_request())
+ * directly.
+ *
+ * Caveat:
+ * All of the buffers must be for the same device, and must also be
+ * of the current approved size for the device. */
+
+void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
+{
+	unsigned int major;
+	int correct_size;
+	int i;
+
+	if (!nr)
+		return;
+
+	major = MAJOR(bhs[0]->b_dev);
+
+	/* Determine correct block size for this device. */
+	correct_size = get_hardsect_size(bhs[0]->b_dev);
+
+	/* Verify requested block sizes. */
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+		if (bh->b_size % correct_size) {
+			printk(KERN_NOTICE "ll_rw_block: device %s: "
+			       "only %d-char blocks implemented (%u)\n",
+			       kdevname(bhs[0]->b_dev),
+			       correct_size, bh->b_size);
+			goto sorry;
+		}
+	}
+
+	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
+		printk(KERN_NOTICE "Can't write to read-only device %s\n",
+		       kdevname(bhs[0]->b_dev));
+		goto sorry;
+	}
+
+	for (i = 0; i < nr; i++) {
+		struct buffer_head *bh = bhs[i];
+
+		/* Only one thread can actually submit the I/O. */
+		if (test_and_set_bit(BH_Lock, &bh->b_state))
+			continue;
+
+		/* We have the buffer lock */
+		atomic_inc(&bh->b_count);
+		bh->b_end_io = end_buffer_io_sync;
+
+		switch(rw) {
+		case WRITE:
+			if (!atomic_set_buffer_clean(bh))
+				/* Hmmph! Nothing to write */
+				goto end_io;
+			__mark_buffer_clean(bh);
+			break;
+
+		case READA:
+		case READ:
+			if (buffer_uptodate(bh))
+				/* Hmmph!
Already have it */ + goto end_io; + break; + default: + BUG(); + end_io: + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + continue; + } + + submit_bh(rw, bh); + } + return; + +sorry: + /* Make sure we don't get infinite dirty retries.. */ + for (i = 0; i < nr; i++) + mark_buffer_clean(bhs[i]); +} + +#ifdef CONFIG_STRAM_SWAP +extern int stram_device_init (void); +#endif + + +/** + * end_that_request_first - end I/O on one buffer. + * @req: the request being processed + * @uptodate: 0 for I/O error + * @name: the name printed for an I/O error + * + * Description: + * Ends I/O on the first buffer attached to @req, and sets it up + * for the next buffer_head (if any) in the cluster. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + * + * Caveat: + * Drivers implementing their own end_request handling must call + * blk_finished_io() appropriately. + **/ + +int end_that_request_first (struct request *req, int uptodate, char *name) +{ + struct buffer_head * bh; + int nsect; + + req->errors = 0; + if (!uptodate) + printk("end_request: I/O error, dev %s (%s), sector %lu\n", + kdevname(req->rq_dev), name, req->sector); + + if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); + req->bh = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, uptodate); + if ((bh = req->bh) != NULL) { + req->hard_sector += nsect; + req->hard_nr_sectors -= nsect; + req->sector = req->hard_sector; + req->nr_sectors = req->hard_nr_sectors; + + req->current_nr_sectors = bh->b_size >> 9; + if (req->nr_sectors < req->current_nr_sectors) { + req->nr_sectors = req->current_nr_sectors; + printk("end_request: buffer-list destroyed\n"); + } + req->buffer = bh->b_data; + return 1; + } + } + return 0; +} + +void end_that_request_last(struct request *req) +{ + if (req->waiting != NULL) + complete(req->waiting); + + blkdev_release_request(req); +} + +#define MB(kb) ((kb) << 10) + +int __init blk_dev_init(void) +{ + struct blk_dev_struct *dev; + int total_ram; + + request_cachep = kmem_cache_create("blkdev_requests", + sizeof(struct request), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!request_cachep) + panic("Can't create request pool slab cache\n"); + + for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) + dev->queue = NULL; + + memset(ro_bits,0,sizeof(ro_bits)); + memset(max_readahead, 0, sizeof(max_readahead)); + memset(max_sectors, 0, sizeof(max_sectors)); + + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Free request slots per queue. 
+ * (Half for reads, half for writes) + */ + queue_nr_requests = 64; + if (total_ram > MB(32)) + queue_nr_requests = 128; + + /* + * Batch frees according to queue length + */ + batch_requests = queue_nr_requests/4; + printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); + +#ifdef CONFIG_AMIGA_Z2RAM + z2_init(); +#endif +#ifdef CONFIG_STRAM_SWAP + stram_device_init(); +#endif +#ifdef CONFIG_BLK_DEV_RAM + rd_init(); +#endif +#ifdef CONFIG_ISP16_CDI + isp16_init(); +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) + ide_init(); /* this MUST precede hd_init */ +#endif +#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) + hd_init(); +#endif +#ifdef CONFIG_BLK_DEV_PS2 + ps2esdi_init(); +#endif +#ifdef CONFIG_BLK_DEV_XD + xd_init(); +#endif +#ifdef CONFIG_BLK_DEV_MFM + mfm_init(); +#endif +#ifdef CONFIG_PARIDE + { extern void paride_init(void); paride_init(); }; +#endif +#ifdef CONFIG_MAC_FLOPPY + swim3_init(); +#endif +#ifdef CONFIG_BLK_DEV_SWIM_IOP + swimiop_init(); +#endif +#ifdef CONFIG_AMIGA_FLOPPY + amiga_floppy_init(); +#endif +#ifdef CONFIG_ATARI_FLOPPY + atari_floppy_init(); +#endif +#ifdef CONFIG_BLK_DEV_FD + floppy_init(); +#else +#if defined(__i386__) && !defined(CONFIG_XENO) /* Do we even need this? */ + outb_p(0xc, 0x3f2); +#endif +#endif +#ifdef CONFIG_CDU31A + cdu31a_init(); +#endif +#ifdef CONFIG_ATARI_ACSI + acsi_init(); +#endif +#ifdef CONFIG_MCD + mcd_init(); +#endif +#ifdef CONFIG_MCDX + mcdx_init(); +#endif +#ifdef CONFIG_SBPCD + sbpcd_init(); +#endif +#ifdef CONFIG_AZTCD + aztcd_init(); +#endif +#ifdef CONFIG_CDU535 + sony535_init(); +#endif +#ifdef CONFIG_GSCD + gscd_init(); +#endif +#ifdef CONFIG_CM206 + cm206_init(); +#endif +#ifdef CONFIG_OPTCD + optcd_init(); +#endif +#ifdef CONFIG_SJCD + sjcd_init(); +#endif +#ifdef CONFIG_APBLOCK + ap_init(); +#endif +#ifdef CONFIG_DDV + ddv_init(); +#endif +#ifdef CONFIG_MDISK + mdisk_init(); +#endif +#ifdef CONFIG_DASD + dasd_init(); +#endif +#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) + tapeblock_init(); +#endif +#ifdef CONFIG_BLK_DEV_XPRAM + xpram_init(); +#endif + +#ifdef CONFIG_SUN_JSFLASH + jsfd_init(); +#endif + return 0; +}; + +EXPORT_SYMBOL(io_request_lock); +EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(end_that_request_last); +EXPORT_SYMBOL(blk_init_queue); +EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(blk_cleanup_queue); +EXPORT_SYMBOL(blk_queue_headactive); +EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(generic_make_request); +EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(generic_unplug_device); diff --git a/xenolinux-2.4.16-sparse/drivers/block/rd.c b/xenolinux-2.4.16-sparse/drivers/block/rd.c new file mode 100644 index 0000000000..ffcc9d3825 --- /dev/null +++ b/xenolinux-2.4.16-sparse/drivers/block/rd.c @@ -0,0 +1,1009 @@ +/* + * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. + * + * (C) Chad Page, Theodore Ts'o, et. al, 1995. + * + * This RAM disk is designed to have filesystems created on it and mounted + * just like a regular floppy disk. + * + * It also does something suggested by Linus: use the buffer cache as the + * RAM disk data. This makes it possible to dynamically allocate the RAM disk + * buffer - with some consequences I have to deal with as I write this. + * + * This code is based on the original ramdisk.c, written mostly by + * Theodore Ts'o (TYT) in 1991. 
The code was largely rewritten by + * Chad Page to use the buffer cache to store the RAM disk data in + * 1995; Theodore then took over the driver again, and cleaned it up + * for inclusion in the mainline kernel. + * + * The original CRAMDISK code was written by Richard Lyons, and + * adapted by Chad Page to use the new RAM disk interface. Theodore + * Ts'o rewrote it so that both the compressed RAM disk loader and the + * kernel decompressor uses the same inflate.c codebase. The RAM disk + * loader now also loads into a dynamic (buffer cache based) RAM disk, + * not the old static RAM disk. Support for the old static RAM disk has + * been completely removed. + * + * Loadable module support added by Tom Dyas. + * + * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): + * Cosmetic changes in #ifdef MODULE, code movement, etc. + * When the RAM disk module is removed, free the protected buffers + * Default RAM disk size changed to 2.88 MB + * + * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 + * + * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) + * - Chad Page + * + * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 + * + * Make block size and block size shift for RAM disks a global macro + * and set blk_size for -ENOSPC, Werner Fink , Apr '99 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +extern void wait_for_keypress(void); + +/* + * 35 has been officially registered as the RAMDISK major number, but + * so is the original MAJOR number of 1. We're using 1 in + * include/linux/major.h for now + */ +#define MAJOR_NR RAMDISK_MAJOR +#include +#include + +/* The RAM disk size is now a parameter */ +#define NUM_RAMDISKS 16 /* This cannot be overridden (yet) */ + +#ifndef MODULE +/* We don't have to load RAM disks or gunzip them in a module. */ +#define RD_LOADER +#define BUILD_CRAMDISK + +void rd_load(void); +static int crd_load(struct file *fp, struct file *outfp); + +#ifdef CONFIG_BLK_DEV_INITRD +static int initrd_users; +#endif +#endif + +/* Various static variables go here. Most are used only in the RAM disk code. + */ + +static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ +static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ +static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static devfs_handle_t devfs_handle; +static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */ + +/* + * Parameters for the boot-loading of the RAM disk. These are set by + * init/main.c (from arguments to the kernel command line) or from the + * architecture-specific setup routine (from the stored boot sector + * information). + */ +int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ +/* + * It would be very desiderable to have a soft-blocksize (that in the case + * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because + * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of + * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages + * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only + * 1 page will be protected. 
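+ * (For example, booting with "ramdisk_blocksize=4096" asks for a 4k
+ * soft-blocksize; see the ramdisk_blocksize __setup handler below.)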
Depending on the size of the ramdisk you + * may want to change the ramdisk blocksize to achieve a better or worse MM + * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that + * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). + */ +int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ + +#ifndef MODULE + +int rd_doload; /* 1 = load RAM disk, 0 = don't load */ +int rd_prompt = 1; /* 1 = prompt for RAM disk, 0 = don't prompt */ +int rd_image_start; /* starting block # of image */ +#ifdef CONFIG_BLK_DEV_INITRD +unsigned long initrd_start, initrd_end; +int mount_initrd = 1; /* zero if initrd should not be mounted */ +int initrd_below_start_ok; + +static int __init no_initrd(char *str) +{ + mount_initrd = 0; + return 1; +} + +__setup("noinitrd", no_initrd); + +#endif + +static int __init ramdisk_start_setup(char *str) +{ + rd_image_start = simple_strtol(str,NULL,0); + return 1; +} + +static int __init load_ramdisk(char *str) +{ + rd_doload = simple_strtol(str,NULL,0) & 3; + return 1; +} + +static int __init prompt_ramdisk(char *str) +{ + rd_prompt = simple_strtol(str,NULL,0) & 1; + return 1; +} + +static int __init ramdisk_size(char *str) +{ + rd_size = simple_strtol(str,NULL,0); + return 1; +} + +static int __init ramdisk_size2(char *str) +{ + return ramdisk_size(str); +} + +static int __init ramdisk_blocksize(char *str) +{ + rd_blocksize = simple_strtol(str,NULL,0); + return 1; +} + +__setup("ramdisk_start=", ramdisk_start_setup); +__setup("load_ramdisk=", load_ramdisk); +__setup("prompt_ramdisk=", prompt_ramdisk); +__setup("ramdisk=", ramdisk_size); +__setup("ramdisk_size=", ramdisk_size2); +__setup("ramdisk_blocksize=", ramdisk_blocksize); + +#endif + +/* + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * aops copied from ramfs. 
+ */ +static int ramdisk_readpage(struct file *file, struct page * page) +{ + if (!Page_Uptodate(page)) { + memset(kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); + flush_dcache_page(page); + SetPageUptodate(page); + } + UnlockPage(page); + return 0; +} + +static int ramdisk_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + if (!Page_Uptodate(page)) { + void *addr = page_address(page); + memset(addr, 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + SetPageUptodate(page); + } + SetPageDirty(page); + return 0; +} + +static int ramdisk_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +{ + return 0; +} + +static struct address_space_operations ramdisk_aops = { + readpage: ramdisk_readpage, + writepage: fail_writepage, + prepare_write: ramdisk_prepare_write, + commit_write: ramdisk_commit_write, +}; + +static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +{ + struct address_space * mapping; + unsigned long index; + int offset, size, err; + + err = -EIO; + err = 0; + mapping = rd_bdev[minor]->bd_inode->i_mapping; + + index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; + size = sbh->b_size; + + do { + int count; + struct page ** hash; + struct page * page; + char * src, * dst; + int unlock = 0; + + count = PAGE_CACHE_SIZE - offset; + if (count > size) + count = size; + size -= count; + + hash = page_hash(mapping, index); + page = __find_get_page(mapping, index, hash); + if (!page) { + page = grab_cache_page(mapping, index); + err = -ENOMEM; + if (!page) + goto out; + err = 0; + + if (!Page_Uptodate(page)) { + memset(kmap(page), 0, PAGE_CACHE_SIZE); + kunmap(page); + SetPageUptodate(page); + } + + unlock = 1; + } + + index++; + + if (rw == READ) { + src = kmap(page); + src += offset; + dst = bh_kmap(sbh); + } else { + dst = kmap(page); + dst += offset; + src = bh_kmap(sbh); + } + offset = 0; + + memcpy(dst, src, count); + + kunmap(page); + bh_kunmap(sbh); + + if (rw == READ) { + flush_dcache_page(page); + } else { + SetPageDirty(page); + } + if (unlock) + UnlockPage(page); + __free_page(page); + } while (size); + + out: + return err; +} + +/* + * Basically, my strategy here is to set up a buffer-head which can't be + * deleted, and make that my Ramdisk. If the request is outside of the + * allocated size, we must get rid of it... + * + * 19-JAN-1998 Richard Gooch Added devfs support + * + */ +static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +{ + unsigned int minor; + unsigned long offset, len; + + minor = MINOR(sbh->b_rdev); + + if (minor >= NUM_RAMDISKS) + goto fail; + + + offset = sbh->b_rsector << 9; + len = sbh->b_size; + + if ((offset + len) > rd_length[minor]) + goto fail; + + if (rw==READA) + rw=READ; + if ((rw != READ) && (rw != WRITE)) { + printk(KERN_INFO "RAMDISK: bad command: %d\n", rw); + goto fail; + } + + if (rd_blkdev_pagecache_IO(rw, sbh, minor)) + goto fail; + + sbh->b_end_io(sbh,1); + return 0; + fail: + sbh->b_end_io(sbh,0); + return 0; +} + +static int rd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + int error = -EINVAL; + unsigned int minor; + + if (!inode || !inode->i_rdev) + goto out; + + minor = MINOR(inode->i_rdev); + + switch (cmd) { + case BLKFLSBUF: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + /* special: we want to release the ramdisk memory, + it's not like with the other blockdevices where + this ioctl only flushes away the buffer cache. 
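+	   From user space this is reached with the usual ioctl
+	   sequence (a sketch; the util-linux "freeramdisk" tool
+	   does essentially this):
+
+		int fd = open("/dev/ram0", O_RDWR);
+		ioctl(fd, BLKFLSBUF, 0);
+		close(fd);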
*/ + error = -EBUSY; + down(&inode->i_bdev->bd_sem); + if (inode->i_bdev->bd_openers <= 2) { + truncate_inode_pages(inode->i_mapping, 0); + error = 0; + } + up(&inode->i_bdev->bd_sem); + break; + case BLKGETSIZE: /* Return device size */ + if (!arg) + break; + error = put_user(rd_kbsize[minor] << 1, (unsigned long *) arg); + break; + case BLKGETSIZE64: + error = put_user((u64)rd_kbsize[minor]<<10, (u64*)arg); + break; + case BLKROSET: + case BLKROGET: + case BLKSSZGET: + error = blk_ioctl(inode->i_rdev, cmd, arg); + }; +out: + return error; +} + + +#ifdef CONFIG_BLK_DEV_INITRD + +static ssize_t initrd_read(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + int left; + + left = initrd_end - initrd_start - *ppos; + if (count > left) count = left; + if (count == 0) return 0; + copy_to_user(buf, (char *)initrd_start + *ppos, count); + *ppos += count; + return count; +} + + +static int initrd_release(struct inode *inode,struct file *file) +{ + extern void free_initrd_mem(unsigned long, unsigned long); + + lock_kernel(); + if (!--initrd_users) { + free_initrd_mem(initrd_start, initrd_end); + initrd_start = 0; + } + unlock_kernel(); + blkdev_put(inode->i_bdev, BDEV_FILE); + return 0; +} + + +static struct file_operations initrd_fops = { + read: initrd_read, + release: initrd_release, +}; + +#endif + + +static int rd_open(struct inode * inode, struct file * filp) +{ + int unit = DEVICE_NR(inode->i_rdev); + +#ifdef CONFIG_BLK_DEV_INITRD + if (unit == INITRD_MINOR) { + if (!initrd_start) return -ENODEV; + initrd_users++; + filp->f_op = &initrd_fops; + return 0; + } +#endif + + if (unit >= NUM_RAMDISKS) + return -ENXIO; + + /* + * Immunize device against invalidate_buffers() and prune_icache(). + */ + if (rd_bdev[unit] == NULL) { + rd_bdev[unit] = bdget(kdev_t_to_nr(inode->i_rdev)); + rd_bdev[unit]->bd_openers++; + rd_bdev[unit]->bd_inode->i_mapping->a_ops = &ramdisk_aops; + } + + return 0; +} + +static struct block_device_operations rd_bd_op = { + owner: THIS_MODULE, + open: rd_open, + ioctl: rd_ioctl, +}; + +#ifdef MODULE +/* Before freeing the module, invalidate all of the protected buffers! 
*/ +static void __exit rd_cleanup (void) +{ + int i; + + for (i = 0 ; i < NUM_RAMDISKS; i++) { + struct block_device *bdev = rd_bdev[i]; + rd_bdev[i] = NULL; + if (bdev) + blkdev_put(bdev, BDEV_FILE); + destroy_buffers(MKDEV(MAJOR_NR, i)); + } + + devfs_unregister (devfs_handle); + unregister_blkdev( MAJOR_NR, "ramdisk" ); + hardsect_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + blk_size[MAJOR_NR] = NULL; +} +#endif + +/* This is the registration and initialization section of the RAM disk driver */ +int __init rd_init (void) +{ + int i; + + if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || + (rd_blocksize & (rd_blocksize-1))) + { + printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", + rd_blocksize); + rd_blocksize = BLOCK_SIZE; + } + + if (register_blkdev(MAJOR_NR, "ramdisk", &rd_bd_op)) { + printk("RAMDISK: Could not get major %d", MAJOR_NR); + return -EIO; + } + + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), &rd_make_request); + + for (i = 0; i < NUM_RAMDISKS; i++) { + /* rd_size is given in kB */ + rd_length[i] = rd_size << 10; + rd_hardsec[i] = rd_blocksize; + rd_blocksizes[i] = rd_blocksize; + rd_kbsize[i] = rd_size; + } + devfs_handle = devfs_mk_dir (NULL, "rd", NULL); + devfs_register_series (devfs_handle, "%u", NUM_RAMDISKS, + DEVFS_FL_DEFAULT, MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR, + &rd_bd_op, NULL); + + for (i = 0; i < NUM_RAMDISKS; i++) + register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &rd_bd_op, rd_size<<1); + +#ifdef CONFIG_BLK_DEV_INITRD + /* We ought to separate initrd operations here */ + register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); +#endif + + hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ + blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ + blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ + + /* rd_size is given in kB */ + printk("RAMDISK driver initialized: " + "%d RAM disks of %dK size %d blocksize\n", + NUM_RAMDISKS, rd_size, rd_blocksize); + + return 0; +} + +#ifdef MODULE +module_init(rd_init); +module_exit(rd_cleanup); +#endif + +/* loadable module support */ +MODULE_PARM (rd_size, "1i"); +MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); +MODULE_PARM (rd_blocksize, "i"); +MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); + +MODULE_LICENSE("GPL"); + +/* End of non-loading portions of the RAM disk driver */ + +#ifdef RD_LOADER +/* + * This routine tries to find a RAM disk image to load, and returns the + * number of blocks to read for a non-compressed image, 0 if the image + * is a compressed image, and -1 if an image with the right magic + * numbers could not be found. 
+ * + * We currently check for the following magic numbers: + * minix + * ext2 + * romfs + * gzip + */ +static int __init +identify_ramdisk_image(kdev_t device, struct file *fp, int start_block) +{ + const int size = 512; + struct minix_super_block *minixsb; + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + int nblocks = -1; + unsigned char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (buf == 0) + return -1; + + minixsb = (struct minix_super_block *) buf; + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + memset(buf, 0xe5, size); + + /* + * Read block 0 to test for gzipped kernel + */ + if (fp->f_op->llseek) + fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0); + fp->f_pos = start_block * BLOCK_SIZE; + + fp->f_op->read(fp, buf, size, &fp->f_pos); + + /* + * If it matches the gzip magic numbers, return -1 + */ + if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { + printk(KERN_NOTICE + "RAMDISK: Compressed image found at block %d\n", + start_block); + nblocks = 0; + goto done; + } + + /* romfs is at block zero too */ + if (romfsb->word0 == ROMSB_WORD0 && + romfsb->word1 == ROMSB_WORD1) { + printk(KERN_NOTICE + "RAMDISK: romfs filesystem found at block %d\n", + start_block); + nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; + goto done; + } + + /* + * Read block 1 to test for minix and ext2 superblock + */ + if (fp->f_op->llseek) + fp->f_op->llseek(fp, (start_block+1) * BLOCK_SIZE, 0); + fp->f_pos = (start_block+1) * BLOCK_SIZE; + + fp->f_op->read(fp, buf, size, &fp->f_pos); + + /* Try minix */ + if (minixsb->s_magic == MINIX_SUPER_MAGIC || + minixsb->s_magic == MINIX_SUPER_MAGIC2) { + printk(KERN_NOTICE + "RAMDISK: Minix filesystem found at block %d\n", + start_block); + nblocks = minixsb->s_nzones << minixsb->s_log_zone_size; + goto done; + } + + /* Try ext2 */ + if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk(KERN_NOTICE + "RAMDISK: ext2 filesystem found at block %d\n", + start_block); + nblocks = le32_to_cpu(ext2sb->s_blocks_count); + goto done; + } + + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); + +done: + if (fp->f_op->llseek) + fp->f_op->llseek(fp, start_block * BLOCK_SIZE, 0); + fp->f_pos = start_block * BLOCK_SIZE; + + kfree(buf); + return nblocks; +} + +/* + * This routine loads in the RAM disk image. 
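+ * It is passed the source device, the offset of the image on that
+ * device (in BLOCK_SIZE units), and the target ramdisk unit; e.g.
+ * rd_load_disk() below ends up calling
+ *
+ *	rd_load_image(ROOT_DEV, rd_image_start, n);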
+ */ +static void __init rd_load_image(kdev_t device, int offset, int unit) +{ + struct inode *inode, *out_inode; + struct file infile, outfile; + struct dentry in_dentry, out_dentry; + mm_segment_t fs; + kdev_t ram_device; + int nblocks, i; + char *buf; + unsigned short rotate = 0; + unsigned short devblocks = 0; +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO) + char rotator[4] = { '|' , '/' , '-' , '\\' }; +#endif + ram_device = MKDEV(MAJOR_NR, unit); + + if ((inode = get_empty_inode()) == NULL) + return; + memset(&infile, 0, sizeof(infile)); + memset(&in_dentry, 0, sizeof(in_dentry)); + infile.f_mode = 1; /* read only */ + infile.f_dentry = &in_dentry; + in_dentry.d_inode = inode; + infile.f_op = &def_blk_fops; + init_special_inode(inode, S_IFBLK | S_IRUSR, kdev_t_to_nr(device)); + + if ((out_inode = get_empty_inode()) == NULL) + goto free_inode; + memset(&outfile, 0, sizeof(outfile)); + memset(&out_dentry, 0, sizeof(out_dentry)); + outfile.f_mode = 3; /* read/write */ + outfile.f_dentry = &out_dentry; + out_dentry.d_inode = out_inode; + outfile.f_op = &def_blk_fops; + init_special_inode(out_inode, S_IFBLK | S_IRUSR | S_IWUSR, kdev_t_to_nr(ram_device)); + + if (blkdev_open(inode, &infile) != 0) { + iput(out_inode); + goto free_inode; + } + if (blkdev_open(out_inode, &outfile) != 0) + goto free_inodes; + + fs = get_fs(); + set_fs(KERNEL_DS); + + nblocks = identify_ramdisk_image(device, &infile, offset); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { +#ifdef BUILD_CRAMDISK + if (crd_load(&infile, &outfile) == 0) + goto successful_load; +#else + printk(KERN_NOTICE + "RAMDISK: Kernel does not support compressed " + "RAM disk images\n"); +#endif + goto done; + } + + /* + * NOTE NOTE: nblocks suppose that the blocksize is BLOCK_SIZE, so + * rd_load_image will work only with filesystem BLOCK_SIZE wide! + * So make sure to use 1k blocksize while generating ext2fs + * ramdisk-images. + */ + if (nblocks > (rd_length[unit] >> BLOCK_SIZE_BITS)) { + printk("RAMDISK: image too big! (%d/%ld blocks)\n", + nblocks, rd_length[unit] >> BLOCK_SIZE_BITS); + goto done; + } + + /* + * OK, time to copy in the data + */ + buf = kmalloc(BLOCK_SIZE, GFP_KERNEL); + if (buf == 0) { + printk(KERN_ERR "RAMDISK: could not allocate buffer\n"); + goto done; + } + + if (blk_size[MAJOR(device)]) + devblocks = blk_size[MAJOR(device)][MINOR(device)]; + +#ifdef CONFIG_BLK_DEV_INITRD + if (MAJOR(device) == MAJOR_NR && MINOR(device) == INITRD_MINOR) + devblocks = nblocks; +#endif + + if (devblocks == 0) { + printk(KERN_ERR "RAMDISK: could not determine device size\n"); + goto done; + } + + printk(KERN_NOTICE "RAMDISK: Loading %d blocks [%d disk%s] into ram disk... ", + nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + for (i=0; i < nblocks; i++) { + if (i && (i % devblocks == 0)) { + printk("done disk #%d.\n", i/devblocks); + rotate = 0; + if (infile.f_op->release(inode, &infile) != 0) { + printk("Error closing the disk.\n"); + goto noclose_input; + } + printk("Please insert disk #%d and press ENTER\n", i/devblocks+1); + wait_for_keypress(); + if (blkdev_open(inode, &infile) != 0) { + printk("Error opening disk.\n"); + goto noclose_input; + } + infile.f_pos = 0; + printk("Loading disk #%d... 
", i/devblocks+1); + } + infile.f_op->read(&infile, buf, BLOCK_SIZE, &infile.f_pos); + outfile.f_op->write(&outfile, buf, BLOCK_SIZE, &outfile.f_pos); +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) && !defined(CONFIG_XENO) + if (!(i % 16)) { + printk("%c\b", rotator[rotate & 0x3]); + rotate++; + } +#endif + } + printk("done.\n"); + kfree(buf); + +successful_load: + ROOT_DEV = MKDEV(MAJOR_NR, unit); + if (ROOT_DEVICE_NAME != NULL) strcpy (ROOT_DEVICE_NAME, "rd/0"); + +done: + infile.f_op->release(inode, &infile); +noclose_input: + blkdev_close(out_inode, &outfile); + iput(inode); + iput(out_inode); + set_fs(fs); + return; +free_inodes: /* free inodes on error */ + iput(out_inode); + infile.f_op->release(inode, &infile); +free_inode: + iput(inode); +} + +#ifdef CONFIG_MAC_FLOPPY +int swim3_fd_eject(int devnum); +#endif + +static void __init rd_load_disk(int n) +{ + + if (rd_doload == 0) + return; + + if (MAJOR(ROOT_DEV) != FLOPPY_MAJOR +#ifdef CONFIG_BLK_DEV_INITRD + && MAJOR(real_root_dev) != FLOPPY_MAJOR +#endif + ) + return; + + if (rd_prompt) { +#ifdef CONFIG_BLK_DEV_FD + floppy_eject(); +#endif +#ifdef CONFIG_MAC_FLOPPY + if(MAJOR(ROOT_DEV) == FLOPPY_MAJOR) + swim3_fd_eject(MINOR(ROOT_DEV)); + else if(MAJOR(real_root_dev) == FLOPPY_MAJOR) + swim3_fd_eject(MINOR(real_root_dev)); +#endif + printk(KERN_NOTICE + "VFS: Insert root floppy disk to be loaded into RAM disk and press ENTER\n"); + wait_for_keypress(); + } + + rd_load_image(ROOT_DEV,rd_image_start, n); + +} + +void __init rd_load(void) +{ + rd_load_disk(0); +} + +void __init rd_load_secondary(void) +{ + rd_load_disk(1); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void __init initrd_load(void) +{ + rd_load_image(MKDEV(MAJOR_NR, INITRD_MINOR),rd_image_start,0); +} +#endif + +#endif /* RD_LOADER */ + +#ifdef BUILD_CRAMDISK + +/* + * gzip declarations + */ + +#define OF(args) args + +#ifndef memzero +#define memzero(s, n) memset ((s), 0, (n)) +#endif + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define INBUFSIZ 4096 +#define WSIZE 0x8000 /* window size--must be a power of two, and */ + /* at least 32K for zip's deflate method */ + +static uch *inbuf; +static uch *window; + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ +static int exit_code; +static long bytes_out; +static struct file *crd_infp, *crd_outfp; + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions (stubbed out) */ +#define Assert(cond,msg) +#define Trace(x) +#define Tracev(x) +#define Tracevv(x) +#define Tracec(c,x) +#define Tracecv(c,x) + +#define STATIC static + +static int fill_inbuf(void); +static void flush_window(void); +static void *malloc(int size); +static void free(void *where); +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +#include "../../lib/inflate.c" + +static void __init *malloc(int size) +{ + return kmalloc(size, GFP_KERNEL); +} + +static void __init free(void *where) +{ + kfree(where); +} + +static void __init gzip_mark(void **ptr) +{ +} + +static void __init gzip_release(void **ptr) +{ +} + + +/* =========================================================================== + * Fill the input buffer. This is called only when the buffer is empty + * and at least one byte is really needed. 
+ */ +static int __init fill_inbuf(void) +{ + if (exit_code) return -1; + + insize = crd_infp->f_op->read(crd_infp, inbuf, INBUFSIZ, + &crd_infp->f_pos); + if (insize == 0) return -1; + + inptr = 1; + + return inbuf[0]; +} + +/* =========================================================================== + * Write the output window window[0..outcnt-1] and update crc and bytes_out. + * (Used for the decompressed data only.) + */ +static void __init flush_window(void) +{ + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, ch; + + crd_outfp->f_op->write(crd_outfp, window, outcnt, &crd_outfp->f_pos); + in = window; + for (n = 0; n < outcnt; n++) { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; +} + +static void __init error(char *x) +{ + printk(KERN_ERR "%s", x); + exit_code = 1; +} + +static int __init +crd_load(struct file * fp, struct file *outfp) +{ + int result; + + insize = 0; /* valid bytes in inbuf */ + inptr = 0; /* index of next byte to be processed in inbuf */ + outcnt = 0; /* bytes in output buffer */ + exit_code = 0; + bytes_out = 0; + crc = (ulg)0xffffffffL; /* shift register contents */ + + crd_infp = fp; + crd_outfp = outfp; + inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); + if (inbuf == 0) { + printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); + return -1; + } + window = kmalloc(WSIZE, GFP_KERNEL); + if (window == 0) { + printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); + kfree(inbuf); + return -1; + } + makecrc(); + result = gunzip(); + kfree(inbuf); + kfree(window); + return result; +} + +#endif /* BUILD_CRAMDISK */ + diff --git a/xenolinux-2.4.16-sparse/drivers/char/tty_io.c b/xenolinux-2.4.16-sparse/drivers/char/tty_io.c new file mode 100644 index 0000000000..1466b03add --- /dev/null +++ b/xenolinux-2.4.16-sparse/drivers/char/tty_io.c @@ -0,0 +1,2388 @@ +/* + * linux/drivers/char/tty_io.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'tty_io.c' gives an orthogonal feeling to tty's, be they consoles + * or rs-channels. It also implements echoing, cooked mode etc. + * + * Kill-line thanks to John T Kohl, who also corrected VMIN = VTIME = 0. + * + * Modified by Theodore Ts'o, 9/14/92, to dynamically allocate the + * tty_struct and tty_queue structures. Previously there was an array + * of 256 tty_struct's which was statically allocated, and the + * tty_queue structures were allocated at boot time. Both are now + * dynamically allocated only when the tty is open. + * + * Also restructured routines so that there is more of a separation + * between the high-level tty routines (tty_io.c and tty_ioctl.c) and + * the low-level tty routines (serial.c, pty.c, console.c). This + * makes for cleaner and more compact code. -TYT, 9/17/92 + * + * Modified by Fred N. van Kempen, 01/29/93, to add line disciplines + * which can be dynamically activated and de-activated by the line + * discipline handling modules (like SLIP). + * + * NOTE: pay no attention to the line discipline code (yet); its + * interface is still subject to change in this version... + * -- TYT, 1/31/92 + * + * Added functionality to the OPOST tty handling. No delays, but all + * other bits should be there. + * -- Nick Holloway , 27th May 1993. + * + * Rewrote canonical mode and added more termios flags. + * -- julian@uhunix.uhcc.hawaii.edu (J. Cowley), 13Jan94 + * + * Reorganized FASYNC support so mouse code can share it. + * -- ctm@ardi.com, 9Sep95 + * + * New TIOCLINUX variants added. 
+ * -- mj@k332.feld.cvut.cz, 19-Nov-95 + * + * Restrict vt switching via ioctl() + * -- grif@cs.ucr.edu, 5-Dec-95 + * + * Move console and virtual terminal code to more appropriate files, + * implement CONFIG_VT and generalize console device interface. + * -- Marko Kohtala , March 97 + * + * Rewrote init_dev and release_dev to eliminate races. + * -- Bill Hawes , June 97 + * + * Added devfs support. + * -- C. Scott Ananian , 13-Jan-1998 + * + * Added support for a Unix98-style ptmx device. + * -- C. Scott Ananian , 14-Jan-1998 + * + * Reduced memory usage for older ARM systems + * -- Russell King + * + * Move do_SAK() into process context. Less stack use in devfs functions. + * alloc_tty_struct() always uses kmalloc() -- Andrew Morton 17Mar01 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef CONFIG_XENO +extern void xeno_console_init(void); +#endif + +#ifdef CONFIG_VT +extern void con_init_devfs (void); +#endif + +#define CONSOLE_DEV MKDEV(TTY_MAJOR,0) +#define TTY_DEV MKDEV(TTYAUX_MAJOR,0) +#define SYSCONS_DEV MKDEV(TTYAUX_MAJOR,1) +#define PTMX_DEV MKDEV(TTYAUX_MAJOR,2) + +#undef TTY_DEBUG_HANGUP + +#define TTY_PARANOIA_CHECK 1 +#define CHECK_TTY_COUNT 1 + +struct termios tty_std_termios; /* for the benefit of tty drivers */ +struct tty_driver *tty_drivers; /* linked list of tty drivers */ +struct tty_ldisc ldiscs[NR_LDISCS]; /* line disc dispatch table */ + +#ifdef CONFIG_UNIX98_PTYS +extern struct tty_driver ptm_driver[]; /* Unix98 pty masters; for /dev/ptmx */ +extern struct tty_driver pts_driver[]; /* Unix98 pty slaves; for /dev/ptmx */ +#endif + +/* + * redirect is the pseudo-tty that console output + * is redirected to if asked by TIOCCONS. + */ +struct tty_struct * redirect; + +static void initialize_tty_struct(struct tty_struct *tty); + +static ssize_t tty_read(struct file *, char *, size_t, loff_t *); +static ssize_t tty_write(struct file *, const char *, size_t, loff_t *); +static unsigned int tty_poll(struct file *, poll_table *); +static int tty_open(struct inode *, struct file *); +static int tty_release(struct inode *, struct file *); +int tty_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg); +static int tty_fasync(int fd, struct file * filp, int on); +extern int vme_scc_init (void); +extern long vme_scc_console_init(void); +extern int serial167_init(void); +extern long serial167_console_init(void); +extern void console_8xx_init(void); +extern int rs_8xx_init(void); +extern void mac_scc_console_init(void); +extern void hwc_console_init(void); +extern void hwc_tty_init(void); +extern void con3215_init(void); +extern void tty3215_init(void); +extern void tub3270_con_init(void); +extern void tub3270_init(void); +extern void rs285_console_init(void); +extern void sa1100_rs_console_init(void); +extern void sgi_serial_console_init(void); +extern void sci_console_init(void); +extern void tx3912_console_init(void); +extern void tx3912_rs_init(void); + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef MAX +#define MAX(a,b) ((a) < (b) ? 
(b) : (a)) +#endif + +static struct tty_struct *alloc_tty_struct(void) +{ + struct tty_struct *tty; + + tty = kmalloc(sizeof(struct tty_struct), GFP_KERNEL); + if (tty) + memset(tty, 0, sizeof(struct tty_struct)); + return tty; +} + +static inline void free_tty_struct(struct tty_struct *tty) +{ + kfree(tty); +} + +/* + * This routine returns the name of tty. + */ +static char * +_tty_make_name(struct tty_struct *tty, const char *name, char *buf) +{ + int idx = (tty)?MINOR(tty->device) - tty->driver.minor_start:0; + + if (!tty) /* Hmm. NULL pointer. That's fun. */ + strcpy(buf, "NULL tty"); + else + sprintf(buf, name, + idx + tty->driver.name_base); + + return buf; +} + +#define TTY_NUMBER(tty) (MINOR((tty)->device) - (tty)->driver.minor_start + \ + (tty)->driver.name_base) + +char *tty_name(struct tty_struct *tty, char *buf) +{ + return _tty_make_name(tty, (tty)?tty->driver.name:NULL, buf); +} + +inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device, + const char *routine) +{ +#ifdef TTY_PARANOIA_CHECK + static const char badmagic[] = KERN_WARNING + "Warning: bad magic number for tty struct (%s) in %s\n"; + static const char badtty[] = KERN_WARNING + "Warning: null TTY for (%s) in %s\n"; + + if (!tty) { + printk(badtty, kdevname(device), routine); + return 1; + } + if (tty->magic != TTY_MAGIC) { + printk(badmagic, kdevname(device), routine); + return 1; + } +#endif + return 0; +} + +static int check_tty_count(struct tty_struct *tty, const char *routine) +{ +#ifdef CHECK_TTY_COUNT + struct list_head *p; + int count = 0; + + file_list_lock(); + for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) { + if(list_entry(p, struct file, f_list)->private_data == tty) + count++; + } + file_list_unlock(); + if (tty->driver.type == TTY_DRIVER_TYPE_PTY && + tty->driver.subtype == PTY_TYPE_SLAVE && + tty->link && tty->link->count) + count++; + if (tty->count != count) { + printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " + "!= #fd's(%d) in %s\n", + kdevname(tty->device), tty->count, count, routine); + return count; + } +#endif + return 0; +} + +int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) +{ + if (disc < N_TTY || disc >= NR_LDISCS) + return -EINVAL; + + if (new_ldisc) { + ldiscs[disc] = *new_ldisc; + ldiscs[disc].flags |= LDISC_FLAG_DEFINED; + ldiscs[disc].num = disc; + } else + memset(&ldiscs[disc], 0, sizeof(struct tty_ldisc)); + + return 0; +} + +EXPORT_SYMBOL(tty_register_ldisc); + +/* Set the discipline of a tty line. */ +static int tty_set_ldisc(struct tty_struct *tty, int ldisc) +{ + int retval = 0; + struct tty_ldisc o_ldisc; + char buf[64]; + + if ((ldisc < N_TTY) || (ldisc >= NR_LDISCS)) + return -EINVAL; + /* Eduardo Blanco */ + /* Cyrus Durgin */ + if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) { + char modname [20]; + sprintf(modname, "tty-ldisc-%d", ldisc); + request_module (modname); + } + if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) + return -EINVAL; + + if (tty->ldisc.num == ldisc) + return 0; /* We are already in the desired discipline */ + o_ldisc = tty->ldisc; + + tty_wait_until_sent(tty, 0); + + /* Shutdown the current discipline. */ + if (tty->ldisc.close) + (tty->ldisc.close)(tty); + + /* Now set up the new line discipline. 
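+	 * If the new discipline's open() fails we fall back first to
+	 * the old discipline, then to N_TTY, which must always open
+	 * successfully (see the panic() below).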
*/ + tty->ldisc = ldiscs[ldisc]; + tty->termios->c_line = ldisc; + if (tty->ldisc.open) + retval = (tty->ldisc.open)(tty); + if (retval < 0) { + tty->ldisc = o_ldisc; + tty->termios->c_line = tty->ldisc.num; + if (tty->ldisc.open && (tty->ldisc.open(tty) < 0)) { + tty->ldisc = ldiscs[N_TTY]; + tty->termios->c_line = N_TTY; + if (tty->ldisc.open) { + int r = tty->ldisc.open(tty); + + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty, buf), r); + } + } + } + if (tty->ldisc.num != o_ldisc.num && tty->driver.set_ldisc) + tty->driver.set_ldisc(tty); + return retval; +} + +/* + * This routine returns a tty driver structure, given a device number + */ +struct tty_driver *get_tty_driver(kdev_t device) +{ + int major, minor; + struct tty_driver *p; + + minor = MINOR(device); + major = MAJOR(device); + + for (p = tty_drivers; p; p = p->next) { + if (p->major != major) + continue; + if (minor < p->minor_start) + continue; + if (minor >= p->minor_start + p->num) + continue; + return p; + } + return NULL; +} + +/* + * If we try to write to, or set the state of, a terminal and we're + * not in the foreground, send a SIGTTOU. If the signal is blocked or + * ignored, go ahead and perform the operation. (POSIX 7.2) + */ +int tty_check_change(struct tty_struct * tty) +{ + if (current->tty != tty) + return 0; + if (tty->pgrp <= 0) { + printk(KERN_WARNING "tty_check_change: tty->pgrp <= 0!\n"); + return 0; + } + if (current->pgrp == tty->pgrp) + return 0; + if (is_ignored(SIGTTOU)) + return 0; + if (is_orphaned_pgrp(current->pgrp)) + return -EIO; + (void) kill_pg(current->pgrp,SIGTTOU,1); + return -ERESTARTSYS; +} + +static ssize_t hung_up_tty_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + /* Can't seek (pread) on ttys. */ + if (ppos != &file->f_pos) + return -ESPIPE; + return 0; +} + +static ssize_t hung_up_tty_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + /* Can't seek (pwrite) on ttys. */ + if (ppos != &file->f_pos) + return -ESPIPE; + return -EIO; +} + +/* No kernel lock held - none needed ;) */ +static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait) +{ + return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; +} + +static int hung_up_tty_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + return cmd == TIOCSPGRP ? -ENOTTY : -EIO; +} + +static struct file_operations tty_fops = { + llseek: no_llseek, + read: tty_read, + write: tty_write, + poll: tty_poll, + ioctl: tty_ioctl, + open: tty_open, + release: tty_release, + fasync: tty_fasync, +}; + +static struct file_operations hung_up_tty_fops = { + llseek: no_llseek, + read: hung_up_tty_read, + write: hung_up_tty_write, + poll: hung_up_tty_poll, + ioctl: hung_up_tty_ioctl, + release: tty_release, +}; + +/* + * This can be called by the "eventd" kernel thread. That is process synchronous, + * but doesn't hold any locks, so we need to make sure we have the appropriate + * locks for what we're doing.. 
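+ *
+ * Drivers normally get here via tty_hangup() below, which just does
+ *
+ *	schedule_task(&tty->tq_hangup);
+ *
+ * so the real hangup work runs from the process-context task queue
+ * rather than from interrupt context.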
+ */ +void do_tty_hangup(void *data) +{ + struct tty_struct *tty = (struct tty_struct *) data; + struct file * cons_filp = NULL; + struct task_struct *p; + struct list_head *l; + int closecount = 0, n; + + if (!tty) + return; + + /* inuse_filps is protected by the single kernel lock */ + lock_kernel(); + + check_tty_count(tty, "do_tty_hangup"); + file_list_lock(); + for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) { + struct file * filp = list_entry(l, struct file, f_list); + if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV || + filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) { + cons_filp = filp; + continue; + } + if (filp->f_op != &tty_fops) + continue; + closecount++; + tty_fasync(-1, filp, 0); /* can't block */ + filp->f_op = &hung_up_tty_fops; + } + file_list_unlock(); + + /* FIXME! What are the locking issues here? This may me overdoing things.. */ + { + unsigned long flags; + + save_flags(flags); cli(); + if (tty->ldisc.flush_buffer) + tty->ldisc.flush_buffer(tty); + if (tty->driver.flush_buffer) + tty->driver.flush_buffer(tty); + if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && + tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + restore_flags(flags); + } + + wake_up_interruptible(&tty->write_wait); + wake_up_interruptible(&tty->read_wait); + + /* + * Shutdown the current line discipline, and reset it to + * N_TTY. + */ + if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) + *tty->termios = tty->driver.init_termios; + if (tty->ldisc.num != ldiscs[N_TTY].num) { + if (tty->ldisc.close) + (tty->ldisc.close)(tty); + tty->ldisc = ldiscs[N_TTY]; + tty->termios->c_line = N_TTY; + if (tty->ldisc.open) { + int i = (tty->ldisc.open)(tty); + if (i < 0) + printk(KERN_ERR "do_tty_hangup: N_TTY open: " + "error %d\n", -i); + } + } + + read_lock(&tasklist_lock); + for_each_task(p) { + if ((tty->session > 0) && (p->session == tty->session) && + p->leader) { + send_sig(SIGHUP,p,1); + send_sig(SIGCONT,p,1); + if (tty->pgrp > 0) + p->tty_old_pgrp = tty->pgrp; + } + if (p->tty == tty) + p->tty = NULL; + } + read_unlock(&tasklist_lock); + + tty->flags = 0; + tty->session = 0; + tty->pgrp = -1; + tty->ctrl_status = 0; + /* + * If one of the devices matches a console pointer, we + * cannot just call hangup() because that will cause + * tty->count and state->count to go out of sync. + * So we just call close() the right number of times. + */ + if (cons_filp) { + if (tty->driver.close) + for (n = 0; n < closecount; n++) + tty->driver.close(tty, cons_filp); + } else if (tty->driver.hangup) + (tty->driver.hangup)(tty); + unlock_kernel(); +} + +void tty_hangup(struct tty_struct * tty) +{ +#ifdef TTY_DEBUG_HANGUP + char buf[64]; + + printk(KERN_DEBUG "%s hangup...\n", tty_name(tty, buf)); +#endif + schedule_task(&tty->tq_hangup); +} + +void tty_vhangup(struct tty_struct * tty) +{ +#ifdef TTY_DEBUG_HANGUP + char buf[64]; + + printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf)); +#endif + do_tty_hangup((void *) tty); +} + +int tty_hung_up_p(struct file * filp) +{ + return (filp->f_op == &hung_up_tty_fops); +} + +/* + * This function is typically called only by the session leader, when + * it wants to disassociate itself from its controlling tty. + * + * It performs the following functions: + * (1) Sends a SIGHUP and SIGCONT to the foreground process group + * (2) Clears the tty from being controlling the session + * (3) Clears the controlling tty for all processes in the + * session group. 
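+ *
+ * User space typically triggers the TIOCNOTTY path with the classic
+ * daemon detach idiom (a sketch):
+ *
+ *	if ((fd = open("/dev/tty", O_RDWR)) >= 0) {
+ *		ioctl(fd, TIOCNOTTY, 0);
+ *		close(fd);
+ *	}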
+ * + * The argument on_exit is set to 1 if called when a process is + * exiting; it is 0 if called by the ioctl TIOCNOTTY. + */ +void disassociate_ctty(int on_exit) +{ + struct tty_struct *tty = current->tty; + struct task_struct *p; + int tty_pgrp = -1; + + if (tty) { + tty_pgrp = tty->pgrp; + if (on_exit && tty->driver.type != TTY_DRIVER_TYPE_PTY) + tty_vhangup(tty); + } else { + if (current->tty_old_pgrp) { + kill_pg(current->tty_old_pgrp, SIGHUP, on_exit); + kill_pg(current->tty_old_pgrp, SIGCONT, on_exit); + } + return; + } + if (tty_pgrp > 0) { + kill_pg(tty_pgrp, SIGHUP, on_exit); + if (!on_exit) + kill_pg(tty_pgrp, SIGCONT, on_exit); + } + + current->tty_old_pgrp = 0; + tty->session = 0; + tty->pgrp = -1; + + read_lock(&tasklist_lock); + for_each_task(p) + if (p->session == current->session) + p->tty = NULL; + read_unlock(&tasklist_lock); +} + +void wait_for_keypress(void) +{ + struct console *c = console_drivers; + if (c) c->wait_key(c); +} + +void stop_tty(struct tty_struct *tty) +{ + if (tty->stopped) + return; + tty->stopped = 1; + if (tty->link && tty->link->packet) { + tty->ctrl_status &= ~TIOCPKT_START; + tty->ctrl_status |= TIOCPKT_STOP; + wake_up_interruptible(&tty->link->read_wait); + } + if (tty->driver.stop) + (tty->driver.stop)(tty); +} + +void start_tty(struct tty_struct *tty) +{ + if (!tty->stopped || tty->flow_stopped) + return; + tty->stopped = 0; + if (tty->link && tty->link->packet) { + tty->ctrl_status &= ~TIOCPKT_STOP; + tty->ctrl_status |= TIOCPKT_START; + wake_up_interruptible(&tty->link->read_wait); + } + if (tty->driver.start) + (tty->driver.start)(tty); + if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && + tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + wake_up_interruptible(&tty->write_wait); +} + +static ssize_t tty_read(struct file * file, char * buf, size_t count, + loff_t *ppos) +{ + int i; + struct tty_struct * tty; + struct inode *inode; + + /* Can't seek (pread) on ttys. */ + if (ppos != &file->f_pos) + return -ESPIPE; + + tty = (struct tty_struct *)file->private_data; + inode = file->f_dentry->d_inode; + if (tty_paranoia_check(tty, inode->i_rdev, "tty_read")) + return -EIO; + if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) + return -EIO; + + /* This check not only needs to be done before reading, but also + whenever read_chan() gets woken up after sleeping, so I've + moved it to there. This should only be done for the N_TTY + line discipline, anyway. Same goes for write_chan(). -- jlc. 
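The background-I/O check described above is visible from user space through the standard job-control API: a process can compare its own process group with the terminal's foreground group before touching the tty. A small POSIX sketch (standard calls, nothing specific to this tree):

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t fg = tcgetpgrp(STDIN_FILENO);

    if (fg == (pid_t)-1) {
        perror("tcgetpgrp");    /* e.g. stdin is not a tty */
        return 1;
    }
    /* A background process doing tty I/O gets SIGTTIN/SIGTTOU,
     * which is exactly what tty_check_change() enforces above. */
    printf("terminal fg pgrp %d, mine %d: %s\n",
           (int)fg, (int)getpgrp(),
           fg == getpgrp() ? "foreground" : "background");
    return 0;
}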
*/ +#if 0 + if ((inode->i_rdev != CONSOLE_DEV) && /* don't stop on /dev/console */ + (tty->pgrp > 0) && + (current->tty == tty) && + (tty->pgrp != current->pgrp)) + if (is_ignored(SIGTTIN) || is_orphaned_pgrp(current->pgrp)) + return -EIO; + else { + (void) kill_pg(current->pgrp, SIGTTIN, 1); + return -ERESTARTSYS; + } +#endif + lock_kernel(); + if (tty->ldisc.read) + i = (tty->ldisc.read)(tty,file,buf,count); + else + i = -EIO; + unlock_kernel(); + if (i > 0) + inode->i_atime = CURRENT_TIME; + return i; +} + +/* + * Split writes up in sane blocksizes to avoid + * denial-of-service type attacks + */ +static inline ssize_t do_tty_write( + ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), + struct tty_struct *tty, + struct file *file, + const unsigned char *buf, + size_t count) +{ + ssize_t ret = 0, written = 0; + + if (down_interruptible(&tty->atomic_write)) { + return -ERESTARTSYS; + } + if ( test_bit(TTY_NO_WRITE_SPLIT, &tty->flags) ) { + lock_kernel(); + written = write(tty, file, buf, count); + unlock_kernel(); + } else { + for (;;) { + unsigned long size = MAX(PAGE_SIZE*2,16384); + if (size > count) + size = count; + lock_kernel(); + ret = write(tty, file, buf, size); + unlock_kernel(); + if (ret <= 0) + break; + written += ret; + buf += ret; + count -= ret; + if (!count) + break; + ret = -ERESTARTSYS; + if (signal_pending(current)) + break; + if (current->need_resched) + schedule(); + } + } + if (written) { + file->f_dentry->d_inode->i_mtime = CURRENT_TIME; + ret = written; + } + up(&tty->atomic_write); + return ret; +} + + +static ssize_t tty_write(struct file * file, const char * buf, size_t count, + loff_t *ppos) +{ + int is_console; + struct tty_struct * tty; + struct inode *inode; + + /* Can't seek (pwrite) on ttys. */ + if (ppos != &file->f_pos) + return -ESPIPE; + + /* + * For now, we redirect writes from /dev/console as + * well as /dev/tty0. + */ + inode = file->f_dentry->d_inode; + is_console = (inode->i_rdev == SYSCONS_DEV || + inode->i_rdev == CONSOLE_DEV); + + if (is_console && redirect) + tty = redirect; + else + tty = (struct tty_struct *)file->private_data; + if (tty_paranoia_check(tty, inode->i_rdev, "tty_write")) + return -EIO; + if (!tty || !tty->driver.write || (test_bit(TTY_IO_ERROR, &tty->flags))) + return -EIO; +#if 0 + if (!is_console && L_TOSTOP(tty) && (tty->pgrp > 0) && + (current->tty == tty) && (tty->pgrp != current->pgrp)) { + if (is_orphaned_pgrp(current->pgrp)) + return -EIO; + if (!is_ignored(SIGTTOU)) { + (void) kill_pg(current->pgrp, SIGTTOU, 1); + return -ERESTARTSYS; + } + } +#endif + if (!tty->ldisc.write) + return -EIO; + return do_tty_write(tty->ldisc.write, tty, file, + (const unsigned char *)buf, count); +} + +/* Semaphore to protect creating and releasing a tty */ +static DECLARE_MUTEX(tty_sem); + +static void down_tty_sem(int index) +{ + down(&tty_sem); +} + +static void up_tty_sem(int index) +{ + up(&tty_sem); +} + +static void release_mem(struct tty_struct *tty, int idx); + +/* + * WSH 06/09/97: Rewritten to remove races and properly clean up after a + * failed open. The new code protects the open with a semaphore, so it's + * really quite straightforward. The semaphore locking can probably be + * relaxed for the (most common) case of reopening a tty. 
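Backing up briefly to do_tty_write() above: stripped of the kernel-lock juggling, its split-write loop is the familiar chunked-write pattern. A user-space rendering, with an arbitrary chunk size and write(2) standing in for the ldisc write:

#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

/* Write 'count' bytes in bounded chunks, accumulating partial writes;
 * the kernel version additionally checks for pending signals and
 * reschedules between chunks. */
static ssize_t chunked_write(int fd, const char *buf, size_t count)
{
    const size_t chunk = 16384;    /* cf. MAX(PAGE_SIZE*2,16384) above */
    ssize_t written = 0;

    while (count > 0) {
        size_t n = count < chunk ? count : chunk;
        ssize_t ret = write(fd, buf, n);

        if (ret < 0) {
            if (errno == EINTR)
                continue;          /* interrupted: retry this chunk */
            return written ? written : -1;
        }
        written += ret;
        buf += ret;
        count -= ret;
    }
    return written;
}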
+ */ +static int init_dev(kdev_t device, struct tty_struct **ret_tty) +{ + struct tty_struct *tty, *o_tty; + struct termios *tp, **tp_loc, *o_tp, **o_tp_loc; + struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; + struct tty_driver *driver; + int retval=0; + int idx; + + driver = get_tty_driver(device); + if (!driver) + return -ENODEV; + + idx = MINOR(device) - driver->minor_start; + + /* + * Check whether we need to acquire the tty semaphore to avoid + * race conditions. For now, play it safe. + */ + down_tty_sem(idx); + + /* check whether we're reopening an existing tty */ + tty = driver->table[idx]; + if (tty) goto fast_track; + + /* + * First time open is complex, especially for PTY devices. + * This code guarantees that either everything succeeds and the + * TTY is ready for operation, or else the table slots are vacated + * and the allocated memory released. (Except that the termios + * and locked termios may be retained.) + */ + + o_tty = NULL; + tp = o_tp = NULL; + ltp = o_ltp = NULL; + + tty = alloc_tty_struct(); + if(!tty) + goto fail_no_mem; + initialize_tty_struct(tty); + tty->device = device; + tty->driver = *driver; + + tp_loc = &driver->termios[idx]; + if (!*tp_loc) { + tp = (struct termios *) kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!tp) + goto free_mem_out; + *tp = driver->init_termios; + } + + ltp_loc = &driver->termios_locked[idx]; + if (!*ltp_loc) { + ltp = (struct termios *) kmalloc(sizeof(struct termios), + GFP_KERNEL); + if (!ltp) + goto free_mem_out; + memset(ltp, 0, sizeof(struct termios)); + } + + if (driver->type == TTY_DRIVER_TYPE_PTY) { + o_tty = alloc_tty_struct(); + if (!o_tty) + goto free_mem_out; + initialize_tty_struct(o_tty); + o_tty->device = (kdev_t) MKDEV(driver->other->major, + driver->other->minor_start + idx); + o_tty->driver = *driver->other; + + o_tp_loc = &driver->other->termios[idx]; + if (!*o_tp_loc) { + o_tp = (struct termios *) + kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_tp) + goto free_mem_out; + *o_tp = driver->other->init_termios; + } + + o_ltp_loc = &driver->other->termios_locked[idx]; + if (!*o_ltp_loc) { + o_ltp = (struct termios *) + kmalloc(sizeof(struct termios), GFP_KERNEL); + if (!o_ltp) + goto free_mem_out; + memset(o_ltp, 0, sizeof(struct termios)); + } + + /* + * Everything allocated ... set up the o_tty structure. + */ + driver->other->table[idx] = o_tty; + if (!*o_tp_loc) + *o_tp_loc = o_tp; + if (!*o_ltp_loc) + *o_ltp_loc = o_ltp; + o_tty->termios = *o_tp_loc; + o_tty->termios_locked = *o_ltp_loc; + (*driver->other->refcount)++; + if (driver->subtype == PTY_TYPE_MASTER) + o_tty->count++; + + /* Establish the links in both directions */ + tty->link = o_tty; + o_tty->link = tty; + } + + /* + * All structures have been allocated, so now we install them. + * Failures after this point use release_mem to clean up, so + * there's no need to null out the local pointers. + */ + driver->table[idx] = tty; + + if (!*tp_loc) + *tp_loc = tp; + if (!*ltp_loc) + *ltp_loc = ltp; + tty->termios = *tp_loc; + tty->termios_locked = *ltp_loc; + (*driver->refcount)++; + tty->count++; + + /* + * Structures all installed ... call the ldisc open routines. + * If we fail here just call release_mem to clean up. No need + * to decrement the use counts, as release_mem doesn't care. 
+ */ + if (tty->ldisc.open) { + retval = (tty->ldisc.open)(tty); + if (retval) + goto release_mem_out; + } + if (o_tty && o_tty->ldisc.open) { + retval = (o_tty->ldisc.open)(o_tty); + if (retval) { + if (tty->ldisc.close) + (tty->ldisc.close)(tty); + goto release_mem_out; + } + } + goto success; + + /* + * This fast open can be used if the tty is already open. + * No memory is allocated, and the only failures are from + * attempting to open a closing tty or attempting multiple + * opens on a pty master. + */ +fast_track: + if (test_bit(TTY_CLOSING, &tty->flags)) { + retval = -EIO; + goto end_init; + } + if (driver->type == TTY_DRIVER_TYPE_PTY && + driver->subtype == PTY_TYPE_MASTER) { + /* + * special case for PTY masters: only one open permitted, + * and the slave side open count is incremented as well. + */ + if (tty->count) { + retval = -EIO; + goto end_init; + } + tty->link->count++; + } + tty->count++; + tty->driver = *driver; /* N.B. why do this every time?? */ + +success: + *ret_tty = tty; + + /* All paths come through here to release the semaphore */ +end_init: + up_tty_sem(idx); + return retval; + + /* Release locally allocated memory ... nothing placed in slots */ +free_mem_out: + if (o_tp) + kfree(o_tp); + if (o_tty) + free_tty_struct(o_tty); + if (ltp) + kfree(ltp); + if (tp) + kfree(tp); + free_tty_struct(tty); + +fail_no_mem: + retval = -ENOMEM; + goto end_init; + + /* call the tty release_mem routine to clean out this slot */ +release_mem_out: + printk(KERN_INFO "init_dev: ldisc open failed, " + "clearing slot %d\n", idx); + release_mem(tty, idx); + goto end_init; +} + +/* + * Releases memory associated with a tty structure, and clears out the + * driver table slots. + */ +static void release_mem(struct tty_struct *tty, int idx) +{ + struct tty_struct *o_tty; + struct termios *tp; + + if ((o_tty = tty->link) != NULL) { + o_tty->driver.table[idx] = NULL; + if (o_tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { + tp = o_tty->driver.termios[idx]; + o_tty->driver.termios[idx] = NULL; + kfree(tp); + } + o_tty->magic = 0; + (*o_tty->driver.refcount)--; + list_del(&o_tty->tty_files); + free_tty_struct(o_tty); + } + + tty->driver.table[idx] = NULL; + if (tty->driver.flags & TTY_DRIVER_RESET_TERMIOS) { + tp = tty->driver.termios[idx]; + tty->driver.termios[idx] = NULL; + kfree(tp); + } + tty->magic = 0; + (*tty->driver.refcount)--; + list_del(&tty->tty_files); + free_tty_struct(tty); +} + +/* + * Even releasing the tty structures is a tricky business.. We have + * to be very careful that the structures are all released at the + * same time, as interrupts might otherwise get the wrong pointers. + * + * WSH 09/09/97: rewritten to avoid some nasty race conditions that could + * lead to double frees or releasing memory still in use. 
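The free_mem_out/fail_no_mem ladder above is the standard kernel idiom for multi-step construction: allocate everything first, install only on full success, and free exactly what was obtained on the way out. Reduced to two hypothetical resources, with malloc standing in for kmalloc:

#include <stdlib.h>

struct pair { void *a; void *b; };

/* Allocate both members or neither, unwinding with the same
 * goto-ladder style init_dev() uses for its termios buffers. */
static int make_pair(struct pair *p)
{
    p->a = malloc(64);
    if (!p->a)
        goto fail;
    p->b = malloc(64);
    if (!p->b)
        goto free_a;
    return 0;          /* fully constructed */

free_a:
    free(p->a);
    p->a = NULL;
fail:
    return -1;         /* init_dev() returns -ENOMEM here */
}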
+ */ +static void release_dev(struct file * filp) +{ + struct tty_struct *tty, *o_tty; + int pty_master, tty_closing, o_tty_closing, do_sleep; + int idx; + char buf[64]; + + tty = (struct tty_struct *)filp->private_data; + if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "release_dev")) + return; + + check_tty_count(tty, "release_dev"); + + tty_fasync(-1, filp, 0); + + idx = MINOR(tty->device) - tty->driver.minor_start; + pty_master = (tty->driver.type == TTY_DRIVER_TYPE_PTY && + tty->driver.subtype == PTY_TYPE_MASTER); + o_tty = tty->link; + +#ifdef TTY_PARANOIA_CHECK + if (idx < 0 || idx >= tty->driver.num) { + printk(KERN_DEBUG "release_dev: bad idx when trying to " + "free (%s)\n", kdevname(tty->device)); + return; + } + if (tty != tty->driver.table[idx]) { + printk(KERN_DEBUG "release_dev: driver.table[%d] not tty " + "for (%s)\n", idx, kdevname(tty->device)); + return; + } + if (tty->termios != tty->driver.termios[idx]) { + printk(KERN_DEBUG "release_dev: driver.termios[%d] not termios " + "for (%s)\n", + idx, kdevname(tty->device)); + return; + } + if (tty->termios_locked != tty->driver.termios_locked[idx]) { + printk(KERN_DEBUG "release_dev: driver.termios_locked[%d] not " + "termios_locked for (%s)\n", + idx, kdevname(tty->device)); + return; + } +#endif + +#ifdef TTY_DEBUG_HANGUP + printk(KERN_DEBUG "release_dev of %s (tty count=%d)...", + tty_name(tty, buf), tty->count); +#endif + +#ifdef TTY_PARANOIA_CHECK + if (tty->driver.other) { + if (o_tty != tty->driver.other->table[idx]) { + printk(KERN_DEBUG "release_dev: other->table[%d] " + "not o_tty for (%s)\n", + idx, kdevname(tty->device)); + return; + } + if (o_tty->termios != tty->driver.other->termios[idx]) { + printk(KERN_DEBUG "release_dev: other->termios[%d] " + "not o_termios for (%s)\n", + idx, kdevname(tty->device)); + return; + } + if (o_tty->termios_locked != + tty->driver.other->termios_locked[idx]) { + printk(KERN_DEBUG "release_dev: other->termios_locked[" + "%d] not o_termios_locked for (%s)\n", + idx, kdevname(tty->device)); + return; + } + if (o_tty->link != tty) { + printk(KERN_DEBUG "release_dev: bad pty pointers\n"); + return; + } + } +#endif + + if (tty->driver.close) + tty->driver.close(tty, filp); + + /* + * Sanity check: if tty->count is going to zero, there shouldn't be + * any waiters on tty->read_wait or tty->write_wait. We test the + * wait queues and kick everyone out _before_ actually starting to + * close. This ensures that we won't block while releasing the tty + * structure. + * + * The test for the o_tty closing is necessary, since the master and + * slave sides may close in any order. If the slave side closes out + * first, its count will be one, since the master side holds an open. + * Thus this test wouldn't be triggered at the time the slave closes, + * so we do it now. + * + * Note that it's possible for the tty to be opened again while we're + * flushing out waiters. By recalculating the closing flags before + * each iteration we avoid any problems. + */ + while (1) { + tty_closing = tty->count <= 1; + o_tty_closing = o_tty && + (o_tty->count <= (pty_master ? 
1 : 0)); + do_sleep = 0; + + if (tty_closing) { + if (waitqueue_active(&tty->read_wait)) { + wake_up(&tty->read_wait); + do_sleep++; + } + if (waitqueue_active(&tty->write_wait)) { + wake_up(&tty->write_wait); + do_sleep++; + } + } + if (o_tty_closing) { + if (waitqueue_active(&o_tty->read_wait)) { + wake_up(&o_tty->read_wait); + do_sleep++; + } + if (waitqueue_active(&o_tty->write_wait)) { + wake_up(&o_tty->write_wait); + do_sleep++; + } + } + if (!do_sleep) + break; + + printk(KERN_WARNING "release_dev: %s: read/write wait queue " + "active!\n", tty_name(tty, buf)); + schedule(); + } + + /* + * The closing flags are now consistent with the open counts on + * both sides, and we've completed the last operation that could + * block, so it's safe to proceed with closing. + */ + if (pty_master) { + if (--o_tty->count < 0) { + printk(KERN_WARNING "release_dev: bad pty slave count " + "(%d) for %s\n", + o_tty->count, tty_name(o_tty, buf)); + o_tty->count = 0; + } + } + if (--tty->count < 0) { + printk(KERN_WARNING "release_dev: bad tty->count (%d) for %s\n", + tty->count, tty_name(tty, buf)); + tty->count = 0; + } + + /* + * We've decremented tty->count, so we should zero out + * filp->private_data, to break the link between the tty and + * the file descriptor. Otherwise if filp_close() blocks before + * the file descriptor is removed from the inuse_filp + * list, check_tty_count() could observe a discrepancy and + * printk a warning message to the user. + */ + filp->private_data = 0; + + /* + * Perform some housekeeping before deciding whether to return. + * + * Set the TTY_CLOSING flag if this was the last open. In the + * case of a pty we may have to wait around for the other side + * to close, and TTY_CLOSING makes sure we can't be reopened. + */ + if(tty_closing) + set_bit(TTY_CLOSING, &tty->flags); + if(o_tty_closing) + set_bit(TTY_CLOSING, &o_tty->flags); + + /* + * If _either_ side is closing, make sure there aren't any + * processes that still think tty or o_tty is their controlling + * tty. Also, clear redirect if it points to either tty. + */ + if (tty_closing || o_tty_closing) { + struct task_struct *p; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->tty == tty || (o_tty && p->tty == o_tty)) + p->tty = NULL; + } + read_unlock(&tasklist_lock); + + if (redirect == tty || (o_tty && redirect == o_tty)) + redirect = NULL; + } + + /* check whether both sides are closing ... */ + if (!tty_closing || (o_tty && !o_tty_closing)) + return; + +#ifdef TTY_DEBUG_HANGUP + printk(KERN_DEBUG "freeing tty structure..."); +#endif + + /* + * Shutdown the current line discipline, and reset it to N_TTY. + * N.B. why reset ldisc when we're releasing the memory?? + */ + if (tty->ldisc.close) + (tty->ldisc.close)(tty); + tty->ldisc = ldiscs[N_TTY]; + tty->termios->c_line = N_TTY; + if (o_tty) { + if (o_tty->ldisc.close) + (o_tty->ldisc.close)(o_tty); + o_tty->ldisc = ldiscs[N_TTY]; + } + + /* + * Make sure that the tty's task queue isn't activated. + */ + run_task_queue(&tq_timer); + flush_scheduled_tasks(); + + /* + * The release_mem function takes care of the details of clearing + * the slots and preserving the termios structure. + */ + release_mem(tty, idx); +} + +/* + * tty_open and tty_release keep up the tty count that contains the + * number of opens done on a tty. We cannot use the inode-count, as + * different inodes might point to the same tty. + * + * Open-counting is needed for pty masters, as well as for keeping + * track of serial lines: DTR is dropped when the last close happens. 
+ * (This is not done solely through tty->count, now. - Ted 1/27/92) + * + * The termios state of a pty is reset on first open so that + * settings don't persist across reuse. + */ +static int tty_open(struct inode * inode, struct file * filp) +{ + struct tty_struct *tty; + int noctty, retval; + kdev_t device; + unsigned short saved_flags; + char buf[64]; + + saved_flags = filp->f_flags; +retry_open: + noctty = filp->f_flags & O_NOCTTY; + device = inode->i_rdev; + if (device == TTY_DEV) { + if (!current->tty) + return -ENXIO; + device = current->tty->device; + filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ + /* noctty = 1; */ + } +#ifdef CONFIG_VT + if (device == CONSOLE_DEV) { + extern int fg_console; + device = MKDEV(TTY_MAJOR, fg_console + 1); + noctty = 1; + } +#endif + if (device == SYSCONS_DEV) { + struct console *c = console_drivers; + while(c && !c->device) + c = c->next; + if (!c) + return -ENODEV; + device = c->device(c); + filp->f_flags |= O_NONBLOCK; /* Don't let /dev/console block */ + noctty = 1; + } + + if (device == PTMX_DEV) { +#ifdef CONFIG_UNIX98_PTYS + + /* find a free pty. */ + int major, minor; + struct tty_driver *driver; + + /* find a device that is not in use. */ + retval = -1; + for ( major = 0 ; major < UNIX98_NR_MAJORS ; major++ ) { + driver = &ptm_driver[major]; + for (minor = driver->minor_start ; + minor < driver->minor_start + driver->num ; + minor++) { + device = MKDEV(driver->major, minor); + if (!init_dev(device, &tty)) goto ptmx_found; /* ok! */ + } + } + return -EIO; /* no free ptys */ + ptmx_found: + set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ + minor -= driver->minor_start; + devpts_pty_new(driver->other->name_base + minor, MKDEV(driver->other->major, minor + driver->other->minor_start)); + tty_register_devfs(&pts_driver[major], DEVFS_FL_NO_PERSISTENCE, + pts_driver[major].minor_start + minor); + noctty = 1; + goto init_dev_done; + +#else /* CONFIG_UNIX_98_PTYS */ + + return -ENODEV; + +#endif /* CONFIG_UNIX_98_PTYS */ + } + + retval = init_dev(device, &tty); + if (retval) + return retval; + +#ifdef CONFIG_UNIX98_PTYS +init_dev_done: +#endif + filp->private_data = tty; + file_move(filp, &tty->tty_files); + check_tty_count(tty, "tty_open"); + if (tty->driver.type == TTY_DRIVER_TYPE_PTY && + tty->driver.subtype == PTY_TYPE_MASTER) + noctty = 1; +#ifdef TTY_DEBUG_HANGUP + printk(KERN_DEBUG "opening %s...", tty_name(tty, buf)); +#endif + if (tty->driver.open) + retval = tty->driver.open(tty, filp); + else + retval = -ENODEV; + filp->f_flags = saved_flags; + + if (!retval && test_bit(TTY_EXCLUSIVE, &tty->flags) && !suser()) + retval = -EBUSY; + + if (retval) { +#ifdef TTY_DEBUG_HANGUP + printk(KERN_DEBUG "error %d in opening %s...", retval, + tty_name(tty, buf)); +#endif + + release_dev(filp); + if (retval != -ERESTARTSYS) + return retval; + if (signal_pending(current)) + return retval; + schedule(); + /* + * Need to reset f_op in case a hangup happened. 
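A user-space footnote to the noctty handling in tty_open() above: the O_NOCTTY open flag is how a caller opts out of acquiring the terminal as its controlling tty (the current->tty assignment later in this function). For example, with a hypothetical serial device node:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* O_NOCTTY: open the device without making it our controlling
     * terminal, i.e. keep noctty set in the kernel path above.
     * "/dev/ttyS0" is just an example device node. */
    int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY | O_NONBLOCK);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    close(fd);
    return 0;
}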
+ */ + filp->f_op = &tty_fops; + goto retry_open; + } + if (!noctty && + current->leader && + !current->tty && + tty->session == 0) { + task_lock(current); + current->tty = tty; + task_unlock(current); + current->tty_old_pgrp = 0; + tty->session = current->session; + tty->pgrp = current->pgrp; + } + if ((tty->driver.type == TTY_DRIVER_TYPE_SERIAL) && + (tty->driver.subtype == SERIAL_TYPE_CALLOUT) && + (tty->count == 1)) { + static int nr_warns; + if (nr_warns < 5) { + printk(KERN_WARNING "tty_io.c: " + "process %d (%s) used obsolete /dev/%s - " + "update software to use /dev/ttyS%d\n", + current->pid, current->comm, + tty_name(tty, buf), TTY_NUMBER(tty)); + nr_warns++; + } + } + return 0; +} + +static int tty_release(struct inode * inode, struct file * filp) +{ + lock_kernel(); + release_dev(filp); + unlock_kernel(); + return 0; +} + +/* No kernel lock held - fine */ +static unsigned int tty_poll(struct file * filp, poll_table * wait) +{ + struct tty_struct * tty; + + tty = (struct tty_struct *)filp->private_data; + if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_poll")) + return 0; + + if (tty->ldisc.poll) + return (tty->ldisc.poll)(tty, filp, wait); + return 0; +} + +static int tty_fasync(int fd, struct file * filp, int on) +{ + struct tty_struct * tty; + int retval; + + tty = (struct tty_struct *)filp->private_data; + if (tty_paranoia_check(tty, filp->f_dentry->d_inode->i_rdev, "tty_fasync")) + return 0; + + retval = fasync_helper(fd, filp, on, &tty->fasync); + if (retval <= 0) + return retval; + + if (on) { + if (!waitqueue_active(&tty->read_wait)) + tty->minimum_to_wake = 1; + if (filp->f_owner.pid == 0) { + filp->f_owner.pid = (-tty->pgrp) ? : current->pid; + filp->f_owner.uid = current->uid; + filp->f_owner.euid = current->euid; + } + } else { + if (!tty->fasync && !waitqueue_active(&tty->read_wait)) + tty->minimum_to_wake = N_TTY_BUF_SIZE; + } + return 0; +} + +static int tiocsti(struct tty_struct *tty, char * arg) +{ + char ch, mbz = 0; + + if ((current->tty != tty) && !suser()) + return -EPERM; + if (get_user(ch, arg)) + return -EFAULT; + tty->ldisc.receive_buf(tty, &ch, &mbz, 1); + return 0; +} + +static int tiocgwinsz(struct tty_struct *tty, struct winsize * arg) +{ + if (copy_to_user(arg, &tty->winsize, sizeof(*arg))) + return -EFAULT; + return 0; +} + +static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, + struct winsize * arg) +{ + struct winsize tmp_ws; + + if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) + return -EFAULT; + if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg))) + return 0; + if (tty->pgrp > 0) + kill_pg(tty->pgrp, SIGWINCH, 1); + if ((real_tty->pgrp != tty->pgrp) && (real_tty->pgrp > 0)) + kill_pg(real_tty->pgrp, SIGWINCH, 1); + tty->winsize = tmp_ws; + real_tty->winsize = tmp_ws; + return 0; +} + +static int tioccons(struct inode *inode, + struct tty_struct *tty, struct tty_struct *real_tty) +{ + if (inode->i_rdev == SYSCONS_DEV || + inode->i_rdev == CONSOLE_DEV) { + if (!suser()) + return -EPERM; + redirect = NULL; + return 0; + } + if (redirect) + return -EBUSY; + redirect = real_tty; + return 0; +} + + +static int fionbio(struct file *file, int *arg) +{ + int nonblock; + + if (get_user(nonblock, arg)) + return -EFAULT; + + if (nonblock) + file->f_flags |= O_NONBLOCK; + else + file->f_flags &= ~O_NONBLOCK; + return 0; +} + +static int tiocsctty(struct tty_struct *tty, int arg) +{ + if (current->leader && + (current->session == tty->session)) + return 0; + /* + * The process must be a session leader and + * not have a 
controlling tty already. + */ + if (!current->leader || current->tty) + return -EPERM; + if (tty->session > 0) { + /* + * This tty is already the controlling + * tty for another session group! + */ + if ((arg == 1) && suser()) { + /* + * Steal it away + */ + struct task_struct *p; + + read_lock(&tasklist_lock); + for_each_task(p) + if (p->tty == tty) + p->tty = NULL; + read_unlock(&tasklist_lock); + } else + return -EPERM; + } + task_lock(current); + current->tty = tty; + task_unlock(current); + current->tty_old_pgrp = 0; + tty->session = current->session; + tty->pgrp = current->pgrp; + return 0; +} + +static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) +{ + /* + * (tty == real_tty) is a cheap way of + * testing if the tty is NOT a master pty. + */ + if (tty == real_tty && current->tty != real_tty) + return -ENOTTY; + return put_user(real_tty->pgrp, arg); +} + +static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) +{ + pid_t pgrp; + int retval = tty_check_change(real_tty); + + if (retval == -EIO) + return -ENOTTY; + if (retval) + return retval; + if (!current->tty || + (current->tty != real_tty) || + (real_tty->session != current->session)) + return -ENOTTY; + if (get_user(pgrp, (pid_t *) arg)) + return -EFAULT; + if (pgrp < 0) + return -EINVAL; + if (session_of_pgrp(pgrp) != current->session) + return -EPERM; + real_tty->pgrp = pgrp; + return 0; +} + +static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t *arg) +{ + /* + * (tty == real_tty) is a cheap way of + * testing if the tty is NOT a master pty. + */ + if (tty == real_tty && current->tty != real_tty) + return -ENOTTY; + if (real_tty->session <= 0) + return -ENOTTY; + return put_user(real_tty->session, arg); +} + +static int tiocttygstruct(struct tty_struct *tty, struct tty_struct *arg) +{ + if (copy_to_user(arg, tty, sizeof(*arg))) + return -EFAULT; + return 0; +} + +static int tiocsetd(struct tty_struct *tty, int *arg) +{ + int ldisc; + + if (get_user(ldisc, arg)) + return -EFAULT; + return tty_set_ldisc(tty, ldisc); +} + +static int send_break(struct tty_struct *tty, int duration) +{ + set_current_state(TASK_INTERRUPTIBLE); + + tty->driver.break_ctl(tty, -1); + if (!signal_pending(current)) + schedule_timeout(duration); + tty->driver.break_ctl(tty, 0); + if (signal_pending(current)) + return -EINTR; + return 0; +} + +/* + * Split this up, as gcc can choke on it otherwise.. + */ +int tty_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct tty_struct *tty, *real_tty; + int retval; + + tty = (struct tty_struct *)file->private_data; + if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) + return -EINVAL; + + real_tty = tty; + if (tty->driver.type == TTY_DRIVER_TYPE_PTY && + tty->driver.subtype == PTY_TYPE_MASTER) + real_tty = tty->link; + + /* + * Break handling by driver + */ + if (!tty->driver.break_ctl) { + switch(cmd) { + case TIOCSBRK: + case TIOCCBRK: + if (tty->driver.ioctl) + return tty->driver.ioctl(tty, file, cmd, arg); + return -EINVAL; + + /* These two ioctl's always return success; even if */ + /* the driver doesn't support them. 
*/ + case TCSBRK: + case TCSBRKP: + if (!tty->driver.ioctl) + return 0; + retval = tty->driver.ioctl(tty, file, cmd, arg); + if (retval == -ENOIOCTLCMD) + retval = 0; + return retval; + } + } + + /* + * Factor out some common prep work + */ + switch (cmd) { + case TIOCSETD: + case TIOCSBRK: + case TIOCCBRK: + case TCSBRK: + case TCSBRKP: + retval = tty_check_change(tty); + if (retval) + return retval; + if (cmd != TIOCCBRK) { + tty_wait_until_sent(tty, 0); + if (signal_pending(current)) + return -EINTR; + } + break; + } + + switch (cmd) { + case TIOCSTI: + return tiocsti(tty, (char *)arg); + case TIOCGWINSZ: + return tiocgwinsz(tty, (struct winsize *) arg); + case TIOCSWINSZ: + return tiocswinsz(tty, real_tty, (struct winsize *) arg); + case TIOCCONS: + return tioccons(inode, tty, real_tty); + case FIONBIO: + return fionbio(file, (int *) arg); + case TIOCEXCL: + set_bit(TTY_EXCLUSIVE, &tty->flags); + return 0; + case TIOCNXCL: + clear_bit(TTY_EXCLUSIVE, &tty->flags); + return 0; + case TIOCNOTTY: + if (current->tty != tty) + return -ENOTTY; + if (current->leader) + disassociate_ctty(0); + task_lock(current); + current->tty = NULL; + task_unlock(current); + return 0; + case TIOCSCTTY: + return tiocsctty(tty, arg); + case TIOCGPGRP: + return tiocgpgrp(tty, real_tty, (pid_t *) arg); + case TIOCSPGRP: + return tiocspgrp(tty, real_tty, (pid_t *) arg); + case TIOCGSID: + return tiocgsid(tty, real_tty, (pid_t *) arg); + case TIOCGETD: + return put_user(tty->ldisc.num, (int *) arg); + case TIOCSETD: + return tiocsetd(tty, (int *) arg); +#ifdef CONFIG_VT + case TIOCLINUX: + return tioclinux(tty, arg); +#endif + case TIOCTTYGSTRUCT: + return tiocttygstruct(tty, (struct tty_struct *) arg); + + /* + * Break handling + */ + case TIOCSBRK: /* Turn break on, unconditionally */ + tty->driver.break_ctl(tty, -1); + return 0; + + case TIOCCBRK: /* Turn break off, unconditionally */ + tty->driver.break_ctl(tty, 0); + return 0; + case TCSBRK: /* SVID version: non-zero arg --> no break */ + /* + * XXX is the above comment correct, or the + * code below correct? Is this ioctl used at + * all by anyone? + */ + if (!arg) + return send_break(tty, HZ/4); + return 0; + case TCSBRKP: /* support for POSIX tcsendbreak() */ + return send_break(tty, arg ? arg*(HZ/10) : HZ/4); + } + if (tty->driver.ioctl) { + int retval = (tty->driver.ioctl)(tty, file, cmd, arg); + if (retval != -ENOIOCTLCMD) + return retval; + } + if (tty->ldisc.ioctl) { + int retval = (tty->ldisc.ioctl)(tty, file, cmd, arg); + if (retval != -ENOIOCTLCMD) + return retval; + } + return -EINVAL; +} + + +/* + * This implements the "Secure Attention Key" --- the idea is to + * prevent trojan horses by killing all processes associated with this + * tty when the user hits the "Secure Attention Key". Required for + * super-paranoid applications --- see the Orange Book for more details. + * + * This code could be nicer; ideally it should send a HUP, wait a few + * seconds, then send a INT, and then a KILL signal. But you then + * have to coordinate with the init process, since all processes associated + * with the current tty must be dead before the new getty is allowed + * to spawn. + * + * Now, if it would be correct ;-/ The current code has a nasty hole - + * it doesn't catch files in flight. We may send the descriptor to ourselves + * via AF_UNIX socket, close it and later fetch from socket. FIXME. + * + * Nasty bug: do_SAK is being called in interrupt context. This can + * deadlock. We punt it up to process context. 
AKPM - 16Mar2001 + */ +static void __do_SAK(void *arg) +{ +#ifdef TTY_SOFT_SAK + tty_hangup(tty); +#else + struct tty_struct *tty = arg; + struct task_struct *p; + int session; + int i; + struct file *filp; + + if (!tty) + return; + session = tty->session; + if (tty->ldisc.flush_buffer) + tty->ldisc.flush_buffer(tty); + if (tty->driver.flush_buffer) + tty->driver.flush_buffer(tty); + read_lock(&tasklist_lock); + for_each_task(p) { + if ((p->tty == tty) || + ((session > 0) && (p->session == session))) { + send_sig(SIGKILL, p, 1); + continue; + } + task_lock(p); + if (p->files) { + read_lock(&p->files->file_lock); + for (i=0; i < p->files->max_fds; i++) { + filp = fcheck_files(p->files, i); + if (filp && (filp->f_op == &tty_fops) && + (filp->private_data == tty)) { + send_sig(SIGKILL, p, 1); + break; + } + } + read_unlock(&p->files->file_lock); + } + task_unlock(p); + } + read_unlock(&tasklist_lock); +#endif +} + +/* + * The tq handling here is a little racy - tty->SAK_tq may already be queued. + * But there's no mechanism to fix that without futzing with tqueue_lock. + * Fortunately we don't need to worry, because if ->SAK_tq is already queued, + * the values which we write to it will be identical to the values which it + * already has. --akpm + */ +void do_SAK(struct tty_struct *tty) +{ + if (!tty) + return; + PREPARE_TQUEUE(&tty->SAK_tq, __do_SAK, tty); + schedule_task(&tty->SAK_tq); +} + +/* + * This routine is called out of the software interrupt to flush data + * from the flip buffer to the line discipline. + */ +static void flush_to_ldisc(void *private_) +{ + struct tty_struct *tty = (struct tty_struct *) private_; + unsigned char *cp; + char *fp; + int count; + unsigned long flags; + + if (test_bit(TTY_DONT_FLIP, &tty->flags)) { + queue_task(&tty->flip.tqueue, &tq_timer); + return; + } + if (tty->flip.buf_num) { + cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE; + fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; + tty->flip.buf_num = 0; + + save_flags(flags); cli(); + tty->flip.char_buf_ptr = tty->flip.char_buf; + tty->flip.flag_buf_ptr = tty->flip.flag_buf; + } else { + cp = tty->flip.char_buf; + fp = tty->flip.flag_buf; + tty->flip.buf_num = 1; + + save_flags(flags); cli(); + tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE; + tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; + } + count = tty->flip.count; + tty->flip.count = 0; + restore_flags(flags); + + tty->ldisc.receive_buf(tty, cp, fp, count); +} + +/* + * Routine which returns the baud rate of the tty + * + * Note that the baud_table needs to be kept in sync with the + * include/asm/termbits.h file. 
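Before the baud table below, one note on flush_to_ldisc() above: it drains one half of the flip buffer while interrupt-time input fills the other, toggling buf_num on each pass. The swap, reduced to a sketch with fixed-size halves and no real interrupt masking:

#define HALF 512

struct flip {
    char buf[2 * HALF];    /* two halves, like char_buf above */
    int active;            /* half the producer is filling */
    int count;             /* bytes pending in that half */
};

/* Toggle halves and hand back the full one for draining; a real
 * version must mask interrupts around the switch, as the cli()
 * section above does. */
static char *flip_swap(struct flip *f, int *count)
{
    char *done = f->buf + f->active * HALF;

    *count = f->count;
    f->count = 0;
    f->active ^= 1;
    return done;
}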
+ */
+static int baud_table[] = {
+ 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
+ 9600, 19200, 38400, 57600, 115200, 230400, 460800,
+#ifdef __sparc__
+ 76800, 153600, 307200, 614400, 921600
+#else
+ 500000, 576000, 921600, 1000000, 1152000, 1500000, 2000000,
+ 2500000, 3000000, 3500000, 4000000
+#endif
+};
+
+static int n_baud_table = sizeof(baud_table)/sizeof(int);
+
+int tty_get_baud_rate(struct tty_struct *tty)
+{
+ unsigned int cflag, i;
+
+ cflag = tty->termios->c_cflag;
+
+ i = cflag & CBAUD;
+ if (i & CBAUDEX) {
+ i &= ~CBAUDEX;
+ if (i < 1 || i+15 >= n_baud_table)
+ tty->termios->c_cflag &= ~CBAUDEX;
+ else
+ i += 15;
+ }
+ if (i==15 && tty->alt_speed) {
+ if (!tty->warned) {
+ printk(KERN_WARNING "Use of setserial/setrocket to "
+ "set SPD_* flags is deprecated\n");
+ tty->warned = 1;
+ }
+ return(tty->alt_speed);
+ }
+
+ return baud_table[i];
+}
+
+void tty_flip_buffer_push(struct tty_struct *tty)
+{
+ if (tty->low_latency)
+ flush_to_ldisc((void *) tty);
+ else
+ queue_task(&tty->flip.tqueue, &tq_timer);
+}
+
+/*
+ * This subroutine initializes a tty structure.
+ */
+static void initialize_tty_struct(struct tty_struct *tty)
+{
+ memset(tty, 0, sizeof(struct tty_struct));
+ tty->magic = TTY_MAGIC;
+ tty->ldisc = ldiscs[N_TTY];
+ tty->pgrp = -1;
+ tty->flip.char_buf_ptr = tty->flip.char_buf;
+ tty->flip.flag_buf_ptr = tty->flip.flag_buf;
+ tty->flip.tqueue.routine = flush_to_ldisc;
+ tty->flip.tqueue.data = tty;
+ init_MUTEX(&tty->flip.pty_sem);
+ init_waitqueue_head(&tty->write_wait);
+ init_waitqueue_head(&tty->read_wait);
+ tty->tq_hangup.routine = do_tty_hangup;
+ tty->tq_hangup.data = tty;
+ sema_init(&tty->atomic_read, 1);
+ sema_init(&tty->atomic_write, 1);
+ spin_lock_init(&tty->read_lock);
+ INIT_LIST_HEAD(&tty->tty_files);
+ INIT_TQUEUE(&tty->SAK_tq, 0, 0);
+}
+
+/*
+ * The default put_char routine if the driver did not define one.
+ */
+void tty_default_put_char(struct tty_struct *tty, unsigned char ch)
+{
+ tty->driver.write(tty, 0, &ch, 1);
+}
+
+/*
+ * Register a tty device described by <driver>, with minor number <minor>.
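The CBAUD/CBAUDEX decoding in tty_get_baud_rate() above is the kernel side of what user space sees as termios speed codes. A standard POSIX sketch for reading the code back (stock API, not code from this import):

#include <stdio.h>
#include <termios.h>
#include <unistd.h>

int main(void)
{
    struct termios t;
    speed_t code;

    if (tcgetattr(STDIN_FILENO, &t) < 0) {
        perror("tcgetattr");
        return 1;
    }
    /* cfgetospeed() returns a Bxxxx code, i.e. an index of the kind
     * tty_get_baud_rate() maps through baud_table[] above, not a
     * plain integer baud rate. */
    code = cfgetospeed(&t);
    printf("speed code %lu (B38400 == %lu)\n",
           (unsigned long)code, (unsigned long)B38400);
    return 0;
}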
+ */ +void tty_register_devfs (struct tty_driver *driver, unsigned int flags, unsigned minor) +{ +#ifdef CONFIG_DEVFS_FS + umode_t mode = S_IFCHR | S_IRUSR | S_IWUSR; + kdev_t device = MKDEV (driver->major, minor); + int idx = minor - driver->minor_start; + char buf[32]; + + switch (device) { + case TTY_DEV: + case PTMX_DEV: + mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + break; + default: + if (driver->major == PTY_MASTER_MAJOR) + mode |= S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + break; + } + if ( (minor < driver->minor_start) || + (minor >= driver->minor_start + driver->num) ) { + printk(KERN_ERR "Attempt to register invalid minor number " + "with devfs (%d:%d).\n", (int)driver->major,(int)minor); + return; + } +# ifdef CONFIG_UNIX98_PTYS + if ( (driver->major >= UNIX98_PTY_SLAVE_MAJOR) && + (driver->major < UNIX98_PTY_SLAVE_MAJOR + UNIX98_NR_MAJORS) ) + flags |= DEVFS_FL_CURRENT_OWNER; +# endif + sprintf(buf, driver->name, idx + driver->name_base); + devfs_register (NULL, buf, flags | DEVFS_FL_DEFAULT, + driver->major, minor, mode, &tty_fops, NULL); +#endif /* CONFIG_DEVFS_FS */ +} + +void tty_unregister_devfs (struct tty_driver *driver, unsigned minor) +{ +#ifdef CONFIG_DEVFS_FS + void * handle; + int idx = minor - driver->minor_start; + char buf[32]; + + sprintf(buf, driver->name, idx + driver->name_base); + handle = devfs_find_handle (NULL, buf, driver->major, minor, + DEVFS_SPECIAL_CHR, 0); + devfs_unregister (handle); +#endif /* CONFIG_DEVFS_FS */ +} + +EXPORT_SYMBOL(tty_register_devfs); +EXPORT_SYMBOL(tty_unregister_devfs); + +/* + * Called by a tty driver to register itself. + */ +int tty_register_driver(struct tty_driver *driver) +{ + int error; + int i; + + if (driver->flags & TTY_DRIVER_INSTALLED) + return 0; + + error = devfs_register_chrdev(driver->major, driver->name, &tty_fops); + if (error < 0) + return error; + else if(driver->major == 0) + driver->major = error; + + if (!driver->put_char) + driver->put_char = tty_default_put_char; + + driver->prev = 0; + driver->next = tty_drivers; + if (tty_drivers) tty_drivers->prev = driver; + tty_drivers = driver; + + if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) { + for(i = 0; i < driver->num; i++) + tty_register_devfs(driver, 0, driver->minor_start + i); + } + proc_tty_register_driver(driver); + return error; +} + +/* + * Called by a tty driver to unregister itself. + */ +int tty_unregister_driver(struct tty_driver *driver) +{ + int retval; + struct tty_driver *p; + int i, found = 0; + struct termios *tp; + const char *othername = NULL; + + if (*driver->refcount) + return -EBUSY; + + for (p = tty_drivers; p; p = p->next) { + if (p == driver) + found++; + else if (p->major == driver->major) + othername = p->name; + } + + if (!found) + return -ENOENT; + + if (othername == NULL) { + retval = devfs_unregister_chrdev(driver->major, driver->name); + if (retval) + return retval; + } else + devfs_register_chrdev(driver->major, othername, &tty_fops); + + if (driver->prev) + driver->prev->next = driver->next; + else + tty_drivers = driver->next; + + if (driver->next) + driver->next->prev = driver->prev; + + /* + * Free the termios and termios_locked structures because + * we don't want to get memory leaks when modular tty + * drivers are removed from the kernel. 
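The cleanup loop below clears each table slot before freeing it, which keeps any later release path from seeing a stale pointer. The idiom in isolation, with a hypothetical table and free() standing in for kfree():

#include <stdlib.h>

/* Free every allocated slot and clear it first, mirroring the
 * termios/termios_locked loop in tty_unregister_driver(). */
static void free_slots(void **tab, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        void *p = tab[i];
        if (p) {
            tab[i] = NULL;
            free(p);
        }
    }
}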
+ */ + for (i = 0; i < driver->num; i++) { + tp = driver->termios[i]; + if (tp) { + driver->termios[i] = NULL; + kfree(tp); + } + tp = driver->termios_locked[i]; + if (tp) { + driver->termios_locked[i] = NULL; + kfree(tp); + } + tty_unregister_devfs(driver, driver->minor_start + i); + } + proc_tty_unregister_driver(driver); + return 0; +} + + +/* + * Initialize the console device. This is called *early*, so + * we can't necessarily depend on lots of kernel help here. + * Just do some early initializations, and do the complex setup + * later. + */ +void __init console_init(void) +{ + /* Setup the default TTY line discipline. */ + memset(ldiscs, 0, sizeof(ldiscs)); + (void) tty_register_ldisc(N_TTY, &tty_ldisc_N_TTY); + + /* + * Set up the standard termios. Individual tty drivers may + * deviate from this; this is used as a template. + */ + memset(&tty_std_termios, 0, sizeof(struct termios)); + memcpy(tty_std_termios.c_cc, INIT_C_CC, NCCS); + tty_std_termios.c_iflag = ICRNL | IXON; + tty_std_termios.c_oflag = OPOST | ONLCR; + tty_std_termios.c_cflag = B38400 | CS8 | CREAD | HUPCL; + tty_std_termios.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK | + ECHOCTL | ECHOKE | IEXTEN; + + /* + * set up the console device so that later boot sequences can + * inform about problems etc.. + */ +#ifdef CONFIG_XENO + xeno_console_init(); +#endif +#ifdef CONFIG_VT + con_init(); +#endif +#ifdef CONFIG_AU1000_SERIAL_CONSOLE + au1000_serial_console_init(); +#endif +#ifdef CONFIG_SERIAL_CONSOLE +#if (defined(CONFIG_8xx) || defined(CONFIG_8260)) + console_8xx_init(); +#elif defined(CONFIG_MAC_SERIAL) + mac_scc_console_init(); +#elif defined(CONFIG_PARISC) + pdc_console_init(); +#elif defined(CONFIG_SERIAL) + serial_console_init(); +#endif /* CONFIG_8xx */ +#ifdef CONFIG_SGI_SERIAL + sgi_serial_console_init(); +#endif +#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) + vme_scc_console_init(); +#endif +#if defined(CONFIG_SERIAL167) + serial167_console_init(); +#endif +#if defined(CONFIG_SH_SCI) + sci_console_init(); +#endif +#endif +#ifdef CONFIG_TN3270_CONSOLE + tub3270_con_init(); +#endif +#ifdef CONFIG_TN3215 + con3215_init(); +#endif +#ifdef CONFIG_HWC + hwc_console_init(); +#endif +#ifdef CONFIG_STDIO_CONSOLE + stdio_console_init(); +#endif +#ifdef CONFIG_SERIAL_21285_CONSOLE + rs285_console_init(); +#endif +#ifdef CONFIG_SERIAL_SA1100_CONSOLE + sa1100_rs_console_init(); +#endif +#ifdef CONFIG_ARC_CONSOLE + arc_console_init(); +#endif +#ifdef CONFIG_SERIAL_AMBA_CONSOLE + ambauart_console_init(); +#endif +#ifdef CONFIG_SERIAL_TX3912_CONSOLE + tx3912_console_init(); +#endif +} + +static struct tty_driver dev_tty_driver, dev_syscons_driver; +#ifdef CONFIG_UNIX98_PTYS +static struct tty_driver dev_ptmx_driver; +#endif +#ifdef CONFIG_VT +static struct tty_driver dev_console_driver; +#endif + +/* + * Ok, now we can initialize the rest of the tty devices and can count + * on memory allocations, interrupts etc.. + */ +void __init tty_init(void) +{ + /* + * dev_tty_driver and dev_console_driver are actually magic + * devices which get redirected at open time. Nevertheless, + * we register them so that register_chrdev is called + * appropriately. 
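Before tty_init() below, a comparison point: the tty_std_termios template assembled in console_init() above is the classic "sane terminal" state. A user-space sketch that installs the same flag groups via termios, using only the portable flags (ECHOCTL/ECHOKE/IEXTEN are left out):

#include <termios.h>
#include <unistd.h>

/* Apply the same defaults console_init() installs above: ICRNL|IXON
 * input, OPOST|ONLCR output, 8N1 at 38400, canonical echoing mode. */
static int make_sane(int fd)
{
    struct termios t;

    if (tcgetattr(fd, &t) < 0)
        return -1;
    t.c_iflag = ICRNL | IXON;
    t.c_oflag = OPOST | ONLCR;
    t.c_cflag = CS8 | CREAD | HUPCL;
    t.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK;
    cfsetispeed(&t, B38400);
    cfsetospeed(&t, B38400);
    return tcsetattr(fd, TCSANOW, &t);
}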
+ */ + memset(&dev_tty_driver, 0, sizeof(struct tty_driver)); + dev_tty_driver.magic = TTY_DRIVER_MAGIC; + dev_tty_driver.driver_name = "/dev/tty"; + dev_tty_driver.name = dev_tty_driver.driver_name + 5; + dev_tty_driver.name_base = 0; + dev_tty_driver.major = TTYAUX_MAJOR; + dev_tty_driver.minor_start = 0; + dev_tty_driver.num = 1; + dev_tty_driver.type = TTY_DRIVER_TYPE_SYSTEM; + dev_tty_driver.subtype = SYSTEM_TYPE_TTY; + + if (tty_register_driver(&dev_tty_driver)) + panic("Couldn't register /dev/tty driver\n"); + + dev_syscons_driver = dev_tty_driver; + dev_syscons_driver.driver_name = "/dev/console"; + dev_syscons_driver.name = dev_syscons_driver.driver_name + 5; + dev_syscons_driver.major = TTYAUX_MAJOR; + dev_syscons_driver.minor_start = 1; + dev_syscons_driver.type = TTY_DRIVER_TYPE_SYSTEM; + dev_syscons_driver.subtype = SYSTEM_TYPE_SYSCONS; + + if (tty_register_driver(&dev_syscons_driver)) + panic("Couldn't register /dev/console driver\n"); + + /* console calls tty_register_driver() before kmalloc() works. + * Thus, we can't devfs_register() then. Do so now, instead. + */ +#ifdef CONFIG_VT + con_init_devfs(); +#endif + +#ifdef CONFIG_UNIX98_PTYS + dev_ptmx_driver = dev_tty_driver; + dev_ptmx_driver.driver_name = "/dev/ptmx"; + dev_ptmx_driver.name = dev_ptmx_driver.driver_name + 5; + dev_ptmx_driver.major= MAJOR(PTMX_DEV); + dev_ptmx_driver.minor_start = MINOR(PTMX_DEV); + dev_ptmx_driver.type = TTY_DRIVER_TYPE_SYSTEM; + dev_ptmx_driver.subtype = SYSTEM_TYPE_SYSPTMX; + + if (tty_register_driver(&dev_ptmx_driver)) + panic("Couldn't register /dev/ptmx driver\n"); +#endif + +#ifdef CONFIG_VT + dev_console_driver = dev_tty_driver; + dev_console_driver.driver_name = "/dev/vc/0"; + dev_console_driver.name = dev_console_driver.driver_name + 5; + dev_console_driver.major = TTY_MAJOR; + dev_console_driver.type = TTY_DRIVER_TYPE_SYSTEM; + dev_console_driver.subtype = SYSTEM_TYPE_CONSOLE; + + if (tty_register_driver(&dev_console_driver)) + panic("Couldn't register /dev/tty0 driver\n"); + + kbd_init(); +#endif + +#ifdef CONFIG_ESPSERIAL /* init ESP before rs, so rs doesn't see the port */ + espserial_init(); +#endif +#if defined(CONFIG_MVME162_SCC) || defined(CONFIG_BVME6000_SCC) || defined(CONFIG_MVME147_SCC) + vme_scc_init(); +#endif +#ifdef CONFIG_SERIAL_TX3912 + tx3912_rs_init(); +#endif +#ifdef CONFIG_ROCKETPORT + rp_init(); +#endif +#ifdef CONFIG_SERIAL167 + serial167_init(); +#endif +#ifdef CONFIG_CYCLADES + cy_init(); +#endif +#ifdef CONFIG_STALLION + stl_init(); +#endif +#ifdef CONFIG_ISTALLION + stli_init(); +#endif +#ifdef CONFIG_DIGI + pcxe_init(); +#endif +#ifdef CONFIG_DIGIEPCA + pc_init(); +#endif +#ifdef CONFIG_SPECIALIX + specialix_init(); +#endif +#if (defined(CONFIG_8xx) || defined(CONFIG_8260)) + rs_8xx_init(); +#endif /* CONFIG_8xx */ + pty_init(); +#ifdef CONFIG_MOXA_SMARTIO + mxser_init(); +#endif +#ifdef CONFIG_MOXA_INTELLIO + moxa_init(); +#endif +#ifdef CONFIG_VT + vcs_init(); +#endif +#ifdef CONFIG_TN3270 + tub3270_init(); +#endif +#ifdef CONFIG_TN3215 + tty3215_init(); +#endif +#ifdef CONFIG_HWC + hwc_tty_init(); +#endif +#ifdef CONFIG_A2232 + a2232board_init(); +#endif +} diff --git a/xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c b/xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c new file mode 100644 index 0000000000..a2373467d5 --- /dev/null +++ b/xenolinux-2.4.16-sparse/fs/nfs/nfsroot.c @@ -0,0 +1,466 @@ +/* + * $Id: nfsroot.c,v 1.45 1998/03/07 10:44:46 mj Exp $ + * + * Copyright (C) 1995, 1996 Gero Kuhlmann + * + * Allow an NFS filesystem to be mounted as root. 
The way this works is:
+ * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
+ * (2) Handle RPC negotiation with the system which replied to RARP or
+ * was reported as a boot server by BOOTP or manually.
+ * (3) The actual mounting is done later, when init() is running.
+ *
+ *
+ * Changes:
+ *
+ * Alan Cox : Removed get_address name clash with FPU.
+ * Alan Cox : Reformatted a bit.
+ * Gero Kuhlmann : Code cleanup
+ * Michael Rausch : Fixed recognition of an incoming RARP answer.
+ * Martin Mares : (2.0) Auto-configuration via BOOTP supported.
+ * Martin Mares : Manual selection of interface & BOOTP/RARP.
+ * Martin Mares : Using network routes instead of host routes,
+ * allowing the default configuration to be used
+ * for normal operation of the host.
+ * Martin Mares : Randomized timer with exponential backoff
+ * installed to minimize network congestion.
+ * Martin Mares : Code cleanup.
+ * Martin Mares : (2.1) BOOTP and RARP made configuration options.
+ * Martin Mares : Server hostname generation fixed.
+ * Gerd Knorr : Fixed weird inode handling
+ * Martin Mares : (2.2) "0.0.0.0" addresses from command line ignored.
+ * Martin Mares : RARP replies not tested for server address.
+ * Gero Kuhlmann : (2.3) Some bug fixes and code cleanup again (please
+ * send me your new patches _before_ bothering
+ * Linus so that I don't always have to cleanup
+ * _afterwards_ - thanks)
+ * Gero Kuhlmann : Last changes of Martin Mares undone.
+ * Gero Kuhlmann : RARP replies are tested for specified server
+ * again. However, it's now possible to have
+ * different RARP and NFS servers.
+ * Gero Kuhlmann : "0.0.0.0" addresses from command line are
+ * now mapped to INADDR_NONE.
+ * Gero Kuhlmann : Fixed a bug which prevented BOOTP path name
+ * from being used (thanks to Leo Spiekman)
+ * Andy Walker : Allow to specify the NFS server in nfs_root
+ * without giving a path name
+ * Swen Thümmler : Allow to specify the NFS options in nfs_root
+ * without giving a path name. Fix BOOTP request
+ * for domainname (domainname is NIS domain, not
+ * DNS domain!). Skip dummy devices for BOOTP.
+ * Jacek Zapala : Fixed a bug which prevented server-ip address
+ * from nfsroot parameter from being used.
+ * Olaf Kirch : Adapted to new NFS code.
+ * Jakub Jelinek : Free used code segment.
+ * Marko Kohtala : Fixed some bugs.
+ * Martin Mares : Debug message cleanup
+ * Martin Mares : Changed to use the new generic IP layer autoconfig
+ * code. BOOTP and RARP moved there.
+ * Martin Mares : Default path now contains host name instead of
+ * host IP address (but host name defaults to IP
+ * address anyway).
+ * Martin Mares : Use root_server_addr appropriately during setup.
+ * Martin Mares : Rewrote parameter parsing, now hopefully giving
+ * correct overriding.
+ * Trond Myklebust : Add in preliminary support for NFSv3 and TCP.
+ * Fix bug in root_nfs_addr(). nfs_data.namlen
+ * is NOT for the length of the hostname.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/in.h>
+#include <linux/major.h>
+#include <linux/utsname.h>
+#include <linux/inet.h>
+#include <net/ipconfig.h>
+
+/* Define this to allow debugging output */
+#undef NFSROOT_DEBUG
+#define NFSDBG_FACILITY NFSDBG_ROOT
+
+/* Default path we try to mount.
"%s" gets replaced by our IP address */ +#define NFS_ROOT "/tftpboot/%s" + +/* Parameters passed from the kernel command line */ +static char nfs_root_name[256] __initdata = ""; + +/* Address of NFS server */ +static __u32 servaddr __initdata = 0; + +/* Name of directory to mount */ +static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, }; + +/* NFS-related data */ +static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ +static int nfs_port __initdata = 0; /* Port to connect to for NFS */ +static int mount_port __initdata = 0; /* Mount daemon port number */ + + +/*************************************************************************** + + Parsing of options + + ***************************************************************************/ + +/* + * The following integer options are recognized + */ +static struct nfs_int_opts { + char *name; + int *val; +} root_int_opts[] __initdata = { + { "port", &nfs_port }, + { "rsize", &nfs_data.rsize }, + { "wsize", &nfs_data.wsize }, + { "timeo", &nfs_data.timeo }, + { "retrans", &nfs_data.retrans }, + { "acregmin", &nfs_data.acregmin }, + { "acregmax", &nfs_data.acregmax }, + { "acdirmin", &nfs_data.acdirmin }, + { "acdirmax", &nfs_data.acdirmax }, + { NULL, NULL } +}; + + +/* + * And now the flag options + */ +static struct nfs_bool_opts { + char *name; + int and_mask; + int or_mask; +} root_bool_opts[] __initdata = { + { "soft", ~NFS_MOUNT_SOFT, NFS_MOUNT_SOFT }, + { "hard", ~NFS_MOUNT_SOFT, 0 }, + { "intr", ~NFS_MOUNT_INTR, NFS_MOUNT_INTR }, + { "nointr", ~NFS_MOUNT_INTR, 0 }, + { "posix", ~NFS_MOUNT_POSIX, NFS_MOUNT_POSIX }, + { "noposix", ~NFS_MOUNT_POSIX, 0 }, + { "cto", ~NFS_MOUNT_NOCTO, 0 }, + { "nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO }, + { "ac", ~NFS_MOUNT_NOAC, 0 }, + { "noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC }, + { "lock", ~NFS_MOUNT_NONLM, 0 }, + { "nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM }, +#ifdef CONFIG_NFS_V3 + { "v2", ~NFS_MOUNT_VER3, 0 }, + { "v3", ~NFS_MOUNT_VER3, NFS_MOUNT_VER3 }, +#endif + { "udp", ~NFS_MOUNT_TCP, 0 }, + { "tcp", ~NFS_MOUNT_TCP, NFS_MOUNT_TCP }, + { "broken_suid",~NFS_MOUNT_BROKEN_SUID, NFS_MOUNT_BROKEN_SUID }, + { NULL, 0, 0 } +}; + + +/* + * Extract IP address from the parameter string if needed. Note that we + * need to have root_server_addr set _before_ IPConfig gets called as it + * can override it. + */ +static void __init root_nfs_parse_addr(char *name) +{ + int octets = 0; + char *cp, *cq; + + cp = cq = name; + while (octets < 4) { + while (*cp >= '0' && *cp <= '9') + cp++; + if (cp == cq || cp - cq > 3) + break; + if (*cp == '.' || octets == 3) + octets++; + if (octets < 4) + cp++; + cq = cp; + } + if (octets == 4 && (*cp == ':' || *cp == '\0')) { + if (*cp == ':') + *cp++ = '\0'; + root_server_addr = in_aton(name); + strcpy(name, cp); + } +} + + +/* + * Parse option string. 
+ */ +static void __init root_nfs_parse(char *name, char *buf) +{ + char *options, *val, *cp; + + if ((options = strchr(name, ','))) { + *options++ = 0; + cp = strtok(options, ","); + while (cp) { + if ((val = strchr(cp, '='))) { + struct nfs_int_opts *opts = root_int_opts; + *val++ = '\0'; + while (opts->name && strcmp(opts->name, cp)) + opts++; + if (opts->name) + *(opts->val) = (int) simple_strtoul(val, NULL, 10); + } else { + struct nfs_bool_opts *opts = root_bool_opts; + while (opts->name && strcmp(opts->name, cp)) + opts++; + if (opts->name) { + nfs_data.flags &= opts->and_mask; + nfs_data.flags |= opts->or_mask; + } + } + cp = strtok(NULL, ","); + } + } + if (name[0] && strcmp(name, "default")) { + strncpy(buf, name, NFS_MAXPATHLEN-1); + buf[NFS_MAXPATHLEN-1] = 0; + } +} + + +/* + * Prepare the NFS data structure and parse all options. + */ +static int __init root_nfs_name(char *name) +{ + char buf[NFS_MAXPATHLEN]; + char *cp; + + /* Set some default values */ + memset(&nfs_data, 0, sizeof(nfs_data)); + nfs_port = -1; + nfs_data.version = NFS_MOUNT_VERSION; + nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ + nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.bsize = 0; + nfs_data.timeo = 7; + nfs_data.retrans = 3; + nfs_data.acregmin = 3; + nfs_data.acregmax = 60; + nfs_data.acdirmin = 30; + nfs_data.acdirmax = 60; + strcpy(buf, NFS_ROOT); + + /* Process options received from the remote server */ + root_nfs_parse(root_server_path, buf); + + /* Override them by options set on kernel command-line */ + root_nfs_parse(name, buf); + + cp = system_utsname.nodename; + if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { + printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); + return -1; + } + sprintf(nfs_path, buf, cp); + + return 1; +} + + +/* + * Get NFS server address. + */ +static int __init root_nfs_addr(void) +{ + if ((servaddr = root_server_addr) == INADDR_NONE) { + printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n"); + return -1; + } + + strncpy(nfs_data.hostname, in_ntoa(servaddr), sizeof(nfs_data.hostname)-1); + return 0; +} + +/* + * Tell the user what's going on. + */ +#ifdef NFSROOT_DEBUG +static void __init root_nfs_print(void) +{ + printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", + nfs_path, nfs_data.hostname); + printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", + nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); + printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", + nfs_data.acregmin, nfs_data.acregmax, + nfs_data.acdirmin, nfs_data.acdirmax); + printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n", + nfs_port, mount_port, nfs_data.flags); +} +#endif + + +int __init root_nfs_init(void) +{ +#ifdef NFSROOT_DEBUG + nfs_debug |= NFSDBG_ROOT; +#endif + + /* + * Decode the root directory path name and NFS options from + * the kernel command line. This has to go here in order to + * be able to use the client IP address for the remote root + * directory (necessary for pure RARP booting). + */ + if (root_nfs_name(nfs_root_name) < 0 || + root_nfs_addr() < 0) + return -1; + +#ifdef NFSROOT_DEBUG + root_nfs_print(); +#endif + + return 0; +} + + +/* + * Parse NFS server and directory information passed on the kernel + * command line. 
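The root_int_opts/root_bool_opts tables above make the parser purely data-driven: a new option is a new table row, not a new code path. The same shape in miniature, with hypothetical options:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct int_opt { const char *name; int *val; };

static int rsize, wsize;               /* example option targets */
static struct int_opt opts[] = {
    { "rsize", &rsize }, { "wsize", &wsize }, { NULL, NULL }
};

/* Walk "name=value,name=value" against the table, the same way
 * root_nfs_parse() walks root_int_opts above. */
static void parse(char *s)
{
    char *tok;

    for (tok = strtok(s, ","); tok; tok = strtok(NULL, ",")) {
        char *eq = strchr(tok, '=');
        struct int_opt *o;

        if (!eq)
            continue;
        *eq++ = '\0';
        for (o = opts; o->name; o++)
            if (!strcmp(o->name, tok))
                *o->val = atoi(eq);
    }
}

int main(void)
{
    char line[] = "rsize=4096,wsize=8192";

    parse(line);
    printf("rsize=%d wsize=%d\n", rsize, wsize);
    return 0;
}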
+ */ +int __init nfs_root_setup(char *line) +{ + ROOT_DEV = MKDEV(UNNAMED_MAJOR, 255); + if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { + strncpy(nfs_root_name, line, sizeof(nfs_root_name)); + nfs_root_name[sizeof(nfs_root_name)-1] = '\0'; + } else { + int n = strlen(line) + strlen(NFS_ROOT); + if (n >= sizeof(nfs_root_name)) + line[sizeof(nfs_root_name) - strlen(NFS_ROOT) - 1] = '\0'; + sprintf(nfs_root_name, NFS_ROOT, line); + } + root_nfs_parse_addr(nfs_root_name); + return 1; +} + +__setup("nfsroot=", nfs_root_setup); + +/*************************************************************************** + + Routines to actually mount the root directory + + ***************************************************************************/ + +/* + * Construct sockaddr_in from address and port number. + */ +static inline void +set_sockaddr(struct sockaddr_in *sin, __u32 addr, __u16 port) +{ + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = addr; + sin->sin_port = port; +} + +/* + * Query server portmapper for the port of a daemon program. + */ +static int __init root_nfs_getport(int program, int version, int proto) +{ + struct sockaddr_in sin; + + printk(KERN_NOTICE "Looking up port of RPC %d/%d on %s\n", + program, version, in_ntoa(servaddr)); + set_sockaddr(&sin, servaddr, 0); + return rpc_getport_external(&sin, program, version, proto); +} + + +/* + * Use portmapper to find mountd and nfsd port numbers if not overriden + * by the user. Use defaults if portmapper is not available. + * XXX: Is there any nfs server with no portmapper? + */ +static int __init root_nfs_ports(void) +{ + int port; + int nfsd_ver, mountd_ver; + int nfsd_port, mountd_port; + int proto; + + if (nfs_data.flags & NFS_MOUNT_VER3) { + nfsd_ver = NFS3_VERSION; + mountd_ver = NFS_MNT3_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } else { + nfsd_ver = NFS2_VERSION; + mountd_ver = NFS_MNT_VERSION; + nfsd_port = NFS_PORT; + mountd_port = NFS_MNT_PORT; + } + + proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + if (nfs_port < 0) { + if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get nfsd port " + "number from server, using default\n"); + port = nfsd_port; + } + nfs_port = htons(port); + dprintk("Root-NFS: Portmapper on server returned %d " + "as nfsd port\n", port); + } + + if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { + printk(KERN_ERR "Root-NFS: Unable to get mountd port " + "number from server, using default\n"); + port = mountd_port; + } + mount_port = htons(port); + dprintk("Root-NFS: mountd port is %d\n", port); + + return 0; +} + + +/* + * Get a file handle from the server for the directory which is to be + * mounted. + */ +static int __init root_nfs_get_handle(void) +{ + struct sockaddr_in sin; + int status; + + set_sockaddr(&sin, servaddr, mount_port); + if (nfs_data.flags & NFS_MOUNT_VER3) + status = nfs3_mount(&sin, nfs_path, &nfs_data.root); + else + status = nfs_mount(&sin, nfs_path, &nfs_data.root); + if (status < 0) + printk(KERN_ERR "Root-NFS: Server returned error %d " + "while mounting %s\n", status, nfs_path); + + return status; +} + +/* + * Get the NFS port numbers and file handle, and return the prepared 'data' + * argument for ->read_super() if everything went OK. Return NULL otherwise. 
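set_sockaddr() above is the usual three-field sockaddr_in fill; note that its callers pass ports already converted with htons(). A user-space equivalent that does the conversion itself (standard sockets API):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>

/* Same three-field fill as set_sockaddr() above, except the kernel
 * helper expects 'port' already in network byte order, while this
 * sketch converts it here. */
static void fill_sin(struct sockaddr_in *sin, const char *ip,
                     unsigned short port)
{
    memset(sin, 0, sizeof(*sin));
    sin->sin_family = AF_INET;
    sin->sin_addr.s_addr = inet_addr(ip);
    sin->sin_port = htons(port);
}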
+ */ +void * __init nfs_root_data(void) +{ + if (root_nfs_init() < 0 + || root_nfs_ports() < 0 + || root_nfs_get_handle() < 0) + return NULL; + set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, nfs_port); + return (void*)&nfs_data; +} diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h b/xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h new file mode 100644 index 0000000000..ab17bb8e54 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/a.out.h @@ -0,0 +1,26 @@ +#ifndef __I386_A_OUT_H__ +#define __I386_A_OUT_H__ + +struct exec +{ + unsigned long a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#ifdef __KERNEL__ + +#define STACK_TOP TASK_SIZE + +#endif + +#endif /* __A_OUT_GNU_H__ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/apic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/apic.h new file mode 100644 index 0000000000..5f682e955a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/apic.h @@ -0,0 +1,97 @@ +#ifndef __ASM_APIC_H +#define __ASM_APIC_H + +#include +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC + +#define APIC_DEBUG 0 + +#if APIC_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + +/* + * Basic functions accessing APICs. + */ + +static __inline void apic_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(APIC_BASE+reg)) = v; +} + +static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +{ + xchg((volatile unsigned long *)(APIC_BASE+reg), v); +} + +static __inline unsigned long apic_read(unsigned long reg) +{ + return *((volatile unsigned long *)(APIC_BASE+reg)); +} + +static __inline__ void apic_wait_icr_idle(void) +{ + do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); +} + +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_read_around(x) +# define apic_write_around(x,y) apic_write((x),(y)) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_read_around(x) apic_read(x) +# define apic_write_around(x,y) apic_write_atomic((x),(y)) +#endif + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction: + * - a single rmw on Pentium/82489DX + * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ... yummie. 
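 *
 * For illustration: with CONFIG_X86_GOOD_APIC defined, the write below
 * expands to a plain apic_write(APIC_EOI, 0), a single MOV to the
 * memory-mapped register; without it, apic_write_around() falls back
 * to apic_write_atomic(), i.e. an XCHG, and apic_read_around() issues
 * the extra read that the read-around-write macros above exist for.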
+ */ + + /* Docs say use 0 for future compatibility */ + apic_write_around(APIC_EOI, 0); +} + +extern int get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC (void); +extern void disconnect_bsp_APIC (void); +extern void disable_local_APIC (void); +extern int verify_local_APIC (void); +extern void cache_APIC_registers (void); +extern void sync_Arb_IDs (void); +extern void init_bsp_APIC (void); +extern void setup_local_APIC (void); +extern void init_apic_mappings (void); +extern void smp_local_timer_interrupt (struct pt_regs * regs); +extern void setup_APIC_clocks (void); +extern void setup_apic_nmi_watchdog (void); +extern inline void nmi_watchdog_tick (struct pt_regs * regs); +extern int APIC_init_uniprocessor (void); + +extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback); +extern void apic_pm_unregister(struct pm_dev*); + +extern unsigned int apic_timer_irqs [NR_CPUS]; +extern int check_nmi_watchdog (void); + +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* __ASM_APIC_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h b/xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h new file mode 100644 index 0000000000..f855a7d88d --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/apicdef.h @@ -0,0 +1,363 @@ +#ifndef __ASM_APICDEF_H +#define __ASM_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. + * Ingo Molnar , 1999, 2000 + */ + +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +#define APIC_ID 0x20 +#define APIC_ID_MASK (0x0F<<24) +#define GET_APIC_ID(x) (((x)>>24)&0x0F) +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define GET_APIC_VERSION(x) ((x)&0xFF) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0xFF) +#define APIC_INTEGRATED(x) ((x)&0xF0) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFF +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFF +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 /* Write this to the EOI register */ +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFF<<24) +#define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) +#define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFF +#define APIC_DFR 0xE0 +#define APIC_SPIV 0xF0 +#define APIC_SPIV_FOCUS_DISABLED (1<<9) +#define APIC_SPIV_APIC_ENABLED (1<<8) +#define APIC_ISR 0x100 +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define 
GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) +#define SET_APIC_DEST_FIELD(x) ((x)<<24) +#define APIC_LVTT 0x320 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_PERIODIC (1<<17) +#define APIC_LVT_MASKED (1<<16) +#define APIC_LVT_LEVEL_TRIGGER (1<<15) +#define APIC_LVT_REMOTE_IRR (1<<14) +#define APIC_INPUT_POLARITY (1<<13) +#define APIC_SEND_PENDING (1<<12) +#define GET_APIC_DELIVERY_MODE(x) (((x)>>8)&0x7) +#define SET_APIC_DELIVERY_MODE(x,y) (((x)&~0x700)|((y)<<8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXINT 0x7 +#define APIC_LVT1 0x360 +#define APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) + +#define MAX_IO_APICS 8 + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +#define lapic ((volatile struct local_apic *)APIC_BASE) + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request 
Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { u32 __reserved[4]; } __reserved_15; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h new file mode 100644 index 0000000000..b356b37394 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/atomic.h @@ -0,0 +1,204 @@ +#ifndef __ARCH_I386_ATOMIC__ +#define __ARCH_I386_ATOMIC__ + +#include + +/* + 
* Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#ifdef CONFIG_SMP +#define LOCK "lock ; " +#else +#define LOCK "" +#endif + +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_read(v) ((v)->counter) + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +#define atomic_set(v,i) (((v)->counter) = (i)) + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. Note that the guaranteed useful range + * of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_add(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "addl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_sub(int i, atomic_t *v) +{ + __asm__ __volatile__( + LOCK "subl %1,%0" + :"=m" (v->counter) + :"ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "subl %2,%0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_dec(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "decl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
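 */

/*
 * The classic use of atomic_dec_and_test() is reference counting: every
 * holder drops its reference, and exactly one caller observes the count
 * reaching zero and frees the object. A user-space analogue using the
 * GCC __atomic builtins (the kernel version below is x86 inline asm,
 * but the semantics match); all names here are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_obj {
    int refcnt;
    /* ... payload ... */
};

static void demo_put(struct demo_obj *obj)
{
    /* true only for the caller that takes the count to zero */
    if (__atomic_sub_fetch(&obj->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
        printf("last reference dropped, freeing\n");
        free(obj);
    }
}

int main(void)
{
    struct demo_obj *obj = malloc(sizeof(*obj));

    obj->refcnt = 2;    /* two holders */
    demo_put(obj);      /* 2 -> 1, nothing happens */
    demo_put(obj);      /* 1 -> 0, object is freed */
    return 0;
}

/*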
+ */ +static __inline__ int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "decl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "incl %0; sete %1" + :"=m" (v->counter), "=qm" (c) + :"m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "addl %2,%0; sets %1" + :"=m" (v->counter), "=qm" (c) + :"ir" (i), "m" (v->counter) : "memory"); + return c; +} + +/* These are x86-specific, used by some header files */ +#define atomic_clear_mask(mask, addr) \ +__asm__ __volatile__(LOCK "andl %0,%1" \ +: : "r" (~(mask)),"m" (*addr) : "memory") + +#define atomic_set_mask(mask, addr) \ +__asm__ __volatile__(LOCK "orl %0,%1" \ +: : "r" (mask),"m" (*addr) : "memory") + +/* Atomic operations are already serializing on x86 */ +#define smp_mb__before_atomic_dec() barrier() +#define smp_mb__after_atomic_dec() barrier() +#define smp_mb__before_atomic_inc() barrier() +#define smp_mb__after_atomic_inc() barrier() + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h b/xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h new file mode 100644 index 0000000000..a3063cacc9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/bitops.h @@ -0,0 +1,384 @@ +#ifndef _I386_BITOPS_H +#define _I386_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + */ + +#include + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX "lock ; " +#else +#define LOCK_PREFIX "" +#endif + +#define ADDR (*(volatile long *) addr) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. 
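 */

/*
 * What the BTS instruction in these helpers does, spelled out in
 * portable C: bit nr lives in 32-bit word nr/32 at position nr%32, so
 * bit 32 is the LSB of the next word, exactly as the header comment
 * says. Sketch only -- the real set_bit() is atomic via LOCK BTS; this
 * mirrors the non-atomic __set_bit() below.
 */
#include <stdio.h>

static void demo_set_bit(int nr, unsigned int *addr)
{
    addr[nr >> 5] |= 1u << (nr & 31);
}

int main(void)
{
    unsigned int bitmap[2] = { 0, 0 };

    demo_set_bit(0, bitmap);    /* LSB of word 0 */
    demo_set_bit(33, bitmap);   /* bit 1 of word 1 */
    printf("%08x %08x\n", bitmap[0], bitmap[1]);  /* 00000001 00000002 */
    return 0;
}

/*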
+ */ +static __inline__ void __set_bit(int nr, volatile void * addr) +{ + __asm__( + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __inline__ void __change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( + "btcl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btcl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __inline__ int __test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. 
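 */

/*
 * test_and_set_bit() is frequently used as a one-bit try-lock or
 * "claim" operation: the caller that reads back 0 owns the resource.
 * User-space analogue with a GCC builtin (the kernel version above is
 * LOCK BTS followed by SBB); the flag name is illustrative.
 */
#include <stdio.h>

static unsigned long demo_flags;

static int demo_try_claim(void)
{
    /* returns the old value of bit 0: 0 means we got it */
    return (int)(__atomic_fetch_or(&demo_flags, 1UL, __ATOMIC_ACQUIRE) & 1UL);
}

int main(void)
{
    if (!demo_try_claim())
        printf("claimed\n");        /* first caller wins */
    if (demo_try_claim())
        printf("already busy\n");   /* second caller sees the bit set */
    return 0;
}

/*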
+ */ +static __inline__ int __test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__( + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static __inline__ int __test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its new value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline__ int test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btcl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile void * addr); +#endif + +static __inline__ int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +/** + * find_first_zero_bit - find the first zero bit in a memory region + * @addr: The address to start the search at + * @size: The maximum size to search + * + * Returns the bit-number of the first zero bit, not the number of the byte + * containing a bit. + */ +static __inline__ int find_first_zero_bit(void * addr, unsigned size) +{ + int d0, d1, d2; + int res; + + if (!size) + return 0; + /* This looks at memory. Mark it volatile to tell gcc not to move it around */ + __asm__ __volatile__( + "movl $-1,%%eax\n\t" + "xorl %%edx,%%edx\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "xorl -4(%%edi),%%eax\n\t" + "subl $4,%%edi\n\t" + "bsfl %%eax,%%edx\n" + "1:\tsubl %%ebx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%edx" + :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr)); + return res; +} + +/** + * find_next_zero_bit - find the first zero bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +static __inline__ int find_next_zero_bit (void * addr, int size, int offset) +{ + unsigned long * p = ((unsigned long *) addr) + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in first byte + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); + return (offset + set + res); +} + +/** + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. 
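 */

/*
 * ffz() is the building block for "find a free slot in a bitmap"
 * allocators: a 0 bit means free. A portable sketch of the same
 * semantics (the version below is a single BSF on the inverted word),
 * including the ~0UL guard the comment above asks for.
 */
#include <stdio.h>

static unsigned long demo_ffz(unsigned long word)
{
    unsigned long bit = 0;

    while (word & 1) {   /* walk past the low 1 bits */
        word >>= 1;
        bit++;
    }
    return bit;
}

int main(void)
{
    unsigned long map = 0x17;   /* slots 0, 1, 2 and 4 in use */

    if (map != ~0UL)            /* undefined if no zero bit exists */
        printf("first free slot: %lu\n", demo_ffz(map));  /* 3 */
    return 0;
}

/*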
+ */ +static __inline__ unsigned long ffz(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"r" (~word)); + return word; +} + +#ifdef __KERNEL__ + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static __inline__ int ffs(int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "g" (x)); + return r+1; +} + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +#define hweight32(x) generic_hweight32(x) +#define hweight16(x) generic_hweight16(x) +#define hweight8(x) generic_hweight8(x) + +#endif /* __KERNEL__ */ + +#ifdef __KERNEL__ + +#define ext2_set_bit __test_and_set_bit +#define ext2_clear_bit __test_and_clear_bit +#define ext2_test_bit test_bit +#define ext2_find_first_zero_bit find_first_zero_bit +#define ext2_find_next_zero_bit find_next_zero_bit + +/* Bitmap functions for the minix filesystem. */ +#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr) +#define minix_set_bit(nr,addr) __set_bit(nr,addr) +#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr) +#define minix_test_bit(nr,addr) test_bit(nr,addr) +#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size) + +#endif /* __KERNEL__ */ + +#endif /* _I386_BITOPS_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/boot.h b/xenolinux-2.4.16-sparse/include/asm-xeno/boot.h new file mode 100644 index 0000000000..96b228e6e7 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/boot.h @@ -0,0 +1,15 @@ +#ifndef _LINUX_BOOT_H +#define _LINUX_BOOT_H + +/* Don't touch these, unless you really know what you're doing. */ +#define DEF_INITSEG 0x9000 +#define DEF_SYSSEG 0x1000 +#define DEF_SETUPSEG 0x9020 +#define DEF_SYSSIZE 0x7F00 + +/* Internal svga startup constants */ +#define NORMAL_VGA 0xffff /* 80x25 mode */ +#define EXTENDED_VGA 0xfffe /* 80x50 mode */ +#define ASK_VGA 0xfffd /* ask for it at bootup */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h b/xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h new file mode 100644 index 0000000000..07fee92a17 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/bugs.h @@ -0,0 +1,50 @@ +/* + * include/asm-i386/bugs.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +#include +#include +#include +#include + + +static void __init check_fpu(void) +{ + boot_cpu_data.fdiv_bug = 0; +} + +static void __init check_hlt(void) +{ + boot_cpu_data.hlt_works_ok = 1; +} + +static void __init check_bugs(void) +{ + identify_cpu(&boot_cpu_data); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_fpu(); + check_hlt(); + system_utsname.machine[1] = '0' + + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); +} diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h b/xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h new file mode 100644 index 0000000000..bbfb629fae --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/byteorder.h @@ -0,0 +1,47 @@ +#ifndef _I386_BYTEORDER_H +#define _I386_BYTEORDER_H + +#include + +#ifdef __GNUC__ + +/* For avoiding bswap on i386 */ +#ifdef __KERNEL__ +#include +#endif + +static __inline__ __const__ __u32 ___arch__swab32(__u32 x) +{ +#ifdef CONFIG_X86_BSWAP + __asm__("bswap %0" : "=r" (x) : "0" (x)); +#else + __asm__("xchgb %b0,%h0\n\t" /* swap lower bytes */ + "rorl $16,%0\n\t" /* swap words */ + "xchgb %b0,%h0" /* swap higher bytes */ + :"=q" (x) + : "0" (x)); +#endif + return x; +} + +static __inline__ __const__ __u16 ___arch__swab16(__u16 x) +{ + __asm__("xchgb %b0,%h0" /* swap bytes */ \ + : "=q" (x) \ + : "0" (x)); \ + return x; +} + +#define __arch__swab32(x) ___arch__swab32(x) +#define __arch__swab16(x) ___arch__swab16(x) + +#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) +# define __BYTEORDER_HAS_U64__ +# define __SWAB_64_THRU_32__ +#endif + +#endif /* __GNUC__ */ + +#include + +#endif /* _I386_BYTEORDER_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/cache.h b/xenolinux-2.4.16-sparse/include/asm-xeno/cache.h new file mode 100644 index 0000000000..615911e5bd --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/cache.h @@ -0,0 +1,13 @@ +/* + * include/asm-i386/cache.h + */ +#ifndef __ARCH_I386_CACHE_H +#define __ARCH_I386_CACHE_H + +#include + +/* L1 cache line size */ +#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h b/xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h new file mode 100644 index 0000000000..0de58abeeb --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/checksum.h @@ -0,0 +1,197 @@ +#ifndef _I386_CHECKSUM_H +#define _I386_CHECKSUM_H + + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +asmlinkage unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum, + int *src_err_ptr, int *dst_err_ptr); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * verify_area(). 
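 */

/*
 * The arithmetic behind csum_fold() further below: a 32-bit ones'-
 * complement accumulator is folded to 16 bits by adding the carries
 * back in ("end-around carry") and complementing. A portable sketch
 * of the computation the ADDL/ADCL sequences implement; the sample
 * words are arbitrary.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t demo_csum_fold(uint32_t sum)
{
    sum = (sum & 0xffff) + (sum >> 16);   /* fold the high half in */
    sum = (sum & 0xffff) + (sum >> 16);   /* absorb a possible carry */
    return (uint16_t)~sum;
}

int main(void)
{
    /* running sum over four 16-bit words, as for an IP-style header */
    uint32_t sum = 0x4500 + 0x003c + 0x1c46 + 0x4000;

    printf("folded checksum: 0x%04x\n", demo_csum_fold(sum));
    return 0;
}

/*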
+ */ +static __inline__ +unsigned int csum_partial_copy_nocheck ( const char *src, char *dst, + int len, int sum) +{ + return csum_partial_copy_generic ( src, dst, len, sum, NULL, NULL); +} + +static __inline__ +unsigned int csum_partial_copy_from_user ( const char *src, char *dst, + int len, int sum, int *err_ptr) +{ + return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, NULL); +} + +/* + * These are the old (and unsafe) way of doing checksums; a warning message will be + * printed if they are used and an exception occurs. + * + * These functions should go away after some time. + */ + +#define csum_partial_copy_fromuser csum_partial_copy +unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * + * By Jorge Cwik , adapted for linux by + * Arnt Gulbrandsen. + */ +static inline unsigned short ip_fast_csum(unsigned char * iph, + unsigned int ihl) { + unsigned int sum; + + __asm__ __volatile__(" + movl (%1), %0 + subl $4, %2 + jbe 2f + addl 4(%1), %0 + adcl 8(%1), %0 + adcl 12(%1), %0 +1: adcl 16(%1), %0 + lea 4(%1), %1 + decl %2 + jne 1b + adcl $0, %0 + movl %0, %2 + shrl $16, %0 + addw %w2, %w0 + adcl $0, %0 + notl %0 +2: + " + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl)); + return(sum); +} + +/* + * Fold a partial checksum + */ + +static inline unsigned int csum_fold(unsigned int sum) +{ + __asm__(" + addl %1, %0 + adcl $0xffff, %0 + " + : "=r" (sum) + : "r" (sum << 16), "0" (sum & 0xffff0000) + ); + return (~sum) >> 16; +} + +static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + __asm__(" + addl %1, %0 + adcl %2, %0 + adcl %3, %0 + adcl $0, %0 + " + : "=r" (sum) + : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline unsigned short ip_compute_csum(unsigned char * buff, int len) { + return csum_fold (csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, + __u32 len, + unsigned short proto, + unsigned int sum) +{ + __asm__(" + addl 0(%1), %0 + adcl 4(%1), %0 + adcl 8(%1), %0 + adcl 12(%1), %0 + adcl 0(%2), %0 + adcl 4(%2), %0 + adcl 8(%2), %0 + adcl 12(%2), %0 + adcl %3, %0 + adcl %4, %0 + adcl $0, %0 + " + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static __inline__ unsigned int csum_and_copy_to_user (const char *src, char *dst, + int len, int sum, int *err_ptr) +{ + if (access_ok(VERIFY_WRITE, dst, len)) + return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr); + + if
(len) + *err_ptr = -EFAULT; + + return -1; /* invalid checksum */ +} + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h b/xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h new file mode 100644 index 0000000000..598edbdafe --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/cpufeature.h @@ -0,0 +1,73 @@ +/* + * cpufeature.h + * + * Defines x86 CPU feature bits + */ + +#ifndef __ASM_I386_CPUFEATURE_H +#define __ASM_I386_CPUFEATURE_H + +/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */ +#define CPU_FEATURE_P(CAP, FEATURE) test_bit(CAP, X86_FEATURE_##FEATURE ##_BIT) + +#define NCAPINTS 4 /* Currently we have 4 32-bit words worth of info */ + +/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */ +#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ +#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ +#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ +#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ +#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ + /* of FPU context), and CR4.OSFXSR available */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */ +#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ +#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ +#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! 
*/ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */ + +/* Other features, Linux-defined mapping, word 3 */ +/* This range is used for feature bits which conflict or are synthesized */ +#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ + +#endif /* __ASM_I386_CPUFEATURE_H */ + +/* + * Local Variables: + * mode:c + * comment-column:42 + * End: + */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/current.h b/xenolinux-2.4.16-sparse/include/asm-xeno/current.h new file mode 100644 index 0000000000..bc1496a2c9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/current.h @@ -0,0 +1,15 @@ +#ifndef _I386_CURRENT_H +#define _I386_CURRENT_H + +struct task_struct; + +static inline struct task_struct * get_current(void) +{ + struct task_struct *current; + __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL)); + return current; + } + +#define current get_current() + +#endif /* !(_I386_CURRENT_H) */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h b/xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h new file mode 100644 index 0000000000..f0b2b06ae0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/debugreg.h @@ -0,0 +1,64 @@ +#ifndef _I386_DEBUGREG_H +#define _I386_DEBUGREG_H + + +/* Indicate the register numbers for a number of the specific + debug registers. Registers 0-3 contain the addresses we wish to trap on */ +#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */ +#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */ + +#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */ +#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */ + +/* Define a few things for the status register. We can use this to determine + which debugging register was responsible for the trap. The other bits + are either reserved or not of interest to us. */ + +#define DR_TRAP0 (0x1) /* db0 */ +#define DR_TRAP1 (0x2) /* db1 */ +#define DR_TRAP2 (0x4) /* db2 */ +#define DR_TRAP3 (0x8) /* db3 */ + +#define DR_STEP (0x4000) /* single-step */ +#define DR_SWITCH (0x8000) /* task switch */ + +/* Now define a bunch of things for manipulating the control register. + The top two bytes of the control register consist of 4 fields of 4 + bits - each field corresponds to one of the four debug registers, + and indicates what types of access we trap on, and how large the data + field is that we are looking at */ + +#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */ +#define DR_CONTROL_SIZE 4 /* 4 control bits per register */ + +#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */ +#define DR_RW_WRITE (0x1) +#define DR_RW_READ (0x3) + +#define DR_LEN_1 (0x0) /* Settings for data length to trap on */ +#define DR_LEN_2 (0x4) +#define DR_LEN_4 (0xC) + +/* The low byte to the control register determine which registers are + enabled. There are 4 fields of two bits. One bit is "local", meaning + that the processor will reset the bit after a task switch and the other + is global meaning that we have to explicitly reset the bit. With linux, + you can use either one, since we explicitly zero the register when we enter + kernel mode. 
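 */

/*
 * Putting the debug-register fields together: to watch 4-byte writes
 * through debug register 0, the control word needs the type/length
 * nibble for slot 0 in the top half and the slot's local-enable bit in
 * the low byte, as described above. Standalone sketch; the constants
 * mirror the DR_* values defined in this header.
 */
#include <stdio.h>

int main(void)
{
    unsigned long dr7 = 0;
    int regnum = 0;                 /* which of DR0..DR3 */

    /* type/len nibble: write access (0x1) | 4-byte length (0xC) */
    dr7 |= (0x1UL | 0xCUL) << (16 + 4 * regnum);
    /* local enable: bit 2*regnum of the low byte */
    dr7 |= 1UL << (2 * regnum);

    printf("dr7 = %#lx\n", dr7);    /* 0xd0001 */
    return 0;
}

/*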
*/ + +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ + +#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ +#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */ + +/* The second byte to the control register has a few special things. + We can slow the instruction pipeline for instructions coming via the + gdt or the ldt if we want to. I am not sure why this is an advantage */ + +#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */ +#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ +#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/delay.h b/xenolinux-2.4.16-sparse/include/asm-xeno/delay.h new file mode 100644 index 0000000000..c7d2184929 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/delay.h @@ -0,0 +1,20 @@ +#ifndef _I386_DELAY_H +#define _I386_DELAY_H + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines calling functions in arch/i386/lib/delay.c + */ + +extern void __bad_udelay(void); + +extern void __udelay(unsigned long usecs); +extern void __const_udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) (__builtin_constant_p(n) ? \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + __udelay(n)) + +#endif /* defined(_I386_DELAY_H) */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/desc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/desc.h new file mode 100644 index 0000000000..1920de026a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/desc.h @@ -0,0 +1,11 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#include + +#define __LDT(_X) (0) + +#define clear_LDT() ((void)0) +#define load_LDT(_mm) ((void)0) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/div64.h b/xenolinux-2.4.16-sparse/include/asm-xeno/div64.h new file mode 100644 index 0000000000..ef915df700 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/div64.h @@ -0,0 +1,17 @@ +#ifndef __I386_DIV64 +#define __I386_DIV64 + +#define do_div(n,base) ({ \ + unsigned long __upper, __low, __high, __mod; \ + asm("":"=a" (__low), "=d" (__high):"A" (n)); \ + __upper = __high; \ + if (__high) { \ + __upper = __high % (base); \ + __high = __high / (base); \ + } \ + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \ + asm("":"=A" (n):"a" (__low),"d" (__high)); \ + __mod; \ +}) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/dma.h b/xenolinux-2.4.16-sparse/include/asm-xeno/dma.h new file mode 100644 index 0000000000..1bc9899b20 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/dma.h @@ -0,0 +1,298 @@ +/* $Id: dma.h,v 1.7 1992/12/14 00:29:34 root Exp root $ + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. 
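 */

/*
 * Regarding the do_div() macro a few headers up: it divides a 64-bit
 * value by a 32-bit base *in place* and evaluates to the remainder,
 * which is why callers write "rem = do_div(n, base);". A portable
 * sketch of those semantics (the i386 version uses DIVL on the two
 * halves); note this demo macro evaluates its arguments twice.
 */
#include <stdio.h>

#define demo_do_div(n, base) ({                 \
    unsigned int __rem = (n) % (base);          \
    (n) /= (base);                              \
    __rem;                                      \
})

int main(void)
{
    unsigned long long n = 10000000003ULL;
    unsigned int rem = demo_do_div(n, 1000000000u);

    printf("quotient=%llu remainder=%u\n", n, rem);   /* 10 and 3 */
    return 0;
}

/*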
+ */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include +#include /* And spinlocks */ +#include /* need byte IO */ +#include + + +#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER +#define dma_outb outb_p +#else +#define dma_outb outb +#endif + +#define dma_inb inb + +/* + * NOTES about DMA transfers: + * + * controller 1: channels 0-3, byte operations, ports 00-1F + * controller 2: channels 4-7, word operations, ports C0-DF + * + * - ALL registers are 8 bits only, regardless of transfer size + * - channel 4 is not used - cascades 1 into 2. + * - channels 0-3 are byte - addresses/counts are for physical bytes + * - channels 5-7 are word - addresses/counts are for physical words + * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries + * - transfer count loaded to registers is 1 less than actual count + * - controller 2 offsets are all even (2x offsets for controller 1) + * - page registers for 5-7 don't use data bit 0, represent 128K pages + * - page registers for 0-3 use bit 0, represent 64K pages + * + * DMA transfers are limited to the lower 16MB of _physical_ memory. + * Note that addresses loaded into registers must be _physical_ addresses, + * not logical addresses (which may differ if paging is active). + * + * Address mapping for channels 0-3: + * + * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * | ... | | ... | | ... | + * P7 ... P0 A7 ... A0 A7 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Address mapping for channels 5-7: + * + * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) + * | ... | \ \ ... \ \ \ ... \ \ + * | ... | \ \ ... \ \ \ ... \ (not used) + * | ... | \ \ ... \ \ \ ... \ + * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 + * | Page | Addr MSB | Addr LSB | (DMA registers) + * + * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses + * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at + * the hardware level, so odd-byte transfers aren't possible). + * + * Transfer count (_not # bytes_) is limited to 64K, represented as actual + * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, + * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
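 */

/*
 * The address/count encoding described above, as plain arithmetic: for
 * channels 0-3 a physical address splits into an 8-bit page register
 * value (A23-A16) plus a 16-bit offset, and the count register holds
 * "bytes - 1". Standalone sketch with an arbitrary sample address.
 */
#include <stdio.h>

int main(void)
{
    unsigned int phys = 0x123456;   /* must lie below 16MB */
    unsigned int len  = 4096;       /* bytes, within the 64K limit */

    unsigned char page   = (phys >> 16) & 0xff;   /* -> DMA_PAGE_n */
    unsigned int  offset = phys & 0xffff;         /* -> DMA_ADDR_n */
    unsigned int  count  = len - 1;               /* -> DMA_CNT_n  */

    printf("page=%#x offset=%#x count=%#x\n", page, offset, count);
    return 0;
}

/*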
+ * + */ + +#define MAX_DMA_CHANNELS 8 + +/* The maximum address that we can perform a DMA transfer to on this platform */ +#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ + +#define DMA_ADDR_0 0x00 /* DMA address registers */ +#define DMA_ADDR_1 0x02 +#define DMA_ADDR_2 0x04 +#define DMA_ADDR_3 0x06 +#define DMA_ADDR_4 0xC0 +#define DMA_ADDR_5 0xC4 +#define DMA_ADDR_6 0xC8 +#define DMA_ADDR_7 0xCC + +#define DMA_CNT_0 0x01 /* DMA count registers */ +#define DMA_CNT_1 0x03 +#define DMA_CNT_2 0x05 +#define DMA_CNT_3 0x07 +#define DMA_CNT_4 0xC2 +#define DMA_CNT_5 0xC6 +#define DMA_CNT_6 0xCA +#define DMA_CNT_7 0xCE + +#define DMA_PAGE_0 0x87 /* DMA page registers */ +#define DMA_PAGE_1 0x83 +#define DMA_PAGE_2 0x81 +#define DMA_PAGE_3 0x82 +#define DMA_PAGE_5 0x8B +#define DMA_PAGE_6 0x89 +#define DMA_PAGE_7 0x8A + +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + + +extern spinlock_t dma_spin_lock; + +static __inline__ unsigned long claim_dma_lock(void) +{ + unsigned long flags; + spin_lock_irqsave(&dma_spin_lock, flags); + return flags; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ + spin_unlock_irqrestore(&dma_spin_lock, flags); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +} + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! 
--- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ + if (dmanr<=3) + dma_outb(0, DMA1_CLEAR_FF_REG); + else + dma_outb(0, DMA2_CLEAR_FF_REG); +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ + if (dmanr<=3) + dma_outb(mode | dmanr, DMA1_MODE_REG); + else + dma_outb(mode | (dmanr&3), DMA2_MODE_REG); +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) +{ + switch(dmanr) { + case 0: + dma_outb(pagenr, DMA_PAGE_0); + break; + case 1: + dma_outb(pagenr, DMA_PAGE_1); + break; + case 2: + dma_outb(pagenr, DMA_PAGE_2); + break; + case 3: + dma_outb(pagenr, DMA_PAGE_3); + break; + case 5: + dma_outb(pagenr & 0xfe, DMA_PAGE_5); + break; + case 6: + dma_outb(pagenr & 0xfe, DMA_PAGE_6); + break; + case 7: + dma_outb(pagenr & 0xfe, DMA_PAGE_7); + break; + } +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) +{ + set_dma_page(dmanr, a>>16); + if (dmanr <= 3) { + dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); + } else { + dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); + } +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; + if (dmanr <= 3) { + dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); + } else { + dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); + } +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE + : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr<=3)? 
count : (count<<1); +} + + +/* These are in kernel/dma.c: */ +extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ +extern void free_dma(unsigned int dmanr); /* release it again */ + +/* From PCI */ + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* _ASM_DMA_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/elf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/elf.h new file mode 100644 index 0000000000..9b14bcf6c9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/elf.h @@ -0,0 +1,104 @@ +#ifndef __ASMi386_ELF_H +#define __ASMi386_ELF_H + +/* + * ELF register definitions.. + */ + +#include +#include + +#include + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; +typedef struct user_fxsr_struct elf_fpxregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx + contains a pointer to a function which might be registered using `atexit'. + This provides a mean for the dynamic linker to call DT_FINI functions for + shared libraries that have been loaded before the code runs. + + A value of 0 tells we have no such handler. + + We might as well make sure everything else is cleared too (except for %esp), + just to make things more deterministic. + */ +#define ELF_PLAT_INIT(_r) do { \ + _r->ebx = 0; _r->ecx = 0; _r->edx = 0; \ + _r->esi = 0; _r->edi = 0; _r->ebp = 0; \ + _r->eax = 0; \ +} while (0) + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. */ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* Wow, the "main" arch needs arch dependent functions too.. :) */ + +/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is + now struct_user_regs, they are different) */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + pr_reg[0] = regs->ebx; \ + pr_reg[1] = regs->ecx; \ + pr_reg[2] = regs->edx; \ + pr_reg[3] = regs->esi; \ + pr_reg[4] = regs->edi; \ + pr_reg[5] = regs->ebp; \ + pr_reg[6] = regs->eax; \ + pr_reg[7] = regs->xds; \ + pr_reg[8] = regs->xes; \ + /* fake once used fs and gs selectors? */ \ + pr_reg[9] = regs->xds; /* was fs and __fs */ \ + pr_reg[10] = regs->xds; /* was gs and __gs */ \ + pr_reg[11] = regs->orig_eax; \ + pr_reg[12] = regs->eip; \ + pr_reg[13] = regs->xcs; \ + pr_reg[14] = regs->eflags; \ + pr_reg[15] = regs->esp; \ + pr_reg[16] = regs->xss; + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) + +/* This yields a string that ld.so will use to load implementation + specific libraries for optimization. 
This is more specific in + intent than poking at uname or /proc/cpuinfo. + + For the moment, we have only optimizations for the Intel generations, + but that could change... */ + +#define ELF_PLATFORM (system_utsname.machine) + +#ifdef __KERNEL__ +#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) +#endif + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/errno.h b/xenolinux-2.4.16-sparse/include/asm-xeno/errno.h new file mode 100644 index 0000000000..7cf599f4de --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/errno.h @@ -0,0 +1,132 @@ +#ifndef _I386_ERRNO_H +#define _I386_ERRNO_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Arg list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ +#define ENOSYS 38 /* Function not implemented */ +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ 
+#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h b/xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h new file mode 100644 index 0000000000..41e3c4d914 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/fcntl.h @@ -0,0 +1,87 @@ +#ifndef _I386_FCNTL_H +#define _I386_FCNTL_H + +/* open/fcntl 
- O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW 0400000 /* don't follow links */ + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. */ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ + +struct flock { + short l_type; + short l_whence; + off_t l_start; + off_t l_len; + pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; +}; + +#define F_LINUX_SPECIFIC_BASE 1024 + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h b/xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h new file mode 100644 index 0000000000..381e9e9f3b --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/fixmap.h @@ -0,0 +1,118 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include +#include +#include +#include +#ifdef CONFIG_HIGHMEM +#include +#include +#endif + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. 
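/*
 * Illustrative sketch (not from the original tree): the mechanism
 * here reduces to compile-time arithmetic.  With FIXADDR_TOP =
 * 0xffffe000UL and 4k pages, __fix_to_virt(0) is 0xffffe000,
 * __fix_to_virt(1) is 0xffffd000, and so on downwards, so the enum of
 * indices below doubles as a static address allocator.  PAGE_SIZE and
 * PAGE_SHIFT are assumed from asm/page.h.
 */
static inline int example_fixmap_arithmetic(void)
{
    /* __fix_to_virt() is the unchecked form of fix_to_virt() below */
    return __fix_to_virt(1) + PAGE_SIZE == __fix_to_virt(0);  /* true */
}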
+ * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +/* + * on UP currently we will have no trace of the fixmap mechanizm, + * no page table allocations, etc. This might change in the + * future, say framebuffers for the console driver(s) could be + * fix-mapped? + */ +enum fixed_addresses { +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +#endif +#ifdef CONFIG_X86_VISWS_APIC + FIX_CO_CPU, /* Cobalt timer */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + __end_of_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ +#define FIXADDR_TOP (0xffffe000UL) +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + +extern void __this_fixmap_does_not_exist(void); + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(const unsigned int idx) +{ + /* + * this branch gets completely eliminated after inlining, + * except when someone tries to use fixaddr indices in an + * illegal way. (such as mixing up address types or using + * out-of-range indices). + * + * If it doesn't get removed, the linker will complain + * loudly with a reasonably clear error message.. + */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h b/xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h new file mode 100644 index 0000000000..62f9fd831c --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/floppy.h @@ -0,0 +1,320 @@ +/* + * Architecture specific parts of the Floppy driver + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 + */ +#ifndef __ASM_I386_FLOPPY_H +#define __ASM_I386_FLOPPY_H + +#include + + +/* + * The DMA channel used by the floppy controller cannot access data at + * addresses >= 16MB + * + * Went back to the 1MB limit, as some people had problems with the floppy + * driver otherwise. It doesn't matter much for performance anyway, as most + * floppy accesses go through the track buffer. 
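/*
 * Illustrative sketch (not from the original tree): the canonical ISA
 * DMA programming sequence, using the asm/dma.h helpers added earlier
 * in this changeset; hard_dma_setup() further down follows the same
 * pattern.  claim_dma_lock()/release_dma_lock() and enable_dma() are
 * assumed from the portion of dma.h not shown here, and DMA_MODE_READ
 * is the standard device-to-memory mode value.  Per the comment
 * above, the buffer must live in ISA-reachable memory and must not
 * cross a 64k boundary, since set_dma_addr() can only express one
 * page register plus a 16-bit offset.
 */
static int example_isa_dma_read(unsigned int chan, char *buf,
                                unsigned long size)
{
    unsigned long flags = claim_dma_lock();

    clear_dma_ff(chan);                 /* reset byte-pointer flip-flop */
    set_dma_mode(chan, DMA_MODE_READ);  /* device -> memory */
    set_dma_addr(chan, virt_to_bus(buf));
    set_dma_count(chan, size);          /* helper writes size-1 to hw */
    enable_dma(chan);
    release_dma_lock(flags);
    return 0;
}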
+ */ +#define _CROSS_64KB(a,s,vdma) \ +(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) + +#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1) + + +#define SW fd_routine[use_virtual_dma&1] +#define CSW fd_routine[can_use_virtual_dma & 1] + + +#define fd_inb(port) inb_p(port) +#define fd_outb(port,value) outb_p(port,value) + +#define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") +#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) +#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) +#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) +#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) + +#define FLOPPY_CAN_FALLBACK_ON_NODMA + +static int virtual_dma_count; +static int virtual_dma_residue; +static char *virtual_dma_addr; +static int virtual_dma_mode; +static int doing_pdma; + +static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) +{ + register unsigned char st; + +#undef TRACE_FLPY_INT +#define NO_FLOPPY_ASSEMBLER + +#ifdef TRACE_FLPY_INT + static int calls=0; + static int bytes=0; + static int dma_wait=0; +#endif + if(!doing_pdma) { + floppy_interrupt(irq, dev_id, regs); + return; + } + +#ifdef TRACE_FLPY_INT + if(!calls) + bytes = virtual_dma_count; +#endif + +#ifndef NO_FLOPPY_ASSEMBLER + __asm__ ( + "testl %1,%1 + je 3f +1: inb %w4,%b0 + andb $160,%b0 + cmpb $160,%b0 + jne 2f + incw %w4 + testl %3,%3 + jne 4f + inb %w4,%b0 + movb %0,(%2) + jmp 5f +4: movb (%2),%0 + outb %b0,%w4 +5: decw %w4 + outb %0,$0x80 + decl %1 + incl %2 + testl %1,%1 + jne 1b +3: inb %w4,%b0 +2: " + : "=a" ((char) st), + "=c" ((long) virtual_dma_count), + "=S" ((long) virtual_dma_addr) + : "b" ((long) virtual_dma_mode), + "d" ((short) virtual_dma_port+4), + "1" ((long) virtual_dma_count), + "2" ((long) virtual_dma_addr)); +#else + { + register int lcount; + register char *lptr; + + st = 1; + for(lcount=virtual_dma_count, lptr=virtual_dma_addr; + lcount; lcount--, lptr++) { + st=inb(virtual_dma_port+4) & 0xa0 ; + if(st != 0xa0) + break; + if(virtual_dma_mode) + outb_p(*lptr, virtual_dma_port+5); + else + *lptr = inb_p(virtual_dma_port+5); + } + virtual_dma_count = lcount; + virtual_dma_addr = lptr; + st = inb(virtual_dma_port+4); + } +#endif + +#ifdef TRACE_FLPY_INT + calls++; +#endif + if(st == 0x20) + return; + if(!(st & 0x20)) { + virtual_dma_residue += virtual_dma_count; + virtual_dma_count=0; +#ifdef TRACE_FLPY_INT + printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + virtual_dma_count, virtual_dma_residue, calls, bytes, + dma_wait); + calls = 0; + dma_wait=0; +#endif + doing_pdma = 0; + floppy_interrupt(irq, dev_id, regs); + return; + } +#ifdef TRACE_FLPY_INT + if(!virtual_dma_count) + dma_wait++; +#endif +} + +static void fd_disable_dma(void) +{ + if(! 
(can_use_virtual_dma & 1)) + disable_dma(FLOPPY_DMA); + doing_pdma = 0; + virtual_dma_residue += virtual_dma_count; + virtual_dma_count=0; +} + +static int vdma_request_dma(unsigned int dmanr, const char * device_id) +{ + return 0; +} + +static void vdma_nop(unsigned int dummy) +{ +} + + +static int vdma_get_dma_residue(unsigned int dummy) +{ + return virtual_dma_count + virtual_dma_residue; +} + + +static int fd_request_irq(void) +{ + if(can_use_virtual_dma) + return request_irq(FLOPPY_IRQ, floppy_hardint,SA_INTERRUPT, + "floppy", NULL); + else + return request_irq(FLOPPY_IRQ, floppy_interrupt, + SA_INTERRUPT|SA_SAMPLE_RANDOM, + "floppy", NULL); + +} + +static unsigned long dma_mem_alloc(unsigned long size) +{ + return __get_dma_pages(GFP_KERNEL,get_order(size)); +} + + +static unsigned long vdma_mem_alloc(unsigned long size) +{ + return (unsigned long) vmalloc(size); + +} + +#define nodma_mem_alloc(size) vdma_mem_alloc(size) + +static void _fd_dma_mem_free(unsigned long addr, unsigned long size) +{ + if((unsigned int) addr >= (unsigned int) high_memory) + return vfree((void *)addr); + else + free_pages(addr, get_order(size)); +} + +#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) + +static void _fd_chose_dma_mode(char *addr, unsigned long size) +{ + if(can_use_virtual_dma == 2) { + if((unsigned int) addr >= (unsigned int) high_memory || + virt_to_bus(addr) >= 0x1000000 || + _CROSS_64KB(addr, size, 0)) + use_virtual_dma = 1; + else + use_virtual_dma = 0; + } else { + use_virtual_dma = can_use_virtual_dma & 1; + } +} + +#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) + + +static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + doing_pdma = 1; + virtual_dma_port = io; + virtual_dma_mode = (mode == DMA_MODE_WRITE); + virtual_dma_addr = addr; + virtual_dma_count = size; + virtual_dma_residue = 0; + return 0; +} + +static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) +{ +#ifdef FLOPPY_SANITY_CHECK + if (CROSS_64KB(addr, size)) { + printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); + return -1; + } +#endif + /* actual, physical DMA */ + doing_pdma = 0; + clear_dma_ff(FLOPPY_DMA); + set_dma_mode(FLOPPY_DMA,mode); + set_dma_addr(FLOPPY_DMA,virt_to_bus(addr)); + set_dma_count(FLOPPY_DMA,size); + enable_dma(FLOPPY_DMA); + return 0; +} + +struct fd_routine_l { + int (*_request_dma)(unsigned int dmanr, const char * device_id); + void (*_free_dma)(unsigned int dmanr); + int (*_get_dma_residue)(unsigned int dummy); + unsigned long (*_dma_mem_alloc) (unsigned long size); + int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); +} fd_routine[] = { + { + request_dma, + free_dma, + get_dma_residue, + dma_mem_alloc, + hard_dma_setup + }, + { + vdma_request_dma, + vdma_nop, + vdma_get_dma_residue, + vdma_mem_alloc, + vdma_dma_setup + } +}; + + +static int FDC1 = 0x3f0; +static int FDC2 = -1; + +/* + * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock + * is needed to prevent corrupted CMOS RAM in case "insmod floppy" + * coincides with another rtc CMOS user. Paul G. 
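/*
 * Illustrative sketch (not from the original tree): how the SW/CSW
 * dispatch above is meant to be driven.  fd_dma_setup(...) expands to
 * fd_routine[use_virtual_dma & 1]._dma_setup(...), i.e. it selects
 * hard_dma_setup() for real ISA DMA or vdma_dma_setup() for the
 * interrupt-driven pseudo-DMA path serviced by floppy_hardint().
 */
static int example_fd_start_read(char *addr, unsigned long size, int io)
{
    fd_chose_dma_mode(addr, size);   /* may flip use_virtual_dma */
    return fd_dma_setup(addr, size, DMA_MODE_READ, io);
}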
+ */ +#define FLOPPY0_TYPE ({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = (CMOS_READ(0x10) >> 4) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define FLOPPY1_TYPE ({ \ + unsigned long flags; \ + unsigned char val; \ + spin_lock_irqsave(&rtc_lock, flags); \ + val = CMOS_READ(0x10) & 15; \ + spin_unlock_irqrestore(&rtc_lock, flags); \ + val; \ +}) + +#define N_FDC 2 +#define N_DRIVE 8 + +#define FLOPPY_MOTOR_MASK 0xf0 + +#define AUTO_DMA + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_I386_FLOPPY_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h new file mode 100644 index 0000000000..4acb4b09dd --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hardirq.h @@ -0,0 +1,91 @@ +#ifndef __ASM_HARDIRQ_H +#define __ASM_HARDIRQ_H + +#include +#include +#include + +/* assembly code in softirq.h is sensitive to the offsets of these fields */ +typedef struct { + unsigned int __softirq_pending; + unsigned int __local_irq_count; + unsigned int __local_bh_count; + unsigned int __syscall_count; + struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ + unsigned int __nmi_count; /* arch dependent */ +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +/* + * Are we in an interrupt context? Either doing bottom half + * or hardware interrupt processing? + */ +#define in_interrupt() ({ int __cpu = smp_processor_id(); \ + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) + +#define in_irq() (local_irq_count(smp_processor_id()) != 0) + +#ifndef CONFIG_SMP + +#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) +#define hardirq_endlock(cpu) do { } while (0) + +#define irq_enter(cpu, irq) (local_irq_count(cpu)++) +#define irq_exit(cpu, irq) (local_irq_count(cpu)--) + +#define synchronize_irq() barrier() + +#else + +#include +#include + +extern unsigned char global_irq_holder; +extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */ + +static inline int irqs_running (void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + if (local_irq_count(i)) + return 1; + return 0; +} + +static inline void release_irqlock(int cpu) +{ + /* if we didn't own the irq lock, just ignore.. 
*/ + if (global_irq_holder == (unsigned char) cpu) { + global_irq_holder = NO_PROC_ID; + clear_bit(0,&global_irq_lock); + } +} + +static inline void irq_enter(int cpu, int irq) +{ + ++local_irq_count(cpu); + + while (test_bit(0,&global_irq_lock)) { + cpu_relax(); + } +} + +static inline void irq_exit(int cpu, int irq) +{ + --local_irq_count(cpu); +} + +static inline int hardirq_trylock(int cpu) +{ + return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); +} + +#define hardirq_endlock(cpu) do { } while (0) + +extern void synchronize_irq(void); + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_HARDIRQ_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h new file mode 100644 index 0000000000..1ad5c07394 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hdreg.h @@ -0,0 +1,12 @@ +/* + * linux/include/asm-i386/hdreg.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +#ifndef __ASMi386_HDREG_H +#define __ASMi386_HDREG_H + +typedef unsigned short ide_ioreg_t; + +#endif /* __ASMi386_HDREG_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h b/xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h new file mode 100644 index 0000000000..42f32426ea --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/highmem.h @@ -0,0 +1,129 @@ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_HIGHMEM +#define HIGHMEM_DEBUG 1 +#else +#define HIGHMEM_DEBUG 0 +#endif + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + +extern void kmap_init(void) __init; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +#define PKMAP_BASE (0xfe000000UL) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void * FASTCALL(kmap_high(struct page *page)); +extern void FASTCALL(kunmap_high(struct page *page)); + +static inline void *kmap(struct page *page) +{ + if (in_interrupt()) + BUG(); + if (page < highmem_start_page) + return page_address(page); + return kmap_high(page); +} + +static inline void kunmap(struct page *page) +{ + if (in_interrupt()) + BUG(); + if (page < highmem_start_page) + return; + kunmap_high(page); +} + +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. 
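/*
 * Illustrative sketch (not from the original tree): typical use of
 * the kmap() interface declared above.  kmap() can sleep, so it is
 * for process context only; the kmap_atomic() variant below is what
 * IRQ-context code must use.  memset() is assumed from linux/string.h.
 */
static inline void example_clear_page(struct page *page)
{
    void *kaddr = kmap(page);   /* plain page_address() if not highmem */
    memset(kaddr, 0, PAGE_SIZE);
    kunmap(page);
}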
+ */ +static inline void *kmap_atomic(struct page *page, enum km_type type) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + if (page < highmem_start_page) + return page_address(page); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#if HIGHMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + BUG(); +#endif + set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + __flush_tlb_one(vaddr); + + return (void*) vaddr; +} + +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + unsigned long vaddr = (unsigned long) kvaddr; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + if (vaddr < FIXADDR_START) // FIXME + return; + + if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + BUG(); + + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); +#endif +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_HIGHMEM_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h new file mode 100644 index 0000000000..a7303f0946 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hw_irq.h @@ -0,0 +1,62 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * linux/include/asm/hw_irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + */ + +#include +#include +#include +#include + +#define SYSCALL_VECTOR 0x80 +#define KDBENTER_VECTOR 0x81 + +extern int irq_vector[NR_IRQS]; + +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern char _stext, _etext; + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +/* + * x86 profiling function, SMP safe. We might want to do this in + * assembly totally? + */ +static inline void x86_do_profile (unsigned long eip) +{ + if (!prof_buffer) + return; + + /* + * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. + * (default is all CPUs.) + */ + if (!((1<>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (eip > prof_len-1) + eip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[eip]); +} + +static inline void hw_resend_irq(struct hw_interrupt_type *h, + unsigned int i) +{} + +#endif /* _ASM_HW_IRQ_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h new file mode 100644 index 0000000000..55f7a33ce2 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/block.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * block.h + * + * Block IO communication rings. + * + * These are the ring data structures for buffering messages between + * the hypervisor and guestos's. + * + * For now we'll start with our own rings for the block IO code instead + * of using the network rings. Hopefully, this will give us additional + * flexibility in the future should we choose to move away from a + * ring producer consumer communication model. 
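/*
 * Illustrative sketch (not from the original tree): the intended
 * producer discipline for the blk_ring_t declared just below,
 * mirroring the commentary on the network rings later in this
 * changeset -- the guest produces at tx_prod, the hypervisor consumes
 * at tx_cons, and the ring is empty when the two are equal.  Modulo
 * indexing and the keep-one-slot-empty full test are assumptions
 * here, not spelled out by this header.
 */
static int example_blk_tx_produce(blk_ring_t *ring,
                                  unsigned long addr, unsigned long size)
{
    unsigned int next = (ring->tx_prod + 1) % ring->tx_ring_size;

    if (next == ring->tx_cons)
        return -1;                          /* ring full */
    ring->tx_ring[ring->tx_prod].addr = addr;
    ring->tx_ring[ring->tx_prod].size = size;
    ring->tx_prod = next;                   /* publish last */
    return 0;
}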
+ */ + +#ifndef __BLOCK_H__ +#define __BLOCK_H__ + +typedef struct blk_tx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} blk_tx_entry_t; + +typedef struct blk_rx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} blk_rx_entry_t; + +typedef struct blk_ring_st { + blk_tx_entry_t *tx_ring; + unsigned int tx_prod, tx_cons, tx_event; + unsigned int tx_ring_size; + + blk_rx_entry_t *rx_ring; + unsigned int rx_prod, rx_cons, rx_event; + unsigned int rx_ring_size; +} blk_ring_t; + +int blk_create_ring(int domain, unsigned long ptr); + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h new file mode 100644 index 0000000000..6a362d3635 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/hypervisor-if.h @@ -0,0 +1,271 @@ +/****************************************************************************** + * hypervisor-if.h + * + * Interface to Xeno hypervisor. + */ + +#include +#include + +#ifndef __HYPERVISOR_IF_H__ +#define __HYPERVISOR_IF_H__ + +typedef struct trap_info_st +{ + unsigned char vector; /* exception/interrupt vector */ + unsigned char dpl; /* privilege level */ + unsigned short cs; /* code selector */ + unsigned long address; /* code address */ +} trap_info_t; + + +typedef struct +{ +#define PGREQ_ADD_BASEPTR 0 +#define PGREQ_REMOVE_BASEPTR 1 + unsigned long ptr, val; /* *ptr = val */ +} page_update_request_t; + + +/* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_pt_update 1 +#define __HYPERVISOR_console_write 2 +#define __HYPERVISOR_set_pagetable 3 +#define __HYPERVISOR_set_guest_stack 4 +#define __HYPERVISOR_net_update 5 +#define __HYPERVISOR_fpu_taskswitch 6 +#define __HYPERVISOR_yield 7 +#define __HYPERVISOR_exit 8 +#define __HYPERVISOR_dom0_op 9 +#define __HYPERVISOR_network_op 10 + +#define TRAP_INSTR "int $0x82" + + +static inline int HYPERVISOR_set_trap_table(trap_info_t *table) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table), + "b" (table) ); + + return ret; +} + + +static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_pt_update), + "b" (req), "c" (count) ); + + return ret; +} + + +static inline int HYPERVISOR_console_write(const char *str, int count) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_console_write), + "b" (str), "c" (count) ); + + + return ret; +} + +static inline int HYPERVISOR_set_pagetable(unsigned long ptr) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_pagetable), + "b" (ptr) ); + + return ret; +} + +static inline int HYPERVISOR_set_guest_stack( + unsigned long ss, unsigned long esp) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_set_guest_stack), + "b" (ss), "c" (esp) ); + + return ret; +} + +static inline int HYPERVISOR_net_update(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_net_update) ); + + return ret; +} + +static inline int HYPERVISOR_fpu_taskswitch(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) ); + + return ret; +} + +static inline int 
HYPERVISOR_yield(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_yield) ); + + return ret; +} + +static inline int HYPERVISOR_exit(void) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_exit) ); + + return ret; +} + +static inline int HYPERVISOR_dom0_op(void *dom0_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), + "b" (dom0_op) ); + + return ret; +} + +static inline int HYPERVISOR_network_op(void *network_op) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_network_op), + "b" (network_op) ); + + return ret; +} + +/* Events that a guest OS may receive from the hypervisor. */ +#define EVENT_NET_TX 0x01 /* packets for transmission. */ +#define EVENT_NET_RX 0x02 /* empty buffers for receive. */ +#define EVENT_TIMER 0x04 /* a timeout has been updated. */ +#define EVENT_DIE 0x08 /* OS is about to be killed. Clean up please! */ +#define EVENT_BLK_TX 0x10 /* packets for transmission. */ +#define EVENT_BLK_RX 0x20 /* empty buffers for receive. */ + +/* Bit offsets, as opposed to the above masks. */ +#define _EVENT_NET_TX 0 +#define _EVENT_NET_RX 1 +#define _EVENT_TIMER 2 +#define _EVENT_DIE 3 +#define _EVENT_BLK_TX 4 +#define _EVENT_BLK_RX 5 + +/* + * NB. We expect that this struct is smaller than a page. + */ +typedef struct shared_info_st { + + /* Bitmask of outstanding event notifications hypervisor -> guest OS. */ + unsigned long events; + /* + * Hypervisor will only signal event delivery via the "callback + * exception" when this value is non-zero. Hypervisor clears this when + * notiying the guest OS -- thsi prevents unbounded reentrancy and + * stack overflow (in this way, acts as an interrupt-enable flag). + */ + unsigned long events_enable; + + /* + * Address for callbacks hypervisor -> guest OS. + * Stack frame looks like that of an interrupt. + * Code segment is the default flat selector. + * This handler will only be called when events_enable is non-zero. + */ + unsigned long event_address; + + /* + * Hypervisor uses this callback when it takes a fault on behalf of + * an application. This can happen when returning from interrupts for + * example: various faults can occur when reloading the segment + * registers, and executing 'iret'. + * This callback is provided with an extended stack frame, augmented + * with saved values for segment registers %ds and %es: + * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] + * Code segment is the default flat selector. + * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!! + */ + unsigned long failsafe_address; + + /* + * CPU ticks since start of day. + * `wall_time' counts CPU ticks in real time. + * `domain_time' counts CPU ticks during which this domain has run. + */ + unsigned long ticks_per_ms; /* CPU ticks per millisecond */ + /* + * Current wall_time can be found by rdtsc. Only possible use of + * variable below is that it provides a timestamp for last update + * of domain_time. + */ + unsigned long long wall_time; + unsigned long long domain_time; + + /* + * Timeouts for points at which guest OS would like a callback. + * This will probably be backed up by a timer heap in the guest OS. + * In Linux we use timeouts to update 'jiffies'. + */ + unsigned long long wall_timeout; + unsigned long long domain_timeout; + + /* + * Real-Time Clock. This shows time, in seconds, since 1.1.1980. + * The timestamp shows the CPU 'wall time' when RTC was last read. 
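/*
 * Illustrative sketch (not from the original tree): event delivery as
 * described above, seen from the guest.  The routine installed at
 * event_address might drain the 'events' bitmask like this;
 * test_and_clear_bit() is assumed from asm/bitops.h, and the
 * events_enable handshake follows the comment above.
 */
static void example_guest_event_upcall(void)
{
    shared_info_t *s = HYPERVISOR_shared_info;

    s->events_enable = 0;                   /* block nested upcalls */
    while (s->events) {
        if (test_and_clear_bit(_EVENT_NET_RX, &s->events))
            ;   /* call the guest's net-rx handler (hypothetical) */
        if (test_and_clear_bit(_EVENT_BLK_TX, &s->events))
            ;   /* call the guest's blk-tx handler (hypothetical) */
        if (test_and_clear_bit(_EVENT_DIE, &s->events))
            HYPERVISOR_exit();              /* hypervisor asked us to die */
    }
    s->events_enable = 1;                   /* re-arm delivery */
}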
+ * Thus it allows a mapping between 'real time' and 'wall time'. + */ + unsigned long rtc_time; + unsigned long long rtc_timestamp; + +} shared_info_t; + +/* + * NB. We expect that this struct is smaller than a page. + */ +typedef struct start_info_st { + unsigned long nr_pages; /* total pages allocated to this domain */ + shared_info_t *shared_info; /* start address of shared info struct */ + unsigned long pt_base; /* address of page directory */ + unsigned long phys_base; + unsigned long mod_start; /* start address of pre-loaded module */ + unsigned long mod_len; /* size (bytes) of pre-loaded module */ + net_ring_t *net_rings; + int num_net_rings; + blk_ring_t *blk_ring; /* block io communication rings */ + unsigned char cmd_line[1]; /* variable-length */ +} start_info_t; + +/* For use in guest OSes. */ +extern shared_info_t *HYPERVISOR_shared_info; + +#endif /* __HYPERVISOR_IF_H__ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h new file mode 100644 index 0000000000..c10bef0d4c --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor-ifs/network.h @@ -0,0 +1,129 @@ +/****************************************************************************** + * network.h + * + * ring data structures for buffering messages between hypervisor and + * guestos's. As it stands this is only used for network buffer exchange. + * + * This file also contains structures and interfaces for the per-domain + * routing/filtering tables in the hypervisor. + * + */ + +#ifndef __RING_H__ +#define __RING_H__ + +#include + +typedef struct tx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} tx_entry_t; + +typedef struct rx_entry_st { + unsigned long addr; /* virtual address */ + unsigned long size; /* in bytes */ +} rx_entry_t; + +#define TX_RING_SIZE 1024 +#define RX_RING_SIZE 1024 +typedef struct net_ring_st { + /* + * Guest OS places packets into ring at tx_prod. + * Hypervisor removes at tx_cons. + * Ring is empty when tx_prod == tx_cons. + * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event. + * Hypervisor may be prodded whenever tx_prod is updated, but this is + * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled). + */ + tx_entry_t *tx_ring; + unsigned int tx_prod, tx_cons, tx_event; + unsigned int tx_ring_size; + /* + * Guest OS places empty buffers into ring at rx_prod. + * Hypervisor fills buffers as rx_cons. + * Ring is empty when rx_prod == rx_cons. + * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event. + * Hypervisor may be prodded whenever rx_prod is updated, but this is + * only necessary when rx_cons == old_rx_prod (ie. receiver stalled). + */ + rx_entry_t *rx_ring; + unsigned int rx_prod, rx_cons, rx_event; + unsigned int rx_ring_size; +} net_ring_t; + +/* net_vif_st is the larger struct that describes a virtual network interface + * it contains a pointer to the net_ring_t structure that needs to be on a + * shared page between the hypervisor and guest. The vif struct is private + * to the hypervisor and is used primarily as a container to allow routing + * and interface administration. This define should eventually be moved to + * a non-shared interface file, as it is of no relevance to the guest. + */ + +typedef struct net_vif_st { + net_ring_t *net_ring; + int id; + // rules table goes here in next revision. +} net_vif_t; + +/* VIF-related defines. 
*/ +#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct +#define MAX_SYSTEM_VIFS 256 // trying to avoid dynamic allocation + +/* vif globals */ +extern int sys_vif_count; + +/* This is here for consideration: Having a global lookup for vifs + * may make the guest /proc stuff more straight forward, and could + * be used in the routing code. I don't know if it warrants the + * overhead yet. + */ + +/* net_vif_t sys_vif_list[MAX_SYSTEM_VIFS]; */ + +/* Specify base of per-domain array. Get returned free slot in the array. */ +net_ring_t *create_net_vif(int domain); + +/* Packet routing/filtering code follows: + */ + +#define NETWORK_ACTION_DROP 0 +#define NETWORK_ACTION_PASS 1 + +typedef struct net_rule_st +{ + u32 src_addr; + u32 dst_addr; + u16 src_port; + u16 dst_port; + u32 src_addr_mask; + u32 dst_addr_mask; + u16 src_port_mask; + u16 dst_port_mask; + + int src_interface; + int dst_interface; + int action; +} net_rule_t; + +/* Network trap operations and associated structure. + * This presently just handles rule insertion and deletion, but will + * evenually have code to add and remove interfaces. + */ + +#define NETWORK_OP_ADDRULE 0 +#define NETWORK_OP_DELETERULE 1 + +typedef struct network_op_st +{ + unsigned long cmd; + union + { + net_rule_t net_rule; + } + u; +} network_op_t; + +/* Drop a new rule down to the network tables. */ +int add_net_rule(net_rule_t *rule); + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h new file mode 100644 index 0000000000..62f5b6d023 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/hypervisor.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + +#include +#include + +/* arch/xeno/kernel/setup.c */ +union start_info_union +{ + start_info_t start_info; + char padding[512]; +}; +extern union start_info_union start_info_union; +#define start_info (start_info_union.start_info) + +/* arch/xeno/kernel/hypervisor.c */ +void do_hypervisor_callback(struct pt_regs *regs); + +/* arch/xeno/mm/hypervisor.c */ +/* + * NB. ptr values should be fake-physical. 'vals' should be alread + * fully adjusted (ie. for start_info.phys_base). + */ +void queue_l1_entry_update(unsigned long ptr, unsigned long val); +void queue_l2_entry_update(unsigned long ptr, unsigned long val); +void queue_baseptr_create(unsigned long ptr); +void queue_baseptr_remove(unsigned long ptr); +void queue_tlb_flush(void); +void queue_tlb_flush_one(unsigned long ptr); +void flush_page_update_queue(void); + +#endif /* __HYPERVISOR_H__ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/i387.h b/xenolinux-2.4.16-sparse/include/asm-xeno/i387.h new file mode 100644 index 0000000000..1cf8dc2ab3 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/i387.h @@ -0,0 +1,88 @@ +/* + * include/asm-i386/i387.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ + +#ifndef __ASM_I386_I387_H +#define __ASM_I386_I387_H + +#include +#include +#include +#include + +extern void init_fpu(void); +/* + * FPU lazy state save handling... 
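/*
 * Illustrative sketch (not from the original tree): intended use of
 * the update queue declared in asm-xeno/hypervisor.h above.  Callers
 * batch PTE writes and pay for a single hypercall when the queue is
 * flushed, rather than one trap per entry.  Whether the TLB flush
 * travels through the same queue is an assumption here.
 */
static void example_update_ptes(unsigned long *ptrs,  /* fake-physical */
                                unsigned long *vals,  /* pre-adjusted  */
                                int n)
{
    int i;

    for (i = 0; i < n; i++)
        queue_l1_entry_update(ptrs[i], vals[i]);
    queue_tlb_flush();
    flush_page_update_queue();   /* one batched hypercall for the lot */
}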
+ */ +extern void save_init_fpu( struct task_struct *tsk ); +extern void restore_fpu( struct task_struct *tsk ); + +extern void kernel_fpu_begin(void); +#define kernel_fpu_end() stts() + + +#define unlazy_fpu( tsk ) do { \ + if ( tsk->flags & PF_USEDFPU ) \ + save_init_fpu( tsk ); \ +} while (0) + +#define clear_fpu( tsk ) do { \ + if ( tsk->flags & PF_USEDFPU ) { \ + asm volatile("fwait"); \ + tsk->flags &= ~PF_USEDFPU; \ + stts(); \ + } \ +} while (0) + +/* + * FPU state interaction... + */ +extern unsigned short get_fpu_cwd( struct task_struct *tsk ); +extern unsigned short get_fpu_swd( struct task_struct *tsk ); +extern unsigned short get_fpu_twd( struct task_struct *tsk ); +extern unsigned short get_fpu_mxcsr( struct task_struct *tsk ); + +extern void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ); +extern void set_fpu_swd( struct task_struct *tsk, unsigned short swd ); +extern void set_fpu_twd( struct task_struct *tsk, unsigned short twd ); +extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ); + +#define load_mxcsr( val ) do { \ + unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ + asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ +} while (0) + +/* + * Signal frame handlers... + */ +extern int save_i387( struct _fpstate *buf ); +extern int restore_i387( struct _fpstate *buf ); + +/* + * ptrace request handers... + */ +extern int get_fpregs( struct user_i387_struct *buf, + struct task_struct *tsk ); +extern int set_fpregs( struct task_struct *tsk, + struct user_i387_struct *buf ); + +extern int get_fpxregs( struct user_fxsr_struct *buf, + struct task_struct *tsk ); +extern int set_fpxregs( struct task_struct *tsk, + struct user_fxsr_struct *buf ); + +/* + * FPU state for core dumps... + */ +extern int dump_fpu( struct pt_regs *regs, + struct user_i387_struct *fpu ); +extern int dump_extended_fpu( struct pt_regs *regs, + struct user_fxsr_struct *fpu ); + +#endif /* __ASM_I386_I387_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ide.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ide.h new file mode 100644 index 0000000000..6ac787665c --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ide.h @@ -0,0 +1,116 @@ +/* + * linux/include/asm-i386/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* + * This file contains the i386 architecture specific IDE code. 
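/*
 * Illustrative sketch (not from the original tree): how the lazy-FPU
 * macros above are meant to be used.  On a switch away from 'prev'
 * the state is written back only if PF_USEDFPU says the task really
 * touched the FPU; clear_fpu() is the discard variant (e.g. on exec).
 */
static inline void example_switch_away_from(struct task_struct *prev)
{
    unlazy_fpu(prev);   /* save_init_fpu() only when actually dirty */
}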
+ */ + +#ifndef __ASMi386_IDE_H +#define __ASMi386_IDE_H + +#ifdef __KERNEL__ + +#include + +#ifndef MAX_HWIFS +# ifdef CONFIG_BLK_DEV_IDEPCI +#define MAX_HWIFS 10 +# else +#define MAX_HWIFS 6 +# endif +#endif + +#define ide__sti() __sti() + +static __inline__ int ide_default_irq(ide_ioreg_t base) +{ + switch (base) { + case 0x1f0: return 14; + case 0x170: return 15; + case 0x1e8: return 11; + case 0x168: return 10; + case 0x1e0: return 8; + case 0x160: return 12; + default: + return 0; + } +} + +static __inline__ ide_ioreg_t ide_default_io_base(int index) +{ + switch (index) { + case 0: return 0x1f0; + case 1: return 0x170; + case 2: return 0x1e8; + case 3: return 0x168; + case 4: return 0x1e0; + case 5: return 0x160; + default: + return 0; + } +} + +static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq) +{ + ide_ioreg_t reg = data_port; + int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { + hw->io_ports[i] = reg; + reg += 1; + } + if (ctrl_port) { + hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; + } else { + hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; + } + if (irq != NULL) + *irq = 0; + hw->io_ports[IDE_IRQ_OFFSET] = 0; +} + +static __inline__ void ide_init_default_hwifs(void) +{ +#ifndef CONFIG_BLK_DEV_IDEPCI + hw_regs_t hw; + int index; + + for(index = 0; index < MAX_HWIFS; index++) { + ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); + hw.irq = ide_default_irq(ide_default_io_base(index)); + ide_register_hw(&hw, NULL); + } +#endif /* CONFIG_BLK_DEV_IDEPCI */ +} + +typedef union { + unsigned all : 8; /* all of the bits together */ + struct { + unsigned head : 4; /* always zeros here */ + unsigned unit : 1; /* drive select number, 0 or 1 */ + unsigned bit5 : 1; /* always 1 */ + unsigned lba : 1; /* using LBA instead of CHS */ + unsigned bit7 : 1; /* always 1 */ + } b; + } select_t; + +#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) +#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) +#define ide_check_region(from,extent) check_region((from), (extent)) +#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) +#define ide_release_region(from,extent) release_region((from), (extent)) + +/* + * The following are not needed for the non-m68k ports + */ +#define ide_ack_intr(hwif) (1) +#define ide_fix_driveid(id) do {} while (0) +#define ide_release_lock(lock) do {} while (0) +#define ide_get_lock(lock, hdlr, data) do {} while (0) + +#endif /* __KERNEL__ */ + +#endif /* __ASMi386_IDE_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/init.h b/xenolinux-2.4.16-sparse/include/asm-xeno/init.h new file mode 100644 index 0000000000..17d2155741 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/init.h @@ -0,0 +1 @@ +#error " should never be used - use instead" diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h new file mode 100644 index 0000000000..250b64fac8 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h @@ -0,0 +1,311 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). 
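/*
 * Illustrative sketch (not from the original tree): the legacy probe
 * that ide_init_default_hwifs() above performs, unrolled for the
 * primary interface.  Per ide_default_io_base()/ide_default_irq(),
 * base 0x1f0 pairs with IRQ 14, 0x170 with IRQ 15, and so on.
 */
static void example_register_primary_ide(void)
{
    hw_regs_t hw;

    ide_init_hwif_ports(&hw, ide_default_io_base(0), 0, NULL);
    hw.irq = ide_default_irq(ide_default_io_base(0));   /* 0x1f0 -> 14 */
    ide_register_hw(&hw, NULL);
}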
+ * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ + +#define IO_SPACE_LIMIT 0xffff + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_LEN 0x40000 /* 256k per quad. Only remapping 1st */ + +#ifdef __KERNEL__ + +#include + +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. + */ +#if CONFIG_DEBUG_IOVIRT + extern void *__io_virt_debug(unsigned long x, const char *file, int line); + extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); + #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) +//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) +#else + #define __io_virt(x) ((void *)(x)) +//#define __io_phys(x) __pa(x) +#endif + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) + +/* + * IO bus memory addresses are also 1:1 with the physical address + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt +#define page_to_bus page_to_phys + +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the x86 architecture, we just read/write the + * memory location directly. + */ + +#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) +#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) +#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) +#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) +#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +#define memset_io(a,b,c) memset(__io_virt(a),(b),(c)) +#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c)) +#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c)) + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. 
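/*
 * Illustrative sketch (not from the original tree): the two address
 * domains above in use.  Devices are handed bus addresses via
 * virt_to_bus(); memory-mapped registers are touched only through
 * readl()/writel().  The register offsets and the ioremap()ed base
 * are hypothetical; flush_write_buffers() is defined further down.
 */
static void example_mmio_kick(unsigned long regs, void *desc)
{
    writel(virt_to_bus(desc), regs + 0x10);   /* descriptor base (made up) */
    writel(1, regs + 0x14);                   /* 'go' bit (made up) */
    flush_write_buffers();                    /* order before device reads */
}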
The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) +#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) +#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) +#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) + + +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) +#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) + +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); +} + +#define dma_cache_inv(_start,_size) flush_write_buffers() +#define dma_cache_wback(_start,_size) flush_write_buffers() +#define dma_cache_wback_inv(_start,_size) flush_write_buffers() + +#else + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) +#define flush_write_buffers() + +#endif + +#endif /* __KERNEL__ */ + +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#else +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + +#ifdef CONFIG_MULTIQUAD +extern void *xquad_portio; /* Where the IO area was mapped */ +#endif /* CONFIG_MULTIQUAD */ + +/* + * Talk about misusing macros.. 
+ */ +#define __OUT1(s,x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" + +#ifdef CONFIG_MULTIQUAD +/* Make the default portio routines operate on quad 0 for now */ +#define __OUT(s,s1,x) \ +__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUTQ0(s,s,x) \ +__OUTQ0(s,s##_p,x) +#else +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} +#endif /* CONFIG_MULTIQUAD */ + +#ifdef CONFIG_MULTIQUAD +#define __OUTQ0(s,ss,x) /* Do the equivalent of the portio op on quad 0 */ \ +static inline void out##ss(unsigned x value, unsigned short port) { \ + if (xquad_portio) \ + write##s(value, (unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + out##ss##_local(value, port); \ +} + +#define __INQ0(s,ss) /* Do the equivalent of the portio op on quad 0 */ \ +static inline RETURN_TYPE in##ss(unsigned short port) { \ + if (xquad_portio) \ + return read##s((unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + return in##ss##_local(port); \ +} +#endif /* CONFIG_MULTIQUAD */ + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; + +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" + +#ifdef CONFIG_MULTIQUAD +#define __IN(s,s1,i...) \ +__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__INQ0(s,s) \ +__INQ0(s,s##_p) +#else +#define __IN(s,s1,i...) \ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } +#endif /* CONFIG_MULTIQUAD */ + +#define __INS(s) \ +static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; ins" #s \ +: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define __OUTS(s) \ +static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; outs" #s \ +: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h new file mode 100644 index 0000000000..b132819e08 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io_apic.h @@ -0,0 +1,147 @@ +#ifndef __ASM_IO_APIC_H +#define __ASM_IO_APIC_H + +#include +#include + +/* + * Intel IO-APIC support for SMP and UP systems. 
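/*
 * Illustrative note (not from the original tree): the __IN/__OUT
 * machinery above expands to ordinary inline stubs.  In the
 * non-MULTIQUAD case, __OUT(b,"b",char) yields, in effect:
 *
 *   static inline void outb(unsigned char value, unsigned short port)
 *   {
 *       __asm__ __volatile__ ("outb %b0,%w1"
 *                             : : "a" (value), "Nd" (port));
 *   }
 *
 * and outb_p() is the same plus the __FULL_SLOW_DOWN_IO pause.
 */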
+ * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar + */ + +#ifdef CONFIG_X86_IO_APIC + +#define APIC_MISMATCH_DEBUG + +#define IO_APIC_BASE(idx) \ + ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) + +/* + * The structure of the IO-APIC: + */ +struct IO_APIC_reg_00 { + __u32 __reserved_2 : 24, + ID : 4, + __reserved_1 : 4; +} __attribute__ ((packed)); + +struct IO_APIC_reg_01 { + __u32 version : 8, + __reserved_2 : 7, + PRQ : 1, + entries : 8, + __reserved_1 : 8; +} __attribute__ ((packed)); + +struct IO_APIC_reg_02 { + __u32 __reserved_2 : 24, + arbitration : 4, + __reserved_1 : 4; +} __attribute__ ((packed)); + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +struct IO_APIC_route_entry { + __u32 vector : 8, + delivery_mode : 3, /* 000: FIXED + * 001: lowest prio + * 111: ExtINT + */ + dest_mode : 1, /* 0: physical, 1: logical */ + delivery_status : 1, + polarity : 1, + irr : 1, + trigger : 1, /* 0: edge, 1: level */ + mask : 1, /* 0: enabled, 1: disabled */ + __reserved_2 : 15; + + union { struct { __u32 + __reserved_1 : 24, + physical_dest : 4, + __reserved_2 : 4; + } physical; + + struct { __u32 + __reserved_1 : 24, + logical_dest : 8; + } logical; + } dest; + +} __attribute__ ((packed)); + +/* + * MP-BIOS irq configuration table structures: + */ + +/* I/O APIC entries */ +extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; + +/* # of MP IRQ source entries */ +extern int mp_irq_entries; + +/* MP IRQ source entries */ +extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* non-0 if default (table-less) MP configuration */ +extern int mpc_default_type; + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = value; +} + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + */ +static inline void io_apic_modify(unsigned int apic, unsigned int value) +{ + *(IO_APIC_BASE(apic)+4) = value; +} + +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + (void) *(IO_APIC_BASE(apic)+4); +} + +/* 1 if "noapic" boot option passed */ +extern int skip_ioapic_setup; + +/* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. + */ +#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup) + +#else /* !CONFIG_X86_IO_APIC */ +#define io_apic_assign_pci_irqs 0 +#endif + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h new file mode 100644 index 0000000000..c75f20ade6 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctl.h @@ -0,0 +1,75 @@ +/* $Id: ioctl.h,v 1.5 1993/07/19 21:53:50 root Exp root $ + * + * linux/ioctl.h for Linux by H.H. Bergman. + */ + +#ifndef _ASMI386_IOCTL_H +#define _ASMI386_IOCTL_H + +/* ioctl command encoding: 32 bits total, command in lower 16 bits, + * size of the parameter structure in the lower 14 bits of the + * upper 16 bits. 
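+ * For instance, a hypothetical driver command defined as + * + * #define MYDEV_GETCOUNT _IOR('k', 0x01, int) + * + * packs direction _IOC_READ, type 'k', number 0x01 and size sizeof(int) into a single 32-bit value, and the driver recovers each field again with the _IOC_DIR(), _IOC_TYPE(), _IOC_NR() and _IOC_SIZE() macros below.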
+ * Encoding the size of the parameter structure in the ioctl request + * is useful for catching programs compiled with old versions + * and to avoid overwriting user space outside the user buffer area. + * The highest 2 bits are reserved for indicating the ``access mode''. + * NOTE: This limits the max parameter size to 16kB -1 ! + */ + +/* + * The following is for compatibility across the various Linux + * platforms. The i386 ioctl numbering scheme doesn't really enforce + * a type field. De facto, however, the top 8 bits of the lower 16 + * bits are indeed used as a type field, so we might just as well make + * this explicit here. Please be sure to use the decoding macros + * below from now on. + */ +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 14 +#define _IOC_DIRBITS 2 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits. + */ +#define _IOC_NONE 0U +#define _IOC_WRITE 1U +#define _IOC_READ 2U + +#define _IOC(dir,type,nr,size) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT)) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode ioctl numbers.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* ...and for the drivers/sound files... 
*/ + +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _ASMI386_IOCTL_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h new file mode 100644 index 0000000000..9fc340a8aa --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ioctls.h @@ -0,0 +1,82 @@ +#ifndef __ARCH_I386_IOCTLS_H__ +#define __ARCH_I386_IOCTLS_H__ + +#include + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS 0x5401 +#define TCSETS 0x5402 +#define TCSETSW 0x5403 +#define TCSETSF 0x5404 +#define TCGETA 0x5405 +#define TCSETA 0x5406 +#define TCSETAW 0x5407 +#define TCSETAF 0x5408 +#define TCSBRK 0x5409 +#define TCXONC 0x540A +#define TCFLSH 0x540B +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP 0x540F +#define TIOCSPGRP 0x5410 +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCTTYGSTRUCT 0x5426 /* For debugging only */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define FIONCLEX 0x5450 /* these numbers need to be adjusted. */ +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h new file mode 100644 index 0000000000..36f43063ad --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ipc.h @@ -0,0 +1,31 @@ +#ifndef __i386_IPC_H__ +#define __i386_IPC_H__ + +/* + * These are used to wrap system calls on x86. + * + * See arch/i386/kernel/sys_i386.c for ugly details.. 
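+ * (Roughly: the C library funnels the SysV IPC calls through the single sys_ipc() entry point, and msgrcv() has one argument more than that entry point can carry directly, so the last two are passed indirectly; a sketch of the library side is + * + * struct ipc_kludge tmp; + * tmp.msgp = msgp; + * tmp.msgtyp = msgtyp; + * return sys_ipc(MSGRCV, msqid, msgsz, msgflg, &tmp); + * + * and the kernel unpacks the pair again.)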
+ */ +struct ipc_kludge { + struct msgbuf *msgp; + long msgtyp; +}; + +#define SEMOP 1 +#define SEMGET 2 +#define SEMCTL 3 +#define MSGSND 11 +#define MSGRCV 12 +#define MSGGET 13 +#define MSGCTL 14 +#define SHMAT 21 +#define SHMDT 22 +#define SHMGET 23 +#define SHMCTL 24 + +/* Used by the DIPC package, try and avoid reusing it */ +#define DIPC 25 + +#define IPCCALL(version,op) ((version)<<16 | (op)) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h new file mode 100644 index 0000000000..0dcad4f84c --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ipcbuf.h @@ -0,0 +1,29 @@ +#ifndef __i386_IPCBUF_H__ +#define __i386_IPCBUF_H__ + +/* + * The ipc64_perm structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 32-bit mode_t and seq + * - 2 miscellaneous 32-bit values + */ + +struct ipc64_perm +{ + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned short __pad2; + unsigned long __unused1; + unsigned long __unused2; +}; + +#endif /* __i386_IPCBUF_H__ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/irq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/irq.h new file mode 100644 index 0000000000..f5f65b8b82 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/irq.h @@ -0,0 +1,30 @@ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +/* + * linux/include/asm/irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * + */ + +#include +#include +#include + +#define NET_TX_IRQ _EVENT_NET_TX +#define NET_RX_IRQ _EVENT_NET_RX +#define TIMER_IRQ _EVENT_TIMER + +#define NR_IRQS (sizeof(HYPERVISOR_shared_info->events) * 8) + +#define irq_cannonicalize(_irq) (_irq) + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); +extern unsigned int do_IRQ(int, struct pt_regs *); + +#endif /* _ASM_IRQ_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h new file mode 100644 index 0000000000..315bde8adf --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/kdb.h @@ -0,0 +1,62 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + */ +#if !defined(_ASM_KDB_H) +#define _ASM_KDB_H + + /* + * KDB_ENTER() is a macro which causes entry into the kernel + * debugger from any point in the kernel code stream. If it + * is intended to be used from interrupt level, it must use + * a non-maskable entry method. + */ +#define KDB_ENTER() asm("\tint $129\n") + + /* + * Define the exception frame for this architeture + */ +struct pt_regs; +typedef struct pt_regs *kdb_eframe_t; + + /* + * Needed for exported symbols. 
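+ * kdb_machreg_t and the kdb_machreg_fmt* strings below let shared debugger code print registers without knowing their width, e.g. (assuming the usual kdb_printf() helper and a struct pt_regs pointer regs) + * + * kdb_printf("eip = " kdb_machreg_fmt0 "\n", regs->eip);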
+ */ +typedef unsigned long kdb_machreg_t; + +#define kdb_machreg_fmt "0x%lx" +#define kdb_machreg_fmt0 "0x%08lx" +#define kdb_bfd_vma_fmt "0x%lx" +#define kdb_bfd_vma_fmt0 "0x%08lx" +#define kdb_elfw_addr_fmt "0x%x" +#define kdb_elfw_addr_fmt0 "0x%08x" + + /* + * Per cpu arch specific kdb state. Must be in range 0xff000000. + */ +#define KDB_STATE_A_IF 0x01000000 /* Saved IF flag */ + + /* + * Interface from kernel trap handling code to kernel debugger. + */ +extern int kdba_callback_die(struct pt_regs *, int, long, void*); +extern int kdba_callback_bp(struct pt_regs *, int, long, void*); +extern int kdba_callback_debug(struct pt_regs *, int, long, void *); + +#endif /* ASM_KDB_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h new file mode 100644 index 0000000000..7bbd86b70a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/kdbprivate.h @@ -0,0 +1,178 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * See the file LIA-COPYRIGHT for additional information. + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. + * Scott Foehner + * Port to ia64 + * Scott Lurndal 1999/12/12 + * v1.0 restructuring. + * Keith Owens 2000/05/23 + * KDB v1.2 + */ +#if !defined(_ASM_KDBPRIVATE_H) +#define _ASM_KDBPRIVATE_H + +typedef unsigned char kdb_machinst_t; + + /* + * KDB_MAXBPT describes the total number of breakpoints + * supported by this architecure. + */ +#define KDB_MAXBPT 16 + /* + * KDB_MAXHARDBPT describes the total number of hardware + * breakpoint registers that exist. + */ +#define KDB_MAXHARDBPT 4 + /* + * Provide space for KDB_MAX_COMMANDS commands. 
+ */ +#define KDB_MAX_COMMANDS 125 + + /* + * Platform specific environment entries + */ +#define KDB_PLATFORM_ENV "IDMODE=x86", "BYTESPERWORD=4", "IDCOUNT=16" + + /* + * Define the direction that the stack grows + */ +#define KDB_STACK_DIRECTION (-1) /* Stack grows down */ + + /* + * Support for ia32 debug registers + */ +typedef struct _kdbhard_bp { + kdb_machreg_t bph_reg; /* Register this breakpoint uses */ + + unsigned int bph_free:1; /* Register available for use */ + unsigned int bph_data:1; /* Data Access breakpoint */ + + unsigned int bph_write:1; /* Write Data breakpoint */ + unsigned int bph_mode:2; /* 0=inst, 1=write, 2=io, 3=read */ + unsigned int bph_length:2; /* 0=1, 1=2, 2=BAD, 3=4 (bytes) */ +} kdbhard_bp_t; + +extern kdbhard_bp_t kdb_hardbreaks[/* KDB_MAXHARDBPT */]; + +#define IA32_BREAKPOINT_INSTRUCTION 0xcc + +#define DR6_BT 0x00008000 +#define DR6_BS 0x00004000 +#define DR6_BD 0x00002000 + +#define DR6_B3 0x00000008 +#define DR6_B2 0x00000004 +#define DR6_B1 0x00000002 +#define DR6_B0 0x00000001 + +#define DR7_RW_VAL(dr, drnum) \ + (((dr) >> (16 + (4 * (drnum)))) & 0x3) + +#define DR7_RW_SET(dr, drnum, rw) \ + do { \ + (dr) &= ~(0x3 << (16 + (4 * (drnum)))); \ + (dr) |= (((rw) & 0x3) << (16 + (4 * (drnum)))); \ + } while (0) + +#define DR7_RW0(dr) DR7_RW_VAL(dr, 0) +#define DR7_RW0SET(dr,rw) DR7_RW_SET(dr, 0, rw) +#define DR7_RW1(dr) DR7_RW_VAL(dr, 1) +#define DR7_RW1SET(dr,rw) DR7_RW_SET(dr, 1, rw) +#define DR7_RW2(dr) DR7_RW_VAL(dr, 2) +#define DR7_RW2SET(dr,rw) DR7_RW_SET(dr, 2, rw) +#define DR7_RW3(dr) DR7_RW_VAL(dr, 3) +#define DR7_RW3SET(dr,rw) DR7_RW_SET(dr, 3, rw) + + +#define DR7_LEN_VAL(dr, drnum) \ + (((dr) >> (18 + (4 * (drnum)))) & 0x3) + +#define DR7_LEN_SET(dr, drnum, rw) \ + do { \ + (dr) &= ~(0x3 << (18 + (4 * (drnum)))); \ + (dr) |= (((rw) & 0x3) << (18 + (4 * (drnum)))); \ + } while (0) +#define DR7_LEN0(dr) DR7_LEN_VAL(dr, 0) +#define DR7_LEN0SET(dr,len) DR7_LEN_SET(dr, 0, len) +#define DR7_LEN1(dr) DR7_LEN_VAL(dr, 1) +#define DR7_LEN1SET(dr,len) DR7_LEN_SET(dr, 1, len) +#define DR7_LEN2(dr) DR7_LEN_VAL(dr, 2) +#define DR7_LEN2SET(dr,len) DR7_LEN_SET(dr, 2, len) +#define DR7_LEN3(dr) DR7_LEN_VAL(dr, 3) +#define DR7_LEN3SET(dr,len) DR7_LEN_SET(dr, 3, len) + +#define DR7_G0(dr) (((dr)>>1)&0x1) +#define DR7_G0SET(dr) ((dr) |= 0x2) +#define DR7_G0CLR(dr) ((dr) &= ~0x2) +#define DR7_G1(dr) (((dr)>>3)&0x1) +#define DR7_G1SET(dr) ((dr) |= 0x8) +#define DR7_G1CLR(dr) ((dr) &= ~0x8) +#define DR7_G2(dr) (((dr)>>5)&0x1) +#define DR7_G2SET(dr) ((dr) |= 0x20) +#define DR7_G2CLR(dr) ((dr) &= ~0x20) +#define DR7_G3(dr) (((dr)>>7)&0x1) +#define DR7_G3SET(dr) ((dr) |= 0x80) +#define DR7_G3CLR(dr) ((dr) &= ~0x80) + +#define DR7_L0(dr) (((dr))&0x1) +#define DR7_L0SET(dr) ((dr) |= 0x1) +#define DR7_L0CLR(dr) ((dr) &= ~0x1) +#define DR7_L1(dr) (((dr)>>2)&0x1) +#define DR7_L1SET(dr) ((dr) |= 0x4) +#define DR7_L1CLR(dr) ((dr) &= ~0x4) +#define DR7_L2(dr) (((dr)>>4)&0x1) +#define DR7_L2SET(dr) ((dr) |= 0x10) +#define DR7_L2CLR(dr) ((dr) &= ~0x10) +#define DR7_L3(dr) (((dr)>>6)&0x1) +#define DR7_L3SET(dr) ((dr) |= 0x40) +#define DR7_L3CLR(dr) ((dr) &= ~0x40) + +#define DR7_GD 0x00002000 /* General Detect Enable */ +#define DR7_GE 0x00000200 /* Global exact */ +#define DR7_LE 0x00000100 /* Local exact */ + +extern kdb_machreg_t kdba_getdr6(void); +extern void kdba_putdr6(kdb_machreg_t); + +extern kdb_machreg_t kdba_getdr7(void); + +extern kdb_machreg_t kdba_getdr(int); +extern void kdba_putdr(int, kdb_machreg_t); + +extern kdb_machreg_t kdb_getcr(int); + +#define 
KDB_HAVE_LONGJMP +#ifdef KDB_HAVE_LONGJMP +/* + * Support for setjmp/longjmp + */ +#define JB_BX 0 +#define JB_SI 1 +#define JB_DI 2 +#define JB_BP 3 +#define JB_SP 4 +#define JB_PC 5 + +typedef struct __kdb_jmp_buf { + unsigned long regs[6]; /* kdba_setjmp assumes fixed offsets here */ +} kdb_jmp_buf; + +extern int kdba_setjmp(kdb_jmp_buf *); +extern void kdba_longjmp(kdb_jmp_buf *, int); + +extern kdb_jmp_buf kdbjmpbuf[]; +#endif /* KDB_HAVE_LONGJMP */ + +#endif /* !_ASM_KDBPRIVATE_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h b/xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h new file mode 100644 index 0000000000..3b41967cbe --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/keyboard.h @@ -0,0 +1,73 @@ +/* + * linux/include/asm-i386/keyboard.h + * + * Created 3 Nov 1996 by Geert Uytterhoeven + */ + +/* + * This file contains the i386 architecture specific keyboard definitions + */ + +#ifndef _I386_KEYBOARD_H +#define _I386_KEYBOARD_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#define KEYBOARD_IRQ 1 +#define DISABLE_KBD_DURING_INTERRUPTS 0 + +extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); +extern int pckbd_getkeycode(unsigned int scancode); +extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); +extern char pckbd_unexpected_up(unsigned char keycode); +extern void pckbd_leds(unsigned char leds); +extern void pckbd_init_hw(void); +extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); +extern pm_callback pm_kbd_request_override; +extern unsigned char pckbd_sysrq_xlate[128]; + +#define kbd_setkeycode pckbd_setkeycode +#define kbd_getkeycode pckbd_getkeycode +#define kbd_translate pckbd_translate +#define kbd_unexpected_up pckbd_unexpected_up +#define kbd_leds pckbd_leds +#define kbd_init_hw pckbd_init_hw +#define kbd_sysrq_xlate pckbd_sysrq_xlate + +#define SYSRQ_KEY 0x54 +#define E1_PAUSE 119 /* PAUSE key */ + +/* resource allocation */ +#define kbd_request_region() +#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ + "keyboard", NULL) + +/* How to access the keyboard macros on this platform. */ +#define kbd_read_input() inb(KBD_DATA_REG) +#define kbd_read_status() inb(KBD_STATUS_REG) +#define kbd_write_output(val) outb(val, KBD_DATA_REG) +#define kbd_write_command(val) outb(val, KBD_CNTL_REG) + +/* Some stoneage hardware needs delays after some operations. 
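+ kbd_pause() is that hook; on this platform it is defined away below, while the accessor macros above map straight onto inb()/outb() cycles on the i8042 ports.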
*/ +#define kbd_pause() do { } while(0) + +/* + * Machine specific bits for the PS/2 driver + */ + +#define AUX_IRQ 12 + +#define aux_request_irq(hand, dev_id) \ + request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) + +#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) + +#endif /* __KERNEL__ */ +#endif /* _I386_KEYBOARD_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h new file mode 100644 index 0000000000..5107c3db16 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/kmap_types.h @@ -0,0 +1,13 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +enum km_type { + KM_BOUNCE_READ, + KM_SKB_DATA, + KM_SKB_DATA_SOFTIRQ, + KM_USER0, + KM_USER1, + KM_TYPE_NR +}; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h new file mode 100644 index 0000000000..9d1110f984 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ldt.h @@ -0,0 +1,32 @@ +/* + * ldt.h + * + * Definitions of structures used with the modify_ldt system call. + */ +#ifndef _LINUX_LDT_H +#define _LINUX_LDT_H + +/* Maximum number of LDT entries supported. */ +#define LDT_ENTRIES 8192 +/* The size of each LDT entry. */ +#define LDT_ENTRY_SIZE 8 + +#ifndef __ASSEMBLY__ +struct modify_ldt_ldt_s { + unsigned int entry_number; + unsigned long base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +}; + +#define MODIFY_LDT_CONTENTS_DATA 0 +#define MODIFY_LDT_CONTENTS_STACK 1 +#define MODIFY_LDT_CONTENTS_CODE 2 + +#endif /* !__ASSEMBLY__ */ +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h b/xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h new file mode 100644 index 0000000000..1e2fe6c899 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/linux_logo.h @@ -0,0 +1,27 @@ +/* $Id: linux_logo.h,v 1.8 1998/07/30 16:30:24 jj Exp $ + * include/asm-i386/linux_logo.h: This is a linux logo + * to be displayed on boot. + * + * Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu) + * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * + * You can put anything here, but: + * LINUX_LOGO_COLORS has to be less than 224 + * image size has to be 80x80 + * values have to start from 0x20 + * (i.e. RGB(linux_logo_red[0], + * linux_logo_green[0], + * linux_logo_blue[0]) is color 0x20) + * BW image has to be 80x80 as well, with MS bit + * on the left + * Serial_console ascii image can be any size, + * but should contain %s to display the version + */ + +#include +#include + +#define linux_logo_banner "Linux/ia32 version " UTS_RELEASE + +#include + diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/locks.h b/xenolinux-2.4.16-sparse/include/asm-xeno/locks.h new file mode 100644 index 0000000000..ffcab0afb6 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/locks.h @@ -0,0 +1,135 @@ +/* + * SMP locks primitives for building ix86 locks + * (not yet used). + * + * Alan Cox, alan@redhat.com, 1995 + */ + +/* + * This would be much easier but far less clear and easy + * to borrow for other processors if it was just assembler. + */ + +static __inline__ void prim_spin_lock(struct spinlock *sp) +{ + int processor=smp_processor_id(); + + /* + * Grab the lock bit + */ + + while(lock_set_bit(0,&sp->lock)) + { + /* + * Failed, but that's cos we own it! 
+ */ + + if(sp->cpu==processor) + { + sp->users++; + return 0; + } + /* + * Spin in the cache S state if possible + */ + while(sp->lock) + { + /* + * Wait for any invalidates to go off + */ + + if(smp_invalidate_needed&(1<<processor)) + local_flush_tlb(); + sp->spins++; + } + /* + * Someone wrote the line, we go 'I' and get + * the cache entry. Now try to regrab + */ + } + sp->users++;sp->cpu=processor; + return 1; +} + +/* + * Release a spin lock + */ + +static __inline__ int prim_spin_unlock(struct spinlock *sp) +{ + /* This is safe. The decrement is still guarded by the lock. A multilock would + not be safe this way */ + if(!--sp->users) + { + sp->cpu= NO_PROC_ID;lock_clear_bit(0,&sp->lock); + return 1; + } + return 0; +} + + +/* + * Non blocking lock grab + */ + +static __inline__ int prim_spin_lock_nb(struct spinlock *sp) +{ + if(lock_set_bit(0,&sp->lock)) + return 0; /* Locked already */ + sp->users++; + return 1; /* We got the lock */ +} + + +/* + * These wrap the locking primitives up for usage + */ + +static __inline__ void spinlock(struct spinlock *sp) +{ + if(sp->priority<current->lock_order) + panic("lock order violation: %s (%d)\n", sp->name, current->lock_order); + if(prim_spin_lock(sp)) + { + /* + * We got a new lock. Update the priority chain + */ + sp->oldpri=current->lock_order; + current->lock_order=sp->priority; + } +} + +static __inline__ void spinunlock(struct spinlock *sp) +{ + int pri; + if(current->lock_order!=sp->priority) + panic("lock release order violation %s (%d)\n", sp->name, current->lock_order); + pri=sp->oldpri; + if(prim_spin_unlock(sp)) + { + /* + * Update the debugging lock priority chain. We dumped + * our last right to the lock. + */ + current->lock_order=pri; + } +} + +static __inline__ void spintestlock(struct spinlock *sp) +{ + /* + * We do no sanity checks, it's legal to optimistically + * get a lower lock. + */ + prim_spin_lock_nb(sp); +} + +static __inline__ void spintestunlock(struct spinlock *sp) +{ + /* + * A testlock doesn't update the lock chain so we + * must not update it on free + */ + prim_spin_unlock(sp); +} diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h b/xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h new file mode 100644 index 0000000000..bc8421d5e5 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/math_emu.h @@ -0,0 +1,35 @@ +#ifndef _I386_MATH_EMU_H +#define _I386_MATH_EMU_H + +#include <asm/sigcontext.h> + +int restore_i387_soft(void *s387, struct _fpstate *buf); +int save_i387_soft(void *s387, struct _fpstate * buf); + +/* This structure matches the layout of the data saved to the stack + following a device-not-present interrupt, part of it saved + automatically by the 80386/80486. + */ +struct info { + long ___orig_eip; + long ___ebx; + long ___ecx; + long ___edx; + long ___esi; + long ___edi; + long ___ebp; + long ___eax; + long ___ds; + long ___es; + long ___orig_eax; + long ___eip; + long ___cs; + long ___eflags; + long ___esp; + long ___ss; + long ___vm86_es; /* This and the following only in vm86 mode */ + long ___vm86_ds; + long ___vm86_fs; + long ___vm86_gs; +}; +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h new file mode 100644 index 0000000000..d6e3009430 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mc146818rtc.h @@ -0,0 +1,29 @@ +/* + * Machine dependent access functions for RTC registers.
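+ * For example, with the definitions below the RTC driver reads the minutes register (index 2) as + * + * unsigned char min = CMOS_READ(RTC_MINUTES); + * + * which becomes an outb_p() of the index to port 0x70 followed by an inb_p() from port 0x71; RTC_MINUTES itself comes from linux/mc146818rtc.h.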
+ */ +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + +#include <asm/io.h> + +#ifndef RTC_PORT +#define RTC_PORT(x) (0x70 + (x)) +#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */ +#endif + +/* + * The yet supported machines all access the RTC index register via + * an ISA port access but the way to access the date register differs ... + */ +#define CMOS_READ(addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +inb_p(RTC_PORT(1)); \ +}) +#define CMOS_WRITE(val, addr) ({ \ +outb_p((addr),RTC_PORT(0)); \ +outb_p((val),RTC_PORT(1)); \ +}) + +#define RTC_IRQ 8 + +#endif /* _ASM_MC146818RTC_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h new file mode 100644 index 0000000000..4b3b526c5a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mca_dma.h @@ -0,0 +1,202 @@ +#ifndef MCA_DMA_H +#define MCA_DMA_H + +#include +#include + +/* + * Microchannel specific DMA stuff. DMA on an MCA machine is fairly similar to + * standard PC dma, but it certainly has its quirks. DMA register addresses + * are in a different place and there are some added functions. Most of this + * should be pretty obvious on inspection. Note that the user must divide + * count by 2 when using 16-bit dma; that is not handled by these functions. + * + * Ramen Noodles are yummy. + * + * 1998 Tymm Twillman + */ + +/* + * Registers that are used by the DMA controller; FN is the function register + * (tell the controller what to do) and EXE is the execution register (how + * to do it) + */ + +#define MCA_DMA_REG_FN 0x18 +#define MCA_DMA_REG_EXE 0x1A + +/* + * Functions that the DMA controller can do + */ + +#define MCA_DMA_FN_SET_IO 0x00 +#define MCA_DMA_FN_SET_ADDR 0x20 +#define MCA_DMA_FN_GET_ADDR 0x30 +#define MCA_DMA_FN_SET_COUNT 0x40 +#define MCA_DMA_FN_GET_COUNT 0x50 +#define MCA_DMA_FN_GET_STATUS 0x60 +#define MCA_DMA_FN_SET_MODE 0x70 +#define MCA_DMA_FN_SET_ARBUS 0x80 +#define MCA_DMA_FN_MASK 0x90 +#define MCA_DMA_FN_RESET_MASK 0xA0 +#define MCA_DMA_FN_MASTER_CLEAR 0xD0 + +/* + * Modes (used by setting MCA_DMA_FN_SET_MODE in the function register) + * + * Note that the MODE_READ is read from memory (write to device), and + * MODE_WRITE is vice-versa. + */ + +#define MCA_DMA_MODE_XFER 0x04 /* read by default */ +#define MCA_DMA_MODE_READ 0x04 /* same as XFER */ +#define MCA_DMA_MODE_WRITE 0x08 /* OR with MODE_XFER to use */ +#define MCA_DMA_MODE_IO 0x01 /* DMA from IO register */ +#define MCA_DMA_MODE_16 0x40 /* 16 bit xfers */ + + +/** + * mca_enable_dma - channel to enable DMA on + * @dmanr: DMA channel + * + * Enable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static __inline__ void mca_enable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_RESET_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_disable_dma - channel to disable DMA on + * @dmanr: DMA channel + * + * Disable the MCA bus DMA on a channel. This can be called from + * IRQ context. + */ + +static __inline__ void mca_disable_dma(unsigned int dmanr) +{ + outb(MCA_DMA_FN_MASK | dmanr, MCA_DMA_REG_FN); +} + +/** + * mca_set_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * @a: 24bit bus address + * + * Load the address register in the DMA controller. This has a 24bit + * limitation (16Mb).
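+ * (The address is fed through the execute register a byte at a time, least-significant byte first, as the body below shows.)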
+ */ + +static __inline__ void mca_set_dma_addr(unsigned int dmanr, unsigned int a) +{ + outb(MCA_DMA_FN_SET_ADDR | dmanr, MCA_DMA_REG_FN); + outb(a & 0xff, MCA_DMA_REG_EXE); + outb((a >> 8) & 0xff, MCA_DMA_REG_EXE); + outb((a >> 16) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_addr - load a 24bit DMA address + * @dmanr: DMA channel + * + * Read the address register in the DMA controller. This has a 24bit + * limitation (16Mb). The return is a bus address. + */ + +static __inline__ unsigned int mca_get_dma_addr(unsigned int dmanr) +{ + unsigned int addr; + + outb(MCA_DMA_FN_GET_ADDR | dmanr, MCA_DMA_REG_FN); + addr = inb(MCA_DMA_REG_EXE); + addr |= inb(MCA_DMA_REG_EXE) << 8; + addr |= inb(MCA_DMA_REG_EXE) << 16; + + return addr; +} + +/** + * mca_set_dma_count - load a 16bit transfer count + * @dmanr: DMA channel + * @count: count + * + * Set the DMA count for this channel. This can be up to 64Kbytes. + * Setting a count of zero will not do what you expect. + */ + +static __inline__ void mca_set_dma_count(unsigned int dmanr, unsigned int count) +{ + count--; /* transfers one more than count -- correct for this */ + + outb(MCA_DMA_FN_SET_COUNT | dmanr, MCA_DMA_REG_FN); + outb(count & 0xff, MCA_DMA_REG_EXE); + outb((count >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_get_dma_residue - get the remaining bytes to transfer + * @dmanr: DMA channel + * + * This function returns the number of bytes left to transfer + * on this DMA channel. + */ + +static __inline__ unsigned int mca_get_dma_residue(unsigned int dmanr) +{ + unsigned short count; + + outb(MCA_DMA_FN_GET_COUNT | dmanr, MCA_DMA_REG_FN); + count = 1 + inb(MCA_DMA_REG_EXE); + count += inb(MCA_DMA_REG_EXE) << 8; + + return count; +} + +/** + * mca_set_dma_io - set the port for an I/O transfer + * @dmanr: DMA channel + * @io_addr: an I/O port number + * + * Unlike the ISA bus DMA controllers the DMA on MCA bus can transfer + * with an I/O port target. + */ + +static __inline__ void mca_set_dma_io(unsigned int dmanr, unsigned int io_addr) +{ + /* + * DMA from a port address -- set the io address + */ + + outb(MCA_DMA_FN_SET_IO | dmanr, MCA_DMA_REG_FN); + outb(io_addr & 0xff, MCA_DMA_REG_EXE); + outb((io_addr >> 8) & 0xff, MCA_DMA_REG_EXE); +} + +/** + * mca_set_dma_mode - set the DMA mode + * @dmanr: DMA channel + * @mode: mode to set + * + * The DMA controller supports several modes. The mode values you can + * set are : + * + * %MCA_DMA_MODE_READ when reading from the DMA device. + * + * %MCA_DMA_MODE_WRITE to writing to the DMA device. + * + * %MCA_DMA_MODE_IO to do DMA to or from an I/O port. + * + * %MCA_DMA_MODE_16 to do 16bit transfers. 
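+ * + * A typical transfer setup is then (a sketch only; addr must already be a bus address and len is in bytes): + * + * mca_disable_dma(chan); + * mca_set_dma_addr(chan, addr); + * mca_set_dma_count(chan, len >> 1); (16-bit mode counts words) + * mca_set_dma_mode(chan, MCA_DMA_MODE_READ | MCA_DMA_MODE_16); + * mca_enable_dma(chan);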
+ * + */ + +static __inline__ void mca_set_dma_mode(unsigned int dmanr, unsigned int mode) +{ + outb(MCA_DMA_FN_SET_MODE | dmanr, MCA_DMA_REG_FN); + outb(mode, MCA_DMA_REG_EXE); +} + +#endif /* MCA_DMA_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mman.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mman.h new file mode 100644 index 0000000000..f953c436ce --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mman.h @@ -0,0 +1,38 @@ +#ifndef __I386_MMAN_H__ +#define __I386_MMAN_H__ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_NONE 0x0 /* page can not be accessed */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#define MADV_NORMAL 0x0 /* default page-in behavior */ +#define MADV_RANDOM 0x1 /* page-in minimum required */ +#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ +#define MADV_WILLNEED 0x3 /* pre-fault pages */ +#define MADV_DONTNEED 0x4 /* discard these pages */ + +/* compatibility flags */ +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 + +#endif /* __I386_MMAN_H__ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h new file mode 100644 index 0000000000..de014031cd --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu.h @@ -0,0 +1,13 @@ +#ifndef __i386_MMU_H +#define __i386_MMU_H + +/* + * The i386 doesn't have a mmu context, but + * we put the segment information here. + */ +typedef struct { + void *segments; + unsigned long cpuvalid; +} mm_context_t; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h new file mode 100644 index 0000000000..aecc68582d --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mmu_context.h @@ -0,0 +1,70 @@ +#ifndef __I386_MMU_CONTEXT_H +#define __I386_MMU_CONTEXT_H + +#include +#include +#include +#include + +/* + * possibly do the LDT unload here? 
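+ * (both are no-ops for now; note also the Xeno twist in switch_mm() below, where the new page-table base is handed to the hypervisor through HYPERVISOR_set_pagetable() with start_info.phys_base added in, rather than being loaded into %cr3 directly, since the guest's view of physical memory is offset by that amount within machine memory)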
+ */ +#define destroy_context(mm) do { } while(0) +#define init_new_context(tsk,mm) 0 + +#ifdef CONFIG_SMP + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ + if(cpu_tlbstate[cpu].state == TLBSTATE_OK) + cpu_tlbstate[cpu].state = TLBSTATE_LAZY; +} +#else +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ +} +#endif + +extern pgd_t *cur_pgd; + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +{ + if (prev != next) { + /* stop flush ipis for the previous mm */ + clear_bit(cpu, &prev->cpu_vm_mask); + /* + * Re-load LDT if necessary + */ + if (prev->context.segments != next->context.segments) + load_LDT(next); +#ifdef CONFIG_SMP + cpu_tlbstate[cpu].state = TLBSTATE_OK; + cpu_tlbstate[cpu].active_mm = next; +#endif + set_bit(cpu, &next->cpu_vm_mask); + set_bit(cpu, &next->context.cpuvalid); + /* Re-load page tables */ + cur_pgd = next->pgd; + HYPERVISOR_set_pagetable(__pa(cur_pgd) + start_info.phys_base); + } +#ifdef CONFIG_SMP + else { + cpu_tlbstate[cpu].state = TLBSTATE_OK; + if(cpu_tlbstate[cpu].active_mm != next) + BUG(); + if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must flush our tlb. + */ + local_flush_tlb(); + } + if (!test_and_set_bit(cpu, &next->context.cpuvalid)) + load_LDT(next); + } +#endif +} + +#define activate_mm(prev, next) \ + switch_mm((prev),(next),NULL,smp_processor_id()) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h new file mode 100644 index 0000000000..46b71da998 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mmx.h @@ -0,0 +1,14 @@ +#ifndef _ASM_MMX_H +#define _ASM_MMX_H + +/* + * MMX 3Dnow! helper operations + */ + +#include + +extern void *_mmx_memcpy(void *to, const void *from, size_t size); +extern void mmx_clear_page(void *page); +extern void mmx_copy_page(void *to, void *from); + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/module.h b/xenolinux-2.4.16-sparse/include/asm-xeno/module.h new file mode 100644 index 0000000000..a55217377a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/module.h @@ -0,0 +1,12 @@ +#ifndef _ASM_I386_MODULE_H +#define _ASM_I386_MODULE_H +/* + * This file contains the i386 architecture specific module code. + */ + +#define module_map(x) vmalloc(x) +#define module_unmap(x) vfree(x) +#define module_arch_init(x) (0) +#define arch_init_modules(x) do { } while (0) + +#endif /* _ASM_I386_MODULE_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h new file mode 100644 index 0000000000..b5719c0bb0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mpspec.h @@ -0,0 +1,221 @@ +#ifndef __ASM_MPSPEC_H +#define __ASM_MPSPEC_H + +/* + * Structure definitions for SMP machines following the + * Intel Multiprocessing Specification 1.1 and 1.4. + */ + +/* + * This tag identifies where the SMP configuration + * information is. + */ + +#define SMP_MAGIC_IDENT (('_'<<24)|('P'<<16)|('M'<<8)|'_') + +/* + * a maximum of 16 APICs with the current APIC ID architecture. 
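+ * (raised to 256 below when CONFIG_MULTIQUAD is set, since clustered NUMA-Q systems encode the quad number in the APIC ID)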
+ */ +#ifdef CONFIG_MULTIQUAD +#define MAX_APICS 256 +#else /* !CONFIG_MULTIQUAD */ +#define MAX_APICS 16 +#endif /* CONFIG_MULTIQUAD */ + +#define MAX_MPC_ENTRY 1024 + +struct intel_mp_floating +{ + char mpf_signature[4]; /* "_MP_" */ + unsigned long mpf_physptr; /* Configuration table address */ + unsigned char mpf_length; /* Our length (paragraphs) */ + unsigned char mpf_specification;/* Specification version */ + unsigned char mpf_checksum; /* Checksum (makes sum 0) */ + unsigned char mpf_feature1; /* Standard or configuration ? */ + unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ + unsigned char mpf_feature3; /* Unused (0) */ + unsigned char mpf_feature4; /* Unused (0) */ + unsigned char mpf_feature5; /* Unused (0) */ +}; + +struct mp_config_table +{ + char mpc_signature[4]; +#define MPC_SIGNATURE "PCMP" + unsigned short mpc_length; /* Size of table */ + char mpc_spec; /* 0x01 */ + char mpc_checksum; + char mpc_oem[8]; + char mpc_productid[12]; + unsigned long mpc_oemptr; /* 0 if not present */ + unsigned short mpc_oemsize; /* 0 if not present */ + unsigned short mpc_oemcount; + unsigned long mpc_lapic; /* APIC address */ + unsigned long reserved; +}; + +/* Followed by entries */ + +#define MP_PROCESSOR 0 +#define MP_BUS 1 +#define MP_IOAPIC 2 +#define MP_INTSRC 3 +#define MP_LINTSRC 4 +#define MP_TRANSLATION 192 /* Used by IBM NUMA-Q to describe node locality */ + +struct mpc_config_processor +{ + unsigned char mpc_type; + unsigned char mpc_apicid; /* Local APIC number */ + unsigned char mpc_apicver; /* Its versions */ + unsigned char mpc_cpuflag; +#define CPU_ENABLED 1 /* Processor is available */ +#define CPU_BOOTPROCESSOR 2 /* Processor is the BP */ + unsigned long mpc_cpufeature; +#define CPU_STEPPING_MASK 0x0F +#define CPU_MODEL_MASK 0xF0 +#define CPU_FAMILY_MASK 0xF00 + unsigned long mpc_featureflag; /* CPUID feature value */ + unsigned long mpc_reserved[2]; +}; + +struct mpc_config_bus +{ + unsigned char mpc_type; + unsigned char mpc_busid; + unsigned char mpc_bustype[6] __attribute((packed)); +}; + +/* List of Bus Type string values, Intel MP Spec. 
*/ +#define BUSTYPE_EISA "EISA" +#define BUSTYPE_ISA "ISA" +#define BUSTYPE_INTERN "INTERN" /* Internal BUS */ +#define BUSTYPE_MCA "MCA" +#define BUSTYPE_VL "VL" /* Local bus */ +#define BUSTYPE_PCI "PCI" +#define BUSTYPE_PCMCIA "PCMCIA" +#define BUSTYPE_CBUS "CBUS" +#define BUSTYPE_CBUSII "CBUSII" +#define BUSTYPE_FUTURE "FUTURE" +#define BUSTYPE_MBI "MBI" +#define BUSTYPE_MBII "MBII" +#define BUSTYPE_MPI "MPI" +#define BUSTYPE_MPSA "MPSA" +#define BUSTYPE_NUBUS "NUBUS" +#define BUSTYPE_TC "TC" +#define BUSTYPE_VME "VME" +#define BUSTYPE_XPRESS "XPRESS" + +struct mpc_config_ioapic +{ + unsigned char mpc_type; + unsigned char mpc_apicid; + unsigned char mpc_apicver; + unsigned char mpc_flags; +#define MPC_APIC_USABLE 0x01 + unsigned long mpc_apicaddr; +}; + +struct mpc_config_intsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbus; + unsigned char mpc_srcbusirq; + unsigned char mpc_dstapic; + unsigned char mpc_dstirq; +}; + +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; + +#define MP_IRQDIR_DEFAULT 0 +#define MP_IRQDIR_HIGH 1 +#define MP_IRQDIR_LOW 3 + + +struct mpc_config_lintsrc +{ + unsigned char mpc_type; + unsigned char mpc_irqtype; + unsigned short mpc_irqflag; + unsigned char mpc_srcbusid; + unsigned char mpc_srcbusirq; + unsigned char mpc_destapic; +#define MP_APIC_ALL 0xFF + unsigned char mpc_destapiclint; +}; + +struct mp_config_oemtable +{ + char oem_signature[4]; +#define MPC_OEM_SIGNATURE "_OEM" + unsigned short oem_length; /* Size of table */ + char oem_rev; /* 0x01 */ + char oem_checksum; + char mpc_oem[8]; +}; + +struct mpc_config_translation +{ + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* + * Default configurations + * + * 1 2 CPU ISA 82489DX + * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining + * 3 2 CPU EISA 82489DX + * 4 2 CPU MCA 82489DX + * 5 2 CPU ISA+PCI + * 6 2 CPU EISA+PCI + * 7 2 CPU MCA+PCI + */ + +#ifdef CONFIG_MULTIQUAD +#define MAX_IRQ_SOURCES 512 +#else /* !CONFIG_MULTIQUAD */ +#define MAX_IRQ_SOURCES 256 +#endif /* CONFIG_MULTIQUAD */ + +#define MAX_MP_BUSSES 32 +enum mp_bustype { + MP_BUS_ISA = 1, + MP_BUS_EISA, + MP_BUS_PCI, + MP_BUS_MCA +}; +extern int mp_bus_id_to_type [MAX_MP_BUSSES]; +extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; + +extern unsigned int boot_cpu_physical_apicid; +extern unsigned long phys_cpu_present_map; +extern int smp_found_config; +extern void find_smp_config (void); +extern void get_smp_config (void); +extern int nr_ioapics; +extern int apic_version [MAX_APICS]; +extern int mp_bus_id_to_type [MAX_MP_BUSSES]; +extern int mp_irq_entries; +extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; +extern int mpc_default_type; +extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; +extern int mp_current_pci_id; +extern unsigned long mp_lapic_addr; +extern int pic_mode; +extern int using_apic_timer; + +#endif + diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h new file mode 100644 index 0000000000..b8d659c157 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/msgbuf.h @@ -0,0 +1,31 @@ +#ifndef _I386_MSGBUF_H +#define _I386_MSGBUF_H + +/* + * The msqid64_ds structure for i386 architecture. 
+ * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; + __kernel_time_t msg_stime; /* last msgsnd time */ + unsigned long __unused1; + __kernel_time_t msg_rtime; /* last msgrcv time */ + unsigned long __unused2; + __kernel_time_t msg_ctime; /* last change time */ + unsigned long __unused3; + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; + +#endif /* _I386_MSGBUF_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/msr.h b/xenolinux-2.4.16-sparse/include/asm-xeno/msr.h new file mode 100644 index 0000000000..11bcb7f29e --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/msr.h @@ -0,0 +1,104 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x79 +#define MSR_IA32_UCODE_REV 0x8b + +#define MSR_IA32_PERFCTR0 0xc1 +#define MSR_IA32_PERFCTR1 0xc2 + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_EVNTSEL0 0x186 +#define MSR_IA32_EVNTSEL1 0x187 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +/* AMD Defined MSRs */ +#define MSR_K6_EFER 0xC0000080 +#define MSR_K6_STAR 0xC0000081 +#define MSR_K6_WHCR 0xC0000082 +#define MSR_K6_UWCCR 0xC0000085 +#define MSR_K6_PSOR 0xC0000087 +#define MSR_K6_PFIR 0xC0000088 + +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 + +/* Centaur-Hauls/IDT defined MSRs. 
*/ +#define MSR_IDT_FCR1 0x107 +#define MSR_IDT_FCR2 0x108 +#define MSR_IDT_FCR3 0x109 +#define MSR_IDT_FCR4 0x10a + +#define MSR_IDT_MCR0 0x110 +#define MSR_IDT_MCR1 0x111 +#define MSR_IDT_MCR2 0x112 +#define MSR_IDT_MCR3 0x113 +#define MSR_IDT_MCR4 0x114 +#define MSR_IDT_MCR5 0x115 +#define MSR_IDT_MCR6 0x116 +#define MSR_IDT_MCR7 0x117 +#define MSR_IDT_MCR_CTRL 0x120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x1107 + +#endif /* __ASM_MSR_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h b/xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h new file mode 100644 index 0000000000..ff3ea870d0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/mtrr.h @@ -0,0 +1,127 @@ +/* Generic MTRR (Memory Type Range Register) ioctls. + + Copyright (C) 1997-1999 Richard Gooch + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. +*/ +#ifndef _LINUX_MTRR_H +#define _LINUX_MTRR_H + +#include +#include + +#define MTRR_IOCTL_BASE 'M' + +struct mtrr_sentry +{ + unsigned long base; /* Base address */ + unsigned long size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +struct mtrr_gentry +{ + unsigned int regnum; /* Register number */ + unsigned long base; /* Base address */ + unsigned long size; /* Size of region */ + unsigned int type; /* Type of region */ +}; + +/* These are the various ioctls */ +#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) +#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) +#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry) +#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry) +#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry) +#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry) +#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry) +#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry) +#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry) +#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry) + +/* These are the region types */ +#define MTRR_TYPE_UNCACHABLE 0 +#define MTRR_TYPE_WRCOMB 1 +/*#define MTRR_TYPE_ 2*/ +/*#define MTRR_TYPE_ 3*/ +#define MTRR_TYPE_WRTHROUGH 4 +#define MTRR_TYPE_WRPROT 5 +#define MTRR_TYPE_WRBACK 6 +#define MTRR_NUM_TYPES 7 + +#ifdef MTRR_NEED_STRINGS +static char *mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; +#endif + +#ifdef __KERNEL__ + +/* The following functions are for use by other drivers */ +# ifdef CONFIG_MTRR +extern int 
mtrr_add (unsigned long base, unsigned long size, + unsigned int type, char increment); +extern int mtrr_add_page (unsigned long base, unsigned long size, + unsigned int type, char increment); +extern int mtrr_del (int reg, unsigned long base, unsigned long size); +extern int mtrr_del_page (int reg, unsigned long base, unsigned long size); +extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); +# else +static __inline__ int mtrr_add (unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + return -ENODEV; +} +static __inline__ int mtrr_add_page (unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + return -ENODEV; +} +static __inline__ int mtrr_del (int reg, unsigned long base, + unsigned long size) +{ + return -ENODEV; +} +static __inline__ int mtrr_del_page (int reg, unsigned long base, + unsigned long size) +{ + return -ENODEV; +} + +static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;} + +# endif + +/* The following functions are for initialisation: don't use them! */ +extern int mtrr_init (void); +# if defined(CONFIG_SMP) && defined(CONFIG_MTRR) +extern void mtrr_init_boot_cpu (void); +extern void mtrr_init_secondary_cpu (void); +# endif + +#endif + +#endif /* _LINUX_MTRR_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/namei.h b/xenolinux-2.4.16-sparse/include/asm-xeno/namei.h new file mode 100644 index 0000000000..8148650886 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/namei.h @@ -0,0 +1,17 @@ +/* $Id: namei.h,v 1.1 1996/12/13 14:48:21 jj Exp $ + * linux/include/asm-i386/namei.h + * + * Included from linux/fs/namei.c + */ + +#ifndef __I386_NAMEI_H +#define __I386_NAMEI_H + +/* This dummy routine maybe changed to something useful + * for /usr/gnemul/ emulation stuff. + * Look at asm-sparc/namei.h for details. + */ + +#define __emul_prefix() NULL + +#endif /* __I386_NAMEI_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/page.h b/xenolinux-2.4.16-sparse/include/asm-xeno/page.h new file mode 100644 index 0000000000..74c8824f94 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/page.h @@ -0,0 +1,147 @@ +#ifndef _I386_PAGE_H +#define _I386_PAGE_H + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include + +#ifdef CONFIG_X86_USE_3DNOW + +#include + +#define clear_page(page) mmx_clear_page((void *)(page)) +#define copy_page(to,from) mmx_copy_page(to,from) + +#else + +/* + * On older X86 processors its not a win to use MMX here it seems. + * Maybe the K6-III ? + */ + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) + +#endif + +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + +/* + * These are used to make use of C type-checking.. 
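+ * and, in this port, to hide the machine/pseudo-physical address split: for present entries (low bit set) pte_val() and pmd_val() below subtract start_info.phys_base, and __pte()/__pmd() add it back, so the kernel sees zero-based physical addresses while the page tables themselves hold machine addresses.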
+ */ +#if CONFIG_X86_PAE +typedef struct { unsigned long pte_low, pte_high; } pte_t; +typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long long pgd; } pgd_t; +#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#else +typedef struct { unsigned long pte_low; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +static inline unsigned long pte_val(pte_t x) +{ + unsigned long ret = x.pte_low; + if ( (ret & 1) ) ret -= start_info.phys_base; + return ret; +} +#endif +#define PTE_MASK PAGE_MASK + +typedef struct { unsigned long pgprot; } pgprot_t; + +static inline unsigned long pmd_val(pmd_t x) +{ + unsigned long ret = x.pmd; + if ( (ret & 1) ) ret -= start_info.phys_base; + return ret; +} +#define pgd_val(x) ({ BUG(); (unsigned long)0; }) +#define pgprot_val(x) ((x).pgprot) + +static inline pte_t __pte(unsigned long x) +{ + if ( (x & 1) ) x += start_info.phys_base; + return ((pte_t) { (x) }); +} +static inline pmd_t __pmd(unsigned long x) +{ + if ( (x & 1) ) x += start_info.phys_base; + return ((pmd_t) { (x) }); +} +#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#endif /* !__ASSEMBLY__ */ + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* + * This handles the memory map.. We could make this a config + * option, but too many people screw it up, and too few need + * it. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ + +#define __PAGE_OFFSET (0xC0000000) + +#ifndef __ASSEMBLY__ + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! 
+ */ + +#ifdef CONFIG_DEBUG_BUGVERBOSE +extern void do_BUG(const char *file, int line); +#define BUG() do { \ + do_BUG(__FILE__, __LINE__); \ + __asm__ __volatile__("ud2"); \ +} while (0) +#else +#define BUG() __asm__ __volatile__(".byte 0x0f,0x0b") +#endif + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +/* Pure 2^n version of get_order */ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +#endif /* __ASSEMBLY__ */ + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) + +#endif /* __KERNEL__ */ + +#endif /* _I386_PAGE_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/param.h b/xenolinux-2.4.16-sparse/include/asm-xeno/param.h new file mode 100644 index 0000000000..1b10bf49fe --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/param.h @@ -0,0 +1,24 @@ +#ifndef _ASMi386_PARAM_H +#define _ASMi386_PARAM_H + +#ifndef HZ +#define HZ 100 +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NGROUPS +#define NGROUPS 32 +#endif + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifdef __KERNEL__ +# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ +#endif + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/parport.h b/xenolinux-2.4.16-sparse/include/asm-xeno/parport.h new file mode 100644 index 0000000000..fa0e321e49 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/parport.h @@ -0,0 +1,18 @@ +/* + * parport.h: ia32-specific parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. 
+ */ + +#ifndef _ASM_I386_PARPORT_H +#define _ASM_I386_PARPORT_H 1 + +static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma); +static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + return parport_pc_find_isa_ports (autoirq, autodma); +} + +#endif /* !(_ASM_I386_PARPORT_H) */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h new file mode 100644 index 0000000000..da6bf8422d --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/pgalloc.h @@ -0,0 +1,185 @@ +#ifndef _I386_PGALLOC_H +#define _I386_PGALLOC_H + +#include +#include +#include +#include +#include + +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +#define pmd_populate(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) + +static __inline__ pgd_t *get_pgd_slow(void) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + pgd_t *kpgd; + pmd_t *kpmd; + pte_t *kpte; + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + kpgd = pgd_offset_k((unsigned long)pgd); + kpmd = pmd_offset(kpgd, (unsigned long)pgd); + kpte = pte_offset(kpmd, (unsigned long)pgd); + queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW); + queue_baseptr_create(__pa(pgd)); + } + + return pgd; +} + +#if 0 +static __inline__ pgd_t *get_pgd_fast(void) +{ + unsigned long *ret; + + if ((ret = pgd_quicklist) != NULL) { + pgd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pgd_slow(); + return (pgd_t *)ret; +} + +static __inline__ void free_pgd_fast(pgd_t *pgd) +{ + *(unsigned long *)pgd = (unsigned long) pgd_quicklist; + pgd_quicklist = (unsigned long *) pgd; + pgtable_cache_size++; +} +#else +#define get_pgd_fast get_pgd_slow +#define free_pgd_fast free_pgd_slow +#endif + +static __inline__ void free_pgd_slow(pgd_t *pgd) +{ + pgd_t *kpgd; + pmd_t *kpmd; + pte_t *kpte; + queue_baseptr_remove(__pa(pgd)); + kpgd = pgd_offset_k((unsigned long)pgd); + kpmd = pmd_offset(kpgd, (unsigned long)pgd); + kpte = pte_offset(kpmd, (unsigned long)pgd); + queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW); + free_page((unsigned long)pgd); +} + +static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + pgd_t *kpgd; + pmd_t *kpmd; + pte_t *kpte; + + pte = (pte_t *) __get_free_page(GFP_KERNEL); + if (pte) + { + clear_page(pte); + kpgd = pgd_offset_k((unsigned long)pte); + kpmd = pmd_offset(kpgd, (unsigned long)pte); + kpte = pte_offset(kpmd, (unsigned long)pte); + queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)&~_PAGE_RW); + } + return pte; +} + +#if 0 +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +{ + unsigned long *ret; + + if ((ret = (unsigned long *)pte_quicklist) != NULL) { + pte_quicklist = (unsigned long *)(*ret); + ret[0] = ret[1]; + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +static __inline__ void pte_free_fast(pte_t *pte) +{ + *(unsigned long *)pte = (unsigned long) pte_quicklist; + pte_quicklist = (unsigned long *) pte; + pgtable_cache_size++; +} +#else +#define pte_alloc_one_fast pte_alloc_one +#define pte_free_fast pte_free_slow +#endif + 
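+
+/*
+ * The allocators above follow the Xen page-table protocol: allocate an
+ * ordinary page, re-map it read-only in the kernel's own page tables
+ * via queue_l1_entry_update(), and, for a pgd, register it with the
+ * hypervisor via queue_baseptr_create().  free_pgd_slow() above and
+ * pte_free_slow() below undo this in mirror-image order.  A minimal
+ * usage sketch, for exposition only:
+ *
+ *   pgd_t *pgd = pgd_alloc(mm);   -- RO-mapped, registered base pointer
+ *   ... install mappings ...
+ *   pgd_free(pgd);                -- unregister, restore RW, free page
+ */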
+static __inline__ void pte_free_slow(pte_t *pte) +{ + pgd_t *kpgd; + pmd_t *kpmd; + pte_t *kpte; + kpgd = pgd_offset_k((unsigned long)pte); + kpmd = pmd_offset(kpgd, (unsigned long)pte); + kpte = pte_offset(kpmd, (unsigned long)pte); + queue_l1_entry_update(__pa(kpte), (*(unsigned long *)kpte)|_PAGE_RW); + free_page((unsigned long)pte); +} + +#define pte_free(pte) pte_free_fast(pte) +#define pgd_alloc(mm) get_pgd_fast() +#define pgd_free(pgd) free_pgd_fast(pgd) + +#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free_slow(x) do { } while (0) +#define pmd_free_fast(x) do { } while (0) +#define pmd_free(x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +extern int do_check_pgt_cache(int, int); + +/* + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + */ + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) + __flush_tlb_one(addr); +} + +static inline void flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm == current->active_mm) + __flush_tlb(); +} + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* i386 does not keep any page table caches in TLB */ +} + +#endif /* _I386_PGALLOC_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h new file mode 100644 index 0000000000..735ac99311 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-2level.h @@ -0,0 +1,70 @@ +#ifndef _I386_PGTABLE_2LEVEL_H +#define _I386_PGTABLE_2LEVEL_H + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. 
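+ *
+ * (With PGDIR_SHIFT == 22, each of the 1024 pgd entries maps
+ * 1 << 22 bytes = 4 MB, covering the full 4 GB address space.)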
+ */ +#define PMD_SHIFT 22 +#define PTRS_PER_PMD 1 + +#define PTRS_PER_PTE 1024 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +#define pgd_clear(xp) do { } while (0) + +#define set_pte(pteptr, pteval) queue_l1_entry_update(__pa(pteptr), (pteval).pte_low) +#define set_pmd(pmdptr, pmdval) queue_l2_entry_update(__pa(pmdptr), (pmdval).pmd) +#define set_pgd(pgdptr, pgdval) ((void)0) + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +{ + return (pmd_t *) dir; +} + +/* + * A note on implementation of this atomic 'get-and-clear' operation. + * This is actually very simple because XenoLinux can only run on a single + * processor. Therefore, we cannot race other processors setting the 'accessed' + * or 'dirty' bits on a page-table entry. + * Even if pages are shared between domains, that is not a problem because + * each domain will have separate page tables, with their own versions of + * accessed & dirty state. + */ +static inline pte_t ptep_get_and_clear(pte_t *xp) +{ + pte_t pte = *xp; + queue_l1_entry_update(__pa(xp), 0); + return pte; +} + +#define pte_same(a, b) ((a).pte_low == (b).pte_low) +#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) +#define pte_none(x) (!(x).pte_low) +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) + +#endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h new file mode 100644 index 0000000000..22c69d2393 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable-3level.h @@ -0,0 +1,103 @@ +#ifndef _I386_PGTABLE_3LEVEL_H +#define _I386_PGTABLE_3LEVEL_H + +#error "PAE unsupported" + +/* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. + * + * Copyright (C) 1999 Ingo Molnar + */ + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 30 +#define PTRS_PER_PGD 4 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%08lx%08lx).\n", __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016Lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) + +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } + +/* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. 
In places where this is + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. -ben + */ +static inline void set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} +#define set_pmd(pmdptr,pmdval) \ + set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval)) +#define set_pgd(pgdptr,pgdval) \ + set_64bit((unsigned long long *)(pgdptr),pgd_val(pgdval)) + +/* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + * We do not let the generic code free and clear pgd entries due to + * this erratum. + */ +static inline void pgd_clear (pgd_t * pgd) { } + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ + __pmd_offset(address)) + +static inline pte_t ptep_get_and_clear(pte_t *ptep) +{ + pte_t res; + + /* xchg acts as a barrier before the setting of the high bits */ + res.pte_low = xchg(&ptep->pte_low, 0); + res.pte_high = ptep->pte_high; + ptep->pte_high = 0; + + return res; +} + +static inline int pte_same(pte_t a, pte_t b) +{ + return a.pte_low == b.pte_low && a.pte_high == b.pte_high; +} + +#define pte_page(x) (mem_map+(((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT)))) +#define pte_none(x) (!(x).pte_low && !(x).pte_high) + +static inline pte_t __mk_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + + pte.pte_high = page_nr >> (32 - PAGE_SHIFT); + pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); + return pte; +} + +#endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h new file mode 100644 index 0000000000..2eaa28733d --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/pgtable.h @@ -0,0 +1,309 @@ +#ifndef _I386_PGTABLE_H +#define _I386_PGTABLE_H + +#include + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include +#include +#include +#include + +#ifndef _I386_BITOPS_H +#include +#endif + +#define swapper_pg_dir 0 +extern void paging_init(void); + +/* Caches aren't brain-dead on the intel. */ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(mm, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr) do { } while (0) +#define flush_page_to_ram(page) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) + +extern unsigned long pgkern_mask; + +#define __flush_tlb() queue_tlb_flush() +#define __flush_tlb_global() __flush_tlb() +#define __flush_tlb_all() __flush_tlb_global() +#define __flush_tlb_one(addr) queue_tlb_flush_one(addr) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
+ */ +extern unsigned long empty_zero_page[1024]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#endif /* !__ASSEMBLY__ */ + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifndef __ASSEMBLY__ +#if CONFIG_X86_PAE +# include + +/* + * Need to initialise the X86 PAE caches + */ +extern void pgtable_cache_init(void); + +#else +# include + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +#endif +#endif + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_USER_PGD_NR 0 + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define TWOLEVEL_PGDIR_SHIFT 22 +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) + + +#ifndef __ASSEMBLY__ +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8*1024*1024) +#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ + ~(VMALLOC_OFFSET-1)) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#if CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#endif + +#define _PAGE_BIT_PRESENT 0 +#define _PAGE_BIT_RW 1 +#define _PAGE_BIT_USER 2 +#define _PAGE_BIT_PWT 3 +#define _PAGE_BIT_PCD 4 +#define _PAGE_BIT_ACCESSED 5 +#define _PAGE_BIT_DIRTY 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. 
*/ +#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ + +#define _PAGE_PROTNONE 0x080 /* If not present */ + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#define __PAGE_KERNEL \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_KERNEL_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_KERNEL_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#if 0 +#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) +#else +#define MAKE_GLOBAL(x) __pgprot(x) +#endif + +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) + +/* + * The i386 can't do page protection for execute, and considers that + * the same are read. Also, write permissions imply read permissions. + * This is the closest we can get.. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED + +#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define pte_clear(xp) queue_l1_entry_update(__pa(xp), 0) + +#define pmd_none(x) (!(x).pmd) +#define pmd_present(x) ((x).pmd & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + +/* + * Permanent address of a page. Obviously must never be + * called on a highmem page. + */ +#define page_address(page) ((page)->virtual) +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
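+ *
+ * A typical read-modify-write, for illustration:
+ *
+ *   pte_t pte = *ptep;
+ *   if (pte_write(pte))
+ *       set_pte(ptep, pte_wrprotect(pte));
+ *
+ * The helpers below only manipulate a local copy; the ptep_* functions
+ * further down write through to the page table.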
+ */ +static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } + +static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } +static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } + +static inline int ptep_test_and_clear_dirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_DIRTY; + if ( ret ) queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_DIRTY); + return ret; +} +static inline int ptep_test_and_clear_young(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_ACCESSED; + if ( ret ) queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_ACCESSED); + return ret; +} +static inline void ptep_set_wrprotect(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( (pteval & _PAGE_RW) ) + queue_l1_entry_update(__pa(ptep), pteval & ~_PAGE_RW); +} +static inline void ptep_mkdirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( !(pteval & _PAGE_DIRTY) ) + queue_l1_entry_update(__pa(ptep), pteval | _PAGE_DIRTY); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte.pte_low &= _PAGE_CHG_MASK; + pte.pte_low |= pgprot_val(newprot); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page(pmd) \ +((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +#define __pgd_offset(address) pgd_index(address) + +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#define __pmd_offset(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +/* Find an entry in the third-level page table.. 
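+ * (For the two-level case a virtual address therefore decodes as:
+ * bits 31..22 index the pgd, bits 21..12 index the page table, and
+ * bits 11..0 are the byte offset within the page.)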
*/ +#define __pte_offset(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ + __pte_offset(address)) + +/* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma,address,pte) do { } while (0) + +/* Encode and de-code a swap entry */ +#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) +#define SWP_OFFSET(x) ((x).val >> 8) +#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) +#define swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#endif /* !__ASSEMBLY__ */ + +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +#define PageSkip(page) (0) +#define kern_addr_valid(addr) (1) + +#define io_remap_page_range remap_page_range + +#endif /* _I386_PGTABLE_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/poll.h b/xenolinux-2.4.16-sparse/include/asm-xeno/poll.h new file mode 100644 index 0000000000..e5feda71b3 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/poll.h @@ -0,0 +1,25 @@ +#ifndef __i386_POLL_H +#define __i386_POLL_H + +/* These are specified by iBCS2 */ +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 + +/* The rest seem to be more-or-less nonstandard. Check them! */ +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h new file mode 100644 index 0000000000..5529f32702 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/posix_types.h @@ -0,0 +1,80 @@ +#ifndef __ARCH_I386_POSIX_TYPES_H +#define __ARCH_I386_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. 
+ */ + +typedef unsigned short __kernel_dev_t; +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned short __kernel_uid_t; +typedef unsigned short __kernel_gid_t; +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +typedef long __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_daddr_t; +typedef char * __kernel_caddr_t; +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +#endif + +typedef struct { +#if defined(__KERNEL__) || defined(__USE_ALL) + int val[2]; +#else /* !defined(__KERNEL__) && !defined(__USE_ALL) */ + int __val[2]; +#endif /* !defined(__KERNEL__) && !defined(__USE_ALL) */ +} __kernel_fsid_t; + +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) + +#undef __FD_SET +#define __FD_SET(fd,fdsetp) \ + __asm__ __volatile__("btsl %1,%0": \ + "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd))) + +#undef __FD_CLR +#define __FD_CLR(fd,fdsetp) \ + __asm__ __volatile__("btrl %1,%0": \ + "=m" (*(__kernel_fd_set *) (fdsetp)):"r" ((int) (fd))) + +#undef __FD_ISSET +#define __FD_ISSET(fd,fdsetp) (__extension__ ({ \ + unsigned char __result; \ + __asm__ __volatile__("btl %1,%2 ; setb %0" \ + :"=q" (__result) :"r" ((int) (fd)), \ + "m" (*(__kernel_fd_set *) (fdsetp))); \ + __result; })) + +#undef __FD_ZERO +#define __FD_ZERO(fdsetp) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__("cld ; rep ; stosl" \ + :"=m" (*(__kernel_fd_set *) (fdsetp)), \ + "=&c" (__d0), "=&D" (__d1) \ + :"a" (0), "1" (__FDSET_LONGS), \ + "2" ((__kernel_fd_set *) (fdsetp)) : "memory"); \ +} while (0) + +#endif /* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/processor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/processor.h new file mode 100644 index 0000000000..3e65e199f0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/processor.h @@ -0,0 +1,484 @@ +/* + * include/asm-i386/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_I386_PROCESSOR_H +#define __ASM_I386_PROCESSOR_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. 
[mj]
+ */
+
+struct cpuinfo_x86 {
+ __u8 x86; /* CPU family */
+ __u8 x86_vendor; /* CPU vendor */
+ __u8 x86_model;
+ __u8 x86_mask;
+ char wp_works_ok; /* It doesn't on 386's */
+ char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+ char hard_math;
+ char rfu;
+ int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+ __u32 x86_capability[NCAPINTS];
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+ int x86_cache_size; /* in KB - valid for CPUS which support this call */
+ int fdiv_bug;
+ int f00f_bug;
+ int coma_bug;
+ unsigned long loops_per_jiffy;
+ unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
+ unsigned long *pte_quick;
+ unsigned long pgtable_cache_sz;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+/*
+ * capabilities of CPUs
+ */
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data (&boot_cpu_data)
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
+#define cpu_has_pse (test_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability))
+#define cpu_has_pae (test_bit(X86_FEATURE_PAE, boot_cpu_data.x86_capability))
+#define cpu_has_tsc (test_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability))
+#define cpu_has_de (test_bit(X86_FEATURE_DE, boot_cpu_data.x86_capability))
+#define cpu_has_vme (test_bit(X86_FEATURE_VME, boot_cpu_data.x86_capability))
+#define cpu_has_fxsr (test_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability))
+#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
+#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
+#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Generic CPUID function
+ */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+ __asm__("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int
op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + : "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +extern unsigned long mmu_cr4_features; + +#include + +static inline void set_in_cr4 (unsigned long mask) +{ + HYPERVISOR_console_write("No set_in_cr4", 13); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + HYPERVISOR_console_write("No clear_in_cr4", 15); +} + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +/* + * Bus types (default is ISA, but people can check others with these..) + */ +#ifdef CONFIG_EISA +extern int EISA_bus; +#else +#define EISA_bus (0) +#endif +extern int MCA_bus; + +/* from system description table in BIOS. Mostly for MCA use, but +others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +/* + * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
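+ * (32 longwords x 32 bits = 1024 bits, i.e. one enable bit for each
+ * of the ports 0-0x3ff.)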
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+struct i387_fsave_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ long status; /* software status information */
+};
+
+struct i387_fxsave_struct {
+ unsigned short cwd;
+ unsigned short swd;
+ unsigned short twd;
+ unsigned short fop;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long mxcsr;
+ long reserved;
+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
+ long padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+ long cwd;
+ long swd;
+ long twd;
+ long fip;
+ long fcs;
+ long foo;
+ long fos;
+ long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+ unsigned char ftop, changed, lookahead, no_update, rm, alimit;
+ struct info *info;
+ unsigned long entry_eip;
+};
+
+union i387_union {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+ struct i387_soft_struct soft;
+};
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+ unsigned long esp1;
+ unsigned short ss1,__ss1h;
+ unsigned long esp2;
+ unsigned short ss2,__ss2h;
+ unsigned long __cr3;
+ unsigned long eip;
+ unsigned long eflags;
+ unsigned long eax,ecx,edx,ebx;
+ unsigned long esp;
+ unsigned long ebp;
+ unsigned long esi;
+ unsigned long edi;
+ unsigned short es, __esh;
+ unsigned short cs, __csh;
+ unsigned short ss, __ssh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, bitmap;
+ unsigned long io_bitmap[IO_BITMAP_SIZE+1];
+ /*
+ * pads the TSS to be cacheline-aligned (size is 0x100)
+ */
+ unsigned long __cacheline_filler[5];
+};
+
+struct thread_struct {
+ unsigned long esp0;
+ unsigned long eip;
+ unsigned long esp;
+ unsigned long fs;
+ unsigned long gs;
+/* Hardware debugging registers */
+ unsigned long debugreg[8]; /* %%db0-7 debug registers */
+/* fault info */
+ unsigned long cr2, trap_no, error_code;
+/* floating point info */
+ union i387_union i387;
+/* virtual 86 mode info */
+ struct vm86_struct * vm86_info;
+ unsigned long screen_bitmap;
+ unsigned long v86flags, v86mask, v86mode, saved_esp0;
+};
+
+#define INIT_THREAD { 0 }
+
+#define INIT_TSS { \
+ 0,0, /* back_link, __blh */ \
+ sizeof(init_stack) + (long) &init_stack, /* esp0 */ \
+ __KERNEL_DS, 0, /* ss0 */ \
+ 0,0,0,0,0,0, /* stack1, stack2 */ \
+ 0, /* cr3 */ \
+ 0,0, /* eip,eflags */ \
+ 0,0,0,0, /* eax,ecx,edx,ebx */ \
+ 0,0,0,0, /* esp,ebp,esi,edi */ \
+ 0,0,0,0,0,0, /* es,cs,ss */ \
+ 0,0,0,0,0,0, /* ds,fs,gs */ \
+ __LDT(0),0, /* ldt */ \
+ 0, INVALID_IO_BITMAP_OFFSET, /* trace, bitmap */ \
+ {~0, } /* ioperm */ \
+}
+
+#define start_thread(regs, new_eip, new_esp) do { \
+ __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
+ set_fs(USER_DS); \
+ regs->xds = __USER_DS; \
+ regs->xes = __USER_DS; \
+ regs->xss = __USER_DS; \
+ regs->xcs = __USER_CS; \
+ regs->eip = new_eip; \
+ regs->esp = new_esp; \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread.
*/ +extern void release_thread(struct task_struct *); +/* + * create a kernel thread without removing it from tasklists + */ +extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +/* Copy and release all segment info associated with a VM */ +extern void copy_segments(struct task_struct *p, struct mm_struct * mm); +extern void release_segments(struct mm_struct * mm); + +/* + * Return saved PC of a blocked thread. + */ +static inline unsigned long thread_saved_pc(struct thread_struct *t) +{ + return ((unsigned long *)t->esp)[3]; +} + +unsigned long get_wchan(struct task_struct *p); +#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) +#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +#define THREAD_SIZE (2*PAGE_SIZE) +#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +#define free_task_struct(p) free_pages((unsigned long) (p), 1) +#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) + +struct microcode { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int reserved[5]; + unsigned int bits[500]; +}; + +/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ +#define MICROCODE_IOCFREE _IO('6',0) + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop"); +} + +#define cpu_relax() rep_nop() + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#ifdef CONFIG_MPENTIUMIII + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#define TF_MASK 0x100 + +#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h new file mode 100644 index 0000000000..f6d95dcf62 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ptrace.h @@ -0,0 +1,86 @@ +#ifndef _I386_PTRACE_H +#define _I386_PTRACE_H + +#define EBX 0 +#define ECX 1 +#define EDX 2 +#define ESI 3 +#define EDI 4 +#define EBP 5 +#define EAX 6 +#define DS 7 +#define ES 8 +#define FS 9 +#define GS 10 +#define ORIG_EAX 11 +#define EIP 12 +#define CS 13 +#define EFL 14 +#define UESP 15 +#define SS 16 +#define FRAME_SIZE 17 + +/* this struct defines the way the registers are stored on the + stack during a system call. */ + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ +#define PTRACE_GETREGS 12 +#define PTRACE_SETREGS 13 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 +#define PTRACE_GETFPXREGS 18 +#define PTRACE_SETFPXREGS 19 + +#define PTRACE_SETOPTIONS 21 + +/* options set using PTRACE_SETOPTIONS */ +#define PTRACE_O_TRACESYSGOOD 0x00000001 + +enum EFLAGS { + EF_CF = 0x00000001, + EF_PF = 0x00000004, + EF_AF = 0x00000010, + EF_ZF = 0x00000040, + EF_SF = 0x00000080, + EF_TF = 0x00000100, + EF_IE = 0x00000200, + EF_DF = 0x00000400, + EF_OF = 0x00000800, + EF_IOPL = 0x00003000, + EF_IOPL_RING0 = 0x00000000, + EF_IOPL_RING1 = 0x00001000, + EF_IOPL_RING2 = 0x00002000, + EF_NT = 0x00004000, /* nested task */ + EF_RF = 0x00010000, /* resume */ + EF_VM = 0x00020000, /* virtual mode */ + EF_AC = 0x00040000, /* alignment */ + EF_VIF = 0x00080000, /* virtual interrupt */ + EF_VIP = 0x00100000, /* virtual interrupt pending */ + EF_ID = 0x00200000, /* id */ +}; + +#ifdef __KERNEL__ +#define user_mode(regs) ((regs) && (3 & (regs)->xcs)) +#define instruction_pointer(regs) ((regs) ? (regs)->eip : NULL) +extern void show_regs(struct pt_regs *); +#endif + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/resource.h b/xenolinux-2.4.16-sparse/include/asm-xeno/resource.h new file mode 100644 index 0000000000..e0da3ad1d0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/resource.h @@ -0,0 +1,47 @@ +#ifndef _I386_RESOURCE_H +#define _I386_RESOURCE_H + +/* + * Resource limits + */ + +#define RLIMIT_CPU 0 /* CPU time in ms */ +#define RLIMIT_FSIZE 1 /* Maximum filesize */ +#define RLIMIT_DATA 2 /* max data size */ +#define RLIMIT_STACK 3 /* max stack size */ +#define RLIMIT_CORE 4 /* max core file size */ +#define RLIMIT_RSS 5 /* max resident set size */ +#define RLIMIT_NPROC 6 /* max number of processes */ +#define RLIMIT_NOFILE 7 /* max number of open files */ +#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */ +#define RLIMIT_AS 9 /* address space limit */ +#define RLIMIT_LOCKS 10 /* maximum file locks held */ + +#define RLIM_NLIMITS 11 + +/* + * SuS says limits have to be unsigned. + * Which makes a ton more sense anyway. + */ +#define RLIM_INFINITY (~0UL) + +#ifdef __KERNEL__ + +#define INIT_RLIMITS \ +{ \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { _STK_LIM, RLIM_INFINITY }, \ + { 0, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { 0, 0 }, \ + { INR_OPEN, INR_OPEN }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ + { RLIM_INFINITY, RLIM_INFINITY }, \ +} + +#endif /* __KERNEL__ */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h new file mode 100644 index 0000000000..9475419f95 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/rwlock.h @@ -0,0 +1,83 @@ +/* include/asm-i386/rwlock.h + * + * Helpers used by both rw spinlocks and rw semaphores. + * + * Based in part on code from semaphore.h and + * spinlock.h Copyright 1996 Linus Torvalds. + * + * Copyright 1999 Red Hat, Inc. + * + * Written by Benjamin LaHaise. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */
+#ifndef _ASM_I386_RWLOCK_H
+#define _ASM_I386_RWLOCK_H
+
+#define RW_LOCK_BIAS 0x01000000
+#define RW_LOCK_BIAS_STR "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $1,(%0)\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $1,%0\n\t" \
+ "js 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_read_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_read_lock_const(rw, helper); \
+ else \
+ __build_read_lock_ptr(rw, helper); \
+ } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tcall " helper "\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ ::"a" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+ asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+ "jnz 2f\n" \
+ "1:\n" \
+ ".section .text.lock,\"ax\"\n" \
+ "2:\tpushl %%eax\n\t" \
+ "leal %0,%%eax\n\t" \
+ "call " helper "\n\t" \
+ "popl %%eax\n\t" \
+ "jmp 1b\n" \
+ ".previous" \
+ :"=m" (*(volatile int *)rw) : : "memory")
+
+#define __build_write_lock(rw, helper) do { \
+ if (__builtin_constant_p(rw)) \
+ __build_write_lock_const(rw, helper); \
+ else \
+ __build_write_lock_ptr(rw, helper); \
+ } while (0)
+
+#endif
diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h b/xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h
new file mode 100644
index 0000000000..0d416f7aef
--- /dev/null
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/rwsem.h
@@ -0,0 +1,226 @@
+/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ *
+ * Derived from asm-i386/semaphore.h
+ *
+ *
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+ * uncontended lock. This can be determined because XADD returns the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe readers) waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
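+ *
+ * A worked example: from a count of 0x00000000, an uncontended writer
+ * XADDs 0xffff0001 and sees the old value 0, so it owns the lock; a
+ * reader arriving now increments the count to 0xffff0002, and the set
+ * sign bit sends it down the rwsem_down_read_failed() slow path.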
+ */ + +#ifndef _I386_RWSEM_H +#define _I386_RWSEM_H + +#ifndef _LINUX_RWSEM_H +#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead +#endif + +#ifdef __KERNEL__ + +#include +#include + +struct rwsem_waiter; + +extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem)); +extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem)); +extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *)); + +/* + * the semaphore definition + */ +struct rw_semaphore { + signed long count; +#define RWSEM_UNLOCKED_VALUE 0x00000000 +#define RWSEM_ACTIVE_BIAS 0x00000001 +#define RWSEM_ACTIVE_MASK 0x0000ffff +#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + spinlock_t wait_lock; + struct list_head wait_list; +#if RWSEM_DEBUG + int debug; +#endif +}; + +/* + * initialisation + */ +#if RWSEM_DEBUG +#define __RWSEM_DEBUG_INIT , 0 +#else +#define __RWSEM_DEBUG_INIT /* */ +#endif + +#define __RWSEM_INITIALIZER(name) \ +{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEBUG_INIT } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *sem) +{ + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +#if RWSEM_DEBUG + sem->debug = 0; +#endif +} + +/* + * lock for reading + */ +static inline void __down_read(struct rw_semaphore *sem) +{ + __asm__ __volatile__( + "# beginning down_read\n\t" +LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ + " js 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\n\t" + " pushl %%ecx\n\t" + " pushl %%edx\n\t" + " call rwsem_down_read_failed\n\t" + " popl %%edx\n\t" + " popl %%ecx\n\t" + " jmp 1b\n" + ".previous" + "# ending down_read\n\t" + : "+m"(sem->count) + : "a"(sem) + : "memory", "cc"); +} + +/* + * lock for writing + */ +static inline void __down_write(struct rw_semaphore *sem) +{ + int tmp; + + tmp = RWSEM_ACTIVE_WRITE_BIAS; + __asm__ __volatile__( + "# beginning down_write\n\t" +LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ + " testl %0,%0\n\t" /* was the count 0 before? 
*/ + " jnz 2f\n\t" /* jump if we weren't granted the lock */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\n\t" + " pushl %%ecx\n\t" + " call rwsem_down_write_failed\n\t" + " popl %%ecx\n\t" + " jmp 1b\n" + ".previous\n" + "# ending down_write" + : "+d"(tmp), "+m"(sem->count) + : "a"(sem) + : "memory", "cc"); +} + +/* + * unlock after reading + */ +static inline void __up_read(struct rw_semaphore *sem) +{ + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + __asm__ __volatile__( + "# beginning __up_read\n\t" +LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ + " js 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\n\t" + " decw %%dx\n\t" /* do nothing if still outstanding active readers */ + " jnz 1b\n\t" + " pushl %%ecx\n\t" + " call rwsem_wake\n\t" + " popl %%ecx\n\t" + " jmp 1b\n" + ".previous\n" + "# ending __up_read\n" + : "+m"(sem->count), "+d"(tmp) + : "a"(sem) + : "memory", "cc"); +} + +/* + * unlock after writing + */ +static inline void __up_write(struct rw_semaphore *sem) +{ + __asm__ __volatile__( + "# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ + " jnz 2f\n\t" /* jump if the lock is being waited upon */ + "1:\n\t" + ".section .text.lock,\"ax\"\n" + "2:\n\t" + " decw %%dx\n\t" /* did the active count reduce to 0? */ + " jnz 1b\n\t" /* jump back if not */ + " pushl %%ecx\n\t" + " call rwsem_wake\n\t" + " popl %%ecx\n\t" + " jmp 1b\n" + ".previous\n" + "# ending __up_write\n" + : "+m"(sem->count) + : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS) + : "memory", "cc", "edx"); +} + +/* + * implement atomic add functionality + */ +static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +{ + __asm__ __volatile__( +LOCK_PREFIX "addl %1,%0" + :"=m"(sem->count) + :"ir"(delta), "m"(sem->count)); +} + +/* + * implement exchange and add functionality + */ +static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +{ + int tmp = delta; + + __asm__ __volatile__( +LOCK_PREFIX "xadd %0,(%2)" + : "+r"(tmp), "=m"(sem->count) + : "r"(sem), "m"(sem->count) + : "memory"); + + return tmp+delta; +} + +#endif /* __KERNEL__ */ +#endif /* _I386_RWSEM_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h b/xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h new file mode 100644 index 0000000000..9e2614f232 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/scatterlist.h @@ -0,0 +1,16 @@ +#ifndef _I386_SCATTERLIST_H +#define _I386_SCATTERLIST_H + +struct scatterlist { + char * address; /* Location data is to be transferred to, NULL for + * highmem page */ + struct page * page; /* Location for highmem page, if any */ + unsigned int offset;/* for highmem, page offset */ + + dma_addr_t dma_address; + unsigned int length; +}; + +#define ISA_DMA_THRESHOLD (0x00ffffff) + +#endif /* !(_I386_SCATTERLIST_H) */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/segment.h b/xenolinux-2.4.16-sparse/include/asm-xeno/segment.h new file mode 100644 index 0000000000..5623211570 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/segment.h @@ -0,0 +1,10 @@ +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#define __KERNEL_CS 0x11 +#define __KERNEL_DS 0x19 + +#define __USER_CS 0x23 +#define __USER_DS 0x2B + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h b/xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h new file mode 100644 index 0000000000..76c738e735 --- 
/dev/null
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/semaphore.h
@@ -0,0 +1,216 @@
+#ifndef _I386_SEMAPHORE_H
+#define _I386_SEMAPHORE_H
+
+#include
+
+#ifdef __KERNEL__
+
+/*
+ * SMP- and interrupt-safe semaphores..
+ *
+ * (C) Copyright 1996 Linus Torvalds
+ *
+ * Modified 1996-12-23 by Dave Grothe to fix bugs in
+ * the original code and to make semaphore waits
+ * interruptible so that processes waiting on
+ * semaphores can be killed.
+ * Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
+ * functions in asm/semaphore-helper.h while fixing a
+ * potential and subtle race discovered by Ulrich Schmid
+ * in down_interruptible(). Since I started to play here I
+ * also implemented the `trylock' semaphore operation.
+ * 1999-07-02 Artur Skawina
+ * Optimized "0(ecx)" -> "(ecx)" (the assembler does not
+ * do this). Changed calling sequences from push/jmp to
+ * traditional call/ret.
+ * Modified 2001-01-01 Andreas Franck
+ * Some hacks to ensure compatibility with recent
+ * GCC snapshots, to avoid stack corruption when compiling
+ * with -fomit-frame-pointer. It is not clear whether this
+ * will be fixed in GCC, as our previous implementation was a
+ * bit dubious.
+ *
+ * If you would like to see an analysis of this implementation, please
+ * ftp to gcom.com and download the file
+ * /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+struct semaphore {
+ atomic_t count;
+ int sleepers;
+ wait_queue_head_t wait;
+#if WAITQUEUE_DEBUG
+ long __magic;
+#endif
+};
+
+#if WAITQUEUE_DEBUG
+# define __SEM_DEBUG_INIT(name) \
+ , (int)&(name).__magic
+#else
+# define __SEM_DEBUG_INIT(name)
+#endif
+
+#define __SEMAPHORE_INITIALIZER(name,count) \
+{ ATOMIC_INIT(count), 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
+ __SEM_DEBUG_INIT(name) }
+
+#define __MUTEX_INITIALIZER(name) \
+ __SEMAPHORE_INITIALIZER(name,1)
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+ struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
+
+static inline void sema_init (struct semaphore *sem, int val)
+{
+/*
+ * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
+ *
+ * I'd rather use the more flexible initialization above, but sadly
+ * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
+ */
+ atomic_set(&sem->count, val);
+ sem->sleepers = 0;
+ init_waitqueue_head(&sem->wait);
+#if WAITQUEUE_DEBUG
+ sem->__magic = (int)&sem->__magic;
+#endif
+}
+
+static inline void init_MUTEX (struct semaphore *sem)
+{
+ sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
+{
+ sema_init(sem, 0);
+}
+
+asmlinkage void __down_failed(void /* special register calling convention */);
+asmlinkage int __down_failed_interruptible(void /* params in registers */);
+asmlinkage int __down_failed_trylock(void /* params in registers */);
+asmlinkage void __up_wakeup(void /* special register calling convention */);
+
+asmlinkage void __down(struct semaphore * sem);
+asmlinkage int __down_interruptible(struct semaphore * sem);
+asmlinkage int __down_trylock(struct semaphore * sem);
+asmlinkage void __up(struct semaphore * sem);
+
+/*
+ * This is ugly, but we want the default case to fall through.
+ * "__down_failed" is a special asm handler that calls the C
+ * routine that actually waits.
See arch/i386/kernel/semaphore.c + */ +static inline void down(struct semaphore * sem) +{ +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + __asm__ __volatile__( + "# atomic down operation\n\t" + LOCK "decl %0\n\t" /* --sem->count */ + "js 2f\n" + "1:\n" + ".section .text.lock,\"ax\"\n" + "2:\tcall __down_failed\n\t" + "jmp 1b\n" + ".previous" + :"=m" (sem->count) + :"c" (sem) + :"memory"); +} + +/* + * Interruptible try to acquire a semaphore. If we obtained + * it, return zero. If we were interrupted, returns -EINTR + */ +static inline int down_interruptible(struct semaphore * sem) +{ + int result; + +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + __asm__ __volatile__( + "# atomic interruptible down operation\n\t" + LOCK "decl %1\n\t" /* --sem->count */ + "js 2f\n\t" + "xorl %0,%0\n" + "1:\n" + ".section .text.lock,\"ax\"\n" + "2:\tcall __down_failed_interruptible\n\t" + "jmp 1b\n" + ".previous" + :"=a" (result), "=m" (sem->count) + :"c" (sem) + :"memory"); + return result; +} + +/* + * Non-blockingly attempt to down() a semaphore. + * Returns zero if we acquired it + */ +static inline int down_trylock(struct semaphore * sem) +{ + int result; + +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + + __asm__ __volatile__( + "# atomic interruptible down operation\n\t" + LOCK "decl %1\n\t" /* --sem->count */ + "js 2f\n\t" + "xorl %0,%0\n" + "1:\n" + ".section .text.lock,\"ax\"\n" + "2:\tcall __down_failed_trylock\n\t" + "jmp 1b\n" + ".previous" + :"=a" (result), "=m" (sem->count) + :"c" (sem) + :"memory"); + return result; +} + +/* + * Note! This is subtle. We jump to wake people up only if + * the semaphore was negative (== somebody was waiting on it). + * The default case (no contention) will result in NO + * jumps for both down() and up(). + */ +static inline void up(struct semaphore * sem) +{ +#if WAITQUEUE_DEBUG + CHECK_MAGIC(sem->__magic); +#endif + __asm__ __volatile__( + "# atomic up operation\n\t" + LOCK "incl %0\n\t" /* ++sem->count */ + "jle 2f\n" + "1:\n" + ".section .text.lock,\"ax\"\n" + "2:\tcall __up_wakeup\n\t" + "jmp 1b\n" + ".previous" + :"=m" (sem->count) + :"c" (sem) + :"memory"); +} + +#endif +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h new file mode 100644 index 0000000000..323835166c --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/sembuf.h @@ -0,0 +1,25 @@ +#ifndef _I386_SEMBUF_H +#define _I386_SEMBUF_H + +/* + * The semid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t sem_otime; /* last semop time */ + unsigned long __unused1; + __kernel_time_t sem_ctime; /* last change time */ + unsigned long __unused2; + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _I386_SEMBUF_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/serial.h b/xenolinux-2.4.16-sparse/include/asm-xeno/serial.h new file mode 100644 index 0000000000..ecc8278676 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/serial.h @@ -0,0 +1,133 @@ +/* + * include/asm-i386/serial.h + */ + +#include + +/* + * This assumes you have a 1.8432 MHz clock for your UART. 
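+ * That clock divided by the UART's fixed divisor of 16 gives the
+ * familiar top rate of 115200 baud (1843200 / 16 = 115200), which is
+ * exactly the BASE_BAUD value defined below.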
+ * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* Standard COM flags (except for COM4, because of the 8514 problem) */ +#ifdef CONFIG_SERIAL_DETECT_IRQ +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ) +#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ) +#else +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) +#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF +#endif + +#ifdef CONFIG_SERIAL_MANY_PORTS +#define FOURPORT_FLAGS ASYNC_FOURPORT +#define ACCENT_FLAGS 0 +#define BOCA_FLAGS 0 +#define HUB6_FLAGS 0 +#define RS_TABLE_SIZE 64 +#else +#define RS_TABLE_SIZE +#endif + +#define MCA_COM_FLAGS (STD_COM_FLAGS|ASYNC_BOOT_ONLYMCA) + +/* + * The following define the access methods for the HUB6 card. All + * access is through two ports for all 24 possible chips. The card is + * selected through the high 2 bits, the port on that card with the + * "middle" 3 bits, and the register on that port with the bottom + * 3 bits. + * + * While the access port and interrupt is configurable, the default + * port locations are 0x302 for the port control register, and 0x303 + * for the data read/write register. Normally, the interrupt is at irq3 + * but can be anything from 3 to 7 inclusive. Note that using 3 will + * require disabling com2. + */ + +#define C_P(card,port) (((card)<<6|(port)<<3) + 1) + +#define STD_SERIAL_PORT_DEFNS \ + /* UART CLK PORT IRQ FLAGS */ \ + { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS }, /* ttyS0 */ \ + { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS }, /* ttyS1 */ \ + { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ + { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ + + +#ifdef CONFIG_SERIAL_MANY_PORTS +#define EXTRA_SERIAL_PORT_DEFNS \ + { 0, BASE_BAUD, 0x1A0, 9, FOURPORT_FLAGS }, /* ttyS4 */ \ + { 0, BASE_BAUD, 0x1A8, 9, FOURPORT_FLAGS }, /* ttyS5 */ \ + { 0, BASE_BAUD, 0x1B0, 9, FOURPORT_FLAGS }, /* ttyS6 */ \ + { 0, BASE_BAUD, 0x1B8, 9, FOURPORT_FLAGS }, /* ttyS7 */ \ + { 0, BASE_BAUD, 0x2A0, 5, FOURPORT_FLAGS }, /* ttyS8 */ \ + { 0, BASE_BAUD, 0x2A8, 5, FOURPORT_FLAGS }, /* ttyS9 */ \ + { 0, BASE_BAUD, 0x2B0, 5, FOURPORT_FLAGS }, /* ttyS10 */ \ + { 0, BASE_BAUD, 0x2B8, 5, FOURPORT_FLAGS }, /* ttyS11 */ \ + { 0, BASE_BAUD, 0x330, 4, ACCENT_FLAGS }, /* ttyS12 */ \ + { 0, BASE_BAUD, 0x338, 4, ACCENT_FLAGS }, /* ttyS13 */ \ + { 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS14 (spare) */ \ + { 0, BASE_BAUD, 0x000, 0, 0 }, /* ttyS15 (spare) */ \ + { 0, BASE_BAUD, 0x100, 12, BOCA_FLAGS }, /* ttyS16 */ \ + { 0, BASE_BAUD, 0x108, 12, BOCA_FLAGS }, /* ttyS17 */ \ + { 0, BASE_BAUD, 0x110, 12, BOCA_FLAGS }, /* ttyS18 */ \ + { 0, BASE_BAUD, 0x118, 12, BOCA_FLAGS }, /* ttyS19 */ \ + { 0, BASE_BAUD, 0x120, 12, BOCA_FLAGS }, /* ttyS20 */ \ + { 0, BASE_BAUD, 0x128, 12, BOCA_FLAGS }, /* ttyS21 */ \ + { 0, BASE_BAUD, 0x130, 12, BOCA_FLAGS }, /* ttyS22 */ \ + { 0, BASE_BAUD, 0x138, 12, BOCA_FLAGS }, /* ttyS23 */ \ + { 0, BASE_BAUD, 0x140, 12, BOCA_FLAGS }, /* ttyS24 */ \ + { 0, BASE_BAUD, 0x148, 12, BOCA_FLAGS }, /* ttyS25 */ \ + { 0, BASE_BAUD, 0x150, 12, BOCA_FLAGS }, /* ttyS26 */ \ + { 0, BASE_BAUD, 0x158, 12, BOCA_FLAGS }, /* ttyS27 */ \ + { 0, BASE_BAUD, 0x160, 12, BOCA_FLAGS }, /* ttyS28 */ \ + { 0, BASE_BAUD, 0x168, 12, BOCA_FLAGS }, /* ttyS29 */ \ + { 0, BASE_BAUD, 0x170, 12, BOCA_FLAGS }, /* ttyS30 */ \ + { 0, BASE_BAUD, 0x178, 12, BOCA_FLAGS }, /* ttyS31 
*/ +#else +#define EXTRA_SERIAL_PORT_DEFNS +#endif + +/* You can have up to four HUB6's in the system, but I've only + * included two cards here for a total of twelve ports. + */ +#if (defined(CONFIG_HUB6) && defined(CONFIG_SERIAL_MANY_PORTS)) +#define HUB6_SERIAL_PORT_DFNS \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,0) }, /* ttyS32 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,1) }, /* ttyS33 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,2) }, /* ttyS34 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,3) }, /* ttyS35 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,4) }, /* ttyS36 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,5) }, /* ttyS37 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,0) }, /* ttyS38 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,1) }, /* ttyS39 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,2) }, /* ttyS40 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,3) }, /* ttyS41 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,4) }, /* ttyS42 */ \ + { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,5) }, /* ttyS43 */ +#else +#define HUB6_SERIAL_PORT_DFNS +#endif + +#ifdef CONFIG_MCA +#define MCA_SERIAL_PORT_DFNS \ + { 0, BASE_BAUD, 0x3220, 3, MCA_COM_FLAGS }, \ + { 0, BASE_BAUD, 0x3228, 3, MCA_COM_FLAGS }, \ + { 0, BASE_BAUD, 0x4220, 3, MCA_COM_FLAGS }, \ + { 0, BASE_BAUD, 0x4228, 3, MCA_COM_FLAGS }, \ + { 0, BASE_BAUD, 0x5220, 3, MCA_COM_FLAGS }, \ + { 0, BASE_BAUD, 0x5228, 3, MCA_COM_FLAGS }, +#else +#define MCA_SERIAL_PORT_DFNS +#endif + +#define SERIAL_PORT_DFNS \ + STD_SERIAL_PORT_DEFNS \ + EXTRA_SERIAL_PORT_DEFNS \ + HUB6_SERIAL_PORT_DFNS \ + MCA_SERIAL_PORT_DFNS + diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/setup.h b/xenolinux-2.4.16-sparse/include/asm-xeno/setup.h new file mode 100644 index 0000000000..ae25cc4275 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/setup.h @@ -0,0 +1,10 @@ +/* + * Just a place holder. We don't want to have to test x86 before + * we include stuff + */ + +#ifndef _i386_SETUP_H +#define _i386_SETUP_H + + +#endif /* _i386_SETUP_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h b/xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h new file mode 100644 index 0000000000..d1cdc3cb07 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/shmbuf.h @@ -0,0 +1,42 @@ +#ifndef _I386_SHMBUF_H +#define _I386_SHMBUF_H + +/* + * The shmid64_ds structure for i386 architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_time_t shm_atime; /* last attach time */ + unsigned long __unused1; + __kernel_time_t shm_dtime; /* last detach time */ + unsigned long __unused2; + __kernel_time_t shm_ctime; /* last change time */ + unsigned long __unused3; + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. 
of current attaches */ + unsigned long __unused4; + unsigned long __unused5; +}; + +struct shminfo64 { + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; +}; + +#endif /* _I386_SHMBUF_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h b/xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h new file mode 100644 index 0000000000..786243a5b3 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/shmparam.h @@ -0,0 +1,6 @@ +#ifndef _ASMI386_SHMPARAM_H +#define _ASMI386_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _ASMI386_SHMPARAM_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h new file mode 100644 index 0000000000..b51145936a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/sigcontext.h @@ -0,0 +1,83 @@ +#ifndef _ASMi386_SIGCONTEXT_H +#define _ASMi386_SIGCONTEXT_H + +/* + * As documented in the iBCS2 standard.. + * + * The first part of "struct _fpstate" is just the normal i387 + * hardware setup, the extra "status" word is used to save the + * coprocessor status word before entering the handler. + * + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * The FPU state data structure has had to grow to accomodate the + * extended FPU state required by the Streaming SIMD Extensions. + * There is no documented standard to accomplish this at the moment. + */ +struct _fpreg { + unsigned short significand[4]; + unsigned short exponent; +}; + +struct _fpxreg { + unsigned short significand[4]; + unsigned short exponent; + unsigned short padding[3]; +}; + +struct _xmmreg { + unsigned long element[4]; +}; + +struct _fpstate { + /* Regular FPU environment */ + unsigned long cw; + unsigned long sw; + unsigned long tag; + unsigned long ipoff; + unsigned long cssel; + unsigned long dataoff; + unsigned long datasel; + struct _fpreg _st[8]; + unsigned short status; + unsigned short magic; /* 0xffff = regular FPU data only */ + + /* FXSR FPU environment */ + unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */ + unsigned long mxcsr; + unsigned long reserved; + struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg _xmm[8]; + unsigned long padding[56]; +}; + +#define X86_FXSR_MAGIC 0x0000 + +struct sigcontext { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned long edi; + unsigned long esi; + unsigned long ebp; + unsigned long esp; + unsigned long ebx; + unsigned long edx; + unsigned long ecx; + unsigned long eax; + unsigned long trapno; + unsigned long err; + unsigned long eip; + unsigned short cs, __csh; + unsigned long eflags; + unsigned long esp_at_signal; + unsigned short ss, __ssh; + struct _fpstate * fpstate; + unsigned long oldmask; + unsigned long cr2; +}; + + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h b/xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h new file mode 100644 index 0000000000..9abf5427a9 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/siginfo.h @@ -0,0 +1,232 @@ +#ifndef _I386_SIGINFO_H +#define _I386_SIGINFO_H + +#include + +/* XXX: This structure was copied from the Alpha; is there an iBCS version? 
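+ * Whatever its pedigree, code should not poke at _sifields directly:
+ * the si_* accessor macros below hide the union (a SIGCHLD handler,
+ * say, reads info->si_pid and info->si_status), and the _pad member
+ * keeps the structure at exactly SI_MAX_SIZE bytes.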
*/ + +typedef union sigval { + int sival_int; + void *sival_ptr; +} sigval_t; + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + unsigned int _timer1; + unsigned int _timer2; + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + uid_t _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + uid_t _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +/* + * How these fields are to be accessed. + */ +#define si_pid _sifields._kill._pid +#define si_uid _sifields._kill._uid +#define si_status _sifields._sigchld._status +#define si_utime _sifields._sigchld._utime +#define si_stime _sifields._sigchld._stime +#define si_value _sifields._rt._sigval +#define si_int _sifields._rt._sigval.sival_int +#define si_ptr _sifields._rt._sigval.sival_ptr +#define si_addr _sifields._sigfault._addr +#define si_band _sifields._sigpoll._band +#define si_fd _sifields._sigpoll._fd + +#ifdef __KERNEL__ +#define __SI_MASK 0xffff0000 +#define __SI_KILL (0 << 16) +#define __SI_TIMER (1 << 16) +#define __SI_POLL (2 << 16) +#define __SI_FAULT (3 << 16) +#define __SI_CHLD (4 << 16) +#define __SI_RT (5 << 16) +#define __SI_CODE(T,N) ((T) << 16 | ((N) & 0xffff)) +#else +#define __SI_KILL 0 +#define __SI_TIMER 0 +#define __SI_POLL 0 +#define __SI_FAULT 0 +#define __SI_CHLD 0 +#define __SI_RT 0 +#define __SI_CODE(T,N) (N) +#endif + +/* + * si_code values + * Digital reserves positive values for kernel-generated signals. 
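+ * User-generated codes are zero or negative (SI_USER, SI_QUEUE,
+ * SI_MESGQ, ...), which is why SI_FROMUSER() and SI_FROMKERNEL()
+ * below reduce to a simple sign test on si_code.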
+ */ +#define SI_USER 0 /* sent by kill, sigsend, raise */ +#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ +#define SI_QUEUE -1 /* sent by sigqueue */ +#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ +#define SI_MESGQ -3 /* sent by real time mesq state change */ +#define SI_ASYNCIO -4 /* sent by AIO completion */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ + +#define SI_FROMUSER(siptr) ((siptr)->si_code <= 0) +#define SI_FROMKERNEL(siptr) ((siptr)->si_code > 0) + +/* + * SIGILL si_codes + */ +#define ILL_ILLOPC (__SI_FAULT|1) /* illegal opcode */ +#define ILL_ILLOPN (__SI_FAULT|2) /* illegal operand */ +#define ILL_ILLADR (__SI_FAULT|3) /* illegal addressing mode */ +#define ILL_ILLTRP (__SI_FAULT|4) /* illegal trap */ +#define ILL_PRVOPC (__SI_FAULT|5) /* privileged opcode */ +#define ILL_PRVREG (__SI_FAULT|6) /* privileged register */ +#define ILL_COPROC (__SI_FAULT|7) /* coprocessor error */ +#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ +#define NSIGILL 8 + +/* + * SIGFPE si_codes + */ +#define FPE_INTDIV (__SI_FAULT|1) /* integer divide by zero */ +#define FPE_INTOVF (__SI_FAULT|2) /* integer overflow */ +#define FPE_FLTDIV (__SI_FAULT|3) /* floating point divide by zero */ +#define FPE_FLTOVF (__SI_FAULT|4) /* floating point overflow */ +#define FPE_FLTUND (__SI_FAULT|5) /* floating point underflow */ +#define FPE_FLTRES (__SI_FAULT|6) /* floating point inexact result */ +#define FPE_FLTINV (__SI_FAULT|7) /* floating point invalid operation */ +#define FPE_FLTSUB (__SI_FAULT|8) /* subscript out of range */ +#define NSIGFPE 8 + +/* + * SIGSEGV si_codes + */ +#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ +#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ +#define NSIGSEGV 2 + +/* + * SIGBUS si_codes + */ +#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ +#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ +#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ +#define NSIGBUS 3 + +/* + * SIGTRAP si_codes + */ +#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */ +#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ +#define NSIGTRAP 2 + +/* + * SIGCHLD si_codes + */ +#define CLD_EXITED (__SI_CHLD|1) /* child has exited */ +#define CLD_KILLED (__SI_CHLD|2) /* child was killed */ +#define CLD_DUMPED (__SI_CHLD|3) /* child terminated abnormally */ +#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */ +#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */ +#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */ +#define NSIGCHLD 6 + +/* + * SIGPOLL si_codes + */ +#define POLL_IN (__SI_POLL|1) /* data input available */ +#define POLL_OUT (__SI_POLL|2) /* output buffers available */ +#define POLL_MSG (__SI_POLL|3) /* input message available */ +#define POLL_ERR (__SI_POLL|4) /* i/o error */ +#define POLL_PRI (__SI_POLL|5) /* high priority input available */ +#define POLL_HUP (__SI_POLL|6) /* device disconnected */ +#define NSIGPOLL 6 + +/* + * sigevent definitions + * + * It seems likely that SIGEV_THREAD will have to be handled from + * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the + * thread manager then catches and does the appropriate nonsense. + * However, everything is written out here so as to not get lost. 
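+ *
+ * As with siginfo, the structure is padded to a fixed size: with
+ * SIGEV_MAX_SIZE at 64 bytes and three int-sized fields in front,
+ * SIGEV_PAD_SIZE works out to 13 ints on i386.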
+ */ +#define SIGEV_SIGNAL 0 /* notify via signal */ +#define SIGEV_NONE 1 /* other notification: meaningless */ +#define SIGEV_THREAD 2 /* deliver via thread creation */ + +#define SIGEV_MAX_SIZE 64 +#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3) + +typedef struct sigevent { + sigval_t sigev_value; + int sigev_signo; + int sigev_notify; + union { + int _pad[SIGEV_PAD_SIZE]; + + struct { + void (*_function)(sigval_t); + void *_attribute; /* really pthread_attr_t */ + } _sigev_thread; + } _sigev_un; +} sigevent_t; + +#define sigev_notify_function _sigev_un._sigev_thread._function +#define sigev_notify_attributes _sigev_un._sigev_thread._attribute + +#ifdef __KERNEL__ +#include + +static inline void copy_siginfo(siginfo_t *to, siginfo_t *from) +{ + if (from->si_code < 0) + memcpy(to, from, sizeof(siginfo_t)); + else + /* _sigchld is currently the largest know union member */ + memcpy(to, from, 3*sizeof(int) + sizeof(from->_sifields._sigchld)); +} + +extern int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from); + +#endif /* __KERNEL__ */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/signal.h b/xenolinux-2.4.16-sparse/include/asm-xeno/signal.h new file mode 100644 index 0000000000..8740d4ea24 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/signal.h @@ -0,0 +1,221 @@ +#ifndef _ASMi386_SIGNAL_H +#define _ASMi386_SIGNAL_H + +#include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifdef __KERNEL__ +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +#define _NSIG 64 +#define _NSIG_BPW 32 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
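+ *
+ * For illustration only, a userland caller would typically OR a few of
+ * these together when installing a handler, roughly:
+ *
+ *	struct sigaction act;
+ *	act.sa_handler = reap_child;	(hypothetical handler)
+ *	sigemptyset(&act.sa_mask);
+ *	act.sa_flags = SA_RESTART | SA_NOCLDSTOP;
+ *	sigaction(SIGCHLD, &act, NULL);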
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND +#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#ifdef __KERNEL__ + +/* + * These values of sa_flags are used only by the kernel as part of the + * irq handling routines. + * + * SA_INTERRUPT is also used by the irq handling routines. + * SA_SHIRQ is for shared interrupt support on PCI and EISA. + */ +#define SA_PROBE SA_ONESHOT +#define SA_SAMPLE_RANDOM SA_RESTART +#define SA_SHIRQ 0x04000000 +#endif + +#define SIG_BLOCK 0 /* for blocking signals */ +#define SIG_UNBLOCK 1 /* for unblocking signals */ +#define SIG_SETMASK 2 /* for setting the signal mask */ + +/* Type of a signal handler. */ +typedef void (*__sighandler_t)(int); + +#define SIG_DFL ((__sighandler_t)0) /* default signal handling */ +#define SIG_IGN ((__sighandler_t)1) /* ignore signal */ +#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ + +#ifdef __KERNEL__ +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + void (*sa_restorer)(void); + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; +#else +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ +#include + +#define __HAVE_ARCH_SIG_BITOPS + +static __inline__ void sigaddset(sigset_t *set, int _sig) +{ + __asm__("btsl %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static __inline__ void sigdelset(sigset_t *set, int _sig) +{ + __asm__("btrl %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc"); +} + +static __inline__ int __const_sigismember(sigset_t *set, int _sig) +{ + unsigned long sig = _sig - 1; + return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW)); +} + +static __inline__ int __gen_sigismember(sigset_t *set, int _sig) +{ + int ret; + __asm__("btl %2,%1\n\tsbbl %0,%0" + : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + return ret; +} + +#define sigismember(set,sig) \ + (__builtin_constant_p(sig) ? 
\ + __const_sigismember((set),(sig)) : \ + __gen_sigismember((set),(sig))) + +#define sigmask(sig) (1UL << ((sig) - 1)) + +static __inline__ int sigfindinword(unsigned long word) +{ + __asm__("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc"); + return word; +} + +#endif /* __KERNEL__ */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/smp.h b/xenolinux-2.4.16-sparse/include/asm-xeno/smp.h new file mode 100644 index 0000000000..e485ca0b3d --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/smp.h @@ -0,0 +1,113 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ +#ifndef __ASSEMBLY__ +#include +#include +#include +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#ifndef __ASSEMBLY__ +#include +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif +#include +#endif +#endif + +#ifdef CONFIG_SMP +# define TARGET_CPUS cpu_online_map +# define INT_DELIVERY_MODE 1 /* logical delivery broadcast to all procs */ +#else +# define INT_DELIVERY_MODE 1 /* logical delivery */ +# define TARGET_CPUS 0x01 +#endif + +#ifndef clustered_apic_mode + #define clustered_apic_mode (0) + #define esr_disable (0) +#endif + +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern unsigned long phys_cpu_present_map; +extern unsigned long cpu_online_map; +extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void smp_send_reschedule(int cpu); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +/* + * On x86 all CPUs are mapped 1:1 to the APIC space. + * This simplifies scheduling and IPI sending and + * compresses data structures. + */ +static inline int cpu_logical_map(int cpu) +{ + return cpu; +} +static inline int cpu_number_map(int cpu) +{ + return cpu; +} + +/* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. + */ +#define MAX_APICID 256 +extern volatile int cpu_to_physical_apicid[NR_CPUS]; +extern volatile int physical_apicid_to_cpu[MAX_APICID]; +extern volatile int cpu_to_logical_apicid[NR_CPUS]; +extern volatile int logical_apicid_to_cpu[MAX_APICID]; + +/* + * General functions that each host system must provide. + */ + +extern void smp_boot_cpus(void); +extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ + +#define smp_processor_id() (current->processor) + +#endif /* !__ASSEMBLY__ */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs misses on the L1 + * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My + * gut feeling is this will vary by board in value. For a board + * with separate L2 cache it probably depends also on the RSS, and + * for a board with shared L2 cache it ought to decay fast as other + * processes are run. 
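+ * Lacking any per-board tuning, the value below is a single rough
+ * compromise used everywhere.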
+ */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + +#endif +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h new file mode 100644 index 0000000000..864351c543 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/smplock.h @@ -0,0 +1,75 @@ +/* + * + * + * i386 SMP lock implementation + */ +#include +#include +#include +#include + +extern spinlock_t kernel_flag; + +#define kernel_locked() spin_is_locked(&kernel_flag) + +/* + * Release global kernel lock and global interrupt lock + */ +#define release_kernel_lock(task, cpu) \ +do { \ + if (task->lock_depth >= 0) \ + spin_unlock(&kernel_flag); \ + release_irqlock(cpu); \ + __sti(); \ +} while (0) + +/* + * Re-acquire the kernel lock + */ +#define reacquire_kernel_lock(task) \ +do { \ + if (task->lock_depth >= 0) \ + spin_lock(&kernel_flag); \ +} while (0) + + +/* + * Getting the big kernel lock. + * + * This cannot happen asynchronously, + * so we only need to worry about other + * CPU's. + */ +static __inline__ void lock_kernel(void) +{ +#if 1 + if (!++current->lock_depth) + spin_lock(&kernel_flag); +#else + __asm__ __volatile__( + "incl %1\n\t" + "jne 9f" + spin_lock_string + "\n9:" + :"=m" (__dummy_lock(&kernel_flag)), + "=m" (current->lock_depth)); +#endif +} + +static __inline__ void unlock_kernel(void) +{ + if (current->lock_depth < 0) + BUG(); +#if 1 + if (--current->lock_depth < 0) + spin_unlock(&kernel_flag); +#else + __asm__ __volatile__( + "decl %1\n\t" + "jns 9f\n\t" + spin_unlock_string + "\n9:" + :"=m" (__dummy_lock(&kernel_flag)), + "=m" (current->lock_depth)); +#endif +} diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/socket.h b/xenolinux-2.4.16-sparse/include/asm-xeno/socket.h new file mode 100644 index 0000000000..fbcc44d343 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/socket.h @@ -0,0 +1,64 @@ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include + +/* For setsockoptions(2) */ +#define SOL_SOCKET 1 + +#define SO_DEBUG 1 +#define SO_REUSEADDR 2 +#define SO_TYPE 3 +#define SO_ERROR 4 +#define SO_DONTROUTE 5 +#define SO_BROADCAST 6 +#define SO_SNDBUF 7 +#define SO_RCVBUF 8 +#define SO_KEEPALIVE 9 +#define SO_OOBINLINE 10 +#define SO_NO_CHECK 11 +#define SO_PRIORITY 12 +#define SO_LINGER 13 +#define SO_BSDCOMPAT 14 +/* To add :#define SO_REUSEPORT 15 */ +#define SO_PASSCRED 16 +#define SO_PEERCRED 17 +#define SO_RCVLOWAT 18 +#define SO_SNDLOWAT 19 +#define SO_RCVTIMEO 20 +#define SO_SNDTIMEO 21 + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 22 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define SO_SECURITY_ENCRYPTION_NETWORK 24 + +#define SO_BINDTODEVICE 25 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 26 +#define SO_DETACH_FILTER 27 + +#define SO_PEERNAME 28 +#define SO_TIMESTAMP 29 +#define SCM_TIMESTAMP SO_TIMESTAMP + +#define SO_ACCEPTCONN 30 + +/* Nasty libc5 fixup - bletch */ +#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) +/* Socket types. */ +#define SOCK_STREAM 1 /* stream (connection) socket */ +#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ +#define SOCK_RAW 3 /* raw socket */ +#define SOCK_RDM 4 /* reliably-delivered message */ +#define SOCK_SEQPACKET 5 /* sequential packet socket */ +#define SOCK_PACKET 10 /* linux specific way of */ + /* getting packets at the dev */ + /* level. For writing rarp and */ + /* other similar things on the */ + /* user level. 
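+ (Considered obsolete: new code should prefer PF_PACKET sockets.)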
*/ +#define SOCK_MAX (SOCK_PACKET+1) +#endif + +#endif /* _ASM_SOCKET_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h b/xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h new file mode 100644 index 0000000000..6b747f8e22 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/sockios.h @@ -0,0 +1,12 @@ +#ifndef __ARCH_I386_SOCKIOS__ +#define __ARCH_I386_SOCKIOS__ + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h b/xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h new file mode 100644 index 0000000000..254224411b --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/softirq.h @@ -0,0 +1,48 @@ +#ifndef __ASM_SOFTIRQ_H +#define __ASM_SOFTIRQ_H + +#include +#include + +#define __cpu_bh_enable(cpu) \ + do { barrier(); local_bh_count(cpu)--; } while (0) +#define cpu_bh_disable(cpu) \ + do { local_bh_count(cpu)++; barrier(); } while (0) + +#define local_bh_disable() cpu_bh_disable(smp_processor_id()) +#define __local_bh_enable() __cpu_bh_enable(smp_processor_id()) + +#define in_softirq() (local_bh_count(smp_processor_id()) != 0) + +/* + * NOTE: this assembly code assumes: + * + * (char *)&local_bh_count - 8 == (char *)&softirq_pending + * + * If you change the offsets in irq_stat then you have to + * update this code as well. + */ +#define local_bh_enable() \ +do { \ + unsigned int *ptr = &local_bh_count(smp_processor_id()); \ + \ + barrier(); \ + if (!--*ptr) \ + __asm__ __volatile__ ( \ + "cmpl $0, -8(%0);" \ + "jnz 2f;" \ + "1:;" \ + \ + ".section .text.lock,\"ax\";" \ + "2: pushl %%eax; pushl %%ecx; pushl %%edx;" \ + "call %c1;" \ + "popl %%edx; popl %%ecx; popl %%eax;" \ + "jmp 1b;" \ + ".previous;" \ + \ + : /* no output */ \ + : "r" (ptr), "i" (do_softirq) \ + /* no registers clobbered */ ); \ +} while (0) + +#endif /* __ASM_SOFTIRQ_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h b/xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h new file mode 100644 index 0000000000..dbdd68b41b --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/spinlock.h @@ -0,0 +1,212 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include +#include + +extern int printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +/* It seems that people are forgetting to + * initialize their spinlocks properly, tsk tsk. + * Remember to turn this off in 2.4. -ben + */ +#if defined(CONFIG_DEBUG_SPINLOCK) +#define SPINLOCK_DEBUG 1 +#else +#define SPINLOCK_DEBUG 0 +#endif + +/* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + */ + +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} spinlock_t; + +#define SPINLOCK_MAGIC 0xdead4ead + +#if SPINLOCK_DEBUG +#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC +#else +#define SPINLOCK_MAGIC_INIT /* */ +#endif + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT } + +#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0) + +/* + * Simple spin lock operations. There are two variants, one clears IRQ's + * on the local processor, one does not. + * + * We make no fairness assumptions. They have a cost. 
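+ *
+ * The lock byte holds 1 when free: spin_lock() claims it with a locked
+ * "decb" and, on contention, spins on "rep;nop" in .text.lock until the
+ * byte goes positive again, which is also why spin_is_locked() below
+ * tests for <= 0.  As a rough usage sketch (foo_lock is a hypothetical
+ * example):
+ *
+ *	static spinlock_t foo_lock = SPIN_LOCK_UNLOCKED;
+ *
+ *	spin_lock(&foo_lock);
+ *	... critical section, must not sleep ...
+ *	spin_unlock(&foo_lock);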
+ */ + +#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0) +#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) + +#define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + ".section .text.lock,\"ax\"\n" \ + "2:\t" \ + "cmpb $0,%0\n\t" \ + "rep;nop\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + ".previous" + +/* + * This works. Despite all the confusion. + * (except on PPro SMP or if we are using OOSTORE) + * (PPro errata 66, 92) + */ + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) + +#define spin_unlock_string \ + "movb $1,%0" \ + :"=m" (lock->lock) : : "memory" + + +static inline void spin_unlock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#else + +#define spin_unlock_string \ + "xchgb %b0, %1" \ + :"=q" (oldval), "=m" (lock->lock) \ + :"0" (oldval) : "memory" + +static inline void spin_unlock(spinlock_t *lock) +{ + char oldval = 1; +#if SPINLOCK_DEBUG + if (lock->magic != SPINLOCK_MAGIC) + BUG(); + if (!spin_is_locked(lock)) + BUG(); +#endif + __asm__ __volatile__( + spin_unlock_string + ); +} + +#endif + +static inline int spin_trylock(spinlock_t *lock) +{ + char oldval; + __asm__ __volatile__( + "xchgb %b0,%1" + :"=q" (oldval), "=m" (lock->lock) + :"0" (0) : "memory"); + return oldval > 0; +} + +static inline void spin_lock(spinlock_t *lock) +{ +#if SPINLOCK_DEBUG + __label__ here; +here: + if (lock->magic != SPINLOCK_MAGIC) { +printk("eip: %p\n", &&here); + BUG(); + } +#endif + __asm__ __volatile__( + spin_lock_string + :"=m" (lock->lock) : : "memory"); +} + + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ +typedef struct { + volatile unsigned int lock; +#if SPINLOCK_DEBUG + unsigned magic; +#endif +} rwlock_t; + +#define RWLOCK_MAGIC 0xdeaf1eed + +#if SPINLOCK_DEBUG +#define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT /* */ +#endif + +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } + +#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) + +/* + * On x86, we implement read-write locks as a 32-bit counter + * with the high bit (sign) being the "contended" bit. + * + * The inline assembly is non-obvious. Think about it. + * + * Changed to use the same technique as rw semaphores. See + * semaphore.h for details. 
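+ * In short: the count starts at RW_LOCK_BIAS; a reader subtracts 1 and
+ * a writer subtracts the whole bias, so the count is zero exactly when
+ * a single writer holds the lock, and the sign bit flags writer
+ * contention.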
-ben + */ +/* the spinlock helpers are in arch/i386/kernel/semaphore.c */ + +static inline void read_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_read_lock(rw, "__read_lock_failed"); +} + +static inline void write_lock(rwlock_t *rw) +{ +#if SPINLOCK_DEBUG + if (rw->magic != RWLOCK_MAGIC) + BUG(); +#endif + __build_write_lock(rw, "__write_lock_failed"); +} + +#define read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory") +#define write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory") + +static inline int write_trylock(rwlock_t *lock) +{ + atomic_t *count = (atomic_t *)lock; + if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + return 1; + atomic_add(RW_LOCK_BIAS, count); + return 0; +} + +#endif /* __ASM_SPINLOCK_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/stat.h b/xenolinux-2.4.16-sparse/include/asm-xeno/stat.h new file mode 100644 index 0000000000..ef16311fe0 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/stat.h @@ -0,0 +1,78 @@ +#ifndef _I386_STAT_H +#define _I386_STAT_H + +struct __old_kernel_stat { + unsigned short st_dev; + unsigned short st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; +}; + +struct stat { + unsigned short st_dev; + unsigned short __pad1; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + unsigned short __pad2; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long __unused1; + unsigned long st_mtime; + unsigned long __unused2; + unsigned long st_ctime; + unsigned long __unused3; + unsigned long __unused4; + unsigned long __unused5; +}; + +/* This matches struct stat64 in glibc2.1, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned short st_dev; + unsigned char __pad0[10]; + +#define STAT64_HAS_BROKEN_ST_INO 1 + unsigned long __st_ino; + + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned short st_rdev; + unsigned char __pad3[10]; + + long long st_size; + unsigned long st_blksize; + + unsigned long st_blocks; /* Number 512-byte blocks allocated. 
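+	   (Always in 512-byte units, independent of st_blksize.)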
*/ + unsigned long __pad4; /* future possible st_blocks high bits */ + + unsigned long st_atime; + unsigned long __pad5; + + unsigned long st_mtime; + unsigned long __pad6; + + unsigned long st_ctime; + unsigned long __pad7; /* will be high 32 bits of ctime someday */ + + unsigned long long st_ino; +}; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h b/xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h new file mode 100644 index 0000000000..113d5d428a --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/statfs.h @@ -0,0 +1,25 @@ +#ifndef _I386_STATFS_H +#define _I386_STATFS_H + +#ifndef __KERNEL_STRICT_NAMES + +#include + +typedef __kernel_fsid_t fsid_t; + +#endif + +struct statfs { + long f_type; + long f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; + __kernel_fsid_t f_fsid; + long f_namelen; + long f_spare[6]; +}; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h b/xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h new file mode 100644 index 0000000000..51bfd051bc --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/string-486.h @@ -0,0 +1,617 @@ +#ifndef _I386_STRING_I486_H_ +#define _I386_STRING_I486_H_ + +/* + * This string-include defines all string functions as inline + * functions. Use gcc. It also assumes ds=es=data space, this should be + * normal. Most of the string-functions are rather heavily hand-optimized, + * see especially strtok,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Revised and optimized for i486/pentium + * 1994/03/15 by Alberto Vignani/Davide Parodi @crf.it + * + * Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down. 
+ * + * 1999/10/5 Proper register args for newer GCCs and minor bugs + * fixed - Petko Manolov (petkan@spct.net) + * 1999/10/14 3DNow memscpy() added - Petkan + * 2000/05/09 extern changed to static in function definitions + * and a few cleanups - Petkan + */ + +#define __HAVE_ARCH_STRCPY +static inline char * strcpy(char * dest,const char *src) +{ +register char *tmp= (char *)dest; +register char dummy; +__asm__ __volatile__( + "\n1:\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "testb %2,%2\n\t" + "jne 1b" + :"=r" (src), "=r" (tmp), "=q" (dummy) + :"0" (src), "1" (tmp) + :"memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCPY +static inline char * strncpy(char * dest,const char *src,size_t count) +{ +register char *tmp= (char *)dest; +register char dummy; +if (count) { +__asm__ __volatile__( + "\n1:\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "je 3f\n\t" + "testb %2,%2\n\t" + "jne 1b\n\t" + "2:\tmovb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "jne 2b\n\t" + "3:" + :"=r" (src), "=r" (tmp), "=q" (dummy), "=r" (count) + :"0" (src), "1" (tmp), "3" (count) + :"memory"); + } /* if (count) */ +return dest; +} + +#define __HAVE_ARCH_STRCAT +static inline char * strcat(char * dest,const char * src) +{ +register char *tmp = (char *)(dest-1); +register char dummy; +__asm__ __volatile__( + "\n1:\tincl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\tmovb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n" + :"=q" (dummy), "=r" (tmp), "=r" (src) + :"1" (tmp), "2" (src) + :"memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCAT +static inline char * strncat(char * dest,const char * src,size_t count) +{ +register char *tmp = (char *)(dest-1); +register char dummy; +__asm__ __volatile__( + "\n1:\tincl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\tdecl %3\n\t" + "js 3f\n\t" + "movb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n" + "3:\txorb %0,%0\n\t" + "movb %b0,(%1)\n\t" + :"=q" (dummy), "=r" (tmp), "=r" (src), "=r" (count) + :"1" (tmp), "2" (src), "3" (count) + :"memory"); +return dest; +} + +#define __HAVE_ARCH_STRCMP +static inline int strcmp(const char * cs,const char * ct) +{ +register int __res; +__asm__ __volatile__( + "\n1:\tmovb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 2f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n\t" + "xorl %0,%0\n\t" + "jmp 3f\n" + "2:\tmovl $1,%0\n\t" + "jb 3f\n\t" + "negl %0\n" + "3:" + :"=q" (__res), "=r" (cs), "=r" (ct) + :"1" (cs), "2" (ct) + : "memory" ); +return __res; +} + +#define __HAVE_ARCH_STRNCMP +static inline int strncmp(const char * cs,const char * ct,size_t count) +{ +register int __res; +__asm__ __volatile__( + "\n1:\tdecl %3\n\t" + "js 2f\n\t" + "movb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 3f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n" + "2:\txorl %0,%0\n\t" + "jmp 4f\n" + "3:\tmovl $1,%0\n\t" + "jb 4f\n\t" + "negl %0\n" + "4:" + :"=q" (__res), "=r" (cs), "=r" (ct), "=r" (count) + :"1" (cs), "2" (ct), "3" (count)); +return __res; +} + +#define __HAVE_ARCH_STRCHR +static inline char * strchr(const char * s, int c) +{ +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tmovb (%1),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "incl %1\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %1,%1\n" + "2:\tmovl %1,%0\n\t" + :"=a" (__res), "=r" (s) + :"0" (c), "1" (s)); +return 
__res; +} + +#define __HAVE_ARCH_STRRCHR +static inline char * strrchr(const char * s, int c) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" + :"=d" (__res), "=&S" (d0), "=&a" (d1) + :"0" (0), "1" (s), "2" (c)); +return __res; +} + + +#define __HAVE_ARCH_STRCSPN +static inline size_t strcspn(const char * cs, const char * ct) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movl %6,%%edi\n\t" + "repne\n\t" + "scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" + "movl %%ecx,%%edx\n" + "1:\tlodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %6,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne\n\t" + "scasb\n\t" + "jne 1b\n" + "2:\tdecl %0" + :"=S" (__res), "=&a" (d0), "=&c" (d1) + :"0" (cs), "1" (0), "2" (0xffffffff), "g" (ct) + :"dx", "di"); +return __res-cs; +} + + +#define __HAVE_ARCH_STRLEN +static inline size_t strlen(const char * s) +{ +/* + * slightly slower on a 486, but with better chances of + * register allocation + */ +register char dummy, *tmp= (char *)s; +__asm__ __volatile__( + "\n1:\t" + "movb\t(%0),%1\n\t" + "incl\t%0\n\t" + "testb\t%1,%1\n\t" + "jne\t1b" + :"=r" (tmp),"=q" (dummy) + :"0" (s) + : "memory" ); +return (tmp-s-1); +} + +/* Added by Gertjan van Wingerde to make minix and sysv module work */ +#define __HAVE_ARCH_STRNLEN +static inline size_t strnlen(const char * s, size_t count) +{ +int d0; +register int __res; +__asm__ __volatile__( + "movl %3,%0\n\t" + "jmp 2f\n" + "1:\tcmpb $0,(%0)\n\t" + "je 3f\n\t" + "incl %0\n" + "2:\tdecl %2\n\t" + "cmpl $-1,%2\n\t" + "jne 1b\n" + "3:\tsubl %3,%0" + :"=a" (__res), "=&d" (d0) + :"1" (count), "c" (s)); +return __res; +} +/* end of additional stuff */ + + +/* + * These ought to get tweaked to do some cache priming. + */ + +static inline void * __memcpy_by4(void * to, const void * from, size_t n) +{ +register void *tmp = (void *)to; +register int dummy1,dummy2; +__asm__ __volatile__ ( + "\n1:\tmovl (%2),%0\n\t" + "addl $4,%2\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %3\n\t" + "jnz 1b" + :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) + :"1" (tmp), "2" (from), "3" (n/4) + :"memory"); +return (to); +} + +static inline void * __memcpy_by2(void * to, const void * from, size_t n) +{ +register void *tmp = (void *)to; +register int dummy1,dummy2; +__asm__ __volatile__ ( + "shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\tmovl (%2),%0\n\t" + "addl $4,%2\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\tmovw (%2),%w0\n\t" + "movw %w0,(%1)" + :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) + :"1" (tmp), "2" (from), "3" (n/2) + :"memory"); +return (to); +} + +static inline void * __memcpy_g(void * to, const void * from, size_t n) +{ +int d0, d1, d2; +register void *tmp = (void *)to; +__asm__ __volatile__ ( + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\tshrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\trep\n\t" + "movsl" + :"=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n), "1" ((long) tmp), "2" ((long) from) + :"memory"); +return (to); +} + +#define __memcpy_c(d,s,count) \ +((count%4==0) ? \ + __memcpy_by4((d),(s),(count)) : \ + ((count%2==0) ? \ + __memcpy_by2((d),(s),(count)) : \ + __memcpy_g((d),(s),(count)))) + +#define __memcpy(d,s,count) \ +(__builtin_constant_p(count) ? 
\ + __memcpy_c((d),(s),(count)) : \ + __memcpy_g((d),(s),(count))) + +#define __HAVE_ARCH_MEMCPY + +#include + +#ifdef CONFIG_X86_USE_3DNOW + +#include + +/* +** This CPU favours 3DNow strongly (eg AMD K6-II, K6-III, Athlon) +*/ + +static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) +{ + if (len < 512) + return __memcpy_c(to, from, len); + return _mmx_memcpy(to, from, len); +} + +static inline void *__memcpy3d(void *to, const void *from, size_t len) +{ + if(len < 512) + return __memcpy_g(to, from, len); + return _mmx_memcpy(to, from, len); +} + +#define memcpy(d, s, count) \ +(__builtin_constant_p(count) ? \ + __constant_memcpy3d((d),(s),(count)) : \ + __memcpy3d((d),(s),(count))) + +#else /* CONFIG_X86_USE_3DNOW */ + +/* +** Generic routines +*/ + + +#define memcpy(d, s, count) __memcpy(d, s, count) + +#endif /* CONFIG_X86_USE_3DNOW */ + + +extern void __struct_cpy_bug( void ); + +#define struct_cpy(x,y) \ +({ \ + if (sizeof(*(x)) != sizeof(*(y))) \ + __struct_cpy_bug; \ + memcpy(x, y, sizeof(*(x))); \ +}) + + +#define __HAVE_ARCH_MEMMOVE +static inline void * memmove(void * dest,const void * src, size_t n) +{ +int d0, d1, d2; +register void *tmp = (void *)dest; +if (dest +/* + * On a 486 or Pentium, we are better off not using the + * byte string operations. But on a 386 or a PPro the + * byte string ops are faster than doing it by hand + * (MUCH faster on a Pentium). + * + * Also, the byte strings actually work correctly. Forget + * the i486 routines for now as they may be broken.. + */ +#if FIXED_486_STRING && defined(CONFIG_X86_USE_STRING_486) +#include +#else + +/* + * This string-include defines all string functions as inline + * functions. Use gcc. It also assumes ds=es=data space, this should be + * normal. Most of the string-functions are rather heavily hand-optimized, + * see especially strtok,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. String instructions have been + * used through-out, making for "slightly" unclear code :-) + * + * NO Copyright (C) 1991, 1992 Linus Torvalds, + * consider these trivial functions to be PD. 
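+ *
+ * Note that the lods/stos/scas idioms below implicitly address through
+ * DS:ESI and ES:EDI, which is why the ds=es assumption above matters:
+ * with mismatched segments the reads and writes would land in
+ * different address spaces.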
+ */ + +#define __HAVE_ARCH_STRCPY +static inline char * strcpy(char * dest,const char *src) +{ +int d0, d1, d2; +__asm__ __volatile__( + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2) + :"0" (src),"1" (dest) : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCPY +static inline char * strncpy(char * dest,const char *src,size_t count) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "1:\tdecl %2\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "rep\n\t" + "stosb\n" + "2:" + : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) + :"0" (src),"1" (dest),"2" (count) : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRCAT +static inline char * strcat(char * dest,const char * src) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "decl %1\n" + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory"); +return dest; +} + +#define __HAVE_ARCH_STRNCAT +static inline char * strncat(char * dest,const char * src,size_t count) +{ +int d0, d1, d2, d3; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "decl %1\n\t" + "movl %8,%3\n" + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %2,%2\n\t" + "stosb" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) + : "memory"); +return dest; +} + +#define __HAVE_ARCH_STRCMP +static inline int strcmp(const char * cs,const char * ct) +{ +int d0, d1; +register int __res; +__asm__ __volatile__( + "1:\tlodsb\n\t" + "scasb\n\t" + "jne 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "jmp 3f\n" + "2:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "3:" + :"=a" (__res), "=&S" (d0), "=&D" (d1) + :"1" (cs),"2" (ct)); +return __res; +} + +#define __HAVE_ARCH_STRNCMP +static inline int strncmp(const char * cs,const char * ct,size_t count) +{ +register int __res; +int d0, d1, d2; +__asm__ __volatile__( + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "scasb\n\t" + "jne 3f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %%eax,%%eax\n\t" + "jmp 4f\n" + "3:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "4:" + :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + :"1" (cs),"2" (ct),"3" (count)); +return __res; +} + +#define __HAVE_ARCH_STRCHR +static inline char * strchr(const char * s, int c) +{ +int d0; +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "movl $1,%1\n" + "2:\tmovl %1,%0\n\t" + "decl %0" + :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); +return __res; +} + +#define __HAVE_ARCH_STRRCHR +static inline char * strrchr(const char * s, int c) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" + :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); +return __res; +} + +#define __HAVE_ARCH_STRLEN +static inline size_t strlen(const char * s) +{ +int d0; +register int __res; +__asm__ __volatile__( + "repne\n\t" + "scasb\n\t" + "notl %0\n\t" + "decl %0" + :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); +return __res; +} + +static inline void * __memcpy(void * to, const void * from, size_t n) +{ +int d0, 
d1, d2; +__asm__ __volatile__( + "rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) + : "memory"); +return (to); +} + +/* + * This looks horribly ugly, but the compiler can optimize it totally, + * as the count is constant. + */ +static inline void * __constant_memcpy(void * to, const void * from, size_t n) +{ + switch (n) { + case 0: + return to; + case 1: + *(unsigned char *)to = *(const unsigned char *)from; + return to; + case 2: + *(unsigned short *)to = *(const unsigned short *)from; + return to; + case 3: + *(unsigned short *)to = *(const unsigned short *)from; + *(2+(unsigned char *)to) = *(2+(const unsigned char *)from); + return to; + case 4: + *(unsigned long *)to = *(const unsigned long *)from; + return to; + case 6: /* for Ethernet addresses */ + *(unsigned long *)to = *(const unsigned long *)from; + *(2+(unsigned short *)to) = *(2+(const unsigned short *)from); + return to; + case 8: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + return to; + case 12: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + return to; + case 16: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); + return to; + case 20: + *(unsigned long *)to = *(const unsigned long *)from; + *(1+(unsigned long *)to) = *(1+(const unsigned long *)from); + *(2+(unsigned long *)to) = *(2+(const unsigned long *)from); + *(3+(unsigned long *)to) = *(3+(const unsigned long *)from); + *(4+(unsigned long *)to) = *(4+(const unsigned long *)from); + return to; + } +#define COMMON(x) \ +__asm__ __volatile__( \ + "rep ; movsl" \ + x \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (n/4),"1" ((long) to),"2" ((long) from) \ + : "memory"); +{ + int d0, d1, d2; + switch (n % 4) { + case 0: COMMON(""); return to; + case 1: COMMON("\n\tmovsb"); return to; + case 2: COMMON("\n\tmovsw"); return to; + default: COMMON("\n\tmovsw\n\tmovsb"); return to; + } +} + +#undef COMMON +} + +#define __HAVE_ARCH_MEMCPY + +#ifdef CONFIG_X86_USE_3DNOW + +#include + +/* + * This CPU favours 3DNow strongly (eg AMD Athlon) + */ + +static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) +{ + if (len < 512) + return __constant_memcpy(to, from, len); + return _mmx_memcpy(to, from, len); +} + +static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) +{ + if (len < 512) + return __memcpy(to, from, len); + return _mmx_memcpy(to, from, len); +} + +#define memcpy(t, f, n) \ +(__builtin_constant_p(n) ? \ + __constant_memcpy3d((t),(f),(n)) : \ + __memcpy3d((t),(f),(n))) + +#else + +/* + * No 3D Now! + */ + +#define memcpy(t, f, n) \ +(__builtin_constant_p(n) ? \ + __constant_memcpy((t),(f),(n)) : \ + __memcpy((t),(f),(n))) + +#endif + +/* + * struct_cpy(x,y), copy structure *x into (matching structure) *y. + * + * We get link-time errors if the structure sizes do not match. + * There is no runtime overhead, it's all optimized away at + * compile time. 
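+ * (The intent: a size mismatch leaves a live reference to the
+ * undefined symbol __struct_cpy_bug and the link fails, while matching
+ * sizes let the whole test fold away into a plain memcpy().)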
+ */ +extern void __struct_cpy_bug (void); + +#define struct_cpy(x,y) \ +({ \ + if (sizeof(*(x)) != sizeof(*(y))) \ + __struct_cpy_bug; \ + memcpy(x, y, sizeof(*(x))); \ +}) + +#define __HAVE_ARCH_MEMMOVE +static inline void * memmove(void * dest,const void * src, size_t n) +{ +int d0, d1, d2; +if (dest<src) +__asm__ __volatile__( + "rep\n\t" + "movsb" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n),"1" (src),"2" (dest) + : "memory"); +else +__asm__ __volatile__( + "std\n\t" + "rep\n\t" + "movsb\n\t" + "cld" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n), + "1" (n-1+(const char *)src), + "2" (n-1+(char *)dest) + :"memory"); +return dest; +} + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/system.h b/xenolinux-2.4.16-sparse/include/asm-xeno/system.h new file mode 100644 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/system.h +#ifndef __ASM_SYSTEM_H +#define __ASM_SYSTEM_H + +#include +#include +#include +#include +#include /* for LOCK_PREFIX */ + +#ifdef __KERNEL__ + +struct task_struct; /* one of the stranger aspects of C forward declarations.. */ +extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); + +#define prepare_to_switch() do { } while(0) +#define switch_to(prev,next,last) do { \ + asm volatile("pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %3,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %4\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ + "=b" (last) \ + :"m" (next->thread.esp),"m" (next->thread.eip), \ + "a" (prev), "d" (next), \ + "b" (prev)); \ +} while (0) + +#define _set_base(addr,base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while(0) + +#define _set_limit(addr,limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while(0) + +#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) +#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) + +static inline unsigned long _get_base(char * addr) +{ + unsigned long __base; + __asm__("movb %3,%%dh\n\t" + "movb %2,%%dl\n\t" + "shll $16,%%edx\n\t" + "movw %1,%%dx" + :"=&d" (__base) + :"m" (*((addr)+2)), + "m" (*((addr)+4)), + "m" (*((addr)+7))); + return __base; +} + +#define get_base(ldt) _get_base( ((char *)&(ldt)) ) + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong..
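 */

/*
 * A hedged plain-C illustration (the helper name is hypothetical) of
 * the descriptor layout that _set_base()/_get_base() above poke at:
 * the 32-bit base is scattered over bytes 2-4 and byte 7 of the 8-byte
 * segment descriptor, exactly as the movb/shll sequence reassembles it.
 */
static inline unsigned long example_descriptor_base(const unsigned char *desc)
{
    return  (unsigned long)desc[2]
          | ((unsigned long)desc[3] << 8)
          | ((unsigned long)desc[4] << 16)
          | ((unsigned long)desc[7] << 24);
}

/*
 * loadsegment(), continued: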
+ */ +#define loadsegment(seg,value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "pushl $0\n\t" \ + "popl %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,3b\n" \ + ".previous" \ + : :"m" (*(unsigned int *)&(value))) + +#define clts() ((void)0) +#define read_cr0() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr0,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr0(x) \ + __asm__("movl %0,%%cr0": :"r" (x)); + +#define read_cr4() ({ \ + unsigned int __dummy; \ + __asm__( \ + "movl %%cr4,%0\n\t" \ + :"=r" (__dummy)); \ + __dummy; \ +}) +#define write_cr4(x) \ + __asm__("movl %0,%%cr4": :"r" (x)); +#define stts() (HYPERVISOR_fpu_taskswitch()) + +#endif /* __KERNEL__ */ + +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory"); + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +#define nop() __asm__ __volatile__ ("nop") + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + +#define tas(ptr) (xchg((ptr),1)) + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + */ +static inline void __set_64bit (unsigned long long * ptr, + unsigned int low, unsigned int high) +{ + __asm__ __volatile__ ( + "\n1:\t" + "movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + "cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax","dx","memory"); +} + +static inline void __set_64bit_constant (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); +} +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) + +static inline void __set_64bit_var (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit_constant(ptr, value) : \ + __set_64bit_var(ptr, value) ) + +#define _set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ + __set_64bit(ptr, ll_low(value), ll_high(value)) ) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. 
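 */

/*
 * A hedged C rendering of the __set_64bit() retry loop above, assuming
 * GCC's __sync_bool_compare_and_swap builtin (which the patch itself
 * does not use): snapshot the current 64-bit value, then cmpxchg8b
 * until the store lands atomically.
 */
static inline void example_set_64bit(unsigned long long *p,
                                     unsigned long long v)
{
    unsigned long long old;
    do {
        old = *p;   /* racy snapshot: the movl pair into %eax:%edx */
    } while (!__sync_bool_compare_and_swap(p, old, v));   /* cmpxchg8b */
}

/*
 * Atomic compare and exchange, continued: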
Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +#else +/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ +#endif + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() + +#ifdef CONFIG_X86_OOSTORE +#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#else +#define wmb() __asm__ __volatile__ ("": : :"memory") +#endif + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#endif + +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#define __save_flags(x) ((x) = HYPERVISOR_shared_info->events_enable); barrier() +#define __restore_flags(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + _shared->events_enable = (x); \ + barrier(); \ + if ( _shared->events && (x) ) do_hypervisor_callback(NULL); \ +} while (0) +#define __cli() (HYPERVISOR_shared_info->events_enable = 0); barrier() +#define __sti() \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + _shared->events_enable = 1; \ + barrier(); \ + if ( _shared->events ) do_hypervisor_callback(NULL); \ +} while (0) +#define safe_halt() ((void)0) + +/* For spinlocks etc */ +#define local_irq_save(x) ((x) = HYPERVISOR_shared_info->events_enable); (HYPERVISOR_shared_info->events_enable = 0); barrier() +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#ifdef CONFIG_SMP + +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) + +#else + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) 
__save_flags(x) +#define restore_flags(x) __restore_flags(x) + +#endif + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +void disable_hlt(void); +void enable_hlt(void); + +extern unsigned long dmi_broken; +extern int is_sony_vaio_laptop; + +#define BROKEN_ACPI_Sx 0x0001 +#define BROKEN_INIT_AFTER_S1 0x0002 + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h b/xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h new file mode 100644 index 0000000000..5ccd7d8f07 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/termbits.h @@ -0,0 +1,172 @@ +#ifndef __ARCH_I386_TERMBITS_H__ +#define __ARCH_I386_TERMBITS_H__ + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0020000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 
+#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate (not used) */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/termios.h b/xenolinux-2.4.16-sparse/include/asm-xeno/termios.h new file mode 100644 index 0000000000..c4cc5c8168 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/termios.h @@ -0,0 +1,106 @@ +#ifndef _I386_TERMIOS_H +#define _I386_TERMIOS_H + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +/* line disciplines */ +#define N_TTY 0 +#define N_SLIP 1 +#define N_MOUSE 2 +#define N_PPP 3 +#define N_STRIP 4 +#define N_AX25 5 +#define N_X25 6 /* X.25 async */ +#define N_6PACK 7 +#define N_MASC 8 /* Reserved for Mobitex module */ +#define N_R3964 9 /* Reserved for Simatic R3964 module */ +#define N_PROFIBUS_FDL 10 /* Reserved for Profibus */ +#define N_IRDA 11 /* Linux IR - http://irda.sourceforge.net/ */ +#define N_SMSBLOCK 12 /* SMS block mode - for talking to GSM data cards about SMS messages */ +#define N_HDLC 13 /* synchronous HDLC */ +#define N_SYNC_PPP 14 /* synchronous PPP */ +#define N_HCI 15 /* Bluetooth HCI UART */ + +#ifdef __KERNEL__ + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. 
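 */

/*
 * A hedged plain-C sketch of what the macros below accomplish
 * (kernel-to-kernel variant, skipping the get_user/copy_from_user
 * steps; the function name is illustrative): only the low 16 bits of
 * each flag word exist in the old termio, so they are merged in and
 * the high bits of the termios flags are left untouched.
 */
static inline void example_termio_to_termios(struct termios *t,
                                             const struct termio *o)
{
    *(unsigned short *)&t->c_iflag = o->c_iflag;
    *(unsigned short *)&t->c_oflag = o->c_oflag;
    *(unsigned short *)&t->c_cflag = o->c_cflag;
    *(unsigned short *)&t->c_lflag = o->c_lflag;
    memcpy(t->c_cc, o->c_cc, NCC);
}

/*
 * The macros themselves: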
+ */ +#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ + unsigned short __tmp; \ + get_user(__tmp,&(termio)->x); \ + *(unsigned short *) &(termios)->x = __tmp; \ +} + +#define user_termio_to_kernel_termios(termios, termio) \ +({ \ + SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ + copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +#define kernel_termios_to_user_termio(termio, termios) \ +({ \ + put_user((termios)->c_iflag, &(termio)->c_iflag); \ + put_user((termios)->c_oflag, &(termio)->c_oflag); \ + put_user((termios)->c_cflag, &(termio)->c_cflag); \ + put_user((termios)->c_lflag, &(termio)->c_lflag); \ + put_user((termios)->c_line, &(termio)->c_line); \ + copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ +}) + +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __KERNEL__ */ + +#endif /* _I386_TERMIOS_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/timex.h b/xenolinux-2.4.16-sparse/include/asm-xeno/timex.h new file mode 100644 index 0000000000..97099dd0d4 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/timex.h @@ -0,0 +1,50 @@ +/* + * linux/include/asm-i386/timex.h + * + * i386 architecture timex specifications + */ +#ifndef _ASMi386_TIMEX_H +#define _ASMi386_TIMEX_H + +#include +#include + +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_FACTOR 20 /* Factor of both 1000000 and CLOCK_TICK_RATE */ +#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \ + (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ + << (SHIFT_SCALE-SHIFT_HZ)) / HZ) + +/* + * Standard way to access the cycle counter on i586+ CPUs. + * Currently only used on SMP. + * + * If you really have a SMP machine with i486 chips or older, + * compile for that, and this will just always return zero. + * That's ok, it just means that the nicer scheduling heuristics + * won't work for you. + * + * We only use the low 32 bits, and we'd simply better make sure + * that we reschedule before that wraps. Scheduling at least every + * four billion cycles just basically sounds like a good idea, + * regardless of how fast the machine is. + */ +typedef unsigned long long cycles_t; + +extern cycles_t cacheflush_time; + +static inline cycles_t get_cycles (void) +{ +#ifndef CONFIG_X86_TSC + return 0; +#else + unsigned long long ret; + + rdtscll(ret); + return ret; +#endif +} + +extern unsigned long cpu_khz; + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h b/xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h new file mode 100644 index 0000000000..69c0faa931 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/tlb.h @@ -0,0 +1 @@ +#include diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/types.h b/xenolinux-2.4.16-sparse/include/asm-xeno/types.h new file mode 100644 index 0000000000..238635a0a2 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/types.h @@ -0,0 +1,57 @@ +#ifndef _I386_TYPES_H +#define _I386_TYPES_H + +typedef unsigned short umode_t; + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
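 */

/*
 * A hedged sketch (the function name is illustrative) of using
 * get_cycles() from timex.h above to time a code region.  With
 * CONFIG_X86_TSC unset it always returns 0, so the result is only
 * meaningful on TSC-capable CPUs.
 */
static inline unsigned long long example_time_region(void (*fn)(void))
{
    cycles_t before = get_cycles();   /* rdtsc */
    fn();
    return (unsigned long long)(get_cycles() - before);
}

/*
 * The __xx types, continued: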
Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#include + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +#define BITS_PER_LONG 32 + +/* DMA addresses come in generic and 64-bit flavours. */ + +#ifdef CONFIG_HIGHMEM +typedef u64 dma_addr_t; +#else +typedef u32 dma_addr_t; +#endif +typedef u64 dma64_addr_t; + +#endif /* __KERNEL__ */ + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h b/xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h new file mode 100644 index 0000000000..ecc6693352 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/uaccess.h @@ -0,0 +1,606 @@ +#ifndef __i386_UACCESS_H +#define __i386_UACCESS_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. + */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current->addr_limit) +#define set_fs(x) (current->addr_limit = (x)) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +extern int __verify_write(const void *, unsigned long); + +#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) + +/* + * Uhhuh, this needs 33-bit arithmetic. We have a carry.. + */ +#define __range_ok(addr,size) ({ \ + unsigned long flag,sum; \ + asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ + :"=&r" (flag), "=r" (sum) \ + :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ + flag; }) + +#ifdef CONFIG_X86_WP_WORKS_OK + +#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) + +#else + +#define access_ok(type,addr,size) ( (__range_ok(addr,size) == 0) && \ + ((type) == VERIFY_READ || boot_cpu_data.wp_works_ok || \ + segment_eq(get_fs(),KERNEL_DS) || \ + __verify_write((void *)(addr),(size)))) + +#endif + +static inline int verify_area(int type, const void * addr, unsigned long size) +{ + return access_ok(type,addr,size) ? 0 : -EFAULT; +} + + +/* + * The exception table consists of pairs of addresses: the first is the + * address of an instruction that is allowed to fault, and the second is + * the address at which the program should continue. No registers are + * modified, so it is entirely up to the continuation code to figure out + * what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. 
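 */

/*
 * A hedged plain-C restatement of the __range_ok() test above: the
 * add/sbb/cmp/sbb sequence computes, in effect, "did addr + size wrap
 * past 2^32, or land beyond the task's addr_limit?", i.e. 33-bit
 * arithmetic via the carry flag.
 */
static inline int example_range_ok(unsigned long addr, unsigned long size,
                                   unsigned long limit)
{
    unsigned long sum = addr + size;
    if (sum < addr)       /* the carry: addition wrapped past 2^32 */
        return 0;
    return sum <= limit;  /* non-zero means the range is acceptable */
}

/*
 * The exception table, continued: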
+ */ + +struct exception_table_entry +{ + unsigned long insn, fixup; +}; + +/* Returns 0 if exception not found and fixup otherwise. */ +extern unsigned long search_exception_table(unsigned long); + + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * This gets kind of ugly. We want to return _two_ values in "get_user()" + * and yet we don't want to do any pointers, because that is too much + * of a performance impact. Thus we have a few rather ugly macros here, + * and hide all the uglyness from the user. + * + * The "__xxx" versions of the user access functions are versions that + * do not verify the address space, that must have been done previously + * with a separate "access_ok()" call (this is used when we do multiple + * accesses to the same area of user memory). + */ + +extern void __get_user_1(void); +extern void __get_user_2(void); +extern void __get_user_4(void); + +#define __get_user_x(size,ret,x,ptr) \ + __asm__ __volatile__("call __get_user_" #size \ + :"=a" (ret),"=d" (x) \ + :"0" (ptr)) + +/* Careful: we have to cast the result to the type of the pointer for sign reasons */ +#define get_user(x,ptr) \ +({ int __ret_gu,__val_gu; \ + switch(sizeof (*(ptr))) { \ + case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ + case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ + case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ + default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \ + } \ + (x) = (__typeof__(*(ptr)))__val_gu; \ + __ret_gu; \ +}) + +extern void __put_user_1(void); +extern void __put_user_2(void); +extern void __put_user_4(void); +extern void __put_user_8(void); + +extern void __put_user_bad(void); + +#define put_user(x,ptr) \ + __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __get_user(x,ptr) \ + __get_user_nocheck((x),(ptr),sizeof(*(ptr))) +#define __put_user(x,ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err; \ + __put_user_size((x),(ptr),(size),__pu_err); \ + __pu_err; \ +}) + + +#define __put_user_check(x,ptr,size) \ +({ \ + long __pu_err = -EFAULT; \ + __typeof__(*(ptr)) *__pu_addr = (ptr); \ + if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __put_user_size((x),__pu_addr,(size),__pu_err); \ + __pu_err; \ +}) + +#define __put_user_u64(x, addr, err) \ + __asm__ __volatile__( \ + "1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: movl %3,%0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,4b\n" \ + " .long 2b,4b\n" \ + ".previous" \ + : "=r"(err) \ + : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) + +#define __put_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ + case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ + case 8: __put_user_u64(x,ptr,retval); break; \ + default: __put_user_bad(); \ + } \ +} while (0) + +struct __large_struct { unsigned long buf[100]; }; +#define __m(x) (*(struct __large_struct *)(x)) + +/* + * Tell gcc we read from memory instead of writing: this is because + * we do not write to any memory gcc knows about, so there are no + * aliasing issues. 
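 */

/*
 * A hedged usage sketch (the handler name is hypothetical) for the
 * accessors defined above: read a u32 from user space, double it, and
 * write it back.  Both calls return 0 on success or -EFAULT via the
 * .fixup/__ex_table machinery.
 */
static inline int example_double_in_place(unsigned int *uptr)
{
    unsigned int v;
    if (get_user(v, uptr))
        return -EFAULT;
    return put_user(v * 2, uptr);
}

/*
 * __put_user_asm(), continued: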
+ */ +#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +#define __get_user_nocheck(x,ptr,size) \ +({ \ + long __gu_err, __gu_val; \ + __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + __gu_err; \ +}) + +extern long __get_user_bad(void); + +#define __get_user_size(x,ptr,size,retval) \ +do { \ + retval = 0; \ + switch (size) { \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ + case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ + default: (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ + __asm__ __volatile__( \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err), ltype (x) \ + : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) + + +/* + * Copy To/From Userspace + */ + +/* Generic arbitrary sized copy. */ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ + : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +/* We let the __ versions of copy_from/to_user inline, because they're often + * used in fast paths and have only a small space overhead. + */ +static inline unsigned long +__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user_zeroing(to,from,n); + return n; +} + +static inline unsigned long +__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __copy_user(to,from,n); + return n; +} + + +/* Optimize just a little bit when we know the size of the move. 
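 */

/*
 * A hedged sketch of the calling convention shared by the copy helpers
 * above and below: they return the number of bytes NOT copied (0 on
 * success), and the _zeroing variants clear the uncopied tail of the
 * kernel buffer when a fault cuts the copy short.  The function name
 * is illustrative; copy_from_user is the macro defined further below.
 */
static inline int example_fetch_from_user(void *kbuf, const void *ubuf,
                                          unsigned long len)
{
    if (copy_from_user(kbuf, ubuf, len))   /* non-zero: bytes left over */
        return -EFAULT;
    return 0;
}

/*
 * Constant-size copies: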
*/ +#define __constant_copy_user(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: shl $2,%0\n" \ + "4: addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: shl $2,%0\n" \ + "5: addl $2,%0\n" \ + "6: incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +/* Optimize just a little bit when we know the size of the move. 
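 */

/*
 * A hedged worked example of the dispatch in __constant_copy_user
 * above: for size = 11, size/4 = 2 longwords go through rep;movsl and
 * size & 3 = 3 selects the movsw+movsb tail ("case 3"); on a fault the
 * fixup code shifts the remaining longword count back into bytes.
 */
static inline unsigned long example_tail_bytes(unsigned long size)
{
    return size & 3;   /* 0: none, 1: movsb, 2: movsw, 3: movsw+movsb */
}

/*
 * The zeroing variant: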
*/ +#define __constant_copy_user_zeroing(to, from, size) \ +do { \ + int __d0, __d1; \ + switch (size & 3) { \ + default: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1:\n" \ + ".section .fixup,\"ax\"\n" \ + "2: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " jmp 1b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,2b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 1: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 2: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + "4: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " popl %%eax\n" \ + " addl $2,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,4b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + case 3: \ + __asm__ __volatile__( \ + "0: rep; movsl\n" \ + "1: movsw\n" \ + "2: movsb\n" \ + "3:\n" \ + ".section .fixup,\"ax\"\n" \ + "4: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosl\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " shl $2,%0\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "5: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosw\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " addl $3,%0\n" \ + " jmp 2b\n" \ + "6: pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " stosb\n" \ + " popl %%eax\n" \ + " incl %0\n" \ + " jmp 3b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,4b\n" \ + " .long 1b,5b\n" \ + " .long 2b,6b\n" \ + ".previous" \ + : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ + : "1"(from), "2"(to), "0"(size/4) \ + : "memory"); \ + break; \ + } \ +} while (0) + +unsigned long __generic_copy_to_user(void *, const void *, unsigned long); +unsigned long __generic_copy_from_user(void *, const void *, unsigned long); + +static inline unsigned long +__constant_copy_to_user(void *to, const void *from, unsigned long n) +{ + prefetch(from); + if (access_ok(VERIFY_WRITE, to, n)) + __constant_copy_user(to,from,n); + return n; +} + +static inline unsigned long +__constant_copy_from_user(void *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + __constant_copy_user_zeroing(to,from,n); + else + memset(to, 0, n); + return n; +} + +static inline unsigned long +__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user(to,from,n); + return 
n; +} + +static inline unsigned long +__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +{ + __constant_copy_user_zeroing(to,from,n); + return n; +} + +#define copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user((to),(from),(n)) : \ + __generic_copy_to_user((to),(from),(n))) + +#define copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_from_user((to),(from),(n)) : \ + __generic_copy_from_user((to),(from),(n))) + +#define __copy_to_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_to_user_nocheck((to),(from),(n)) : \ + __generic_copy_to_user_nocheck((to),(from),(n))) + +#define __copy_from_user(to,from,n) \ + (__builtin_constant_p(n) ? \ + __constant_copy_from_user_nocheck((to),(from),(n)) : \ + __generic_copy_from_user_nocheck((to),(from),(n))) + +long strncpy_from_user(char *dst, const char *src, long count); +long __strncpy_from_user(char *dst, const char *src, long count); +#define strlen_user(str) strnlen_user(str, ~0UL >> 1) +long strnlen_user(const char *str, long n); +unsigned long clear_user(void *mem, unsigned long len); +unsigned long __clear_user(void *mem, unsigned long len); + +#endif /* __i386_UACCESS_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h b/xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h new file mode 100644 index 0000000000..b0db36925f --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/ucontext.h @@ -0,0 +1,12 @@ +#ifndef _ASMi386_UCONTEXT_H +#define _ASMi386_UCONTEXT_H + +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASMi386_UCONTEXT_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h b/xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h new file mode 100644 index 0000000000..7acd795762 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/unaligned.h @@ -0,0 +1,37 @@ +#ifndef __I386_UNALIGNED_H +#define __I386_UNALIGNED_H + +/* + * The i386 can do unaligned accesses itself. + * + * The strange macros are there to make sure these can't + * be misused in a way that makes them not work on other + * architectures where unaligned accesses aren't as simple. + */ + +/** + * get_unaligned - get value from possibly mis-aligned location + * @ptr: pointer to value + * + * This macro should be used for accessing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. retrieving a u16 value from a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. + */ +#define get_unaligned(ptr) (*(ptr)) + +/** + * put_unaligned - put value to a possibly mis-aligned location + * @val: value to place + * @ptr: pointer to location + * + * This macro should be used for placing values larger in size than + * single bytes at locations that are expected to be improperly aligned, + * e.g. writing a u16 value to a location not u16-aligned. + * + * Note that unaligned accesses can be very expensive on some architectures. 
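 */

/*
 * A hedged usage sketch for get_unaligned(): pulling a 16-bit field
 * out of a byte-packed buffer at an odd offset.  On i386 this is just
 * a plain load; the macro exists so the same code stays correct on
 * architectures with stricter alignment rules.
 */
static inline unsigned short example_peek16(const unsigned char *pkt)
{
    return get_unaligned((const unsigned short *)(pkt + 1));
}

/*
 * put_unaligned(), continued: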
+ */ +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h b/xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h new file mode 100644 index 0000000000..36de103c24 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/unistd.h @@ -0,0 +1,359 @@ +#ifndef _ASM_I386_UNISTD_H_ +#define _ASM_I386_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86old 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 
+#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_vm86 166 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread 180 +#define __NR_pwrite 181 +#define __NR_chown 182 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 /* some people actually want streams */ +#define __NR_putpmsg 189 /* some people actually want streams */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_madvise1 219 /* delete when C lib stub is removed */ +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +#define __NR_security 223 /* syscall for security modules */ +#define __NR_gettid 224 +#define __NR_readahead 225 + +/* user-visible error numbers are in the range -1 - -124: see */ + +#define __syscall_return(type, res) \ +do { \ + if ((unsigned long)(res) >= (unsigned long)(-125)) { \ + errno = -(res); \ + res = -1; \ + } \ + return (type) (res); \ +} while (0) + +/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. 
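 */

/*
 * A hedged illustration of what the _syscallN macros below expand to,
 * written out by hand for close(2): the syscall number goes in %eax,
 * the first argument in %ebx, int $0x80 traps into the kernel, and the
 * negative-errno return is mapped by __syscall_return above.
 */
static inline int example_close(int fd)
{
    long __res;
    __asm__ volatile ("int $0x80"
                      : "=a" (__res)
                      : "0" (__NR_close), "b" ((long)fd));
    __syscall_return(int, __res);
}

/*
 * The stub generators themselves: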
*/ +#define _syscall0(type,name) \ +type name(void) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name)); \ +__syscall_return(type,__res); \ +} + +#define _syscall1(type,name,type1,arg1) \ +type name(type1 arg1) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name),"b" ((long)(arg1))); \ +__syscall_return(type,__res); \ +} + +#define _syscall2(type,name,type1,arg1,type2,arg2) \ +type name(type1 arg1,type2 arg2) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2))); \ +__syscall_return(type,__res); \ +} + +#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ +type name(type1 arg1,type2 arg2,type3 arg3) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ +__syscall_return(type,__res); \ +} + +#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ +type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ +__syscall_return(type,__res); \ +} + +#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ +type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ +{ \ +long __res; \ +__asm__ volatile ("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \ +__syscall_return(type,__res); \ +} + +#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ +type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ +{ \ +long __res; \ +__asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \ + "0" ((long)(arg6))); \ +__syscall_return(type,__res); \ +} + +#ifdef __KERNEL_SYSCALLS__ + +/* + * we need this inline - forking from kernel space will result + * in NO COPY ON WRITE (!!!), until an execve is executed. This + * is no problem, but for the stack. This is handled by not letting + * main() use the stack at all after fork(). Thus, no function + * calls - which means inline code for fork too, as otherwise we + * would use the stack upon exit from 'fork()'. + * + * Actually only pause and fork are needed inline, so that there + * won't be any messing with the stack from main(), but we define + * some others too. 
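 */

/*
 * A hedged sketch of the error convention __syscall_return implements
 * for these stubs: the kernel hands back -errno in %eax, values in
 * the range [-125, -1] become errno plus a -1 return, and anything
 * else passes through unchanged.
 */
static inline long example_map_result(long res, int *err)
{
    if ((unsigned long)res >= (unsigned long)(-125)) {
        *err = -res;   /* e.g. -9 becomes EBADF */
        return -1;
    }
    return res;
}

/*
 * The inline kernel syscalls, continued: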
+ */ +#define __NR__exit __NR_exit +static inline _syscall0(int,pause) +static inline _syscall0(int,sync) +static inline _syscall0(pid_t,setsid) +static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count) +static inline _syscall3(int,read,int,fd,char *,buf,off_t,count) +static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count) +static inline _syscall1(int,dup,int,fd) +static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp) +static inline _syscall3(int,open,const char *,file,int,flag,int,mode) +static inline _syscall1(int,close,int,fd) +static inline _syscall1(int,_exit,int,exitcode) +static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options) +static inline _syscall1(int,delete_module,const char *,name) + +static inline pid_t wait(int * wait_stat) +{ + return waitpid(-1,wait_stat,0); +} + +#endif + +#endif /* _ASM_I386_UNISTD_H_ */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/user.h b/xenolinux-2.4.16-sparse/include/asm-xeno/user.h new file mode 100644 index 0000000000..ddc06ea014 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/user.h @@ -0,0 +1,122 @@ +#ifndef _I386_USER_H +#define _I386_USER_H + +#include +#include +/* Core file format: The core file is written in such a way that gdb + can understand it and provide useful information to the user (under + linux we use the 'trad-core' bfd). There are quite a number of + obstacles to being able to view the contents of the floating point + registers, and until these are solved you will not be able to view the + contents of them. Actually, you can read in the core file and look at + the contents of the user struct to find out what the floating point + registers contain. + The actual file contents are as follows: + UPAGE: 1 page consisting of a user struct that tells gdb what is present + in the file. Directly after this is a copy of the task_struct, which + is currently not used by gdb, but it may come in useful at some point. + All of the registers are stored as part of the upage. The upage should + always be only one page. + DATA: The data area is stored. We use current->end_text to + current->brk to pick up all of the user variables, plus any memory + that may have been malloced. No attempt is made to determine if a page + is demand-zero or if a page is totally unused, we just cover the entire + range. All of the addresses are rounded in such a way that an integral + number of pages is written. + STACK: We need the stack information in order to get a meaningful + backtrace. We need to write the data from (esp) to + current->start_stack, so we round each of these off in order to be able + to write an integer number of pages. + The minimum core file size is 3 pages, or 12288 bytes. +*/ + +/* + * Pentium III FXSR, SSE support + * Gareth Hughes , May 2000 + * + * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for + * interacting with the FXSR-format floating point environment. Floating + * point data can be accessed in the regular format in the usual manner, + * and both the standard and SIMD floating point data can be accessed via + * the new ptrace requests. In either case, changes to the FPU environment + * will be reflected in the task's state as expected. 
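 */

/*
 * A hedged, tool-side sketch (hypothetical, and relying on the struct
 * user definition further below) of how the core layout described
 * above gets used: a core-file reader maps the UPAGE and pulls the
 * saved instruction pointer straight out of struct user's register
 * dump.
 */
static inline long example_core_eip(const struct user *u)
{
    return u->regs.eip;   /* instruction pointer at dump time */
}

/*
 * The register and FPU layouts: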
+ */ + +struct user_i387_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ +}; + +struct user_fxsr_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +}; + +/* + * This is the old layout of "struct pt_regs", and + * is still the layout used by user mode (the new + * pt_regs doesn't have all registers as the kernel + * doesn't use the extra segment registers) + */ +struct user_regs_struct { + long ebx, ecx, edx, esi, edi, ebp, eax; + unsigned short ds, __ds, es, __es; + unsigned short fs, __fs, gs, __gs; + long orig_eax, eip; + unsigned short cs, __cs; + long eflags, esp; + unsigned short ss, __ss; +}; + +/* When the kernel dumps core, it starts by dumping the user struct - + this will be used by gdb to figure out where the data and stack segments + are within the file, and what virtual addresses to use. */ +struct user{ +/* We start with the registers, to mimic the way that "memory" is returned + from the ptrace(3,...) function. */ + struct user_regs_struct regs; /* Where the registers are actually stored */ +/* ptrace does not yet supply these. Someday.... */ + int u_fpvalid; /* True if math co-processor being used. */ + /* for this mess. Not yet used. */ + struct user_i387_struct i387; /* Math Co-processor registers. */ +/* The rest of this junk is to help gdb figure out what goes where */ + unsigned long int u_tsize; /* Text segment size (pages). */ + unsigned long int u_dsize; /* Data segment size (pages). */ + unsigned long int u_ssize; /* Stack segment size (pages). */ + unsigned long start_code; /* Starting virtual address of text. */ + unsigned long start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + long int signal; /* Signal that caused the core dump. */ + int reserved; /* No longer used */ + struct user_pt_regs * u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + struct user_i387_struct* u_fpstate; /* Math Co-processor pointer. */ + unsigned long magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ + int u_debugreg[8]; +}; +#define NBPG PAGE_SIZE +#define UPAGES 1 +#define HOST_TEXT_START_ADDR (u.start_code) +#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) + +#endif /* _I386_USER_H */ diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/vga.h b/xenolinux-2.4.16-sparse/include/asm-xeno/vga.h new file mode 100644 index 0000000000..ef0c0e50cc --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/vga.h @@ -0,0 +1,20 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + */ + +#ifndef _LINUX_ASM_VGA_H_ +#define _LINUX_ASM_VGA_H_ + +/* + * On the PC, we can just recalculate addresses and then + * access the videoram directly without any black magic. 
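 */

/*
 * A hedged usage sketch for the accessors below: writing one character
 * (light grey on black, attribute 0x07) into the top-left cell of the
 * colour text framebuffer at physical 0xb8000.
 */
static inline void example_vga_putc(char c)
{
    char *cell = (char *)VGA_MAP_MEM(0xb8000);
    vga_writeb(c, cell);          /* character byte */
    vga_writeb(0x07, cell + 1);   /* attribute byte */
}

/*
 * The accessors: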
+ */ + +#define VGA_MAP_MEM(x) (unsigned long)phys_to_virt(x) + +#define vga_readb(x) (*(x)) +#define vga_writeb(x,y) (*(y) = (x)) + +#endif diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/xor.h b/xenolinux-2.4.16-sparse/include/asm-xeno/xor.h new file mode 100644 index 0000000000..5a99f54553 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/xor.h @@ -0,0 +1,859 @@ +/* + * include/asm-i386/xor.h + * + * Optimized RAID-5 checksumming functions for MMX and SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +#define FPU_SAVE \ + do { \ + if (!(current->flags & PF_USEDFPU)) \ + __asm__ __volatile__ (" clts;\n"); \ + __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ + } while (0) + +#define FPU_RESTORE \ + do { \ + __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ + if (!(current->flags & PF_USEDFPU)) \ + stts(); \ + } while (0) + +#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" +#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" +#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" +#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" +#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" +#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" + + +static void +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + ST(i,0) \ + XO1(i+1,1) \ + ST(i+1,1) \ + XO1(i+2,2) \ + ST(i+2,2) \ + XO1(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2) + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + ST(i,0) \ + XO2(i+1,1) \ + ST(i+1,1) \ + XO2(i+2,2) \ + ST(i+2,2) \ + XO2(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3) + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + 
ST(i,0) \ + XO3(i+1,1) \ + ST(i+1,1) \ + XO3(i+2,2) \ + ST(i+2,2) \ + XO3(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4) + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + ST(i,0) \ + XO4(i+1,1) \ + ST(i+1,1) \ + XO4(i+2,2) \ + ST(i+2,2) \ + XO4(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " addl $128, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "g" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +static void +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq 40(%1), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2) + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 
;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3) + : "memory" ); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor (%4), %%mm0 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " pxor 16(%4), %%mm2 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 24(%4), %%mm3 ;\n" + " movq %%mm3, 24(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq 48(%1), %%mm6 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4) + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor (%4), %%mm0 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor (%5), %%mm0 ;\n" + " pxor 8(%5), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 16(%4), %%mm2 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%5), %%mm2 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%4), %%mm3 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%5), %%mm3 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 32(%5), %%mm4 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " pxor 40(%5), %%mm5 ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%5), %%mm6 ;\n" + " pxor 56(%5), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " addl 
$64, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "g" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +static struct xor_block_template xor_block_pII_mmx = { + name: "pII_mmx", + do_2: xor_pII_mmx_2, + do_3: xor_pII_mmx_3, + do_4: xor_pII_mmx_4, + do_5: xor_pII_mmx_5, +}; + +static struct xor_block_template xor_block_p5_mmx = { + name: "p5_mmx", + do_2: xor_p5_mmx_2, + do_3: xor_p5_mmx_3, + do_4: xor_p5_mmx_4, + do_5: xor_p5_mmx_5, +}; + +#undef FPU_SAVE +#undef FPU_RESTORE + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +#define XMMS_SAVE \ + __asm__ __volatile__ ( \ + "movl %%cr0,%0 ;\n\t" \ + "clts ;\n\t" \ + "movups %%xmm0,(%1) ;\n\t" \ + "movups %%xmm1,0x10(%1) ;\n\t" \ + "movups %%xmm2,0x20(%1) ;\n\t" \ + "movups %%xmm3,0x30(%1) ;\n\t" \ + : "=r" (cr0) \ + : "r" (xmm_save) \ + : "memory") + +#define XMMS_RESTORE \ + __asm__ __volatile__ ( \ + "sfence ;\n\t" \ + "movups (%1),%%xmm0 ;\n\t" \ + "movups 0x10(%1),%%xmm1 ;\n\t" \ + "movups 0x20(%1),%%xmm2 ;\n\t" \ + "movups 0x30(%1),%%xmm3 ;\n\t" \ + "movl %0,%%cr0 ;\n\t" \ + : \ + : "r" (cr0), "r" (xmm_save) \ + : "memory") + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" +#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" +#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" +#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" +#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" +#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" +#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" +#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" + + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4]; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + PF1(i) \ + PF1(i+2) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2) + : "memory"); + + XMMS_RESTORE; +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4]; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r"(p2), "r"(p3) + : "memory" ); + 
+ + XMMS_RESTORE; +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4]; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4) + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4]; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + PF4(i) \ + PF4(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + XO4(i+1,1) \ + XO4(i+2,2) \ + XO4(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " addl $256, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : + : "r" (lines), + "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5) + : "memory"); + + XMMS_RESTORE; +} + +static struct xor_block_template xor_block_pIII_sse = { + name: "pIII_sse", + do_2: xor_sse_2, + do_3: xor_sse_3, + do_4: xor_sse_4, + do_5: xor_sse_5, +}; + +/* Also try the generic routines. */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (cpu_has_xmm) \ + xor_speed(&xor_block_pIII_sse); \ + if (md_cpu_has_mmx()) { \ + xor_speed(&xor_block_pII_mmx); \ + xor_speed(&xor_block_p5_mmx); \ + } \ + } while (0) + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) diff --git a/xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h b/xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h new file mode 100644 index 0000000000..67dbfb887e --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h @@ -0,0 +1,95 @@ +/* + * linux/include/linux/sunrpc/debug.h + * + * Debugging support for sunrpc module + * + * Copyright (C) 1996, Olaf Kirch + */ + +#ifndef _LINUX_SUNRPC_DEBUG_H_ +#define _LINUX_SUNRPC_DEBUG_H_ + +#include <linux/config.h> + +#include <linux/timer.h> +#include <linux/tqueue.h> + +/* + * Enable RPC debugging/profiling. 
+ */ +/*#ifdef CONFIG_SYSCTL*/ +/*#define RPC_DEBUG*/ +/*#endif*/ +/* #define RPC_PROFILE */ + +/* + * RPC debug facilities + */ +#define RPCDBG_XPRT 0x0001 +#define RPCDBG_CALL 0x0002 +#define RPCDBG_DEBUG 0x0004 +#define RPCDBG_NFS 0x0008 +#define RPCDBG_AUTH 0x0010 +#define RPCDBG_PMAP 0x0020 +#define RPCDBG_SCHED 0x0040 +#define RPCDBG_SVCSOCK 0x0100 +#define RPCDBG_SVCDSP 0x0200 +#define RPCDBG_MISC 0x0400 +#define RPCDBG_ALL 0x7fff + +#ifdef __KERNEL__ + +/* + * Debugging macros etc + */ +#ifdef RPC_DEBUG +extern unsigned int rpc_debug; +extern unsigned int nfs_debug; +extern unsigned int nfsd_debug; +extern unsigned int nlm_debug; +#endif + +#define dprintk(args...) dfprintk(FACILITY, ## args) + +#undef ifdebug +#ifdef RPC_DEBUG +# define ifdebug(fac) if (rpc_debug & RPCDBG_##fac) +# define dfprintk(fac, args...) do { ifdebug(fac) printk(args); } while(0) +# define RPC_IFDEBUG(x) x +#else +# define dfprintk(fac, args...) do ; while (0) +# define RPC_IFDEBUG(x) +#endif + +#ifdef RPC_PROFILE +# define pprintk(args...) printk(## args) +#else +# define pprintk(args...) do ; while (0) +#endif + +/* + * Sysctl interface for RPC debugging + */ +#ifdef RPC_DEBUG +void rpc_register_sysctl(void); +void rpc_unregister_sysctl(void); +#endif + +#endif /* __KERNEL__ */ + +/* + * Declarations for the sysctl debug interface, which allows one to read or + * change the debug flags for rpc, nfs, nfsd, and lockd. Since the sunrpc + * module currently registers its sysctl table dynamically, the sysctl path + * for module FOO is <CTL_SUNRPC, CTL_FOODEBUG>. + */ +#define CTL_SUNRPC 7249 /* arbitrary and hopefully unused */ + +enum { + CTL_RPCDEBUG = 1, + CTL_NFSDEBUG, + CTL_NFSDDEBUG, + CTL_NLMDEBUG, +}; + +#endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/xenolinux-2.4.16-sparse/kernel/panic.c b/xenolinux-2.4.16-sparse/kernel/panic.c new file mode 100644 index 0000000000..8a7237f979 --- /dev/null +++ b/xenolinux-2.4.16-sparse/kernel/panic.c @@ -0,0 +1,125 @@ +/* + * linux/kernel/panic.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * This function is used throughout the kernel (including mm and fs) + * to indicate a major problem. + */ +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/sysrq.h> +#include <linux/interrupt.h> + +asmlinkage void sys_sync(void); /* it's really int */ + +int panic_timeout; + +struct notifier_block *panic_notifier_list; + +static int __init panic_setup(char *str) +{ + panic_timeout = simple_strtoul(str, NULL, 0); + return 1; +} + +__setup("panic=", panic_setup); + +/** + * panic - halt the system + * @fmt: The text string to print + * + * Display a message, then perform cleanups. Functions in the panic + * notifier list are called after the filesystem cache is flushed (when possible). + * + * This function never returns. + */ + +NORET_TYPE void panic(const char * fmt, ...) +{ + static char buf[1024]; + va_list args; +#if defined(CONFIG_ARCH_S390) + unsigned long caller = (unsigned long) __builtin_return_address(0); +#endif + + bust_spinlocks(1); + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + printk(KERN_EMERG "Kernel panic: %s\n",buf); + if (in_interrupt()) + printk(KERN_EMERG "In interrupt handler - not syncing\n"); + else if (!current->pid) + printk(KERN_EMERG "In idle task - not syncing\n"); + else + sys_sync(); + bust_spinlocks(0); + +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + + notifier_call_chain(&panic_notifier_list, 0, NULL); + + if (panic_timeout > 0) + { + /* + * Delay timeout seconds before rebooting the machine. 
+ * We can't use the "normal" timers since we just panicked. + */ + printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); + mdelay(panic_timeout*1000); + /* + * Should we run the reboot notifier? For the moment I'm + * choosing not to. It might crash, be corrupt, or do + * more harm than good for other reasons. + */ + machine_restart(NULL); + } +#ifdef __sparc__ + { + extern int stop_a_enabled; + /* Make sure the user can actually press L1-A */ + stop_a_enabled = 1; + printk("Press L1-A to return to the boot prom\n"); + } +#endif +#if defined(CONFIG_ARCH_S390) + disabled_wait(caller); +#endif + sti(); + for(;;) { + CHECK_EMERGENCY_SYNC +#if defined(CONFIG_XENO) + HYPERVISOR_exit(); +#endif + } +} + +/** + * print_tainted - return a string to represent the kernel taint state. + * + * The string is overwritten by the next call to print_tainted(). + */ + +const char *print_tainted() +{ + static char buf[20]; + if (tainted) { + snprintf(buf, sizeof(buf), "Tainted: %c%c", + tainted & 1 ? 'P' : 'G', + tainted & 2 ? 'F' : ' '); + } + else + snprintf(buf, sizeof(buf), "Not tainted"); + return(buf); +} + +int tainted = 0; diff --git a/xenolinux-2.4.16-sparse/mk b/xenolinux-2.4.16-sparse/mk new file mode 100755 index 0000000000..70cb988379 --- /dev/null +++ b/xenolinux-2.4.16-sparse/mk @@ -0,0 +1,6 @@ +export INSTALL_MOD_PATH=/local/scratch/kaf24/xeno/linux-install +ARCH=xeno make -j 4 bzImage +ARCH=xeno make -j 4 modules +ARCH=xeno INSTALL_MOD_PATH=/anfs/nta3/xeno-roots/kaf24/root0 really make modules_install +ARCH=xeno INSTALL_MOD_PATH=/anfs/nta3/xeno-roots/kaf24/root1 really make modules_install +cp arch/xeno/boot/image.gz /usr/groups/pegasus/boot/kaf24/meteors/vmlinux.gz
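
Every routine in the xor.h hunk above computes the same function: it XORs the second (and third, fourth, fifth) source block into the first, processing 64, 128, or 256 bytes per loop iteration depending on the variant. The portable C sketch below makes that contract explicit. The names xor_ref_2/xor_ref_3 are illustrative only (the real generic fallbacks are the xor_8regs_* and xor_32regs_* routines pulled in from asm-generic/xor.h), but the signatures match the do_2/do_3 slots of struct xor_block_template.

/* Reference semantics for the do_2/do_3 template slots: p1 ^= p2 [^ p3]
 * over 'bytes' bytes.  Callers hand over whole multiples of the routine's
 * block size: 64 bytes for p5_mmx, 128 for pII_mmx, 256 for the SSE path
 * (which additionally needs 16-byte alignment, since it uses movaps). */
static void
xor_ref_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned long lines = bytes / sizeof(unsigned long);

	while (lines--)
		*p1++ ^= *p2++;
}

static void
xor_ref_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	  unsigned long *p3)
{
	unsigned long lines = bytes / sizeof(unsigned long);

	while (lines--)
		*p1++ ^= *p2++ ^ *p3++;
}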
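
These XOR kernels are what the RAID-5 code uses to generate and repair parity: the parity stripe unit is the XOR of the data units, so any single lost unit can be rebuilt by XORing the survivors. A hypothetical userspace demonstration of that round trip, reusing xor_ref_2() from the sketch above:

/* RAID-5 parity round trip: parity = d0 ^ d1; after "losing" d0, it is
 * recovered as parity ^ d1.  512 longs stand in for a stripe unit. */
#include <assert.h>
#include <string.h>

int main(void)
{
	static unsigned long d0[512], d1[512], parity[512], rebuilt[512];
	unsigned long i;

	for (i = 0; i < 512; i++) {	/* arbitrary test data */
		d0[i] = i * 0x9e3779b9UL;
		d1[i] = ~i;
	}

	memcpy(parity, d0, sizeof(parity));
	xor_ref_2(sizeof(parity), parity, d1);		/* parity = d0 ^ d1 */

	memcpy(rebuilt, parity, sizeof(rebuilt));
	xor_ref_2(sizeof(rebuilt), rebuilt, d1);	/* rebuilt = parity ^ d1 */

	assert(memcmp(rebuilt, d0, sizeof(d0)) == 0);
	return 0;
}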
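
Note also that panic_setup() in the kernel/panic.c hunk wires the "panic=" kernel command-line option into panic_timeout, and panic() only reaches machine_restart() when that value is positive; otherwise it stays in the final for(;;) loop (which, under CONFIG_XENO, exits to the hypervisor). A plausible bootloader entry, e.g. in lilo.conf:

# Reboot 30 seconds after a kernel panic instead of hanging
# (value parsed by panic_setup() via simple_strtoul).
append="panic=30"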