sync w/ head.
3f815145vGYx1WY79voKkZB9yKwJKQ extras/mini-os/time.c
3f815145xlKBAQmal9oces3G_Mvxqw extras/mini-os/traps.c
4187ca95_eQN62ugV1zliQcfzXrHnw install.sh
- 3e5a4e6589G-U42lFKs43plskXoFxQ linux-2.4.27-xen-sparse/Makefile
- 3e5a4e65IEPjnWPZ5w3TxS5scV8Ewg linux-2.4.27-xen-sparse/arch/xen/Makefile
- 3e5a4e65n-KhsEAs-A4ULiStBp-r6w linux-2.4.27-xen-sparse/arch/xen/boot/Makefile
- 3e5a4e65OV_j_DBtjzt5vej771AJsA linux-2.4.27-xen-sparse/arch/xen/config.in
- 40648526SxcA4lGIHB_k7ID8VlRSzw linux-2.4.27-xen-sparse/arch/xen/defconfig-xen0
- 40c73c77QesbL7eIvG-fJGAtVwhGRg linux-2.4.27-xen-sparse/arch/xen/defconfig-xenU
- 3e6377f5xwPfYZkPHPrDbEq1PRN7uQ linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/Makefile
- 4083dc16z0jvZEH4PiVDbDRreaNp6w linux-2.4.27-xen-sparse/arch/xen/drivers/blkif/Makefile
- 4083dc16KQus88a4U3uCV6qVCA6_8Q linux-2.4.27-xen-sparse/arch/xen/drivers/blkif/backend/Makefile
- 4075806dI5kfeMD5RV-DA0PYoThx_w linux-2.4.27-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile
- 4075806d4-j7vN0Mn0bklI1cRUX1vQ linux-2.4.27-xen-sparse/arch/xen/drivers/blkif/frontend/common.h
- 4075806dibjCcfuXv6CINMhxWTw3jQ linux-2.4.27-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
- 3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ linux-2.4.27-xen-sparse/arch/xen/drivers/console/Makefile
- 3e5a4e656nfFISThfbyXQOA6HN6YHw linux-2.4.27-xen-sparse/arch/xen/drivers/dom0/Makefile
- 40420a6ebRqDjufoN1WSJvolEW2Wjw linux-2.4.27-xen-sparse/arch/xen/drivers/evtchn/Makefile
- 4083dc16-Kd5y9psK_yk161sme5j5Q linux-2.4.27-xen-sparse/arch/xen/drivers/netif/Makefile
- 4083dc16UmHXxS9g_UFVnkUpN-oP2Q linux-2.4.27-xen-sparse/arch/xen/drivers/netif/backend/Makefile
- 405853f2wg7JXZJNltspMwOZJklxgw linux-2.4.27-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
- 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g linux-2.4.27-xen-sparse/arch/xen/kernel/Makefile
- 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg linux-2.4.27-xen-sparse/arch/xen/kernel/entry.S
- 3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg linux-2.4.27-xen-sparse/arch/xen/kernel/head.S
- 3e5a4e65RMGcuA-HCn3-wNx3fFQwdg linux-2.4.27-xen-sparse/arch/xen/kernel/i386_ksyms.c
- 3e5a4e653U6cELGv528IxOLHvCq8iA linux-2.4.27-xen-sparse/arch/xen/kernel/irq.c
- 3e5a4e65muT6SU3ck47IP87Q7Ti5hA linux-2.4.27-xen-sparse/arch/xen/kernel/ldt.c
- 4051db95N9N99FjsRwi49YKUNHWI8A linux-2.4.27-xen-sparse/arch/xen/kernel/pci-pc.c
- 3e5a4e65IGt3WwQDNiL4h-gYWgNTWQ linux-2.4.27-xen-sparse/arch/xen/kernel/process.c
- 3e5a4e66tR-qJMLj3MppcKqmvuI2XQ linux-2.4.27-xen-sparse/arch/xen/kernel/setup.c
- 3e5a4e66fWSTagLGU2P8BGFGRjhDiw linux-2.4.27-xen-sparse/arch/xen/kernel/signal.c
- 3e5a4e66N__lUXNwzQ-eADRzK9LXuQ linux-2.4.27-xen-sparse/arch/xen/kernel/time.c
- 3e5a4e66aHCbQ_F5QZ8VeyikLmuRZQ linux-2.4.27-xen-sparse/arch/xen/kernel/traps.c
- 3e5a4e66-9_NczrVMbuQkoSLyXckIw linux-2.4.27-xen-sparse/arch/xen/lib/Makefile
- 3e5a4e6637ZDk0BvFEC-aFQs599-ng linux-2.4.27-xen-sparse/arch/xen/lib/delay.c
- 3e5a4e66croVgpcJyJuF2ycQw0HuJw linux-2.4.27-xen-sparse/arch/xen/mm/Makefile
- 3e5a4e66l8Q5Tv-6B3lQIRmaVbFPzg linux-2.4.27-xen-sparse/arch/xen/mm/fault.c
- 3e5a4e661gLzzff25pJooKIIWe7IWg linux-2.4.27-xen-sparse/arch/xen/mm/init.c
- 3f0bed43UUdQichXAiVNrjV-y2Kzcg linux-2.4.27-xen-sparse/arch/xen/mm/ioremap.c
- 3e5a4e66qRlSTcjafidMB6ulECADvg linux-2.4.27-xen-sparse/arch/xen/vmlinux.lds
- 3e5a4e66mrtlmV75L1tjKDg8RaM5gA linux-2.4.27-xen-sparse/drivers/block/ll_rw_blk.c
- 40d70c24-Dy2HUMrwSZagfXvAPnI4w linux-2.4.27-xen-sparse/drivers/char/Makefile
- 3f108aeaLcGDgQdFAANLTUEid0a05w linux-2.4.27-xen-sparse/drivers/char/mem.c
- 3e5a4e66rw65CxyolW9PKz4GG42RcA linux-2.4.27-xen-sparse/drivers/char/tty_io.c
- 40c9c0c1pPwYE3-4i-oI3ubUu7UgvQ linux-2.4.27-xen-sparse/drivers/scsi/aic7xxx/Makefile
- 3e5a4e669uzIE54VwucPYtGwXLAbzA linux-2.4.27-xen-sparse/fs/exec.c
- 3e5a4e66wbeCpsJgVf_U8Jde-CNcsA linux-2.4.27-xen-sparse/include/asm-xen/bugs.h
- 3e5a4e66HdSkvIV6SJ1evG_xmTmXHA linux-2.4.27-xen-sparse/include/asm-xen/desc.h
- 3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw linux-2.4.27-xen-sparse/include/asm-xen/fixmap.h
- 406aeeaaQvl4RNtmd9hDEugBURbFpQ linux-2.4.27-xen-sparse/include/asm-xen/highmem.h
- 3e5a4e67YtcyDLQsShhCfQwPSELfvA linux-2.4.27-xen-sparse/include/asm-xen/hw_irq.h
- 4060044fVx7-tokvNLKBf_6qBB4lqQ linux-2.4.27-xen-sparse/include/asm-xen/io.h
- 3e5a4e673p7PEOyHFm3nHkYX6HQYBg linux-2.4.27-xen-sparse/include/asm-xen/irq.h
- 40d70c240tW7TWArl1VUgIFH2nVO1A linux-2.4.27-xen-sparse/include/asm-xen/keyboard.h
- 3e5a4e678ddsQOpbSiRdy1GRcDc9WA linux-2.4.27-xen-sparse/include/asm-xen/mmu_context.h
- 40d06e5b2YWInUX1Xv9amVANwd_2Xg linux-2.4.27-xen-sparse/include/asm-xen/module.h
- 3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ linux-2.4.27-xen-sparse/include/asm-xen/msr.h
- 3e5a4e67mnQfh-R8KcQCaVo2Oho6yg linux-2.4.27-xen-sparse/include/asm-xen/page.h
- 409ba2e7ZfV5hqTvIzxLtpClnxtIzg linux-2.4.27-xen-sparse/include/asm-xen/pci.h
- 3e5a4e67uTYU5oEnIDjxuaez8njjqg linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h
- 3e5a4e67X7JyupgdYkgDX19Huj2sAw linux-2.4.27-xen-sparse/include/asm-xen/pgtable-2level.h
- 3e5a4e67gr4NLGtQ5CvSLimMYZlkOA linux-2.4.27-xen-sparse/include/asm-xen/pgtable.h
- 3e5a4e676uK4xErTBDH6XJREn9LSyg linux-2.4.27-xen-sparse/include/asm-xen/processor.h
- 41224663YBCUMX1kVo_HRUtgaHTi7w linux-2.4.27-xen-sparse/include/asm-xen/queues.h
- 3e5a4e68uJz-xI0IBVMD7xRLQKJDFg linux-2.4.27-xen-sparse/include/asm-xen/segment.h
- 3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA linux-2.4.27-xen-sparse/include/asm-xen/smp.h
- 4062f7e2PzFOUGT0PaE7A0VprTU3JQ linux-2.4.27-xen-sparse/include/asm-xen/synch_bitops.h
- 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4.27-xen-sparse/include/asm-xen/system.h
- 3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.27-xen-sparse/include/asm-xen/vga.h
- 40659defgWA92arexpMGn8X3QMDj3w linux-2.4.27-xen-sparse/include/asm-xen/xor.h
- 3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.27-xen-sparse/include/linux/blk.h
+419e0488SBzS3mdUhwgsES5a5e3abA linux-2.4.27-xen-sparse/include/linux/irq.h
- 4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.27-xen-sparse/include/linux/mm.h
- 401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.27-xen-sparse/include/linux/sched.h
- 40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.27-xen-sparse/include/linux/skbuff.h
- 401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.27-xen-sparse/include/linux/timer.h
- 3f9d4b44247udoqWEgFkaHiWv6Uvyg linux-2.4.27-xen-sparse/kernel/time.c
- 401c059bjLBFYHRD4Py2uM3eA1D4zQ linux-2.4.27-xen-sparse/kernel/timer.c
- 3e6e7c1efbQe93xCvOpOVCnXTMmQ5w linux-2.4.27-xen-sparse/mkbuildtree
- 406aeeafkrnCuIVWLFv3kfn4uAD5Eg linux-2.4.27-xen-sparse/mm/highmem.c
- 3e5a4e68GxCIaFH4sy01v1wjapetaA linux-2.4.27-xen-sparse/mm/memory.c
- 3f108af5VxPkLv13tXpXgoRKALQtXQ linux-2.4.27-xen-sparse/mm/mprotect.c
- 3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4.27-xen-sparse/mm/mremap.c
- 409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.27-xen-sparse/mm/page_alloc.c
- 3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.27-xen-sparse/mm/swapfile.c
- 41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.27-xen-sparse/mm/vmalloc.c
- 41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.27-xen-sparse/net/core/skbuff.c
+ 3e5a4e6589G-U42lFKs43plskXoFxQ linux-2.4.28-xen-sparse/Makefile
+ 3e5a4e65IEPjnWPZ5w3TxS5scV8Ewg linux-2.4.28-xen-sparse/arch/xen/Makefile
+ 3e5a4e65n-KhsEAs-A4ULiStBp-r6w linux-2.4.28-xen-sparse/arch/xen/boot/Makefile
+ 3e5a4e65OV_j_DBtjzt5vej771AJsA linux-2.4.28-xen-sparse/arch/xen/config.in
+ 40648526SxcA4lGIHB_k7ID8VlRSzw linux-2.4.28-xen-sparse/arch/xen/defconfig-xen0
+ 40c73c77QesbL7eIvG-fJGAtVwhGRg linux-2.4.28-xen-sparse/arch/xen/defconfig-xenU
+ 3e6377f5xwPfYZkPHPrDbEq1PRN7uQ linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile
+ 4083dc16z0jvZEH4PiVDbDRreaNp6w linux-2.4.28-xen-sparse/arch/xen/drivers/blkif/Makefile
+ 4083dc16KQus88a4U3uCV6qVCA6_8Q linux-2.4.28-xen-sparse/arch/xen/drivers/blkif/backend/Makefile
+ 4075806dI5kfeMD5RV-DA0PYoThx_w linux-2.4.28-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile
+ 4075806d4-j7vN0Mn0bklI1cRUX1vQ linux-2.4.28-xen-sparse/arch/xen/drivers/blkif/frontend/common.h
+ 4075806dibjCcfuXv6CINMhxWTw3jQ linux-2.4.28-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+ 3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ linux-2.4.28-xen-sparse/arch/xen/drivers/console/Makefile
+ 3e5a4e656nfFISThfbyXQOA6HN6YHw linux-2.4.28-xen-sparse/arch/xen/drivers/dom0/Makefile
+ 40420a6ebRqDjufoN1WSJvolEW2Wjw linux-2.4.28-xen-sparse/arch/xen/drivers/evtchn/Makefile
+ 4083dc16-Kd5y9psK_yk161sme5j5Q linux-2.4.28-xen-sparse/arch/xen/drivers/netif/Makefile
+ 4083dc16UmHXxS9g_UFVnkUpN-oP2Q linux-2.4.28-xen-sparse/arch/xen/drivers/netif/backend/Makefile
+ 405853f2wg7JXZJNltspMwOZJklxgw linux-2.4.28-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
+ 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile
+ 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg linux-2.4.28-xen-sparse/arch/xen/kernel/entry.S
+ 3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg linux-2.4.28-xen-sparse/arch/xen/kernel/head.S
+ 3e5a4e65RMGcuA-HCn3-wNx3fFQwdg linux-2.4.28-xen-sparse/arch/xen/kernel/i386_ksyms.c
+ 3e5a4e653U6cELGv528IxOLHvCq8iA linux-2.4.28-xen-sparse/arch/xen/kernel/irq.c
+ 3e5a4e65muT6SU3ck47IP87Q7Ti5hA linux-2.4.28-xen-sparse/arch/xen/kernel/ldt.c
+ 4051db95N9N99FjsRwi49YKUNHWI8A linux-2.4.28-xen-sparse/arch/xen/kernel/pci-pc.c
+ 3e5a4e65IGt3WwQDNiL4h-gYWgNTWQ linux-2.4.28-xen-sparse/arch/xen/kernel/process.c
+ 3e5a4e66tR-qJMLj3MppcKqmvuI2XQ linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c
+ 3e5a4e66fWSTagLGU2P8BGFGRjhDiw linux-2.4.28-xen-sparse/arch/xen/kernel/signal.c
+ 3e5a4e66N__lUXNwzQ-eADRzK9LXuQ linux-2.4.28-xen-sparse/arch/xen/kernel/time.c
+ 3e5a4e66aHCbQ_F5QZ8VeyikLmuRZQ linux-2.4.28-xen-sparse/arch/xen/kernel/traps.c
+ 3e5a4e66-9_NczrVMbuQkoSLyXckIw linux-2.4.28-xen-sparse/arch/xen/lib/Makefile
+ 3e5a4e6637ZDk0BvFEC-aFQs599-ng linux-2.4.28-xen-sparse/arch/xen/lib/delay.c
+ 3e5a4e66croVgpcJyJuF2ycQw0HuJw linux-2.4.28-xen-sparse/arch/xen/mm/Makefile
+ 3e5a4e66l8Q5Tv-6B3lQIRmaVbFPzg linux-2.4.28-xen-sparse/arch/xen/mm/fault.c
+ 3e5a4e661gLzzff25pJooKIIWe7IWg linux-2.4.28-xen-sparse/arch/xen/mm/init.c
+ 3f0bed43UUdQichXAiVNrjV-y2Kzcg linux-2.4.28-xen-sparse/arch/xen/mm/ioremap.c
+ 3e5a4e66qRlSTcjafidMB6ulECADvg linux-2.4.28-xen-sparse/arch/xen/vmlinux.lds
+ 3e5a4e66mrtlmV75L1tjKDg8RaM5gA linux-2.4.28-xen-sparse/drivers/block/ll_rw_blk.c
+ 40d70c24-Dy2HUMrwSZagfXvAPnI4w linux-2.4.28-xen-sparse/drivers/char/Makefile
+ 3f108aeaLcGDgQdFAANLTUEid0a05w linux-2.4.28-xen-sparse/drivers/char/mem.c
+ 3e5a4e66rw65CxyolW9PKz4GG42RcA linux-2.4.28-xen-sparse/drivers/char/tty_io.c
+ 40c9c0c1pPwYE3-4i-oI3ubUu7UgvQ linux-2.4.28-xen-sparse/drivers/scsi/aic7xxx/Makefile
+ 3e5a4e669uzIE54VwucPYtGwXLAbzA linux-2.4.28-xen-sparse/fs/exec.c
+ 3e5a4e66wbeCpsJgVf_U8Jde-CNcsA linux-2.4.28-xen-sparse/include/asm-xen/bugs.h
+ 3e5a4e66HdSkvIV6SJ1evG_xmTmXHA linux-2.4.28-xen-sparse/include/asm-xen/desc.h
+ 3e5a4e66SYp_UpAVcF8Lc1wa3Qtgzw linux-2.4.28-xen-sparse/include/asm-xen/fixmap.h
+ 406aeeaaQvl4RNtmd9hDEugBURbFpQ linux-2.4.28-xen-sparse/include/asm-xen/highmem.h
+ 3e5a4e67YtcyDLQsShhCfQwPSELfvA linux-2.4.28-xen-sparse/include/asm-xen/hw_irq.h
+ 4060044fVx7-tokvNLKBf_6qBB4lqQ linux-2.4.28-xen-sparse/include/asm-xen/io.h
+ 3e5a4e673p7PEOyHFm3nHkYX6HQYBg linux-2.4.28-xen-sparse/include/asm-xen/irq.h
+ 40d70c240tW7TWArl1VUgIFH2nVO1A linux-2.4.28-xen-sparse/include/asm-xen/keyboard.h
+ 3e5a4e678ddsQOpbSiRdy1GRcDc9WA linux-2.4.28-xen-sparse/include/asm-xen/mmu_context.h
+ 40d06e5b2YWInUX1Xv9amVANwd_2Xg linux-2.4.28-xen-sparse/include/asm-xen/module.h
+ 3f8707e7ZmZ6TxyX0ZUEfvhA2Pb_xQ linux-2.4.28-xen-sparse/include/asm-xen/msr.h
+ 3e5a4e67mnQfh-R8KcQCaVo2Oho6yg linux-2.4.28-xen-sparse/include/asm-xen/page.h
+ 409ba2e7ZfV5hqTvIzxLtpClnxtIzg linux-2.4.28-xen-sparse/include/asm-xen/pci.h
+ 3e5a4e67uTYU5oEnIDjxuaez8njjqg linux-2.4.28-xen-sparse/include/asm-xen/pgalloc.h
+ 3e5a4e67X7JyupgdYkgDX19Huj2sAw linux-2.4.28-xen-sparse/include/asm-xen/pgtable-2level.h
+ 3e5a4e67gr4NLGtQ5CvSLimMYZlkOA linux-2.4.28-xen-sparse/include/asm-xen/pgtable.h
+ 3e5a4e676uK4xErTBDH6XJREn9LSyg linux-2.4.28-xen-sparse/include/asm-xen/processor.h
+ 41224663YBCUMX1kVo_HRUtgaHTi7w linux-2.4.28-xen-sparse/include/asm-xen/queues.h
+ 3e5a4e68uJz-xI0IBVMD7xRLQKJDFg linux-2.4.28-xen-sparse/include/asm-xen/segment.h
+ 3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA linux-2.4.28-xen-sparse/include/asm-xen/smp.h
+ 4062f7e2PzFOUGT0PaE7A0VprTU3JQ linux-2.4.28-xen-sparse/include/asm-xen/synch_bitops.h
+ 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4.28-xen-sparse/include/asm-xen/system.h
+ 3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.28-xen-sparse/include/asm-xen/vga.h
+ 40659defgWA92arexpMGn8X3QMDj3w linux-2.4.28-xen-sparse/include/asm-xen/xor.h
+ 3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.28-xen-sparse/include/linux/blk.h
+ 4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.28-xen-sparse/include/linux/mm.h
+ 401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.28-xen-sparse/include/linux/sched.h
+ 40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.28-xen-sparse/include/linux/skbuff.h
+ 401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.28-xen-sparse/include/linux/timer.h
+ 3f9d4b44247udoqWEgFkaHiWv6Uvyg linux-2.4.28-xen-sparse/kernel/time.c
+ 401c059bjLBFYHRD4Py2uM3eA1D4zQ linux-2.4.28-xen-sparse/kernel/timer.c
+ 3e6e7c1efbQe93xCvOpOVCnXTMmQ5w linux-2.4.28-xen-sparse/mkbuildtree
+ 406aeeafkrnCuIVWLFv3kfn4uAD5Eg linux-2.4.28-xen-sparse/mm/highmem.c
+ 3e5a4e68GxCIaFH4sy01v1wjapetaA linux-2.4.28-xen-sparse/mm/memory.c
+ 3f108af5VxPkLv13tXpXgoRKALQtXQ linux-2.4.28-xen-sparse/mm/mprotect.c
+ 3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4.28-xen-sparse/mm/mremap.c
+ 409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.28-xen-sparse/mm/page_alloc.c
+ 3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.28-xen-sparse/mm/swapfile.c
+ 41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.28-xen-sparse/mm/vmalloc.c
+ 41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.28-xen-sparse/net/core/skbuff.c
-40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.9-xen-sparse/arch/xen/Kconfig
-40f56237utH41NPukqHksuNf29IC9A linux-2.6.9-xen-sparse/arch/xen/Kconfig.drivers
-40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.9-xen-sparse/arch/xen/Makefile
-40f56237JTc60m1FRlUxkUaGSQKrNw linux-2.6.9-xen-sparse/arch/xen/boot/Makefile
-40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig
-40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig
-40f56237Mta0yHNaMS_qtM2rge0qYA linux-2.6.9-xen-sparse/arch/xen/i386/Kconfig
-40f56238u2CJdXNpjsZgHBxeVyY-2g linux-2.6.9-xen-sparse/arch/xen/i386/Makefile
-40f56238eczveJ86k_4hNxCLRQIF-g linux-2.6.9-xen-sparse/arch/xen/i386/kernel/Makefile
-40f56238rXVTJQKbBuXXLH52qEArcg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/Makefile
-40f562385s4lr6Zg92gExe7UQ4A76Q linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c
+40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.10-rc2-xen-sparse/arch/xen/Kconfig
+40f56237utH41NPukqHksuNf29IC9A linux-2.6.10-rc2-xen-sparse/arch/xen/Kconfig.drivers
+40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.10-rc2-xen-sparse/arch/xen/Makefile
+40f56237JTc60m1FRlUxkUaGSQKrNw linux-2.6.10-rc2-xen-sparse/arch/xen/boot/Makefile
+40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig
+40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig
+40f56237Mta0yHNaMS_qtM2rge0qYA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/Kconfig
+40f56238u2CJdXNpjsZgHBxeVyY-2g linux-2.6.10-rc2-xen-sparse/arch/xen/i386/Makefile
+40f56238eczveJ86k_4hNxCLRQIF-g linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/Makefile
+40f56238rXVTJQKbBuXXLH52qEArcg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/Makefile
+40f562385s4lr6Zg92gExe7UQ4A76Q linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c
+40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/entry.S
+40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/head.S
+40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
+40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/ioport.c
+40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/ldt.c
+4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c
+40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/process.c
+40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c
+40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/signal.c
+40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/time.c
+40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/timers/Makefile
+40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
+40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/traps.c
+40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/vsyscall.S
+40f56238wi6AdNQjm0RT57bSkwb6hg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/vsyscall.lds
+40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/Makefile
+40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/fault.c
+4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/highmem.c
+40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c
+40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c
+41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c
+413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/pageattr.c
+40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/pgtable.c
+4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/pci/Makefile
+4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.10-rc2-xen-sparse/arch/xen/i386/pci/direct.c
+4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.10-rc2-xen-sparse/arch/xen/i386/pci/irq.c
+40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile
+40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c
+40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/evtchn.c
+4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/fixup.c
+412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/gnttab.c
+40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/reboot.c
+414c113396tK1HTVeUalm3u-1DF16g linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/skbuff.c
+3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/xen_proc.c
+41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.10-rc2-xen-sparse/drivers/Makefile
+4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c
+4111308bZAIzwf_Kzu6x1TZYZ3E0_Q linux-2.6.10-rc2-xen-sparse/drivers/char/tty_io.c
+40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile
+41768fbcncpBQf8s2l2-CwoSNIZ9uA linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/Makefile
+3e6377f8i5e9eGz7Pw6fQuhuTQ7DQg linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c
+410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/Makefile
+4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/blkback.c
+4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/common.h
+4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/control.c
+4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/interface.c
+4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.10-rc2-xen-sparse/drivers/xen/blkback/vbd.c
+40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.10-rc2-xen-sparse/drivers/xen/blkfront/Kconfig
+40f562395atl9x4suKGhPkjqLOXESg linux-2.6.10-rc2-xen-sparse/drivers/xen/blkfront/Makefile
+40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.10-rc2-xen-sparse/drivers/xen/blkfront/blkfront.c
+40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.10-rc2-xen-sparse/drivers/xen/blkfront/block.h
+40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.10-rc2-xen-sparse/drivers/xen/blkfront/vbd.c
+40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.10-rc2-xen-sparse/drivers/xen/console/Makefile
+3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.10-rc2-xen-sparse/drivers/xen/console/console.c
+40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.10-rc2-xen-sparse/drivers/xen/evtchn/Makefile
+40f56239DoibTX6R-ZYd3QTXAB8_TA linux-2.6.10-rc2-xen-sparse/drivers/xen/evtchn/evtchn.c
+410a9817HEVJvred5Oy_uKH3HFJC5Q linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/Makefile
+4097ba831lpGeLlPg-bfV8XarVVuoQ linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/common.h
+4097ba83wvv8yi5P5xugCUBAdb6O-A linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/control.c
+4097ba83byY5bTSugJGZ1exTxIcMKw linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c
+4087cf0dGmSbFhFZyIZBJzvqxY-qBw linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c
+40f56239lrg_Ob0BJ8WBFS1zeg2CYw linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/Kconfig
+40f56239Wd4k_ycG_mFsSO1r5xKdtQ linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/Makefile
+405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c
+4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/Makefile
+3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c
+412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.10-rc2-xen-sparse/include/asm-generic/pgtable.h
+40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/desc.h
+4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
+40f5623akIoBsQ3KxSB2kufkbgONXQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/fixmap.h
+41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/floppy.h
+4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/highmem.h
+40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h
+40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
+40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
+40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
+4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
+40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/msr.h
+40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/page.h
+40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/param.h
+41137cc1kkvg0cg7uxddcEfjL7L67w linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pci.h
+40f5623atCokYc2uCysSJ8jFO8TEsw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
+412e01beTwiaC8sYY4XJP8PxLST5CA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h
+40f5623aEToIXouJgO-ao5d5pcEt1w linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
+40f5623aCCXRPlGpNthVXstGz9ZV3A linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h
+40f5623aPCkQQfPtJSooGdhcatrvnQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/processor.h
+412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/ptrace.h
+40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/segment.h
+40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/setup.h
+40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
+40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/system.h
+40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
+41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/vga.h
+40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/xor.h
+40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/ctrl_if.h
+40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.10-rc2-xen-sparse/include/asm-xen/evtchn.h
+419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.10-rc2-xen-sparse/include/asm-xen/foreign_page.h
+412dfaeazclyNDM0cpnp60Yo4xulpQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/gnttab.h
+40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h
+3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.10-rc2-xen-sparse/include/asm-xen/linux-public/privcmd.h
+3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/linux-public/suspend.h
+40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.10-rc2-xen-sparse/include/asm-xen/multicall.h
+4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.10-rc2-xen-sparse/include/asm-xen/queues.h
+3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.10-rc2-xen-sparse/include/asm-xen/xen_proc.h
+419b4e93z2S0gR17XTy8wg09JEwAhg linux-2.6.10-rc2-xen-sparse/include/linux/gfp.h
+419dfc609zbti8rqL60tL2dHXQ_rvQ linux-2.6.10-rc2-xen-sparse/include/linux/irq.h
+4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.10-rc2-xen-sparse/include/linux/skbuff.h
+419dfc6awx7w88wk6cG9P3mPidX6LQ linux-2.6.10-rc2-xen-sparse/kernel/irq/manage.c
+40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.10-rc2-xen-sparse/mkbuildtree
+412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.10-rc2-xen-sparse/mm/memory.c
+410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-rc2-xen-sparse/mm/page_alloc.c
+41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-rc2-xen-sparse/net/core/skbuff.c
+4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.10-rc2-xen-sparse/net/ipv4/raw.c
+ 41ab440bnpxZdWShZrGgM9pPaz5rmA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile
+ 41ab440bBKWz-aEOEojU4PAMXe3Ppg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c
-40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6.9-xen-sparse/arch/xen/i386/kernel/entry.S
-40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.9-xen-sparse/arch/xen/i386/kernel/head.S
-40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
-40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/ioport.c
-40f562382aC3_Gt4RG-4ZsfvDRUg3Q linux-2.6.9-xen-sparse/arch/xen/i386/kernel/irq.c
-40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.9-xen-sparse/arch/xen/i386/kernel/ldt.c
-4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/pci-dma.c
-40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.9-xen-sparse/arch/xen/i386/kernel/process.c
-40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c
-40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/signal.c
-40f56238UL9uv78ODDzMwLL9yryeFw linux-2.6.9-xen-sparse/arch/xen/i386/kernel/sysenter.c
-40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/time.c
-40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.9-xen-sparse/arch/xen/i386/kernel/timers/Makefile
-40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.9-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
-40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/traps.c
-40f56238qASEI_IOhCKWNuwFKNZrKQ linux-2.6.9-xen-sparse/arch/xen/i386/kernel/vmlinux.lds.S
-40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.9-xen-sparse/arch/xen/i386/kernel/vsyscall.S
-40f56238wi6AdNQjm0RT57bSkwb6hg linux-2.6.9-xen-sparse/arch/xen/i386/kernel/vsyscall.lds
-40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.9-xen-sparse/arch/xen/i386/mm/Makefile
-40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.9-xen-sparse/arch/xen/i386/mm/fault.c
-4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.9-xen-sparse/arch/xen/i386/mm/highmem.c
-40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.9-xen-sparse/arch/xen/i386/mm/hypervisor.c
-40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c
-41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.9-xen-sparse/arch/xen/i386/mm/ioremap.c
-413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.9-xen-sparse/arch/xen/i386/mm/pageattr.c
-40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.9-xen-sparse/arch/xen/i386/mm/pgtable.c
-4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.9-xen-sparse/arch/xen/i386/pci/Makefile
-4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.9-xen-sparse/arch/xen/i386/pci/direct.c
-4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.9-xen-sparse/arch/xen/i386/pci/irq.c
-40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile
-40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.9-xen-sparse/arch/xen/kernel/ctrl_if.c
-40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.9-xen-sparse/arch/xen/kernel/evtchn.c
-4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.9-xen-sparse/arch/xen/kernel/fixup.c
-412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.9-xen-sparse/arch/xen/kernel/gnttab.c
-40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.9-xen-sparse/arch/xen/kernel/reboot.c
-414c113396tK1HTVeUalm3u-1DF16g linux-2.6.9-xen-sparse/arch/xen/kernel/skbuff.c
-3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.9-xen-sparse/arch/xen/kernel/xen_proc.c
-41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.9-xen-sparse/drivers/Makefile
-4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.9-xen-sparse/drivers/char/mem.c
-4111308bZAIzwf_Kzu6x1TZYZ3E0_Q linux-2.6.9-xen-sparse/drivers/char/tty_io.c
-40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.9-xen-sparse/drivers/xen/Makefile
-41768fbcncpBQf8s2l2-CwoSNIZ9uA linux-2.6.9-xen-sparse/drivers/xen/balloon/Makefile
-3e6377f8i5e9eGz7Pw6fQuhuTQ7DQg linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c
-410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.9-xen-sparse/drivers/xen/blkback/Makefile
-4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c
-4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.9-xen-sparse/drivers/xen/blkback/common.h
-4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.9-xen-sparse/drivers/xen/blkback/control.c
-4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.9-xen-sparse/drivers/xen/blkback/interface.c
-4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.9-xen-sparse/drivers/xen/blkback/vbd.c
-40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.9-xen-sparse/drivers/xen/blkfront/Kconfig
-40f562395atl9x4suKGhPkjqLOXESg linux-2.6.9-xen-sparse/drivers/xen/blkfront/Makefile
-40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c
-40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.9-xen-sparse/drivers/xen/blkfront/block.h
-40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.9-xen-sparse/drivers/xen/blkfront/vbd.c
-40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.9-xen-sparse/drivers/xen/console/Makefile
-3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.9-xen-sparse/drivers/xen/console/console.c
-40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.9-xen-sparse/drivers/xen/evtchn/Makefile
-40f56239DoibTX6R-ZYd3QTXAB8_TA linux-2.6.9-xen-sparse/drivers/xen/evtchn/evtchn.c
-410a9817HEVJvred5Oy_uKH3HFJC5Q linux-2.6.9-xen-sparse/drivers/xen/netback/Makefile
-4097ba831lpGeLlPg-bfV8XarVVuoQ linux-2.6.9-xen-sparse/drivers/xen/netback/common.h
-4097ba83wvv8yi5P5xugCUBAdb6O-A linux-2.6.9-xen-sparse/drivers/xen/netback/control.c
-4097ba83byY5bTSugJGZ1exTxIcMKw linux-2.6.9-xen-sparse/drivers/xen/netback/interface.c
-4087cf0dGmSbFhFZyIZBJzvqxY-qBw linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c
-40f56239lrg_Ob0BJ8WBFS1zeg2CYw linux-2.6.9-xen-sparse/drivers/xen/netfront/Kconfig
-40f56239Wd4k_ycG_mFsSO1r5xKdtQ linux-2.6.9-xen-sparse/drivers/xen/netfront/Makefile
-405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.9-xen-sparse/drivers/xen/netfront/netfront.c
-4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.9-xen-sparse/drivers/xen/privcmd/Makefile
-3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.9-xen-sparse/drivers/xen/privcmd/privcmd.c
-412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.9-xen-sparse/include/asm-generic/pgtable.h
-40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/desc.h
-4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
-40f5623akIoBsQ3KxSB2kufkbgONXQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/fixmap.h
-41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/floppy.h
-4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/highmem.h
-40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/io.h
-40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
-40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
-40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
-4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
-40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/msr.h
-40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/page.h
-40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/param.h
-41137cc1kkvg0cg7uxddcEfjL7L67w linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pci.h
-40f5623atCokYc2uCysSJ8jFO8TEsw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
-412e01beTwiaC8sYY4XJP8PxLST5CA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h
-40f5623aEToIXouJgO-ao5d5pcEt1w linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
-40f5623aCCXRPlGpNthVXstGz9ZV3A linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable.h
-40f5623aPCkQQfPtJSooGdhcatrvnQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/processor.h
-412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/ptrace.h
-40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/segment.h
-40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/setup.h
-40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
-40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/system.h
-40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
-41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/vga.h
-40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/xor.h
-40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.9-xen-sparse/include/asm-xen/ctrl_if.h
-40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h
-419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.9-xen-sparse/include/asm-xen/foreign_page.h
-412dfaeazclyNDM0cpnp60Yo4xulpQ linux-2.6.9-xen-sparse/include/asm-xen/gnttab.h
-40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h
-3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.9-xen-sparse/include/asm-xen/linux-public/privcmd.h
-3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.9-xen-sparse/include/asm-xen/linux-public/suspend.h
-40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.9-xen-sparse/include/asm-xen/multicall.h
-4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.9-xen-sparse/include/asm-xen/queues.h
-3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.9-xen-sparse/include/asm-xen/xen_proc.h
-4124d8c4aocX7A-jIbuGraWN84pxGQ linux-2.6.9-xen-sparse/include/linux/bio.h
-419b4e93z2S0gR17XTy8wg09JEwAhg linux-2.6.9-xen-sparse/include/linux/gfp.h
-4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.9-xen-sparse/include/linux/skbuff.h
-40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.9-xen-sparse/mkbuildtree
-412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.9-xen-sparse/mm/memory.c
-410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.9-xen-sparse/mm/page_alloc.c
-41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.9-xen-sparse/net/core/skbuff.c
-4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.9-xen-sparse/net/ipv4/raw.c
413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen
--- /dev/null
-asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs)
+ /*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+ /*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+ #include <linux/config.h>
+ #include <linux/ptrace.h>
+ #include <linux/errno.h>
+ #include <linux/signal.h>
+ #include <linux/sched.h>
+ #include <linux/ioport.h>
+ #include <linux/interrupt.h>
+ #include <linux/timex.h>
+ #include <linux/slab.h>
+ #include <linux/random.h>
+ #include <linux/smp_lock.h>
+ #include <linux/init.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/irq.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+
+ #include <asm/atomic.h>
+ #include <asm/io.h>
+ #include <asm/smp.h>
+ #include <asm/system.h>
+ #include <asm/bitops.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgalloc.h>
+ #include <asm/delay.h>
+ #include <asm/desc.h>
+ #include <asm/irq.h>
+
+
+
+ /*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+ /*
+ * Controller mappings for all interrupt sources:
+ */
+ irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+ { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+ static void register_irq_proc (unsigned int irq);
+
+ /*
+ * Special irq handlers.
+ */
+
+ void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+ /*
+ * Generic no controller code
+ */
+
+ static void enable_none(unsigned int irq) { }
+ static unsigned int startup_none(unsigned int irq) { return 0; }
+ static void disable_none(unsigned int irq) { }
+ static void ack_none(unsigned int irq)
+ {
+ /*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesn't deserve
+ * a generic callback I think.
+ */
+ #if CONFIG_X86
+ printk("unexpected IRQ trap at vector %02x\n", irq);
+ #ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+ #endif
+ #endif
+ }
+
+ /* startup is the same as "enable", shutdown is same as "disable" */
+ #define shutdown_none disable_none
+ #define end_none enable_none
+
+ struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+ };
+
+ atomic_t irq_err_count;
+ #ifdef CONFIG_X86_IO_APIC
+ #ifdef APIC_MISMATCH_DEBUG
+ atomic_t irq_mis_count;
+ #endif
+ #endif
+
+ /*
+ * Generic, controller-independent functions:
+ */
+
+ int show_interrupts(struct seq_file *p, void *v)
+ {
+ int i, j;
+ struct irqaction * action;
+
+ seq_printf(p, " ");
+ for (j=0; j<smp_num_cpus; j++)
+ seq_printf(p, "CPU%d ",j);
+ seq_putc(p,'\n');
+
+ for (i = 0 ; i < NR_IRQS ; i++) {
+ action = irq_desc[i].action;
+ if (!action)
+ continue;
+ seq_printf(p, "%3d: ",i);
+ #ifndef CONFIG_SMP
+ seq_printf(p, "%10u ", kstat_irqs(i));
+ #else
+ for (j = 0; j < smp_num_cpus; j++)
+ seq_printf(p, "%10u ",
+ kstat.irqs[cpu_logical_map(j)][i]);
+ #endif
+ seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+ seq_printf(p, ", %s", action->name);
+ seq_putc(p,'\n');
+ }
+ seq_printf(p, "NMI: ");
+ for (j = 0; j < smp_num_cpus; j++)
+ seq_printf(p, "%10u ",
+ nmi_count(cpu_logical_map(j)));
+ seq_printf(p, "\n");
+ #if CONFIG_X86_LOCAL_APIC
+ seq_printf(p, "LOC: ");
+ for (j = 0; j < smp_num_cpus; j++)
+ seq_printf(p, "%10u ",
+ apic_timer_irqs[cpu_logical_map(j)]);
+ seq_printf(p, "\n");
+ #endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+ #ifdef CONFIG_X86_IO_APIC
+ #ifdef APIC_MISMATCH_DEBUG
+ seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+ #endif
+ #endif
+
+ return 0;
+ }
+
+
+ /*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+ #ifdef CONFIG_SMP
+ unsigned char global_irq_holder = NO_PROC_ID;
+ unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+ extern void show_stack(unsigned long* esp);
+
+ static void show(char * str)
+ {
+ int i;
+ int cpu = smp_processor_id();
+
+ printk("\n%s, CPU %d:\n", str, cpu);
+ printk("irq: %d [",irqs_running());
+ for(i=0;i < smp_num_cpus;i++)
+ printk(" %d",local_irq_count(i));
+ printk(" ]\nbh: %d [",spin_is_locked(&global_bh_lock) ? 1 : 0);
+ for(i=0;i < smp_num_cpus;i++)
+ printk(" %d",local_bh_count(i));
+
+ printk(" ]\nStack dumps:");
+ for(i = 0; i < smp_num_cpus; i++) {
+ unsigned long esp;
+ if (i == cpu)
+ continue;
+ printk("\nCPU %d:",i);
+ esp = init_tss[i].esp0;
+ if (!esp) {
+ /* tss->esp0 is set to NULL in cpu_init(),
+ * it's initialized when the cpu returns to user
+ * space. -- manfreds
+ */
+ printk(" <unknown> ");
+ continue;
+ }
+ esp &= ~(THREAD_SIZE-1);
+ esp += sizeof(struct task_struct);
+ show_stack((void*)esp);
+ }
+ printk("\nCPU %d:",cpu);
+ show_stack(NULL);
+ printk("\n");
+ }
+
+ #define MAXCOUNT 100000000
+
+ /*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes. I suspect the delay done
+ * by SYNC_OTHER_CORES() is in correlation with 'snooping latency', but
+ * i thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+ #define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+ #if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+ # define SYNC_OTHER_CORES(x) udelay(x+1)
+ #else
+ /*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+ # define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+ #endif
+
+ static inline void wait_on_irq(int cpu)
+ {
+ int count = MAXCOUNT;
+
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!irqs_running())
+ if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+ break;
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ clear_bit(0,&global_irq_lock);
+
+ for (;;) {
+ if (!--count) {
+ show("wait_on_irq");
+ count = ~0;
+ }
+ __sti();
+ SYNC_OTHER_CORES(cpu);
+ __cli();
+ if (irqs_running())
+ continue;
+ if (global_irq_lock)
+ continue;
+ if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+ continue;
+ if (!test_and_set_bit(0,&global_irq_lock))
+ break;
+ }
+ }
+ }
+
+ /*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+ void synchronize_irq(void)
+ {
+ if (irqs_running()) {
+ /* Stupid approach */
+ cli();
+ sti();
+ }
+ }
+
+ static inline void get_irqlock(int cpu)
+ {
+ if (test_and_set_bit(0,&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ do {
+ do {
+ rep_nop();
+ } while (test_bit(0,&global_irq_lock));
+ } while (test_and_set_bit(0,&global_irq_lock));
+ }
+ /*
+ * We also need to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+ }
+
+ /*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+ void __global_cli(void)
+ {
+ unsigned int flags;
+
+ __save_flags(flags);
+ if (!flags) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count(cpu))
+ get_irqlock(cpu);
+ }
+ }
+
+ void __global_sti(void)
+ {
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count(cpu))
+ release_irqlock(cpu);
+ __sti();
+ }
+
+ /*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+ unsigned long __global_save_flags(void)
+ {
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ __save_flags(flags);
+ local_enabled = !flags;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count(cpu)) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == cpu)
+ retval = 0;
+ }
+ return retval;
+ }
+
+ void __global_restore_flags(unsigned long flags)
+ {
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+ }
+
+ #endif
+
+ /*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+ {
+ int status;
+ int cpu = smp_processor_id();
+
+ irq_enter(cpu, irq);
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+ if (!(action->flags & SA_INTERRUPT))
+ __sti();
+
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
+ } while (action);
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+ __cli();
+
+ irq_exit(cpu, irq);
+
+ return status;
+ }
+
+ /*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ */
+
+ /**
+ * disable_irq_nosync - disable an irq without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and Enables are
+ * nested.
+ * Unlike disable_irq(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ *
+ * This function may be called from IRQ context.
+ */
+
+ inline void disable_irq_nosync(unsigned int irq)
+ {
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ /**
+ * disable_irq - disable an irq and wait for completion
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Enables and Disables are
+ * nested.
+ * This function waits for any pending IRQ handlers for this interrupt
+ * to complete before returning. If you use this function while
+ * holding a resource the IRQ handler may need you will deadlock.
+ *
+ * This function may be called - with care - from IRQ context.
+ */
+
+ void disable_irq(unsigned int irq)
+ {
+ disable_irq_nosync(irq);
+
+ if (!local_irq_count(smp_processor_id())) {
+ do {
+ barrier();
+ cpu_relax();
+ } while (irq_desc[irq].status & IRQ_INPROGRESS);
+ }
+ }
+
+ /**
+ * enable_irq - enable handling of an irq
+ * @irq: Interrupt to enable
+ *
+ * Undoes the effect of one call to disable_irq(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ *
+ * This function may be called from IRQ context.
+ */
+
+ void enable_irq(unsigned int irq)
+ {
+ irq_desc_t *desc = irq_desc + irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
+ }
+ desc->handler->enable(irq);
+ /* fall-through */
+ }
+ default:
+ desc->depth--;
+ break;
+ case 0:
+ printk("enable_irq(%u) unbalanced from %p\n", irq,
+ __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ /*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function. The function
- * does not return until any executing interrupts for this IRQ
- * have completed.
- *
- * This function may be called from interrupt context.
- *
- * Bugs: Attempting to free an irq in a handler for the same irq hangs
- * the machine.
++asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+ {
+ /*
+ * We ack quickly, we don't want the irq controller
+ * thinking we're snobs just because some other CPU has
+ * disabled global interrupts (we have already done the
+ * INT_ACK cycles, it's too late to try to pretend to the
+ * controller that we aren't taking the interrupt).
+ *
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
++ int irq = regs->orig_eax & 0xff; /* high bits used in ret_from_ code */
+ int cpu = smp_processor_id();
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ long esp;
+
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+ __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
+ if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
+ extern void show_stack(unsigned long *);
+
+ printk("do_IRQ: stack overflow: %ld\n",
+ esp - sizeof(struct task_struct));
+ __asm__ __volatile__("movl %%esp,%0" : "=r" (esp));
+ show_stack((void *)esp);
+ }
+ #endif
+
+ kstat.irqs[cpu][irq]++;
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+ REPLAY is when Linux resends an IRQ that was dropped earlier
+ WAITING is used by probe to mark irqs that are being tested
+ */
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING; /* we _want_ to handle it */
+
+ /*
+ * If the IRQ is disabled for whatever reason, we cannot
+ * use the action we have.
+ */
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
+ }
+ desc->status = status;
+
+ /*
+ * If there is no IRQ handler or it was disabled, exit early.
+ Since we set PENDING, if another processor is handling
+ a different instance of this same irq, the other processor
+ will take care of it.
+ */
+ if (!action)
+ goto out;
+
+ /*
+ * Edge triggered interrupts need to remember
+ * pending events.
+ * This applies to any hw interrupts that allow a second
+ * instance of the same irq to arrive while we are in do_IRQ
+ * or in the handler. But the code here only handles the _second_
+ * instance of the irq, not the third or fourth. So it is mostly
+ * useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
+ */
+ for (;;) {
+ spin_unlock(&desc->lock);
+ handle_IRQ_event(irq, regs, action);
+ spin_lock(&desc->lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ }
+ desc->status &= ~IRQ_INPROGRESS;
+ out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
+
+ if (softirq_pending(cpu))
+ do_softirq();
+ return 1;
+ }
+
+ /**
+ * request_irq - allocate an interrupt line
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs
+ * @irqflags: Interrupt type flags
+ * @devname: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. From the point this
+ * call is made your handler function may be invoked. Since
+ * your handler function must clear any interrupt the board
+ * raises, you must take care both to initialise your hardware
+ * and to set up the interrupt handler in the right order.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If your interrupt is shared you must pass a non NULL dev_id
+ * as this is required when freeing the interrupt.
+ *
+ * Flags:
+ *
+ * SA_SHIRQ Interrupt is shared
+ *
+ * SA_INTERRUPT Disable local interrupts while processing
+ *
+ * SA_SAMPLE_RANDOM The interrupt can be used for entropy
+ *
+ */
+
+ int request_irq(unsigned int irq,
+ void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+ {
+ int retval;
+ struct irqaction * action;
+
+ #if 1
+ /*
+ * Sanity-check: shared interrupts should REALLY pass in
+ * a real dev-ID, otherwise we'll have trouble later trying
+ * to figure out which interrupt is which (messes up the
+ * interrupt freeing logic etc).
+ */
+ if (irqflags & SA_SHIRQ) {
+ if (!dev_id)
+ printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
+ }
+ #endif
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+ if (!handler)
+ return -EINVAL;
+
+ action = (struct irqaction *)
+ kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ retval = setup_irq(irq, action);
+ if (retval)
+ kfree(action);
+ return retval;
+ }
+
-
-void free_irq(unsigned int irq, void *dev_id)
++/*
++ * Internal function to unregister an irqaction - typically used to
++ * deallocate special interrupts that are part of the architecture.
+ */
- return;
++int teardown_irq(unsigned int irq, struct irqaction * old)
+ {
+ irq_desc_t *desc;
+ struct irqaction **p;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS)
- if (action->dev_id != dev_id)
++ return -ENOENT;
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
-#define SA_STATIC_ACTION 0x01000000 /* Is it our duty to free the action? */
- if (!(action->flags & SA_STATIC_ACTION))
- kfree(action);
- return;
++ if (action != old)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!desc->action) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ #ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (desc->status & IRQ_INPROGRESS) {
+ barrier();
+ cpu_relax();
+ }
+ #endif
++ return 0;
+ }
+ printk("Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&desc->lock,flags);
++ return -ENOENT;
++ }
++}
++
++/**
++ * free_irq - free an interrupt
++ * @irq: Interrupt line to free
++ * @dev_id: Device identity to free
++ *
++ * Remove an interrupt handler. The handler is removed and if the
++ * interrupt line is no longer in use by any driver it is disabled.
++ * On a shared IRQ the caller must ensure the interrupt is disabled
++ * on the card it drives before calling this function. The function
++ * does not return until any executing interrupts for this IRQ
++ * have completed.
++ *
++ * This function may be called from interrupt context.
++ *
++ * Bugs: Attempting to free an irq in a handler for the same irq hangs
++ * the machine.
++ */
++
++void free_irq(unsigned int irq, void *dev_id)
++{
++ irq_desc_t *desc;
++ struct irqaction *action;
++ unsigned long flags;
++
++ if (irq >= NR_IRQS)
++ return;
++
++ desc = irq_desc + irq;
++ spin_lock_irqsave(&desc->lock,flags);
++ for (action = desc->action; action != NULL; action = action->next) {
++ if (action->dev_id != dev_id)
++ continue;
++
++ spin_unlock_irqrestore(&desc->lock,flags);
++
++ if (teardown_irq(irq, action) == 0)
++ kfree(action);
+ return;
+ }
++ printk("Trying to free free IRQ%d\n",irq);
++ spin_unlock_irqrestore(&desc->lock,flags);
++ return;
+ }
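/*
 * Illustrative aside, not part of the patch: the matching teardown for the
 * hypothetical driver sketched after request_irq() above. The dev_id must be
 * the exact cookie that was registered; otherwise no action matches and the
 * "Trying to free free IRQ" message above is printed instead.
 */
static void my_dev_detach(struct my_dev *dev)
{
        /*
         * On a shared line the device must already have its interrupt source
         * disabled before this call, per the free_irq() documentation above,
         * and this must not be called from the handler of this same irq.
         */
        free_irq(dev->irq, dev);
}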
+
+ /*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+ static DECLARE_MUTEX(probe_sem);
+
+ /**
+ * probe_irq_on - begin an interrupt autodetect
+ *
+ * Commence probing for an interrupt. The interrupts are scanned
+ * and a mask of potential interrupt lines is returned.
+ *
+ */
+
+ unsigned long probe_irq_on(void)
+ {
+ unsigned int i;
+ irq_desc_t *desc;
+ unsigned long val;
+ unsigned long delay;
+
+ down(&probe_sem);
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_PIRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!irq_desc[i].action)
+ irq_desc[i].handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger. */
+ for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+ /* about 20ms delay */ synchronize_irq();
+
+ /*
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
+ */
+ for (i = NR_PIRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /*
+ * Wait for spurious interrupts to trigger
+ */
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+ /* about 100ms delay */ synchronize_irq();
+
+ /*
+ * Now filter out any obviously spurious interrupts
+ */
+ val = 0;
+ for (i = 0; i < NR_PIRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+
+ return val;
+ }
+
+ /*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+
+ /**
+ * probe_irq_mask - scan a bitmap of interrupt lines
+ * @val: mask of interrupts to consider
+ *
+ * Scan the ISA bus interrupt lines and return a bitmap of
+ * active interrupts. The interrupt probe logic state is then
+ * returned to its previous value.
+ *
+ * Note: we need to scan all the irq's even though we will
+ * only return ISA irq numbers - just so that we reset them
+ * all to a known state.
+ */
+ unsigned int probe_irq_mask(unsigned long val)
+ {
+ int i;
+ unsigned int mask;
+
+ mask = 0;
+ for (i = 0; i < NR_PIRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (i < 16 && !(status & IRQ_WAITING))
+ mask |= 1 << i;
+
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ return mask & val;
+ }
+
+ /*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+ /**
+ * probe_irq_off - end an interrupt autodetect
+ * @val: mask of potential interrupts (unused)
+ *
+ * Scans the unused interrupt lines and returns the line which
+ * appears to have triggered the interrupt. If no interrupt was
+ * found then zero is returned. If more than one interrupt is
+ * found then minus the first candidate is returned to indicate
+ * there is doubt.
+ *
+ * The interrupt probe logic state is returned to its previous
+ * value.
+ *
+ * BUGS: When used in a module (which arguably shouldn't happen)
+ * nothing prevents two IRQ probe callers from overlapping. The
+ * results of this are non-optimal.
+ */
+
+ int probe_irq_off(unsigned long val)
+ {
+ int i, irq_found, nr_irqs;
+
+ nr_irqs = 0;
+ irq_found = 0;
+ for (i = 0; i < NR_PIRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
+ }
+ up(&probe_sem);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+ return irq_found;
+ }
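/*
 * Illustrative aside, not part of the patch: the classic autoprobe sequence
 * built from probe_irq_on()/probe_irq_off() above, as legacy ISA drivers use
 * it. my_card_trigger_irq() and my_card_ack_irq() are hypothetical helpers
 * standing in for whatever register pokes make the card raise and clear an
 * interrupt; mdelay() needs <linux/delay.h> and -ENODEV needs <linux/errno.h>.
 */
extern void my_card_trigger_irq(void);  /* hypothetical device poke */
extern void my_card_ack_irq(void);      /* hypothetical device ack  */

static int my_card_find_irq(void)
{
        unsigned long mask;
        int irq;

        mask = probe_irq_on();          /* arm detection on all unused lines */
        my_card_trigger_irq();          /* make the card raise its interrupt */
        mdelay(20);                     /* give the interrupt time to arrive */
        irq = probe_irq_off(mask);      /* 0 = none seen, negative = ambiguous */
        my_card_ack_irq();

        return irq > 0 ? irq : -ENODEV;
}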
+
+ /* this was setup_x86_irq but it seems pretty generic */
+ int setup_irq(unsigned int irq, struct irqaction * new)
+ {
+ int shared = 0;
+ unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_desc + irq;
+
+ /*
+ * Some drivers like serial.c use request_irq() heavily,
+ * so we have to be careful not to interfere with a
+ * running system.
+ */
+ if (new->flags & SA_SAMPLE_RANDOM) {
+ /*
+ * This function might sleep, we want to call it first,
+ * outside of the atomic block.
+ * Yes, this might clear the entropy pool if the wrong
+ * driver is attempted to be loaded, without actually
+ * installing a new handler, but is this really a problem?
+ * Only the sysadmin is able to do this.
+ */
+ rand_initialize_irq(irq);
+ }
+
+ /*
+ * The following block of code has to be executed atomically
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+ if ((old = *p) != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&desc->lock,flags);
+ return -EBUSY;
+ }
+
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+
+ *p = new;
+
+ if (!shared) {
+ desc->depth = 0;
+ desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
+ desc->handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ register_irq_proc(irq);
+ return 0;
+ }
+
+ static struct proc_dir_entry * root_irq_dir;
+ static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+ #define HEX_DIGITS 8
+
+ static unsigned int parse_hex_value (const char *buffer,
+ unsigned long count, unsigned long *ret)
+ {
+ unsigned char hexnum [HEX_DIGITS];
+ unsigned long value;
+ int i;
+
+ if (!count)
+ return -EINVAL;
+ if (count > HEX_DIGITS)
+ count = HEX_DIGITS;
+ if (copy_from_user(hexnum, buffer, count))
+ return -EFAULT;
+
+ /*
+ * Parse the first 8 characters as a hex string, any non-hex char
+ * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+ */
+ value = 0;
+
+ for (i = 0; i < count; i++) {
+ unsigned int c = hexnum[i];
+
+ switch (c) {
+ case '0' ... '9': c -= '0'; break;
+ case 'a' ... 'f': c -= 'a'-10; break;
+ case 'A' ... 'F': c -= 'A'-10; break;
+ default:
+ goto out;
+ }
+ value = (value << 4) | c;
+ }
+ out:
+ *ret = value;
+ return 0;
+ }
+
+ #if CONFIG_SMP
+
+ static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+ static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+ static int irq_affinity_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+ if (count < HEX_DIGITS+1)
+ return -EINVAL;
+ return sprintf (page, "%08lx\n", irq_affinity[(long)data]);
+ }
+
+ static int irq_affinity_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+ {
+ int irq = (long) data, full_count = count, err;
+ unsigned long new_value;
+
+ if (!irq_desc[irq].handler->set_affinity)
+ return -EIO;
+
+ err = parse_hex_value(buffer, count, &new_value);
+ if (err)
+ return err;
+
+ /*
+ * Do not allow disabling IRQs completely - it's too easy a
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+ if (!(new_value & cpu_online_map))
+ return -EINVAL;
+
+ irq_affinity[irq] = new_value;
+ irq_desc[irq].handler->set_affinity(irq, new_value);
+
+ return full_count;
+ }
+
+ #endif
+
+ static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+ unsigned long *mask = (unsigned long *) data;
+ if (count < HEX_DIGITS+1)
+ return -EINVAL;
+ return sprintf (page, "%08lx\n", *mask);
+ }
+
+ static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+ {
+ unsigned long *mask = (unsigned long *) data, full_count = count, err;
+ unsigned long new_value;
+
+ err = parse_hex_value(buffer, count, &new_value);
+ if (err)
+ return err;
+
+ *mask = new_value;
+ return full_count;
+ }
+
+ #define MAX_NAMELEN 10
+
+ static void register_irq_proc (unsigned int irq)
+ {
+ char name [MAX_NAMELEN];
+
+ if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+ irq_dir[irq])
+ return;
+
+ memset(name, 0, MAX_NAMELEN);
+ sprintf(name, "%d", irq);
+
+ /* create /proc/irq/1234 */
+ irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+ #if CONFIG_SMP
+ {
+ struct proc_dir_entry *entry;
+
+ /* create /proc/irq/1234/smp_affinity */
+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+ if (entry) {
+ entry->nlink = 1;
+ entry->data = (void *)(long)irq;
+ entry->read_proc = irq_affinity_read_proc;
+ entry->write_proc = irq_affinity_write_proc;
+ }
+
+ smp_affinity_entry[irq] = entry;
+ }
+ #endif
+ }
+
+ unsigned long prof_cpu_mask = -1;
+
+ void init_irq_proc (void)
+ {
+ struct proc_dir_entry *entry;
+ int i;
+
+ /* create /proc/irq */
+ root_irq_dir = proc_mkdir("irq", 0);
+
+ /* create /proc/irq/prof_cpu_mask */
+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+ if (!entry)
+ return;
+
+ entry->nlink = 1;
+ entry->data = (void *)&prof_cpu_mask;
+ entry->read_proc = prof_cpu_mask_read_proc;
+ entry->write_proc = prof_cpu_mask_write_proc;
+
+ /*
+ * Create entries for all existing IRQs.
+ */
+ for (i = 0; i < NR_IRQS; i++)
+ register_irq_proc(i);
+ }
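/*
 * Illustrative aside, not part of the patch: the /proc interface created by
 * the code above, seen from userspace (the smp_affinity entry only exists on
 * CONFIG_SMP kernels). The mask is parsed by parse_hex_value(), so at most 8
 * hex digits are honoured; the IRQ number 16 is just an example.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/irq/16/smp_affinity", "w");

        if (!f)
                return 1;
        fputs("3\n", f);        /* allow delivery to CPUs 0 and 1 only */
        return fclose(f) ? 1 : 0;
}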
+
--- /dev/null
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.10-rc2-xen0
+# Fri Nov 19 20:16:38 2004
+#
+CONFIG_XEN=y
+CONFIG_ARCH_XEN=y
+CONFIG_NO_IDLE_HZ=y
+
+#
+# XEN
+#
+CONFIG_XEN_PRIVILEGED_GUEST=y
+CONFIG_XEN_PHYSDEV_ACCESS=y
+CONFIG_XEN_BLKDEV_BACKEND=y
+CONFIG_XEN_NETDEV_BACKEND=y
+CONFIG_XEN_BLKDEV_FRONTEND=y
+CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
+CONFIG_XEN_WRITABLE_PAGETABLES=y
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
+CONFIG_X86=y
+# CONFIG_X86_64 is not set
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+# CONFIG_CLEAN_COMPILE is not set
+CONFIG_BROKEN=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_HOTPLUG=y
+CONFIG_KOBJECT_UEVENT=y
+# CONFIG_IKCONFIG is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SHMEM=y
+CONFIG_CC_ALIGN_FUNCTIONS=0
+CONFIG_CC_ALIGN_LABELS=0
+CONFIG_CC_ALIGN_LOOPS=0
+CONFIG_CC_ALIGN_JUMPS=0
+# CONFIG_TINY_SHMEM is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_OBSOLETE_MODPARM=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+
+#
+# X86 Processor Configuration
+#
+CONFIG_XENARCH="i386"
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_GENERIC_IOMAP=y
+# CONFIG_M686 is not set
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+CONFIG_MPENTIUM4=y
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_HPET_EMULATE_RTC is not set
+# CONFIG_SMP is not set
+CONFIG_PREEMPT=y
+CONFIG_X86_CPUID=y
+
+#
+# Firmware Drivers
+#
+# CONFIG_EDD is not set
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
++CONFIG_MTRR=y
+CONFIG_HAVE_DEC_LOCK=y
+# CONFIG_REGPARM is not set
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_LEGACY_PROC=y
+# CONFIG_PCI_NAMES is not set
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SCx200 is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PC-card bridges
+#
+CONFIG_PCMCIA_PROBE=y
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+CONFIG_EARLY_PRINTK=y
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_4KSTACKS is not set
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_PC=y
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+# CONFIG_STANDALONE is not set
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_BLK_CPQ_DA is not set
+CONFIG_BLK_CPQ_CISS_DA=y
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_LBD is not set
+# CONFIG_CDROM_PKTCDVD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+# CONFIG_BLK_DEV_CMD640 is not set
+CONFIG_BLK_DEV_IDEPCI=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_ATIIXP is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+CONFIG_BLK_DEV_SVWKS=y
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SIS5513 is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+# CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+# CONFIG_CHR_DEV_SG is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+# CONFIG_SCSI_SPI_ATTRS is not set
+# CONFIG_SCSI_FC_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+CONFIG_SCSI_AACRAID=y
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+CONFIG_AIC79XX_DEBUG_ENABLE=y
+CONFIG_AIC79XX_DEBUG_MASK=0
+CONFIG_AIC79XX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+CONFIG_SCSI_ATA_PIIX=y
+# CONFIG_SCSI_SATA_NV is not set
+CONFIG_SCSI_SATA_PROMISE=y
+CONFIG_SCSI_SATA_SX4=y
+CONFIG_SCSI_SATA_SIL=y
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_BUSLOGIC=y
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_ISP is not set
+# CONFIG_SCSI_QLOGIC_FC is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLOGIC_1280_1040 is not set
+CONFIG_SCSI_QLA2XXX=y
+# CONFIG_SCSI_QLA21XX is not set
+# CONFIG_SCSI_QLA22XX is not set
+# CONFIG_SCSI_QLA2300 is not set
+# CONFIG_SCSI_QLA2322 is not set
+# CONFIG_SCSI_QLA6312 is not set
+# CONFIG_SCSI_QLA6322 is not set
+# CONFIG_SCSI_SEAGATE is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_ULTRASTOR is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+# CONFIG_MD_LINEAR is not set
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID10 is not set
+CONFIG_MD_RAID5=y
+# CONFIG_MD_RAID6 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_MD_FAULTY is not set
+CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_CRYPT is not set
+CONFIG_DM_SNAPSHOT=y
+CONFIG_DM_MIRROR=y
+# CONFIG_DM_ZERO is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+# CONFIG_IP_TCPDIAG_IPV6 is not set
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_CT_ACCT=y
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+# CONFIG_IP_NF_IRC is not set
+# CONFIG_IP_NF_TFTP is not set
+# CONFIG_IP_NF_AMANDA is not set
+# CONFIG_IP_NF_QUEUE is not set
+CONFIG_IP_NF_IPTABLES=m
+# CONFIG_IP_NF_MATCH_LIMIT is not set
+# CONFIG_IP_NF_MATCH_IPRANGE is not set
+# CONFIG_IP_NF_MATCH_MAC is not set
+# CONFIG_IP_NF_MATCH_PKTTYPE is not set
+# CONFIG_IP_NF_MATCH_MARK is not set
+# CONFIG_IP_NF_MATCH_MULTIPORT is not set
+# CONFIG_IP_NF_MATCH_TOS is not set
+# CONFIG_IP_NF_MATCH_RECENT is not set
+# CONFIG_IP_NF_MATCH_ECN is not set
+# CONFIG_IP_NF_MATCH_DSCP is not set
+# CONFIG_IP_NF_MATCH_AH_ESP is not set
+# CONFIG_IP_NF_MATCH_LENGTH is not set
+# CONFIG_IP_NF_MATCH_TTL is not set
+# CONFIG_IP_NF_MATCH_TCPMSS is not set
+# CONFIG_IP_NF_MATCH_HELPER is not set
+# CONFIG_IP_NF_MATCH_STATE is not set
+# CONFIG_IP_NF_MATCH_CONNTRACK is not set
+# CONFIG_IP_NF_MATCH_OWNER is not set
+# CONFIG_IP_NF_MATCH_PHYSDEV is not set
+# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
+# CONFIG_IP_NF_MATCH_REALM is not set
+# CONFIG_IP_NF_MATCH_SCTP is not set
+# CONFIG_IP_NF_MATCH_COMMENT is not set
+# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
+# CONFIG_IP_NF_FILTER is not set
+# CONFIG_IP_NF_TARGET_LOG is not set
+# CONFIG_IP_NF_TARGET_ULOG is not set
+# CONFIG_IP_NF_TARGET_TCPMSS is not set
+# CONFIG_IP_NF_NAT is not set
+# CONFIG_IP_NF_MANGLE is not set
+# CONFIG_IP_NF_RAW is not set
+# CONFIG_IP_NF_ARPTABLES is not set
+# CONFIG_IP_NF_COMPAT_IPCHAINS is not set
+# CONFIG_IP_NF_COMPAT_IPFWADM is not set
+
+#
+# Bridge: Netfilter Configuration
+#
+# CONFIG_BRIDGE_NF_EBTABLES is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+CONFIG_BRIDGE=y
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_EL3 is not set
+# CONFIG_3C515 is not set
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+CONFIG_E100=y
+# CONFIG_E100_NAPI is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+CONFIG_8139TOO=y
+CONFIG_8139TOO_PIO=y
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=y
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=y
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+# CONFIG_DL2K is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+CONFIG_TIGON3=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PCIPS2 is not set
+# CONFIG_SERIO_RAW is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_HANGCHECK_TIMER is not set
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Dallas's 1-wire bus
+#
+# CONFIG_W1 is not set
+
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+
+#
+# Graphics support
+#
+# CONFIG_FB is not set
+# CONFIG_VIDEO_SELECT is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_REISERFS_FS_XATTR is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_XATTR is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ISO8859_1=y
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=m
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+CONFIG_CRYPTO_DES=m
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES_586 is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=y
+CONFIG_ZLIB_INFLATE=y
--- /dev/null
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+menu "X86 Processor Configuration"
+
+config XENARCH
+ string
+ default i386
+
+config MMU
+ bool
+ default y
+
+config SBUS
+ bool
+
+config UID16
+ bool
+ default y
+
+config GENERIC_ISA_DMA
+ bool
+ default y
+
+config GENERIC_IOMAP
+ bool
+ default y
+
+choice
+ prompt "Processor family"
+ default M686
+
+#config M386
+# bool "386"
+# ---help---
+# This is the processor type of your CPU. This information is used for
+# optimizing purposes. In order to compile a kernel that can run on
+# all x86 CPU types (albeit not optimally fast), you can specify
+# "386" here.
+#
+# The kernel will not necessarily run on earlier architectures than
+# the one you have chosen, e.g. a Pentium optimized kernel will run on
+# a PPro, but not necessarily on a i486.
+#
+# Here are the settings recommended for greatest speed:
+# - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
+# 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels
+# will run on a 386 class machine.
+# - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
+# SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+# - "586" for generic Pentium CPUs lacking the TSC
+# (time stamp counter) register.
+# - "Pentium-Classic" for the Intel Pentium.
+# - "Pentium-MMX" for the Intel Pentium MMX.
+# - "Pentium-Pro" for the Intel Pentium Pro.
+# - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
+# - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
+# - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
+# - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
+# - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+# - "Crusoe" for the Transmeta Crusoe series.
+# - "Efficeon" for the Transmeta Efficeon series.
+# - "Winchip-C6" for original IDT Winchip.
+# - "Winchip-2" for IDT Winchip 2.
+# - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
+# - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
+# - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
+#
+# If you don't know what to do, choose "386".
+
+#config M486
+# bool "486"
+# help
+# Select this for a 486 series processor, either Intel or one of the
+# compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
+# DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
+# U5S.
+
+#config M586
+# bool "586/K5/5x86/6x86/6x86MX"
+# help
+# Select this for an 586 or 686 series processor such as the AMD K5,
+# the Intel 5x86 or 6x86, or the Intel 6x86MX. This choice does not
+# assume the RDTSC (Read Time Stamp Counter) instruction.
+
+#config M586TSC
+# bool "Pentium-Classic"
+# help
+# Select this for a Pentium Classic processor with the RDTSC (Read
+# Time Stamp Counter) instruction for benchmarking.
+
+#config M586MMX
+# bool "Pentium-MMX"
+# help
+# Select this for a Pentium with the MMX graphics/multimedia
+# extended instructions.
+
+config M686
+ bool "Pentium-Pro"
+ help
+ Select this for Intel Pentium Pro chips. This enables the use of
+ Pentium Pro extended instructions, and disables the init-time guard
+ against the f00f bug found in earlier Pentiums.
+
+config MPENTIUMII
+ bool "Pentium-II/Celeron(pre-Coppermine)"
+ help
+ Select this for Intel chips based on the Pentium-II and
+ pre-Coppermine Celeron core. This option enables an unaligned
+ copy optimization, compiles the kernel with optimization flags
+ tailored for the chip, and applies any applicable Pentium Pro
+ optimizations.
+
+config MPENTIUMIII
+ bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
+ help
+ Select this for Intel chips based on the Pentium-III and
+ Celeron-Coppermine core. This option enables use of some
+ extended prefetch instructions in addition to the Pentium II
+ extensions.
+
+config MPENTIUMM
+ bool "Pentium M"
+ help
+ Select this for Intel Pentium M (not Pentium-4 M)
+ notebook chips.
+
+config MPENTIUM4
+ bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
+ help
+ Select this for Intel Pentium 4 chips. This includes the
+ Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
+ (not Pentium M) chips. This option enables compile flags
+ optimized for the chip, uses the correct cache shift, and
+ applies any applicable Pentium III optimizations.
+
+config MK6
+ bool "K6/K6-II/K6-III"
+ help
+ Select this for an AMD K6-family processor. Enables use of
+ some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MK7
+ bool "Athlon/Duron/K7"
+ help
+ Select this for an AMD Athlon K7-family processor. Enables use of
+ some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MK8
+ bool "Opteron/Athlon64/Hammer/K8"
+ help
+ Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables
+ use of some extended instructions, and passes appropriate optimization
+ flags to GCC.
+
+config MCRUSOE
+ bool "Crusoe"
+ help
+ Select this for a Transmeta Crusoe processor. Treats the processor
+ like a 586 with TSC, and sets some GCC optimization flags (like a
+ Pentium Pro with no alignment requirements).
+
+#config MEFFICEON
+# bool "Efficeon"
+# help
+# Select this for a Transmeta Efficeon processor.
+
+#config MWINCHIPC6
+# bool "Winchip-C6"
+# help
+# Select this for an IDT Winchip C6 chip. Linux and GCC
+# treat this chip as a 586TSC with some extended instructions
+# and alignment requirements.
+
+#config MWINCHIP2
+# bool "Winchip-2"
+# help
+# Select this for an IDT Winchip-2. Linux and GCC
+# treat this chip as a 586TSC with some extended instructions
+# and alignment requirements.
+
+#config MWINCHIP3D
+# bool "Winchip-2A/Winchip-3"
+# help
+# Select this for an IDT Winchip-2A or 3. Linux and GCC
+# treat this chip as a 586TSC with some extended instructions
+# and alignment requirements. Also enable out of order memory
+# stores for this CPU, which can increase performance of some
+# operations.
+
+config MCYRIXIII
+ bool "CyrixIII/VIA-C3"
+ help
+ Select this for a Cyrix III or C3 chip. Presently Linux and GCC
+ treat this chip as a generic 586. Whilst the CPU is 686 class,
+ it lacks the cmov extension which gcc assumes is present when
+ generating 686 code.
+ Note that Nehemiah (Model 9) and above will not boot with this
+ kernel due to them lacking the 3DNow! instructions used in earlier
+ incarnations of the CPU.
+
+config MVIAC3_2
+ bool "VIA C3-2 (Nehemiah)"
+ help
+ Select this for a VIA C3 "Nehemiah". Selecting this enables usage
+ of SSE and tells gcc to treat the CPU as a 686.
+ Note, this kernel will not boot on older (pre model 9) C3s.
+
+endchoice
+
+config X86_GENERIC
+ bool "Generic x86 support"
+ help
+ Instead of just including optimizations for the selected
+ x86 variant (e.g. PII, Crusoe or Athlon), include some more
+ generic optimizations as well. This will make the kernel
+ perform better on x86 CPUs other than that selected.
+
+ This is really intended for distributors who need more
+ generic optimizations.
+
+#
+# Define implied options from the CPU selection here
+#
+config X86_CMPXCHG
+ bool
+ depends on !M386
+ default y
+
+config X86_XADD
+ bool
+ depends on !M386
+ default y
+
+config X86_L1_CACHE_SHIFT
+ int
+ default "7" if MPENTIUM4 || X86_GENERIC
+ default "4" if X86_ELAN || M486 || M386
+ default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
+ default "6" if MK7 || MK8 || MPENTIUMM
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ depends on !M386
+ default y
+
+config X86_PPRO_FENCE
+ bool
+ depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
+ default y
+
+config X86_F00F_BUG
+ bool
+ depends on M586MMX || M586TSC || M586 || M486 || M386
+ default y
+
+config X86_WP_WORKS_OK
+ bool
+ depends on !M386
+ default y
+
+config X86_INVLPG
+ bool
+ depends on !M386
+ default y
+
+config X86_BSWAP
+ bool
+ depends on !M386
+ default y
+
+config X86_POPAD_OK
+ bool
+ depends on !M386
+ default y
+
+config X86_ALIGNMENT_16
+ bool
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
+ default y
+
+config X86_GOOD_APIC
+ bool
+ depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
+ default y
+
+config X86_INTEL_USERCOPY
+ bool
+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
+ default y
+
+config X86_USE_PPRO_CHECKSUM
+ bool
+ depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
+ default y
+
+config X86_USE_3DNOW
+ bool
+ depends on MCYRIXIII || MK7
+ default y
+
+config X86_OOSTORE
+ bool
+ depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
+ default y
+
+config HPET_TIMER
+ bool
+ default n
+#config HPET_TIMER
+# bool "HPET Timer Support"
+# help
+# This enables the use of the HPET for the kernel's internal timer.
+# HPET is the next generation timer replacing legacy 8254s.
+# You can safely choose Y here. However, HPET will only be
+# activated if the platform and the BIOS support this feature.
+# Otherwise the 8254 will be used for timing services.
+#
+# Choose N to continue using the legacy 8254 timer.
+
+config HPET_EMULATE_RTC
+ def_bool HPET_TIMER && RTC=y
+
+config SMP
+ bool
+ default n
+#config SMP
+# bool "Symmetric multi-processing support"
+# ---help---
+# This enables support for systems with more than one CPU. If you have
+# a system with only one CPU, like most personal computers, say N. If
+# you have a system with more than one CPU, say Y.
+#
+# If you say N here, the kernel will run on single and multiprocessor
+# machines, but will use only one CPU of a multiprocessor machine. If
+# you say Y here, the kernel will run on many, but not all,
+# singleprocessor machines. On a singleprocessor machine, the kernel
+# will run faster if you say N here.
+#
+# Note that if you say Y here and choose architecture "586" or
+# "Pentium" under "Processor family", the kernel will not work on 486
+# architectures. Similarly, multiprocessor kernels for the "PPro"
+# architecture may not work on all Pentium based boards.
+#
+# People using multiprocessor machines who say Y here should also say
+# Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+# Management" code will be disabled if you say Y here.
+#
+# See also the <file:Documentation/smp.txt>,
+# <file:Documentation/i386/IO-APIC.txt>,
+# <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+# <http://www.tldp.org/docs.html#howto>.
+#
+# If you don't know what to do here, say N.
+
+config NR_CPUS
+ int "Maximum number of CPUs (2-255)"
+ range 2 255
+ depends on SMP
+ default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
+ default "8"
+ help
+ This allows you to specify the maximum number of CPUs which this
+ kernel will support. The maximum supported value is 255 and the
+ minimum value which makes sense is 2.
+
+ This is purely to save memory - each supported CPU adds
+ approximately eight kilobytes to the kernel image.
+
+config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+ default n
+ help
+ SMT scheduler support improves the CPU scheduler's decision making
+ when dealing with Intel Pentium 4 chips with HyperThreading at a
+ cost of slightly increased overhead in some places. If unsure say
+ N here.
+
+config PREEMPT
+ bool "Preemptible Kernel"
+ help
+ This option reduces the latency of the kernel when reacting to
+ real-time or interactive events by allowing a low priority process to
+ be preempted even if it is in kernel mode executing a system call.
+ This allows applications to run more reliably even when the system is
+ under load.
+
+ Say Y here if you are building a kernel for a desktop, embedded
+ or real-time system. Say N if you are unsure.
+
+#config X86_TSC
+# bool
+# depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
+# default y
+
+#config X86_MCE
+# bool "Machine Check Exception"
+# depends on !X86_VOYAGER
+# ---help---
+# Machine Check Exception support allows the processor to notify the
+# kernel if it detects a problem (e.g. overheating, component failure).
+# The action the kernel takes depends on the severity of the problem,
+# ranging from a warning message on the console, to halting the machine.
+# Your processor must be a Pentium or newer to support this - check the
+# flags in /proc/cpuinfo for mce. Note that some older Pentium systems
+# have a design flaw which leads to false MCE events - hence MCE is
+# disabled on all P5 processors, unless explicitly enabled with "mce"
+# as a boot argument. Similarly, if MCE is built in and creates a
+# problem on some new non-standard machine, you can boot with "nomce"
+# to disable it. MCE support simply ignores non-MCE processors like
+# the 386 and 486, so nearly everyone can say Y here.
+
+#config X86_MCE_NONFATAL
+# tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
+# depends on X86_MCE
+# help
+# Enabling this feature starts a timer that triggers every 5 seconds which
+# will look at the machine check registers to see if anything happened.
+# Non-fatal problems automatically get corrected (but still logged).
+# Disable this if you don't want to see these messages.
+# Seeing the messages this option prints out may be indicative of dying hardware,
+# or out-of-spec (ie, overclocked) hardware.
+# This option only does something on certain CPUs.
+# (AMD Athlon/Duron and Intel Pentium 4)
+
+#config X86_MCE_P4THERMAL
+# bool "check for P4 thermal throttling interrupt."
+# depends on X86_MCE && (X86_UP_APIC || SMP)
+# help
+# Enabling this feature will cause a message to be printed when the P4
+# enters thermal throttling.
+
+#config MICROCODE
+# tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+# ---help---
+# If you say Y here and also to "/dev file system support" in the
+# 'File systems' section, you will be able to update the microcode on
+# Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
+# Pentium III, Pentium 4, Xeon etc. You will obviously need the
+# actual microcode binary data itself which is not shipped with the
+# Linux kernel.
+#
+# For latest news and information on obtaining all the required
+# ingredients for this driver, check:
+# <http://www.urbanmyth.org/microcode/>.
+#
+# To compile this driver as a module, choose M here: the
+# module will be called microcode.
+
+#config X86_MSR
+# tristate "/dev/cpu/*/msr - Model-specific register support"
+# help
+# This device gives privileged processes access to the x86
+# Model-Specific Registers (MSRs). It is a character device with
+# major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+# MSR accesses are directed to a specific CPU on multi-processor
+# systems.
+
+config X86_CPUID
+ tristate "/dev/cpu/*/cpuid - CPU information support"
+ help
+ This device gives processes access to the x86 CPUID instruction to
+ be executed on a specific processor. It is a character device
+ with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
+ /dev/cpu/31/cpuid.
+
+source "drivers/firmware/Kconfig"
+
+choice
+ prompt "High Memory Support"
+ default NOHIGHMEM
+
+config NOHIGHMEM
+ bool "off"
+ ---help---
+ Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+ However, the address space of 32-bit x86 processors is only 4
+ Gigabytes large. That means that, if you have a large amount of
+ physical memory, not all of it can be "permanently mapped" by the
+ kernel. The physical memory that's not permanently mapped is called
+ "high memory".
+
+ If you are compiling a kernel which will never run on a machine with
+ more than 1 Gigabyte total physical RAM, answer "off" here (default
+ choice and suitable for most users). This will result in a "3GB/1GB"
+ split: 3GB are mapped so that each process sees a 3GB virtual memory
+ space and the remaining part of the 4GB virtual memory space is used
+ by the kernel to permanently map as much physical memory as
+ possible.
+
+ If the machine has between 1 and 4 Gigabytes physical RAM, then
+ answer "4GB" here.
+
+ If more than 4 Gigabytes is used then answer "64GB" here. This
+ selection turns Intel PAE (Physical Address Extension) mode on.
+ PAE implements 3-level paging on IA32 processors. PAE is fully
+ supported by Linux, PAE mode is implemented on all recent Intel
+ processors (Pentium Pro and better). NOTE: If you say "64GB" here,
+ then the kernel will not boot on CPUs that don't support PAE!
+
+ The actual amount of total physical memory will either be
+ auto detected or can be forced by using a kernel command line option
+ such as "mem=256M". (Try "man bootparam" or see the documentation of
+ your boot loader (lilo or loadlin) about how to pass options to the
+ kernel at boot time.)
+
+ If unsure, say "off".
+
+config HIGHMEM4G
+ bool "4GB"
+ help
+ Select this if you have a 32-bit processor and between 1 and 4
+ gigabytes of physical RAM.
+
+#config HIGHMEM64G
+# bool "64GB"
+# help
+# Select this if you have a 32-bit processor and more than 4
+# gigabytes of physical RAM.
+
+endchoice
+
+config HIGHMEM
+ bool
+ depends on HIGHMEM64G || HIGHMEM4G
+ default y
+
+config X86_PAE
+ bool
+ depends on HIGHMEM64G
+ default y
+
+# Common NUMA Features
+config NUMA
+ bool "Numa Memory Allocation and Scheduler Support"
+ depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI))
+ default n if X86_PC
+ default y if (X86_NUMAQ || X86_SUMMIT)
+
+# Need comments to help the hapless user trying to turn on NUMA support
+comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support"
+ depends on X86_NUMAQ && (!HIGHMEM64G || !SMP)
+
+comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
+ depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
+
+config DISCONTIGMEM
+ bool
+ depends on NUMA
+ default y
+
+config HAVE_ARCH_BOOTMEM_NODE
+ bool
+ depends on NUMA
+ default y
+
+#config HIGHPTE
+# bool "Allocate 3rd-level pagetables from highmem"
+# depends on HIGHMEM4G || HIGHMEM64G
+# help
+# The VM uses one page table entry for each page of physical memory.
+# For systems with a lot of RAM, this can be wasteful of precious
+# low memory. Setting this option will put user-space page table
+# entries in high memory.
+
++config MTRR
++ bool
++ depends on XEN_PRIVILEGED_GUEST
++ default y
++
+#config MTRR
+# bool "MTRR (Memory Type Range Register) support"
+# ---help---
+# On Intel P6 family processors (Pentium Pro, Pentium II and later)
+# the Memory Type Range Registers (MTRRs) may be used to control
+# processor access to memory ranges. This is most useful if you have
+# a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+# allows bus write transfers to be combined into a larger transfer
+# before bursting over the PCI/AGP bus. This can increase performance
+# of image write operations 2.5 times or more. Saying Y here creates a
+# /proc/mtrr file which may be used to manipulate your processor's
+# MTRRs. Typically the X server should use this.
+#
+# This code has a reasonably generic interface so that similar
+# control registers on other processors can be easily supported
+# as well:
+#
+# The Cyrix 6x86, 6x86MX and M II processors have Address Range
+# Registers (ARRs) which provide a similar functionality to MTRRs. For
+# these, the ARRs are used to emulate the MTRRs.
+# The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+# MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
+# write-combining. All of these processors are supported by this code
+# and it makes sense to say Y here if you have one of them.
+#
+# Saying Y here also fixes a problem with buggy SMP BIOSes which only
+# set the MTRRs for the boot CPU and not for the secondary CPUs. This
+# can lead to all sorts of problems, so it's good to say Y here.
+#
+# You can safely say Y even if your machine doesn't have MTRRs, you'll
+# just add about 9 KB to your kernel.
+#
+# See <file:Documentation/mtrr.txt> for more information.
+
+config IRQBALANCE
+ bool "Enable kernel irq balancing"
+ depends on SMP && X86_IO_APIC
+ default y
+ help
+ The default yes will allow the kernel to do irq load balancing.
+ Saying no will keep the kernel from doing irq load balancing.
+
+config HAVE_DEC_LOCK
+ bool
+ depends on (SMP || PREEMPT) && X86_CMPXCHG
+ default y
+
+# turning this on wastes a bunch of space.
+# Summit needs it only when NUMA is on
+config BOOT_IOREMAP
+ bool
+ depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
+ default y
+
+config REGPARM
+ bool "Use register arguments (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ default n
+ help
+ Compile the kernel with -mregparm=3. This uses a different ABI
+ and passes the first three arguments of a function call in registers.
+ This will probably break binary only modules.
+
+ This feature is only enabled for gcc-3.0 and later - earlier compilers
+ generate incorrect output with certain kernel constructs when
+ -mregparm=3 is used.
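#
# Illustrative aside, not part of the patch: with -mregparm=3 the first three
# integer arguments travel in %eax, %edx and %ecx instead of on the stack,
# which is the ABI change the help text above refers to. A hypothetical
# per-function equivalent in GCC syntax:
#
#   int __attribute__((regparm(3))) add3(int a, int b, int c)
#   {
#           return a + b + c;   /* a=%eax, b=%edx, c=%ecx on entry */
#   }
#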
+
+
+if XEN_PHYSDEV_ACCESS
+
+menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+
+config X86_VISWS_APIC
+ bool
+ depends on X86_VISWS
+ default y
+
+config X86_LOCAL_APIC
+ bool
+ depends on (X86_VISWS || SMP) && !X86_VOYAGER
+ default y
+
+config X86_IO_APIC
+ bool
+ depends on SMP && !(X86_VISWS || X86_VOYAGER)
+ default y
+
+config PCI
+ bool "PCI support" if !X86_VISWS
+ depends on !X86_VOYAGER
+ default y if X86_VISWS
+ help
+ Find out whether you have a PCI motherboard. PCI is the name of a
+ bus system, i.e. the way the CPU talks to the other stuff inside
+ your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+ VESA. If you have PCI, say Y, otherwise N.
+
+ The PCI-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>, contains valuable
+ information about which PCI hardware does work under Linux and which
+ doesn't.
+
+#choice
+# prompt "PCI access mode"
+# depends on PCI && !X86_VISWS
+# default PCI_GOANY
+# ---help---
+# On PCI systems, the BIOS can be used to detect the PCI devices and
+# determine their configuration. However, some old PCI motherboards
+# have BIOS bugs and may crash if this is done. Also, some embedded
+# PCI-based systems don't have any BIOS at all. Linux can also try to
+# detect the PCI hardware directly without using the BIOS.
+#
+# With this option, you can specify how Linux should detect the
+# PCI devices. If you choose "BIOS", the BIOS will be used,
+# if you choose "Direct", the BIOS won't be used, and if you
+# choose "MMConfig", then PCI Express MMCONFIG will be used.
+# If you choose "Any", the kernel will try MMCONFIG, then the
+# direct access method and falls back to the BIOS if that doesn't
+# work. If unsure, go with the default, which is "Any".
+#
+#config PCI_GOBIOS
+# bool "BIOS"
+#
+#config PCI_GOMMCONFIG
+# bool "MMConfig"
+#
+#config PCI_GODIRECT
+# bool "Direct"
+#
+#config PCI_GOANY
+# bool "Any"
+#
+#endchoice
+#
+#config PCI_BIOS
+# bool
+# depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+# default y
+#
+#config PCI_DIRECT
+# bool
+# depends on PCI && ((PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
+# default y
+
+config PCI_DIRECT
+ bool
+ depends on PCI
+ default y
+
+source "drivers/pci/Kconfig"
+
+config ISA
+ bool "ISA support"
+ depends on !(X86_VOYAGER || X86_VISWS)
+ help
+ Find out whether you have ISA slots on your motherboard. ISA is the
+ name of a bus system, i.e. the way the CPU talks to the other stuff
+ inside your box. Other bus systems are PCI, EISA, MicroChannel
+ (MCA) or VESA. ISA is an older system, now being displaced by PCI;
+ newer boards don't support it. If you have ISA, say Y, otherwise N.
+
+config EISA
+ bool "EISA support"
+ depends on ISA
+ ---help---
+ The Extended Industry Standard Architecture (EISA) bus was
+ developed as an open alternative to the IBM MicroChannel bus.
+
+ The EISA bus provided some of the features of the IBM MicroChannel
+ bus while maintaining backward compatibility with cards made for
+ the older ISA bus. The EISA bus saw limited use between 1988 and
+ 1995 when it was made obsolete by the PCI bus.
+
+ Say Y here if you are building a kernel for an EISA-based machine.
+
+ Otherwise, say N.
+
+source "drivers/eisa/Kconfig"
+
+config MCA
+ bool "MCA support"
+ depends on !(X86_VISWS || X86_VOYAGER)
+ help
+ MicroChannel Architecture is found in some IBM PS/2 machines and
+ laptops. It is a bus system similar to PCI or ISA. See
+ <file:Documentation/mca.txt> (and especially the web page given
+ there) before attempting to build an MCA bus kernel.
+
+config MCA
+ depends on X86_VOYAGER
+ default y if X86_VOYAGER
+
+source "drivers/mca/Kconfig"
+
+config SCx200
+ tristate "NatSemi SCx200 support"
+ depends on !X86_VOYAGER
+ help
+ This provides basic support for the National Semiconductor SCx200
+ processor. Right now this is just a driver for the GPIO pins.
+
+ If you don't know what to do here, say N.
+
+ This support is also available as a module. If compiled as a
+ module, it will be called scx200.
+
+source "drivers/pcmcia/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+endif
+
+menu "Kernel hacking"
+
+config DEBUG_KERNEL
+ bool "Kernel debugging"
+ help
+ Say Y here if you are developing drivers or trying to debug and
+ identify kernel problems.
+
+config EARLY_PRINTK
+ bool "Early printk" if EMBEDDED
+ default y
+ help
+ Write kernel log output directly into the VGA buffer or to a serial
+ port.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized. For normal operation
+ it is not recommended because it looks ugly and doesn't cooperate
+ with klogd/syslogd or the X server. You should normally say N here,
+ unless you want to debug such a crash.
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ depends on DEBUG_KERNEL
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
+config DEBUG_SLAB
+ bool "Debug memory allocations"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here to have the kernel do limited verification on memory
+ allocation as well as poisoning memory on free to catch use of freed
+ memory.
+
+config MAGIC_SYSRQ
+ bool "Magic SysRq key"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here, you will have some control over the system even
+ if the system crashes for example during kernel debugging (e.g., you
+ will be able to flush the buffer cache to disk, reboot the system
+ immediately or dump some status information). This is accomplished
+ by pressing various keys while holding SysRq (Alt+PrintScreen). It
+ also works on a serial console (on PC hardware at least), if you
+ send a BREAK and then within 5 seconds a command keypress. The
+ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+ unless you really know what this hack does.
+
+config DEBUG_SPINLOCK
+ bool "Spinlock debugging"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here and build SMP to catch missing spinlock initialization
+ and certain other kinds of spinlock errors commonly made. This is
+ best used in conjunction with the NMI watchdog so that spinlock
+ deadlocks are also debuggable.
+
+config DEBUG_PAGEALLOC
+ bool "Page alloc debugging"
+ depends on DEBUG_KERNEL
+ help
+ Unmap pages from the kernel linear mapping after free_pages().
+ This results in a large slowdown, but helps to find certain types
+ of memory corruptions.
+
+config DEBUG_HIGHMEM
+ bool "Highmem debugging"
+ depends on DEBUG_KERNEL && HIGHMEM
+ help
+ This option enables additional error checking for high memory systems.
+ Disable for production systems.
+
+config DEBUG_INFO
+ bool "Compile the kernel with debug info"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here the resulting kernel image will include
+ debugging info resulting in a larger kernel image.
+ Say Y here only if you plan to use gdb to debug the kernel.
+ If you don't debug the kernel, you can say N.
+
+config DEBUG_SPINLOCK_SLEEP
+ bool "Sleep-inside-spinlock checking"
+ help
+ If you say Y here, various routines which may sleep will become very
+ noisy if they are called with a spinlock held.
+
+config FRAME_POINTER
+ bool "Compile the kernel with frame pointers"
+ help
+ If you say Y here the resulting kernel image will be slightly larger
+ and slower, but it will give very useful debugging information.
+ If you don't debug the kernel, you can say N, but we may not be able
+ to solve problems without frame pointers.
+
+config 4KSTACKS
+ bool "Use 4Kb for kernel stacks instead of 8Kb"
+ help
+ If you say Y here the kernel will use a 4Kb stacksize for the
+ kernel stack attached to each process/thread. This facilitates
+ running more threads on a system and also reduces the pressure
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+config X86_FIND_SMP_CONFIG
+ bool
+ depends on X86_LOCAL_APIC || X86_VOYAGER
+ default y
+
+config X86_MPPARSE
+ bool
+ depends on X86_LOCAL_APIC && !X86_VISWS
+ default y
+
+endmenu
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_HARDIRQS
+ bool
+ default y
+
+config GENERIC_IRQ_PROBE
+ bool
+ default y
+
+config X86_SMP
+ bool
+ depends on SMP && !X86_VOYAGER
+ default y
+
+config X86_HT
+ bool
+ depends on SMP && !(X86_VISWS || X86_VOYAGER)
+ default y
+
+config X86_BIOS_REBOOT
+ bool
+ depends on !(X86_VISWS || X86_VOYAGER)
+ default y
+
+config X86_TRAMPOLINE
+ bool
+ depends on X86_SMP || (X86_VOYAGER && SMP)
+ default y
+
+config PC
+ bool
+ depends on X86 && !EMBEDDED
+ default y
+
+endmenu
--- /dev/null
- #obj-$(CONFIG_MTRR) += ../../../../i386/kernel/cpu/mtrr/
+#
+# Makefile for x86-compatible CPU details and quirks
+#
+
+CFLAGS += -Iarch/i386/kernel/cpu
+
+obj-y := common.o
+c-obj-y += proc.o
+
+c-obj-y += amd.o
+c-obj-y += cyrix.o
+c-obj-y += centaur.o
+c-obj-y += transmeta.o
+c-obj-y += intel.o intel_cacheinfo.o
+c-obj-y += rise.o
+c-obj-y += nexgen.o
+c-obj-y += umc.o
+
+#obj-$(CONFIG_X86_MCE) += ../../../../i386/kernel/cpu/mcheck/
+
++obj-$(CONFIG_MTRR) += mtrr/
+#obj-$(CONFIG_CPU_FREQ) += ../../../../i386/kernel/cpu/cpufreq/
+
+c-link :=
+
+$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)):
+ @ln -fsn $(srctree)/arch/i386/kernel/cpu/$(notdir $@) $@
+
+obj-y += $(c-obj-y)
+
+clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
--- /dev/null
- #define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(%reg)
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * ptrace needs to have all regs on the stack.
+ * if the order here is changed, it needs to be
+ * updated in fork.c:copy_process, signal.c:do_signal,
+ * ptrace.c and ptrace.h
+ *
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - orig_eax
+ * 28(%esp) - %eip
+ * 2C(%esp) - %cs
+ * 30(%esp) - %eflags
+ * 34(%esp) - %oldesp
+ * 38(%esp) - %oldss
+ *
+ * "current" is in register %ebx during any slow entries.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/page.h>
+#include "irq_vectors.h"
+#include <asm-xen/xen-public/xen.h>
+
+#define nr_syscalls ((syscall_table_size)/4)
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+ORIG_EAX = 0x24
+EIP = 0x28
+CS = 0x2C
++EVENT_MASK = 0x2E
+EFLAGS = 0x30
+OLDESP = 0x34
+OLDSS = 0x38
+
+CF_MASK = 0x00000001
+TF_MASK = 0x00000100
+IF_MASK = 0x00000200
+DF_MASK = 0x00000400
+NT_MASK = 0x00004000
+VM_MASK = 0x00020000
+
+/* Offsets into shared_info_t. */
+#define evtchn_upcall_pending /* 0 */
+#define evtchn_upcall_mask 1
+
++#define XEN_GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg
+#define XEN_BLOCK_EVENTS(reg) movb $1,evtchn_upcall_mask(reg)
+#define XEN_UNBLOCK_EVENTS(reg) movb $0,evtchn_upcall_mask(reg)
- #define preempt_stop movl HYPERVISOR_shared_info,%esi ; \
- XEN_BLOCK_EVENTS(%esi)
++#define XEN_SAVE_UPCALL_MASK(reg,tmp,off) \
++ movb evtchn_upcall_mask(reg), tmp; \
++ movb tmp, off(%esp)
++
++#define XEN_TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg)
+
+#ifdef CONFIG_PREEMPT
- #define SAVE_ALL \
++#define preempt_stop XEN_BLOCK_EVENTS(%esi)
+#else
+#define preempt_stop
+#define resume_kernel restore_all_enable_events
+#endif
+
- movl %edx, %es;
++#define SAVE_ALL_NO_EVENTMASK \
+ cld; \
+ pushl %es; \
+ pushl %ds; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ movl $(__USER_DS), %edx; \
+ movl %edx, %ds; \
- movl HYPERVISOR_shared_info,%esi
- XEN_BLOCK_EVENTS(%esi) # make tests atomic
- # make sure we don't miss an interrupt
++ movl %edx, %es
++
++#define SAVE_ALL \
++ SAVE_ALL_NO_EVENTMASK; \
++ XEN_GET_VCPU_INFO(%esi); \
++ XEN_SAVE_UPCALL_MASK(%esi,%dl,EVENT_MASK)
+
+#define RESTORE_INT_REGS \
+ popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax
+
+#define RESTORE_REGS \
+ RESTORE_INT_REGS; \
+1: popl %ds; \
+2: popl %es; \
+.section .fixup,"ax"; \
+3: movl $0,(%esp); \
+ jmp 1b; \
+4: movl $0,(%esp); \
+ jmp 2b; \
+.previous; \
+.section __ex_table,"a";\
+ .align 4; \
+ .long 1b,3b; \
+ .long 2b,4b; \
+.previous
+
+
+#define RESTORE_ALL \
+ RESTORE_REGS \
+ addl $4, %esp; \
+1: iret; \
+.section .fixup,"ax"; \
+2: movl $(__USER_DS), %edx; \
+ movl %edx, %ds; \
+ movl %edx, %es; \
+ movl $11,%eax; \
+ call do_exit; \
+.previous; \
+.section __ex_table,"a";\
+ .align 4; \
+ .long 1b,2b; \
+.previous
+
+
+ENTRY(ret_from_fork)
+ pushl %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl %eax
++ XEN_GET_VCPU_INFO(%esi)
+ jmp syscall_exit
+
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
+
+ # userspace resumption stub bypassing syscall exit tracing
+ ALIGN
+ret_from_exception:
+ preempt_stop
+ret_from_intr:
+ GET_THREAD_INFO(%ebp)
+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb CS(%esp), %al
+ testl $(VM_MASK | 2), %eax
+ jz resume_kernel # returning to kernel or vm86-space
+ENTRY(resume_userspace)
- ret_syscall_tests:
++ XEN_GET_VCPU_INFO(%esi)
++ XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
- jmp restore_all_enable_events
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
+ # int/exception return?
+ jne work_pending
- movl HYPERVISOR_shared_info,%esi
++ jmp restore_all
+
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
- jnz restore_all_enable_events
++ XEN_GET_VCPU_INFO(%esi)
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jz restore_all_enable_events
- testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_enable_events
++ jnz restore_all
+need_resched:
+ movl TI_flags(%ebp), %ecx # need_resched set ?
+ testb $_TIF_NEED_RESCHED, %cl
- XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
++ jz restore_all
++ testb $0xFF,EVENT_MASK(%esp) # interrupts off (exception path) ?
++ jnz restore_all
+ movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp)
- movl HYPERVISOR_shared_info,%esi
- XEN_BLOCK_EVENTS(%esi) # make tests atomic
++ XEN_UNBLOCK_EVENTS(%esi)
+ call schedule
- movl HYPERVISOR_shared_info,%esi
- XEN_BLOCK_EVENTS(%esi) # make tests atomic
- # make sure we don't miss an interrupt
++ XEN_BLOCK_EVENTS(%esi)
+ movl $0,TI_preempt_count(%ebp)
+ jmp need_resched
+#endif
+
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+ the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
+
+ # sysenter call handler stub
+ENTRY(sysenter_entry)
+ movl TSS_sysenter_esp0(%esp),%esp
+sysenter_past_esp:
+ sti
+ pushl $(__USER_DS)
+ pushl %ebp
+ pushfl
+ pushl $(__USER_CS)
+ pushl $SYSENTER_RETURN
+
+/*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+ */
+ cmpl $__PAGE_OFFSET-3,%ebp
+ jae syscall_fault
+1: movl (%ebp),%ebp
+.section __ex_table,"a"
+ .align 4
+ .long 1b,syscall_fault
+.previous
+
+ pushl %eax
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+
+ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ cmpl $(nr_syscalls), %eax
+ jae syscall_badsys
+ call *sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp)
+ cli
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx
+ jne syscall_exit_work
+/* if something modifies registers it must also disable sysexit */
+ movl EIP(%esp), %edx
+ movl OLDESP(%esp), %ecx
+ sti
+ sysexit
+
+
+ # system call handler stub
+ENTRY(system_call)
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ # system call tracing in operation
+ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ cmpl $(nr_syscalls), %eax
+ jae syscall_badsys
+syscall_call:
+ call *sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp) # store the return value
+syscall_exit:
- jmp restore_all_enable_events
++ XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ movl TI_flags(%ebp), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx # current->work
+ jne syscall_exit_work
- XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
++restore_all:
++ testl $VM_MASK, EFLAGS(%esp)
++ jnz resume_vm86
++ movb EVENT_MASK(%esp), %al
++ notb %al # %al == ~saved_mask
++ andb evtchn_upcall_mask(%esi),%al
++ andb $1,%al # %al == mask & ~saved_mask
++ jnz restore_all_enable_events # != 0 => reenable event delivery
++ RESTORE_ALL
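++
++# Illustrative note (not part of the original patch): the test above computes
++# evtchn_upcall_mask & ~saved_mask. For example, if events were unmasked when
++# this frame was saved (saved mask 0) but are masked now (mask 1), the result
++# is 1 and we branch to restore_all_enable_events to re-enable delivery before
++# returning; if events were already masked at entry (saved mask 1), the result
++# is 0 and we return with them still masked.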
++
++resume_vm86:
++ XEN_UNBLOCK_EVENTS(%esi)
++ RESTORE_REGS
++ movl %eax,(%esp)
++ movl $__HYPERVISOR_switch_vm86,%eax
++ int $0x82
++ ud2
+
+ # perform work that needs to be done immediately before resumption
+ ALIGN
+work_pending:
- movl HYPERVISOR_shared_info,%esi
- XEN_BLOCK_EVENTS(%esi) # make tests atomic
- # make sure we don't miss an interrupt
+ testb $_TIF_NEED_RESCHED, %cl
+ jz work_notifysig
+work_resched:
+ call schedule
- jz restore_all_enable_events
- # XXXcl sti missing???
- XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
++ XEN_BLOCK_EVENTS(%esi) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
+ # than syscall tracing?
- movl HYPERVISOR_shared_info,%esi
- jmp restore_all_enable_events
++ jz restore_all
+ testb $_TIF_NEED_RESCHED, %cl
+ jnz work_resched
+
+work_notifysig: # deal with pending signals and
+ # notify-resume requests
+ testl $VM_MASK, EFLAGS(%esp)
+ movl %esp, %eax
+ jne work_notifysig_v86 # returning to kernel-space or
+ # vm86-space
+ xorl %edx, %edx
+ call do_notify_resume
- movl HYPERVISOR_shared_info,%esi
- jmp restore_all_enable_events
++ jmp restore_all
+
+ ALIGN
+work_notifysig_v86:
+ pushl %ecx # save ti_flags for do_notify_resume
+ call save_v86_state # %eax contains pt_regs pointer
+ popl %ecx
+ movl %eax, %esp
+ xorl %edx, %edx
+ call do_notify_resume
- movl HYPERVISOR_shared_info,%esi
++ jmp restore_all
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_trace_entry:
+ movl $-ENOSYS,EAX(%esp)
+ movl %esp, %eax
+ xorl %edx,%edx
+ call do_syscall_trace
+ movl ORIG_EAX(%esp), %eax
+ cmpl $(nr_syscalls), %eax
+ jnae syscall_call
+ jmp syscall_exit
+
+ # perform syscall exit tracing
+ ALIGN
+syscall_exit_work:
- XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
- # could let do_syscall_trace() call
+ testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
+ jz work_pending
- SAVE_ALL
- GET_THREAD_INFO(%ebp)
++ XEN_UNBLOCK_EVENTS(%esi) # could let do_syscall_trace() call
+ # schedule() instead
+ movl %esp, %eax
+ movl $1, %edx
+ call do_syscall_trace
+ jmp resume_userspace
+
+ ALIGN
+syscall_fault:
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ movl $-EFAULT,EAX(%esp)
+ jmp resume_userspace
+
+ ALIGN
+syscall_badsys:
+ movl $-ENOSYS,EAX(%esp)
+ jmp resume_userspace
+
++#if 0 /* XEN */
++/*
++ * Build the entry stubs and pointer table with
++ * some assembler magic.
++ */
++.data
++ENTRY(interrupt)
++.text
++
++vector=0
++ENTRY(irq_entries_start)
++.rept NR_IRQS
++ ALIGN
++1: pushl $vector-256
++ jmp common_interrupt
++.data
++ .long 1b
++.text
++vector=vector+1
++.endr
++
++ ALIGN
++common_interrupt:
++ SAVE_ALL
++ call do_IRQ
++ jmp ret_from_intr
++
++#define BUILD_INTERRUPT(name, nr) \
++ENTRY(name) \
++ pushl $nr-256; \
++ SAVE_ALL \
++ call smp_/**/name; \
++ jmp ret_from_intr;
++
++/* The include is where all of the SMP etc. interrupts come from */
++#include "entry_arch.h"
++#endif /* XEN */
++
+ENTRY(divide_error)
+ pushl $0 # no error code
+ pushl $do_divide_error
+ ALIGN
+error_code:
+ pushl %ds
+ pushl %eax
+ xorl %eax, %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ decl %eax # eax = -1
+ pushl %ecx
+ pushl %ebx
+ cld
+ movl %es, %ecx
+ movl ES(%esp), %edi # get the function address
+ movl ORIG_EAX(%esp), %edx # get the error code
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, ES(%esp)
+ movl $(__USER_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %esp,%eax # pt_regs pointer
++ XEN_GET_VCPU_INFO(%esi)
++ XEN_SAVE_UPCALL_MASK(%esi,%dl,EVENT_MASK+8)
+ call *%edi
+ jmp ret_from_exception
+
+# A note on the "critical region" in our callback handler.
+# We want to avoid stacking callback handlers due to events occurring
+# during handling of the last event. To do this, we keep events disabled
+# until we've done all processing. HOWEVER, we must enable events before
+# popping the stack frame (can't be done atomically) and so it would still
+# be possible to get enough handler activations to overflow the stack.
+# Although unlikely, bugs of that kind are hard to track down, so we'd
+# like to avoid the possibility.
+# So, on entry to the handler we detect whether we interrupted an
+# existing activation in its critical region -- if so, we pop the current
+# activation and restart the handler using the previous one.
+ENTRY(hypervisor_callback)
+ pushl %eax
- 11: push %esp
++ SAVE_ALL_NO_EVENTMASK
+ movl EIP(%esp),%eax
+ cmpl $scrit,%eax
+ jb 11f
+ cmpl $ecrit,%eax
+ jb critical_region_fixup
- movl HYPERVISOR_shared_info,%esi
- movb CS(%esp),%cl
- test $2,%cl # slow return to ring 2 or 3
- jne ret_syscall_tests
++11: XEN_GET_VCPU_INFO(%esi)
++ movb $0, EVENT_MASK(%esp)
++ push %esp
+ call evtchn_do_upcall
+ add $4,%esp
- safesti:XEN_UNBLOCK_EVENTS(%esi) # reenable event callbacks
++ jmp ret_from_intr
++
++ ALIGN
+restore_all_enable_events:
- testb $1,evtchn_upcall_pending(%esi)
++ XEN_UNBLOCK_EVENTS(%esi)
+scrit: /**** START OF CRITICAL REGION ****/
- .byte 0x00,0x00,0x00 # testb $0xff,(%esi)
++ XEN_TEST_PENDING(%esi)
+ jnz 14f # process more events if necessary...
+ RESTORE_ALL
+14: XEN_BLOCK_EVENTS(%esi)
+ jmp 11b
+ecrit: /**** END OF CRITICAL REGION ****/
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+critical_region_fixup:
+ addl $critical_fixup_table-scrit,%eax
+ movzbl (%eax),%eax # %eax contains num bytes popped
+ mov %esp,%esi
+ add %eax,%esi # %esi points at end of src region
+ mov %esp,%edi
+ add $0x34,%edi # %edi points at end of dst region
+ mov %eax,%ecx
+ shr $2,%ecx # convert byte count to words
+ je 16f # skip loop if nothing to copy
+15: subl $4,%esi # pre-decrementing copy loop
+ subl $4,%edi
+ movl (%esi),%eax
+ movl %eax,(%edi)
+ loop 15b
+16: movl %edi,%esp # final %edi is top of merged stack
+ jmp 11b
+
+critical_fixup_table:
- 1: popl %ds
- 2: popl %es
- 3: popl %fs
- 4: popl %gs
- 5: iret
++ .byte 0x00,0x00,0x00 # testb $0xff,(%esi) = XEN_TEST_PENDING
+ .byte 0x00,0x00 # jnz 14f
+ .byte 0x00 # pop %ebx
+ .byte 0x04 # pop %ecx
+ .byte 0x08 # pop %edx
+ .byte 0x0c # pop %esi
+ .byte 0x10 # pop %edi
+ .byte 0x14 # pop %ebp
+ .byte 0x18 # pop %eax
+ .byte 0x1c # pop %ds
+ .byte 0x20 # pop %es
+ .byte 0x24,0x24,0x24 # add $4,%esp
+ .byte 0x28 # iret
+ .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi)
+ .byte 0x00,0x00 # jmp 11b
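+
+# Worked example (illustrative, not from the original source): if the upcall
+# arrived while EIP pointed at the "pop %esi" instruction of RESTORE_REGS,
+# i.e. just after %ebx, %ecx and %edx were restored, the lookup above yields
+# 0x0c, so critical_region_fixup treats 12 bytes of the interrupted frame as
+# already popped when it merges the two stack frames and restarts at 11b.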
+
+# Hypervisor uses this for application faults while it executes.
+ENTRY(failsafe_callback)
- 10: pushl %ss; \
- popl %ds; \
- pushl %ss; \
- popl %es; \
- pushl $11; \
- call do_exit; \
++1: popl %ds
++2: popl %es
++3: popl %fs
++4: popl %gs
++ subl $4,%esp
++ SAVE_ALL
++ jmp ret_from_exception
+.section .fixup,"ax"; \
+6: movl $0,(%esp); \
+ jmp 1b; \
+7: movl $0,(%esp); \
+ jmp 2b; \
+8: movl $0,(%esp); \
+ jmp 3b; \
+9: movl $0,(%esp); \
+ jmp 4b; \
- .long 5b,10b; \
+.previous; \
+.section __ex_table,"a";\
+ .align 4; \
+ .long 1b,6b; \
+ .long 2b,7b; \
+ .long 3b,8b; \
+ .long 4b,9b; \
- jnz restore_all_enable_events
+.previous
+
+ENTRY(coprocessor_error)
+ pushl $0
+ pushl $do_coprocessor_error
+ jmp error_code
+
+ENTRY(simd_coprocessor_error)
+ pushl $0
+ pushl $do_simd_coprocessor_error
+ jmp error_code
+
+ENTRY(device_not_available)
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ preempt_stop
+ call math_state_restore
+ jmp ret_from_exception
+
+/*
+ * Debug traps and NMI can happen at the one SYSENTER instruction
+ * that sets up the real kernel stack. Check here, since we can't
+ * allow the wrong stack to be used.
+ *
+ * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
+ * already pushed 3 words if it hits on the sysenter instruction:
+ * eflags, cs and eip.
+ *
+ * We just load the right stack, and push the three (known) values
+ * by hand onto the new stack - while updating the return eip past
+ * the instruction that would have done it for sysenter.
+ */
+#define FIX_STACK(offset, ok, label) \
+ cmpw $__KERNEL_CS,4(%esp); \
+ jne ok; \
+label: \
+ movl TSS_sysenter_esp0+offset(%esp),%esp; \
+ pushfl; \
+ pushl $__KERNEL_CS; \
+ pushl $sysenter_past_esp
+
+ENTRY(debug)
+ cmpl $sysenter_entry,(%esp)
+ jne debug_stack_correct
+ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
+debug_stack_correct:
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ xorl %edx,%edx # error code 0
+ movl %esp,%eax # pt_regs pointer
+ call do_debug
+ testl %eax,%eax
- #if 0
++ jnz restore_all
+ jmp ret_from_exception
+
- #endif
++#if 0 /* XEN */
+/*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+ * a debug fault, and the debug fault hasn't yet been able to
+ * clear up the stack. So we first check whether we got an
+ * NMI on the sysenter entry path, but after that we need to
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+ENTRY(nmi)
+ cmpl $sysenter_entry,(%esp)
+ je nmi_stack_fixup
+ pushl %eax
+ movl %esp,%eax
+ /* Do not access memory above the end of our stack page,
+ * it might not exist.
+ */
+ andl $(THREAD_SIZE-1),%eax
+ cmpl $(THREAD_SIZE-20),%eax
+ popl %eax
+ jae nmi_stack_correct
+ cmpl $sysenter_entry,12(%esp)
+ je nmi_debug_stack_check
+nmi_stack_correct:
+ pushl %eax
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_nmi
+ RESTORE_ALL
+
+nmi_stack_fixup:
+ FIX_STACK(12,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
+nmi_debug_stack_check:
+ cmpw $__KERNEL_CS,16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug - 1,(%esp)
+ jle nmi_stack_correct
+ cmpl $debug_esp_fix_insn,(%esp)
+ jle nmi_debug_stack_fixup
+nmi_debug_stack_fixup:
+ FIX_STACK(24,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
- jnz restore_all_enable_events
++#endif /* XEN */
+
+ENTRY(int3)
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ xorl %edx,%edx # zero error code
+ movl %esp,%eax # pt_regs pointer
+ call do_int3
+ testl %eax,%eax
- ENTRY(double_fault)
- pushl $do_double_fault
- jmp error_code
-
++ jnz restore_all
+ jmp ret_from_exception
+
+ENTRY(overflow)
+ pushl $0
+ pushl $do_overflow
+ jmp error_code
+
+ENTRY(bounds)
+ pushl $0
+ pushl $do_bounds
+ jmp error_code
+
+ENTRY(invalid_op)
+ pushl $0
+ pushl $do_invalid_op
+ jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+ pushl $0
+ pushl $do_coprocessor_segment_overrun
+ jmp error_code
+
- #define PAGE_FAULT_STUB(_name1, _name2) \
- ENTRY(_name1) \
- pushl %ds ; \
- pushl %eax ; \
- xorl %eax, %eax ; \
- pushl %ebp ; \
- pushl %edi ; \
- pushl %esi ; \
- pushl %edx ; \
- decl %eax /* eax = -1 */ ; \
- pushl %ecx ; \
- pushl %ebx ; \
- cld ; \
- movl %es,%edi ; \
- movl ES(%esp), %ecx /* get the faulting address */ ; \
- movl ORIG_EAX(%esp), %edx /* get the error code */ ; \
- movl %eax, ORIG_EAX(%esp) ; \
- movl %edi, ES(%esp) ; \
- movl $(__KERNEL_DS), %eax ; \
- movl %eax, %ds ; \
- movl %eax, %es ; \
- movl %esp,%eax /* pt_regs pointer */ ; \
- call _name2 ; \
- jmp ret_from_exception ;
- PAGE_FAULT_STUB(page_fault, do_page_fault)
+ENTRY(invalid_TSS)
+ pushl $do_invalid_TSS
+ jmp error_code
+
+ENTRY(segment_not_present)
+ pushl $do_segment_not_present
+ jmp error_code
+
+ENTRY(stack_segment)
+ pushl $do_stack_segment
+ jmp error_code
+
+ENTRY(general_protection)
+ pushl $do_general_protection
+ jmp error_code
+
+ENTRY(alignment_check)
+ pushl $do_alignment_check
+ jmp error_code
+
+# This handler is special, because it gets an extra value on its stack,
+# which is the linear faulting address.
+# fastcall register usage: %eax = pt_regs, %edx = error code,
+# %ecx = fault address
- .long sys_ni_syscall /* disable sys_vm86old */
++ENTRY(page_fault)
++ pushl %ds
++ pushl %eax
++ xorl %eax, %eax
++ pushl %ebp
++ pushl %edi
++ pushl %esi
++ pushl %edx
++ decl %eax /* eax = -1 */
++ pushl %ecx
++ pushl %ebx
++ cld
++ movl %es,%edi
++ movl ES(%esp), %ecx /* get the faulting address */
++ movl ORIG_EAX(%esp), %edx /* get the error code */
++ movl %eax, ORIG_EAX(%esp)
++ movl %edi, ES(%esp)
++ movl $(__KERNEL_DS),%eax
++ movl %eax, %ds
++ movl %eax, %es
++ movl %esp,%eax /* pt_regs pointer */
++ XEN_GET_VCPU_INFO(%esi)
++ XEN_SAVE_UPCALL_MASK(%esi,%bl,EVENT_MASK+12)
++ call do_page_fault
++ jmp ret_from_exception
+
+#ifdef CONFIG_X86_MCE
+ENTRY(machine_check)
+ pushl $0
+ pushl machine_check_vector
+ jmp error_code
+#endif
+
+ENTRY(fixup_4gb_segment)
+ pushl $do_fixup_4gb_segment
+ jmp error_code
+
+.data
+ENTRY(sys_call_table)
+ .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
+ .long sys_exit
+ .long sys_fork
+ .long sys_read
+ .long sys_write
+ .long sys_open /* 5 */
+ .long sys_close
+ .long sys_waitpid
+ .long sys_creat
+ .long sys_link
+ .long sys_unlink /* 10 */
+ .long sys_execve
+ .long sys_chdir
+ .long sys_time
+ .long sys_mknod
+ .long sys_chmod /* 15 */
+ .long sys_lchown16
+ .long sys_ni_syscall /* old break syscall holder */
+ .long sys_stat
+ .long sys_lseek
+ .long sys_getpid /* 20 */
+ .long sys_mount
+ .long sys_oldumount
+ .long sys_setuid16
+ .long sys_getuid16
+ .long sys_stime /* 25 */
+ .long sys_ptrace
+ .long sys_alarm
+ .long sys_fstat
+ .long sys_pause
+ .long sys_utime /* 30 */
+ .long sys_ni_syscall /* old stty syscall holder */
+ .long sys_ni_syscall /* old gtty syscall holder */
+ .long sys_access
+ .long sys_nice
+ .long sys_ni_syscall /* 35 - old ftime syscall holder */
+ .long sys_sync
+ .long sys_kill
+ .long sys_rename
+ .long sys_mkdir
+ .long sys_rmdir /* 40 */
+ .long sys_dup
+ .long sys_pipe
+ .long sys_times
+ .long sys_ni_syscall /* old prof syscall holder */
+ .long sys_brk /* 45 */
+ .long sys_setgid16
+ .long sys_getgid16
+ .long sys_signal
+ .long sys_geteuid16
+ .long sys_getegid16 /* 50 */
+ .long sys_acct
+ .long sys_umount /* recycled never used phys() */
+ .long sys_ni_syscall /* old lock syscall holder */
+ .long sys_ioctl
+ .long sys_fcntl /* 55 */
+ .long sys_ni_syscall /* old mpx syscall holder */
+ .long sys_setpgid
+ .long sys_ni_syscall /* old ulimit syscall holder */
+ .long sys_olduname
+ .long sys_umask /* 60 */
+ .long sys_chroot
+ .long sys_ustat
+ .long sys_dup2
+ .long sys_getppid
+ .long sys_getpgrp /* 65 */
+ .long sys_setsid
+ .long sys_sigaction
+ .long sys_sgetmask
+ .long sys_ssetmask
+ .long sys_setreuid16 /* 70 */
+ .long sys_setregid16
+ .long sys_sigsuspend
+ .long sys_sigpending
+ .long sys_sethostname
+ .long sys_setrlimit /* 75 */
+ .long sys_old_getrlimit
+ .long sys_getrusage
+ .long sys_gettimeofday
+ .long sys_settimeofday
+ .long sys_getgroups16 /* 80 */
+ .long sys_setgroups16
+ .long old_select
+ .long sys_symlink
+ .long sys_lstat
+ .long sys_readlink /* 85 */
+ .long sys_uselib
+ .long sys_swapon
+ .long sys_reboot
+ .long old_readdir
+ .long old_mmap /* 90 */
+ .long sys_munmap
+ .long sys_truncate
+ .long sys_ftruncate
+ .long sys_fchmod
+ .long sys_fchown16 /* 95 */
+ .long sys_getpriority
+ .long sys_setpriority
+ .long sys_ni_syscall /* old profil syscall holder */
+ .long sys_statfs
+ .long sys_fstatfs /* 100 */
+ .long sys_ioperm
+ .long sys_socketcall
+ .long sys_syslog
+ .long sys_setitimer
+ .long sys_getitimer /* 105 */
+ .long sys_newstat
+ .long sys_newlstat
+ .long sys_newfstat
+ .long sys_uname
+ .long sys_iopl /* 110 */
+ .long sys_vhangup
+ .long sys_ni_syscall /* old "idle" system call */
++ .long sys_vm86old
+ .long sys_wait4
+ .long sys_swapoff /* 115 */
+ .long sys_sysinfo
+ .long sys_ipc
+ .long sys_fsync
+ .long sys_sigreturn
+ .long sys_clone /* 120 */
+ .long sys_setdomainname
+ .long sys_newuname
+ .long sys_modify_ldt
+ .long sys_adjtimex
+ .long sys_mprotect /* 125 */
+ .long sys_sigprocmask
+ .long sys_ni_syscall /* old "create_module" */
+ .long sys_init_module
+ .long sys_delete_module
+ .long sys_ni_syscall /* 130: old "get_kernel_syms" */
+ .long sys_quotactl
+ .long sys_getpgid
+ .long sys_fchdir
+ .long sys_bdflush
+ .long sys_sysfs /* 135 */
+ .long sys_personality
+ .long sys_ni_syscall /* reserved for afs_syscall */
+ .long sys_setfsuid16
+ .long sys_setfsgid16
+ .long sys_llseek /* 140 */
+ .long sys_getdents
+ .long sys_select
+ .long sys_flock
+ .long sys_msync
+ .long sys_readv /* 145 */
+ .long sys_writev
+ .long sys_getsid
+ .long sys_fdatasync
+ .long sys_sysctl
+ .long sys_mlock /* 150 */
+ .long sys_munlock
+ .long sys_mlockall
+ .long sys_munlockall
+ .long sys_sched_setparam
+ .long sys_sched_getparam /* 155 */
+ .long sys_sched_setscheduler
+ .long sys_sched_getscheduler
+ .long sys_sched_yield
+ .long sys_sched_get_priority_max
+ .long sys_sched_get_priority_min /* 160 */
+ .long sys_sched_rr_get_interval
+ .long sys_nanosleep
+ .long sys_mremap
+ .long sys_setresuid16
+ .long sys_getresuid16 /* 165 */
+ .long sys_vm86
+ .long sys_ni_syscall /* Old sys_query_module */
+ .long sys_poll
+ .long sys_nfsservctl
+ .long sys_setresgid16 /* 170 */
+ .long sys_getresgid16
+ .long sys_prctl
+ .long sys_rt_sigreturn
+ .long sys_rt_sigaction
+ .long sys_rt_sigprocmask /* 175 */
+ .long sys_rt_sigpending
+ .long sys_rt_sigtimedwait
+ .long sys_rt_sigqueueinfo
+ .long sys_rt_sigsuspend
+ .long sys_pread64 /* 180 */
+ .long sys_pwrite64
+ .long sys_chown16
+ .long sys_getcwd
+ .long sys_capget
+ .long sys_capset /* 185 */
+ .long sys_sigaltstack
+ .long sys_sendfile
+ .long sys_ni_syscall /* reserved for streams1 */
+ .long sys_ni_syscall /* reserved for streams2 */
+ .long sys_vfork /* 190 */
+ .long sys_getrlimit
+ .long sys_mmap2
+ .long sys_truncate64
+ .long sys_ftruncate64
+ .long sys_stat64 /* 195 */
+ .long sys_lstat64
+ .long sys_fstat64
+ .long sys_lchown
+ .long sys_getuid
+ .long sys_getgid /* 200 */
+ .long sys_geteuid
+ .long sys_getegid
+ .long sys_setreuid
+ .long sys_setregid
+ .long sys_getgroups /* 205 */
+ .long sys_setgroups
+ .long sys_fchown
+ .long sys_setresuid
+ .long sys_getresuid
+ .long sys_setresgid /* 210 */
+ .long sys_getresgid
+ .long sys_chown
+ .long sys_setuid
+ .long sys_setgid
+ .long sys_setfsuid /* 215 */
+ .long sys_setfsgid
+ .long sys_pivot_root
+ .long sys_mincore
+ .long sys_madvise
+ .long sys_getdents64 /* 220 */
+ .long sys_fcntl64
+ .long sys_ni_syscall /* reserved for TUX */
+ .long sys_ni_syscall
+ .long sys_gettid
+ .long sys_readahead /* 225 */
+ .long sys_setxattr
+ .long sys_lsetxattr
+ .long sys_fsetxattr
+ .long sys_getxattr
+ .long sys_lgetxattr /* 230 */
+ .long sys_fgetxattr
+ .long sys_listxattr
+ .long sys_llistxattr
+ .long sys_flistxattr
+ .long sys_removexattr /* 235 */
+ .long sys_lremovexattr
+ .long sys_fremovexattr
+ .long sys_tkill
+ .long sys_sendfile64
+ .long sys_futex /* 240 */
+ .long sys_sched_setaffinity
+ .long sys_sched_getaffinity
+ .long sys_set_thread_area
+ .long sys_get_thread_area
+ .long sys_io_setup /* 245 */
+ .long sys_io_destroy
+ .long sys_io_getevents
+ .long sys_io_submit
+ .long sys_io_cancel
+ .long sys_fadvise64 /* 250 */
+ .long sys_ni_syscall
+ .long sys_exit_group
+ .long sys_lookup_dcookie
+ .long sys_epoll_create
+ .long sys_epoll_ctl /* 255 */
+ .long sys_epoll_wait
+ .long sys_remap_file_pages
+ .long sys_set_tid_address
+ .long sys_timer_create
+ .long sys_timer_settime /* 260 */
+ .long sys_timer_gettime
+ .long sys_timer_getoverrun
+ .long sys_timer_delete
+ .long sys_clock_settime
+ .long sys_clock_gettime /* 265 */
+ .long sys_clock_getres
+ .long sys_clock_nanosleep
+ .long sys_statfs64
+ .long sys_fstatfs64
+ .long sys_tgkill /* 270 */
+ .long sys_utimes
+ .long sys_fadvise64_64
+ .long sys_ni_syscall /* sys_vserver */
+ .long sys_mbind
+ .long sys_get_mempolicy
+ .long sys_set_mempolicy
+ .long sys_mq_open
+ .long sys_mq_unlink
+ .long sys_mq_timedsend
+ .long sys_mq_timedreceive /* 280 */
+ .long sys_mq_notify
+ .long sys_mq_getsetattr
+ .long sys_ni_syscall /* reserved for kexec */
+ .long sys_waitid
+ .long sys_ni_syscall /* 285 */ /* available */
+ .long sys_add_key
+ .long sys_request_key
+ .long sys_keyctl
+
+syscall_table_size=(.-sys_call_table)
--- /dev/null
- unsigned long phys_lp;
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+
+#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
+static void flush_ldt(void *null)
+{
+ if (current->active_mm) {
+ load_LDT(&current->active_mm->context);
+ flush_page_update_queue();
+ }
+}
+#endif
+
+static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+{
+ void *oldldt;
+ void *newldt;
+ int oldsize;
+
+ if (mincount <= pc->size)
+ return 0;
+ oldsize = pc->size;
+ mincount = (mincount+511)&(~511);
+ if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
+ newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
+ else
+ newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
+
+ if (!newldt)
+ return -ENOMEM;
+
+ if (oldsize)
+ memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
+ oldldt = pc->ldt;
+ memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
+ pc->ldt = newldt;
+ wmb();
+ pc->size = mincount;
+ wmb();
+
+ if (reload) {
+#ifdef CONFIG_SMP
+ cpumask_t mask;
+ preempt_disable();
+#endif
+ make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ load_LDT(pc);
+ flush_page_update_queue();
+#ifdef CONFIG_SMP
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+ smp_call_function(flush_ldt, NULL, 1, 1);
+ preempt_enable();
+#endif
+ }
+ if (oldsize) {
+ make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ flush_page_update_queue();
+ if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(oldldt);
+ else
+ kfree(oldldt);
+ }
+ return 0;
+}
+
+static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+{
+ int err = alloc_ldt(new, old->size, 0);
+ if (err < 0)
+ return err;
+ memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
+ make_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ flush_page_update_queue();
+ return 0;
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct mm_struct * old_mm;
+ int retval = 0;
+
+ init_MUTEX(&mm->context.sem);
+ mm->context.size = 0;
+ old_mm = current->mm;
+ if (old_mm && old_mm->context.size > 0) {
+ down(&old_mm->context.sem);
+ retval = copy_ldt(&mm->context, &old_mm->context);
+ up(&old_mm->context.sem);
+ }
+ return retval;
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void destroy_context(struct mm_struct *mm)
+{
+ if (mm->context.size) {
+ if (mm == current->active_mm)
+ clear_LDT();
+ make_pages_writable(mm->context.ldt,
+ (mm->context.size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
+ flush_page_update_queue();
+ if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
+ vfree(mm->context.ldt);
+ else
+ kfree(mm->context.ldt);
+ mm->context.size = 0;
+ }
+}
+
+static int read_ldt(void __user * ptr, unsigned long bytecount)
+{
+ int err;
+ unsigned long size;
+ struct mm_struct * mm = current->mm;
+
+ if (!mm->context.size)
+ return 0;
+ if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+ bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
+
+ down(&mm->context.sem);
+ size = mm->context.size*LDT_ENTRY_SIZE;
+ if (size > bytecount)
+ size = bytecount;
+
+ err = 0;
+ if (copy_to_user(ptr, mm->context.ldt, size))
+ err = -EFAULT;
+ up(&mm->context.sem);
+ if (err < 0)
+ goto error_return;
+ if (size != bytecount) {
+ /* zero-fill the rest */
+ if (clear_user(ptr+size, bytecount-size) != 0) {
+ err = -EFAULT;
+ goto error_return;
+ }
+ }
+ return bytecount;
+error_return:
+ return err;
+}
+
+static int read_default_ldt(void __user * ptr, unsigned long bytecount)
+{
+ int err;
+ unsigned long size;
+ void *address;
+
+ err = 0;
+ address = &default_ldt[0];
+ size = 5*sizeof(struct desc_struct);
+ if (size > bytecount)
+ size = bytecount;
+
+ err = size;
+ if (copy_to_user(ptr, address, size))
+ err = -EFAULT;
+
+ return err;
+}
+
+static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
+{
+ struct mm_struct * mm = current->mm;
+ __u32 entry_1, entry_2, *lp;
- phys_lp = arbitrary_virt_to_phys(lp);
++ unsigned long mach_lp;
+ int error;
+ struct user_desc ldt_info;
+
+ error = -EINVAL;
+ if (bytecount != sizeof(ldt_info))
+ goto out;
+ error = -EFAULT;
+ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+ goto out;
+
+ error = -EINVAL;
+ if (ldt_info.entry_number >= LDT_ENTRIES)
+ goto out;
+ if (ldt_info.contents == 3) {
+ if (oldmode)
+ goto out;
+ if (ldt_info.seg_not_present == 0)
+ goto out;
+ }
+
+ down(&mm->context.sem);
+ if (ldt_info.entry_number >= mm->context.size) {
+ error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
+ if (error < 0)
+ goto out_unlock;
+ }
+
+ lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
- error = HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2);
++ mach_lp = arbitrary_virt_to_machine(lp);
+
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ if (oldmode || LDT_empty(&ldt_info)) {
+ entry_1 = 0;
+ entry_2 = 0;
+ goto install;
+ }
+ }
+
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
+
+ /* Install the new entry ... */
+install:
++ error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+
+out_unlock:
+ up(&mm->context.sem);
+out:
+ return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+{
+ int ret = -ENOSYS;
+
+ switch (func) {
+ case 0:
+ ret = read_ldt(ptr, bytecount);
+ break;
+ case 1:
+ ret = write_ldt(ptr, bytecount, 1);
+ break;
+ case 2:
+ ret = read_default_ldt(ptr, bytecount);
+ break;
+ case 0x11:
+ ret = write_ldt(ptr, bytecount, 0);
+ break;
+ }
+ return ret;
+}
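+
+/*
+ * Illustrative usage sketch (not part of this file): user space reaches the
+ * paths above through the modify_ldt(2) system call, using the same function
+ * codes dispatched in sys_modify_ldt(). "tls_block" and "buf" are
+ * hypothetical buffers used only for the example:
+ *
+ *	struct user_desc d = {
+ *		.entry_number   = 0,
+ *		.base_addr      = (unsigned long)tls_block,
+ *		.limit          = 0xfffff,
+ *		.seg_32bit      = 1,
+ *		.limit_in_pages = 1,
+ *	};
+ *	modify_ldt(0x11, &d, sizeof(d));	// write_ldt(), new-format entry
+ *	modify_ldt(0, buf, sizeof(buf));	// read_ldt() copies entries back
+ *
+ * Under Xen the descriptor write itself is carried out by the
+ * HYPERVISOR_update_descriptor() hypercall in write_ldt() above.
+ */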
--- /dev/null
+/*
+ * linux/arch/i386/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/ptrace.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm-xen/multicall.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+#ifdef CONFIG_MATH_EMULATION
+#include <asm/math_emu.h>
+#endif
+
+#include <linux/irq.h>
+#include <linux/err.h>
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int hlt_counter;
+
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+ return ((unsigned long *)tsk->thread.esp)[3];
+}
+
+/*
+ * Power management idle function, if any.
+ */
+void (*pm_idle)(void);
+
+void disable_hlt(void)
+{
+ hlt_counter++;
+}
+
+EXPORT_SYMBOL(disable_hlt);
+
+void enable_hlt(void)
+{
+ hlt_counter--;
+}
+
+EXPORT_SYMBOL(enable_hlt);
+
+/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
+extern int set_timeout_timer(void);
+void xen_idle(void)
+{
+ int cpu = smp_processor_id();
+
+ local_irq_disable();
+
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, 0);
+
+ if (need_resched()) {
+ local_irq_enable();
+ } else if (set_timeout_timer() == 0) {
+ /* NB. Blocking reenable events in a race-free manner. */
+ HYPERVISOR_block();
+ } else {
+ local_irq_enable();
+ HYPERVISOR_yield();
+ }
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+ /* endless idle loop with no priority at all */
+ while (1) {
+ while (!need_resched()) {
+ /*
+ * Mark this as an RCU critical section so that
+ * synchronize_kernel() in the unload path waits
+ * for our completion.
+ */
+ rcu_read_lock();
+ irq_stat[smp_processor_id()].idle_timestamp = jiffies;
+ xen_idle();
+ rcu_read_unlock();
+ }
+ schedule();
+ }
+}
+
+/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
+/* Always use xen_idle() instead. */
+void __init select_idle_routine(const struct cpuinfo_x86 *c) {}
+
+void show_regs(struct pt_regs * regs)
+{
+ printk("\n");
+ printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+ printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+ print_symbol("EIP is at %s\n", regs->eip);
+
+ if (regs->xcs & 2)
+ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+ printk(" EFLAGS: %08lx %s (%s)\n",
+ regs->eflags, print_tainted(),UTS_RELEASE);
+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+ regs->eax,regs->ebx,regs->ecx,regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+ regs->esi, regs->edi, regs->ebp);
+ printk(" DS: %04x ES: %04x\n",
+ 0xffff & regs->xds,0xffff & regs->xes);
+
+ show_trace(NULL, &regs->esp);
+}
+
+/*
+ * This gets run with %ebx containing the
+ * function to call, and %edx containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+__asm__(".section .text\n"
+ ".align 4\n"
+ "kernel_thread_helper:\n\t"
+ "movl %edx,%eax\n\t"
+ "pushl %edx\n\t"
+ "call *%ebx\n\t"
+ "pushl %eax\n\t"
+ "call do_exit\n"
+ ".previous");
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+
+ regs.ebx = (unsigned long) fn;
+ regs.edx = (unsigned long) arg;
+
+ regs.xds = __USER_DS;
+ regs.xes = __USER_DS;
+ regs.orig_eax = -1;
+ regs.eip = (unsigned long) kernel_thread_helper;
+ regs.xcs = __KERNEL_CS;
+ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+ /* Ok, create the new process.. */
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ struct task_struct *tsk = current;
+ struct thread_struct *t = &tsk->thread;
+
+ /* The process may have allocated an io port bitmap... nuke it. */
+ if (unlikely(NULL != t->io_bitmap_ptr)) {
+ int cpu = get_cpu();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+ kfree(t->io_bitmap_ptr);
+ t->io_bitmap_ptr = NULL;
+ /*
+ * Careful, clear this in the TSS too:
+ */
+ memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
+ t->io_bitmap_max = 0;
+ tss->io_bitmap_owner = NULL;
+ tss->io_bitmap_max = 0;
+ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+ put_cpu();
+ }
+}
+
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
+
+ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+ /*
+ * Forget coprocessor state..
+ */
+ clear_fpu(tsk);
+ tsk->used_math = 0;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+ if (dead_task->mm) {
+ // temporary debugging check
+ if (dead_task->mm->context.size) {
+ printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+ dead_task->comm,
+ dead_task->mm->context.ldt,
+ dead_task->mm->context.size);
+ BUG();
+ }
+ }
+
+ release_vm86_irqs(dead_task);
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+ unlazy_fpu(tsk);
+}
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+ unsigned long unused,
+ struct task_struct * p, struct pt_regs * regs)
+{
+ struct pt_regs * childregs;
+ struct task_struct *tsk;
+ int err;
+ unsigned long eflags;
+
+ childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+ *childregs = *regs;
+ childregs->eax = 0;
+ childregs->esp = esp;
+
+ p->thread.esp = (unsigned long) childregs;
+ p->thread.esp0 = (unsigned long) (childregs+1);
+
+ p->thread.eip = (unsigned long) ret_from_fork;
+
+ savesegment(fs,p->thread.fs);
+ savesegment(gs,p->thread.gs);
+
+ tsk = current;
+ if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) {
+ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!p->thread.io_bitmap_ptr) {
+ p->thread.io_bitmap_max = 0;
+ return -ENOMEM;
+ }
+ memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES);
+ }
+
+ /*
+ * Set a new TLS for the child thread?
+ */
+ if (clone_flags & CLONE_SETTLS) {
+ struct desc_struct *desc;
+ struct user_desc info;
+ int idx;
+
+ err = -EFAULT;
+ if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))
+ goto out;
+ err = -EINVAL;
+ if (LDT_empty(&info))
+ goto out;
+
+ idx = info.entry_number;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ goto out;
+
+ desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+
+
+ __asm__ __volatile__ ( "pushfl; popl %0" : "=r" (eflags) : );
+ p->thread.io_pl = (eflags >> 12) & 3;
+
+ err = 0;
+ out:
+ if (err && p->thread.io_bitmap_ptr) {
+ kfree(p->thread.io_bitmap_ptr);
+ p->thread.io_bitmap_max = 0;
+ }
+ return err;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+ int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+ dump->magic = CMAGIC;
+ dump->start_code = 0;
+ dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+ dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+ dump->u_dsize -= dump->u_tsize;
+ dump->u_ssize = 0;
+ for (i = 0; i < 8; i++)
+ dump->u_debugreg[i] = current->thread.debugreg[i];
+
+ if (dump->start_stack < TASK_SIZE)
+ dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+ dump->regs.ebx = regs->ebx;
+ dump->regs.ecx = regs->ecx;
+ dump->regs.edx = regs->edx;
+ dump->regs.esi = regs->esi;
+ dump->regs.edi = regs->edi;
+ dump->regs.ebp = regs->ebp;
+ dump->regs.eax = regs->eax;
+ dump->regs.ds = regs->xds;
+ dump->regs.es = regs->xes;
+ savesegment(fs,dump->regs.fs);
+ savesegment(gs,dump->regs.gs);
+ dump->regs.orig_eax = regs->orig_eax;
+ dump->regs.eip = regs->eip;
+ dump->regs.cs = regs->xcs;
+ dump->regs.eflags = regs->eflags;
+ dump->regs.esp = regs->esp;
+ dump->regs.ss = regs->xss;
+
+ dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/*
+ * Capture the user space registers if the task is not running (in user space)
+ */
+int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
+{
+ struct pt_regs ptregs;
+
+ ptregs = *(struct pt_regs *)
+ ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+ ptregs.xcs &= 0xffff;
+ ptregs.xds &= 0xffff;
+ ptregs.xes &= 0xffff;
+ ptregs.xss &= 0xffff;
+
+ elf_core_copy_regs(regs, &ptregs);
+
+ boot_option_idle_override = 1;
+ return 1;
+}
+
+static inline void
+handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
+{
+ if (!next->io_bitmap_ptr) {
+ /*
+ * Disable the bitmap via an invalid offset. We still cache
+ * the previous bitmap owner and the IO bitmap contents:
+ */
+ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
+ return;
+ }
+ if (likely(next == tss->io_bitmap_owner)) {
+ /*
+ * Previous owner of the bitmap (hence the bitmap content)
+ * matches the next task, we dont have to do anything but
+ * to set a valid offset in the TSS:
+ */
+ tss->io_bitmap_base = IO_BITMAP_OFFSET;
+ return;
+ }
+ /*
+ * Lazy TSS I/O bitmap copy. We set an invalid offset here
+ * and let the task get a GPF in case an I/O instruction
+ * is performed. The handler of the GPF will verify that the
+ * faulting task has a valid I/O bitmap and, if so, does the
+ * real copy and restarts the instruction. This will save us
+ * redundant copies when the currently switched task does not
+ * perform any I/O during its timeslice.
+ */
+ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+}
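+
+/*
+ * Summary of the three states handle_io_bitmap() can leave the TSS in
+ * (added for clarity; not present in the original source):
+ *
+ *	INVALID_IO_BITMAP_OFFSET      - next task has no bitmap, so any user
+ *	                                I/O instruction faults.
+ *	IO_BITMAP_OFFSET              - the TSS already caches this task's
+ *	                                bitmap; permission checks hit it
+ *	                                directly.
+ *	INVALID_IO_BITMAP_OFFSET_LAZY - copy deferred; the first I/O
+ *	                                instruction takes a GPF whose handler
+ *	                                copies the bitmap and restarts it.
+ */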
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+ HYPERVISOR_set_debugreg((register), \
+ (thread->debugreg[register]))
+
+/*
+ * switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. Lazy FP saving no longer makes any sense
+ * with modern CPU's, and this simplifies a lot of things (SMP
+ * and UP become the same).
+ *
+ * NOTE! We used to use the x86 hardware context switching. The
+ * reason for not using it any more becomes apparent when you
+ * try to recover gracefully from saved state that is no longer
+ * valid (stale segment register values in particular). With the
+ * hardware task-switch, there is no way to fix up bad state in
+ * a reasonable manner.
+ *
+ * The fact that Intel documents the hardware task-switching to
+ * be slow is a fairly red herring - this code is not noticeably
+ * faster. However, there _is_ some room for improvement here,
+ * so the performance issues may eventually be a valid point.
+ * More important, however, is the fact that this allows us much
+ * more flexibility.
+ *
+ * The return value (in %eax) will be the "prev" task after
+ * the task-switch, and shows up in ret_from_fork in entry.S,
+ * for example.
+ */
+struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ struct thread_struct *prev = &prev_p->thread,
+ *next = &next_p->thread;
+ int cpu = smp_processor_id();
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ dom0_op_t op;
+
+ /* NB. No need to disable interrupts as already done in sched.c */
+ /* __cli(); */
+
+ /*
+ * Save away %fs and %gs. No need to save %es and %ds, as
+ * those are always kernel segments while inside the kernel.
+ */
+ asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+ asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
+
+ /*
+ * We clobber FS and GS here so that we avoid a GPF when
+ * restoring previous task's FS/GS values in Xen when the LDT
+ * is switched. If we don't do this then we can end up
+ * erroneously re-flushing the page-update queue when we
+ * 'execute_multicall_list'.
+ */
+ __asm__ __volatile__ (
+ "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs" : : :
+ "eax" );
+
+ MULTICALL_flush_page_update_queue();
+
+ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
+
+ /*
+ * This is basically '__unlazy_fpu', except that we queue a
+ * multicall to indicate FPU task switch, rather than
+ * synchronously trapping to Xen.
+ */
+ if (prev_p->thread_info->status & TS_USEDFPU) {
+ save_init_fpu(prev_p);
+ queue_multicall0(__HYPERVISOR_fpu_taskswitch);
+ }
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+ * This is load_esp0(tss, next) with a multicall.
+ */
+ tss->esp0 = next->esp0;
+ /* This can only happen when SEP is enabled, no need to test
+ * "SEP"arately */
+ if (unlikely(tss->ss1 != next->sysenter_cs)) {
+ tss->ss1 = next->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, next->sysenter_cs, 0);
+ }
+ queue_multicall2(__HYPERVISOR_stack_switch, tss->ss0, tss->esp0);
+
+ /*
+ * Load the per-thread Thread-Local Storage descriptor.
+ * This is load_TLS(next, cpu) with multicalls.
+ */
+#define C(i) do { \
+ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
+ next->tls_array[i].b != prev->tls_array[i].b)) \
+ queue_multicall3(__HYPERVISOR_update_descriptor, \
+ virt_to_machine(&get_cpu_gdt_table(cpu) \
+ [GDT_ENTRY_TLS_MIN + i]), \
+ ((u32 *)&next->tls_array[i])[0], \
+ ((u32 *)&next->tls_array[i])[1]); \
+} while (0)
+ C(0); C(1); C(2);
+#undef C
+
+ if (xen_start_info.flags & SIF_PRIVILEGED) {
+ op.cmd = DOM0_IOPL;
+ op.u.iopl.domain = DOMID_SELF;
+ op.u.iopl.iopl = next->io_pl;
++ op.interface_version = DOM0_INTERFACE_VERSION;
+ queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
+ }
+
+ /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
+ execute_multicall_list();
+ /* __sti(); */
+
+ /*
+ * Restore %fs and %gs if needed.
+ */
+ if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) {
+ loadsegment(fs, next->fs);
+ loadsegment(gs, next->gs);
+ }
+
+ /*
+ * Now maybe reload the debug registers
+ */
+ if (unlikely(next->debugreg[7])) {
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
+ handle_io_bitmap(next, tss);
+
+ return prev_p;
+}
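
All of the queue_multicall*() calls above record work in a list that execute_multicall_list() hands to Xen in one go, so the task switch pays for a single boundary crossing rather than one per operation. A stand-alone sketch of that batch-then-flush pattern, using invented names rather than the real multicall interface:

	#include <stdio.h>

	#define MAX_CALLS 8

	struct pending_call {
		int op;
		unsigned long arg0, arg1;
	};

	static struct pending_call queue[MAX_CALLS];
	static int nr_queued;

	/* Record the request instead of issuing it immediately. */
	static void queue_call(int op, unsigned long arg0, unsigned long arg1)
	{
		if (nr_queued < MAX_CALLS) {
			queue[nr_queued].op = op;
			queue[nr_queued].arg0 = arg0;
			queue[nr_queued].arg1 = arg1;
			nr_queued++;
		}
	}

	/* One expensive boundary crossing services everything queued so far. */
	static void flush_calls(void)
	{
		int i;

		for (i = 0; i < nr_queued; i++)
			printf("issue op=%d arg0=%#lx arg1=%#lx\n",
			       queue[i].op, queue[i].arg0, queue[i].arg1);
		nr_queued = 0;
	}

	int main(void)
	{
		queue_call(1, 0x1000, 0);	/* e.g. a stack switch */
		queue_call(2, 0x2000, 42);	/* e.g. a descriptor update */
		flush_calls();			/* single batched flush */
		return 0;
	}
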
+
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+ return do_fork(SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+}
+
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+ unsigned long clone_flags;
+ unsigned long newsp;
+ int __user *parent_tidptr, *child_tidptr;
+
+ clone_flags = regs.ebx;
+ newsp = regs.ecx;
+ parent_tidptr = (int __user *)regs.edx;
+ child_tidptr = (int __user *)regs.edi;
+ if (!newsp)
+ newsp = regs.esp;
+ return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs, 0, NULL, NULL);
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+ int error;
+ char * filename;
+
+ filename = getname((char __user *) regs.ebx);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ error = do_execve(filename,
+ (char __user * __user *) regs.ecx,
+ (char __user * __user *) regs.edx,
+ &regs);
+ if (error == 0) {
+ task_lock(current);
+ current->ptrace &= ~PT_DTRACE;
+ task_unlock(current);
+ /* Make sure we don't return using sysenter.. */
+ set_thread_flag(TIF_IRET);
+ }
+ putname(filename);
+out:
+ return error;
+}
+
+#define top_esp (THREAD_SIZE - sizeof(unsigned long))
+#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
+
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ebp, esp, eip;
+ unsigned long stack_page;
+ int count = 0;
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+ stack_page = (unsigned long)p->thread_info;
+ esp = p->thread.esp;
+ if (!stack_page || esp < stack_page || esp > top_esp+stack_page)
+ return 0;
+ /* include/asm-i386/system.h:switch_to() pushes ebp last. */
+ ebp = *(unsigned long *) esp;
+ do {
+ if (ebp < stack_page || ebp > top_ebp+stack_page)
+ return 0;
+ eip = *(unsigned long *) (ebp+4);
+ if (!in_sched_functions(eip))
+ return eip;
+ ebp = *(unsigned long *) ebp;
+ } while (count++ < 16);
+ return 0;
+}
+
+/*
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
+ */
+static int get_free_idx(void)
+{
+ struct thread_struct *t = &current->thread;
+ int idx;
+
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
+{
+ struct thread_struct *t = &current->thread;
+ struct user_desc info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ /*
+ * We must not get preempted while modifying the TLS.
+ */
+ cpu = get_cpu();
+
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ load_TLS(t, cpu);
+
+ put_cpu();
+
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
+{
+ struct user_desc info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
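
The GET_BASE/GET_LIMIT macros above reassemble a segment base and limit that the descriptor format scatters across the two 32-bit words. A small round-trip check of that bit layout (plain C, illustrative values only; access-rights bits omitted):

	#include <stdio.h>

	/* Same bit extraction as GET_BASE()/GET_LIMIT() above, taking the two words. */
	#define GET_BASE(a, b)  ((((a) >> 16) & 0x0000ffff) | \
	                         (((b) << 16) & 0x00ff0000) | \
	                         ( (b)        & 0xff000000))
	#define GET_LIMIT(a, b) (((a) & 0x0ffff) | ((b) & 0xf0000))

	int main(void)
	{
		unsigned int base = 0x12345678, limit = 0xabcde;	/* 20-bit limit */
		unsigned int a, b;

		/* Pack base/limit the way a GDT descriptor scatters them. */
		a = (limit & 0xffff) | ((base & 0xffff) << 16);
		b = ((base >> 16) & 0xff) | (limit & 0xf0000) | (base & 0xff000000);

		printf("a=%08x b=%08x\n", a, b);
		printf("base  back: %08x\n", GET_BASE(a, b));	/* 12345678 */
		printf("limit back: %05x\n", GET_LIMIT(a, b));	/* abcde    */
		return 0;
	}
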
--- /dev/null
- code_resource.start = virt_to_phys(_text);
- code_resource.end = virt_to_phys(_etext)-1;
- data_resource.start = virt_to_phys(_etext);
- data_resource.end = virt_to_phys(_edata)-1;
+/*
+ * linux/arch/i386/kernel/setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *
+ * Memory region support
+ * David Parsons <orc@pell.chi.il.us>, July-August 1999
+ *
+ * Added E820 sanitization routine (removes overlapping memory regions);
+ * Brian Moyle <bmoyle@mvista.com>, February 2001
+ *
+ * Moved CPU detection code to cpu/${cpu}.c
+ * Patrick Mochel <mochel@osdl.org>, March 2002
+ *
+ * Provisions for empty E820 memory regions (reported by certain BIOSes).
+ * Alex Achenbach <xela@slit.de>, December 2002.
+ *
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/apm_bios.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/console.h>
+#include <linux/root_dev.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/edd.h>
+#include <video/edid.h>
+#include <asm/e820.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/arch_hooks.h>
+#include <asm/sections.h>
+#include <asm/io_apic.h>
+#include <asm/ist.h>
+#include <asm/io.h>
+#include <asm-xen/hypervisor.h>
+#include "setup_arch_pre.h"
+#include <bios_ebda.h>
+
+int disable_pse __initdata = 0;
+
+/*
+ * Machine setup..
+ */
+
+#ifdef CONFIG_EFI
+int efi_enabled = 0;
+EXPORT_SYMBOL(efi_enabled);
+#endif
+
+/* cpu data as detected by the assembly code in head.S */
+struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
+/* common cpu data for all cpus */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
+
+unsigned long mmu_cr4_features;
+EXPORT_SYMBOL_GPL(mmu_cr4_features);
+
+#ifdef CONFIG_ACPI_INTERPRETER
+ int acpi_disabled = 0;
+#else
+ int acpi_disabled = 1;
+#endif
+EXPORT_SYMBOL(acpi_disabled);
+
+#ifdef CONFIG_ACPI_BOOT
+int __initdata acpi_force = 0;
+extern acpi_interrupt_flags acpi_sci_flags;
+#endif
+
+int MCA_bus;
+/* for MCA, but anyone else can use it if they want */
+unsigned int machine_id;
+unsigned int machine_submodel_id;
+unsigned int BIOS_revision;
+unsigned int mca_pentium_flag;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/* user-defined highmem size */
+static unsigned int highmem_pages = -1;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct apm_info apm_info;
+struct sys_desc_table_struct {
+ unsigned short length;
+ unsigned char table[0];
+};
+struct edid_info edid_info;
+struct ist_info ist_info;
+struct e820map e820;
+
+unsigned char aux_device_present;
+
+extern void early_cpu_init(void);
+extern void dmi_scan_machine(void);
+extern void generic_apic_probe(char *);
+extern int root_mountflags;
+
+unsigned long saved_videomode;
+
+#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_PROMPT_FLAG 0x8000
+#define RAMDISK_LOAD_FLAG 0x4000
+
+static char command_line[COMMAND_LINE_SIZE];
+
+unsigned char __initdata boot_params[PARAM_SIZE];
+
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static struct resource system_rom_resource = {
+ .name = "System ROM",
+ .start = 0xf0000,
+ .end = 0xfffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+ .name = "Extension ROM",
+ .start = 0xe0000,
+ .end = 0xeffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+ .name = "Adapter ROM",
+ .start = 0xc8000,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+#define ADAPTER_ROM_RESOURCES \
+ (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+
+static struct resource video_rom_resource = {
+ .name = "Video ROM",
+ .start = 0xc0000,
+ .end = 0xc7fff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+#endif
+
+static struct resource video_ram_resource = {
+ .name = "Video RAM area",
+ .start = 0xa0000,
+ .end = 0xbffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+ .name = "dma1",
+ .start = 0x0000,
+ .end = 0x001f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic1",
+ .start = 0x0020,
+ .end = 0x0021,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer0",
+ .start = 0x0040,
+ .end = 0x0043,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer1",
+ .start = 0x0050,
+ .end = 0x0053,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "keyboard",
+ .start = 0x0060,
+ .end = 0x006f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma page reg",
+ .start = 0x0080,
+ .end = 0x008f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic2",
+ .start = 0x00a0,
+ .end = 0x00a1,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma2",
+ .start = 0x00c0,
+ .end = 0x00df,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "fpu",
+ .start = 0x00f0,
+ .end = 0x00ff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+} };
+
+#define STANDARD_IO_RESOURCES \
+ (sizeof standard_io_resources / sizeof standard_io_resources[0])
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+ unsigned char *p, sum = 0;
+
+ for (p = rom; p < rom + length; p++)
+ sum += *p;
+ return sum == 0;
+}
+
+static void __init probe_roms(void)
+{
+ unsigned long start, length, upper;
+ unsigned char *rom;
+ int i;
+
+ /* video rom */
+ upper = adapter_rom_resources[0].start;
+ for (start = video_rom_resource.start; start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ video_rom_resource.start = start;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* if checksum okay, trust length byte */
+ if (length && romchecksum(rom, length))
+ video_rom_resource.end = start + length - 1;
+
+ request_resource(&iomem_resource, &video_rom_resource);
+ break;
+ }
+
+ start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+ if (start < upper)
+ start = upper;
+
+ /* system rom */
+ request_resource(&iomem_resource, &system_rom_resource);
+ upper = system_rom_resource.start;
+
+ /* check for extension rom (ignore length byte!) */
+ rom = isa_bus_to_virt(extension_rom_resource.start);
+ if (romsignature(rom)) {
+ length = extension_rom_resource.end - extension_rom_resource.start + 1;
+ if (romchecksum(rom, length)) {
+ request_resource(&iomem_resource, &extension_rom_resource);
+ upper = extension_rom_resource.start;
+ }
+ }
+
+ /* check for adapter roms on 2k boundaries */
+ for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* but accept any length that fits if checksum okay */
+ if (!length || start + length > upper || !romchecksum(rom, length))
+ continue;
+
+ adapter_rom_resources[i].start = start;
+ adapter_rom_resources[i].end = start + length - 1;
+ request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+ start = adapter_rom_resources[i++].end & ~2047UL;
+ }
+}
+#endif
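
romchecksum() accepts a ROM image only if all of its bytes sum to zero modulo 256; option ROMs carry a fix-up byte so vendors can make the sum work out. A stand-alone illustration of the convention with a fabricated 512-byte image:

	#include <stdio.h>

	/* Same rule as romchecksum(): all bytes must sum to 0 modulo 256. */
	static int checksum_ok(const unsigned char *rom, unsigned long len)
	{
		unsigned char sum = 0;
		unsigned long i;

		for (i = 0; i < len; i++)
			sum += rom[i];
		return sum == 0;
	}

	int main(void)
	{
		unsigned char rom[512] = { 0x55, 0xaa, 0x01 };	/* signature + "1 x 512 bytes" */
		unsigned char sum = 0;
		int i;

		for (i = 0; i < 511; i++)
			sum += rom[i];
		rom[511] = (unsigned char)(0 - sum);		/* vendor fix-up byte */

		printf("signature ok: %d\n", rom[0] == 0x55 && rom[1] == 0xaa);
		printf("checksum  ok: %d\n", checksum_ok(rom, sizeof rom));
		return 0;
	}
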
+
+/*
+ * Point at the empty zero page to start with. We map the real shared_info
+ * page as soon as fixmap is up and running.
+ */
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
+
+unsigned long *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
+EXPORT_SYMBOL(phys_to_machine_mapping);
+
+multicall_entry_t multicall_list[8];
+int nr_multicall_ents = 0;
+
+/* Raw start-of-day parameters from the hypervisor. */
+union xen_start_info_union xen_start_info_union;
+
+static void __init limit_regions(unsigned long long size)
+{
+ unsigned long long current_addr = 0;
+ int i;
+
+ if (efi_enabled) {
+ for (i = 0; i < memmap.nr_map; i++) {
+ current_addr = memmap.map[i].phys_addr +
+ (memmap.map[i].num_pages << 12);
+ if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+ if (current_addr >= size) {
+ memmap.map[i].num_pages -=
+ (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
+ memmap.nr_map = i + 1;
+ return;
+ }
+ }
+ }
+ }
+ for (i = 0; i < e820.nr_map; i++) {
+ if (e820.map[i].type == E820_RAM) {
+ current_addr = e820.map[i].addr + e820.map[i].size;
+ if (current_addr >= size) {
+ e820.map[i].size -= current_addr-size;
+ e820.nr_map = i + 1;
+ return;
+ }
+ }
+ }
+}
+
+static void __init add_memory_region(unsigned long long start,
+ unsigned long long size, int type)
+{
+ int x;
+
+ if (!efi_enabled) {
+ x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+ }
+} /* add_memory_region */
+
+#define E820_DEBUG 1
+
+static void __init print_memory_map(char *who)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ printk(" %s: %016Lx - %016Lx ", who,
+ e820.map[i].addr,
+ e820.map[i].addr + e820.map[i].size);
+ switch (e820.map[i].type) {
+ case E820_RAM: printk("(usable)\n");
+ break;
+ case E820_RESERVED:
+ printk("(reserved)\n");
+ break;
+ case E820_ACPI:
+ printk("(ACPI data)\n");
+ break;
+ case E820_NVS:
+ printk("(ACPI NVS)\n");
+ break;
+ default: printk("type %lu\n", e820.map[i].type);
+ break;
+ }
+ }
+}
+
+#if 0
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+struct change_member {
+ struct e820entry *pbios; /* pointer to original bios entry */
+ unsigned long long addr; /* address for this change point */
+};
+struct change_member change_point_list[2*E820MAX] __initdata;
+struct change_member *change_point[2*E820MAX] __initdata;
+struct e820entry *overlap_list[E820MAX] __initdata;
+struct e820entry new_bios[E820MAX] __initdata;
+
+static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+{
+ struct change_member *change_tmp;
+ unsigned long current_type, last_type;
+ unsigned long long last_addr;
+ int chgidx, still_changing;
+ int overlap_entries;
+ int new_bios_entry;
+ int old_nr, new_nr, chg_nr;
+ int i;
+
+ /*
+ Visually we're performing the following (1,2,3,4 = memory types)...
+
+ Sample memory map (w/overlaps):
+ ____22__________________
+ ______________________4_
+ ____1111________________
+ _44_____________________
+ 11111111________________
+ ____________________33__
+ ___________44___________
+ __________33333_________
+ ______________22________
+ ___________________2222_
+ _________111111111______
+ _____________________11_
+ _________________4______
+
+ Sanitized equivalent (no overlap):
+ 1_______________________
+ _44_____________________
+ ___1____________________
+ ____22__________________
+ ______11________________
+ _________1______________
+ __________3_____________
+ ___________44___________
+ _____________33_________
+ _______________2________
+ ________________1_______
+ _________________4______
+ ___________________2____
+ ____________________33__
+ ______________________4_
+ */
+
+ /* if there's only one memory region, don't bother */
+ if (*pnr_map < 2)
+ return -1;
+
+ old_nr = *pnr_map;
+
+ /* bail out if we find any unreasonable addresses in bios map */
+ for (i=0; i<old_nr; i++)
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+ return -1;
+
+ /* create pointers for initial change-point information (for sorting) */
+ for (i=0; i < 2*old_nr; i++)
+ change_point[i] = &change_point_list[i];
+
+ /* record all known change-points (starting and ending addresses),
+ omitting those that are for empty memory regions */
+ chgidx = 0;
+ for (i=0; i < old_nr; i++) {
+ if (biosmap[i].size != 0) {
+ change_point[chgidx]->addr = biosmap[i].addr;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ }
+ }
+ chg_nr = chgidx; /* true number of change-points */
+
+ /* sort change-point list by memory addresses (low -> high) */
+ still_changing = 1;
+ while (still_changing) {
+ still_changing = 0;
+ for (i=1; i < chg_nr; i++) {
+ /* if <current_addr> > <last_addr>, swap */
+ /* or, if current=<start_addr> & last=<end_addr>, swap */
+ if ((change_point[i]->addr < change_point[i-1]->addr) ||
+ ((change_point[i]->addr == change_point[i-1]->addr) &&
+ (change_point[i]->addr == change_point[i]->pbios->addr) &&
+ (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+ )
+ {
+ change_tmp = change_point[i];
+ change_point[i] = change_point[i-1];
+ change_point[i-1] = change_tmp;
+ still_changing=1;
+ }
+ }
+ }
+
+ /* create a new bios memory map, removing overlaps */
+ overlap_entries=0; /* number of entries in the overlap table */
+ new_bios_entry=0; /* index for creating new bios map entries */
+ last_type = 0; /* start with undefined memory type */
+ last_addr = 0; /* start with 0 as last starting address */
+ /* loop through change-points, determining their effect on the new bios map */
+ for (chgidx=0; chgidx < chg_nr; chgidx++)
+ {
+ /* keep track of all overlapping bios entries */
+ if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+ {
+ /* add map entry to overlap list (> 1 entry implies an overlap) */
+ overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+ }
+ else
+ {
+ /* remove entry from list (order independent, so swap with last) */
+ for (i=0; i<overlap_entries; i++)
+ {
+ if (overlap_list[i] == change_point[chgidx]->pbios)
+ overlap_list[i] = overlap_list[overlap_entries-1];
+ }
+ overlap_entries--;
+ }
+ /* if there are overlapping entries, decide which "type" to use */
+ /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+ current_type = 0;
+ for (i=0; i<overlap_entries; i++)
+ if (overlap_list[i]->type > current_type)
+ current_type = overlap_list[i]->type;
+ /* continue building up new bios map based on this information */
+ if (current_type != last_type) {
+ if (last_type != 0) {
+ new_bios[new_bios_entry].size =
+ change_point[chgidx]->addr - last_addr;
+ /* move forward only if the new size was non-zero */
+ if (new_bios[new_bios_entry].size != 0)
+ if (++new_bios_entry >= E820MAX)
+ break; /* no more space left for new bios entries */
+ }
+ if (current_type != 0) {
+ new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+ new_bios[new_bios_entry].type = current_type;
+ last_addr=change_point[chgidx]->addr;
+ }
+ last_type = current_type;
+ }
+ }
+ new_nr = new_bios_entry; /* retain count for new bios entries */
+
+ /* copy new bios mapping into original location */
+ memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+ *pnr_map = new_nr;
+
+ return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and almost every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up. (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+{
+ /* Only one memory region (or negative)? Ignore it */
+ if (nr_map < 2)
+ return -1;
+
+ do {
+ unsigned long long start = biosmap->addr;
+ unsigned long long size = biosmap->size;
+ unsigned long long end = start + size;
+ unsigned long type = biosmap->type;
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+ return -1;
+
+ /*
+ * Some BIOSes claim RAM in the 640k - 1M region.
+ * Not right. Fix it up.
+ */
+ if (type == E820_RAM) {
+ if (start < 0x100000ULL && end > 0xA0000ULL) {
+ if (start < 0xA0000ULL)
+ add_memory_region(start, 0xA0000ULL-start, type);
+ if (end <= 0x100000ULL)
+ continue;
+ start = 0x100000ULL;
+ size = end - start;
+ }
+ }
+ add_memory_region(start, size, type);
+ } while (biosmap++,--nr_map);
+ return 0;
+}
+#endif
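
The (disabled) sanitizer above reduces an overlapping e820 map to disjoint regions by sweeping sorted change points and letting the highest memory type win wherever regions overlap. A compact sweep over a made-up overlapping map, showing the same end result:

	#include <stdio.h>
	#include <stdlib.h>

	struct region { unsigned long start, end; int type; };	/* [start, end) */
	struct cpoint { unsigned long addr; int type; int is_start; };

	static int cmp_cpoint(const void *pa, const void *pb)
	{
		const struct cpoint *a = pa, *b = pb;

		if (a->addr != b->addr)
			return a->addr < b->addr ? -1 : 1;
		return b->is_start - a->is_start;	/* starts sort before ends */
	}

	int main(void)
	{
		struct region in[] = {			/* overlapping, like a messy e820 */
			{ 0, 100, 1 }, { 50, 120, 2 }, { 110, 200, 1 }, { 90, 130, 4 },
		};
		int n = sizeof in / sizeof in[0];
		struct cpoint cp[8];
		int active[5] = { 0 };			/* reference counts per type 1..4 */
		unsigned long last_addr = 0;
		int i, last_type = 0;

		for (i = 0; i < n; i++) {
			cp[2*i].addr = in[i].start; cp[2*i].type = in[i].type; cp[2*i].is_start = 1;
			cp[2*i+1].addr = in[i].end; cp[2*i+1].type = in[i].type; cp[2*i+1].is_start = 0;
		}
		qsort(cp, 2 * n, sizeof cp[0], cmp_cpoint);

		for (i = 0; i < 2 * n; i++) {
			int t, cur = 0;

			active[cp[i].type] += cp[i].is_start ? 1 : -1;
			for (t = 4; t > 0; t--)		/* highest type wins in overlaps */
				if (active[t]) { cur = t; break; }
			if (cur != last_type) {
				if (last_type && cp[i].addr != last_addr)
					printf("%4lu - %4lu  type %d\n",
					       last_addr, cp[i].addr, last_type);
				last_addr = cp[i].addr;
				last_type = cur;
			}
		}
		return 0;
	}
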
+
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+struct edd edd;
+#ifdef CONFIG_EDD_MODULE
+EXPORT_SYMBOL(edd);
+#endif
+/**
+ * copy_edd() - Copy the BIOS EDD information
+ * from boot_params into a safe place.
+ *
+ */
+static inline void copy_edd(void)
+{
+ memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
+ memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
+ edd.mbr_signature_nr = EDD_MBR_SIG_NR;
+ edd.edd_info_nr = EDD_NR;
+}
+#else
+static inline void copy_edd(void)
+{
+}
+#endif
+
+/*
+ * Do NOT EVER look at the BIOS memory size location.
+ * It does not work on many machines.
+ */
+#define LOWMEMSIZE() (0x9f000)
+
+static void __init parse_cmdline_early (char ** cmdline_p)
+{
+ char c = ' ', *to = command_line, *from = saved_command_line;
+ int len = 0;
+ int userdef = 0;
+
+ memcpy(saved_command_line, xen_start_info.cmd_line, MAX_CMDLINE);
+ /* Save unparsed command line copy for /proc/cmdline */
+ saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+ for (;;) {
+ /*
+ * "mem=nopentium" disables the 4MB page tables.
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
+ *
+ * HPA tells me bootloaders need to parse mem=, so no new
+ * option should be mem= [also see Documentation/i386/boot.txt]
+ */
+ if (c == ' ' && !memcmp(from, "mem=", 4)) {
+ if (to != command_line)
+ to--;
+ if (!memcmp(from+4, "nopentium", 9)) {
+ from += 9+4;
+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+ disable_pse = 1;
+ } else {
+ /* If the user specifies memory size, we
+ * limit the BIOS-provided memory map to
+ * that size. exactmap can be used to specify
+ * the exact map. mem=number can be used to
+ * trim the existing memory map.
+ */
+ unsigned long long mem_size;
+
+ mem_size = memparse(from+4, &from);
+ limit_regions(mem_size);
+ userdef=1;
+ }
+ }
+
+ if (c == ' ' && !memcmp(from, "memmap=", 7)) {
+ if (to != command_line)
+ to--;
+ if (!memcmp(from+7, "exactmap", 8)) {
+ from += 8+7;
+ e820.nr_map = 0;
+ userdef = 1;
+ } else {
+ /* If the user specifies memory size, we
+ * limit the BIOS-provided memory map to
+ * that size. exactmap can be used to specify
+ * the exact map. mem=number can be used to
+ * trim the existing memory map.
+ */
+ unsigned long long start_at, mem_size;
+
+ mem_size = memparse(from+7, &from);
+ if (*from == '@') {
+ start_at = memparse(from+1, &from);
+ add_memory_region(start_at, mem_size, E820_RAM);
+ } else if (*from == '#') {
+ start_at = memparse(from+1, &from);
+ add_memory_region(start_at, mem_size, E820_ACPI);
+ } else if (*from == '$') {
+ start_at = memparse(from+1, &from);
+ add_memory_region(start_at, mem_size, E820_RESERVED);
+ } else {
+ limit_regions(mem_size);
+ userdef=1;
+ }
+ }
+ }
+
+#ifdef CONFIG_X86_SMP
+ /*
+ * If the BIOS enumerates physical processors before logical,
+ * maxcpus=N at enumeration-time can be used to disable HT.
+ */
+ else if (!memcmp(from, "maxcpus=", 8)) {
+ extern unsigned int maxcpus;
+
+ maxcpus = simple_strtoul(from + 8, NULL, 0);
+ }
+#endif
+
+#ifdef CONFIG_ACPI_BOOT
+ /* "acpi=off" disables both ACPI table parsing and interpreter */
+ else if (!memcmp(from, "acpi=off", 8)) {
+ disable_acpi();
+ }
+
+ /* acpi=force to over-ride black-list */
+ else if (!memcmp(from, "acpi=force", 10)) {
+ acpi_force = 1;
+ acpi_ht = 1;
+ acpi_disabled = 0;
+ }
+
+ /* acpi=strict disables out-of-spec workarounds */
+ else if (!memcmp(from, "acpi=strict", 11)) {
+ acpi_strict = 1;
+ }
+
+ /* Limit ACPI just to boot-time to enable HT */
+ else if (!memcmp(from, "acpi=ht", 7)) {
+ if (!acpi_force)
+ disable_acpi();
+ acpi_ht = 1;
+ }
+
+ /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
+ else if (!memcmp(from, "pci=noacpi", 10)) {
+ acpi_disable_pci();
+ }
+ /* "acpi=noirq" disables ACPI interrupt routing */
+ else if (!memcmp(from, "acpi=noirq", 10)) {
+ acpi_noirq_set();
+ }
+
+ else if (!memcmp(from, "acpi_sci=edge", 13))
+ acpi_sci_flags.trigger = 1;
+
+ else if (!memcmp(from, "acpi_sci=level", 14))
+ acpi_sci_flags.trigger = 3;
+
+ else if (!memcmp(from, "acpi_sci=high", 13))
+ acpi_sci_flags.polarity = 1;
+
+ else if (!memcmp(from, "acpi_sci=low", 12))
+ acpi_sci_flags.polarity = 3;
+
+#ifdef CONFIG_X86_IO_APIC
+ else if (!memcmp(from, "acpi_skip_timer_override", 24))
+ acpi_skip_timer_override = 1;
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ /* disable IO-APIC */
+ else if (!memcmp(from, "noapic", 6))
+ disable_ioapic_setup();
+#endif /* CONFIG_X86_LOCAL_APIC */
+#endif /* CONFIG_ACPI_BOOT */
+
+ /*
+ * highmem=size forces highmem to be exactly 'size' bytes.
+ * This works even on boxes that have no highmem otherwise.
+ * This also works to reduce highmem size on bigger boxes.
+ */
+ if (c == ' ' && !memcmp(from, "highmem=", 8))
+ highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
+
+ /*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the
+ * vmalloc area - the default is 128m.
+ */
+ if (c == ' ' && !memcmp(from, "vmalloc=", 8))
+ __VMALLOC_RESERVE = memparse(from+8, &from);
+
+ c = *(from++);
+ if (!c)
+ break;
+ if (COMMAND_LINE_SIZE <= ++len)
+ break;
+ *(to++) = c;
+ }
+ *to = '\0';
+ *cmdline_p = command_line;
+ if (userdef) {
+ printk(KERN_INFO "user-defined physical RAM map:\n");
+ print_memory_map("user");
+ }
+}
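
memparse(), used for the mem=, memmap=, highmem= and vmalloc= options above, reads a number and scales it by an optional K/M/G suffix, returning a pointer to the first unparsed character. A simplified user-space stand-in (not the kernel routine) and two sample inputs:

	#include <stdio.h>
	#include <stdlib.h>

	/* Simplified stand-in for memparse(): number plus optional K/M/G suffix. */
	static unsigned long long parse_size(const char *s, const char **retptr)
	{
		char *end;
		unsigned long long val = strtoull(s, &end, 0);

		switch (*end) {
		case 'G': case 'g': val <<= 10;	/* fall through */
		case 'M': case 'm': val <<= 10;	/* fall through */
		case 'K': case 'k': val <<= 10; end++;
		default: break;
		}
		if (retptr)
			*retptr = end;
		return val;
	}

	int main(void)
	{
		const char *rest;

		printf("%llu\n", parse_size("64M", &rest));		/* 67108864 */
		printf("%llu\n", parse_size("16384k@1M", &rest));	/* 16777216 */
		printf("rest: %s\n", rest);				/* "@1M"    */
		return 0;
	}
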
+
+/*
+ * Callback for efi_memory_walk.
+ */
+static int __init
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long *max_pfn = arg, pfn;
+
+ if (start < end) {
+ pfn = PFN_UP(end -1);
+ if (pfn > *max_pfn)
+ *max_pfn = pfn;
+ }
+ return 0;
+}
+
+
+/*
+ * Find the highest page frame number we have available
+ */
+void __init find_max_pfn(void)
+{
+ int i;
+
+ max_pfn = 0;
+ if (efi_enabled) {
+ efi_memmap_walk(efi_find_max_pfn, &max_pfn);
+ return;
+ }
+
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long start, end;
+ /* RAM? */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ start = PFN_UP(e820.map[i].addr);
+ end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ if (start >= end)
+ continue;
+ if (end > max_pfn)
+ max_pfn = end;
+ }
+}
+
+/*
+ * Determine low and high memory ranges:
+ */
+unsigned long __init find_max_low_pfn(void)
+{
+ unsigned long max_low_pfn;
+
+ max_low_pfn = max_pfn;
+ if (max_low_pfn > MAXMEM_PFN) {
+ if (highmem_pages == -1)
+ highmem_pages = max_pfn - MAXMEM_PFN;
+ if (highmem_pages + MAXMEM_PFN < max_pfn)
+ max_pfn = MAXMEM_PFN + highmem_pages;
+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
+ printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
+ highmem_pages = 0;
+ }
+ max_low_pfn = MAXMEM_PFN;
+#ifndef CONFIG_HIGHMEM
+ /* Maximum memory usable is what is directly addressable */
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+ MAXMEM>>20);
+ if (max_pfn > MAX_NONPAE_PFN)
+ printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ else
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+ max_pfn = MAXMEM_PFN;
+#else /* !CONFIG_HIGHMEM */
+#ifndef CONFIG_X86_PAE
+ if (max_pfn > MAX_NONPAE_PFN) {
+ max_pfn = MAX_NONPAE_PFN;
+ printk(KERN_WARNING "Warning only 4GB will be used.\n");
+ printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ }
+#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM */
+ } else {
+ if (highmem_pages == -1)
+ highmem_pages = 0;
+#ifdef CONFIG_HIGHMEM
+ if (highmem_pages >= max_pfn) {
+ printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
+ highmem_pages = 0;
+ }
+ if (highmem_pages) {
+ if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
+ printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
+ highmem_pages = 0;
+ }
+ max_low_pfn -= highmem_pages;
+ }
+#else
+ if (highmem_pages)
+ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
+#endif
+ }
+ return max_low_pfn;
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+
+/*
+ * Free all available memory for boot time allocation. Used
+ * as a callback function by efi_memory_walk()
+ */
+
+static int __init
+free_available_memory(unsigned long start, unsigned long end, void *arg)
+{
+ /* check max_low_pfn */
+ if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ return 0;
+ if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
+ end = (max_low_pfn + 1) << PAGE_SHIFT;
+ if (start < end)
+ free_bootmem(start, end - start);
+
+ return 0;
+}
+/*
+ * Register fully available low RAM pages with the bootmem allocator.
+ */
+static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
+{
+ int i;
+
+ if (efi_enabled) {
+ efi_memmap_walk(free_available_memory, NULL);
+ return;
+ }
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long curr_pfn, last_pfn, size;
+ /*
+ * Reserve usable low memory
+ */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ /*
+ * We are rounding up the start address of usable memory:
+ */
+ curr_pfn = PFN_UP(e820.map[i].addr);
+ if (curr_pfn >= max_low_pfn)
+ continue;
+ /*
+ * ... and at the end of the usable range downwards:
+ */
+ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+ if (last_pfn > max_low_pfn)
+ last_pfn = max_low_pfn;
+
+ /*
+ * .. finally, did all the rounding and playing
+ * around just make the area go away?
+ */
+ if (last_pfn <= curr_pfn)
+ continue;
+
+ size = last_pfn - curr_pfn;
+ free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+ }
+}
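
PFN_UP() rounds a byte address up to the next page frame and PFN_DOWN() rounds down, so only whole pages inside a region get registered with the bootmem allocator. A quick numeric check of that rounding, assuming 4 KB pages:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	/* Round a byte address up/down to a page frame number. */
	#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
	#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

	int main(void)
	{
		unsigned long start = 0x9d800;		/* region starts mid-page */
		unsigned long end   = 0x9f400;		/* region ends mid-page   */

		/* Usable whole pages: round the start up, the end down. */
		printf("first full pfn: %lu\n", PFN_UP(start));	/* 158 (0x9e000) */
		printf("pfn past last:  %lu\n", PFN_DOWN(end));	/* 159 (0x9f000) */
		printf("full pages:     %lu\n", PFN_DOWN(end) - PFN_UP(start));
		return 0;
	}
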
+
+/*
+ * workaround for Dell systems that neglect to reserve EBDA
+ */
+static void __init reserve_ebda_region(void)
+{
+ unsigned int addr;
+ addr = get_bios_ebda();
+ if (addr)
+ reserve_bootmem(addr, PAGE_SIZE);
+}
+
+static unsigned long __init setup_memory(void)
+{
+ unsigned long bootmap_size, start_pfn, max_low_pfn;
+
+ /*
+ * partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+ start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
+
+ find_max_pfn();
+
+ max_low_pfn = find_max_low_pfn();
+
+#ifdef CONFIG_HIGHMEM
+ highstart_pfn = highend_pfn = max_pfn;
+ if (max_pfn > max_low_pfn) {
+ highstart_pfn = max_low_pfn;
+ }
+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+ pages_to_mb(highend_pfn - highstart_pfn));
+#endif
+ printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+ pages_to_mb(max_low_pfn));
+ /*
+ * Initialize the boot-time allocator (with low memory only):
+ */
+ bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+
+ register_bootmem_low_pages(max_low_pfn);
+
+ /*
+ * Reserve the bootmem bitmap itself as well. We do this in two
+ * steps (first step was init_bootmem()) because this catches
+ * the (very unlikely) case of us accidentally initializing the
+ * bootmem allocator with an invalid RAM area.
+ */
+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+
+ /* reserve EBDA region, it's a 4K region */
+ reserve_ebda_region();
+
+ /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
+ PCI prefetch into it (errata #56). Usually the page is reserved anyways,
+ unless you have no PS/2 mouse plugged in. */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ boot_cpu_data.x86 == 6)
+ reserve_bootmem(0xa0000 - 4096, 4096);
+
+#ifdef CONFIG_SMP
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+#endif
+#ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+ */
+ acpi_reserve_bootmem();
+#endif
+#ifdef CONFIG_X86_FIND_SMP_CONFIG
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (xen_start_info.mod_start) {
+ if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
+ /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
+ initrd_start = INITRD_START + PAGE_OFFSET;
+ initrd_end = initrd_start+INITRD_SIZE;
+ initrd_below_start_ok = 1;
+ }
+ else {
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ INITRD_START + INITRD_SIZE,
+ max_low_pfn << PAGE_SHIFT);
+ initrd_start = 0;
+ }
+ }
+#endif
+
+ phys_to_machine_mapping = (unsigned long *)xen_start_info.mfn_list;
+
+ return max_low_pfn;
+}
+#else
+extern unsigned long setup_memory(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+/*
+ * Request address space for all standard RAM and ROM resources
+ * and also for regions reported as reserved by the e820.
+ */
+static void __init
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+{
+ int i;
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ probe_roms();
+#endif
+ for (i = 0; i < e820.nr_map; i++) {
+ struct resource *res;
+ if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+ continue;
+ res = alloc_bootmem_low(sizeof(struct resource));
+ switch (e820.map[i].type) {
+ case E820_RAM: res->name = "System RAM"; break;
+ case E820_ACPI: res->name = "ACPI Tables"; break;
+ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
+ default: res->name = "reserved";
+ }
+ res->start = e820.map[i].addr;
+ res->end = res->start + e820.map[i].size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ request_resource(&iomem_resource, res);
+ if (e820.map[i].type == E820_RAM) {
+ /*
+ * We don't know which RAM region contains kernel data,
+ * so we try it repeatedly and let the resource manager
+ * test it.
+ */
+ request_resource(res, code_resource);
+ request_resource(res, data_resource);
+ }
+ }
+}
+
+/*
+ * Request address space for all standard resources
+ */
+static void __init register_memory(unsigned long max_low_pfn)
+{
+ unsigned long low_mem_size;
+ int i;
+
+ if (efi_enabled)
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
+ else
+ legacy_init_iomem_resources(&code_resource, &data_resource);
+
+ /* EFI systems may still have VGA */
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ /* request I/O space for devices used on all i[345]86 PCs */
+ for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+
+ /* Tell the PCI layer not to allocate too close to the RAM area.. */
+ low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+ if (low_mem_size > pci_mem_start)
+ pci_mem_start = low_mem_size;
+}
+
+/* Use inline assembly to define this because the nops are defined
+ as inline assembly strings in the include files and we cannot
+ get them easily into strings. */
+asm("\t.data\nintelnops: "
+ GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
+ GENERIC_NOP7 GENERIC_NOP8);
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+asm("\t.data\nk7nops: "
+ K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+ K7_NOP7 K7_NOP8);
+
+extern unsigned char intelnops[], k8nops[], k7nops[];
+static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
+ NULL,
+ intelnops,
+ intelnops + 1,
+ intelnops + 1 + 2,
+ intelnops + 1 + 2 + 3,
+ intelnops + 1 + 2 + 3 + 4,
+ intelnops + 1 + 2 + 3 + 4 + 5,
+ intelnops + 1 + 2 + 3 + 4 + 5 + 6,
+ intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
+ NULL,
+ k8nops,
+ k8nops + 1,
+ k8nops + 1 + 2,
+ k8nops + 1 + 2 + 3,
+ k8nops + 1 + 2 + 3 + 4,
+ k8nops + 1 + 2 + 3 + 4 + 5,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
+ NULL,
+ k7nops,
+ k7nops + 1,
+ k7nops + 1 + 2,
+ k7nops + 1 + 2 + 3,
+ k7nops + 1 + 2 + 3 + 4,
+ k7nops + 1 + 2 + 3 + 4 + 5,
+ k7nops + 1 + 2 + 3 + 4 + 5 + 6,
+ k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+};
+static struct nop {
+ int cpuid;
+ unsigned char **noptable;
+} noptypes[] = {
+ { X86_FEATURE_K8, k8_nops },
+ { X86_FEATURE_K7, k7_nops },
+ { -1, NULL }
+};
+
+/* Replace instructions with better alternatives for this CPU type.
+
+ This runs before SMP is initialized to avoid SMP problems with
+ self-modifying code. This implies that asymmetric systems where
+ APs have fewer capabilities than the boot processor are not handled.
+ In this case boot with "noreplacement". */
+void apply_alternatives(void *start, void *end)
+{
+ struct alt_instr *a;
+ int diff, i, k;
+ unsigned char **noptable = intel_nops;
+ for (i = 0; noptypes[i].cpuid >= 0; i++) {
+ if (boot_cpu_has(noptypes[i].cpuid)) {
+ noptable = noptypes[i].noptable;
+ break;
+ }
+ }
+ for (a = start; (void *)a < end; a++) {
+ if (!boot_cpu_has(a->cpuid))
+ continue;
+ BUG_ON(a->replacementlen > a->instrlen);
+ memcpy(a->instr, a->replacement, a->replacementlen);
+ diff = a->instrlen - a->replacementlen;
+ /* Pad the rest with nops */
+ for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
+ k = diff;
+ if (k > ASM_NOP_MAX)
+ k = ASM_NOP_MAX;
+ memcpy(a->instr + i, noptable[k], k);
+ }
+ }
+}
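
After the shorter replacement is copied over the original instruction, the leftover bytes must remain executable, so they are filled with NOP sequences of at most ASM_NOP_MAX bytes each. A byte-buffer sketch of that padding loop; 0x90 (the one-byte x86 NOP) stands in for the real multi-byte NOP tables:

	#include <stdio.h>
	#include <string.h>

	#define ASM_NOP_MAX 8

	int main(void)
	{
		unsigned char slot[16];
		const unsigned char repl[] = { 0xaa, 0xbb, 0xcc };	/* shorter replacement */
		unsigned char nop_fill[ASM_NOP_MAX + 1][ASM_NOP_MAX];
		int instrlen = 9, replacementlen = sizeof repl;
		int diff, i, k;

		memset(nop_fill, 0x90, sizeof nop_fill);	/* 0x90 = 1-byte NOP */
		memset(slot, 0xf4, sizeof slot);		/* pretend original bytes */
		memcpy(slot, repl, replacementlen);

		/* Pad the remaining bytes in chunks of at most ASM_NOP_MAX. */
		diff = instrlen - replacementlen;
		for (i = replacementlen; diff > 0; diff -= k, i += k) {
			k = diff > ASM_NOP_MAX ? ASM_NOP_MAX : diff;
			memcpy(slot + i, nop_fill[k], k);
		}

		for (i = 0; i < instrlen; i++)
			printf("%02x ", slot[i]);
		printf("\n");				/* aa bb cc 90 90 90 90 90 90 */
		return 0;
	}
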
+
+static int no_replacement __initdata = 0;
+
+void __init alternative_instructions(void)
+{
+ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+ if (no_replacement)
+ return;
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+}
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 0;
+}
+
+__setup("noreplacement", noreplacement_setup);
+
+static char * __init machine_specific_memory_setup(void);
+
+/*
+ * Determine if we were loaded by an EFI loader. If so, then we have also been
+ * passed the efi memmap, systab, etc., so we should use these data structures
+ * for initialization. Note, the efi init code path is determined by the
+ * global efi_enabled. This allows the same kernel image to be used on existing
+ * systems (with a traditional BIOS) as well as on EFI systems.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ int i,j;
+
+ unsigned long max_low_pfn;
+
+ /* Force a quick death if the kernel panics. */
+ extern int panic_timeout;
+ if ( panic_timeout == 0 )
+ panic_timeout = 1;
+
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_4gb_segments);
+
+ memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+ early_cpu_init();
+
+ /*
+ * FIXME: This isn't an official loader_type right
+ * now but does currently work with elilo.
+ * If we were configured as an EFI kernel, check to make
+ * sure that we were loaded correctly from elilo and that
+ * the system table is valid. If not, then initialize normally.
+ */
+#ifdef CONFIG_EFI
+ if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
+ efi_enabled = 1;
+#endif
+
+ ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); /*old_decode_dev(ORIG_ROOT_DEV);*/
+ drive_info = DRIVE_INFO;
+ screen_info = SCREEN_INFO;
+ edid_info = EDID_INFO;
+ apm_info.bios = APM_BIOS_INFO;
+ ist_info = IST_INFO;
+ saved_videomode = VIDEO_MODE;
+ if( SYS_DESC_TABLE.length != 0 ) {
+ MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
+ machine_id = SYS_DESC_TABLE.table[0];
+ machine_submodel_id = SYS_DESC_TABLE.table[1];
+ BIOS_revision = SYS_DESC_TABLE.table[2];
+ }
+ aux_device_present = AUX_DEVICE_INFO;
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ /* This is drawn from a dump from vgacon:startup in standard Linux. */
+ screen_info.orig_video_mode = 3;
+ screen_info.orig_video_isVGA = 1;
+ screen_info.orig_video_lines = 25;
+ screen_info.orig_video_cols = 80;
+ screen_info.orig_video_ega_bx = 3;
+ screen_info.orig_video_points = 16;
+#endif
+
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+ ARCH_SETUP
+ if (efi_enabled)
+ efi_init();
+ else {
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ print_memory_map(machine_specific_memory_setup());
+ }
+
+ copy_edd();
+
+ if (!MOUNT_ROOT_RDONLY)
+ root_mountflags &= ~MS_RDONLY;
+ init_mm.start_code = (unsigned long) _text;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames) << PAGE_SHIFT;
+
++ /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
++ /*code_resource.start = virt_to_phys(_text);*/
++ /*code_resource.end = virt_to_phys(_etext)-1;*/
++ /*data_resource.start = virt_to_phys(_etext);*/
++ /*data_resource.end = virt_to_phys(_edata)-1;*/
+
+ parse_cmdline_early(cmdline_p);
+
+ max_low_pfn = setup_memory();
+
+ /*
+ * NOTE: before this point _nobody_ is allowed to allocate
+ * any memory using the bootmem allocator. Although the
+ * allocator is now initialised, only the first 8Mb of the kernel
+ * virtual address space has been mapped. All allocations before
+ * paging_init() has completed must use the alloc_bootmem_low_pages()
+ * variant (which allocates DMA'able memory) and care must be taken
+ * not to exceed the 8Mb limit.
+ */
+
+#ifdef CONFIG_SMP
+ smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
+#endif
+ paging_init();
+
+ pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
+ for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
+ {
+ pfn_to_mfn_frame_list[j] =
+ virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ }
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
+ virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+
+
+ /*
+ * NOTE: at this point the bootmem allocator is fully available.
+ */
+
+#ifdef CONFIG_EARLY_PRINTK
+ {
+ char *s = strstr(*cmdline_p, "earlyprintk=");
+ if (s) {
+ extern void setup_early_printk(char *);
+
+ setup_early_printk(s);
+ printk("early console enabled\n");
+ }
+ }
+#endif
+
+
+ dmi_scan_machine();
+
+#ifdef CONFIG_X86_GENERICARCH
+ generic_apic_probe(*cmdline_p);
+#endif
+ if (efi_enabled)
+ efi_map_memmap();
+
+ /*
+ * Parse the ACPI tables for possible boot-time SMP configuration.
+ */
+ acpi_boot_init();
+
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (smp_found_config)
+ get_smp_config();
+#endif
+
+ register_memory(max_low_pfn);
+
+ /* If we are a privileged guest OS then we should request IO privs. */
+ if (xen_start_info.flags & SIF_PRIVILEGED) {
+ dom0_op_t op;
+ op.cmd = DOM0_IOPL;
+ op.u.iopl.domain = DOMID_SELF;
+ op.u.iopl.iopl = 1;
+ if (HYPERVISOR_dom0_op(&op) != 0)
+ panic("Unable to obtain IOPL, despite SIF_PRIVILEGED");
+ current->thread.io_pl = 1;
+ }
+
+ if (xen_start_info.flags & SIF_INITDOMAIN) {
+ if (!(xen_start_info.flags & SIF_PRIVILEGED))
+ panic("Xen granted us console access "
+ "but not privileged status");
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+ if (!efi_enabled ||
+ (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+#endif
+#endif
+ } else {
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ extern const struct consw xennull_con;
+ extern int console_use_vt;
+#if defined(CONFIG_VGA_CONSOLE)
+ /* disable VGA driver */
+ ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
+#endif
+ conswitchp = &xennull_con;
+ console_use_vt = 0;
+#endif
+ }
+}
+
+#include "setup_arch_post.h"
+/*
+ * Local Variables:
+ * mode:c
+ * c-file-style:"k&r"
+ * c-basic-offset:8
+ * End:
+ */
--- /dev/null
- asmlinkage void double_fault(void);
+/*
+ * linux/arch/i386/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kallsyms.h>
+#include <linux/ptrace.h>
+#include <linux/utsname.h>
+#include <linux/kprobes.h>
+
+#ifdef CONFIG_EISA
+#include <linux/ioport.h>
+#include <linux/eisa.h>
+#endif
+
+#ifdef CONFIG_MCA
+#include <linux/mca.h>
+#endif
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/nmi.h>
+
+#include <asm/smp.h>
+#include <asm/arch_hooks.h>
+#include <asm/kdebug.h>
+
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include "mach_traps.h"
+
+asmlinkage int system_call(void);
+
+/* Do we ignore FPU interrupts ? */
+char ignore_fpu_irq = 0;
+
+/*
+ * The IDT has to be page-aligned to simplify the Pentium
+ * F0 0F bug workaround.. We have a special link segment
+ * for this.
+ */
+struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, };
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
- DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void alignment_check(void);
+asmlinkage void fixup_4gb_segment(void);
+asmlinkage void machine_check(void);
+
+static int kstack_depth_to_print = 24;
+struct notifier_block *i386die_chain;
+static spinlock_t die_notifier_lock = SPIN_LOCK_UNLOCKED;
+
+int register_die_notifier(struct notifier_block *nb)
+{
+ int err = 0;
+ unsigned long flags;
+ spin_lock_irqsave(&die_notifier_lock, flags);
+ err = notifier_chain_register(&i386die_chain, nb);
+ spin_unlock_irqrestore(&die_notifier_lock, flags);
+ return err;
+}
+
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+ return p > (void *)tinfo &&
+ p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+static inline unsigned long print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long ebp)
+{
+ unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+ while (valid_stack_ptr(tinfo, (void *)ebp)) {
+ addr = *(unsigned long *)(ebp + 4);
+ printk(" [<%08lx>] ", addr);
+ print_symbol("%s", addr);
+ printk("\n");
+ ebp = *(unsigned long *)ebp;
+ }
+#else
+ while (valid_stack_ptr(tinfo, stack)) {
+ addr = *stack++;
+ if (__kernel_text_address(addr)) {
+ printk(" [<%08lx>]", addr);
+ print_symbol(" %s", addr);
+ printk("\n");
+ }
+ }
+#endif
+ return ebp;
+}
+
+void show_trace(struct task_struct *task, unsigned long * stack)
+{
+ unsigned long ebp;
+
+ if (!task)
+ task = current;
+
+ if (task == current) {
+ /* Grab ebp right from our regs */
+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
+ } else {
+ /* ebp is the last reg pushed by switch_to */
+ ebp = *(unsigned long *) task->thread.esp;
+ }
+
+ while (1) {
+ struct thread_info *context;
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+ ebp = print_context_stack(context, stack, ebp);
+ stack = (unsigned long*)context->previous_esp;
+ if (!stack)
+ break;
+ printk(" =======================\n");
+ }
+}
+
+void show_stack(struct task_struct *task, unsigned long *esp)
+{
+ unsigned long *stack;
+ int i;
+
+ if (esp == NULL) {
+ if (task)
+ esp = (unsigned long*)task->thread.esp;
+ else
+ esp = (unsigned long *)&esp;
+ }
+
+ stack = esp;
+ for(i = 0; i < kstack_depth_to_print; i++) {
+ if (kstack_end(stack))
+ break;
+ if (i && ((i % 8) == 0))
+ printk("\n ");
+ printk("%08lx ", *stack++);
+ }
+ printk("\nCall Trace:\n");
+ show_trace(task, esp);
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long stack;
+
+ show_trace(current, &stack);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void show_registers(struct pt_regs *regs)
+{
+ int i;
+ int in_kernel = 1;
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) (&regs->esp);
+ ss = __KERNEL_DS;
+ if (regs->xcs & 2) {
+ in_kernel = 0;
+ esp = regs->esp;
+ ss = regs->xss & 0xffff;
+ }
+ print_modules();
+ printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx"
+ " (%s) \n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+ print_tainted(), regs->eflags, system_utsname.release);
+ print_symbol("EIP is at %s\n", regs->eip);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss);
+ printk("Process %s (pid: %d, threadinfo=%p task=%p)",
+ current->comm, current->pid, current_thread_info(), current);
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault..
+ */
+ if (in_kernel) {
+ u8 *eip;
+
+ printk("\nStack: ");
+ show_stack(NULL, (unsigned long*)esp);
+
+ printk("Code: ");
+
+ eip = (u8 *)regs->eip - 43;
+ for (i = 0; i < 64; i++, eip++) {
+ unsigned char c;
+
+ if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) {
+ printk(" Bad EIP value.");
+ break;
+ }
+ if (eip == (u8 *)regs->eip)
+ printk("<%02x> ", c);
+ else
+ printk("%02x ", c);
+ }
+ }
+ printk("\n");
+}
+
+static void handle_BUG(struct pt_regs *regs)
+{
+ unsigned short ud2;
+ unsigned short line;
+ char *file;
+ char c;
+ unsigned long eip;
+
+ if (regs->xcs & 2)
+ goto no_bug; /* Not in kernel */
+
+ eip = regs->eip;
+
+ if (eip < PAGE_OFFSET)
+ goto no_bug;
+ if (__get_user(ud2, (unsigned short *)eip))
+ goto no_bug;
+ if (ud2 != 0x0b0f)
+ goto no_bug;
+ if (__get_user(line, (unsigned short *)(eip + 2)))
+ goto bug;
+ if (__get_user(file, (char **)(eip + 4)) ||
+ (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+ file = "<bad filename>";
+
+ printk("------------[ cut here ]------------\n");
+ printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line);
+
+no_bug:
+ return;
+
+ /* Here we know it was a BUG but file-n-line is unavailable */
+bug:
+ printk("Kernel BUG\n");
+}
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+ static struct {
+ spinlock_t lock;
+ u32 lock_owner;
+ int lock_owner_depth;
+ } die = {
+ .lock = SPIN_LOCK_UNLOCKED,
+ .lock_owner = -1,
+ .lock_owner_depth = 0
+ };
+ static int die_counter;
+
+ if (die.lock_owner != smp_processor_id()) {
+ console_verbose();
+ spin_lock_irq(&die.lock);
+ die.lock_owner = smp_processor_id();
+ die.lock_owner_depth = 0;
+ bust_spinlocks(1);
+ }
+
+ if (++die.lock_owner_depth < 3) {
+ int nl = 0;
+ handle_BUG(regs);
+ printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+ nl = 1;
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+ nl = 1;
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+ nl = 1;
+#endif
+ if (nl)
+ printk("\n");
+ notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
+ show_registers(regs);
+ } else
+ printk(KERN_ERR "Recursive die() failure, output suppressed\n");
+
+ bust_spinlocks(0);
+ die.lock_owner = -1;
+ spin_unlock_irq(&die.lock);
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+
+ if (panic_on_oops) {
+ printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(5 * HZ);
+ panic("Fatal exception");
+ }
+ do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+ if (!(regs->eflags & VM_MASK) && !(2 & regs->xcs))
+ die(str, regs, err);
+}
+
+static inline unsigned long get_cr2(void)
+{
+ unsigned long address;
+
+ /* get the address */
+ __asm__("movl %%cr2,%0":"=r" (address));
+ return address;
+}
+
+static inline void do_trap(int trapnr, int signr, char *str, int vm86,
+ struct pt_regs * regs, long error_code, siginfo_t *info)
+{
+ if (regs->eflags & VM_MASK) {
+ if (vm86)
+ goto vm86_trap;
+ goto trap_signal;
+ }
+
+ if (!(regs->xcs & 2))
+ goto kernel_trap;
+
+ trap_signal: {
+ struct task_struct *tsk = current;
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+ if (info)
+ force_sig_info(signr, info, tsk);
+ else
+ force_sig(signr, tsk);
+ return;
+ }
+
+ kernel_trap: {
+ if (!fixup_exception(regs))
+ die(str, regs, error_code);
+ return;
+ }
+
+ vm86_trap: {
+ int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
+ if (ret) goto trap_signal;
+ return;
+ }
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ siginfo_t info; \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+}
+
+#define DO_VM86_ERROR(trapnr, signr, str, name) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+}
+
+#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+fastcall void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ siginfo_t info; \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
+}
+
+DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
+#ifndef CONFIG_KPROBES
+DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
+#endif
+DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
+DO_VM86_ERROR( 7, SIGSEGV, "device not available", device_not_available)
- { 8, 0, __KERNEL_CS, (unsigned long)double_fault },
+DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
+#ifdef CONFIG_X86_MCE
+DO_ERROR(18, SIGBUS, "machine check", machine_check)
+#endif
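To make the stamped-out handlers concrete, DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) expands to roughly the following (whitespace adjusted):

    fastcall void do_invalid_TSS(struct pt_regs * regs, long error_code)
    {
        if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code, 10, SIGSEGV)
                        == NOTIFY_STOP)
            return;
        do_trap(10, SIGSEGV, "invalid TSS", 0, regs, error_code, NULL);
    }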
+
+fastcall void do_general_protection(struct pt_regs * regs, long error_code)
+{
+ /*
+ * If we trapped on an LDT access then ensure that the default_ldt is
+ * loaded, if nothing else. We load default_ldt lazily because LDT
+ * switching costs time and many applications don't need it.
+ */
+ if (unlikely((error_code & 6) == 4)) {
+ unsigned long ldt;
+ __asm__ __volatile__ ("sldt %0" : "=r" (ldt));
+ if (ldt == 0) {
+ mmu_update_t u;
+ u.ptr = MMU_EXTENDED_COMMAND;
+ u.ptr |= (unsigned long)&default_ldt[0];
+ u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
+ if (unlikely(HYPERVISOR_mmu_update(&u, 1, NULL) < 0)) {
+ show_trace(NULL, (unsigned long *)&u);
+ panic("Failed to install default LDT");
+ }
+ return;
+ }
+ }
+
+ if (regs->eflags & VM_MASK)
+ goto gp_in_vm86;
+
+ if (!(regs->xcs & 2))
+ goto gp_in_kernel;
+
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
+ force_sig(SIGSEGV, current);
+ return;
+
+gp_in_vm86:
+ local_irq_enable();
+ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+ return;
+
+gp_in_kernel:
+ if (!fixup_exception(regs)) {
+ if (notify_die(DIE_GPF, "general protection fault", regs,
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
+ return;
+ die("general protection fault", regs, error_code);
+ }
+}
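The (error_code & 6) == 4 test above relies on the x86 selector error-code layout: bit 0 = EXT, bit 1 = IDT, bit 2 = TI. Spelled out as a hypothetical helper (illustrative only, not part of the patch):

    static inline int gpf_referenced_ldt(long error_code)
    {
        /* Not an IDT-relative fault, and the table indicator selects the LDT. */
        return (error_code & 6) == 4;
    }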
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+
+ /* Clear and disable the memory parity error line. */
+ clear_mem_error(reason);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ unsigned long i;
+
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason = (reason & 0xf) | 8;
+ outb(reason, 0x61);
+ i = 2000;
+ while (--i) udelay(1000);
+ reason &= ~8;
+ outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+#ifdef CONFIG_MCA
+ /* Might actually be able to figure out what the guilty party
+ * is. */
+ if( MCA_bus ) {
+ mca_handle_nmi();
+ return;
+ }
+#endif
+ printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+void die_nmi (struct pt_regs *regs, const char *msg)
+{
+ spin_lock(&nmi_print_lock);
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ bust_spinlocks(1);
+ printk(msg);
+ printk(" on CPU%d, eip %08lx, registers:\n",
+ smp_processor_id(), regs->eip);
+ show_registers(regs);
+ printk("console shuts up ...\n");
+ console_silent();
+ spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
+ do_exit(SIGSEGV);
+}
+
+static void default_do_nmi(struct pt_regs * regs)
+{
+ unsigned char reason = 0;
+
+ /* Only the BSP gets external NMIs from the system. */
+ if (!smp_processor_id())
+ reason = get_nmi_reason();
+
+ if (!(reason & 0xc0)) {
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
+ == NOTIFY_STOP)
+ return;
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Ok, so this is none of the documented NMI sources,
+ * so it must be the NMI watchdog.
+ */
+ if (nmi_watchdog) {
+ nmi_watchdog_tick(regs);
+ return;
+ }
+#endif
+ unknown_nmi_error(reason, regs);
+ return;
+ }
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP)
+ return;
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ /*
+ * Reassert NMI in case it became active meanwhile
+ * as it's edge-triggered.
+ */
+ reassert_nmi();
+}
+
+static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+{
+ return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+fastcall void do_nmi(struct pt_regs * regs, long error_code)
+{
+ int cpu;
+
+ nmi_enter();
+
+ cpu = smp_processor_id();
+ ++nmi_count(cpu);
+
+ if (!nmi_callback(regs, cpu))
+ default_do_nmi(regs);
+
+ nmi_exit();
+}
+
+void set_nmi_callback(nmi_callback_t callback)
+{
+ nmi_callback = callback;
+}
+
+void unset_nmi_callback(void)
+{
+ nmi_callback = dummy_nmi_callback;
+}
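A hedged sketch of a consumer of this hook, say a profiler driving samples off performance-counter NMIs; every my_* name is hypothetical:

    static int my_profiler_nmi(struct pt_regs *regs, int cpu)
    {
        if (!my_counter_overflowed(cpu))    /* hypothetical ownership check */
            return 0;                       /* not ours: fall through to default_do_nmi() */
        my_record_sample(regs);             /* hypothetical sampling routine */
        return 1;                           /* handled: suppress the default path */
    }

    /* registration / teardown */
    set_nmi_callback(my_profiler_nmi);
    ...
    unset_nmi_callback();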
+
+#ifdef CONFIG_KPROBES
+fastcall int do_int3(struct pt_regs *regs, long error_code)
+{
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return 1;
+ /* This is an interrupt gate, because kprobes wants interrupts
+ disabled. Normal trap handlers don't. */
+ restore_interrupts(regs);
+ do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+ return 0;
+}
+#endif
+
+/*
+ * Our handling of the processor debug registers is non-trivial.
+ * We do not clear them on entry and exit from the kernel. Therefore
+ * it is possible to get a watchpoint trap here from inside the kernel.
+ * However, the code in ./ptrace.c has ensured that the user can
+ * only set watchpoints on userspace addresses. Therefore the in-kernel
+ * watchpoint trap can only occur in code which is reading/writing
+ * from user space. Such code must not hold kernel locks (since it
+ * can equally take a page fault), therefore it is safe to call
+ * force_sig_info even though that claims and releases locks.
+ *
+ * Code in ./signal.c ensures that the debug control register
+ * is restored before we deliver any signal, and therefore that
+ * user code runs with the correct debug control register even though
+ * we clear it here.
+ *
+ * Being careful here means that we don't have to be as careful in a
+ * lot of more complicated places (task switching can be a bit lazy
+ * about restoring all the debug state, and ptrace doesn't have to
+ * find every occurrence of the TF bit that could be saved away even
+ * by user code)
+ */
+fastcall void do_debug(struct pt_regs * regs, long error_code)
+{
+ unsigned int condition;
+ struct task_struct *tsk = current;
+ siginfo_t info;
+
+ condition = HYPERVISOR_get_debugreg(6);
+
+ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
+ SIGTRAP) == NOTIFY_STOP)
+ return;
+#if 0
+ /* It's safe to allow irq's after DR6 has been saved */
+ if (regs->eflags & X86_EFLAGS_IF)
+ local_irq_enable();
+#endif
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+ if (!tsk->thread.debugreg[7])
+ goto clear_dr7;
+ }
+
+ if (regs->eflags & VM_MASK)
+ goto debug_vm86;
+
+ /* Save debug status register where ptrace can see it */
+ tsk->thread.debugreg[6] = condition;
+
+ /* Mask out spurious TF errors due to lazy TF clearing */
+ if (condition & DR_STEP) {
+ /*
+ * The TF error should be masked out only if the current
+ * process is not traced and if the TRAP flag has been set
+ * previously by a tracing process (condition detected by
+ * the PT_DTRACE flag); remember that the i386 TRAP flag
+ * can be modified by the process itself in user mode,
+ * allowing programs to debug themselves without the ptrace()
+ * interface.
+ */
+ if ((regs->xcs & 2) == 0)
+ goto clear_TF_reenable;
+ if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+ goto clear_TF;
+ }
+
+ /* Ok, finally something we can handle */
+ tsk->thread.trap_no = 1;
+ tsk->thread.error_code = error_code;
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_BRKPT;
+
+ /* If this is a kernel mode trap, save the user PC on entry to
+ * the kernel, that's what the debugger can make sense of.
+ */
+ info.si_addr = ((regs->xcs & 2) == 0) ? (void __user *)tsk->thread.eip
+ : (void __user *)regs->eip;
+ force_sig_info(SIGTRAP, &info, tsk);
+
+ /* Disable additional traps. They'll be re-enabled when
+ * the signal is delivered.
+ */
+clear_dr7:
+ HYPERVISOR_set_debugreg(7, 0);
+ return;
+
+debug_vm86:
+ handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
+ return;
+
+clear_TF_reenable:
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+clear_TF:
+ regs->eflags &= ~TF_MASK;
+ return;
+}
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+void math_error(void __user *eip)
+{
+ struct task_struct * task;
+ siginfo_t info;
+ unsigned short cwd, swd;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+ */
+ task = current;
+ save_init_fpu(task);
+ task->thread.trap_no = 16;
+ task->thread.error_code = 0;
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_code = __SI_FAULT;
+ info.si_addr = eip;
+ /*
+ * (~cwd & swd) will mask out exceptions that are not set to unmasked
+ * status. 0x3f is the exception bits in these regs, 0x200 is the
+ * C1 reg you need in case of a stack fault, 0x040 is the stack
+ * fault bit. We should only be taking one exception at a time,
+ * so if this combination doesn't produce any single exception,
+ * then we have a bad program that isn't synchronizing its FPU usage
+ * and it will suffer the consequences since we won't be able to
+ * fully reproduce the context of the exception
+ */
+ cwd = get_fpu_cwd(task);
+ swd = get_fpu_swd(task);
+ switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ case 0x041: /* Stack Fault */
+ case 0x241: /* Stack Fault | Direction */
+ info.si_code = FPE_FLTINV;
+ /* Should we clear the SF or let user space do it ???? */
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+}
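A worked instance of the decode above, assuming an unmasked divide-by-zero:

    /* cwd = 0x037b: the x87 default control word 0x037f with ZM (bit 2) cleared. */
    /* swd = 0x0004: ZE set, stack-fault bit (0x040) and C1 (0x200) clear.        */
    /* ((~0x037b) & 0x0004 & 0x3f) | (0x0004 & 0x240) == 0x004  ==>  FPE_FLTDIV   */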
+
+fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+ ignore_fpu_irq = 1;
+ math_error((void __user *)regs->eip);
+}
+
+void simd_math_error(void __user *eip)
+{
+ struct task_struct * task;
+ siginfo_t info;
+ unsigned short mxcsr;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+ */
+ task = current;
+ save_init_fpu(task);
+ task->thread.trap_no = 19;
+ task->thread.error_code = 0;
+ info.si_signo = SIGFPE;
+ info.si_errno = 0;
+ info.si_code = __SI_FAULT;
+ info.si_addr = eip;
+ /*
+ * The SIMD FPU exceptions are handled a little differently, as there
+ * is only a single status/control register. Thus, to determine which
+ * unmasked exception was caught we must mask the exception mask bits
+ * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+ */
+ mxcsr = get_fpu_mxcsr(task);
+ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+}
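The corresponding MXCSR instance, again for an unmasked divide-by-zero:

    /* mxcsr = 0x1d84: the default 0x1f80 with ZM (bit 9) cleared and ZE (bit 2) set. */
    /* ~((0x1d84 & 0x1f80) >> 7) & (0x1d84 & 0x3f) == 0x004  ==>  FPE_FLTDIV          */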
+
+fastcall void do_simd_coprocessor_error(struct pt_regs * regs,
+ long error_code)
+{
+ if (cpu_has_xmm) {
+ /* Handle SIMD FPU exceptions on PIII+ processors. */
+ ignore_fpu_irq = 1;
+ simd_math_error((void __user *)regs->eip);
+ } else {
+ /*
+ * Handle strange cache flush from user space exception
+ * in all other cases. This is undocumented behaviour.
+ */
+ if (regs->eflags & VM_MASK) {
+ handle_vm86_fault((struct kernel_vm86_regs *)regs,
+ error_code);
+ return;
+ }
+ die_if_kernel("cache flush denied", regs, error_code);
+ current->thread.trap_no = 19;
+ current->thread.error_code = error_code;
+ force_sig(SIGSEGV, current);
+ }
+}
+
+/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled (in this case,
+ * local interrupts are disabled at the call-site in entry.S).
+ */
+asmlinkage void math_state_restore(struct pt_regs regs)
+{
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
+ /*
+ * A trap in kernel mode can be ignored. It'll be the fast XOR or
+ * copying libraries, which will correctly save/restore state and
+ * reset the TS bit in CR0.
+ */
+ if ((regs.xcs & 2) == 0)
+ return;
+
+ clts(); /* Allow maths ops (or we recurse) */
+ if (!tsk->used_math)
+ init_fpu(tsk);
+ restore_fpu(tsk);
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+}
+
+#ifndef CONFIG_MATH_EMULATION
+
+asmlinkage void math_emulate(long arg)
+{
+ printk("math-emulation not enabled and no coprocessor found.\n");
+ printk("killing %s.\n",current->comm);
+ force_sig(SIGFPE,current);
+ schedule();
+}
+
+#endif /* CONFIG_MATH_EMULATION */
+
+#ifdef CONFIG_X86_F00F_BUG
+void __init trap_init_f00f_bug(void)
+{
+ __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
+
+ /*
+ * Update the IDT descriptor and reload the IDT so that
+ * it uses the read-only mapped virtual address.
+ */
+ idt_descr.address = fix_to_virt(FIX_F00F_IDT);
+ __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+}
+#endif
+
+
+/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */
+static trap_info_t trap_table[] = {
+ { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
+ { 1, 0, __KERNEL_CS, (unsigned long)debug },
+ { 3, 3, __KERNEL_CS, (unsigned long)int3 },
+ { 4, 3, __KERNEL_CS, (unsigned long)overflow },
+ { 5, 3, __KERNEL_CS, (unsigned long)bounds },
+ { 6, 0, __KERNEL_CS, (unsigned long)invalid_op },
+ { 7, 0, __KERNEL_CS, (unsigned long)device_not_available },
+ { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
+ { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS },
+ { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present },
+ { 12, 0, __KERNEL_CS, (unsigned long)stack_segment },
+ { 13, 0, __KERNEL_CS, (unsigned long)general_protection },
+ { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
+ { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment },
+ { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error },
+ { 17, 0, __KERNEL_CS, (unsigned long)alignment_check },
+#ifdef CONFIG_X86_MCE
+ { 18, 0, __KERNEL_CS, (unsigned long)machine_check },
+#endif
+ { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
+ { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call },
+ { 0, 0, 0, 0 }
+};
+
+void __init trap_init(void)
+{
+ HYPERVISOR_set_trap_table(trap_table);
+ HYPERVISOR_set_fast_trap(SYSCALL_VECTOR);
+
+ /*
+ * default LDT is a single-entry callgate to lcall7 for iBCS
+ * and a callgate to lcall27 for Solaris/x86 binaries
+ */
+ clear_page(&default_ldt[0]);
+ make_lowmem_page_readonly(&default_ldt[0]);
+ xen_flush_page_update_queue();
+
+ /*
+ * Should be a barrier for any external CPU state.
+ */
+ cpu_init();
+}
--- /dev/null
- #if MMU_UPDATE_DEBUG > 0
- page_update_debug_t update_debug_queue[QUEUE_SIZE] = {{0}};
- #undef queue_l1_entry_update
- #undef queue_l2_entry_update
- #endif
- #if MMU_UPDATE_DEBUG > 3
- static void DEBUG_allow_pt_reads(void)
- {
- pte_t *pte;
- mmu_update_t update;
- int i;
- for ( i = idx-1; i >= 0; i-- )
- {
- pte = update_debug_queue[i].ptep;
- if ( pte == NULL ) continue;
- update_debug_queue[i].ptep = NULL;
- update.ptr = virt_to_machine(pte);
- update.val = update_debug_queue[i].pteval;
- HYPERVISOR_mmu_update(&update, 1, NULL);
- }
- }
- static void DEBUG_disallow_pt_read(unsigned long va)
- {
- pte_t *pte;
- pmd_t *pmd;
- pgd_t *pgd;
- unsigned long pteval;
- /*
- * We may fault because of an already outstanding update.
- * That's okay -- it'll get fixed up in the fault handler.
- */
- mmu_update_t update;
- pgd = pgd_offset_k(va);
- pmd = pmd_offset(pgd, va);
- pte = pte_offset_kernel(pmd, va); /* XXXcl */
- update.ptr = virt_to_machine(pte);
- pteval = *(unsigned long *)pte;
- update.val = pteval & ~_PAGE_PRESENT;
- HYPERVISOR_mmu_update(&update, 1, NULL);
- update_debug_queue[idx].ptep = pte;
- update_debug_queue[idx].pteval = pteval;
- }
- #endif
-
- #if MMU_UPDATE_DEBUG > 1
- #undef queue_pt_switch
- #undef queue_tlb_flush
- #undef queue_invlpg
- #undef queue_pgd_pin
- #undef queue_pgd_unpin
- #undef queue_pte_pin
- #undef queue_pte_unpin
- #undef queue_set_ldt
- #endif
-
-
+/******************************************************************************
+ * mm/hypervisor.c
+ *
+ * Update page tables via the hypervisor.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/multicall.h>
+
+/*
+ * This suffices to protect us if we ever move to SMP domains.
+ * Further, it protects us against interrupts. At the very least, this is
+ * required for the network driver which flushes the update queue before
+ * pushing new receive buffers.
+ */
+static spinlock_t update_lock = SPIN_LOCK_UNLOCKED;
+
+/* Linux 2.6 isn't using the traditional batched interface. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#define QUEUE_SIZE 2048
+#define pte_offset_kernel pte_offset
+#else
+#define QUEUE_SIZE 128
+#endif
+
+static mmu_update_t update_queue[QUEUE_SIZE];
+unsigned int mmu_update_queue_idx = 0;
+#define idx mmu_update_queue_idx
+
- #if MMU_UPDATE_DEBUG > 1
- if (idx > 1)
- printk("Flushing %d entries from pt update queue\n", idx);
- #endif
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_allow_pt_reads();
- #endif
+/*
+ * MULTICALL_flush_page_update_queue:
+ * This is a version of the flush which queues as part of a multicall.
+ */
+void MULTICALL_flush_page_update_queue(void)
+{
+ unsigned long flags;
+ unsigned int _idx;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( (_idx = idx) != 0 )
+ {
- #if MMU_UPDATE_DEBUG > 1
- if (idx > 1)
- printk("Flushing %d entries from pt update queue\n", idx);
- #endif
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_allow_pt_reads();
- #endif
+ idx = 0;
+ wmb(); /* Make sure index is cleared first to avoid double updates. */
+ queue_multicall3(__HYPERVISOR_mmu_update,
+ (unsigned long)update_queue,
+ (unsigned long)_idx,
+ (unsigned long)NULL);
+ }
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void __flush_page_update_queue(void)
+{
+ unsigned int _idx = idx;
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_disallow_pt_read((unsigned long)ptr);
- #endif
+ idx = 0;
+ wmb(); /* Make sure index is cleared first to avoid double updates. */
+ if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx, NULL) < 0) )
+ {
+ printk(KERN_ALERT "Failed to execute MMU updates.\n");
+ BUG();
+ }
+}
+
+void _flush_page_update_queue(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ if ( idx != 0 ) __flush_page_update_queue();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+static inline void increment_index(void)
+{
+ idx++;
+ if ( unlikely(idx == QUEUE_SIZE) ) __flush_page_update_queue();
+}
+
+static inline void increment_index_and_flush(void)
+{
+ idx++;
+ __flush_page_update_queue();
+}
+
+void queue_l1_entry_update(pte_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
- #if MMU_UPDATE_DEBUG > 3
- DEBUG_disallow_pt_read((unsigned long)ptr);
- #endif
+ update_queue[idx].ptr = virt_to_machine(ptr);
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_l2_entry_update(pmd_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = virt_to_machine(ptr);
+ update_queue[idx].val = val;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pt_switch(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_NEW_BASEPTR;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_tlb_flush(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_TLB_FLUSH;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_invlpg(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].ptr |= ptr & PAGE_MASK;
+ update_queue[idx].val = MMUEXT_INVLPG;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pgd_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_pte_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_set_ldt(unsigned long ptr, unsigned long len)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
+ update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void queue_machphys_update(unsigned long mfn, unsigned long pfn)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ update_queue[idx].val = pfn;
+ increment_index();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
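A usage sketch of the batched interface (the caller is hypothetical; queue_l1_entry_update() and flush_page_update_queue() are the real entry points): queue a run of updates, then flush once so Xen applies them in a single hypercall:

    static void example_remap_ptes(pte_t **ptes, unsigned long *vals, int n)
    {
        int i;
        for (i = 0; i < n; i++)
            queue_l1_entry_update(ptes[i], vals[i]); /* auto-flushes if the queue fills */
        flush_page_update_queue();                   /* push the batch before depending on it */
    }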
+
+/* queue and flush versions of the above */
+void xen_l1_entry_update(pte_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = virt_to_machine(ptr);
+ update_queue[idx].val = val;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_l2_entry_update(pmd_t *ptr, unsigned long val)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = virt_to_machine(ptr);
+ update_queue[idx].val = val;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_pt_switch(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_NEW_BASEPTR;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_tlb_flush(void)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_TLB_FLUSH;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_invlpg(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND;
+ update_queue[idx].ptr |= ptr & PAGE_MASK;
+ update_queue[idx].val = MMUEXT_INVLPG;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_pgd_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L2_TABLE;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_pgd_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_pte_pin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_PIN_L1_TABLE;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_pte_unpin(unsigned long ptr)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = phys_to_machine(ptr);
+ update_queue[idx].ptr |= MMU_EXTENDED_COMMAND;
+ update_queue[idx].val = MMUEXT_UNPIN_TABLE;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_set_ldt(unsigned long ptr, unsigned long len)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = MMU_EXTENDED_COMMAND | ptr;
+ update_queue[idx].val = MMUEXT_SET_LDT | (len << MMUEXT_CMD_SHIFT);
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+void xen_machphys_update(unsigned long mfn, unsigned long pfn)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&update_lock, flags);
+ update_queue[idx].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ update_queue[idx].val = pfn;
+ increment_index_and_flush();
+ spin_unlock_irqrestore(&update_lock, flags);
+}
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+
+unsigned long allocate_empty_lowmem_region(unsigned long pages)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long *pfn_array;
+ unsigned long vstart;
+ unsigned long i;
+ int ret;
+ unsigned int order = get_order(pages*PAGE_SIZE);
+
+ vstart = __get_free_pages(GFP_KERNEL, order);
+ if ( vstart == 0 )
+ return 0UL;
+
+ scrub_pages(vstart, 1 << order);
+
+ pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
+ if ( pfn_array == NULL )
+ BUG();
+
+ for ( i = 0; i < (1<<order); i++ )
+ {
+ pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
+ queue_l1_entry_update(pte, 0);
+ phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY;
+ }
+
+ /* Flush updates through and flush the TLB. */
+ xen_tlb_flush();
+
+ ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
+ pfn_array, 1<<order, 0);
+ if ( unlikely(ret != (1<<order)) )
+ {
+ printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
+ BUG();
+ }
+
+ vfree(pfn_array);
+
+ return vstart;
+}
+
+void deallocate_lowmem_region(unsigned long vstart, unsigned long pages)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long *pfn_array;
+ unsigned long i;
+ int ret;
+ unsigned int order = get_order(pages*PAGE_SIZE);
+
+ pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
+ if ( pfn_array == NULL )
+ BUG();
+
+ ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
+ pfn_array, 1<<order, 0);
+ if ( unlikely(ret != (1<<order)) )
+ {
+ printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
+ ret);
+ BUG();
+ }
+
+ for ( i = 0; i < (1<<order); i++ )
+ {
+ pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE)));
+ pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
+ pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
+ queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL);
+ queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT);
+ phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i];
+ }
+
+ flush_page_update_queue();
+
+ vfree(pfn_array);
+
+ free_pages(vstart, order);
+}
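A hedged example of pairing the two helpers; what gets mapped into the emptied region in between is driver-specific and elided here:

    static int example_claim_and_release(void)
    {
        unsigned long va = allocate_empty_lowmem_region(16); /* 16 pages; frames handed back to Xen */
        if (va == 0)
            return -ENOMEM;
        /* ... map machine frames (e.g. device memory) into [va, va + 16*PAGE_SIZE) ... */
        deallocate_lowmem_region(va, 16);                    /* reclaim frames, then free the pages */
        return 0;
    }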
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
--- /dev/null
- unsigned int max_dma, high, low;
+/*
+ * linux/arch/i386/mm/init.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm-xen/hypervisor.h>
+
+unsigned int __VMALLOC_RESERVE = 128 << 20;
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+unsigned long highstart_pfn, highend_pfn;
+
+static int noinline do_test_wp_bit(void);
+
+/*
+ * Creates a middle page table and puts a pointer to it in the
+ * given global directory entry. This only returns the gd entry
+ * in non-PAE compilation mode, since the middle layer is folded.
+ */
+static pmd_t * __init one_md_table_init(pgd_t *pgd)
+{
+ pmd_t *pmd_table;
+
+#ifdef CONFIG_X86_PAE
+ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+ if (pmd_table != pmd_offset(pgd, 0))
+ BUG();
+#else
+ pmd_table = pmd_offset(pgd, 0);
+#endif
+
+ return pmd_table;
+}
+
+/*
+ * Create a page table and place a pointer to it in a middle page
+ * directory entry.
+ */
+static pte_t * __init one_page_table_init(pmd_t *pmd)
+{
+ if (pmd_none(*pmd)) {
+ pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+ if (page_table != pte_offset_kernel(pmd, 0))
+ BUG();
+
+ return page_table;
+ }
+
+ return pte_offset_kernel(pmd, 0);
+}
+
+/*
+ * This function initializes a certain range of kernel virtual memory
+ * with new bootmem page tables, everywhere page tables are missing in
+ * the given range.
+ */
+
+/*
+ * NOTE: The pagetables are allocated contiguous on the physical space
+ * so we can cache the place of the first one and move around without
+ * checking the pgd every time.
+ */
+static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ int pgd_idx, pmd_idx;
+ unsigned long vaddr;
+
+ vaddr = start;
+ pgd_idx = pgd_index(vaddr);
+ pmd_idx = pmd_index(vaddr);
+ pgd = pgd_base + pgd_idx;
+
+ for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, pgd_idx++) {
+ if (pgd_none(*pgd))
+ one_md_table_init(pgd);
+
+ pmd = pmd_offset(pgd, vaddr);
+ for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
+ if (pmd_none(*pmd))
+ one_page_table_init(pmd);
+
+ vaddr += PMD_SIZE;
+ }
+ pmd_idx = 0;
+ }
+}
+
+void __init protect_page(pgd_t *pgd, void *page, int mode)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr;
+
+ addr = (unsigned long)page;
+ pgd += pgd_index(addr);
+ pmd = pmd_offset(pgd, addr);
+ pte = pte_offset_kernel(pmd, addr);
+ if (!pte_present(*pte))
+ return;
+ queue_l1_entry_update(pte, mode ? pte_val_ma(*pte) & ~_PAGE_RW :
+ pte_val_ma(*pte) | _PAGE_RW);
+}
+
+void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ int pgd_idx, pmd_idx;
+
+ protect_page(dpgd, spgd, mode);
+
+ for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
+ pmd = pmd_offset(spgd, 0);
+ if (pmd_none(*pmd))
+ continue;
+ for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
+ pte = pte_offset_kernel(pmd, 0);
+ protect_page(dpgd, pte, mode);
+ }
+ }
+}
+
+static inline int is_kernel_text(unsigned long addr)
+{
+ if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
+ return 1;
+ return 0;
+}
+
+/*
+ * This maps the physical memory to kernel virtual address space, a total
+ * of max_low_pfn pages, by creating page tables starting from address
+ * PAGE_OFFSET.
+ */
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
+{
+ unsigned long pfn;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ int pgd_idx, pmd_idx, pte_ofs;
+
+ pgd_idx = pgd_index(PAGE_OFFSET);
+ pgd = pgd_base + pgd_idx;
+ pfn = 0;
+ pmd_idx = pmd_index(PAGE_OFFSET);
+ pte_ofs = pte_index(PAGE_OFFSET);
+
+ for (; pgd_idx < PTRS_PER_PGD_NO_HV; pgd++, pgd_idx++) {
+ pmd = one_md_table_init(pgd);
+ if (pfn >= max_low_pfn)
+ continue;
+ pmd += pmd_idx;
+ for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
+ unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+
+ /* Map with big pages if possible, otherwise create normal page tables. */
+ if (cpu_has_pse) {
+ unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
+
+ if (is_kernel_text(address) || is_kernel_text(address2))
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
+ else
+ set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+ pfn += PTRS_PER_PTE;
+ } else {
+ pte = one_page_table_init(pmd);
+
+ pte += pte_ofs;
+ for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+ if (is_kernel_text(address))
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ else
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ }
+ pte_ofs = 0;
+ }
+ flush_page_update_queue();
+ }
+ pmd_idx = 0;
+ }
+}
+
+static inline int page_kills_ppro(unsigned long pagenr)
+{
+ if (pagenr >= 0x70000 && pagenr <= 0x7003F)
+ return 1;
+ return 0;
+}
+
+extern int is_available_memory(efi_memory_desc_t *);
+
+static inline int page_is_ram(unsigned long pagenr)
+{
+ int i;
+ unsigned long addr, end;
+
+ if (efi_enabled) {
+ efi_memory_desc_t *md;
+
+ for (i = 0; i < memmap.nr_map; i++) {
+ md = &memmap.map[i];
+ if (!is_available_memory(md))
+ continue;
+ addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+ end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
+
+ if ((pagenr >= addr) && (pagenr < end))
+ return 1;
+ }
+ return 0;
+ }
+
+ for (i = 0; i < e820.nr_map; i++) {
+
+ if (e820.map[i].type != E820_RAM) /* not usable memory */
+ continue;
+ /*
+ * !!!FIXME!!! Some BIOSen report areas as RAM that
+ * are not. Notably the 640->1Mb area. We need a sanity
+ * check here.
+ */
+ addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+ end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
+ if ((pagenr >= addr) && (pagenr < end))
+ return 1;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+
+#define kmap_get_fixmap_pte(vaddr) \
+ pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
+{
+ unsigned long kmap_vstart;
+
+ /* cache the first kmap pte */
+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+ kmap_prot = PAGE_KERNEL;
+}
+
+void __init permanent_kmaps_init(pgd_t *pgd_base)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long vaddr;
+
+ vaddr = PKMAP_BASE;
+ page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
+
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ pmd = pmd_offset(pgd, vaddr);
+ pte = pte_offset_kernel(pmd, vaddr);
+ pkmap_page_table = pte;
+}
+
+void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+{
+ if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+ ClearPageReserved(page);
+ set_bit(PG_highmem, &page->flags);
+ set_page_count(page, 1);
+ __free_page(page);
+ totalhigh_pages++;
+ } else
+ SetPageReserved(page);
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+void __init set_highmem_pages_init(int bad_ppro)
+{
+ int pfn;
+ for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
+ one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+ totalram_pages += totalhigh_pages;
+}
+#else
+extern void set_highmem_pages_init(int);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+#else
+#define kmap_init() do { } while (0)
+#define permanent_kmaps_init(pgd_base) do { } while (0)
+#define set_highmem_pages_init(bad_ppro) do { } while (0)
+#endif /* CONFIG_HIGHMEM */
+
+unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
+unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
+
+#ifndef CONFIG_DISCONTIGMEM
+#define remap_numa_kva() do {} while (0)
+#else
+extern void __init remap_numa_kva(void);
+#endif
+
+static void __init pagetable_init (void)
+{
+ unsigned long vaddr;
+ pgd_t *pgd_base = swapper_pg_dir;
+
+#ifdef CONFIG_X86_PAE
+ int i;
+ /* Init entries of the first-level page table to the zero page */
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+#endif
+
+ /* Enable PSE if available */
+ if (cpu_has_pse) {
+ set_in_cr4(X86_CR4_PSE);
+ }
+
+ /* Enable PGE if available */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+ __PAGE_KERNEL |= _PAGE_GLOBAL;
+ __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
+ }
+
+ kernel_physical_mapping_init(pgd_base);
+ remap_numa_kva();
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ page_table_range_init(vaddr, 0, pgd_base);
+
+ permanent_kmaps_init(pgd_base);
+
+#ifdef CONFIG_X86_PAE
+ /*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+ pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+#endif
+}
+
+#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
+/*
+ * Swap suspend & friends need this for resume because things like the intel-agp
+ * driver might have split up a kernel 4MB mapping.
+ */
+char __nosavedata swsusp_pg_dir[PAGE_SIZE]
+ __attribute__ ((aligned (PAGE_SIZE)));
+
+static inline void save_pg_dir(void)
+{
+ memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
+}
+#else
+static inline void save_pg_dir(void)
+{
+}
+#endif
+
+void zap_low_mappings (void)
+{
+ int i;
+
+ save_pg_dir();
+
+ /*
+ * Zap initial low-memory mappings.
+ *
+ * Note that "pgd_clear()" doesn't do it for
+ * us, because pgd_clear() is a no-op on i386.
+ */
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+#ifdef CONFIG_X86_PAE
+ set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
+#else
+ set_pgd(swapper_pg_dir+i, __pgd(0));
+#endif
+ flush_tlb_all();
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+void __init zone_sizes_init(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
++ unsigned int /*max_dma,*/ high, low;
+
- if (low < max_dma)
++ /*
++ * XEN: Our notion of "DMA memory" is fake when running over Xen.
++ * We simply put all RAM in the DMA zone so that those drivers which
++ * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
++ * Those drivers that *do* require lowmem are screwed anyway when
++ * running over Xen!
++ */
++ /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
+ low = max_low_pfn;
+ high = highend_pfn;
+
- else {
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = low - max_dma;
++ /*if (low < max_dma)*/
+ zones_size[ZONE_DMA] = low;
- #ifdef CONFIG_XEN_PRIVILEGED_GUEST
++ /*else*/ {
++ /*zones_size[ZONE_DMA] = max_dma;*/
++ /*zones_size[ZONE_NORMAL] = low - max_dma;*/
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = high - low;
+#endif
+ }
+ free_area_init(zones_size);
+}
+#else
+extern void zone_sizes_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+static int disable_nx __initdata = 0;
+u64 __supported_pte_mask = ~_PAGE_NX;
+
+/*
+ * noexec = on|off
+ *
+ * Control non executable mappings.
+ *
+ * on Enable
+ * off Disable
+ */
+static int __init noexec_setup(char *str)
+{
+ if (!strncmp(str, "on",2) && cpu_has_nx) {
+ __supported_pte_mask |= _PAGE_NX;
+ disable_nx = 0;
+ } else if (!strncmp(str,"off",3)) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+ return 1;
+}
+
+__setup("noexec=", noexec_setup);
+
+int nx_enabled = 0;
+#ifdef CONFIG_X86_PAE
+
+static void __init set_nx(void)
+{
+ unsigned int v[4], l, h;
+
+ if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+ cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+ if ((v[3] & (1 << 20)) && !disable_nx) {
+ rdmsr(MSR_EFER, l, h);
+ l |= EFER_NX;
+ wrmsr(MSR_EFER, l, h);
+ nx_enabled = 1;
+ __supported_pte_mask |= _PAGE_NX;
+ }
+ }
+}
+
+/*
+ * Enables/disables executability of a given kernel page and
+ * returns the previous setting.
+ */
+int __init set_kernel_exec(unsigned long vaddr, int enable)
+{
+ pte_t *pte;
+ int ret = 1;
+
+ if (!nx_enabled)
+ goto out;
+
+ pte = lookup_address(vaddr);
+ BUG_ON(!pte);
+
+ if (!pte_exec_kernel(*pte))
+ ret = 0;
+
+ if (enable)
+ pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+ else
+ pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
+ __flush_tlb_all();
+out:
+ return ret;
+}
+
+#endif
+
+/*
+ * paging_init() sets up the page tables - note that the first 8MB are
+ * already mapped by head.S.
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+ pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
+ pgd_t *new_pgd = swapper_pg_dir;
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ int i;
+#endif
+
+#ifdef CONFIG_X86_PAE
+ set_nx();
+ if (nx_enabled)
+ printk("NX (Execute Disable) protection: active\n");
+#endif
+
+ pagetable_init();
+
+ /*
+ * Write-protect both page tables within both page tables.
+ * That's three ops, as the old p.t. is already protected
+ * within the old p.t. Then pin the new table, switch tables,
+ * and unprotect the old table.
+ */
+ protect_pagetable(new_pgd, old_pgd, PROT_ON);
+ protect_pagetable(new_pgd, new_pgd, PROT_ON);
+ protect_pagetable(old_pgd, new_pgd, PROT_ON);
+ queue_pgd_pin(__pa(new_pgd));
+ load_cr3(new_pgd);
+ queue_pgd_unpin(__pa(old_pgd));
+ __flush_tlb_all(); /* implicit flush */
+ protect_pagetable(new_pgd, old_pgd, PROT_OFF);
+ flush_page_update_queue();
+
+ /* Completely detached from old tables, so free them. */
+ free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
+
+#ifdef CONFIG_X86_PAE
+ /*
+ * We will bail out later - printk doesn't work right now so
+ * the user would just see a hanging kernel.
+ */
+ if (cpu_has_pae)
+ set_in_cr4(X86_CR4_PAE);
+#endif
+ __flush_tlb_all();
+
+ kmap_init();
+ zone_sizes_init();
+
+ /* Switch to the real shared_info page, and clear the dummy page. */
+ flush_page_update_queue();
+ set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
++#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+ /* Setup mapping of lower 1st MB */
+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
+ if (xen_start_info.flags & SIF_PRIVILEGED)
+ set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+ else
+ set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
+ virt_to_machine(empty_zero_page));
+#endif
+}
+
+/*
+ * Test if the WP bit works in supervisor mode. It isn't supported on 386's
+ * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
+ * used to involve black magic jumps to work around some nasty CPU bugs,
+ * but fortunately the switch to using exceptions got rid of all that.
+ */
+
+void __init test_wp_bit(void)
+{
+ printk("Checking if this processor honours the WP bit even in supervisor mode... ");
+
+ /* Any page-aligned address will do, the test is non-destructive */
+ __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
+ boot_cpu_data.wp_works_ok = do_test_wp_bit();
+ clear_fixmap(FIX_WP_TEST);
+
+ if (!boot_cpu_data.wp_works_ok) {
+ printk("No.\n");
+#ifdef CONFIG_X86_WP_WORKS_OK
+ panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
+#endif
+ } else {
+ printk("Ok.\n");
+ }
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+static void __init set_max_mapnr_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+ highmem_start_page = pfn_to_page(highstart_pfn);
+ max_mapnr = num_physpages = highend_pfn;
+#else
+ max_mapnr = num_physpages = max_low_pfn;
+#endif
+}
+#define __free_all_bootmem() free_all_bootmem()
+#else
+#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
+extern void set_max_mapnr_init(void);
+#endif /* !CONFIG_DISCONTIGMEM */
+
+static struct kcore_list kcore_mem, kcore_vmalloc;
+
+void __init mem_init(void)
+{
+ extern int ppro_with_ram_bug(void);
+ int codesize, reservedpages, datasize, initsize;
+ int tmp;
+ int bad_ppro;
+
+#ifndef CONFIG_DISCONTIGMEM
+ if (!mem_map)
+ BUG();
+#endif
+
+ bad_ppro = ppro_with_ram_bug();
+
+#ifdef CONFIG_HIGHMEM
+ /* check that fixmap and pkmap do not overlap */
+ if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+ printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
+ printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
+ BUG();
+ }
+#endif
+
+ set_max_mapnr_init();
+
+#ifdef CONFIG_HIGHMEM
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
+#else
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+#endif
+
+ /* this will put all low memory onto the freelists */
+ totalram_pages += __free_all_bootmem();
+
+ reservedpages = 0;
+ for (tmp = 0; tmp < max_low_pfn; tmp++)
+ /*
+ * Only count reserved RAM pages
+ */
+ if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
+ reservedpages++;
+
+ set_highmem_pages_init(bad_ppro);
+
+ codesize = (unsigned long) &_etext - (unsigned long) &_text;
+ datasize = (unsigned long) &_edata - (unsigned long) &_etext;
+ initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+ kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
+ kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
+ VMALLOC_END-VMALLOC_START);
+
+ printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ num_physpages << (PAGE_SHIFT-10),
+ codesize >> 10,
+ reservedpages << (PAGE_SHIFT-10),
+ datasize >> 10,
+ initsize >> 10,
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+ );
+
+#ifdef CONFIG_X86_PAE
+ if (!cpu_has_pae)
+ panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
+#endif
+ if (boot_cpu_data.wp_works_ok < 0)
+ test_wp_bit();
+
+ /*
+ * Subtle. SMP is doing its boot stuff late (because it has to
+ * fork idle threads) - but it also needs low mappings for the
+ * protected-mode entry to work. We zap these entries only after
+ * the WP-bit has been tested.
+ */
+#ifndef CONFIG_SMP
+ zap_low_mappings();
+#endif
+}
+
+kmem_cache_t *pgd_cache;
+kmem_cache_t *pmd_cache;
+kmem_cache_t *pte_cache;
+
+void __init pgtable_cache_init(void)
+{
+ pte_cache = kmem_cache_create("pte",
+ PTRS_PER_PTE*sizeof(pte_t),
+ PTRS_PER_PTE*sizeof(pte_t),
+ 0,
+ pte_ctor,
+ pte_dtor);
+ if (!pte_cache)
+ panic("pgtable_cache_init(): Cannot create pte cache");
+ if (PTRS_PER_PMD > 1) {
+ pmd_cache = kmem_cache_create("pmd",
+ PTRS_PER_PMD*sizeof(pmd_t),
+ PTRS_PER_PMD*sizeof(pmd_t),
+ 0,
+ pmd_ctor,
+ NULL);
+ if (!pmd_cache)
+ panic("pgtable_cache_init(): cannot create pmd cache");
+ }
+ pgd_cache = kmem_cache_create("pgd",
+ PTRS_PER_PGD*sizeof(pgd_t),
+ PTRS_PER_PGD*sizeof(pgd_t),
+ 0,
+ pgd_ctor,
+ pgd_dtor);
+ if (!pgd_cache)
+ panic("pgtable_cache_init(): Cannot create pgd cache");
+}
+
+/*
+ * This function cannot be __init, since exceptions don't work in that
+ * section. Put this after the callers, so that it cannot be inlined.
+ */
+static int noinline do_test_wp_bit(void)
+{
+ char tmp_reg;
+ int flag;
+
+ __asm__ __volatile__(
+ " movb %0,%1 \n"
+ "1: movb %1,%0 \n"
+ " xorl %2,%2 \n"
+ "2: \n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4 \n"
+ " .long 1b,2b \n"
+ ".previous \n"
+ :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
+ "=q" (tmp_reg),
+ "=r" (flag)
+ :"2" (1)
+ :"memory");
+
+ return flag;
+}
+
+void free_initmem(void)
+{
+ unsigned long addr;
+
+ addr = (unsigned long)(&__init_begin);
+ for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ set_page_count(virt_to_page(addr), 1);
+ memset((void *)addr, 0xcc, PAGE_SIZE);
+ free_page(addr);
+ totalram_pages++;
+ }
+ printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ if (start < end)
+ printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ for (; start < end; start += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(start));
+ set_page_count(virt_to_page(start), 1);
+ free_page(start);
+ totalram_pages++;
+ }
+}
+#endif
--- /dev/null
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-
+/*
+ * arch/i386/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#ifndef CONFIG_XEN_PHYSDEV_ACCESS
+
+void * __ioremap(unsigned long phys_addr, unsigned long size,
+ unsigned long flags)
+{
+ return NULL;
+}
+
+void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+ return NULL;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+}
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+ return NULL;
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+}
+
+#else
+
++/*
++ * Does @address reside within a non-highmem page that is local to this virtual
++ * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
++ * See the comment that accompanies pte_pfn() in pgtable-2level.h to understand
++ * why this works.
++ */
++static inline int is_local_lowmem(unsigned long address)
++{
++ extern unsigned long max_low_pfn;
++ unsigned long mfn = address >> PAGE_SHIFT;
++ unsigned long pfn = mfn_to_pfn(mfn);
++ return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
++}
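To see why the round trip is the right test, consider a frame we do not own (the value is hypothetical):

    unsigned long mfn = some_foreign_mfn;   /* hypothetical: frame owned by another domain or a device */
    unsigned long pfn = mfn_to_pfn(mfn);    /* may still fall below max_low_pfn */
    int local = (pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn);
    /* local == 0: pfn_to_mfn(pfn) names one of our own frames, not mfn, so __ioremap() may remap it */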
++
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+ unsigned long phys_addr, unsigned long flags)
+{
+ unsigned long end;
+ unsigned long pfn;
+
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+ pfn = phys_addr >> PAGE_SHIFT;
+ do {
+ if (!pte_none(*pte)) {
+ printk("remap_area_pte: page already exists\n");
+ BUG();
+ }
+ set_pte(pte, pfn_pte_ma(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
+ _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
+ address += PAGE_SIZE;
+ pfn++;
+ pte++;
+ } while (address && (address < end));
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
+ unsigned long phys_addr, unsigned long flags)
+{
+ unsigned long end;
+
+ address &= ~PGDIR_MASK;
+ end = address + size;
+ if (end > PGDIR_SIZE)
+ end = PGDIR_SIZE;
+ phys_addr -= address;
+ if (address >= end)
+ BUG();
+ do {
+ pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+ remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+ return 0;
+}
+
+static int remap_area_pages(unsigned long address, unsigned long phys_addr,
+ unsigned long size, unsigned long flags)
+{
+ int error;
+ pgd_t * dir;
+ unsigned long end = address + size;
+
+ phys_addr -= address;
+ dir = pgd_offset(&init_mm, address);
+ flush_cache_all();
+ if (address >= end)
+ BUG();
+ spin_lock(&init_mm.page_table_lock);
+ do {
+ pmd_t *pmd;
+ pmd = pmd_alloc(&init_mm, dir, address);
+ error = -ENOMEM;
+ if (!pmd)
+ break;
+ if (remap_area_pmd(pmd, address, end - address,
+ phys_addr + address, flags))
+ break;
+ error = 0;
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+ } while (address && (address < end));
+ spin_unlock(&init_mm.page_table_lock);
+ flush_tlb_all();
+ return error;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+ void __iomem * addr;
+ struct vm_struct * area;
+ unsigned long offset, last_addr;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
- if (phys_addr >= 0xA0000 && last_addr < 0x100000)
- return (void __iomem *) phys_to_virt(phys_addr);
++#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
- if (machine_to_phys(phys_addr) < virt_to_phys(high_memory)) {
++ if (phys_addr >= 0x0 && last_addr < 0x100000)
++ return isa_bus_to_virt(phys_addr);
++#endif
+
+ /*
+ * Don't allow anybody to remap normal RAM that we're using..
+ */
- if (machine_to_phys(last_addr) < virt_to_phys(high_memory)) {
++ if (is_local_lowmem(phys_addr)) {
+ char *t_addr, *t_end;
+ struct page *page;
+
+ t_addr = bus_to_virt(phys_addr);
+ t_end = t_addr + (size - 1);
+
+ for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+ if(!PageReserved(page))
+ return NULL;
+ }
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = (void __iomem *) area->addr;
+ if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+ vunmap((void __force *) addr);
+ return NULL;
+ }
+ return (void __iomem *) (offset + (char __iomem *)addr);
+}
+
+
+/**
+ * ioremap_nocache - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap_nocache performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ *
+ * This version of ioremap ensures that the memory is marked uncachable
+ * on the CPU as well as honouring existing caching rules from things like
+ * the PCI bus. Note that there are other caches and buffers on many
+ * busses. In particular driver authors should read up on PCI writes
+ *
+ * It's useful if some control registers are in such an area and
+ * write combining or read caching is not desirable:
+ *
+ * Must be freed with iounmap.
+ */
+
+void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+{
+ unsigned long last_addr;
+ void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
+ if (!p)
+ return p;
+
+ /* Guaranteed to be > phys_addr, as per __ioremap() */
+ last_addr = phys_addr + size - 1;
+
- if ((unsigned long)addr <= 0x100000)
- return;
++ if (is_local_lowmem(last_addr)) {
+ struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
+ unsigned long npages;
+
+ phys_addr &= PAGE_MASK;
+
+ /* This might overflow and become zero.. */
+ last_addr = PAGE_ALIGN(last_addr);
+
+ /* .. but that's ok, because modulo-2**n arithmetic will make
+ * the page-aligned "last - first" come out right.
+ */
+ npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+ if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
+ iounmap(p);
+ p = NULL;
+ }
+ global_flush_tlb();
+ }
+
+ return p;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ struct vm_struct *p;
- if (p->flags && machine_to_phys(p->phys_addr) < virt_to_phys(high_memory)) {
+ if ((void __force *) addr <= high_memory)
+ return;
++#ifdef CONFIG_XEN_PRIVILEGED_GUEST
++ if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
++ return;
++#endif
+ p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr));
+ if (!p) {
+ printk("__iounmap: bad address %p\n", addr);
+ return;
+ }
+
- if (phys_addr >= 0x0 && last_addr < 0x100000)
- return isa_bus_to_virt(phys_addr);
-
++ if (p->flags && is_local_lowmem(p->phys_addr)) {
+ change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
+ p->size >> PAGE_SHIFT,
+ PAGE_KERNEL);
+ global_flush_tlb();
+ }
+ kfree(p);
+}
+
+void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
+{
+ unsigned long offset, last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
- if (phys_addr >= 0xA0000 && last_addr < 0x100000)
- return phys_to_virt(phys_addr);
++#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
- if (virt_addr < 0x100000)
- return;
++ if (phys_addr >= 0x0 && last_addr < 0x100000)
++ return isa_bus_to_virt(phys_addr);
++#endif
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr) - phys_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (nrpages > NR_FIX_BTMAPS)
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ set_fixmap_ma(idx, phys_addr);
+ phys_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+ return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
+}
+
+void __init bt_iounmap(void *addr, unsigned long size)
+{
+ unsigned long virt_addr;
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+
+ virt_addr = (unsigned long)addr;
+ if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
+ return;
++#ifdef CONFIG_XEN_PRIVILEGED_GUEST
++ if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
++ return;
++#endif
+ offset = virt_addr & ~PAGE_MASK;
+ nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+
+ idx = FIX_BTMAP_BEGIN;
+ while (nrpages > 0) {
+ clear_fixmap(idx);
+ --idx;
+ --nrpages;
+ }
+}
+
+#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+
+/* These hacky macros avoid phys->machine translations. */
+#define __direct_pte(x) ((pte_t) { (x) } )
+#define __direct_mk_pte(page_nr,pgprot) \
+ __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+#define direct_mk_pte_phys(physpage, pgprot) \
+ __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
+
+static inline void direct_remap_area_pte(pte_t *pte,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t **v)
+{
+ unsigned long end;
+
+ address &= ~PMD_MASK;
+ end = address + size;
+ if (end > PMD_SIZE)
+ end = PMD_SIZE;
+ if (address >= end)
+ BUG();
+
+ do {
+ (*v)->ptr = virt_to_machine(pte);
+ (*v)++;
+ address += PAGE_SIZE;
+ pte++;
+ } while (address && (address < end));
+}
+
+static inline int direct_remap_area_pmd(struct mm_struct *mm,
+ pmd_t *pmd,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t **v)
+{
+ unsigned long end;
+
+ address &= ~PGDIR_MASK;
+ end = address + size;
+ if (end > PGDIR_SIZE)
+ end = PGDIR_SIZE;
+ if (address >= end)
+ BUG();
+ do {
+ pte_t *pte = pte_alloc_map(mm, pmd, address);
+ if (!pte)
+ return -ENOMEM;
+ direct_remap_area_pte(pte, address, end - address, v);
+ pte_unmap(pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ pmd++;
+ } while (address && (address < end));
+ return 0;
+}
+
+int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v)
+{
+ pgd_t * dir;
+ unsigned long end = address + size;
+
+ dir = pgd_offset(mm, address);
+ flush_cache_all();
+ if (address >= end)
+ BUG();
+ spin_lock(&mm->page_table_lock);
+ do {
+ pmd_t *pmd = pmd_alloc(mm, dir, address);
+ if (!pmd)
+ return -ENOMEM;
+ direct_remap_area_pmd(mm, pmd, address, end - address, &v);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ dir++;
+
+ } while (address && (address < end));
+ spin_unlock(&mm->page_table_lock);
+ flush_tlb_all();
+ return 0;
+}
+
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid)
+{
+ int i;
+ unsigned long start_address;
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+ mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+
+ u[0].ptr = MMU_EXTENDED_COMMAND;
+ u[0].val = MMUEXT_SET_FOREIGNDOM;
+ u[0].val |= (unsigned long)domid << 16;
+ v = w = &u[1];
+
+ start_address = address;
+
+ for(i = 0; i < size; i += PAGE_SIZE) {
+ if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
+ /* Fill in the PTE pointers. */
+ __direct_remap_area_pages(mm,
+ start_address,
+ address-start_address,
+ w);
+
+ if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
+ return -EFAULT;
+ v = w;
+ start_address = address;
+ }
+
+ /*
+ * Fill in the machine address: PTE ptr is done later by
+ * __direct_remap_area_pages().
+ */
+ v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
+
+ machine_addr += PAGE_SIZE;
+ address += PAGE_SIZE;
+ v++;
+ }
+
+ if (v != w) {
+ /* get the ptep's filled in */
+ __direct_remap_area_pages(mm,
+ start_address,
+ address-start_address,
+ w);
+ if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
+ return -EFAULT;
+ }
+
+ return 0;
+}
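
direct_remap_area_pages() above batches its work: mmu_update_t entries accumulate in the fixed-size u[] array and are handed to HYPERVISOR_mmu_update() whenever the array fills, plus once more for the final partial batch. A small standalone sketch of this flush-when-full pattern, where struct update and submit_batch() are illustrative stand-ins for the hypercall interface:

    #include <stdio.h>
    #include <stddef.h>

    #define QUEUE_LEN 4            /* stands in for MAX_DIRECTMAP_MMU_QUEUE */

    struct update { unsigned long ptr, val; };

    /* Pretend hypercall: just report how many entries were submitted. */
    static int submit_batch(const struct update *u, size_t n)
    {
        printf("submitting batch of %zu updates\n", n);
        return 0;
    }

    int main(void)
    {
        struct update u[QUEUE_LEN];
        size_t fill = 0;
        unsigned long page;

        for (page = 0; page < 10; page++) {
            if (fill == QUEUE_LEN) {      /* queue full: flush before adding more */
                submit_batch(u, fill);
                fill = 0;
            }
            u[fill].ptr = page;           /* queue one update */
            u[fill].val = page * 4096;
            fill++;
        }
        if (fill != 0)                    /* flush the final partial batch */
            submit_batch(u, fill);
        return 0;
    }
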
--- /dev/null
- queue_pte_pin(virt_to_phys(pte));
+/*
+ * linux/arch/i386/mm/pgtable.c
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#include <asm-xen/foreign_page.h>
+
+void show_mem(void)
+{
+ int total = 0, reserved = 0;
+ int shared = 0, cached = 0;
+ int highmem = 0;
+ struct page *page;
+ pg_data_t *pgdat;
+ unsigned long i;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ for_each_pgdat(pgdat) {
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ page = pgdat->node_mem_map + i;
+ total++;
+ if (PageHighMem(page))
+ highmem++;
+ if (PageReserved(page))
+ reserved++;
+ else if (PageSwapCache(page))
+ cached++;
+ else if (page_count(page))
+ shared += page_count(page) - 1;
+ }
+ }
+ printk("%d pages of RAM\n", total);
+ printk("%d pages of HIGHMEM\n",highmem);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ printk("%d pages swap cached\n",cached);
+}
+
+/*
+ * Associate a virtual page frame with a given physical page frame
+ * and protection flags for that frame.
+ */
+static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ if (pgd_none(*pgd)) {
+ BUG();
+ return;
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ if (pmd_none(*pmd)) {
+ BUG();
+ return;
+ }
+ pte = pte_offset_kernel(pmd, vaddr);
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte(pfn, flags));
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+/*
+ * Associate a virtual page frame with a given physical page frame
+ * and protection flags for that frame.
+ */
+static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn,
+ pgprot_t flags)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ if (pgd_none(*pgd)) {
+ BUG();
+ return;
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ if (pmd_none(*pmd)) {
+ BUG();
+ return;
+ }
+ pte = pte_offset_kernel(pmd, vaddr);
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte_ma(pfn, flags));
+
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+/*
+ * Associate a large virtual page frame with a given physical page frame
+ * and protection flags for that frame. pfn is for the base of the page,
+ * vaddr is what the page gets mapped to - both must be properly aligned.
+ * The pmd must already be instantiated. Assumes PAE mode.
+ */
+void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+
+ if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */
+ printk ("set_pmd_pfn: vaddr misaligned\n");
+ return; /* BUG(); */
+ }
+ if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */
+ printk ("set_pmd_pfn: pfn misaligned\n");
+ return; /* BUG(); */
+ }
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ if (pgd_none(*pgd)) {
+ printk ("set_pmd_pfn: pgd_none\n");
+ return; /* BUG(); */
+ }
+ pmd = pmd_offset(pgd, vaddr);
+ set_pmd(pmd, pfn_pmd(pfn, flags));
+ /*
+ * It's enough to flush this one mapping.
+ * (PGE mappings get flushed as well)
+ */
+ __flush_tlb_one(vaddr);
+}
+
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+ set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+}
+
+void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+{
+ unsigned long address = __fix_to_virt(idx);
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+ set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+ if (pte) {
+ clear_page(pte);
+ make_page_readonly(pte);
+ xen_flush_page_update_queue();
+ }
+ return pte;
+}
+
+void pte_ctor(void *pte, kmem_cache_t *cache, unsigned long unused)
+{
+ struct page *page = virt_to_page(pte);
+ SetPageForeign(page, pte_free);
+ set_page_count(page, 1);
+
+ clear_page(pte);
+ make_page_readonly(pte);
- queue_pte_unpin(virt_to_phys(pte));
++ queue_pte_pin(__pa(pte));
+ flush_page_update_queue();
+}
+
+void pte_dtor(void *pte, kmem_cache_t *cache, unsigned long unused)
+{
+ struct page *page = virt_to_page(pte);
+ ClearPageForeign(page);
+
++ queue_pte_unpin(__pa(pte));
+ make_page_writable(pte);
+ flush_page_update_queue();
+}
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *ptep;
+
+#ifdef CONFIG_HIGHPTE
+ struct page *pte;
+
+ pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+ if (pte == NULL)
+ return pte;
+ if (pte >= highmem_start_page) {
+ clear_highpage(pte);
+ return pte;
+ }
+ /* not a highmem page -- free page and grab one from the cache */
+ __free_page(pte);
+#endif
+ ptep = kmem_cache_alloc(pte_cache, GFP_KERNEL);
+ if (ptep)
+ return virt_to_page(ptep);
+ return NULL;
+}
+
+void pte_free(struct page *pte)
+{
+ set_page_count(pte, 1);
+#ifdef CONFIG_HIGHPTE
+ if (pte < highmem_start_page)
+#endif
+ kmem_cache_free(pte_cache,
+ phys_to_virt(page_to_pseudophys(pte)));
+#ifdef CONFIG_HIGHPTE
+ else
+ __free_page(pte);
+#endif
+}
+
+void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
+{
+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * The locking scheme was chosen on the basis of manfred's
+ * recommendations and having no core impact whatsoever.
+ * -- wli
+ */
+spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED;
+struct page *pgd_list;
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+ page->index = (unsigned long)pgd_list;
+ if (pgd_list)
+ pgd_list->private = (unsigned long)&page->index;
+ pgd_list = page;
+ page->private = (unsigned long)&pgd_list;
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ struct page *next, **pprev, *page = virt_to_page(pgd);
+ next = (struct page *)page->index;
+ pprev = (struct page **)page->private;
+ *pprev = next;
+ if (next)
+ next->private = (unsigned long)pprev;
+}
+
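
pgd_list_add()/pgd_list_del() above thread the pgd pages onto a list through struct page, keeping the next element in page->index and a pointer back to the previous link in page->private so removal is O(1) with no list walk. The same technique sketched with an ordinary userspace structure (names here are illustrative):

    #include <stdio.h>
    #include <stddef.h>

    struct node {
        int id;
        struct node *next;      /* plays the role of page->index   */
        struct node **pprev;    /* plays the role of page->private */
    };

    static struct node *list_head;

    static void list_add(struct node *n)
    {
        n->next = list_head;
        if (list_head)
            list_head->pprev = &n->next;
        list_head = n;
        n->pprev = &list_head;
    }

    static void list_del(struct node *n)
    {
        *n->pprev = n->next;             /* unlink in O(1), no walk needed */
        if (n->next)
            n->next->pprev = n->pprev;
    }

    int main(void)
    {
        struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
        struct node *p;

        list_add(&a);
        list_add(&b);
        list_add(&c);
        list_del(&b);                    /* remove the middle element */
        for (p = list_head; p; p = p->next)
            printf("node %d\n", p->id);  /* prints 3 then 1 */
        return 0;
    }
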
+void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+ unsigned long flags;
+
+ if (PTRS_PER_PMD == 1)
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+ if (PTRS_PER_PMD > 1)
+ goto out;
+
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ out:
+ make_page_readonly(pgd);
+ queue_pgd_pin(__pa(pgd));
+ flush_page_update_queue();
+}
+
+/* never called when PTRS_PER_PMD > 1 */
+void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+{
+ unsigned long flags; /* can be called from interrupt context */
+
+ queue_pgd_unpin(__pa(pgd));
+ make_page_writable(pgd);
+ flush_page_update_queue();
+
+ if (PTRS_PER_PMD > 1)
+ return;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ int i;
+ pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+
+ if (PTRS_PER_PMD == 1 || !pgd)
+ return pgd;
+
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ if (!pmd)
+ goto out_oom;
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+ }
+ return pgd;
+
+out_oom:
+ for (i--; i >= 0; i--)
+ kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ kmem_cache_free(pgd_cache, pgd);
+ return NULL;
+}
+
+void pgd_free(pgd_t *pgd)
+{
+ int i;
+
+ /* in the PAE case user pgd entries are overwritten before usage */
+ if (PTRS_PER_PMD > 1)
+ for (i = 0; i < USER_PTRS_PER_PGD; ++i)
+ kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
+ /* in the non-PAE case, clear_page_tables() clears user pgd entries */
+ kmem_cache_free(pgd_cache, pgd);
+}
+
+void make_lowmem_page_readonly(void *va)
+{
+ pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+}
+
+void make_lowmem_page_writable(void *va)
+{
+ pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+}
+
+void make_page_readonly(void *va)
+{
+ pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW);
+ if ( (unsigned long)va >= (unsigned long)high_memory )
+ {
+ unsigned long phys;
+ phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
+#ifdef CONFIG_HIGHMEM
+ if ( (phys >> PAGE_SHIFT) < highstart_pfn )
+#endif
+ make_lowmem_page_readonly(phys_to_virt(phys));
+ }
+}
+
+void make_page_writable(void *va)
+{
+ pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW);
+ if ( (unsigned long)va >= (unsigned long)high_memory )
+ {
+ unsigned long phys;
+ phys = machine_to_phys(*(unsigned long *)pte & PAGE_MASK);
+#ifdef CONFIG_HIGHMEM
+ if ( (phys >> PAGE_SHIFT) < highstart_pfn )
+#endif
+ make_lowmem_page_writable(phys_to_virt(phys));
+ }
+}
+
+void make_pages_readonly(void *va, unsigned int nr)
+{
+ while ( nr-- != 0 )
+ {
+ make_page_readonly(va);
+ va = (void *)((unsigned long)va + PAGE_SIZE);
+ }
+}
+
+void make_pages_writable(void *va, unsigned int nr)
+{
+ while ( nr-- != 0 )
+ {
+ make_page_writable(va);
+ va = (void *)((unsigned long)va + PAGE_SIZE);
+ }
+}
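
The make_*_readonly/_writable helpers above only toggle the _PAGE_RW bit in the existing PTE value before queueing the update; the mapping itself is untouched. A trivial illustration of the bit manipulation being requested (the sample PTE value is made up; _PAGE_RW is bit 1 on x86):

    #include <stdio.h>

    #define PAGE_RW 0x002UL            /* x86 PTE writable bit (bit 1) */

    int main(void)
    {
        unsigned long pte = 0x00fab067UL;   /* example PTE: present, rw, accessed... */
        unsigned long ro  = pte & ~PAGE_RW; /* value submitted by make_*_readonly */
        unsigned long rw  = ro  |  PAGE_RW; /* value submitted by make_*_writable */

        printf("original  %#lx\n", pte);
        printf("read-only %#lx\n", ro);
        printf("writable  %#lx\n", rw);
        return 0;
    }
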
--- /dev/null
- #if 1
- #define dprintf(fmt, args...) \
- printk(KERN_ALERT "[XEN:%s:%s:%d] " fmt, \
- __FUNCTION__, __FILE__, __LINE__, ##args)
- #endif
-
+/******************************************************************************
+ * blkfront.c
+ *
+ * XenLinux virtual block-device driver.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/version.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#include "block.h"
+#else
+#include "common.h"
+#include <linux/blk.h>
+#include <linux/tqueue.h>
+#endif
+
+#include <linux/cdrom.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi.h>
+#include <asm-xen/ctrl_if.h>
+
+typedef unsigned char byte; /* from linux/ide.h */
+
+/* Control whether runtime update of vbds is enabled. */
+#define ENABLE_VBD_UPDATE 1
+
+#if ENABLE_VBD_UPDATE
+static void vbd_update(void);
+#else
+static void vbd_update(void){};
+#endif
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+
+static char *blkif_state_name[] = {
+ [BLKIF_STATE_CLOSED] = "closed",
+ [BLKIF_STATE_DISCONNECTED] = "disconnected",
+ [BLKIF_STATE_CONNECTED] = "connected",
+};
+
+static char * blkif_status_name[] = {
+ [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
- static void vbd_update()
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args)
+
+static int blkif_handle = 0;
+static unsigned int blkif_state = BLKIF_STATE_CLOSED;
+static unsigned int blkif_evtchn = 0;
+static unsigned int blkif_irq = 0;
+
+static int blkif_control_rsp_valid;
+static blkif_response_t blkif_control_rsp;
+
+static blkif_ring_t *blk_ring = NULL;
+static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
+static BLKIF_RING_IDX req_prod; /* Private request producer. */
+
+unsigned long rec_ring_free;
+blkif_request_t rec_ring[BLKIF_RING_SIZE];
+
+static int recovery = 0; /* "Recovery in progress" flag. Protected
+ * by the blkif_io_lock */
+
+/* We plug the I/O ring if the driver is suspended or if the ring is full. */
+#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
+ (blkif_state != BLKIF_STATE_CONNECTED))
+
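
BLKIF_RING_FULL works because req_prod and resp_cons are free-running counters that are masked (with MASK_BLKIF_IDX) only when indexing a slot; the unsigned subtraction gives the number of in-flight requests even across counter wraparound. A small standalone illustration, with RING_SIZE as an arbitrary stand-in for BLKIF_RING_SIZE:

    #include <stdio.h>

    #define RING_SIZE 8    /* stand-in for BLKIF_RING_SIZE (a power of two) */

    int main(void)
    {
        /* Free-running indices are never masked when compared, only when
         * addressing a slot, so "in flight" is simply prod - cons even
         * after the 32-bit counters wrap. */
        unsigned int prod = 0xfffffffeu;    /* producer close to wrapping */
        unsigned int cons = 0xfffffffcu;    /* consumer a little behind   */

        printf("in flight : %u\n", prod - cons);            /* 2 */
        printf("full?     : %s\n", (prod - cons) == RING_SIZE ? "yes" : "no");
        printf("prod slot : %u\n", prod & (RING_SIZE - 1)); /* masked only for indexing */

        prod += 4;                           /* produce 4 more, wrapping past 0 */
        printf("in flight : %u\n", prod - cons);            /* still 6 */
        return 0;
    }
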
+static void kick_pending_request_queues(void);
+
+int __init xlblk_init(void);
+
+void blkif_completion( blkif_request_t *req );
+
+static inline int GET_ID_FROM_FREELIST( void )
+{
+ unsigned long free = rec_ring_free;
+
+ if ( free > BLKIF_RING_SIZE )
+ BUG();
+
+ rec_ring_free = rec_ring[free].id;
+
+ rec_ring[free].id = 0x0fffffee; /* debug */
+
+ return free;
+}
+
+static inline void ADD_ID_TO_FREELIST( unsigned long id )
+{
+ rec_ring[id].id = rec_ring_free;
+ rec_ring_free = id;
+}
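
GET_ID_FROM_FREELIST()/ADD_ID_TO_FREELIST() above reuse the id field of idle rec_ring[] entries as the links of a free list, so no separate bookkeeping array is needed. A hypothetical userspace sketch of the same in-place free list (entry, alloc_id and free_id are invented names; like the driver, it assumes the list is never over-allocated):

    #include <stdio.h>

    #define NR_ENTRIES 4

    struct entry { unsigned long id; };       /* id doubles as the freelist link */

    static struct entry ring[NR_ENTRIES];
    static unsigned long free_head;

    static void freelist_init(void)
    {
        unsigned long i;
        for (i = 0; i < NR_ENTRIES; i++)
            ring[i].id = i + 1;               /* each free slot points at the next */
        free_head = 0;
    }

    static unsigned long alloc_id(void)
    {
        unsigned long id = free_head;
        free_head = ring[id].id;              /* pop: follow the link in the slot */
        return id;
    }

    static void free_id(unsigned long id)
    {
        ring[id].id = free_head;              /* push the slot back on the list */
        free_head = id;
    }

    int main(void)
    {
        unsigned long a, b;

        freelist_init();
        a = alloc_id();                       /* 0 */
        b = alloc_id();                       /* 1 */
        free_id(a);
        printf("a=%lu b=%lu next=%lu\n", a, b, alloc_id());  /* next reuses 0 */
        return 0;
    }
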
+
+
+/************************ COMMON CODE (inlined) ************************/
+
+/* Kernel-specific definitions used in the common code */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define DISABLE_SCATTERGATHER()
+#else
+static int sg_operation = -1;
+#define DISABLE_SCATTERGATHER() (sg_operation = -1)
+#endif
+
+static inline void translate_req_to_pfn(blkif_request_t *xreq,
+ blkif_request_t *req)
+{
+ int i;
+
+ xreq->operation = req->operation;
+ xreq->nr_segments = req->nr_segments;
+ xreq->device = req->device;
+ /* preserve id */
+ xreq->sector_number = req->sector_number;
+
+ for ( i = 0; i < req->nr_segments; i++ )
+ xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+}
+
+static inline void translate_req_to_mfn(blkif_request_t *xreq,
+ blkif_request_t *req)
+{
+ int i;
+
+ xreq->operation = req->operation;
+ xreq->nr_segments = req->nr_segments;
+ xreq->device = req->device;
+ xreq->id = req->id; /* copy id (unlike above) */
+ xreq->sector_number = req->sector_number;
+
+ for ( i = 0; i < req->nr_segments; i++ )
+ xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+}
+
+
+static inline void flush_requests(void)
+{
+ DISABLE_SCATTERGATHER();
+ wmb(); /* Ensure that the frontend can see the requests. */
+ blk_ring->req_prod = req_prod;
+ notify_via_evtchn(blkif_evtchn);
+}
+
+
+
+
+/************************** KERNEL VERSION 2.6 **************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+__initcall(xlblk_init);
+
+#if ENABLE_VBD_UPDATE
- dprintf(">\n");
- dprintf("<\n");
++static void vbd_update(void)
+{
- DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
+}
+#endif /* ENABLE_VBD_UPDATE */
+
+static void kick_pending_request_queues(void)
+{
+
+ if ( (xlbd_blk_queue != NULL) &&
+ test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
+ {
+ blk_start_queue(xlbd_blk_queue);
+ /* XXXcl call to request_fn should not be needed but
+ * we get stuck without... needs investigating
+ */
+ xlbd_blk_queue->request_fn(xlbd_blk_queue);
+ }
+
+}
+
+
+int blkif_open(struct inode *inode, struct file *filep)
+{
+ struct gendisk *gd = inode->i_bdev->bd_disk;
+ struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /* Update of usage count is protected by per-device semaphore. */
+ di->mi->usage++;
+
+ return 0;
+}
+
+
+int blkif_release(struct inode *inode, struct file *filep)
+{
+ struct gendisk *gd = inode->i_bdev->bd_disk;
+ struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+
+ /*
+ * When usage drops to zero it may allow more VBD updates to occur.
+ * Update of usage count is protected by a per-device semaphore.
+ */
+ if (--di->mi->usage == 0) {
+ vbd_update();
+ }
+
+ return 0;
+}
+
+
+int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ /* struct gendisk *gd = inode->i_bdev->bd_disk; */
+
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long)argument, inode->i_rdev);
+
+ switch (command) {
+
+ case HDIO_GETGEO:
+ /* return ENOSYS to use defaults */
+ return -ENOSYS;
+
+ default:
+ printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+#if 0
+/* check media change: should probably do something here in some cases :-) */
+int blkif_check(kdev_t dev)
+{
+ DPRINTK("blkif_check\n");
+ return 0;
+}
+
+int blkif_revalidate(kdev_t dev)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ xen_block_t *disk;
+ unsigned long capacity;
+ int i, rc = 0;
+
+ if ( (bd = bdget(dev)) == NULL )
+ return -EINVAL;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(dev)) == NULL) ||
+ ((disk = xldev_to_xldisk(dev)) == NULL) ||
+ ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if ( disk->usage > 1 )
+ {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Only reread partition table if VBDs aren't mapped to partitions. */
+ if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
+ {
+ for ( i = gd->max_p - 1; i >= 0; i-- )
+ {
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
+ }
+
+ grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+#endif
+
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int blkif_queue_request(struct request *req)
+{
+ struct xlbd_disk_info *di =
+ (struct xlbd_disk_info *)req->rq_disk->private_data;
+ unsigned long buffer_ma;
+ blkif_request_t *ring_req;
+ struct bio *bio;
+ struct bio_vec *bvec;
+ int idx, s;
+ unsigned long id;
+ unsigned int fsect, lsect;
+
+ if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ /* Fill out a communications ring structure. */
+ ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
+ id = GET_ID_FROM_FREELIST();
+ rec_ring[id].id = (unsigned long) req;
+
+ ring_req->id = id;
+ ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
+ BLKIF_OP_READ;
+ ring_req->sector_number = (blkif_sector_t)req->sector;
+ ring_req->device = di->xd_device;
+
+ s = 0;
+ ring_req->nr_segments = 0;
+ rq_for_each_bio(bio, req) {
+ bio_for_each_segment(bvec, bio, idx) {
+ buffer_ma = page_to_phys(bvec->bv_page);
+ if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
+ BUG();
+
+ fsect = bvec->bv_offset >> 9;
+ lsect = fsect + (bvec->bv_len >> 9) - 1;
+ if (unlikely(lsect > 7))
+ BUG();
+
+ ring_req->frame_and_sects[ring_req->nr_segments++] =
+ buffer_ma | (fsect << 3) | lsect;
+ s += bvec->bv_len >> 9;
+ }
+ }
+
+ req_prod++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ translate_req_to_pfn( &rec_ring[id], ring_req);
+
+ return 0;
+}
+
+
+/*
+ * do_blkif_request
+ * read a block; request is in a request queue
+ */
+void do_blkif_request(request_queue_t *rq)
+{
+ struct request *req;
+ int queued;
+
+ DPRINTK("Entered do_blkif_request\n");
+
+ queued = 0;
+
+ while ((req = elv_next_request(rq)) != NULL) {
+ if (!blk_fs_request(req)) {
+ end_request(req, 0);
+ continue;
+ }
+
+ if ( BLKIF_RING_FULL )
+ {
+ blk_stop_queue(rq);
+ break;
+ }
- dprintf(">\n");
++ DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
+ req, req->cmd, req->sector, req->current_nr_sectors,
+ req->nr_sectors, req->buffer,
+ rq_data_dir(req) ? "write" : "read");
+ blkdev_dequeue_request(req);
+ if (blkif_queue_request(req)) {
+ blk_stop_queue(rq);
+ break;
+ }
+ queued++;
+ }
+
+ if (queued != 0)
+ flush_requests();
+}
+
+
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ struct request *req;
+ blkif_response_t *bret;
+ BLKIF_RING_IDX i, rp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
+ unlikely(recovery) )
+ {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ rp = blk_ring->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for ( i = resp_cons; i != rp; i++ )
+ {
+ unsigned long id;
+ bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+ id = bret->id;
+ req = (struct request *)rec_ring[id].id;
+
+ blkif_completion( &rec_ring[id] );
+
+ ADD_ID_TO_FREELIST(id); /* overwrites req */
+
+ switch ( bret->operation )
+ {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
+ DPRINTK("Bad return from blkdev data request: %x\n",
+ bret->status);
+
+ if ( unlikely(end_that_request_first
+ (req,
+ (bret->status == BLKIF_RSP_OKAY),
+ req->hard_nr_sectors)) )
+ BUG();
+ end_that_request_last(req);
+
+ break;
+ case BLKIF_OP_PROBE:
+ memcpy(&blkif_control_rsp, bret, sizeof(*bret));
+ blkif_control_rsp_valid = 1;
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ resp_cons = i;
+
+ kick_pending_request_queues();
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+#else
+/************************** KERNEL VERSION 2.4 **************************/
+
+static kdev_t sg_dev;
+static unsigned long sg_next_sect;
+
+/*
+ * Request queues with outstanding work, but ring is currently full.
+ * We need no special lock here, as we always access this with the
+ * blkif_io_lock held. We only need a small maximum list.
+ */
+#define MAX_PENDING 8
+static request_queue_t *pending_queues[MAX_PENDING];
+static int nr_pending;
+
+
+#define blkif_io_lock io_request_lock
+
+/*============================================================================*/
+#if ENABLE_VBD_UPDATE
+
+/*
+ * blkif_update_int/update_vbds_task - handle VBD update events.
+ * Schedule a task for keventd to run, which will update the VBDs and perform
+ * the corresponding updates to our view of VBD state.
+ */
+static void update_vbds_task(void *unused)
+{
+ xlvbd_update_vbds();
+}
+
+static void vbd_update(void)
+{
+ static struct tq_struct update_tq;
- dprintf("<\n");
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
-
-
+}
+
+#endif /* ENABLE_VBD_UPDATE */
+/*============================================================================*/
+
+
+static void kick_pending_request_queues(void)
+{
+ /* We kick pending request queues if the ring is reasonably empty. */
+ if ( (nr_pending != 0) &&
+ ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
+ {
+ /* Attempt to drain the queue, but bail if the ring becomes full. */
+ while ( (nr_pending != 0) && !BLKIF_RING_FULL )
+ do_blkif_request(pending_queues[--nr_pending]);
+ }
+}
+
+int blkif_open(struct inode *inode, struct file *filep)
+{
+ short xldev = inode->i_rdev;
+ struct gendisk *gd = get_gendisk(xldev);
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+ short minor = MINOR(xldev);
+
+ if ( gd->part[minor].nr_sects == 0 )
+ {
+ /*
+ * Device either doesn't exist, or has zero capacity; we use a few
+ * cheesy heuristics to return the relevant error code
+ */
+ if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
+ ((minor & (gd->max_p - 1)) != 0) )
+ {
+ /*
+ * We have a real device, but no such partition, or we just have a
+ * partition number so guess this is the problem.
+ */
+ return -ENXIO; /* no such device or address */
+ }
+ else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
+ {
+ /* This is a removable device => assume that media is missing. */
+ return -ENOMEDIUM; /* media not present (this is a guess) */
+ }
+ else
+ {
+ /* Just go for the general 'no such device' error. */
+ return -ENODEV; /* no such device */
+ }
+ }
+
+ /* Update of usage count is protected by per-device semaphore. */
+ disk->usage++;
+
+ return 0;
+}
+
+
+int blkif_release(struct inode *inode, struct file *filep)
+{
+ xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+
+ /*
+ * When usage drops to zero it may allow more VBD updates to occur.
+ * Update of usage count is protected by a per-device semaphore.
+ */
+ if ( --disk->usage == 0 ) {
+ vbd_update();
+ }
+
+ return 0;
+}
+
+
+int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument)
+{
+ kdev_t dev = inode->i_rdev;
+ struct hd_geometry *geo = (struct hd_geometry *)argument;
+ struct gendisk *gd;
+ struct hd_struct *part;
+ int i;
+ unsigned short cylinders;
+ byte heads, sectors;
+
+ /* NB. No need to check permissions. That is done for us. */
+
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long) argument, dev);
+
+ gd = get_gendisk(dev);
+ part = &gd->part[MINOR(dev)];
+
+ switch ( command )
+ {
+ case BLKGETSIZE:
+ DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
+ return put_user(part->nr_sects, (unsigned long *) argument);
+
+ case BLKGETSIZE64:
+ DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
+ (u64)part->nr_sects * 512);
+ return put_user((u64)part->nr_sects * 512, (u64 *) argument);
+
+ case BLKRRPART: /* re-read partition table */
+ DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
+ return blkif_revalidate(dev);
+
+ case BLKSSZGET:
+ return hardsect_size[MAJOR(dev)][MINOR(dev)];
+
+ case BLKBSZGET: /* get block size */
+ DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
+ break;
+
+ case BLKBSZSET: /* set block size */
+ DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
+ break;
+
+ case BLKRASET: /* set read-ahead */
+ DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
+ break;
+
+ case BLKRAGET: /* get read-ahead */
+ DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET);
+ break;
+
+ case HDIO_GETGEO:
+ DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
+ if (!argument) return -EINVAL;
+
+ /* We don't have real geometry info, but let's at least return
+ values consistent with the size of the device */
+
+ heads = 0xff;
+ sectors = 0x3f;
+ cylinders = part->nr_sects / (heads * sectors);
+
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT;
+
+ return 0;
+
+ case HDIO_GETGEO_BIG:
+ DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
+ if (!argument) return -EINVAL;
+
+ /* We don't have real geometry info, but let's at least return
+ values consistent with the size of the device */
+
+ heads = 0xff;
+ sectors = 0x3f;
+ cylinders = part->nr_sects / (heads * sectors);
+
+ if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
+ if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
+ if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
+ if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT;
+
+ return 0;
+
+ case CDROMMULTISESSION:
+ DPRINTK("FIXME: support multisession CDs later\n");
+ for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
+ if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
+ return 0;
+
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
+ return -ENOSYS;
+
+ default:
+ printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+
+
+/* check media change: should probably do something here in some cases :-) */
+int blkif_check(kdev_t dev)
+{
+ DPRINTK("blkif_check\n");
+ return 0;
+}
+
+int blkif_revalidate(kdev_t dev)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ xl_disk_t *disk;
+ unsigned long capacity;
+ int i, rc = 0;
+
+ if ( (bd = bdget(dev)) == NULL )
+ return -EINVAL;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(dev)) == NULL) ||
+ ((disk = xldev_to_xldisk(dev)) == NULL) ||
+ ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if ( disk->usage > 1 )
+ {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Only reread partition table if VBDs aren't mapped to partitions. */
+ if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
+ {
+ for ( i = gd->max_p - 1; i >= 0; i-- )
+ {
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
+ }
+
+ grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+
- unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int blkif_queue_request(unsigned long id,
+ int operation,
+ char * buffer,
+ unsigned long sector_number,
+ unsigned short nr_sectors,
+ kdev_t device)
+{
-
-
++ unsigned long buffer_ma = virt_to_bus(buffer);
+ unsigned long xid;
+ struct gendisk *gd;
+ blkif_request_t *req;
+ struct buffer_head *bh;
+ unsigned int fsect, lsect;
+
+ fsect = (buffer_ma & ~PAGE_MASK) >> 9;
+ lsect = fsect + nr_sectors - 1;
+
+ /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
+ if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
+ BUG();
+ if ( lsect > 7 )
+ BUG();
+
+ buffer_ma &= PAGE_MASK;
+
+ if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
+ return 1;
+
+ switch ( operation )
+ {
+
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ gd = get_gendisk(device);
+
+ /*
+ * Update the sector_number we'll pass down as appropriate; note that
+ * we could sanity-check that the resulting sector lies within this
+ * partition, but the driver backend will do that anyhow.
+ */
+ sector_number += gd->part[MINOR(device)].start_sect;
+
+ /*
+ * If this unit doesn't consist of virtual partitions then we clear
+ * the partn bits from the device number.
+ */
+ if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
+ GENHD_FL_VIRT_PARTNS) )
+ device &= ~(gd->max_p - 1);
+
+ if ( (sg_operation == operation) &&
+ (sg_dev == device) &&
+ (sg_next_sect == sector_number) )
+ {
+
+ req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
+ bh = (struct buffer_head *)id;
+
+ bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
+
+
+ rec_ring[req->id].id = id;
+
+ req->frame_and_sects[req->nr_segments] =
+ buffer_ma | (fsect<<3) | lsect;
+ if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
+ sg_next_sect += nr_sectors;
+ else
+ DISABLE_SCATTERGATHER();
+
+ /* Update the copy of the request in the recovery ring. */
+ translate_req_to_pfn(&rec_ring[req->id], req );
+
+ return 0;
+ }
+ else if ( BLKIF_RING_FULL )
+ {
+ return 1;
+ }
+ else
+ {
+ sg_operation = operation;
+ sg_dev = device;
+ sg_next_sect = sector_number + nr_sectors;
+ }
+ break;
+
+ default:
+ panic("unknown op %d\n", operation);
+ }
+
+ /* Fill out a communications ring structure. */
+ req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
+
+ xid = GET_ID_FROM_FREELIST();
+ rec_ring[xid].id = id;
+
+ req->id = xid;
+ req->operation = operation;
+ req->sector_number = (blkif_sector_t)sector_number;
+ req->device = device;
+ req->nr_segments = 1;
+ req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
+
+ req_prod++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ translate_req_to_pfn(&rec_ring[xid], req );
+
- printk(KERN_INFO "xen_blk: Recovering virtual block device driver\n");
-
+ return 0;
+}
+
+
+/*
+ * do_blkif_request
+ * read a block; request is in a request queue
+ */
+void do_blkif_request(request_queue_t *rq)
+{
+ struct request *req;
+ struct buffer_head *bh, *next_bh;
+ int rw, nsect, full, queued = 0;
+
+ DPRINTK("Entered do_blkif_request\n");
+
+ while ( !rq->plugged && !list_empty(&rq->queue_head))
+ {
+ if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
+ goto out;
+
+ DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
+ req, req->cmd, req->sector,
+ req->current_nr_sectors, req->nr_sectors, req->bh);
+
+ rw = req->cmd;
+ if ( rw == READA )
+ rw = READ;
+ if ( unlikely((rw != READ) && (rw != WRITE)) )
+ panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
+
+ req->errors = 0;
+
+ bh = req->bh;
+ while ( bh != NULL )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+
+ full = blkif_queue_request(
+ (unsigned long)bh,
+ (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
+ bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
+
+ if ( full )
+ {
+ bh->b_reqnext = next_bh;
+ pending_queues[nr_pending++] = rq;
+ if ( unlikely(nr_pending >= MAX_PENDING) )
+ BUG();
+ goto out;
+ }
+
+ queued++;
+
+ /* Dequeue the buffer head from the request. */
+ nsect = bh->b_size >> 9;
+ bh = req->bh = next_bh;
+
+ if ( bh != NULL )
+ {
+ /* There's another buffer head to do. Update the request. */
+ req->hard_sector += nsect;
+ req->hard_nr_sectors -= nsect;
+ req->sector = req->hard_sector;
+ req->nr_sectors = req->hard_nr_sectors;
+ req->current_nr_sectors = bh->b_size >> 9;
+ req->buffer = bh->b_data;
+ }
+ else
+ {
+ /* That was the last buffer head. Finalise the request. */
+ if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
+ BUG();
+ blkdev_dequeue_request(req);
+ end_that_request_last(req);
+ }
+ }
+ }
+
+ out:
+ if ( queued != 0 )
+ flush_requests();
+}
+
+
+static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ BLKIF_RING_IDX i, rp;
+ unsigned long flags;
+ struct buffer_head *bh, *next_bh;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+
+ if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
+ {
+ spin_unlock_irqrestore(&io_request_lock, flags);
+ return;
+ }
+
+ rp = blk_ring->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for ( i = resp_cons; i != rp; i++ )
+ {
+ unsigned long id;
+ blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+ id = bret->id;
+ bh = (struct buffer_head *)rec_ring[id].id;
+
+ blkif_completion( &rec_ring[id] );
+
+ ADD_ID_TO_FREELIST(id);
+
+ switch ( bret->operation )
+ {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
+ DPRINTK("Bad return from blkdev data request: %lx\n",
+ bret->status);
+ for ( ; bh != NULL; bh = next_bh )
+ {
+ next_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
+ }
+
+ break;
+ case BLKIF_OP_PROBE:
+ memcpy(&blkif_control_rsp, bret, sizeof(*bret));
+ blkif_control_rsp_valid = 1;
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ resp_cons = i;
+
+ kick_pending_request_queues();
+
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+
+#endif
+
+/***************************** COMMON CODE *******************************/
+
+
+void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
+{
+ unsigned long flags, id;
+
+ retry:
+ while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+ if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
+ {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ goto retry;
+ }
+
+ DISABLE_SCATTERGATHER();
+ blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;
+
+ id = GET_ID_FROM_FREELIST();
+ blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
+ rec_ring[id].id = (unsigned long) req;
+
+ translate_req_to_pfn( &rec_ring[id], req );
+
+ req_prod++;
+ flush_requests();
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ while ( !blkif_control_rsp_valid )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
+ blkif_control_rsp_valid = 0;
+}
+
+
+/* Send a driver status notification to the domain controller. */
+static void send_driver_status(int ok)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
+ .length = sizeof(blkif_fe_driver_status_t),
+ };
+ blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
+
+ msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+/* Tell the controller to bring up the interface. */
+static void blkif_send_interface_connect(void)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_BLKIF_FE,
+ .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(blkif_fe_interface_connect_t),
+ };
+ blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+
+ msg->handle = 0;
+ msg->shmem_frame = (virt_to_machine(blk_ring) >> PAGE_SHIFT);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+static void blkif_free(void)
+{
- if(blk_ring) free_page((unsigned long)blk_ring);
+ /* Prevent new requests being issued until we fix things up. */
+ spin_lock_irq(&blkif_io_lock);
+ recovery = 1;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Free resources associated with old device channel. */
+ if ( blk_ring != NULL )
+ {
+ free_page((unsigned long)blk_ring);
+ blk_ring = NULL;
+ }
+ free_irq(blkif_irq, NULL);
+ blkif_irq = 0;
+
+ unbind_evtchn_from_irq(blkif_evtchn);
+ blkif_evtchn = 0;
+}
+
+static void blkif_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+static void blkif_disconnect(void)
+{
- printk(KERN_INFO "xen_blk: Recovering virtual block device driver\n");
++ if ( blk_ring != NULL )
++ free_page((unsigned long)blk_ring);
+ blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+ blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+ blkif_state = BLKIF_STATE_DISCONNECTED;
+ blkif_send_interface_connect();
+}
+
+static void blkif_reset(void)
+{
- printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
-
+ blkif_free();
+ blkif_disconnect();
+}
+
+static void blkif_recover(void)
+{
+ int i;
+
+ /* Hmm, requests might be re-ordered when we re-issue them.
+ * This will need to be fixed once we have barriers */
+
+ /* Stage 1 : Find active and move to safety. */
+ for ( i = 0; i < BLKIF_RING_SIZE; i++ )
+ {
+ if ( rec_ring[i].id >= PAGE_OFFSET )
+ {
+ translate_req_to_mfn(
+ &blk_ring->ring[req_prod].req, &rec_ring[i]);
+ req_prod++;
+ }
+ }
+
- WPRINTK(" Unexpected blkif status %s in state %s\n",
- blkif_status_name[status->status],
- blkif_state_name[blkif_state]);
+ /* Stage 2 : Set up shadow list. */
+ for ( i = 0; i < req_prod; i++ )
+ {
+ rec_ring[i].id = blk_ring->ring[i].req.id;
+ blk_ring->ring[i].req.id = i;
+ translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
+ }
+
+ /* Stage 3 : Set up free list. */
+ for ( ; i < BLKIF_RING_SIZE; i++ )
+ rec_ring[i].id = i+1;
+ rec_ring_free = req_prod;
+ rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+
+ /* blk_ring->req_prod will be set when we flush_requests().*/
+ wmb();
+
+ /* Switch off recovery mode, using a memory barrier to ensure that
+ * it's seen before we flush requests - we don't want to miss any
+ * interrupts. */
+ recovery = 0;
+ wmb();
+
+ /* Kicks things back into life. */
+ flush_requests();
+
+ /* Now safe to let other people use the interface. */
+ blkif_state = BLKIF_STATE_CONNECTED;
+}
+
+static void blkif_connect(blkif_fe_interface_status_t *status)
+{
+ int err = 0;
+
+ blkif_evtchn = status->evtchn;
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+
+ err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
+ if ( err )
+ {
+ printk(KERN_ALERT "xen_blk: request_irq failed (err=%d)\n", err);
+ return;
+ }
+
+ if ( recovery )
+ {
+ blkif_recover();
+ }
+ else
+ {
+ /* Transition to connected in case we need to do
+ * a partition probe on a whole disk. */
+ blkif_state = BLKIF_STATE_CONNECTED;
+
+ /* Probe for discs attached to the interface. */
+ xlvbd_init();
+ }
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues();
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static void unexpected(blkif_fe_interface_status_t *status)
+{
- unexpected(status);
++ DPRINTK(" Unexpected blkif status %s in state %s\n",
++ blkif_status_name[status->status],
++ blkif_state_name[blkif_state]);
+}
+
+static void blkif_status(blkif_fe_interface_status_t *status)
+{
+ if ( status->handle != blkif_handle )
+ {
+ WPRINTK(" Invalid blkif: handle=%u", status->handle);
+ return;
+ }
+
+ switch ( status->status )
+ {
+ case BLKIF_INTERFACE_STATUS_CLOSED:
+ switch ( blkif_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_close();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+ switch ( blkif_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ blkif_disconnect();
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ case BLKIF_STATE_CONNECTED:
++ /* unexpected(status); */ /* occurs during suspend/resume */
+ blkif_reset();
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CONNECTED:
+ switch ( blkif_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ unexpected(status);
+ blkif_disconnect();
+ blkif_connect(status);
+ break;
+ case BLKIF_STATE_DISCONNECTED:
+ blkif_connect(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ unexpected(status);
+ blkif_connect(status);
+ break;
+ }
+ break;
+
+ case BLKIF_INTERFACE_STATUS_CHANGED:
+ switch ( blkif_state )
+ {
+ case BLKIF_STATE_CLOSED:
+ case BLKIF_STATE_DISCONNECTED:
+ unexpected(status);
+ break;
+ case BLKIF_STATE_CONNECTED:
+ vbd_update();
+ break;
+ }
+ break;
+
+ default:
+ WPRINTK(" Invalid blkif status: %d\n", status->status);
+ break;
+ }
+}
+
+
+static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_BLKIF_FE_INTERFACE_STATUS:
+ if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+ goto parse_error;
+ blkif_status((blkif_fe_interface_status_t *)
+ &msg->msg[0]);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+int wait_for_blkif(void)
+{
+ int err = 0;
+ int i;
+ send_driver_status(1);
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ if ( blkif_state != BLKIF_STATE_CONNECTED )
+ {
+ printk(KERN_INFO "xen_blk: Timeout connecting to device!\n");
+ err = -ENOSYS;
+ }
+ return err;
+}
+
+int __init xlblk_init(void)
+{
+ int i;
+
+ if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
+ (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
+ return 0;
+
+ printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
+
+ rec_ring_free = 0;
+ for ( i = 0; i < BLKIF_RING_SIZE; i++ )
+ rec_ring[i].id = i+1;
+ rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
+
+ (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ wait_for_blkif();
+
+ return 0;
+}
+
+void blkdev_suspend(void)
+{
+}
+
+void blkdev_resume(void)
+{
+ send_driver_status(1);
+}
+
+/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
+
+void blkif_completion(blkif_request_t *req)
+{
+ int i;
+
+ switch ( req->operation )
+ {
+ case BLKIF_OP_READ:
+ for ( i = 0; i < req->nr_segments; i++ )
+ {
+ unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+ unsigned long mfn = phys_to_machine_mapping[pfn];
+ xen_machphys_update(mfn, pfn);
+ }
+ break;
+ }
+
+}
--- /dev/null
- int xd_device;
+/******************************************************************************
+ * block.h
+ *
+ * Shared definitions between all levels of XenLinux Virtual block devices.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+#include <linux/devfs_fs_kernel.h>
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/io/blkif.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#if 0
+#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+struct xlbd_type_info {
+ int partn_shift;
++ int partn_per_major;
+ int devs_per_major;
+ int hardsect_size;
+ int max_sectors;
+ char *name;
+};
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'. They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct xlbd_major_info {
+ int major;
++ int index;
+ int usage;
+ struct xlbd_type_info *type;
+};
+
+struct xlbd_disk_info {
+ int xd_device;
+ struct xlbd_major_info *mi;
+};
+
+typedef struct xen_block {
+ int usage;
+} xen_block_t;
+
+extern struct request_queue *xlbd_blk_queue;
+extern spinlock_t blkif_io_lock;
+
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
+ unsigned command, unsigned long argument);
+extern int blkif_check(dev_t dev);
+extern int blkif_revalidate(dev_t dev);
+extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+extern void do_blkif_request (request_queue_t *rq);
+
+extern void xlvbd_update_vbds(void);
+
+/* Virtual block-device subsystem. */
+extern int xlvbd_init(void);
+extern void xlvbd_cleanup(void);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
--- /dev/null
- 'a' + (xd_minor >> mi->type->partn_shift),
+/******************************************************************************
+ * vbd.c
+ *
+ * XenLinux virtual block-device driver (xvd).
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "block.h"
+#include <linux/blkdev.h>
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.
+ * potentially combinations of the two) in the naming scheme and in a few
+ * other places (like default readahead, etc).
+ */
+
+#define NUM_IDE_MAJORS 10
+#define NUM_SCSI_MAJORS 9
+#define NUM_VBD_MAJORS 1
+
+static struct xlbd_type_info xlbd_ide_type = {
+ .partn_shift = 6,
++ .partn_per_major = 2,
+ // XXXcl todo blksize_size[major] = 1024;
+ .hardsect_size = 512,
+ .max_sectors = 128, /* 'hwif->rqsize' if we knew it */
+ // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
+ .name = "hd",
+};
+
+static struct xlbd_type_info xlbd_scsi_type = {
+ .partn_shift = 4,
++ .partn_per_major = 16,
+ // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */
+ .hardsect_size = 512,
+ .max_sectors = 128*8, /* XXX 128; */
+ // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */
+ .name = "sd",
+};
+
+static struct xlbd_type_info xlbd_vbd_type = {
+ .partn_shift = 4,
++ .partn_per_major = 16,
+ // XXXcl todo blksize_size[major] = 512;
+ .hardsect_size = 512,
+ .max_sectors = 128,
+ // XXXcl todo read_ahead[major] = 8;
+ .name = "xvd",
+};
+
+/* XXXcl handle cciss after finding out why it's "hacked" in */
+
+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
+ NUM_VBD_MAJORS];
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static vdisk_t *vbd_info;
+
+struct request_queue *xlbd_blk_queue = NULL;
+
+#define MAJOR_XEN(dev) ((dev)>>8)
+#define MINOR_XEN(dev) ((dev) & 0xff)
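+
+/*
+ * Illustrative sketch (not part of the driver proper, hence #if 0): how a
+ * 16-bit backend device number splits into a Xen major/minor pair with the
+ * macros above. 0x0811, for example, decodes to major 8 (SCSI_DISK0_MAJOR)
+ * and minor 17.
+ */
+#if 0
+static void xen_device_decode_example(void)
+{
+ int xd_device = 0x0811; /* hypothetical probe value */
+ int xd_major = MAJOR_XEN(xd_device); /* 8 */
+ int xd_minor = MINOR_XEN(xd_device); /* 17 */
+ printk(KERN_DEBUG "xd_device %04x -> major %d, minor %d\n",
+ xd_device, xd_major, xd_minor);
+}
+#endif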
+
+static struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+ .ioctl = blkif_ioctl,
+#if 0
+ check_media_change: blkif_check,
+ revalidate: blkif_revalidate,
+#endif
+};
+
+spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
+
+static int xlvbd_get_vbd_info(vdisk_t *disk_info)
+{
+ vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
+ blkif_request_t req;
+ blkif_response_t rsp;
+ int nr;
+
+ memset(&req, 0, sizeof(req));
+ req.operation = BLKIF_OP_PROBE;
+ req.nr_segments = 1;
+ req.frame_and_sects[0] = virt_to_machine(buf) | 7;
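+ /*
+ * NB: in this version of the block interface the low bits of each
+ * frame_and_sects entry encode the first/last 512-byte sector of the
+ * frame that the backend may use; "| 7" marks sectors 0-7, i.e. the
+ * whole page, as available for the probe response.
+ */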
+
+ blkif_control_send(&req, &rsp);
+
+ if ( rsp.status <= 0 )
+ {
+ printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
+ return -1;
+ }
+
+ if ( (nr = rsp.status) > MAX_VBDS )
+ nr = MAX_VBDS;
+ memcpy(disk_info, buf, nr * sizeof(vdisk_t));
+
+ free_page((unsigned long)buf);
+
+ return nr;
+}
+
+static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
+{
+ int mi_idx, new_major;
+ int xd_major = MAJOR_XEN(xd_device);
+ int xd_minor = MINOR_XEN(xd_device);
+
+ *minor = xd_minor;
+
+ switch (xd_major) {
+ case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
+ case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
+ case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
+ case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
+ case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
+ case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
+ case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
+ case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
+ case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
+ case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
+ case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+ mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
+ new_major = xd_major;
+ break;
+ case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
+ default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
+ }
+
+ if (major_info[mi_idx])
+ return major_info[mi_idx];
+
+ major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
+ if (major_info[mi_idx] == NULL)
+ return NULL;
+
+ memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
+
+ switch (mi_idx) {
+ case 0 ... (NUM_IDE_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_ide_type;
++ major_info[mi_idx]->index = mi_idx;
+ break;
+ case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_scsi_type;
++ major_info[mi_idx]->index = mi_idx - NUM_IDE_MAJORS;
+ break;
+ case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
+ (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
+ major_info[mi_idx]->type = &xlbd_vbd_type;
++ major_info[mi_idx]->index = mi_idx -
++ (NUM_IDE_MAJORS + NUM_SCSI_MAJORS);
+ break;
+ }
+ major_info[mi_idx]->major = new_major;
+
+ if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
+ printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
+ major_info[mi_idx]->major, major_info[mi_idx]->type->name);
+ goto out;
+ }
+
+ devfs_mk_dir(major_info[mi_idx]->type->name);
+
+ return major_info[mi_idx];
+
+ out:
+ kfree(major_info[mi_idx]);
+ major_info[mi_idx] = NULL;
+ return NULL;
+}
+
+static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
+ int xd_minor, vdisk_t *xd)
+{
+ struct gendisk *gd;
+ struct xlbd_disk_info *di;
+ int device, partno;
+
+ device = MKDEV(mi->major, xd_minor);
+ gd = get_gendisk(device, &partno);
+ if (gd)
+ return gd;
+
+ di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
+ if (di == NULL)
+ return NULL;
+ di->mi = mi;
+ di->xd_device = xd->device;
+
+ /* Construct an appropriate gendisk structure. */
+ gd = alloc_disk(1);
+ if (gd == NULL)
+ goto out;
+
+ gd->major = mi->major;
+ gd->first_minor = xd_minor;
+ gd->fops = &xlvbd_block_fops;
+ gd->private_data = di;
+ sprintf(gd->disk_name, "%s%c%d", mi->type->name,
++ 'a' + mi->index * mi->type->partn_per_major +
++ (xd_minor >> mi->type->partn_shift),
+ xd_minor & ((1 << mi->type->partn_shift) - 1));
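+ /*
+ * Worked example (illustrative): for the SCSI type (partn_shift = 4,
+ * partn_per_major = 16) with mi->index = 0, a backend minor of 17
+ * gives 'a' + 0*16 + (17 >> 4) = 'b' and partition 17 & 15 = 1, so
+ * the node is named "sdb1".
+ */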
+ /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
+
+ set_capacity(gd, xd->capacity);
+
+ if (xlbd_blk_queue == NULL) {
+ xlbd_blk_queue = blk_init_queue(do_blkif_request,
+ &blkif_io_lock);
+ if (xlbd_blk_queue == NULL)
+ goto out;
+ elevator_init(xlbd_blk_queue, "noop");
+
+ /*
+ * Turn off barking 'headactive' mode. We dequeue
+ * buffer heads as soon as we pass them to back-end
+ * driver.
+ */
+ blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
+
+ blk_queue_hardsect_size(xlbd_blk_queue,
+ mi->type->hardsect_size);
+ blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
+
+ /* XXXcl: set the segment boundary mask to PAGE_SIZE for now. To improve
+ on this, either use
+ - blk_queue_merge_bvec to merge requests with adjacent ma's
+ - the tags infrastructure
+ - the dma infrastructure
+ */
+ blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
+
+ blk_queue_max_phys_segments(xlbd_blk_queue,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_hw_segments(xlbd_blk_queue,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
+
+
+ }
+ gd->queue = xlbd_blk_queue;
+
+ add_disk(gd);
+
+ return gd;
+
+ out:
+ if (gd)
+ del_gendisk(gd);
+ kfree(di);
+ return NULL;
+}
+
+/*
+ * xlvbd_init_device - initialise a VBD device
+ * @disk: a vdisk_t describing the VBD
+ *
+ * Takes a vdisk_t * that describes a VBD the domain has access to.
+ * Performs appropriate initialisation and registration of the device.
+ *
+ * Care needs to be taken when making re-entrant calls to ensure that
+ * corruption does not occur. Also, devices that are in use should not have
+ * their details updated. This is the caller's responsibility.
+ */
+static int xlvbd_init_device(vdisk_t *xd)
+{
+ struct block_device *bd;
+ struct gendisk *gd;
+ struct xlbd_major_info *mi;
+ int device;
+ int minor;
+
+ int err = -ENOMEM;
+
+ mi = xlbd_get_major_info(xd->device, &minor);
+ if (mi == NULL)
+ return -EPERM;
+
+ device = MKDEV(mi->major, minor);
+
+ if ((bd = bdget(device)) == NULL)
+ return -EPERM;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ gd = xlvbd_get_gendisk(mi, minor, xd);
+ if (gd == NULL) {
+ err = -EPERM;
+ goto out;
+ }
+
+ if (VDISK_READONLY(xd->info))
+ set_disk_ro(gd, 1);
+
+ /* Some final fix-ups depending on the device type */
+ switch (VDISK_TYPE(xd->info)) {
+ case VDISK_TYPE_CDROM:
+ gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
+ /* FALLTHROUGH */
+ case VDISK_TYPE_FLOPPY:
+ case VDISK_TYPE_TAPE:
+ gd->flags |= GENHD_FL_REMOVABLE;
+ break;
+
+ case VDISK_TYPE_DISK:
+ break;
+
+ default:
+ printk(KERN_ALERT "XenLinux: unknown device type %d\n",
+ VDISK_TYPE(xd->info));
+ break;
+ }
+
+ err = 0;
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return err;
+}
+
+#if 0
+/*
+ * xlvbd_remove_device - remove a device node if possible
+ * @device: numeric device ID
+ *
+ * Updates the gendisk structure and invalidates devices.
+ *
+ * This is OK for now but in future, should perhaps consider where this should
+ * deallocate gendisks / unregister devices.
+ */
+static int xlvbd_remove_device(int device)
+{
+ int i, rc = 0, minor = MINOR(device);
+ struct gendisk *gd;
+ struct block_device *bd;
+ xen_block_t *disk = NULL;
+
+ if ( (bd = bdget(device)) == NULL )
+ return -1;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(device)) == NULL) ||
+ ((disk = xldev_to_xldisk(device)) == NULL) )
+ BUG();
+
+ if ( disk->usage != 0 )
+ {
+ printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
+ }
+
+ if ( (minor & (gd->max_p-1)) != 0 )
+ {
+ /* 1: The VBD is mapped to a partition rather than a whole unit. */
+ invalidate_device(device, 1);
+ gd->part[minor].start_sect = 0;
+ gd->part[minor].nr_sects = 0;
+ gd->sizes[minor] = 0;
+
+ /* Clear the consists-of-virtual-partitions flag if possible. */
+ gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+ for ( i = 1; i < gd->max_p; i++ )
+ if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
+
+ /*
+ * If all virtual partitions are now gone, and a 'whole unit' VBD is
+ * present, then we can try to grok the unit's real partition table.
+ */
+ if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
+ (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
+ !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
+ {
+ register_disk(gd,
+ device&~(gd->max_p-1),
+ gd->max_p,
+ &xlvbd_block_fops,
+ gd->part[minor&~(gd->max_p-1)].nr_sects);
+ }
+ }
+ else
+ {
+ /*
+ * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
+ * NB. The partition entries are only cleared if there are no VBDs
+ * mapped to individual partitions on this unit.
+ */
+ i = gd->max_p - 1; /* Default: clear subpartitions as well. */
+ if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
+ i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
+ while ( i >= 0 )
+ {
+ invalidate_device(device+i, 1);
+ gd->part[minor+i].start_sect = 0;
+ gd->part[minor+i].nr_sects = 0;
+ gd->sizes[minor+i] = 0;
+ i--;
+ }
+ }
+
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+/*
+ * xlvbd_update_vbds - reprobes the VBD status and updates driver state
+ * accordingly. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ */
+void xlvbd_update_vbds(void)
+{
+ int i, j, k, old_nr, new_nr;
+ vdisk_t *old_info, *new_info, *merged_info;
+
+ old_info = vbd_info;
+ old_nr = nr_vbds;
+
+ new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+ if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
+ {
+ kfree(new_info);
+ return;
+ }
+
+ /*
+ * Final list maximum size is old list + new list. This occurs only when
+ * old list and new list do not overlap at all, and we cannot yet destroy
+ * VBDs in the old list because the usage counts are busy.
+ */
+ merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
+
+ /* @i tracks old list; @j tracks new list; @k tracks merged list. */
+ i = j = k = 0;
+
+ while ( (i < old_nr) && (j < new_nr) )
+ {
+ if ( old_info[i].device < new_info[j].device )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ i++;
+ }
+ else if ( old_info[i].device > new_info[j].device )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ j++;
+ }
+ else
+ {
+ if ( ((old_info[i].capacity == new_info[j].capacity) &&
+ (old_info[i].info == new_info[j].info)) ||
+ (xlvbd_remove_device(old_info[i].device) != 0) )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ else if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ i++; j++;
+ }
+ }
+
+ for ( ; i < old_nr; i++ )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
+ }
+
+ for ( ; j < new_nr; j++ )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
+ }
+
+ vbd_info = merged_info;
+ nr_vbds = k;
+
+ kfree(old_info);
+ kfree(new_info);
+}
+#endif
+
+/*
+ * Set up all the linux device goop for the virtual block devices
+ * (vbd's) that we know about. Note that although from the backend
+ * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
+ * number, the domain creation tools conventionally allocate these
+ * numbers to correspond to those used by 'real' linux -- this is just
+ * for convenience as it means e.g. that the same /etc/fstab can be
+ * used when booting with or without Xen.
+ */
+int xlvbd_init(void)
+{
+ int i;
+
+ /*
+ * If compiled as a module, we don't support unloading yet. We
+ * therefore permanently increment the reference count to
+ * disallow it.
+ */
+ /* MOD_INC_USE_COUNT; */
+
+ memset(major_info, 0, sizeof(major_info));
+
+ for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
+ }
+
+ vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
+ nr_vbds = xlvbd_get_vbd_info(vbd_info);
+
+ if (nr_vbds < 0) {
+ kfree(vbd_info);
+ vbd_info = NULL;
+ nr_vbds = 0;
+ } else {
+ for (i = 0; i < nr_vbds; i++)
+ xlvbd_init_device(&vbd_info[i]);
+ }
+
+ return 0;
+}
--- /dev/null
- if ( tty_register_driver(DRV(xencons_driver)) )
- panic("Couldn't register Xen virtual console driver as %s\n",
- DRV(xencons_driver)->name);
+/******************************************************************************
+ * console.c
+ *
+ * Virtual console driver.
+ *
+ * Copyright (c) 2002-2004, K A Fraser.
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm-xen/xen-public/event_channel.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/evtchn.h>
+#include <asm-xen/ctrl_if.h>
+
+/*
+ * Modes:
+ * 'xencons=off' [XC_OFF]: Console is disabled.
+ * 'xencons=tty' [XC_TTY]: Console attached to '/dev/tty[0-9]+'.
+ * 'xencons=ttyS' [XC_SERIAL]: Console attached to '/dev/ttyS[0-9]+'.
+ * [XC_DEFAULT]: DOM0 -> XC_SERIAL ; all others -> XC_TTY.
+ *
+ * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
+ * warnings from standard distro startup scripts.
+ */
+static enum { XC_OFF, XC_DEFAULT, XC_TTY, XC_SERIAL } xc_mode = XC_DEFAULT;
+
+static int __init xencons_setup(char *str)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ if (str[0] == '=')
+ str++;
+#endif
+ if ( !strcmp(str, "tty") )
+ xc_mode = XC_TTY;
+ else if ( !strcmp(str, "ttyS") )
+ xc_mode = XC_SERIAL;
+ else if ( !strcmp(str, "off") )
+ xc_mode = XC_OFF;
+ return 1;
+}
+__setup("xencons", xencons_setup);
+
+/* The kernel and user-land drivers share a common transmit buffer. */
+#define WBUF_SIZE 4096
+#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
+static char wbuf[WBUF_SIZE];
+static unsigned int wc, wp; /* write_cons, write_prod */
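+
+/*
+ * Note: wc and wp are free-running indices; WBUF_MASK() reduces them
+ * modulo WBUF_SIZE (a power of two) only when the buffer is touched.
+ * A minimal sketch of the resulting invariants (illustrative only, so
+ * kept under #if 0) -- these mirror xencons_chars_in_buffer() and
+ * xencons_write_room() below:
+ */
+#if 0
+static unsigned int wbuf_pending(void) { return wp - wc; }
+static unsigned int wbuf_space(void) { return WBUF_SIZE - (wp - wc); }
+static char wbuf_next_char(void) { return wbuf[WBUF_MASK(wc)]; }
+#endif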
+
+/* This lock protects accesses to the common transmit buffer. */
+static spinlock_t xencons_lock = SPIN_LOCK_UNLOCKED;
+
+/* Common transmit-kick routine. */
+static void __xencons_tx_flush(void);
+
+/* This task is used to defer sending console data until there is space. */
+static void xencons_tx_flush_task_routine(void *data);
+
+static DECLARE_TQUEUE(xencons_tx_flush_task,
+ xencons_tx_flush_task_routine,
+ NULL);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static struct tty_driver *xencons_driver;
+#else
+static struct tty_driver xencons_driver;
+#endif
+
+
+/******************** Kernel console driver ********************************/
+
+static void kcons_write(
+ struct console *c, const char *s, unsigned int count)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ for ( i = 0; i < count; i++ )
+ {
+ if ( (wp - wc) >= (WBUF_SIZE - 1) )
+ break;
+ if ( (wbuf[WBUF_MASK(wp++)] = s[i]) == '\n' )
+ wbuf[WBUF_MASK(wp++)] = '\r';
+ }
+
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+static void kcons_write_dom0(
+ struct console *c, const char *s, unsigned int count)
+{
+ int rc;
+
+ while ( count > 0 )
+ {
+ if ( (rc = HYPERVISOR_console_io(CONSOLEIO_write,
+ count, (char *)s)) > 0 )
+ {
+ count -= rc;
+ s += rc;
+ }
+ else
+ break;
+ }
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static struct tty_driver *kcons_device(struct console *c, int *index)
+{
+ *index = c->index;
+ return xencons_driver;
+}
+#else
+static kdev_t kcons_device(struct console *c)
+{
+ return MKDEV(TTY_MAJOR, (xc_mode == XC_SERIAL) ? 64 : 1);
+}
+#endif
+
+static struct console kcons_info = {
+ device: kcons_device,
+ flags: CON_PRINTBUFFER,
+ index: -1
+};
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define __RETCODE 0
+static int __init xen_console_init(void)
+#else
+#define __RETCODE
+void xen_console_init(void)
+#endif
+{
+ if ( xen_start_info.flags & SIF_INITDOMAIN )
+ {
+ if ( xc_mode == XC_DEFAULT )
+ xc_mode = XC_SERIAL;
+ kcons_info.write = kcons_write_dom0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ if ( xc_mode == XC_SERIAL )
+ kcons_info.flags |= CON_ENABLED;
+#endif
+ }
+ else
+ {
+ if ( xc_mode == XC_DEFAULT )
+ xc_mode = XC_TTY;
+ kcons_info.write = kcons_write;
+ }
+
+ if ( xc_mode == XC_OFF )
+ return __RETCODE;
+
+ if ( xc_mode == XC_SERIAL )
+ strcpy(kcons_info.name, "ttyS");
+ else
+ strcpy(kcons_info.name, "tty");
+
+ register_console(&kcons_info);
+ return __RETCODE;
+}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+console_initcall(xen_console_init);
+#endif
+
+/*** Useful function for console debugging -- goes straight to Xen. ***/
+asmlinkage int xprintk(const char *fmt, ...)
+{
+ va_list args;
+ int printk_len;
+ static char printk_buf[1024];
+
+ /* Emit the output into the temporary buffer */
+ va_start(args, fmt);
+ printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+ va_end(args);
+
+ /* Send the processed output directly to Xen. */
+ kcons_write_dom0(NULL, printk_buf, printk_len);
+
+ return 0;
+}
+
+/*** Forcibly flush console data before dying. ***/
+void xencons_force_flush(void)
+{
+ ctrl_msg_t msg;
+ int sz;
+
+ /* Emergency console is synchronous, so there's nothing to flush. */
+ if ( xen_start_info.flags & SIF_INITDOMAIN )
+ return;
+
+ /*
+ * We use dangerous control-interface functions that require a quiescent
+ * system and no interrupts. Try to ensure this with a global cli().
+ */
+ cli();
+
+ /* Spin until console data is flushed through to the domain controller. */
+ while ( (wc != wp) && !ctrl_if_transmitter_empty() )
+ {
+ /* Interrupts are disabled -- we must manually reap responses. */
+ ctrl_if_discard_responses();
+
+ if ( (sz = wp - wc) == 0 )
+ continue;
+ if ( sz > sizeof(msg.msg) )
+ sz = sizeof(msg.msg);
+ if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+
+ msg.type = CMSG_CONSOLE;
+ msg.subtype = CMSG_CONSOLE_DATA;
+ msg.length = sz;
+ memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
+
+ if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
+ wc += sz;
+ }
+}
+
+
+/******************** User-space console driver (/dev/console) ************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define DRV(_d) (_d)
+#define TTY_INDEX(_tty) ((_tty)->index)
+#else
+static int xencons_refcount;
+static struct tty_struct *xencons_table[MAX_NR_CONSOLES];
+#define DRV(_d) (&(_d))
+#define TTY_INDEX(_tty) (MINOR((_tty)->device) - xencons_driver.minor_start)
+#endif
+
+static struct termios *xencons_termios[MAX_NR_CONSOLES];
+static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
+static struct tty_struct *xencons_tty;
+static int xencons_priv_irq;
+static char x_char;
+
+/* Non-privileged receive callback. */
+static void xencons_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ if ( xencons_tty != NULL )
+ {
+ for ( i = 0; i < msg->length; i++ )
+ tty_insert_flip_char(xencons_tty, msg->msg[i], 0);
+ tty_flip_buffer_push(xencons_tty);
+ }
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+/* Privileged and non-privileged transmit worker. */
+static void __xencons_tx_flush(void)
+{
+ int sz, work_done = 0;
+ ctrl_msg_t msg;
+
+ if ( xen_start_info.flags & SIF_INITDOMAIN )
+ {
+ if ( x_char )
+ {
+ kcons_write_dom0(NULL, &x_char, 1);
+ x_char = 0;
+ work_done = 1;
+ }
+
+ while ( wc != wp )
+ {
+ sz = wp - wc;
+ if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+ kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
+ wc += sz;
+ work_done = 1;
+ }
+ }
+ else
+ {
+ while ( x_char )
+ {
+ msg.type = CMSG_CONSOLE;
+ msg.subtype = CMSG_CONSOLE_DATA;
+ msg.length = 1;
+ msg.msg[0] = x_char;
+
+ if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
+ x_char = 0;
+ else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) )
+ break;
+
+ work_done = 1;
+ }
+
+ while ( wc != wp )
+ {
+ sz = wp - wc;
+ if ( sz > sizeof(msg.msg) )
+ sz = sizeof(msg.msg);
+ if ( sz > (WBUF_SIZE - WBUF_MASK(wc)) )
+ sz = WBUF_SIZE - WBUF_MASK(wc);
+
+ msg.type = CMSG_CONSOLE;
+ msg.subtype = CMSG_CONSOLE_DATA;
+ msg.length = sz;
+ memcpy(msg.msg, &wbuf[WBUF_MASK(wc)], sz);
+
+ if ( ctrl_if_send_message_noblock(&msg, NULL, 0) == 0 )
+ wc += sz;
+ else if ( ctrl_if_enqueue_space_callback(&xencons_tx_flush_task) )
+ break;
+
+ work_done = 1;
+ }
+ }
+
+ if ( work_done && (xencons_tty != NULL) )
+ {
+ wake_up_interruptible(&xencons_tty->write_wait);
+ if ( (xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+ (xencons_tty->ldisc.write_wakeup != NULL) )
+ (xencons_tty->ldisc.write_wakeup)(xencons_tty);
+ }
+}
+
+/* Non-privileged transmit kicker. */
+static void xencons_tx_flush_task_routine(void *data)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&xencons_lock, flags);
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/* Privileged receive callback and transmit kicker. */
+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ static char rbuf[16];
+ int i, l;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ if ( xencons_tty != NULL )
+ {
+ /* Receive work. */
+ while ( (l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0 )
+ for ( i = 0; i < l; i++ )
+ tty_insert_flip_char(xencons_tty, rbuf[i], 0);
+ if ( xencons_tty->flip.count != 0 )
+ tty_flip_buffer_push(xencons_tty);
+ }
+
+ /* Transmit work. */
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static int xencons_write_room(struct tty_struct *tty)
+{
+ return WBUF_SIZE - (wp - wc);
+}
+
+static int xencons_chars_in_buffer(struct tty_struct *tty)
+{
+ return wp - wc;
+}
+
+static void xencons_send_xchar(struct tty_struct *tty, char ch)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ x_char = ch;
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+static void xencons_throttle(struct tty_struct *tty)
+{
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ if ( I_IXOFF(tty) )
+ xencons_send_xchar(tty, STOP_CHAR(tty));
+}
+
+static void xencons_unthrottle(struct tty_struct *tty)
+{
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ if ( I_IXOFF(tty) )
+ {
+ if ( x_char != 0 )
+ x_char = 0;
+ else
+ xencons_send_xchar(tty, START_CHAR(tty));
+ }
+}
+
+static void xencons_flush_buffer(struct tty_struct *tty)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ wc = wp = 0;
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
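+/*
+ * NB: __xencons_put_char() does no locking itself; the callers below
+ * (xencons_write, xencons_put_char) serialise access to the shared
+ * transmit buffer via xencons_lock.
+ */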
+static inline int __xencons_put_char(int ch)
+{
+ char _ch = (char)ch;
+ if ( (wp - wc) == WBUF_SIZE )
+ return 0;
+ wbuf[WBUF_MASK(wp++)] = _ch;
+ return 1;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static int xencons_write(struct tty_struct *tty, const unsigned char *buf,
+ int count)
+{
+ int i;
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return count;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ for ( i = 0; i < count; i++ )
+ if ( !__xencons_put_char(buf[i]) )
+ break;
+
+ if ( i != 0 )
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return i;
+}
+#else
+static int xencons_write(struct tty_struct *tty, int from_user,
+ const u_char *buf, int count)
+{
+ int i;
+ unsigned long flags;
+
+ if ( from_user && verify_area(VERIFY_READ, buf, count) )
+ return -EINVAL;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return count;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+
+ for ( i = 0; i < count; i++ )
+ {
+ char ch;
+ if ( from_user )
+ __get_user(ch, buf + i);
+ else
+ ch = buf[i];
+ if ( !__xencons_put_char(ch) )
+ break;
+ }
+
+ if ( i != 0 )
+ __xencons_tx_flush();
+
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return i;
+}
+#endif
+
+static void xencons_put_char(struct tty_struct *tty, u_char ch)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ (void)__xencons_put_char(ch);
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+static void xencons_flush_chars(struct tty_struct *tty)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+ unsigned long orig_jiffies = jiffies;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ while ( DRV(tty->driver)->chars_in_buffer(tty) )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ if ( signal_pending(current) )
+ break;
+ if ( (timeout != 0) && time_after(jiffies, orig_jiffies + timeout) )
+ break;
+ }
+
+ set_current_state(TASK_RUNNING);
+}
+
+static int xencons_open(struct tty_struct *tty, struct file *filp)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return 0;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+ tty->driver_data = NULL;
+ if ( xencons_tty == NULL )
+ xencons_tty = tty;
+ __xencons_tx_flush();
+ spin_unlock_irqrestore(&xencons_lock, flags);
+
+ return 0;
+}
+
+static void xencons_close(struct tty_struct *tty, struct file *filp)
+{
+ unsigned long flags;
+
+ if ( TTY_INDEX(tty) != 0 )
+ return;
+
+ if ( tty->count == 1 )
+ {
+ tty->closing = 1;
+ tty_wait_until_sent(tty, 0);
+ if ( DRV(tty->driver)->flush_buffer != NULL )
+ DRV(tty->driver)->flush_buffer(tty);
+ if ( tty->ldisc.flush_buffer != NULL )
+ tty->ldisc.flush_buffer(tty);
+ tty->closing = 0;
+ spin_lock_irqsave(&xencons_lock, flags);
+ xencons_tty = NULL;
+ spin_unlock_irqrestore(&xencons_lock, flags);
+ }
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+static struct tty_operations xencons_ops = {
+ .open = xencons_open,
+ .close = xencons_close,
+ .write = xencons_write,
+ .write_room = xencons_write_room,
+ .put_char = xencons_put_char,
+ .flush_chars = xencons_flush_chars,
+ .chars_in_buffer = xencons_chars_in_buffer,
+ .send_xchar = xencons_send_xchar,
+ .flush_buffer = xencons_flush_buffer,
+ .throttle = xencons_throttle,
+ .unthrottle = xencons_unthrottle,
+ .wait_until_sent = xencons_wait_until_sent,
+};
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static const char *xennullcon_startup(void)
+{
+ return NULL;
+}
+
+static int xennullcon_dummy(void)
+{
+ return 0;
+}
+
+#define DUMMY (void *)xennullcon_dummy
+
+/*
+ * The console `switch' structure for the dummy console
+ *
+ * Most of the operations are dummies.
+ */
+
+const struct consw xennull_con = {
+ .owner = THIS_MODULE,
+ .con_startup = xennullcon_startup,
+ .con_init = DUMMY,
+ .con_deinit = DUMMY,
+ .con_clear = DUMMY,
+ .con_putc = DUMMY,
+ .con_putcs = DUMMY,
+ .con_cursor = DUMMY,
+ .con_scroll = DUMMY,
+ .con_bmove = DUMMY,
+ .con_switch = DUMMY,
+ .con_blank = DUMMY,
+ .con_font_set = DUMMY,
+ .con_font_get = DUMMY,
+ .con_font_default = DUMMY,
+ .con_font_copy = DUMMY,
+ .con_set_palette = DUMMY,
+ .con_scrolldelta = DUMMY,
+};
+#endif
+#endif
+
+static int __init xencons_init(void)
+{
++ int rc;
++
+ if ( xc_mode == XC_OFF )
+ return 0;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ?
+ 1 : MAX_NR_CONSOLES);
+ if ( xencons_driver == NULL )
+ return -ENOMEM;
+#else
+ memset(&xencons_driver, 0, sizeof(struct tty_driver));
+ xencons_driver.magic = TTY_DRIVER_MAGIC;
+ xencons_driver.refcount = &xencons_refcount;
+ xencons_driver.table = xencons_table;
+ xencons_driver.num = (xc_mode == XC_SERIAL) ? 1 : MAX_NR_CONSOLES;
+#endif
+
+ DRV(xencons_driver)->major = TTY_MAJOR;
+ DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
+ DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
+ DRV(xencons_driver)->init_termios = tty_std_termios;
+ DRV(xencons_driver)->flags =
+ TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_NO_DEVFS;
+ DRV(xencons_driver)->termios = xencons_termios;
+ DRV(xencons_driver)->termios_locked = xencons_termios_locked;
+
+ if ( xc_mode == XC_SERIAL )
+ {
+ DRV(xencons_driver)->name = "ttyS";
+ DRV(xencons_driver)->minor_start = 64;
+ DRV(xencons_driver)->name_base = 0;
+ }
+ else
+ {
+ DRV(xencons_driver)->name = "tty";
+ DRV(xencons_driver)->minor_start = 1;
+ DRV(xencons_driver)->name_base = 1;
+ }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ tty_set_operations(xencons_driver, &xencons_ops);
+#else
+ xencons_driver.open = xencons_open;
+ xencons_driver.close = xencons_close;
+ xencons_driver.write = xencons_write;
+ xencons_driver.write_room = xencons_write_room;
+ xencons_driver.put_char = xencons_put_char;
+ xencons_driver.flush_chars = xencons_flush_chars;
+ xencons_driver.chars_in_buffer = xencons_chars_in_buffer;
+ xencons_driver.send_xchar = xencons_send_xchar;
+ xencons_driver.flush_buffer = xencons_flush_buffer;
+ xencons_driver.throttle = xencons_throttle;
+ xencons_driver.unthrottle = xencons_unthrottle;
+ xencons_driver.wait_until_sent = xencons_wait_until_sent;
+#endif
+
++ if ( (rc = tty_register_driver(DRV(xencons_driver))) != 0 )
++ {
++ printk("Couldn't register Xen virtual console driver as %s\n",
++ DRV(xencons_driver)->name);
++ return rc;
++ }
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ tty_register_device(xencons_driver, 0, NULL);
+#endif
+
+ if ( xen_start_info.flags & SIF_INITDOMAIN )
+ {
+ xencons_priv_irq = bind_virq_to_irq(VIRQ_CONSOLE);
+ (void)request_irq(xencons_priv_irq,
+ xencons_priv_interrupt, 0, "console", NULL);
+ }
+ else
+ {
+ (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
+ }
+
+ printk("Xen virtual console successfully installed as %s\n",
+ DRV(xencons_driver)->name);
+
+ return 0;
+}
+
+static void __exit xencons_fini(void)
+{
+ int ret;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ tty_unregister_device(xencons_driver, 0);
+#endif
+
+ if ( (ret = tty_unregister_driver(DRV(xencons_driver))) != 0 )
+ printk(KERN_ERR "Unable to unregister Xen console driver: %d\n", ret);
+
+ if ( xen_start_info.flags & SIF_INITDOMAIN )
+ {
+ free_irq(xencons_priv_irq, NULL);
+ unbind_virq_from_irq(VIRQ_CONSOLE);
+ }
+ else
+ {
+ ctrl_if_unregister_receiver(CMSG_CONSOLE, xencons_rx);
+ }
+}
+
+module_init(xencons_init);
+module_exit(xencons_fini);
--- /dev/null
- phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT]
+/******************************************************************************
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <asm/io.h>
+#include <asm-xen/evtchn.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/xen-public/io/netif.h>
+#include <asm/page.h>
+
+#include <net/arp.h>
+#include <net/route.h>
+
+#define DEBUG 0
+
+#ifndef __GFP_NOWARN
+#define __GFP_NOWARN 0
+#endif
+#define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
+
+#define init_skb_shinfo(_skb) \
+ do { \
+ atomic_set(&(skb_shinfo(_skb)->dataref), 1); \
+ skb_shinfo(_skb)->nr_frags = 0; \
+ skb_shinfo(_skb)->frag_list = NULL; \
+ } while ( 0 )
+
+/* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
+#define RX_HEADROOM 200
+
+/*
+ * If the backend driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- only need to send a
+ * notification if there are no outstanding unreceived responses.
+ * If the backend may be buffering our transmit buffers for any reason then we
+ * are rather more conservative.
+ */
+#ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
+#define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
+#else
+#define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
+#endif
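+
+/*
+ * Example (illustrative): after network_start_xmit() below queues a
+ * request in slot i, it only calls notify_via_evtchn() when
+ * np->tx->TX_TEST_IDX == i, i.e. when the backend appears to have caught
+ * up with the ring and might otherwise not notice the new work.
+ */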
+
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+
+static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+
+static struct list_head dev_list;
+
+struct net_private
+{
+ struct list_head list;
+ struct net_device *dev;
+
+ struct net_device_stats stats;
+ NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
+ unsigned int tx_full;
+
+ netif_tx_interface_t *tx;
+ netif_rx_interface_t *rx;
+
+ spinlock_t tx_lock;
+ spinlock_t rx_lock;
+
+ unsigned int handle;
+ unsigned int evtchn;
+ unsigned int irq;
+
+ /* What is the status of our connection to the remote backend? */
+#define BEST_CLOSED 0
+#define BEST_DISCONNECTED 1
+#define BEST_CONNECTED 2
+ unsigned int backend_state;
+
+ /* Is this interface open or closed (down or up)? */
+#define UST_CLOSED 0
+#define UST_OPEN 1
+ unsigned int user_state;
+
+ /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_MAX_TARGET NETIF_RX_RING_SIZE
+ int rx_target;
+ struct sk_buff_head rx_batch;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+ struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+ struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id) \
+ (_list)[(_id)] = (_list)[0]; \
+ (_list)[0] = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ (unsigned short)_id; })
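+
+/*
+ * Illustrative sketch of the freelist scheme (kept under #if 0, and
+ * assuming the arrays are seeded with a chain of indices at
+ * device-creation time, as in the loop below): entry 0 heads a chain of
+ * free ids, each free slot holding the index of the next free slot cast
+ * to a pointer. Genuine skb pointers are distinguishable because they
+ * are >= __PAGE_OFFSET.
+ */
+#if 0
+static void freelist_example(struct sk_buff **list, int nr_ents)
+{
+ int id, i;
+ for ( i = 0; i <= nr_ents; i++ ) /* seed: 0 -> 1 -> 2 -> ... */
+ list[i] = (struct sk_buff *)(unsigned long)(i+1);
+ id = GET_ID_FROM_FREELIST(list); /* pops id 1; head now points at 2 */
+ ADD_ID_TO_FREELIST(list, id); /* pushes id 1 back onto the chain */
+}
+#endif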
+
+static char *status_name[] = {
+ [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
+ [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+ [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
+ [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
+};
+
+static char *be_state_name[] = {
+ [BEST_CLOSED] = "closed",
+ [BEST_DISCONNECTED] = "disconnected",
+ [BEST_CONNECTED] = "connected",
+};
+
+#if DEBUG
+#define DPRINTK(fmt, args...) \
+ printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...) ((void)0)
+#endif
+#define IPRINTK(fmt, args...) \
+ printk(KERN_INFO "xen_net: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+ printk(KERN_WARNING "xen_net: " fmt, ##args)
+
+static struct net_device *find_dev_by_handle(unsigned int handle)
+{
+ struct list_head *ent;
+ struct net_private *np;
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ if ( np->handle == handle )
+ return np->dev;
+ }
+ return NULL;
+}
+
+/** Network interface info. */
+struct netif_ctrl {
+ /** Number of interfaces. */
+ int interface_n;
+ /** Number of connected interfaces. */
+ int connected_n;
+ /** Error code. */
+ int err;
+ int up;
+};
+
+static struct netif_ctrl netctrl;
+
+static void netctrl_init(void)
+{
+ memset(&netctrl, 0, sizeof(netctrl));
+ netctrl.up = NETIF_DRIVER_STATUS_DOWN;
+}
+
+/** Get or set a network interface error.
+ */
+static int netctrl_err(int err)
+{
+ if ( (err < 0) && !netctrl.err )
+ netctrl.err = err;
+ return netctrl.err;
+}
+
+/** Test if all network interfaces are connected.
+ *
+ * @return 1 if all connected, 0 if not, negative error code otherwise
+ */
+static int netctrl_connected(void)
+{
+ int ok;
+
+ if ( netctrl.err )
+ ok = netctrl.err;
+ else if ( netctrl.up == NETIF_DRIVER_STATUS_UP )
+ ok = (netctrl.connected_n == netctrl.interface_n);
+ else
+ ok = 0;
+
+ return ok;
+}
+
+/** Count the connected network interfaces.
+ *
+ * @return connected count
+ */
+static int netctrl_connected_count(void)
+{
+
+ struct list_head *ent;
+ struct net_private *np;
+ unsigned int connected;
+
+ connected = 0;
+
+ list_for_each(ent, &dev_list) {
+ np = list_entry(ent, struct net_private, list);
+ if (np->backend_state == BEST_CONNECTED)
+ connected++;
+ }
+
+ netctrl.connected_n = connected;
+ DPRINTK("> connected_n=%d interface_n=%d\n",
+ netctrl.connected_n, netctrl.interface_n);
+ return connected;
+}
+
+/** Send a packet on a net device to encourage switches to learn the
+ * MAC. We send a gratuitous ARP, i.e. an unsolicited ARP reply, to the
+ * broadcast address.
+ *
+ * @param dev device
+ * @return 0 on success, error code otherwise
+ */
+static int vif_wake(struct net_device *dev)
+{
+ struct sk_buff *skb;
+ u32 src_ip, dst_ip;
+
+ dst_ip = INADDR_BROADCAST;
+ src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
+ dst_ip, dev, src_ip,
+ /*dst_hw*/ NULL, /*src_hw*/ NULL,
+ /*target_hw*/ dev->dev_addr);
+ if ( skb == NULL )
+ return -ENOMEM;
+
+ return dev_queue_xmit(skb);
+}
+
+static int network_open(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+
+ memset(&np->stats, 0, sizeof(np->stats));
+
+ np->user_state = UST_OPEN;
+
+ network_alloc_rx_buffers(dev);
+ np->rx->event = np->rx_resp_cons + 1;
+
+ netif_start_queue(dev);
+
+ return 0;
+}
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+ NETIF_RING_IDX i, prod;
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+
+ if ( np->backend_state != BEST_CONNECTED )
+ return;
+
+ do {
+ prod = np->tx->resp_prod;
+ rmb(); /* Ensure we see responses up to 'rp'. */
+
+ for ( i = np->tx_resp_cons; i != prod; i++ )
+ {
+ id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
+ skb = np->tx_skbs[id];
+ ADD_ID_TO_FREELIST(np->tx_skbs, id);
+ dev_kfree_skb_irq(skb);
+ }
+
+ np->tx_resp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of tx_cons. Note
+ * that it is essential to schedule a callback, no matter how few
+ * buffers are pending. Even if there is space in the transmit ring,
+ * higher layers may be blocked because too much data is outstanding:
+ * in such cases notification from Xen is likely to be the only kick
+ * that we'll get.
+ */
+ np->tx->event =
+ prod + ((np->tx->req_prod - prod) >> 1) + 1;
+ mb();
+ }
+ while ( prod != np->tx->resp_prod );
+
+ if ( np->tx_full &&
+ ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
+ {
+ np->tx_full = 0;
+ if ( np->user_state == UST_OPEN )
+ netif_wake_queue(dev);
+ }
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+ int i, batch_target;
+ NETIF_RING_IDX req_prod = np->rx->req_prod;
+
+ if ( unlikely(np->backend_state != BEST_CONNECTED) )
+ return;
+
+ /*
+ * Allocate skbuffs greedily, even though we batch updates to the
+ * receive ring. This creates a less bursty demand on the memory allocator,
+ * so should reduce the chance of failed allocation requests both for
+ * ourself and for other kernel subsystems.
+ */
+ batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
+ for ( i = skb_queue_len(&np->rx_batch); i < batch_target; i++ )
+ {
+ if ( unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL) )
+ break;
+ __skb_queue_tail(&np->rx_batch, skb);
+ }
+
+ /* Is the batch large enough to be worthwhile? */
+ if ( i < (np->rx_target/2) )
+ return;
+
+ for ( i = 0; ; i++ )
+ {
+ if ( (skb = __skb_dequeue(&np->rx_batch)) == NULL )
+ break;
+
+ skb->dev = dev;
+
+ id = GET_ID_FROM_FREELIST(np->rx_skbs);
+
+ np->rx_skbs[id] = skb;
+
+ np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
+
+ rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+
+ /* Remove this page from pseudo phys map before passing back to Xen. */
++ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
+ = INVALID_P2M_ENTRY;
+
+ rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
+ rx_mcl[i].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ rx_mcl[i].args[1] = 0;
+ rx_mcl[i].args[2] = 0;
+ }
+
+ /*
+ * We may have allocated buffers which have entries outstanding in the page
+ * update queue -- make sure we flush those first!
+ */
+ flush_page_update_queue();
+
+ /* After all PTEs have been zapped we blow away stale TLB entries. */
+ rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
+
+ /* Give away a batch of pages. */
+ rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
+ rx_mcl[i].args[0] = MEMOP_decrease_reservation;
+ rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
+ rx_mcl[i].args[2] = (unsigned long)i;
+ rx_mcl[i].args[3] = 0;
+ rx_mcl[i].args[4] = DOMID_SELF;
+
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_dom_mem_op(). */
+ if ( unlikely(rx_mcl[i].args[5] != i) )
+ panic("Unable to reduce memory reservation\n");
+
+ /* Above is a suitable barrier to ensure backend will see requests. */
+ np->rx->req_prod = req_prod + i;
+
+ /* Adjust our floating fill target if we risked running out of buffers. */
+ if ( ((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
+ ((np->rx_target *= 2) > RX_MAX_TARGET) )
+ np->rx_target = RX_MAX_TARGET;
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = (struct net_private *)dev->priv;
+ netif_tx_request_t *tx;
+ NETIF_RING_IDX i;
+
+ if ( unlikely(np->tx_full) )
+ {
+ printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
+ netif_stop_queue(dev);
+ goto drop;
+ }
+
+ if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+ PAGE_SIZE) )
+ {
+ struct sk_buff *nskb;
+ if ( unlikely((nskb = alloc_xen_skb(skb->len)) == NULL) )
+ goto drop;
+ skb_put(nskb, skb->len);
+ memcpy(nskb->data, skb->data, skb->len);
+ nskb->dev = skb->dev;
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
+ spin_lock_irq(&np->tx_lock);
+
+ if ( np->backend_state != BEST_CONNECTED )
+ {
+ spin_unlock_irq(&np->tx_lock);
+ goto drop;
+ }
+
+ i = np->tx->req_prod;
+
+ id = GET_ID_FROM_FREELIST(np->tx_skbs);
+ np->tx_skbs[id] = skb;
+
+ tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
+
+ tx->id = id;
+ tx->addr = virt_to_machine(skb->data);
+ tx->size = skb->len;
+
+ wmb(); /* Ensure that backend will see the request. */
+ np->tx->req_prod = i + 1;
+
+ network_tx_buf_gc(dev);
+
+ if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
+ {
+ np->tx_full = 1;
+ netif_stop_queue(dev);
+ }
+
+ spin_unlock_irq(&np->tx_lock);
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+
+ /* Only notify Xen if we really have to. */
+ mb();
+ if ( np->tx->TX_TEST_IDX == i )
+ notify_via_evtchn(np->evtchn);
+
+ return 0;
+
+ drop:
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+
+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ struct net_device *dev = dev_id;
+ struct net_private *np = dev->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&np->tx_lock, flags);
+ network_tx_buf_gc(dev);
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ if ( (np->rx_resp_cons != np->rx->resp_prod) &&
+ (np->user_state == UST_OPEN) )
+ netif_rx_schedule(dev);
+
+ return IRQ_HANDLED;
+}
+
+
+static int netif_poll(struct net_device *dev, int *pbudget)
+{
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb, *nskb;
+ netif_rx_response_t *rx;
+ NETIF_RING_IDX i, rp;
+ mmu_update_t *mmu = rx_mmu;
+ multicall_entry_t *mcl = rx_mcl;
+ int work_done, budget, more_to_do = 1;
+ struct sk_buff_head rxq;
+ unsigned long flags;
+
+ spin_lock(&np->rx_lock);
+
+ if ( np->backend_state != BEST_CONNECTED )
+ {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
+ skb_queue_head_init(&rxq);
+
+ if ( (budget = *pbudget) > dev->quota )
+ budget = dev->quota;
+
+ rp = np->rx->resp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for ( i = np->rx_resp_cons, work_done = 0;
+ (i != rp) && (work_done < budget);
+ i++, work_done++ )
+ {
+ rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+
+ /*
+ * An error here is very odd. Usually indicates a backend bug,
+ * low-memory condition, or that we didn't have reservation headroom.
+ * Whatever - print an error and queue the id again straight away.
+ */
+ if ( unlikely(rx->status <= 0) )
+ {
+ printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+ np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
+ wmb();
+ np->rx->req_prod++;
+ continue;
+ }
+
+ skb = np->rx_skbs[rx->id];
+ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+ /* NB. We handle skb overflow later. */
+ skb->data = skb->head + (rx->addr & ~PAGE_MASK);
+ skb->len = rx->status;
+ skb->tail = skb->data + skb->len;
+
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += rx->status;
+
+ /* Remap the page. */
+ mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+ mmu->val = __pa(skb->head) >> PAGE_SHIFT;
+ mmu++;
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+ mcl->args[2] = 0;
+ mcl++;
+
+ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+ rx->addr >> PAGE_SHIFT;
+
+ __skb_queue_tail(&rxq, skb);
+ }
+
+ /* Do all the remapping work, and M->P updates, in one big hypercall. */
+ if ( likely((mcl - rx_mcl) != 0) )
+ {
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl++;
+ (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+ }
+
+ while ( (skb = __skb_dequeue(&rxq)) != NULL )
+ {
+ /*
+ * Enough room in skbuff for the data we were passed? Also, Linux
+ * expects at least 16 bytes headroom in each receive buffer.
+ */
+ if ( unlikely(skb->tail > skb->end) ||
+ unlikely((skb->data - skb->head) < 16) )
+ {
+ nskb = NULL;
+
+ /* Only copy the packet if it fits in the current MTU. */
+ if ( skb->len <= (dev->mtu + ETH_HLEN) )
+ {
+ if ( (skb->tail > skb->end) && net_ratelimit() )
+ printk(KERN_INFO "Received packet needs %d bytes more "
+ "headroom.\n", skb->tail - skb->end);
+
+ if ( (nskb = alloc_xen_skb(skb->len + 2)) != NULL )
+ {
+ skb_reserve(nskb, 2);
+ skb_put(nskb, skb->len);
+ memcpy(nskb->data, skb->data, skb->len);
+ nskb->dev = skb->dev;
+ }
+ }
+ else if ( net_ratelimit() )
+ printk(KERN_INFO "Received packet too big for MTU "
+ "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
+
+ /* Reinitialise and then destroy the old skbuff. */
+ skb->len = 0;
+ skb->tail = skb->data;
+ init_skb_shinfo(skb);
+ dev_kfree_skb(skb);
+
+ /* Switch old for new, if we copied the buffer. */
+ if ( (skb = nskb) == NULL )
+ continue;
+ }
+
+ /* Set the shared-info area, which is hidden behind the real data. */
+ init_skb_shinfo(skb);
+
+ /* Ethernet-specific work. Delayed to here as it peeks the header. */
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* Pass it up. */
+ netif_receive_skb(skb);
+ dev->last_rx = jiffies;
+ }
+
+ np->rx_resp_cons = i;
+
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if ( ((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
+ (--np->rx_target < RX_MIN_TARGET) )
+ np->rx_target = RX_MIN_TARGET;
+
+ network_alloc_rx_buffers(dev);
+
+ *pbudget -= work_done;
+ dev->quota -= work_done;
+
+ if ( work_done < budget )
+ {
+ local_irq_save(flags);
+
+ np->rx->event = i + 1;
+
+ /* Deal with hypervisor racing our resetting of rx_event. */
+ mb();
+ if ( np->rx->resp_prod == i )
+ {
+ __netif_rx_complete(dev);
+ more_to_do = 0;
+ }
+
+ local_irq_restore(flags);
+ }
+
+ spin_unlock(&np->rx_lock);
+
+ return more_to_do;
+}
+
+
+static int network_close(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+ np->user_state = UST_CLOSED;
+ netif_stop_queue(np->dev);
+ return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+ struct net_private *np = (struct net_private *)dev->priv;
+ return &np->stats;
+}
+
+
+static void network_connect(struct net_device *dev,
+ netif_fe_interface_status_t *status)
+{
+ struct net_private *np;
+ int i, requeue_idx;
+ netif_tx_request_t *tx;
+
+ np = dev->priv;
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+
+ /* Recovery procedure: */
+
+ /* Step 1: Reinitialise variables. */
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ np->rx->event = np->tx->event = 1;
+
+ /* Step 2: Rebuild the RX and TX ring contents.
+ * NB. We could just free the queued TX packets now but we hope
+ * that sending them out might do some good. We have to rebuild
+ * the RX ring because some of our pages are currently flipped out
+ * so we can't just free the RX skbs.
+ * NB2. Freelist index entries are always going to be less than
+ * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than __PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Rebuild the TX buffer freelist and the TX ring itself.
+ * NB. This reorders packets. We could keep more private state
+ * to avoid this but maybe it doesn't matter so much given the
+ * interface has been down.
+ */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
+ {
+ if ( (unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET )
+ {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[requeue_idx++].req;
+
+ tx->id = i;
+ tx->addr = virt_to_machine(skb->data);
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
+ }
+ wmb();
+ np->tx->req_prod = requeue_idx;
+
+ /* Rebuild the RX buffer freelist and the RX ring itself. */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
+ if ( (unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET )
+ np->rx->ring[requeue_idx++].req.id = i;
+ wmb();
+ np->rx->req_prod = requeue_idx;
+
+ /* Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ np->backend_state = BEST_CONNECTED;
+ wmb();
+ notify_via_evtchn(status->evtchn);
+ network_tx_buf_gc(dev);
+
+ if ( np->user_state == UST_OPEN )
+ netif_start_queue(dev);
+
+ spin_unlock(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
+}
+
+static void vif_show(struct net_private *np)
+{
+#if DEBUG
+ if (np) {
+ IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
+ np->handle,
+ be_state_name[np->backend_state],
+ np->user_state ? "open" : "closed",
+ np->evtchn,
+ np->irq,
+ np->tx,
+ np->rx);
+ } else {
+ IPRINTK("<vif NULL>\n");
+ }
+#endif
+}
+
+/* Send a connect message to xend to tell it to bring up the interface. */
+static void send_interface_connect(struct net_private *np)
+{
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
+ .length = sizeof(netif_fe_interface_connect_t),
+ };
+ netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
+
+ DPRINTK(">\n"); vif_show(np);
+ msg->handle = np->handle;
+ msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
+ msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ DPRINTK("<\n");
+}
+
+/* Send a driver status notification to the domain controller. */
+static int send_driver_status(int ok)
+{
+ int err = 0;
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
+ .length = sizeof(netif_fe_driver_status_t),
+ };
+ netif_fe_driver_status_t *msg = (void*)cmsg.msg;
+
+ msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
+ err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ return err;
+}
+
+/* Stop network device and free tx/rx queues and irq.
+ */
+static void vif_release(struct net_private *np)
+{
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+ netif_stop_queue(np->dev);
+ /* np->backend_state = BEST_DISCONNECTED; */
+ spin_unlock(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
+
+ /* Free resources. */
+ if(np->tx != NULL){
+ free_irq(np->irq, np->dev);
+ unbind_evtchn_from_irq(np->evtchn);
+ free_page((unsigned long)np->tx);
+ free_page((unsigned long)np->rx);
+ np->irq = 0;
+ np->evtchn = 0;
+ np->tx = NULL;
+ np->rx = NULL;
+ }
+}
+
+/* Release vif resources and close it down completely.
+ */
+static void vif_close(struct net_private *np)
+{
+ DPRINTK(">\n"); vif_show(np);
+ WPRINTK("Unexpected netif-CLOSED message in state %s\n",
+ be_state_name[np->backend_state]);
+ vif_release(np);
+ np->backend_state = BEST_CLOSED;
+ /* todo: take dev down and free. */
+ vif_show(np); DPRINTK("<\n");
+}
+
+/* Move the vif into disconnected state.
+ * Allocates tx/rx pages.
+ * Sends connect message to xend.
+ */
+static void vif_disconnect(struct net_private *np){
+ DPRINTK(">\n");
+ if(np->tx) free_page((unsigned long)np->tx);
+ if(np->rx) free_page((unsigned long)np->rx);
+ /* np->tx and np->rx should already be NULL here; the frees above are just defensive. */
+ np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+ np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+ memset(np->tx, 0, PAGE_SIZE);
+ memset(np->rx, 0, PAGE_SIZE);
+ np->backend_state = BEST_DISCONNECTED;
+ send_interface_connect(np);
+ vif_show(np); DPRINTK("<\n");
+}
+
+/* Begin interface recovery.
+ *
+ * NB. Whilst we're recovering, we turn the carrier state off. We
+ * take measures to ensure that this device isn't used for
+ * anything. We also stop the queue for this device. Various
+ * different approaches (e.g. continuing to buffer packets) have
+ * been tested but don't appear to improve the overall impact on
+ * TCP connections.
+ *
+ * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
+ * is initiated by a special "RESET" message - disconnect could
+ * just mean we're not allowed to use this interface any more.
+ */
+static void
+vif_reset(
+ struct net_private *np)
+{
+ DPRINTK(">\n");
+ IPRINTK("Attempting to reconnect network interface: handle=%u\n",
+ np->handle);
+ vif_release(np);
+ vif_disconnect(np);
+ vif_show(np); DPRINTK("<\n");
+}
+
+/* Move the vif into connected state.
+ * Sets the mac and event channel from the message.
+ * Binds the irq to the event channel.
+ */
+static void
+vif_connect(
+ struct net_private *np, netif_fe_interface_status_t *status)
+{
+ struct net_device *dev = np->dev;
+ DPRINTK(">\n");
+ memcpy(dev->dev_addr, status->mac, ETH_ALEN);
+ network_connect(dev, status);
+ np->evtchn = status->evtchn;
+ np->irq = bind_evtchn_to_irq(np->evtchn);
+ (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
+ netctrl_connected_count();
+ vif_wake(dev);
+ vif_show(np); DPRINTK("<\n");
+}
+
+
+/** Create a network device.
+ * @param handle device handle
+ * @param val return parameter for created device
+ * @return 0 on success, error code otherwise
+ */
+static int create_netdev(int handle, struct net_device **val)
+{
+ int i, err = 0;
+ struct net_device *dev = NULL;
+ struct net_private *np = NULL;
+
+ if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
+ {
+ printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ np = dev->priv;
+ np->backend_state = BEST_CLOSED;
+ np->user_state = UST_CLOSED;
+ np->handle = handle;
+
+ spin_lock_init(&np->tx_lock);
+ spin_lock_init(&np->rx_lock);
+
+ skb_queue_head_init(&np->rx_batch);
+ np->rx_target = RX_MIN_TARGET;
+
+ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+ for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
+ np->tx_skbs[i] = (void *)(i+1);
+ for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
+ np->rx_skbs[i] = (void *)(i+1);
+
+ dev->open = network_open;
+ dev->hard_start_xmit = network_start_xmit;
+ dev->stop = network_close;
+ dev->get_stats = network_get_stats;
+ dev->poll = netif_poll;
+ dev->weight = 64;
+
+ if ( (err = register_netdev(dev)) != 0 )
+ {
+ printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
+ goto exit;
+ }
+ np->dev = dev;
+ list_add(&np->list, &dev_list);
+
+ exit:
+ if ( (err != 0) && (dev != NULL ) )
+ kfree(dev);
+ else if ( val != NULL )
+ *val = dev;
+ return err;
+}
+
+/* Get the target interface for a status message.
+ * Creates the interface when it makes sense.
+ * The returned interface may be null when there is no error.
+ *
+ * @param status status message
+ * @param np return parameter for interface state
+ * @return 0 on success, error code otherwise
+ */
+static int
+target_vif(
+ netif_fe_interface_status_t *status, struct net_private **np)
+{
+ int err = 0;
+ struct net_device *dev;
+
+ DPRINTK("> handle=%d\n", status->handle);
+ if ( status->handle < 0 )
+ {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if ( (dev = find_dev_by_handle(status->handle)) != NULL )
+ goto exit;
+
+ if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
+ goto exit;
+ if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
+ goto exit;
+
+ /* It's a new interface in a good state - create it. */
+ DPRINTK("> create device...\n");
+ if ( (err = create_netdev(status->handle, &dev)) != 0 )
+ goto exit;
+
+ netctrl.interface_n++;
+
+ exit:
+ if ( np != NULL )
+ *np = ((dev && !err) ? dev->priv : NULL);
+ DPRINTK("< err=%d\n", err);
+ return err;
+}
+
+/* Handle an interface status message. */
+static void netif_interface_status(netif_fe_interface_status_t *status)
+{
+ int err = 0;
+ struct net_private *np = NULL;
+
+ DPRINTK(">\n");
+ DPRINTK("> status=%s handle=%d\n",
+ status_name[status->status], status->handle);
+
+ if ( (err = target_vif(status, &np)) != 0 )
+ {
+ WPRINTK("Invalid netif: handle=%u\n", status->handle);
+ return;
+ }
+
+ if ( np == NULL )
+ {
+ DPRINTK("> no vif\n");
+ return;
+ }
+
+ DPRINTK(">\n"); vif_show(np);
+
+ switch ( status->status )
+ {
+ case NETIF_INTERFACE_STATUS_CLOSED:
+ switch ( np->backend_state )
+ {
+ case BEST_CLOSED:
+ case BEST_DISCONNECTED:
+ case BEST_CONNECTED:
+ vif_close(np);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_DISCONNECTED:
+ switch ( np->backend_state )
+ {
+ case BEST_CLOSED:
+ vif_disconnect(np);
+ break;
+ case BEST_DISCONNECTED:
+ case BEST_CONNECTED:
+ vif_reset(np);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_CONNECTED:
+ switch ( np->backend_state )
+ {
+ case BEST_CLOSED:
+ WPRINTK("Unexpected netif status %s in state %s\n",
+ status_name[status->status],
+ be_state_name[np->backend_state]);
+ vif_disconnect(np);
+ vif_connect(np, status);
+ break;
+ case BEST_DISCONNECTED:
+ vif_connect(np, status);
+ break;
+ }
+ break;
+
+ case NETIF_INTERFACE_STATUS_CHANGED:
+ /*
+ * The domain controller is notifying us that a device has been
+ * added or removed.
+ */
+ break;
+
+ default:
+ WPRINTK("Invalid netif status code %d\n", status->status);
+ break;
+ }
+ vif_show(np);
+ DPRINTK("<\n");
+}
+
+/*
+ * Initialize the network control interface.
+ */
+static void netif_driver_status(netif_fe_driver_status_t *status)
+{
+ DPRINTK("> status=%d\n", status->status);
+ netctrl.up = status->status;
+ //netctrl.interface_n = status->max_handle;
+ //netctrl.connected_n = 0;
+ netctrl_connected_count();
+}
+
+/* Receive handler for control messages. */
+static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+
+ switch ( msg->subtype )
+ {
+ case CMSG_NETIF_FE_INTERFACE_STATUS:
+ if ( msg->length != sizeof(netif_fe_interface_status_t) )
+ goto error;
+ netif_interface_status((netif_fe_interface_status_t *)
+ &msg->msg[0]);
+ break;
+
+ case CMSG_NETIF_FE_DRIVER_STATUS:
+ if ( msg->length != sizeof(netif_fe_driver_status_t) )
+ goto error;
+ netif_driver_status((netif_fe_driver_status_t *)
+ &msg->msg[0]);
+ break;
+
+ error:
+ default:
+ msg->length = 0;
+ break;
+ }
+
+ ctrl_if_send_response(msg);
+}
+
+
+#if 1
+/* Wait for all interfaces to be connected.
+ *
+ * This works OK, but we'd like to use the probing mode (see below).
+ */
+static int probe_interfaces(void)
+{
+ int err = 0, conn = 0;
+ int wait_i, wait_n = 100;
+
+ DPRINTK(">\n");
+
+ for ( wait_i = 0; wait_i < wait_n; wait_i++)
+ {
+ DPRINTK("> wait_i=%d\n", wait_i);
+ conn = netctrl_connected();
+ if(conn) break;
+ DPRINTK("> schedule_timeout...\n");
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ DPRINTK("> wait finished...\n");
+ if ( conn <= 0 )
+ {
+ err = netctrl_err(-ENETDOWN);
+ WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
+ }
+
+ DPRINTK("< err=%d\n", err);
+
+ return err;
+}
+#else
+/* Probe for interfaces until no more are found.
+ *
+ * This is the mode we'd like to use, but at the moment it panics the kernel.
+*/
+static int probe_interfaces(void)
+{
+ int err = 0;
+ int wait_i, wait_n = 100;
+ ctrl_msg_t cmsg = {
+ .type = CMSG_NETIF_FE,
+ .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
+ .length = sizeof(netif_fe_interface_status_t),
+ };
+ netif_fe_interface_status_t msg = {};
+ ctrl_msg_t rmsg = {};
+ netif_fe_interface_status_t *reply = (void*)rmsg.msg;
+ int state = TASK_UNINTERRUPTIBLE;
+ u32 query = -1;
+
+ DPRINTK(">\n");
+
+ netctrl.interface_n = 0;
+ for ( wait_i = 0; wait_i < wait_n; wait_i++ )
+ {
+ DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
+ msg.handle = query;
+ memcpy(cmsg.msg, &msg, sizeof(msg));
+ DPRINTK("> set_current_state...\n");
+ set_current_state(state);
+ DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
+ DPRINTK("> sending...\n");
+ err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
+ DPRINTK("> err=%d\n", err);
+ if(err) goto exit;
+ DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
+ if((int)reply->handle < 0){
+ // No more interfaces.
+ break;
+ }
+ query = -reply->handle - 2;
+ DPRINTK(">netif_interface_status ...\n");
+ netif_interface_status(reply);
+ }
+
+ exit:
+ if ( err )
+ {
+ err = netctrl_err(-ENETDOWN);
+ WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
+ }
+
+ DPRINTK("< err=%d\n", err);
+ return err;
+}
+
+#endif
+
+static int __init netif_init(void)
+{
+ int err = 0;
+
+ if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
+ (xen_start_info.flags & SIF_NET_BE_DOMAIN) )
+ return 0;
+
+ IPRINTK("Initialising virtual ethernet driver.\n");
+ INIT_LIST_HEAD(&dev_list);
+ netctrl_init();
+ (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+ send_driver_status(1);
+ err = probe_interfaces();
+ if ( err )
+ ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
+
+ DPRINTK("< err=%d\n", err);
+ return err;
+}
+
+static void vif_suspend(struct net_private *np)
+{
+ // Avoid having tx/rx stuff happen until we're ready.
+ DPRINTK(">\n");
+ free_irq(np->irq, np->dev);
+ unbind_evtchn_from_irq(np->evtchn);
+ DPRINTK("<\n");
+}
+
+static void vif_resume(struct net_private *np)
+{
+ // Connect regardless of whether IFF_UP flag set.
+ // Stop bad things from happening until we're back up.
+ DPRINTK(">\n");
+ np->backend_state = BEST_DISCONNECTED;
+ memset(np->tx, 0, PAGE_SIZE);
+ memset(np->rx, 0, PAGE_SIZE);
+
+ send_interface_connect(np);
+ DPRINTK("<\n");
+}
+
+void netif_suspend(void)
+{
+#if 1 /* XXX THIS IS TEMPORARY */
+ struct list_head *ent;
+ struct net_private *np;
+
+ DPRINTK(">\n");
+ list_for_each(ent, &dev_list){
+ np = list_entry(ent, struct net_private, list);
+ vif_suspend(np);
+ }
+ DPRINTK("<\n");
+#endif
+}
+
+void netif_resume(void)
+{
+#if 1
+ /* XXX THIS IS TEMPORARY */
+ struct list_head *ent;
+ struct net_private *np;
+
+ DPRINTK(">\n");
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ vif_resume(np);
+ }
+ DPRINTK("<\n");
+#endif
+}
+
+
+__initcall(netif_init);
--- /dev/null
- #ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <linux/config.h>
+
+/* used by vmalloc.c, vsyscall.lds.S.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap.
+ */
+#define __FIXADDR_TOP (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE)
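+/*
+ * NB. On Xen the fixmap cannot sit at the very top of the address space
+ * as on native i386: everything from HYPERVISOR_VIRT_START upwards is
+ * reserved by the hypervisor, so the fixmap is placed just below it.
+ */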
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/acpi.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the top of the fixmap area (__FIXADDR_TOP) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+ FIX_HOLE,
+ FIX_VSYSCALL,
+#ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+#ifdef CONFIG_X86_VISWS_APIC
+ FIX_CO_CPU, /* Cobalt timer */
+ FIX_CO_APIC, /* Cobalt APIC Redirection Table */
+ FIX_LI_PCIA, /* Lithium PCI Bridge A */
+ FIX_LI_PCIB, /* Lithium PCI Bridge B */
+#endif
+#ifdef CONFIG_X86_F00F_BUG
+ FIX_F00F_IDT, /* Virtual mapping for IDT */
+#endif
+#ifdef CONFIG_X86_CYCLONE_TIMER
+ FIX_CYCLONE_TIMER, /*cyclone timer register*/
+#endif
+#ifdef CONFIG_HIGHMEM
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+#ifdef CONFIG_ACPI_BOOT
+ FIX_ACPI_BEGIN,
+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
+#ifdef CONFIG_PCI_MMCONFIG
+ FIX_PCIE_MCFG,
+#endif
+ FIX_SHARED_INFO,
+ FIX_GNTTAB,
++#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+#define NR_FIX_ISAMAPS 256
+ FIX_ISAMAP_END,
+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
+#endif
+ __end_of_permanent_fixed_addresses,
+ /* temporary boot-time mappings, used before ioremap() is functional */
+#define NR_FIX_BTMAPS 16
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
+ FIX_WP_TEST,
+ __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+ unsigned long phys, pgprot_t flags);
+extern void __set_fixmap_ma (enum fixed_addresses idx,
+ unsigned long mach, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+#define set_fixmap_ma(idx, phys) \
+ __set_fixmap_ma(idx, phys, PAGE_KERNEL)
+#define set_fixmap_ma_ro(idx, phys) \
+ __set_fixmap_ma(idx, phys, PAGE_KERNEL_RO)
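+/*
+ * The _ma variants take a machine address rather than a pseudo-physical
+ * one; this is what Xen-provided frames (e.g. the FIX_SHARED_INFO and
+ * FIX_GNTTAB slots above) are expected to be mapped with.
+ */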
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+#define clear_fixmap(idx) \
+ __set_fixmap(idx, 0, __pgprot(0))
+
+#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
+
+#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+/*
+ * This is the range that is readable by user mode, and things
+ * acting like user mode such as get_user_pages.
+ */
+#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
+#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
+
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+ return __virt_to_fix(vaddr);
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif
--- /dev/null
- #ifdef CONFIG_XEN_PRIVILEGED_GUEST
- #define isa_bus_to_virt(_x) (void *)__fix_to_virt(FIX_ISAMAP_BEGIN - \
- ((_x) >> PAGE_SHIFT))
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ * Linus
+ */
+
+ /*
+ * Bit simplified and optimized by Jan Hubicka
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+ *
+ * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+ * isa_read[wl] and isa_write[wl] fixed
+ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ */
+
+#define IO_SPACE_LIMIT 0xffff
+
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
+
+#ifdef __KERNEL__
+
+#include <asm-generic/iomap.h>
+
+#include <linux/vmalloc.h>
+#include <asm/fixmap.h>
+
+/**
+ * virt_to_phys - map virtual addresses to physical
+ * @address: address to remap
+ *
+ * The returned physical address is the physical (CPU) mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses directly mapped or allocated via kmalloc.
+ *
+ * This function does not give bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa(address);
+}
+
+/**
+ * phys_to_virt - map physical address to virtual
+ * @address: address to remap
+ *
+ * The returned virtual address is a current CPU mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses that have a kernel mapping
+ *
+ * This function does not handle bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+
+static inline void * phys_to_virt(unsigned long address)
+{
+ return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
+
+#define bio_to_pseudophys(bio) (page_to_pseudophys(bio_page((bio))) + \
+ (unsigned long) bio_offset((bio)))
+#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
+ (unsigned long) (bv)->bv_offset)
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ (((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) && \
+ ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == \
+ bvec_to_pseudophys((vec2))))
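+/*
+ * NB. Two bio_vecs may only be merged if they are contiguous both in
+ * machine space and in pseudo-physical space: adjacent pseudo-physical
+ * pages are not guaranteed to be backed by adjacent machine frames.
+ */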
+
+extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+/**
+ * ioremap - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ */
+
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, 0);
+}
+
+extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
+extern void iounmap(volatile void __iomem *addr);
+
+/*
+ * bt_ioremap() and bt_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ * A boot-time mapping is currently limited to at most 16 pages.
+ */
+extern void *bt_ioremap(unsigned long offset, unsigned long size);
+extern void bt_iounmap(void *addr, unsigned long size);
+
+/*
+ * ISA I/O bus memory addresses are normally 1:1 with the physical address,
+ * but not under Xen, so these helpers are unsupported here.
+ */
+#define isa_virt_to_bus(_x) isa_virt_to_bus_is_UNSUPPORTED->x
+#define isa_page_to_bus(_x) isa_page_to_bus_is_UNSUPPORTED->x
- #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
- #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
++#ifdef CONFIG_XEN_PHYSDEV_ACCESS
++#define isa_bus_to_virt(_x) (void *)(__fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
+#else
+#define isa_bus_to_virt(_x) isa_bus_to_virt_needs_PRIVILEGED_BUILD
+#endif
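+/*
+ * NB. Under CONFIG_XEN_PHYSDEV_ACCESS the ISA region is reached through
+ * the FIX_ISAMAP fixmap window rather than an identity mapping, hence
+ * the translation above.
+ */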
+
+/*
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ *
+ * Allow them on x86 for legacy drivers, though.
+ */
++#define virt_to_bus(_x) phys_to_machine(__pa(_x))
++#define bus_to_virt(_x) __va(machine_to_phys(_x))
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+static inline unsigned char readb(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned char __force *) addr;
+}
+static inline unsigned short readw(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned short __force *) addr;
+}
+static inline unsigned int readl(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned int __force *) addr;
+}
+#define readb_relaxed(addr) readb(addr)
+#define readw_relaxed(addr) readw(addr)
+#define readl_relaxed(addr) readl(addr)
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+static inline void writeb(unsigned char b, volatile void __iomem *addr)
+{
+ *(volatile unsigned char __force *) addr = b;
+}
+static inline void writew(unsigned short b, volatile void __iomem *addr)
+{
+ *(volatile unsigned short __force *) addr = b;
+}
+static inline void writel(unsigned int b, volatile void __iomem *addr)
+{
+ *(volatile unsigned int __force *) addr = b;
+}
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define mmiowb()
+
+static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
+{
+ memset((void __force *) addr, val, count);
+}
+static inline void memcpy_fromio(void *dst, volatile void __iomem *src, int count)
+{
+ __memcpy(dst, (void __force *) src, count);
+}
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+ __memcpy((void __force *) dst, src, count);
+}
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, i386 does not require mem IO specific function.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(__ISA_IO_base + (b)),(c),(d))
+
+/**
+ * check_signature - find BIOS signatures
+ * @io_addr: mmio address to check
+ * @signature: signature block
+ * @length: length of signature
+ *
+ * Perform a signature comparison with the mmio address io_addr. This
+ * address should have been obtained by ioremap.
+ * Returns 1 on a match.
+ */
+
+static inline int check_signature(volatile void __iomem * io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+/**
+ * isa_check_signature - find BIOS signatures
+ * @io_addr: mmio address to check
+ * @signature: signature block
+ * @length: length of signature
+ *
+ * Perform a signature comparison with the ISA mmio address io_addr.
+ * Returns 1 on a match.
+ *
+ * This function is deprecated. New drivers should use ioremap and
+ * check_signature.
+ */
+
+
+static inline int isa_check_signature(unsigned long io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+ do {
+ if (isa_readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+ return retval;
+}
+
+/*
+ * Cache management
+ *
+ * This is needed in two cases
+ * 1. Out of order aware processors
+ * 2. Accidentally out of order processors (PPro errata #51)
+ */
+
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+
+static inline void flush_write_buffers(void)
+{
+ __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
+}
+
+#define dma_cache_inv(_start,_size) flush_write_buffers()
+#define dma_cache_wback(_start,_size) flush_write_buffers()
+#define dma_cache_wback_inv(_start,_size) flush_write_buffers()
+
+#else
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size) do { } while (0)
+#define dma_cache_wback(_start,_size) do { } while (0)
+#define dma_cache_wback_inv(_start,_size) do { } while (0)
+#define flush_write_buffers()
+
+#endif
+
+#endif /* __KERNEL__ */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:"
+#elif defined(__UNSAFE_IO__)
+#define __SLOW_DOWN_IO "outb %%al,$0x80;"
+#else
+#define __SLOW_DOWN_IO "\n1: outb %%al,$0x80\n" \
+ "2:\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,2b\n" \
+ ".previous"
+#endif
+
+static inline void slow_down_io(void) {
+ __asm__ __volatile__(
+ __SLOW_DOWN_IO
+#ifdef REALLY_SLOW_IO
+ __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+ : : );
+}
+
+#ifdef CONFIG_X86_NUMAQ
+extern void *xquad_portio; /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+#define __BUILDIO(bwl,bw,type) \
+static inline void out##bwl##_quad(unsigned type value, int port, int quad) { \
+ if (xquad_portio) \
+ write##bwl(value, XQUAD_PORT_ADDR(port, quad)); \
+ else \
+ out##bwl##_local(value, port); \
+} \
+static inline void out##bwl(unsigned type value, int port) { \
+ out##bwl##_quad(value, port, 0); \
+} \
+static inline unsigned type in##bwl##_quad(int port, int quad) { \
+ if (xquad_portio) \
+ return read##bwl(XQUAD_PORT_ADDR(port, quad)); \
+ else \
+ return in##bwl##_local(port); \
+} \
+static inline unsigned type in##bwl(int port) { \
+ return in##bwl##_quad(port, 0); \
+}
+#else
+#define __BUILDIO(bwl,bw,type) \
+static inline void out##bwl(unsigned type value, int port) { \
+ out##bwl##_local(value, port); \
+} \
+static inline unsigned type in##bwl(int port) { \
+ return in##bwl##_local(port); \
+}
+#endif
+
+
+#if __UNSAFE_IO__
+#define ____BUILDIO(bwl,bw,type) \
+static inline void out##bwl##_local(unsigned type value, int port) { \
+ __asm__ __volatile__("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port)); \
+} \
+static inline unsigned type in##bwl##_local(int port) { \
+ unsigned type value; \
+ __asm__ __volatile__("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port)); \
+ return value; \
+}
+#else
+#define ____BUILDIO(bwl,bw,type) \
+static inline void out##bwl##_local(unsigned type value, int port) { \
+ __asm__ __volatile__("1: out" #bwl " %" #bw "0, %w1\n" \
+ "2:\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,2b\n" \
+ ".previous" : : "a"(value), "Nd"(port)); \
+} \
+static inline unsigned type in##bwl##_local(int port) { \
+ unsigned type value; \
+ __asm__ __volatile__("1:in" #bwl " %w1, %" #bw "0\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: mov" #bwl " $~0,%" #bw "0\n\t" \
+ "jmp 2b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n\t" \
+ ".align 4\n\t" \
+ ".long 1b,3b\n" \
+ ".previous" : "=a"(value) : "Nd"(port)); \
+ return value; \
+}
+#endif
+
+#define BUILDIO(bwl,bw,type) \
+____BUILDIO(bwl,bw,type) \
+static inline void out##bwl##_local_p(unsigned type value, int port) { \
+ out##bwl##_local(value, port); \
+ slow_down_io(); \
+} \
+static inline unsigned type in##bwl##_local_p(int port) { \
+ unsigned type value = in##bwl##_local(port); \
+ slow_down_io(); \
+ return value; \
+} \
+__BUILDIO(bwl,bw,type) \
+static inline void out##bwl##_p(unsigned type value, int port) { \
+ out##bwl(value, port); \
+ slow_down_io(); \
+} \
+static inline unsigned type in##bwl##_p(int port) { \
+ unsigned type value = in##bwl(port); \
+ slow_down_io(); \
+ return value; \
+} \
+static inline void outs##bwl(int port, const void *addr, unsigned long count) { \
+ __asm__ __volatile__("rep; outs" #bwl : "+S"(addr), "+c"(count) : "d"(port)); \
+} \
+static inline void ins##bwl(int port, void *addr, unsigned long count) { \
+ __asm__ __volatile__("rep; ins" #bwl : "+D"(addr), "+c"(count) : "d"(port)); \
+}
+
+BUILDIO(b,b,char)
+BUILDIO(w,w,short)
+BUILDIO(l,,int)
+
+#endif
--- /dev/null
- clear_bit(X86_FEATURE_MTRR, c->x86_capability);
- clear_bit(X86_FEATURE_FXSR, c->x86_capability);
+/**
+ * machine_specific_memory_setup - Hook for machine specific memory setup.
+ *
+ * Description:
+ * This is included late in kernel/setup.c so that it can make
+ * use of all of the static functions.
+ **/
+
+static char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+ unsigned long start_pfn, max_pfn;
+
+ who = "Xen";
+
+ start_pfn = 0;
+ max_pfn = xen_start_info.nr_pages;
+
+ e820.nr_map = 0;
+ add_memory_region(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn) - PFN_PHYS(start_pfn), E820_RAM);
+
+ return who;
+}
+
+void __init machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c)
+{
+ clear_bit(X86_FEATURE_VME, c->x86_capability);
+ clear_bit(X86_FEATURE_DE, c->x86_capability);
+ clear_bit(X86_FEATURE_PSE, c->x86_capability);
+ clear_bit(X86_FEATURE_PGE, c->x86_capability);
+ clear_bit(X86_FEATURE_SEP, c->x86_capability);
++ if (!(xen_start_info.flags & SIF_PRIVILEGED))
++ clear_bit(X86_FEATURE_MTRR, c->x86_capability);
+}
+
+extern void hypervisor_callback(void);
+extern void failsafe_callback(void);
+
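+/*
+ * Register the entry points Xen should use for event delivery
+ * (hypervisor_callback) and for recovering from a failed iret or
+ * segment-register load (failsafe_callback), then trim the CPU
+ * capability bits a paravirtualised guest cannot use.
+ */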
+static void __init machine_specific_arch_setup(void)
+{
+ HYPERVISOR_set_callbacks(
+ __KERNEL_CS, (unsigned long)hypervisor_callback,
+ __KERNEL_CS, (unsigned long)failsafe_callback);
+
+ machine_specific_modify_cpu_capabilities(&boot_cpu_data);
+}
--- /dev/null
- static inline unsigned long arbitrary_virt_to_phys(void *va)
+#ifndef _I386_PGTABLE_H
+#define _I386_PGTABLE_H
+
+#include <linux/config.h>
+#include <asm-xen/hypervisor.h>
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * the i386, we use that, but "fold" the mid level into the top-level page
+ * table, so that we physically have the same two-level page table as the
+ * i386 mmu expects.
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the i386 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <linux/threads.h>
+
+#ifndef _I386_BITOPS_H
+#include <asm/bitops.h>
+#endif
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern unsigned long empty_zero_page[1024];
+extern pgd_t swapper_pg_dir[1024];
+extern kmem_cache_t *pgd_cache;
+extern kmem_cache_t *pmd_cache;
+extern kmem_cache_t *pte_cache;
+extern spinlock_t pgd_lock;
+extern struct page *pgd_list;
+
+void pte_ctor(void *, kmem_cache_t *, unsigned long);
+void pte_dtor(void *, kmem_cache_t *, unsigned long);
+void pmd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_ctor(void *, kmem_cache_t *, unsigned long);
+void pgd_dtor(void *, kmem_cache_t *, unsigned long);
+void pgtable_cache_init(void);
+void paging_init(void);
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level-defs.h>
+#else
+# include <asm/pgtable-2level-defs.h>
+#endif
+
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR 0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT 22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
+#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \
+ 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
+#ifdef CONFIG_HIGHMEM
+# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
+#else
+# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
+#endif
+
+extern void *high_memory;
+extern unsigned long vmalloc_earlyreserve;
+
+/*
+ * The 4MB page is guessing.. Detailed in the infamous "Chapter H"
+ * of the Pentium details, but assuming intel did the straightforward
+ * thing, this bit set in the page directory entry just means that
+ * the page directory entry points directly to a 4MB-aligned block of
+ * memory.
+ */
+#define _PAGE_BIT_PRESENT 0
+#define _PAGE_BIT_RW 1
+#define _PAGE_BIT_USER 2
+#define _PAGE_BIT_PWT 3
+#define _PAGE_BIT_PCD 4
+#define _PAGE_BIT_ACCESSED 5
+#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
+#define _PAGE_BIT_UNUSED2 10
+#define _PAGE_BIT_UNUSED3 11
+#define _PAGE_BIT_NX 63
+
+#define _PAGE_PRESENT 0x001
+#define _PAGE_RW 0x002
+#define _PAGE_USER 0x004
+#define _PAGE_PWT 0x008
+#define _PAGE_PCD 0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY 0x040
+#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */
+#define _PAGE_UNUSED1 0x200 /* available for programmer */
+#define _PAGE_UNUSED2 0x400
+#define _PAGE_UNUSED3 0x800
+
+#define _PAGE_FILE 0x040 /* set:pagecache unset:swap */
+#define _PAGE_PROTNONE 0x080 /* If not present */
+#ifdef CONFIG_X86_PAE
+#define _PAGE_NX (1ULL<<_PAGE_BIT_NX)
+#else
+#define _PAGE_NX 0
+#endif
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE \
+ __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED \
+ __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+
+#define PAGE_SHARED_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY \
+ PAGE_COPY_NOEXEC
+#define PAGE_READONLY \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC \
+ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define _PAGE_KERNEL \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
+#define _PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
+#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
+
+/*
+ * The i386 can't do page protection for execute: execute permission is
+ * treated the same as read. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+/*
+ * Define this if things work differently on an i386 and an i486:
+ * it will (on an i486) warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef TEST_VERIFY_AREA
+
+/* The boot page tables (all created as a single array) */
+extern unsigned long pg0[];
+
+#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp) do { set_pte(xp, __pte(0)); } while (0)
+
+#define pmd_none(x) (!pmd_val(x))
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since writable
+ page tables (wr.p.t.) can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
+/* pmd_clear below */
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+
+
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
+static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
+static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
+
+/*
+ * The following only works if pte_present() is not true.
+ */
+static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; }
+
+static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
+
+#ifdef CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+#else
+# include <asm/pgtable-2level.h>
+#endif
+
+static inline int ptep_test_and_clear_dirty(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ int ret = pte_dirty(pte);
+ if (ret)
+ xen_l1_entry_update(ptep, pte_mkclean(pte).pte_low);
+ return ret;
+}
+
+static inline int ptep_test_and_clear_young(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ int ret = pte_young(pte);
+ if (ret)
+ xen_l1_entry_update(ptep, pte_mkold(pte).pte_low);
+ return ret;
+}
+
+static inline void ptep_set_wrprotect(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ if (pte_write(pte))
+ set_pte(ptep, pte_wrprotect(pte));
+}
+static inline void ptep_mkdirty(pte_t *ptep)
+{
+ pte_t pte = *ptep;
+ if (!pte_dirty(pte))
+ xen_l1_entry_update(ptep, pte_mkdirty(pte).pte_low);
+}
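+/*
+ * NB. The dirty/accessed helpers above use xen_l1_entry_update() rather
+ * than writing the PTE directly: pinned page-table pages are mapped
+ * read-only in the guest, so PTE writes must be requested from Xen.
+ */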
+
+/*
+ * Macro to mark a page protection value as "uncacheable". On processors which do not support
+ * it, this is a no-op.
+ */
+#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \
+ ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+#define mk_pte_huge(entry) ((entry).pte_low |= _PAGE_PRESENT | _PAGE_PSE)
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pte.pte_low &= _PAGE_CHG_MASK;
+ pte.pte_low |= pgprot_val(newprot);
+#ifdef CONFIG_X86_PAE
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+ * the newprot (if present):
+ */
+ pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+ pte.pte_high |= (pgprot_val(newprot) >> 32) & \
+ (__supported_pte_mask >> 32);
+#endif
+ return pte;
+}
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page_kernel(pmd) \
+((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+#define pmd_clear(xp) do { \
+ set_pmd(xp, __pmd(0)); \
+ xen_flush_page_update_queue(); \
+} while (0)
+
+#ifndef CONFIG_DISCONTIGMEM
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#endif /* !CONFIG_DISCONTIGMEM */
+
+#define pmd_large(pmd) \
+ ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
+
+/*
+ * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
+ */
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/*
+ * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
+ *
+ * this macro returns the index of the entry in the pmd page which would
+ * control the given virtual address
+ */
+#define pmd_index(address) \
+ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/*
+ * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
+ *
+ * this macro returns the index of the entry in the pte page which would
+ * control the given virtual address
+ */
+#define pte_index(address) \
+ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, address) \
+ ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address);
+
+/*
+ * Make a given kernel text page executable/non-executable.
+ * Returns the previous executability setting of that page (which
+ * is used to restore the previous state). Used by the SMP bootup code.
+ * NOTE: this is an __init function for security reasons.
+ */
+#ifdef CONFIG_X86_PAE
+ extern int set_kernel_exec(unsigned long vaddr, int enable);
+#else
+ static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
+#endif
+
+#if defined(CONFIG_HIGHPTE)
+#define pte_offset_map(dir, address) \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
+ pte_index(address))
+#define pte_offset_map_nested(dir, address) \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
+ pte_index(address))
+#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+#else
+#define pte_offset_map(dir, address) \
+ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
+#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+#endif
+
+/*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ *
+ * Also, we only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPU's that might be updating the dirty
+ * bit at the same time.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+
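+/*
+ * For a fault in the current address space, ptep_set_access_flags() uses
+ * HYPERVISOR_update_va_mapping(), which also invalidates the stale TLB
+ * entry (UVMF_INVLPG); for a foreign mm it falls back to a plain L1
+ * entry update followed by flush_tlb_page().
+ */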
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ if (__dirty) { \
+ if (likely((__vma)->vm_mm == current->mm)) { \
+ xen_flush_page_update_queue(); \
+ HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, \
+ (__entry), UVMF_INVLPG); \
+ } else { \
+ xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ flush_tlb_page(__vma, __address); \
+ } \
+ } \
+ } while (0)
+
+#define __HAVE_ARCH_PTEP_ESTABLISH
+#define ptep_establish(__vma, __address, __ptep, __entry) \
+do { \
+ ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \
+} while (0)
+
+#define __HAVE_ARCH_PTEP_ESTABLISH_NEW
+#define ptep_establish_new(__vma, __address, __ptep, __entry) \
+do { \
+ if (likely((__vma)->vm_mm == current->mm)) { \
+ xen_flush_page_update_queue(); \
+ HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, \
+ __entry, 0); \
+ } else { \
+ xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ } \
+} while (0)
+
+/* NOTE: make_page* callers must call flush_page_update_queue() */
+void make_lowmem_page_readonly(void *va);
+void make_lowmem_page_writable(void *va);
+void make_page_readonly(void *va);
+void make_page_writable(void *va);
+void make_pages_readonly(void *va, unsigned int nr);
+void make_pages_writable(void *va, unsigned int nr);
+
++static inline unsigned long arbitrary_virt_to_machine(void *va)
+{
+ pgd_t *pgd = pgd_offset_k((unsigned long)va);
+ pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
+ pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
+ return pa | ((unsigned long)va & (PAGE_SIZE-1));
+}
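+/*
+ * NB. This walks the kernel page tables by hand; since PTEs contain
+ * machine frame addresses under Xen, the value read from the PTE is
+ * already a machine address (hence _to_machine rather than _to_phys).
+ */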
+
+#endif /* !__ASSEMBLY__ */
+
+#ifndef CONFIG_DISCONTIGMEM
+#define kern_addr_valid(addr) (1)
+#endif /* !CONFIG_DISCONTIGMEM */
+
+#define io_remap_page_range(vma,from,phys,size,prot) \
+ direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
+
+int direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long machine_addr,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid);
+int __direct_remap_area_pages(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long size,
+ mmu_update_t *v);
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define __HAVE_ARCH_PTEP_MKDIRTY
+#define __HAVE_ARCH_PTE_SAME
+#include <asm-generic/pgtable.h>
+
+#endif /* _I386_PGTABLE_H */
--- /dev/null
- #define MMU_UPDATE_DEBUG 0
-
- #if MMU_UPDATE_DEBUG > 0
- typedef struct {
- void *ptr;
- unsigned long val, pteval;
- void *ptep;
- int line; char *file;
- } page_update_debug_t;
- extern page_update_debug_t update_debug_queue[];
- #define queue_l1_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- queue_l1_entry_update((_p),(_v)); \
- })
- #define queue_l2_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- queue_l2_entry_update((_p),(_v)); \
- })
- #endif
-
- #if MMU_UPDATE_DEBUG > 1
- #if MMU_UPDATE_DEBUG > 2
- #undef queue_l1_entry_update
- #define queue_l1_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- printk("L1 %s %d: %p/%08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \
- (_p), virt_to_machine(_p), pte_val(*(_p)), \
- (unsigned long)(_v)); \
- queue_l1_entry_update((_p),(_v)); \
- })
- #endif
- #undef queue_l2_entry_update
- #define queue_l2_entry_update(_p,_v) ({ \
- update_debug_queue[mmu_update_queue_idx].ptr = (_p); \
- update_debug_queue[mmu_update_queue_idx].val = (_v); \
- update_debug_queue[mmu_update_queue_idx].line = __LINE__; \
- update_debug_queue[mmu_update_queue_idx].file = __FILE__; \
- printk("L2 %s %d: %p/%08lx (%08lx -> %08lx)\n", __FILE__, __LINE__, \
- (_p), virt_to_machine(_p), pmd_val(*_p), \
- (unsigned long)(_v)); \
- queue_l2_entry_update((_p),(_v)); \
- })
- #define queue_pt_switch(_p) ({ \
- printk("PTSWITCH %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pt_switch(_p); \
- })
- #define queue_tlb_flush() ({ \
- printk("TLB FLUSH %s %d\n", __FILE__, __LINE__); \
- queue_tlb_flush(); \
- })
- #define queue_invlpg(_p) ({ \
- printk("INVLPG %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_invlpg(_p); \
- })
- #define queue_pgd_pin(_p) ({ \
- printk("PGD PIN %s %d: %08lx/%08lx\n", __FILE__, __LINE__, (_p), \
- phys_to_machine(_p)); \
- queue_pgd_pin(_p); \
- })
- #define queue_pgd_unpin(_p) ({ \
- printk("PGD UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pgd_unpin(_p); \
- })
- #define queue_pte_pin(_p) ({ \
- printk("PTE PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pte_pin(_p); \
- })
- #define queue_pte_unpin(_p) ({ \
- printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \
- queue_pte_unpin(_p); \
- })
- #define queue_set_ldt(_p,_l) ({ \
- printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \
- queue_set_ldt((_p), (_l)); \
- })
- #endif
+/******************************************************************************
+ * hypervisor.h
+ *
+ * Linux-specific hypervisor handling.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __HYPERVISOR_H__
+#define __HYPERVISOR_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <asm-xen/xen-public/xen.h>
+#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/io/domain_controller.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/* arch/xen/i386/kernel/setup.c */
+union xen_start_info_union
+{
+ start_info_t xen_start_info;
+ char padding[512];
+};
+extern union xen_start_info_union xen_start_info_union;
+#define xen_start_info (xen_start_info_union.xen_start_info)
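+
+/*
+ * Usage sketch: kernel code reads the start-of-day parameters through the
+ * xen_start_info macro; the union simply pads the record out to a fixed
+ * 512-byte slot. The field name nr_pages is assumed from the public
+ * start_info_t definition in <asm-xen/xen-public/xen.h>:
+ *
+ *     unsigned long total = xen_start_info.nr_pages;
+ *     printk("Domain booted with %lu pages\n", total);
+ */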
+
+/* arch/xen/kernel/process.c */
+void xen_cpu_idle (void);
+
+/* arch/xen/i386/kernel/hypervisor.c */
+void do_hypervisor_callback(struct pt_regs *regs);
+
+/* arch/xen/i386/mm/init.c */
+/* NOTE: caller must call flush_page_update_queue() */
+#define PROT_ON 1
+#define PROT_OFF 0
+void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
+void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
+
+/* arch/xen/i386/kernel/head.S */
+void lgdt_finish(void);
+
+/* arch/xen/i386/mm/hypervisor.c */
+/*
+ * NB. ptr values should be PHYSICAL, not MACHINE. 'vals' should already
+ * be MACHINE addresses. (See the usage sketch after these declarations.)
+ */
+
+extern unsigned int mmu_update_queue_idx;
+
+void queue_l1_entry_update(pte_t *ptr, unsigned long val);
+void queue_l2_entry_update(pmd_t *ptr, unsigned long val);
+void queue_pt_switch(unsigned long ptr);
+void queue_tlb_flush(void);
+void queue_invlpg(unsigned long ptr);
+void queue_pgd_pin(unsigned long ptr);
+void queue_pgd_unpin(unsigned long ptr);
+void queue_pte_pin(unsigned long ptr);
+void queue_pte_unpin(unsigned long ptr);
+void queue_set_ldt(unsigned long ptr, unsigned long bytes);
+void queue_machphys_update(unsigned long mfn, unsigned long pfn);
+void xen_l1_entry_update(pte_t *ptr, unsigned long val);
+void xen_l2_entry_update(pmd_t *ptr, unsigned long val);
+void xen_pt_switch(unsigned long ptr);
+void xen_tlb_flush(void);
+void xen_invlpg(unsigned long ptr);
+void xen_pgd_pin(unsigned long ptr);
+void xen_pgd_unpin(unsigned long ptr);
+void xen_pte_pin(unsigned long ptr);
+void xen_pte_unpin(unsigned long ptr);
+void xen_set_ldt(unsigned long ptr, unsigned long bytes);
+void xen_machphys_update(unsigned long mfn, unsigned long pfn);
+
+void _flush_page_update_queue(void);
+static inline int flush_page_update_queue(void)
+{
+ unsigned int idx = mmu_update_queue_idx;
+ if ( idx != 0 ) _flush_page_update_queue();
+ return idx;
+}
+#define xen_flush_page_update_queue() (_flush_page_update_queue())
+#define XEN_flush_page_update_queue() (_flush_page_update_queue())
+void MULTICALL_flush_page_update_queue(void);
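+
+/*
+ * Usage sketch: a caller batches a single L1 update and then pushes the
+ * whole queue to Xen in one go. Here 'pte' points at the kernel page-table
+ * entry to change and 'machine_pte' is already a machine value, per the NB
+ * above:
+ *
+ *     queue_l1_entry_update(pte, machine_pte);
+ *     flush_page_update_queue();
+ *
+ * flush_page_update_queue() returns the number of entries that were pending
+ * before the flush and does nothing when the queue is empty.
+ */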
+
+#ifdef CONFIG_XEN_PHYSDEV_ACCESS
+/* Allocate a contiguous empty region of low memory. Return virtual start. */
+unsigned long allocate_empty_lowmem_region(unsigned long pages);
+/* Deallocate a contiguous region of low memory. Return it to the allocator. */
+void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
+#endif
+
+/*
+ * Assembler stubs for hyper-calls.
+ */
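+
+/*
+ * All of the stubs below follow the same pattern: the hypercall index goes
+ * in EAX (the "0" constraint ties it to the "=a" output), up to five
+ * arguments go in EBX, ECX, EDX, ESI and EDI, and TRAP_INSTR traps into
+ * Xen. The return value comes back in EAX. The dummy 'ign*' outputs exist
+ * only to tell GCC that Xen may clobber the argument registers.
+ */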
+
+static inline int
+HYPERVISOR_set_trap_table(
+ trap_info_t *table)
+{
+ int ret;
+ unsigned long ignore;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_set_trap_table), "1" (table)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+ mmu_update_t *req, int count, int *success_count)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_mmu_update), "1" (req), "2" (count),
+ "3" (success_count)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+ unsigned long *frame_list, int entries)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+ unsigned long ss, unsigned long esp)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+ unsigned long event_selector, unsigned long event_address,
+ unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector),
+ "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+ void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_yield(
+ void)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_block(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
+ : "memory" );
+
+ return ret;
+}
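+
+/*
+ * Usage sketch: a domain that has nothing to run can give up the rest of
+ * its timeslice (yield) or sleep until an event is delivered (block). A
+ * hypothetical polling loop might look like:
+ *
+ *     while ( !work_available() )
+ *         HYPERVISOR_yield();
+ *
+ * work_available() is a placeholder for whatever condition the caller is
+ * waiting on.
+ */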
+
+static inline int
+HYPERVISOR_shutdown(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_reboot(
+ void)
+{
+ int ret;
+ unsigned long ign1;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_suspend(
+ unsigned long srec)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ /* NB. On suspend, control software expects a suspend record in %esi. */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=S" (ign2)
+ : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
+ "S" (srec) : "memory");
+
+ return ret;
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+ u64 timeout)
+{
+ long ret;
+ unsigned long timeout_hi = (unsigned long)(timeout>>32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_hi), "c" (timeout_lo)
+ : "memory");
+
+ return ret;
+}
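+
+/*
+ * Usage sketch: the timeout is a 64-bit system-time value which the stub
+ * splits into high and low 32-bit halves for the hypercall registers.
+ * Assuming the deadline is expressed in the same units as Xen system time
+ * (nanoseconds), arming a wakeup one millisecond ahead might look like:
+ *
+ *     u64 deadline = xen_system_time_now() + 1000000ULL;
+ *     HYPERVISOR_set_timer_op(deadline);
+ *
+ * xen_system_time_now() is a placeholder for however the caller samples
+ * the current system time.
+ */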
+
+static inline int
+HYPERVISOR_dom0_op(
+ dom0_op_t *dom0_op)
+{
+ int ret;
+ unsigned long ign1;
+
+ dom0_op->interface_version = DOM0_INTERFACE_VERSION;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1)
+ : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op)
+ : "memory");
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_debugreg(
+ int reg, unsigned long value)
+{
+ int ret;
+ unsigned long ign1, ign2;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value)
+ : "memory" );
+
+ return ret;
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+ int reg)
+{
+ unsigned long ret;
+ unsigned long ign;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_get_debugreg), "1" (reg)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+ unsigned long ma, unsigned long word1, unsigned long word2)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_update_descriptor), "1" (ma), "2" (word1),
+ "3" (word2)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_set_fast_trap(
+ int idx)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_set_fast_trap), "1" (idx)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_dom_mem_op(
+ unsigned int op, unsigned long *extent_list,
+ unsigned long nr_extents, unsigned int extent_order)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4, ign5;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4),
+ "=D" (ign5)
+ : "0" (__HYPERVISOR_dom_mem_op), "1" (op), "2" (extent_list),
+ "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_multicall(
+ void *call_list, int nr_calls)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+ unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_update_va_mapping),
+ "1" (page_nr), "2" ((new_val).pte_low), "3" (flags)
+ : "memory" );
+
+ if ( unlikely(ret < 0) )
+ {
+ printk(KERN_ALERT "Failed update VA mapping: %08lx, %08lx, %08lx\n",
+ page_nr, (new_val).pte_low, flags);
+ BUG();
+ }
+
+ return ret;
+}
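+
+/*
+ * Usage sketch: page_nr is the virtual page number (the virtual address
+ * shifted right by PAGE_SHIFT), new_val is the replacement PTE, and flags
+ * selects any TLB maintenance. UVMF_INVLPG is assumed to be the public
+ * flag requesting a single-entry flush of the updated address:
+ *
+ *     HYPERVISOR_update_va_mapping(vaddr >> PAGE_SHIFT, new_pte, UVMF_INVLPG);
+ *
+ * where new_pte already contains a machine frame address in its low word.
+ */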
+
+static inline int
+HYPERVISOR_event_channel_op(
+ void *op)
+{
+ int ret;
+ unsigned long ignore;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_event_channel_op), "1" (op)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_xen_version(
+ int cmd)
+{
+ int ret;
+ unsigned long ignore;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ignore)
+ : "0" (__HYPERVISOR_xen_version), "1" (cmd)
+ : "memory" );
+
+ return ret;
+}
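+
+/*
+ * Usage sketch: with cmd 0 the running Xen version is assumed to come back
+ * packed as (major << 16) | minor, which callers can unpack for logging:
+ *
+ *     int v = HYPERVISOR_xen_version(0);
+ *     printk("Running on Xen %d.%d\n", v >> 16, v & 0xFFFF);
+ */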
+
+static inline int
+HYPERVISOR_console_io(
+ int cmd, int count, char *str)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str)
+ : "memory" );
+
+ return ret;
+}
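+
+/*
+ * Usage sketch: writing a string to the Xen console. CONSOLEIO_write is
+ * assumed to be the command code defined by the public interface headers:
+ *
+ *     static const char msg[] = "hello from the guest\n";
+ *     HYPERVISOR_console_io(CONSOLEIO_write, sizeof(msg) - 1, (char *)msg);
+ */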
+
+static inline int
+HYPERVISOR_physdev_op(
+ void *physdev_op)
+{
+ int ret;
+ unsigned long ign;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign)
+ : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+ unsigned int cmd, void *uop, unsigned int count)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
+ : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (count), "3" (uop)
+ : "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+ unsigned long page_nr, pte_t new_val, unsigned long flags, domid_t domid)
+{
+ int ret;
+ unsigned long ign1, ign2, ign3, ign4;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4)
+ : "0" (__HYPERVISOR_update_va_mapping_otherdomain),
+ "1" (page_nr), "2" ((new_val).pte_low), "3" (flags), "4" (domid) :
+ "memory" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+ unsigned int cmd, unsigned int type)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type)
+ : "memory" );
+
+ return ret;
+}
+
+#endif /* __HYPERVISOR_H__ */