direct-io.hg
changeset 6513:1ae656509f02
Merge.
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Tue Aug 16 10:09:07 2005 -0800
@@ -130,6 +130,7 @@ CONFIG_PREEMPT_BKL=y
 # CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=y
 CONFIG_X86_CPUID=y
+CONFIG_SWIOTLB=y
 
 #
 # Firmware Drivers
39.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 16 12:15:23 2005 +0800 39.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Tue Aug 16 10:09:07 2005 -0800 39.3 @@ -1,7 +1,7 @@ 39.4 # 39.5 # Automatically generated make config: don't edit 39.6 -# Linux kernel version: 2.6.12.3-xen0 39.7 -# Mon Aug 15 11:36:25 2005 39.8 +# Linux kernel version: 2.6.12.4-xen0 39.9 +# Mon Aug 15 18:57:19 2005 39.10 # 39.11 CONFIG_XEN=y 39.12 CONFIG_ARCH_XEN=y 39.13 @@ -52,6 +52,7 @@ CONFIG_KOBJECT_UEVENT=y 39.14 # CONFIG_IKCONFIG is not set 39.15 # CONFIG_EMBEDDED is not set 39.16 CONFIG_KALLSYMS=y 39.17 +# CONFIG_KALLSYMS_ALL is not set 39.18 # CONFIG_KALLSYMS_EXTRA_PASS is not set 39.19 CONFIG_PRINTK=y 39.20 CONFIG_BUG=y 39.21 @@ -122,6 +123,7 @@ CONFIG_X86_XEN_GENAPIC=y 39.22 # CONFIG_X86_MSR is not set 39.23 # CONFIG_GART_IOMMU is not set 39.24 CONFIG_DUMMY_IOMMU=y 39.25 +CONFIG_SWIOTLB=y 39.26 # CONFIG_X86_MCE is not set 39.27 39.28 # 39.29 @@ -163,6 +165,7 @@ CONFIG_BINFMT_MISC=y 39.30 CONFIG_STANDALONE=y 39.31 # CONFIG_PREVENT_FIRMWARE_BUILD is not set 39.32 # CONFIG_FW_LOADER is not set 39.33 +# CONFIG_DEBUG_DRIVER is not set 39.34 39.35 # 39.36 # Memory Technology Devices (MTD) 39.37 @@ -1060,7 +1063,22 @@ CONFIG_ZLIB_INFLATE=y 39.38 # Kernel hacking 39.39 # 39.40 # CONFIG_PRINTK_TIME is not set 39.41 -# CONFIG_DEBUG_KERNEL is not set 39.42 -CONFIG_LOG_BUF_SHIFT=14 39.43 +CONFIG_DEBUG_KERNEL=y 39.44 +CONFIG_MAGIC_SYSRQ=y 39.45 +CONFIG_LOG_BUF_SHIFT=15 39.46 +# CONFIG_SCHEDSTATS is not set 39.47 +# CONFIG_DEBUG_SLAB is not set 39.48 +# CONFIG_DEBUG_SPINLOCK is not set 39.49 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 39.50 +# CONFIG_DEBUG_KOBJECT is not set 39.51 +# CONFIG_DEBUG_INFO is not set 39.52 +# CONFIG_DEBUG_FS is not set 39.53 +# CONFIG_DEBUG_STACKOVERFLOW is not set 39.54 +# CONFIG_KPROBES is not set 39.55 +# CONFIG_DEBUG_STACK_USAGE is not set 39.56 +# CONFIG_DEBUG_PAGEALLOC is not set 39.57 +# CONFIG_4KSTACKS is not set 39.58 CONFIG_X86_FIND_SMP_CONFIG=y 39.59 CONFIG_X86_MPPARSE=y 39.60 +# CONFIG_CHECKING is not set 39.61 +# CONFIG_INIT_DEBUG is not set
41.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Tue Aug 16 12:15:23 2005 +0800 41.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Tue Aug 16 10:09:07 2005 -0800 41.3 @@ -1,7 +1,7 @@ 41.4 # 41.5 # Automatically generated make config: don't edit 41.6 -# Linux kernel version: 2.6.12-xenU 41.7 -# Tue Aug 2 23:56:13 2005 41.8 +# Linux kernel version: 2.6.12.4-xenU 41.9 +# Mon Aug 15 19:25:22 2005 41.10 # 41.11 CONFIG_XEN=y 41.12 CONFIG_ARCH_XEN=y 41.13 @@ -30,7 +30,7 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y 41.14 # 41.15 CONFIG_EXPERIMENTAL=y 41.16 CONFIG_CLEAN_COMPILE=y 41.17 -CONFIG_BROKEN_ON_SMP=y 41.18 +CONFIG_LOCK_KERNEL=y 41.19 CONFIG_INIT_ENV_ARG_LIMIT=32 41.20 41.21 # 41.22 @@ -48,8 +48,10 @@ CONFIG_AUDITSYSCALL=y 41.23 CONFIG_HOTPLUG=y 41.24 CONFIG_KOBJECT_UEVENT=y 41.25 # CONFIG_IKCONFIG is not set 41.26 +# CONFIG_CPUSETS is not set 41.27 # CONFIG_EMBEDDED is not set 41.28 CONFIG_KALLSYMS=y 41.29 +# CONFIG_KALLSYMS_ALL is not set 41.30 CONFIG_KALLSYMS_EXTRA_PASS=y 41.31 CONFIG_PRINTK=y 41.32 CONFIG_BUG=y 41.33 @@ -74,6 +76,7 @@ CONFIG_OBSOLETE_MODPARM=y 41.34 CONFIG_MODVERSIONS=y 41.35 # CONFIG_MODULE_SRCVERSION_ALL is not set 41.36 CONFIG_KMOD=y 41.37 +CONFIG_STOP_MACHINE=y 41.38 CONFIG_XENARCH="x86_64" 41.39 CONFIG_X86=y 41.40 CONFIG_MMU=y 41.41 @@ -86,12 +89,15 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y 41.42 CONFIG_GENERIC_CALIBRATE_DELAY=y 41.43 CONFIG_X86_GOOD_APIC=y 41.44 # CONFIG_HPET_TIMER is not set 41.45 -# CONFIG_SMP is not set 41.46 +CONFIG_SMP=y 41.47 +CONFIG_NR_CPUS=8 41.48 +# CONFIG_SCHED_SMT is not set 41.49 # CONFIG_PREEMPT is not set 41.50 # CONFIG_MICROCODE is not set 41.51 CONFIG_X86_CPUID=y 41.52 # CONFIG_NUMA is not set 41.53 # CONFIG_MTRR is not set 41.54 +CONFIG_HAVE_DEC_LOCK=y 41.55 # CONFIG_X86_LOCAL_APIC is not set 41.56 # CONFIG_X86_IO_APIC is not set 41.57 # CONFIG_PCI is not set 41.58 @@ -114,7 +120,11 @@ CONFIG_MPSC=y 41.59 # CONFIG_GENERIC_CPU is not set 41.60 CONFIG_X86_L1_CACHE_BYTES=128 41.61 # CONFIG_X86_TSC is not set 41.62 +CONFIG_X86_XEN_GENAPIC=y 41.63 # CONFIG_X86_MSR is not set 41.64 +CONFIG_X86_HT=y 41.65 +# CONFIG_K8_NUMA is not set 41.66 +# CONFIG_NUMA_EMU is not set 41.67 CONFIG_DUMMY_IOMMU=y 41.68 # CONFIG_X86_MCE is not set 41.69 41.70 @@ -157,6 +167,7 @@ CONFIG_BINFMT_MISC=y 41.71 CONFIG_STANDALONE=y 41.72 CONFIG_PREVENT_FIRMWARE_BUILD=y 41.73 CONFIG_FW_LOADER=y 41.74 +# CONFIG_DEBUG_DRIVER is not set 41.75 41.76 # 41.77 # Block devices 41.78 @@ -559,7 +570,6 @@ CONFIG_ACT200L_DONGLE=m 41.79 # 41.80 # Old SIR device drivers 41.81 # 41.82 -# CONFIG_IRPORT_SIR is not set 41.83 41.84 # 41.85 # Old Serial dongle support 41.86 @@ -861,17 +871,7 @@ CONFIG_NLS_UTF8=m 41.87 # Security options 41.88 # 41.89 # CONFIG_KEYS is not set 41.90 -CONFIG_SECURITY=y 41.91 -CONFIG_SECURITY_NETWORK=y 41.92 -CONFIG_SECURITY_CAPABILITIES=y 41.93 -# CONFIG_SECURITY_SECLVL is not set 41.94 -CONFIG_SECURITY_SELINUX=y 41.95 -CONFIG_SECURITY_SELINUX_BOOTPARAM=y 41.96 -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 41.97 -CONFIG_SECURITY_SELINUX_DISABLE=y 41.98 -CONFIG_SECURITY_SELINUX_DEVELOP=y 41.99 -CONFIG_SECURITY_SELINUX_AVC_STATS=y 41.100 -CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 41.101 +# CONFIG_SECURITY is not set 41.102 41.103 # 41.104 # Cryptographic options 41.105 @@ -919,5 +919,19 @@ CONFIG_ZLIB_DEFLATE=m 41.106 # Kernel hacking 41.107 # 41.108 # CONFIG_PRINTK_TIME is not set 41.109 -# CONFIG_DEBUG_KERNEL is not set 41.110 -CONFIG_LOG_BUF_SHIFT=14 41.111 +CONFIG_DEBUG_KERNEL=y 41.112 +CONFIG_MAGIC_SYSRQ=y 41.113 
+CONFIG_LOG_BUF_SHIFT=15 41.114 +# CONFIG_SCHEDSTATS is not set 41.115 +# CONFIG_DEBUG_SLAB is not set 41.116 +# CONFIG_DEBUG_SPINLOCK is not set 41.117 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 41.118 +# CONFIG_DEBUG_KOBJECT is not set 41.119 +# CONFIG_DEBUG_INFO is not set 41.120 +# CONFIG_DEBUG_FS is not set 41.121 +# CONFIG_DEBUG_STACKOVERFLOW is not set 41.122 +# CONFIG_KPROBES is not set 41.123 +# CONFIG_DEBUG_STACK_USAGE is not set 41.124 +# CONFIG_DEBUG_PAGEALLOC is not set 41.125 +# CONFIG_4KSTACKS is not set 41.126 +# CONFIG_INIT_DEBUG is not set
--- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Tue Aug 16 10:09:07 2005 -0800
@@ -137,6 +137,7 @@ CONFIG_PREEMPT_BKL=y
 # CONFIG_X86_REBOOTFIXUPS is not set
 CONFIG_MICROCODE=m
 CONFIG_X86_CPUID=m
+CONFIG_SWIOTLB=y
 
 #
 # Firmware Drivers
43.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Tue Aug 16 12:15:23 2005 +0800 43.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Tue Aug 16 10:09:07 2005 -0800 43.3 @@ -1,7 +1,7 @@ 43.4 # 43.5 # Automatically generated make config: don't edit 43.6 -# Linux kernel version: 2.6.12.3-xen0 43.7 -# Mon Aug 15 19:46:39 2005 43.8 +# Linux kernel version: 2.6.12.4-xen 43.9 +# Mon Aug 15 19:54:11 2005 43.10 # 43.11 CONFIG_XEN=y 43.12 CONFIG_ARCH_XEN=y 43.13 @@ -35,6 +35,7 @@ CONFIG_EXPERIMENTAL=y 43.14 # CONFIG_CLEAN_COMPILE is not set 43.15 CONFIG_BROKEN=y 43.16 CONFIG_BROKEN_ON_SMP=y 43.17 +CONFIG_LOCK_KERNEL=y 43.18 CONFIG_INIT_ENV_ARG_LIMIT=32 43.19 43.20 # 43.21 @@ -50,8 +51,10 @@ CONFIG_SYSCTL=y 43.22 CONFIG_HOTPLUG=y 43.23 CONFIG_KOBJECT_UEVENT=y 43.24 # CONFIG_IKCONFIG is not set 43.25 +# CONFIG_CPUSETS is not set 43.26 # CONFIG_EMBEDDED is not set 43.27 CONFIG_KALLSYMS=y 43.28 +# CONFIG_KALLSYMS_ALL is not set 43.29 CONFIG_KALLSYMS_EXTRA_PASS=y 43.30 CONFIG_PRINTK=y 43.31 CONFIG_BUG=y 43.32 @@ -76,6 +79,7 @@ CONFIG_OBSOLETE_MODPARM=y 43.33 # CONFIG_MODVERSIONS is not set 43.34 CONFIG_MODULE_SRCVERSION_ALL=y 43.35 CONFIG_KMOD=y 43.36 +CONFIG_STOP_MACHINE=y 43.37 CONFIG_XENARCH="x86_64" 43.38 CONFIG_X86=y 43.39 CONFIG_MMU=y 43.40 @@ -88,12 +92,15 @@ CONFIG_RWSEM_GENERIC_SPINLOCK=y 43.41 CONFIG_GENERIC_CALIBRATE_DELAY=y 43.42 CONFIG_X86_GOOD_APIC=y 43.43 # CONFIG_HPET_TIMER is not set 43.44 -# CONFIG_SMP is not set 43.45 +CONFIG_SMP=y 43.46 +CONFIG_NR_CPUS=8 43.47 +# CONFIG_SCHED_SMT is not set 43.48 # CONFIG_PREEMPT is not set 43.49 CONFIG_MICROCODE=y 43.50 # CONFIG_X86_CPUID is not set 43.51 # CONFIG_NUMA is not set 43.52 # CONFIG_MTRR is not set 43.53 +CONFIG_HAVE_DEC_LOCK=y 43.54 CONFIG_X86_LOCAL_APIC=y 43.55 CONFIG_X86_IO_APIC=y 43.56 CONFIG_PCI=y 43.57 @@ -120,8 +127,12 @@ CONFIG_X86_L1_CACHE_BYTES=128 43.58 # CONFIG_X86_TSC is not set 43.59 CONFIG_X86_XEN_GENAPIC=y 43.60 # CONFIG_X86_MSR is not set 43.61 +CONFIG_X86_HT=y 43.62 +# CONFIG_K8_NUMA is not set 43.63 +# CONFIG_NUMA_EMU is not set 43.64 # CONFIG_GART_IOMMU is not set 43.65 CONFIG_DUMMY_IOMMU=y 43.66 +CONFIG_SWIOTLB=y 43.67 # CONFIG_X86_MCE is not set 43.68 43.69 # 43.70 @@ -163,6 +174,7 @@ CONFIG_BINFMT_MISC=y 43.71 CONFIG_STANDALONE=y 43.72 CONFIG_PREVENT_FIRMWARE_BUILD=y 43.73 CONFIG_FW_LOADER=y 43.74 +# CONFIG_DEBUG_DRIVER is not set 43.75 43.76 # 43.77 # Memory Technology Devices (MTD) 43.78 @@ -214,7 +226,6 @@ CONFIG_MTD_RAM=m 43.79 CONFIG_MTD_ROM=m 43.80 CONFIG_MTD_ABSENT=m 43.81 # CONFIG_MTD_OBSOLETE_CHIPS is not set 43.82 -# CONFIG_MTD_XIP is not set 43.83 43.84 # 43.85 # Mapping drivers for chip access 43.86 @@ -2395,7 +2406,21 @@ CONFIG_ZLIB_DEFLATE=m 43.87 # Kernel hacking 43.88 # 43.89 # CONFIG_PRINTK_TIME is not set 43.90 -# CONFIG_DEBUG_KERNEL is not set 43.91 -CONFIG_LOG_BUF_SHIFT=14 43.92 +CONFIG_DEBUG_KERNEL=y 43.93 +CONFIG_MAGIC_SYSRQ=y 43.94 +CONFIG_LOG_BUF_SHIFT=15 43.95 +# CONFIG_SCHEDSTATS is not set 43.96 +# CONFIG_DEBUG_SLAB is not set 43.97 +# CONFIG_DEBUG_SPINLOCK is not set 43.98 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 43.99 +# CONFIG_DEBUG_KOBJECT is not set 43.100 +# CONFIG_DEBUG_INFO is not set 43.101 +# CONFIG_DEBUG_FS is not set 43.102 +# CONFIG_DEBUG_STACKOVERFLOW is not set 43.103 +# CONFIG_KPROBES is not set 43.104 +# CONFIG_DEBUG_STACK_USAGE is not set 43.105 +# CONFIG_DEBUG_PAGEALLOC is not set 43.106 +# CONFIG_4KSTACKS is not set 43.107 CONFIG_X86_FIND_SMP_CONFIG=y 43.108 CONFIG_X86_MPPARSE=y 43.109 +# CONFIG_INIT_DEBUG is not set
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig	Tue Aug 16 10:09:07 2005 -0800
@@ -533,6 +533,11 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
+config SWIOTLB
+	bool
+	depends on PCI
+	default y
+
 source "drivers/firmware/Kconfig"
 
 choice
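Note: CONFIG_SWIOTLB here is a non-user-visible bool that is simply forced on whenever PCI is enabled; the Makefile and mm/init.c hunks later in this changeset gate both the build of swiotlb.o and a runtime flag on it. A minimal sketch of that gating pattern, in kernel context, with a hypothetical my_dma_setup() caller (the real call site in this changeset is mem_init()):

#ifdef CONFIG_SWIOTLB
extern int swiotlb;             /* runtime switch, set by swiotlb_init() at boot */
extern void swiotlb_init(void);
#endif

static void __init my_dma_setup(void)      /* hypothetical caller, for illustration */
{
#ifdef CONFIG_SWIOTLB
	swiotlb_init();         /* decides at boot whether bounce buffering is needed */
	if (swiotlb)
		printk(KERN_INFO "software IO TLB enabled\n");
#endif
}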
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Tue Aug 16 10:09:07 2005 -0800
@@ -44,6 +44,7 @@ c-obj-$(CONFIG_HPET_TIMER) += time_hpet
 c-obj-$(CONFIG_EFI) += efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
+c-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 
 EXTRA_AFLAGS := -traditional
 
51.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 12:15:23 2005 +0800 51.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 10:09:07 2005 -0800 51.3 @@ -24,6 +24,103 @@ struct dma_coherent_mem { 51.4 unsigned long *bitmap; 51.5 }; 51.6 51.7 +static void iommu_bug(void) 51.8 +{ 51.9 + printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n"); 51.10 + BUG(); 51.11 +} 51.12 + 51.13 +#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0) 51.14 + 51.15 +int 51.16 +dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, 51.17 + enum dma_data_direction direction) 51.18 +{ 51.19 + int i, rc; 51.20 + 51.21 + BUG_ON(direction == DMA_NONE); 51.22 + 51.23 + if (swiotlb) { 51.24 + rc = swiotlb_map_sg(hwdev, sg, nents, direction); 51.25 + } else { 51.26 + for (i = 0; i < nents; i++ ) { 51.27 + sg[i].dma_address = 51.28 + page_to_phys(sg[i].page) + sg[i].offset; 51.29 + sg[i].dma_length = sg[i].length; 51.30 + BUG_ON(!sg[i].page); 51.31 + IOMMU_BUG_ON(address_needs_mapping( 51.32 + hwdev, sg[i].dma_address)); 51.33 + } 51.34 + rc = nents; 51.35 + } 51.36 + 51.37 + flush_write_buffers(); 51.38 + return rc; 51.39 +} 51.40 +EXPORT_SYMBOL(dma_map_sg); 51.41 + 51.42 +void 51.43 +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, 51.44 + enum dma_data_direction direction) 51.45 +{ 51.46 + BUG_ON(direction == DMA_NONE); 51.47 + if (swiotlb) 51.48 + swiotlb_unmap_sg(hwdev, sg, nents, direction); 51.49 +} 51.50 +EXPORT_SYMBOL(dma_unmap_sg); 51.51 + 51.52 +dma_addr_t 51.53 +dma_map_page(struct device *dev, struct page *page, unsigned long offset, 51.54 + size_t size, enum dma_data_direction direction) 51.55 +{ 51.56 + dma_addr_t dma_addr; 51.57 + 51.58 + BUG_ON(direction == DMA_NONE); 51.59 + 51.60 + if (swiotlb) { 51.61 + dma_addr = swiotlb_map_page( 51.62 + dev, page, offset, size, direction); 51.63 + } else { 51.64 + dma_addr = page_to_phys(page) + offset; 51.65 + IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr)); 51.66 + } 51.67 + 51.68 + return dma_addr; 51.69 +} 51.70 +EXPORT_SYMBOL(dma_map_page); 51.71 + 51.72 +void 51.73 +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, 51.74 + enum dma_data_direction direction) 51.75 +{ 51.76 + BUG_ON(direction == DMA_NONE); 51.77 + if (swiotlb) 51.78 + swiotlb_unmap_page(dev, dma_address, size, direction); 51.79 +} 51.80 +EXPORT_SYMBOL(dma_unmap_page); 51.81 + 51.82 +int 51.83 +dma_mapping_error(dma_addr_t dma_addr) 51.84 +{ 51.85 + if (swiotlb) 51.86 + return swiotlb_dma_mapping_error(dma_addr); 51.87 + return 0; 51.88 +} 51.89 +EXPORT_SYMBOL(dma_mapping_error); 51.90 + 51.91 +int 51.92 +dma_supported(struct device *dev, u64 mask) 51.93 +{ 51.94 + if (swiotlb) 51.95 + return swiotlb_dma_supported(dev, mask); 51.96 + /* 51.97 + * By default we'll BUG when an infeasible DMA is requested, and 51.98 + * request swiotlb=force (see IOMMU_BUG_ON). 
51.99 + */ 51.100 + return 1; 51.101 +} 51.102 +EXPORT_SYMBOL(dma_supported); 51.103 + 51.104 void *dma_alloc_coherent(struct device *dev, size_t size, 51.105 dma_addr_t *dma_handle, unsigned int __nocast gfp) 51.106 { 51.107 @@ -54,13 +151,14 @@ void *dma_alloc_coherent(struct device * 51.108 ret = (void *)vstart; 51.109 51.110 if (ret != NULL) { 51.111 - xen_contig_memory(vstart, order); 51.112 + xen_create_contiguous_region(vstart, order); 51.113 51.114 memset(ret, 0, size); 51.115 *dma_handle = virt_to_bus(ret); 51.116 } 51.117 return ret; 51.118 } 51.119 +EXPORT_SYMBOL(dma_alloc_coherent); 51.120 51.121 void dma_free_coherent(struct device *dev, size_t size, 51.122 void *vaddr, dma_addr_t dma_handle) 51.123 @@ -72,9 +170,12 @@ void dma_free_coherent(struct device *de 51.124 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; 51.125 51.126 bitmap_release_region(mem->bitmap, page, order); 51.127 - } else 51.128 + } else { 51.129 + xen_destroy_contiguous_region((unsigned long)vaddr, order); 51.130 free_pages((unsigned long)vaddr, order); 51.131 + } 51.132 } 51.133 +EXPORT_SYMBOL(dma_free_coherent); 51.134 51.135 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, 51.136 dma_addr_t device_addr, size_t size, int flags) 51.137 @@ -153,46 +254,20 @@ void *dma_mark_declared_memory_occupied( 51.138 } 51.139 EXPORT_SYMBOL(dma_mark_declared_memory_occupied); 51.140 51.141 -static LIST_HEAD(dma_map_head); 51.142 -static DEFINE_SPINLOCK(dma_map_lock); 51.143 -struct dma_map_entry { 51.144 - struct list_head list; 51.145 - dma_addr_t dma; 51.146 - char *bounce, *host; 51.147 - size_t size; 51.148 -}; 51.149 -#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d))) 51.150 - 51.151 dma_addr_t 51.152 dma_map_single(struct device *dev, void *ptr, size_t size, 51.153 enum dma_data_direction direction) 51.154 { 51.155 - struct dma_map_entry *ent; 51.156 - void *bnc; 51.157 dma_addr_t dma; 51.158 - unsigned long flags; 51.159 51.160 BUG_ON(direction == DMA_NONE); 51.161 51.162 - /* 51.163 - * Even if size is sub-page, the buffer may still straddle a page 51.164 - * boundary. Take into account buffer start offset. All other calls are 51.165 - * conservative and always search the dma_map list if it's non-empty. 51.166 - */ 51.167 - if ((((unsigned int)ptr & ~PAGE_MASK) + size) <= PAGE_SIZE) { 51.168 - dma = virt_to_bus(ptr); 51.169 + if (swiotlb) { 51.170 + dma = swiotlb_map_single(dev, ptr, size, direction); 51.171 } else { 51.172 - BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL); 51.173 - BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL); 51.174 - if (direction != DMA_FROM_DEVICE) 51.175 - memcpy(bnc, ptr, size); 51.176 - ent->dma = dma; 51.177 - ent->bounce = bnc; 51.178 - ent->host = ptr; 51.179 - ent->size = size; 51.180 - spin_lock_irqsave(&dma_map_lock, flags); 51.181 - list_add(&ent->list, &dma_map_head); 51.182 - spin_unlock_irqrestore(&dma_map_lock, flags); 51.183 + dma = virt_to_bus(ptr); 51.184 + IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size)); 51.185 + IOMMU_BUG_ON(address_needs_mapping(dev, dma)); 51.186 } 51.187 51.188 flush_write_buffers(); 51.189 @@ -204,30 +279,9 @@ void 51.190 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 51.191 enum dma_data_direction direction) 51.192 { 51.193 - struct dma_map_entry *ent; 51.194 - unsigned long flags; 51.195 - 51.196 BUG_ON(direction == DMA_NONE); 51.197 - 51.198 - /* Fast-path check: are there any multi-page DMA mappings? 
*/ 51.199 - if (!list_empty(&dma_map_head)) { 51.200 - spin_lock_irqsave(&dma_map_lock, flags); 51.201 - list_for_each_entry ( ent, &dma_map_head, list ) { 51.202 - if (DMA_MAP_MATCHES(ent, dma_addr)) { 51.203 - list_del(&ent->list); 51.204 - break; 51.205 - } 51.206 - } 51.207 - spin_unlock_irqrestore(&dma_map_lock, flags); 51.208 - if (&ent->list != &dma_map_head) { 51.209 - BUG_ON(dma_addr != ent->dma); 51.210 - BUG_ON(size != ent->size); 51.211 - if (direction != DMA_TO_DEVICE) 51.212 - memcpy(ent->host, ent->bounce, size); 51.213 - dma_free_coherent(dev, size, ent->bounce, ent->dma); 51.214 - kfree(ent); 51.215 - } 51.216 - } 51.217 + if (swiotlb) 51.218 + swiotlb_unmap_single(dev, dma_addr, size, direction); 51.219 } 51.220 EXPORT_SYMBOL(dma_unmap_single); 51.221 51.222 @@ -235,23 +289,8 @@ void 51.223 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, 51.224 enum dma_data_direction direction) 51.225 { 51.226 - struct dma_map_entry *ent; 51.227 - unsigned long flags, off; 51.228 - 51.229 - /* Fast-path check: are there any multi-page DMA mappings? */ 51.230 - if (!list_empty(&dma_map_head)) { 51.231 - spin_lock_irqsave(&dma_map_lock, flags); 51.232 - list_for_each_entry ( ent, &dma_map_head, list ) 51.233 - if (DMA_MAP_MATCHES(ent, dma_handle)) 51.234 - break; 51.235 - spin_unlock_irqrestore(&dma_map_lock, flags); 51.236 - if (&ent->list != &dma_map_head) { 51.237 - off = dma_handle - ent->dma; 51.238 - BUG_ON((off + size) > ent->size); 51.239 - /*if (direction != DMA_TO_DEVICE)*/ 51.240 - memcpy(ent->host+off, ent->bounce+off, size); 51.241 - } 51.242 - } 51.243 + if (swiotlb) 51.244 + swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction); 51.245 } 51.246 EXPORT_SYMBOL(dma_sync_single_for_cpu); 51.247 51.248 @@ -259,24 +298,17 @@ void 51.249 dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, 51.250 enum dma_data_direction direction) 51.251 { 51.252 - struct dma_map_entry *ent; 51.253 - unsigned long flags, off; 51.254 - 51.255 - /* Fast-path check: are there any multi-page DMA mappings? */ 51.256 - if (!list_empty(&dma_map_head)) { 51.257 - spin_lock_irqsave(&dma_map_lock, flags); 51.258 - list_for_each_entry ( ent, &dma_map_head, list ) 51.259 - if (DMA_MAP_MATCHES(ent, dma_handle)) 51.260 - break; 51.261 - spin_unlock_irqrestore(&dma_map_lock, flags); 51.262 - if (&ent->list != &dma_map_head) { 51.263 - off = dma_handle - ent->dma; 51.264 - BUG_ON((off + size) > ent->size); 51.265 - /*if (direction != DMA_FROM_DEVICE)*/ 51.266 - memcpy(ent->bounce+off, ent->host+off, size); 51.267 - } 51.268 - } 51.269 - 51.270 - flush_write_buffers(); 51.271 + if (swiotlb) 51.272 + swiotlb_sync_single_for_device(dev, dma_handle, size, direction); 51.273 } 51.274 EXPORT_SYMBOL(dma_sync_single_for_device); 51.275 + 51.276 +/* 51.277 + * Local variables: 51.278 + * c-file-style: "linux" 51.279 + * indent-tabs-mode: t 51.280 + * c-indent-level: 8 51.281 + * c-basic-offset: 8 51.282 + * tab-width: 8 51.283 + * End: 51.284 + */
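For context on the reworked i386 pci-dma.c above: the old bounce-list machinery is removed, and each entry point now either forwards to the corresponding swiotlb_* implementation (when the swiotlb flag is set) or falls back to direct physical addressing via virt_to_bus() with IOMMU_BUG_ON() sanity checks. A minimal sketch of how a driver consumes this API is below; the dev/buf/len names are hypothetical, but the map, error-check, use, unmap sequence is the standard streaming-DMA pattern these exports implement:

/* Hypothetical streaming-DMA user of the API above. */
static void my_start_io(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(handle))
		return;                 /* the swiotlb overflow buffer was returned */

	/* ... program the device with 'handle' and wait for completion ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
}

When swiotlb is active, dma_map_single() may transparently copy the buffer into the bounce pool, and dma_unmap_single() copies it back for DMA_FROM_DEVICE transfers.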
55.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 55.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Tue Aug 16 10:09:07 2005 -0800 55.3 @@ -0,0 +1,653 @@ 55.4 +/* 55.5 + * Dynamic DMA mapping support. 55.6 + * 55.7 + * This implementation is a fallback for platforms that do not support 55.8 + * I/O TLBs (aka DMA address translation hardware). 55.9 + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> 55.10 + * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> 55.11 + * Copyright (C) 2000, 2003 Hewlett-Packard Co 55.12 + * David Mosberger-Tang <davidm@hpl.hp.com> 55.13 + * Copyright (C) 2005 Keir Fraser <keir@xensource.com> 55.14 + */ 55.15 + 55.16 +#include <linux/cache.h> 55.17 +#include <linux/mm.h> 55.18 +#include <linux/module.h> 55.19 +#include <linux/pci.h> 55.20 +#include <linux/spinlock.h> 55.21 +#include <linux/string.h> 55.22 +#include <linux/types.h> 55.23 +#include <linux/ctype.h> 55.24 +#include <linux/init.h> 55.25 +#include <linux/bootmem.h> 55.26 +#include <linux/highmem.h> 55.27 +#include <asm/io.h> 55.28 +#include <asm/pci.h> 55.29 +#include <asm/dma.h> 55.30 + 55.31 +#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) 55.32 + 55.33 +#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset) 55.34 + 55.35 +/* 55.36 + * Maximum allowable number of contiguous slabs to map, 55.37 + * must be a power of 2. What is the appropriate value ? 55.38 + * The complexity of {map,unmap}_single is linearly dependent on this value. 55.39 + */ 55.40 +#define IO_TLB_SEGSIZE 128 55.41 + 55.42 +/* 55.43 + * log of the size of each IO TLB slab. The number of slabs is command line 55.44 + * controllable. 55.45 + */ 55.46 +#define IO_TLB_SHIFT 11 55.47 + 55.48 +int swiotlb_force; 55.49 + 55.50 +/* 55.51 + * Used to do a quick range check in swiotlb_unmap_single and 55.52 + * swiotlb_sync_single_*, to see if the memory was in fact allocated by this 55.53 + * API. 55.54 + */ 55.55 +static char *io_tlb_start, *io_tlb_end; 55.56 + 55.57 +/* 55.58 + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and 55.59 + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. 55.60 + */ 55.61 +static unsigned long io_tlb_nslabs; 55.62 + 55.63 +/* 55.64 + * When the IOMMU overflows we return a fallback buffer. This sets the size. 55.65 + */ 55.66 +static unsigned long io_tlb_overflow = 32*1024; 55.67 + 55.68 +void *io_tlb_overflow_buffer; 55.69 + 55.70 +/* 55.71 + * This is a free list describing the number of free entries available from 55.72 + * each index 55.73 + */ 55.74 +static unsigned int *io_tlb_list; 55.75 +static unsigned int io_tlb_index; 55.76 + 55.77 +/* 55.78 + * We need to save away the original address corresponding to a mapped entry 55.79 + * for the sync operations. 55.80 + */ 55.81 +static struct phys_addr { 55.82 + struct page *page; 55.83 + unsigned int offset; 55.84 +} *io_tlb_orig_addr; 55.85 + 55.86 +/* 55.87 + * Protect the above data structures in the map and unmap calls 55.88 + */ 55.89 +static DEFINE_SPINLOCK(io_tlb_lock); 55.90 + 55.91 +static int __init 55.92 +setup_io_tlb_npages(char *str) 55.93 +{ 55.94 + if (isdigit(*str)) { 55.95 + io_tlb_nslabs = simple_strtoul(str, &str, 0) << 55.96 + (PAGE_SHIFT - IO_TLB_SHIFT); 55.97 + /* avoid tail segment of size < IO_TLB_SEGSIZE */ 55.98 + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 55.99 + } 55.100 + if (*str == ',') 55.101 + ++str; 55.102 + /* 55.103 + * NB. 
'force' enables the swiotlb, but doesn't force its use for 55.104 + * every DMA like it does on native Linux. 55.105 + */ 55.106 + if (!strcmp(str, "force")) 55.107 + swiotlb_force = 1; 55.108 + return 1; 55.109 +} 55.110 +__setup("swiotlb=", setup_io_tlb_npages); 55.111 +/* make io_tlb_overflow tunable too? */ 55.112 + 55.113 +/* 55.114 + * Statically reserve bounce buffer space and initialize bounce buffer data 55.115 + * structures for the software IO TLB used to implement the PCI DMA API. 55.116 + */ 55.117 +void 55.118 +swiotlb_init_with_default_size (size_t default_size) 55.119 +{ 55.120 + unsigned long i; 55.121 + 55.122 + if (!io_tlb_nslabs) { 55.123 + io_tlb_nslabs = (default_size >> PAGE_SHIFT); 55.124 + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); 55.125 + } 55.126 + 55.127 + /* 55.128 + * Get IO TLB memory from the low pages 55.129 + */ 55.130 + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * 55.131 + (1 << IO_TLB_SHIFT)); 55.132 + if (!io_tlb_start) 55.133 + panic("Cannot allocate SWIOTLB buffer"); 55.134 + 55.135 + xen_create_contiguous_region( 55.136 + (unsigned long)io_tlb_start, 55.137 + get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))); 55.138 + 55.139 + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); 55.140 + 55.141 + /* 55.142 + * Allocate and initialize the free list array. This array is used 55.143 + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 55.144 + * between io_tlb_start and io_tlb_end. 55.145 + */ 55.146 + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); 55.147 + for (i = 0; i < io_tlb_nslabs; i++) 55.148 + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); 55.149 + io_tlb_index = 0; 55.150 + io_tlb_orig_addr = alloc_bootmem( 55.151 + io_tlb_nslabs * sizeof(*io_tlb_orig_addr)); 55.152 + 55.153 + /* 55.154 + * Get the overflow emergency buffer 55.155 + */ 55.156 + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 55.157 + printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", 55.158 + virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end-1)); 55.159 +} 55.160 + 55.161 +void 55.162 +swiotlb_init(void) 55.163 +{ 55.164 + /* The user can forcibly enable swiotlb. */ 55.165 + if (swiotlb_force) 55.166 + swiotlb = 1; 55.167 + 55.168 + /* 55.169 + * Otherwise, enable for domain 0 if the machine has 'lots of memory', 55.170 + * which we take to mean more than 2GB. 55.171 + */ 55.172 + if (xen_start_info.flags & SIF_INITDOMAIN) { 55.173 + dom0_op_t op; 55.174 + op.cmd = DOM0_PHYSINFO; 55.175 + if ((HYPERVISOR_dom0_op(&op) == 0) && 55.176 + (op.u.physinfo.total_pages > 0x7ffff)) 55.177 + swiotlb = 1; 55.178 + } 55.179 + 55.180 + if (swiotlb) 55.181 + swiotlb_init_with_default_size(64 * (1<<20)); 55.182 +} 55.183 + 55.184 +static void 55.185 +__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir) 55.186 +{ 55.187 + if (PageHighMem(buffer.page)) { 55.188 + size_t len, bytes; 55.189 + char *dev, *host, *kmp; 55.190 + len = size; 55.191 + while (len != 0) { 55.192 + if (((bytes = len) + buffer.offset) > PAGE_SIZE) 55.193 + bytes = PAGE_SIZE - buffer.offset; 55.194 + kmp = kmap_atomic(buffer.page, KM_SWIOTLB); 55.195 + dev = dma_addr + size - len; 55.196 + host = kmp + buffer.offset; 55.197 + memcpy((dir == DMA_FROM_DEVICE) ? host : dev, 55.198 + (dir == DMA_FROM_DEVICE) ? 
dev : host, 55.199 + bytes); 55.200 + kunmap_atomic(kmp, KM_SWIOTLB); 55.201 + len -= bytes; 55.202 + buffer.page++; 55.203 + buffer.offset = 0; 55.204 + } 55.205 + } else { 55.206 + char *host = (char *)phys_to_virt( 55.207 + page_to_pseudophys(buffer.page)) + buffer.offset; 55.208 + if (dir == DMA_FROM_DEVICE) 55.209 + memcpy(host, dma_addr, size); 55.210 + else if (dir == DMA_TO_DEVICE) 55.211 + memcpy(dma_addr, host, size); 55.212 + } 55.213 +} 55.214 + 55.215 +/* 55.216 + * Allocates bounce buffer and returns its kernel virtual address. 55.217 + */ 55.218 +static void * 55.219 +map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir) 55.220 +{ 55.221 + unsigned long flags; 55.222 + char *dma_addr; 55.223 + unsigned int nslots, stride, index, wrap; 55.224 + int i; 55.225 + 55.226 + /* 55.227 + * For mappings greater than a page, we limit the stride (and 55.228 + * hence alignment) to a page size. 55.229 + */ 55.230 + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 55.231 + if (size > PAGE_SIZE) 55.232 + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); 55.233 + else 55.234 + stride = 1; 55.235 + 55.236 + BUG_ON(!nslots); 55.237 + 55.238 + /* 55.239 + * Find suitable number of IO TLB entries size that will fit this 55.240 + * request and allocate a buffer from that IO TLB pool. 55.241 + */ 55.242 + spin_lock_irqsave(&io_tlb_lock, flags); 55.243 + { 55.244 + wrap = index = ALIGN(io_tlb_index, stride); 55.245 + 55.246 + if (index >= io_tlb_nslabs) 55.247 + wrap = index = 0; 55.248 + 55.249 + do { 55.250 + /* 55.251 + * If we find a slot that indicates we have 'nslots' 55.252 + * number of contiguous buffers, we allocate the 55.253 + * buffers from that slot and mark the entries as '0' 55.254 + * indicating unavailable. 55.255 + */ 55.256 + if (io_tlb_list[index] >= nslots) { 55.257 + int count = 0; 55.258 + 55.259 + for (i = index; i < (int)(index + nslots); i++) 55.260 + io_tlb_list[i] = 0; 55.261 + for (i = index - 1; 55.262 + (OFFSET(i, IO_TLB_SEGSIZE) != 55.263 + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; 55.264 + i--) 55.265 + io_tlb_list[i] = ++count; 55.266 + dma_addr = io_tlb_start + 55.267 + (index << IO_TLB_SHIFT); 55.268 + 55.269 + /* 55.270 + * Update the indices to avoid searching in 55.271 + * the next round. 55.272 + */ 55.273 + io_tlb_index = 55.274 + ((index + nslots) < io_tlb_nslabs 55.275 + ? (index + nslots) : 0); 55.276 + 55.277 + goto found; 55.278 + } 55.279 + index += stride; 55.280 + if (index >= io_tlb_nslabs) 55.281 + index = 0; 55.282 + } while (index != wrap); 55.283 + 55.284 + spin_unlock_irqrestore(&io_tlb_lock, flags); 55.285 + return NULL; 55.286 + } 55.287 + found: 55.288 + spin_unlock_irqrestore(&io_tlb_lock, flags); 55.289 + 55.290 + /* 55.291 + * Save away the mapping from the original address to the DMA address. 55.292 + * This is needed when we sync the memory. Then we sync the buffer if 55.293 + * needed. 55.294 + */ 55.295 + io_tlb_orig_addr[index] = buffer; 55.296 + if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL)) 55.297 + __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE); 55.298 + 55.299 + return dma_addr; 55.300 +} 55.301 + 55.302 +/* 55.303 + * dma_addr is the kernel virtual address of the bounce buffer to unmap. 
55.304 + */ 55.305 +static void 55.306 +unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 55.307 +{ 55.308 + unsigned long flags; 55.309 + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 55.310 + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; 55.311 + struct phys_addr buffer = io_tlb_orig_addr[index]; 55.312 + 55.313 + /* 55.314 + * First, sync the memory before unmapping the entry 55.315 + */ 55.316 + if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)) 55.317 + __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE); 55.318 + 55.319 + /* 55.320 + * Return the buffer to the free list by setting the corresponding 55.321 + * entries to indicate the number of contigous entries available. 55.322 + * While returning the entries to the free list, we merge the entries 55.323 + * with slots below and above the pool being returned. 55.324 + */ 55.325 + spin_lock_irqsave(&io_tlb_lock, flags); 55.326 + { 55.327 + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? 55.328 + io_tlb_list[index + nslots] : 0); 55.329 + /* 55.330 + * Step 1: return the slots to the free list, merging the 55.331 + * slots with superceeding slots 55.332 + */ 55.333 + for (i = index + nslots - 1; i >= index; i--) 55.334 + io_tlb_list[i] = ++count; 55.335 + /* 55.336 + * Step 2: merge the returned slots with the preceding slots, 55.337 + * if available (non zero) 55.338 + */ 55.339 + for (i = index - 1; 55.340 + (OFFSET(i, IO_TLB_SEGSIZE) != 55.341 + IO_TLB_SEGSIZE -1) && io_tlb_list[i]; 55.342 + i--) 55.343 + io_tlb_list[i] = ++count; 55.344 + } 55.345 + spin_unlock_irqrestore(&io_tlb_lock, flags); 55.346 +} 55.347 + 55.348 +static void 55.349 +sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 55.350 +{ 55.351 + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; 55.352 + struct phys_addr buffer = io_tlb_orig_addr[index]; 55.353 + BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE)); 55.354 + __sync_single(buffer, dma_addr, size, dir); 55.355 +} 55.356 + 55.357 +static void 55.358 +swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) 55.359 +{ 55.360 + /* 55.361 + * Ran out of IOMMU space for this operation. This is very bad. 55.362 + * Unfortunately the drivers cannot handle this operation properly. 55.363 + * unless they check for pci_dma_mapping_error (most don't) 55.364 + * When the mapping is small enough return a static buffer to limit 55.365 + * the damage, or panic when the transfer is too big. 55.366 + */ 55.367 + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at " 55.368 + "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?"); 55.369 + 55.370 + if (size > io_tlb_overflow && do_panic) { 55.371 + if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) 55.372 + panic("PCI-DMA: Memory would be corrupted\n"); 55.373 + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) 55.374 + panic("PCI-DMA: Random memory would be DMAed\n"); 55.375 + } 55.376 +} 55.377 + 55.378 +/* 55.379 + * Map a single buffer of the indicated size for DMA in streaming mode. The 55.380 + * PCI address to use is returned. 55.381 + * 55.382 + * Once the device is given the dma address, the device owns this memory until 55.383 + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 
55.384 + */ 55.385 +dma_addr_t 55.386 +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) 55.387 +{ 55.388 + dma_addr_t dev_addr = virt_to_bus(ptr); 55.389 + void *map; 55.390 + struct phys_addr buffer; 55.391 + 55.392 + BUG_ON(dir == DMA_NONE); 55.393 + 55.394 + /* 55.395 + * If the pointer passed in happens to be in the device's DMA window, 55.396 + * we can safely return the device addr and not worry about bounce 55.397 + * buffering it. 55.398 + */ 55.399 + if (!range_straddles_page_boundary(ptr, size) && 55.400 + !address_needs_mapping(hwdev, dev_addr)) 55.401 + return dev_addr; 55.402 + 55.403 + /* 55.404 + * Oh well, have to allocate and map a bounce buffer. 55.405 + */ 55.406 + buffer.page = virt_to_page(ptr); 55.407 + buffer.offset = (unsigned long)ptr & ~PAGE_MASK; 55.408 + map = map_single(hwdev, buffer, size, dir); 55.409 + if (!map) { 55.410 + swiotlb_full(hwdev, size, dir, 1); 55.411 + map = io_tlb_overflow_buffer; 55.412 + } 55.413 + 55.414 + dev_addr = virt_to_bus(map); 55.415 + 55.416 + /* 55.417 + * Ensure that the address returned is DMA'ble 55.418 + */ 55.419 + if (address_needs_mapping(hwdev, dev_addr)) 55.420 + panic("map_single: bounce buffer is not DMA'ble"); 55.421 + 55.422 + return dev_addr; 55.423 +} 55.424 + 55.425 +/* 55.426 + * Unmap a single streaming mode DMA translation. The dma_addr and size must 55.427 + * match what was provided for in a previous swiotlb_map_single call. All 55.428 + * other usages are undefined. 55.429 + * 55.430 + * After this call, reads by the cpu to the buffer are guaranteed to see 55.431 + * whatever the device wrote there. 55.432 + */ 55.433 +void 55.434 +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, 55.435 + int dir) 55.436 +{ 55.437 + char *dma_addr = bus_to_virt(dev_addr); 55.438 + 55.439 + BUG_ON(dir == DMA_NONE); 55.440 + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 55.441 + unmap_single(hwdev, dma_addr, size, dir); 55.442 +} 55.443 + 55.444 +/* 55.445 + * Make physical memory consistent for a single streaming mode DMA translation 55.446 + * after a transfer. 55.447 + * 55.448 + * If you perform a swiotlb_map_single() but wish to interrogate the buffer 55.449 + * using the cpu, yet do not wish to teardown the PCI dma mapping, you must 55.450 + * call this function before doing so. At the next point you give the PCI dma 55.451 + * address back to the card, you must first perform a 55.452 + * swiotlb_dma_sync_for_device, and then the device again owns the buffer 55.453 + */ 55.454 +void 55.455 +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, 55.456 + size_t size, int dir) 55.457 +{ 55.458 + char *dma_addr = bus_to_virt(dev_addr); 55.459 + 55.460 + BUG_ON(dir == DMA_NONE); 55.461 + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 55.462 + sync_single(hwdev, dma_addr, size, dir); 55.463 +} 55.464 + 55.465 +void 55.466 +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, 55.467 + size_t size, int dir) 55.468 +{ 55.469 + char *dma_addr = bus_to_virt(dev_addr); 55.470 + 55.471 + BUG_ON(dir == DMA_NONE); 55.472 + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 55.473 + sync_single(hwdev, dma_addr, size, dir); 55.474 +} 55.475 + 55.476 +/* 55.477 + * Map a set of buffers described by scatterlist in streaming mode for DMA. 55.478 + * This is the scatter-gather version of the above swiotlb_map_single 55.479 + * interface. 
Here the scatter gather list elements are each tagged with the 55.480 + * appropriate dma address and length. They are obtained via 55.481 + * sg_dma_{address,length}(SG). 55.482 + * 55.483 + * NOTE: An implementation may be able to use a smaller number of 55.484 + * DMA address/length pairs than there are SG table elements. 55.485 + * (for example via virtual mapping capabilities) 55.486 + * The routine returns the number of addr/length pairs actually 55.487 + * used, at most nents. 55.488 + * 55.489 + * Device ownership issues as mentioned above for swiotlb_map_single are the 55.490 + * same here. 55.491 + */ 55.492 +int 55.493 +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, 55.494 + int dir) 55.495 +{ 55.496 + struct phys_addr buffer; 55.497 + dma_addr_t dev_addr; 55.498 + char *map; 55.499 + int i; 55.500 + 55.501 + BUG_ON(dir == DMA_NONE); 55.502 + 55.503 + for (i = 0; i < nelems; i++, sg++) { 55.504 + dev_addr = SG_ENT_PHYS_ADDRESS(sg); 55.505 + if (address_needs_mapping(hwdev, dev_addr)) { 55.506 + buffer.page = sg->page; 55.507 + buffer.offset = sg->offset; 55.508 + map = map_single(hwdev, buffer, sg->length, dir); 55.509 + if (!map) { 55.510 + /* Don't panic here, we expect map_sg users 55.511 + to do proper error handling. */ 55.512 + swiotlb_full(hwdev, sg->length, dir, 0); 55.513 + swiotlb_unmap_sg(hwdev, sg - i, i, dir); 55.514 + sg[0].dma_length = 0; 55.515 + return 0; 55.516 + } 55.517 + sg->dma_address = (dma_addr_t)virt_to_bus(map); 55.518 + } else 55.519 + sg->dma_address = dev_addr; 55.520 + sg->dma_length = sg->length; 55.521 + } 55.522 + return nelems; 55.523 +} 55.524 + 55.525 +/* 55.526 + * Unmap a set of streaming mode DMA translations. Again, cpu read rules 55.527 + * concerning calls here are the same as for swiotlb_unmap_single() above. 55.528 + */ 55.529 +void 55.530 +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, 55.531 + int dir) 55.532 +{ 55.533 + int i; 55.534 + 55.535 + BUG_ON(dir == DMA_NONE); 55.536 + 55.537 + for (i = 0; i < nelems; i++, sg++) 55.538 + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) 55.539 + unmap_single(hwdev, 55.540 + (void *)bus_to_virt(sg->dma_address), 55.541 + sg->dma_length, dir); 55.542 +} 55.543 + 55.544 +/* 55.545 + * Make physical memory consistent for a set of streaming mode DMA translations 55.546 + * after a transfer. 55.547 + * 55.548 + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules 55.549 + * and usage. 
55.550 + */ 55.551 +void 55.552 +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, 55.553 + int nelems, int dir) 55.554 +{ 55.555 + int i; 55.556 + 55.557 + BUG_ON(dir == DMA_NONE); 55.558 + 55.559 + for (i = 0; i < nelems; i++, sg++) 55.560 + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) 55.561 + sync_single(hwdev, 55.562 + (void *)bus_to_virt(sg->dma_address), 55.563 + sg->dma_length, dir); 55.564 +} 55.565 + 55.566 +void 55.567 +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, 55.568 + int nelems, int dir) 55.569 +{ 55.570 + int i; 55.571 + 55.572 + BUG_ON(dir == DMA_NONE); 55.573 + 55.574 + for (i = 0; i < nelems; i++, sg++) 55.575 + if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) 55.576 + sync_single(hwdev, 55.577 + (void *)bus_to_virt(sg->dma_address), 55.578 + sg->dma_length, dir); 55.579 +} 55.580 + 55.581 +dma_addr_t 55.582 +swiotlb_map_page(struct device *hwdev, struct page *page, 55.583 + unsigned long offset, size_t size, 55.584 + enum dma_data_direction direction) 55.585 +{ 55.586 + struct phys_addr buffer; 55.587 + dma_addr_t dev_addr; 55.588 + char *map; 55.589 + 55.590 + dev_addr = page_to_phys(page) + offset; 55.591 + if (address_needs_mapping(hwdev, dev_addr)) { 55.592 + buffer.page = page; 55.593 + buffer.offset = offset; 55.594 + map = map_single(hwdev, buffer, size, direction); 55.595 + if (!map) { 55.596 + swiotlb_full(hwdev, size, direction, 1); 55.597 + map = io_tlb_overflow_buffer; 55.598 + } 55.599 + dev_addr = (dma_addr_t)virt_to_bus(map); 55.600 + } 55.601 + 55.602 + return dev_addr; 55.603 +} 55.604 + 55.605 +void 55.606 +swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, 55.607 + size_t size, enum dma_data_direction direction) 55.608 +{ 55.609 + char *dma_addr = bus_to_virt(dma_address); 55.610 + 55.611 + BUG_ON(direction == DMA_NONE); 55.612 + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) 55.613 + unmap_single(hwdev, dma_addr, size, direction); 55.614 +} 55.615 + 55.616 +int 55.617 +swiotlb_dma_mapping_error(dma_addr_t dma_addr) 55.618 +{ 55.619 + return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); 55.620 +} 55.621 + 55.622 +/* 55.623 + * Return whether the given PCI device DMA address mask can be supported 55.624 + * properly. For example, if your device can only drive the low 24-bits 55.625 + * during PCI bus mastering, then you would pass 0x00ffffff as the mask to 55.626 + * this function. 55.627 + */ 55.628 +int 55.629 +swiotlb_dma_supported (struct device *hwdev, u64 mask) 55.630 +{ 55.631 + return (mask >= 0xffffffffUL); 55.632 +} 55.633 + 55.634 +EXPORT_SYMBOL(swiotlb_init); 55.635 +EXPORT_SYMBOL(swiotlb_map_single); 55.636 +EXPORT_SYMBOL(swiotlb_unmap_single); 55.637 +EXPORT_SYMBOL(swiotlb_map_sg); 55.638 +EXPORT_SYMBOL(swiotlb_unmap_sg); 55.639 +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); 55.640 +EXPORT_SYMBOL(swiotlb_sync_single_for_device); 55.641 +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); 55.642 +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); 55.643 +EXPORT_SYMBOL(swiotlb_map_page); 55.644 +EXPORT_SYMBOL(swiotlb_unmap_page); 55.645 +EXPORT_SYMBOL(swiotlb_dma_mapping_error); 55.646 +EXPORT_SYMBOL(swiotlb_dma_supported); 55.647 + 55.648 +/* 55.649 + * Local variables: 55.650 + * c-file-style: "linux" 55.651 + * indent-tabs-mode: t 55.652 + * c-indent-level: 8 55.653 + * c-basic-offset: 8 55.654 + * tab-width: 8 55.655 + * End: 55.656 + */
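A note on the slab arithmetic in swiotlb.c above: IO_TLB_SHIFT is 11, so each slab is 2 KB; IO_TLB_SEGSIZE caps a single mapping at 128 contiguous slabs (256 KB); and swiotlb_init() reserves a 64 MB pool by default. The size-to-slot computation performed by map_single() is equivalent to this small sketch (the 9000-byte figure is purely illustrative):

#define IO_TLB_SHIFT 11                     /* 2 KB slabs */

static unsigned int slots_for(size_t size)
{
	/* round up to whole slabs: e.g. 9000 bytes -> 5 slots (10 KB) */
	return (size + (1 << IO_TLB_SHIFT) - 1) >> IO_TLB_SHIFT;
}

The pool size and the 'force' behavior are set from the kernel command line via the swiotlb= parameter parsed by setup_io_tlb_npages(); the numeric argument is given in pages and scaled to slabs, and, unlike native Linux, 'force' here only enables the swiotlb rather than forcing every mapping through it.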
56.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 12:15:23 2005 +0800 56.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Tue Aug 16 10:09:07 2005 -0800 56.3 @@ -540,17 +540,14 @@ unsigned long profile_pc(struct pt_regs 56.4 EXPORT_SYMBOL(profile_pc); 56.5 #endif 56.6 56.7 -/* 56.8 - * timer_interrupt() needs to keep up the real-time clock, 56.9 - * as well as call the "do_timer()" routine every clocktick 56.10 - */ 56.11 -static inline void do_timer_interrupt(int irq, void *dev_id, 56.12 - struct pt_regs *regs) 56.13 +irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) 56.14 { 56.15 s64 delta, delta_cpu; 56.16 int cpu = smp_processor_id(); 56.17 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); 56.18 56.19 + write_seqlock(&xtime_lock); 56.20 + 56.21 do { 56.22 get_time_values_from_xen(); 56.23 56.24 @@ -572,7 +569,6 @@ static inline void do_timer_interrupt(in 56.25 for (cpu = 0; cpu < num_online_cpus(); cpu++) 56.26 printk(" %d: %lld\n", cpu, 56.27 per_cpu(processed_system_time, cpu)); 56.28 - return; 56.29 } 56.30 56.31 /* System-wide jiffy work. */ 56.32 @@ -582,7 +578,18 @@ static inline void do_timer_interrupt(in 56.33 do_timer(regs); 56.34 } 56.35 56.36 - /* Local CPU jiffy work. */ 56.37 + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { 56.38 + update_wallclock(); 56.39 + clock_was_set(); 56.40 + } 56.41 + 56.42 + write_sequnlock(&xtime_lock); 56.43 + 56.44 + /* 56.45 + * Local CPU jiffy work. No need to hold xtime_lock, and I'm not sure 56.46 + * if there is risk of deadlock if we do (since update_process_times 56.47 + * may do scheduler rebalancing work and thus acquire runqueue locks). 56.48 + */ 56.49 while (delta_cpu >= NS_PER_TICK) { 56.50 delta_cpu -= NS_PER_TICK; 56.51 per_cpu(processed_system_time, cpu) += NS_PER_TICK; 56.52 @@ -590,29 +597,6 @@ static inline void do_timer_interrupt(in 56.53 profile_tick(CPU_PROFILING, regs); 56.54 } 56.55 56.56 - if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { 56.57 - update_wallclock(); 56.58 - clock_was_set(); 56.59 - } 56.60 -} 56.61 - 56.62 -/* 56.63 - * This is the same as the above, except we _also_ save the current 56.64 - * Time Stamp Counter value at the time of the timer interrupt, so that 56.65 - * we later on can estimate the time of day more exactly. 56.66 - */ 56.67 -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) 56.68 -{ 56.69 - /* 56.70 - * Here we are in the timer irq handler. We just have irqs locally 56.71 - * disabled but we don't know if the timer_bh is running on the other 56.72 - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need 56.73 - * the irq version of write_lock because as just said we have irq 56.74 - * locally disabled. -arca 56.75 - */ 56.76 - write_seqlock(&xtime_lock); 56.77 - do_timer_interrupt(irq, NULL, regs); 56.78 - write_sequnlock(&xtime_lock); 56.79 return IRQ_HANDLED; 56.80 } 56.81
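The time.c change folds do_timer_interrupt() into timer_interrupt() and narrows the xtime_lock critical section: wallclock and jiffies updates stay under write_seqlock(), while per-CPU tick accounting now runs outside it (the added comment explains the deadlock concern around update_process_times()). For reference, a minimal sketch of the reader side of that seqlock discipline, which is what time-reading paths pair with the writer above; the function name is hypothetical:

/* Hypothetical reader: retry if timer_interrupt() updated xtime concurrently. */
static struct timespec read_wallclock_sample(void)
{
	struct timespec ts;
	unsigned long seq;

	do {
		seq = read_seqbegin(&xtime_lock);
		ts = xtime;     /* wall-clock time, updated under the writer lock above */
	} while (read_seqretry(&xtime_lock, seq));

	return ts;
}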
59.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 12:15:23 2005 +0800 59.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 10:09:07 2005 -0800 59.3 @@ -263,12 +263,9 @@ void xen_set_ldt(unsigned long ptr, unsi 59.4 BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); 59.5 } 59.6 59.7 -void xen_contig_memory(unsigned long vstart, unsigned int order) 59.8 +/* Ensure multi-page extents are contiguous in machine memory. */ 59.9 +void xen_create_contiguous_region(unsigned long vstart, unsigned int order) 59.10 { 59.11 - /* 59.12 - * Ensure multi-page extents are contiguous in machine memory. This code 59.13 - * could be cleaned up some, and the number of hypercalls reduced. 59.14 - */ 59.15 pgd_t *pgd; 59.16 pud_t *pud; 59.17 pmd_t *pmd; 59.18 @@ -312,6 +309,49 @@ void xen_contig_memory(unsigned long vst 59.19 balloon_unlock(flags); 59.20 } 59.21 59.22 +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) 59.23 +{ 59.24 + pgd_t *pgd; 59.25 + pud_t *pud; 59.26 + pmd_t *pmd; 59.27 + pte_t *pte; 59.28 + unsigned long mfn, i, flags; 59.29 + 59.30 + scrub_pages(vstart, 1 << order); 59.31 + 59.32 + balloon_lock(flags); 59.33 + 59.34 + /* 1. Zap current PTEs, giving away the underlying pages. */ 59.35 + for (i = 0; i < (1<<order); i++) { 59.36 + pgd = pgd_offset_k(vstart + (i*PAGE_SIZE)); 59.37 + pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); 59.38 + pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); 59.39 + pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 59.40 + mfn = pte_mfn(*pte); 59.41 + BUG_ON(HYPERVISOR_update_va_mapping( 59.42 + vstart + (i*PAGE_SIZE), __pte_ma(0), 0)); 59.43 + phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = 59.44 + INVALID_P2M_ENTRY; 59.45 + BUG_ON(HYPERVISOR_dom_mem_op( 59.46 + MEMOP_decrease_reservation, &mfn, 1, 0) != 1); 59.47 + } 59.48 + 59.49 + /* 2. Map new pages in place of old pages. */ 59.50 + for (i = 0; i < (1<<order); i++) { 59.51 + BUG_ON(HYPERVISOR_dom_mem_op( 59.52 + MEMOP_increase_reservation, &mfn, 1, 0) != 1); 59.53 + BUG_ON(HYPERVISOR_update_va_mapping( 59.54 + vstart + (i*PAGE_SIZE), 59.55 + __pte_ma((mfn<<PAGE_SHIFT)|__PAGE_KERNEL), 0)); 59.56 + xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i); 59.57 + phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = mfn; 59.58 + } 59.59 + 59.60 + flush_tlb_all(); 59.61 + 59.62 + balloon_unlock(flags); 59.63 +} 59.64 + 59.65 #ifdef CONFIG_XEN_PHYSDEV_ACCESS 59.66 59.67 unsigned long allocate_empty_lowmem_region(unsigned long pages)
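The new xen_destroy_contiguous_region() is the inverse of xen_create_contiguous_region(): it scrubs and returns the machine pages backing a previously exchanged extent, then repopulates the range with fresh (not necessarily contiguous) pages, which is what lets dma_free_coherent() undo what dma_alloc_coherent() set up. A minimal sketch of the calling convention, mirroring that pair; the order-2 size is illustrative and the helper names are hypothetical:

/* Illustrative: allocate 16 KB (order 2) and make it machine-contiguous. */
static void *alloc_contig_buffer(void)
{
	unsigned long vstart = __get_free_pages(GFP_KERNEL, 2);

	if (!vstart)
		return NULL;
	xen_create_contiguous_region(vstart, 2);   /* virt_to_bus() results now safe for DMA */
	return (void *)vstart;
}

static void free_contig_buffer(void *vaddr)
{
	xen_destroy_contiguous_region((unsigned long)vaddr, 2);  /* give machine pages back */
	free_pages((unsigned long)vaddr, 2);
}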
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Tue Aug 16 10:09:07 2005 -0800
@@ -41,6 +41,12 @@
 #include <asm/sections.h>
 #include <asm-xen/hypervisor.h>
 
+#if defined(CONFIG_SWIOTLB)
+extern void swiotlb_init(void);
+int swiotlb;
+EXPORT_SYMBOL(swiotlb);
+#endif
+
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -631,6 +637,10 @@ void __init mem_init(void)
 	int bad_ppro;
 	unsigned long pfn;
 
+#if defined(CONFIG_SWIOTLB)
+	swiotlb_init();
+#endif
+
 #ifndef CONFIG_DISCONTIGMEM
 	if (!mem_map)
 		BUG();
61.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 12:15:23 2005 +0800 61.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Tue Aug 16 10:09:07 2005 -0800 61.3 @@ -332,10 +332,10 @@ int direct_remap_area_pages(struct mm_st 61.4 for (i = 0; i < size; i += PAGE_SIZE) { 61.5 if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { 61.6 /* Fill in the PTE pointers. */ 61.7 - generic_page_range(mm, start_address, 61.8 - address-start_address, 61.9 - direct_remap_area_pte_fn, &w); 61.10 - 61.11 + generic_page_range(mm, start_address, 61.12 + address - start_address, 61.13 + direct_remap_area_pte_fn, &w); 61.14 + w = u; 61.15 if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0) 61.16 return -EFAULT; 61.17 v = u; 61.18 @@ -355,9 +355,8 @@ int direct_remap_area_pages(struct mm_st 61.19 61.20 if (v != u) { 61.21 /* get the ptep's filled in */ 61.22 - generic_page_range(mm, start_address, 61.23 - address-start_address, 61.24 - direct_remap_area_pte_fn, &w); 61.25 + generic_page_range(mm, start_address, address - start_address, 61.26 + direct_remap_area_pte_fn, &w); 61.27 if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)) 61.28 return -EFAULT; 61.29 } 61.30 @@ -370,32 +369,34 @@ int direct_remap_area_pages(struct mm_st 61.31 EXPORT_SYMBOL(direct_remap_area_pages); 61.32 61.33 int create_lookup_pte_addr(struct mm_struct *mm, 61.34 - unsigned long address, 61.35 - unsigned long *ptep) 61.36 + unsigned long address, 61.37 + unsigned long *ptep) 61.38 { 61.39 - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 61.40 - { 61.41 - unsigned long *ptep = (unsigned long *)data; 61.42 - if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT) 61.43 - | ((unsigned long)pte & ~PAGE_MASK); 61.44 - return 0; 61.45 - } 61.46 + int f(pte_t *pte, struct page *pte_page, unsigned long addr, 61.47 + void *data) { 61.48 + unsigned long *ptep = (unsigned long *)data; 61.49 + if (ptep) 61.50 + *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << 61.51 + PAGE_SHIFT) | 61.52 + ((unsigned long)pte & ~PAGE_MASK); 61.53 + return 0; 61.54 + } 61.55 61.56 - return generic_page_range(mm, address, PAGE_SIZE, f, ptep); 61.57 + return generic_page_range(mm, address, PAGE_SIZE, f, ptep); 61.58 } 61.59 61.60 EXPORT_SYMBOL(create_lookup_pte_addr); 61.61 61.62 int touch_pte_range(struct mm_struct *mm, 61.63 - unsigned long address, 61.64 - unsigned long size) 61.65 + unsigned long address, 61.66 + unsigned long size) 61.67 { 61.68 - int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 61.69 - { 61.70 - return 0; 61.71 - } 61.72 + int f(pte_t *pte, struct page *pte_page, unsigned long addr, 61.73 + void *data) { 61.74 + return 0; 61.75 + } 61.76 61.77 - return generic_page_range(mm, address, size, f, NULL); 61.78 + return generic_page_range(mm, address, size, f, NULL); 61.79 } 61.80 61.81 EXPORT_SYMBOL(touch_pte_range);
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 16 10:09:07 2005 -0800
@@ -277,7 +277,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
 
 #ifdef CONFIG_X86_PAE
 	/* this gives us a page below 4GB */
-	xen_contig_memory((unsigned long)pgd, 0);
+	xen_create_contiguous_region((unsigned long)pgd, 0);
 #endif
 
 	if (!HAVE_SHARED_KERNEL_PMD)
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Tue Aug 16 10:09:07 2005 -0800
@@ -329,12 +329,12 @@ config GART_IOMMU
 # need this always enabled with GART_IOMMU for the VIA workaround
 config SWIOTLB
 	bool
-	depends on GART_IOMMU
+	depends on PCI
 	default y
 
 config DUMMY_IOMMU
 	bool
-	depends on !GART_IOMMU && !SWIOTLB
+	depends on !GART_IOMMU
 	default y
 	help
 	  Don't use IOMMU code. This will cause problems when you have more than 4GB
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Aug 16 12:15:23 2005 +0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Tue Aug 16 10:09:07 2005 -0800
@@ -36,8 +36,9 @@ c-obj-$(CONFIG_X86_IO_APIC) += genapic_c
 #obj-$(CONFIG_CPU_FREQ) += cpufreq/
 #obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 #obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
-obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
-#obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
+i386-obj-$(CONFIG_DUMMY_IOMMU) += pci-dma.o
+i386-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
 
@@ -49,7 +50,7 @@ c-obj-y += intel_cacheinfo.o
 bootflag-y += ../../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../../i386/kernel/cpuid.o
 topology-y += ../../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
+#swiotlb-$(CONFIG_SWIOTLB) += ../../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../../i386/kernel/microcode.o
 intel_cacheinfo-y += ../../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y += ../../i386/kernel/quirks.o
78.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-dma.c Tue Aug 16 12:15:23 2005 +0800 78.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 78.3 @@ -1,336 +0,0 @@ 78.4 -/* 78.5 - * Dynamic DMA mapping support. 78.6 - */ 78.7 - 78.8 -#include <linux/types.h> 78.9 -#include <linux/mm.h> 78.10 -#include <linux/string.h> 78.11 -#include <linux/pci.h> 78.12 -#include <linux/module.h> 78.13 -#include <asm/io.h> 78.14 -#include <asm-xen/balloon.h> 78.15 - 78.16 -/* Map a set of buffers described by scatterlist in streaming 78.17 - * mode for DMA. This is the scatter-gather version of the 78.18 - * above pci_map_single interface. Here the scatter gather list 78.19 - * elements are each tagged with the appropriate dma address 78.20 - * and length. They are obtained via sg_dma_{address,length}(SG). 78.21 - * 78.22 - * NOTE: An implementation may be able to use a smaller number of 78.23 - * DMA address/length pairs than there are SG table elements. 78.24 - * (for example via virtual mapping capabilities) 78.25 - * The routine returns the number of addr/length pairs actually 78.26 - * used, at most nents. 78.27 - * 78.28 - * Device ownership issues as mentioned above for pci_map_single are 78.29 - * the same here. 78.30 - */ 78.31 -int dma_map_sg(struct device *hwdev, struct scatterlist *sg, 78.32 - int nents, int direction) 78.33 -{ 78.34 - int i; 78.35 - 78.36 - BUG_ON(direction == DMA_NONE); 78.37 - for (i = 0; i < nents; i++ ) { 78.38 - struct scatterlist *s = &sg[i]; 78.39 - BUG_ON(!s->page); 78.40 - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); 78.41 - s->dma_length = s->length; 78.42 - } 78.43 - return nents; 78.44 -} 78.45 - 78.46 -EXPORT_SYMBOL(dma_map_sg); 78.47 - 78.48 -/* Unmap a set of streaming mode DMA translations. 78.49 - * Again, cpu read rules concerning calls here are the same as for 78.50 - * pci_unmap_single() above. 78.51 - */ 78.52 -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, 78.53 - int nents, int dir) 78.54 -{ 78.55 - int i; 78.56 - for (i = 0; i < nents; i++) { 78.57 - struct scatterlist *s = &sg[i]; 78.58 - BUG_ON(s->page == NULL); 78.59 - BUG_ON(s->dma_address == 0); 78.60 - dma_unmap_single(dev, s->dma_address, s->dma_length, dir); 78.61 - } 78.62 -} 78.63 - 78.64 -EXPORT_SYMBOL(dma_unmap_sg); 78.65 - 78.66 -struct dma_coherent_mem { 78.67 - void *virt_base; 78.68 - u32 device_base; 78.69 - int size; 78.70 - int flags; 78.71 - unsigned long *bitmap; 78.72 -}; 78.73 - 78.74 -void *dma_alloc_coherent(struct device *dev, size_t size, 78.75 - dma_addr_t *dma_handle, unsigned gfp) 78.76 -{ 78.77 - void *ret; 78.78 - unsigned int order = get_order(size); 78.79 - unsigned long vstart; 78.80 - 78.81 - struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; 78.82 - 78.83 - /* ignore region specifiers */ 78.84 - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); 78.85 - 78.86 - if (mem) { 78.87 - int page = bitmap_find_free_region(mem->bitmap, mem->size, 78.88 - order); 78.89 - if (page >= 0) { 78.90 - *dma_handle = mem->device_base + (page << PAGE_SHIFT); 78.91 - ret = mem->virt_base + (page << PAGE_SHIFT); 78.92 - memset(ret, 0, size); 78.93 - return ret; 78.94 - } 78.95 - if (mem->flags & DMA_MEMORY_EXCLUSIVE) 78.96 - return NULL; 78.97 - } 78.98 - 78.99 - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) 78.100 - gfp |= GFP_DMA; 78.101 - 78.102 - vstart = __get_free_pages(gfp, order); 78.103 - ret = (void *)vstart; 78.104 - if (ret == NULL) 78.105 - return ret; 78.106 - 78.107 - xen_contig_memory(vstart, order); 78.108 - 78.109 - memset(ret, 0, size); 78.110 - *dma_handle = virt_to_bus(ret); 78.111 - 78.112 - return ret; 78.113 -} 78.114 -EXPORT_SYMBOL(dma_alloc_coherent); 78.115 - 78.116 -void dma_free_coherent(struct device *dev, size_t size, 78.117 - void *vaddr, dma_addr_t dma_handle) 78.118 -{ 78.119 - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; 78.120 - int order = get_order(size); 78.121 - 78.122 - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { 78.123 - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; 78.124 - 78.125 - bitmap_release_region(mem->bitmap, page, order); 78.126 - } else 78.127 - free_pages((unsigned long)vaddr, order); 78.128 -} 78.129 -EXPORT_SYMBOL(dma_free_coherent); 78.130 - 78.131 -#if 0 78.132 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, 78.133 - dma_addr_t device_addr, size_t size, int flags) 78.134 -{ 78.135 - void __iomem *mem_base; 78.136 - int pages = size >> PAGE_SHIFT; 78.137 - int bitmap_size = (pages + 31)/32; 78.138 - 78.139 - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) 78.140 - goto out; 78.141 - if (!size) 78.142 - goto out; 78.143 - if (dev->dma_mem) 78.144 - goto out; 78.145 - 78.146 - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ 78.147 - 78.148 - mem_base = ioremap(bus_addr, size); 78.149 - if (!mem_base) 78.150 - goto out; 78.151 - 78.152 - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); 78.153 - if (!dev->dma_mem) 78.154 - goto out; 78.155 - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); 78.156 - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); 78.157 - if (!dev->dma_mem->bitmap) 78.158 - goto free1_out; 78.159 - memset(dev->dma_mem->bitmap, 0, bitmap_size); 78.160 - 78.161 - dev->dma_mem->virt_base = mem_base; 78.162 - dev->dma_mem->device_base = device_addr; 78.163 - dev->dma_mem->size = pages; 78.164 - dev->dma_mem->flags = flags; 78.165 - 78.166 - if (flags & DMA_MEMORY_MAP) 78.167 - return DMA_MEMORY_MAP; 78.168 - 78.169 - return DMA_MEMORY_IO; 78.170 - 78.171 - free1_out: 78.172 - kfree(dev->dma_mem->bitmap); 78.173 - out: 78.174 - return 0; 78.175 -} 78.176 -EXPORT_SYMBOL(dma_declare_coherent_memory); 78.177 - 78.178 -void dma_release_declared_memory(struct device *dev) 78.179 -{ 78.180 - struct dma_coherent_mem *mem = dev->dma_mem; 78.181 - 78.182 - if(!mem) 78.183 - return; 78.184 - dev->dma_mem = NULL; 78.185 - iounmap(mem->virt_base); 78.186 - kfree(mem->bitmap); 78.187 - kfree(mem); 78.188 -} 78.189 -EXPORT_SYMBOL(dma_release_declared_memory); 78.190 - 78.191 -void *dma_mark_declared_memory_occupied(struct device *dev, 78.192 - dma_addr_t device_addr, size_t size) 78.193 -{ 78.194 - struct dma_coherent_mem 
*mem = dev->dma_mem; 78.195 - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; 78.196 - int pos, err; 78.197 - 78.198 - if (!mem) 78.199 - return ERR_PTR(-EINVAL); 78.200 - 78.201 - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; 78.202 - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); 78.203 - if (err != 0) 78.204 - return ERR_PTR(err); 78.205 - return mem->virt_base + (pos << PAGE_SHIFT); 78.206 -} 78.207 -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); 78.208 -#endif 78.209 - 78.210 -static LIST_HEAD(dma_map_head); 78.211 -static DEFINE_SPINLOCK(dma_map_lock); 78.212 -struct dma_map_entry { 78.213 - struct list_head list; 78.214 - dma_addr_t dma; 78.215 - char *bounce, *host; 78.216 - size_t size; 78.217 -}; 78.218 -#define DMA_MAP_MATCHES(e,d) (((e)->dma<=(d)) && (((e)->dma+(e)->size)>(d))) 78.219 - 78.220 -dma_addr_t 78.221 -dma_map_single(struct device *dev, void *ptr, size_t size, 78.222 - enum dma_data_direction direction) 78.223 -{ 78.224 - struct dma_map_entry *ent; 78.225 - void *bnc; 78.226 - dma_addr_t dma; 78.227 - unsigned long flags; 78.228 - 78.229 - if (direction == DMA_NONE) 78.230 - out_of_line_bug(); 78.231 - 78.232 - /* 78.233 - * Even if size is sub-page, the buffer may still straddle a page 78.234 - * boundary. Take into account buffer start offset. All other calls are 78.235 - * conservative and always search the dma_map list if it's non-empty. 78.236 - */ 78.237 - if (((((unsigned long)ptr) & ~PAGE_MASK) + size) <= PAGE_SIZE) { 78.238 - dma = virt_to_bus(ptr); 78.239 - } else { 78.240 - BUG_ON((bnc = dma_alloc_coherent(dev, size, &dma, GFP_ATOMIC)) == NULL); 78.241 - BUG_ON((ent = kmalloc(sizeof(*ent), GFP_ATOMIC)) == NULL); 78.242 - if (direction != DMA_FROM_DEVICE) 78.243 - memcpy(bnc, ptr, size); 78.244 - ent->dma = dma; 78.245 - ent->bounce = bnc; 78.246 - ent->host = ptr; 78.247 - ent->size = size; 78.248 - spin_lock_irqsave(&dma_map_lock, flags); 78.249 - list_add(&ent->list, &dma_map_head); 78.250 - spin_unlock_irqrestore(&dma_map_lock, flags); 78.251 - } 78.252 - 78.253 - if ((dma+size) & ~*dev->dma_mask) 78.254 - out_of_line_bug(); 78.255 - return dma; 78.256 -} 78.257 -EXPORT_SYMBOL(dma_map_single); 78.258 - 78.259 -void 78.260 -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 78.261 - enum dma_data_direction direction) 78.262 -{ 78.263 - struct dma_map_entry *ent; 78.264 - unsigned long flags; 78.265 - 78.266 - if (direction == DMA_NONE) 78.267 - out_of_line_bug(); 78.268 - 78.269 - /* Fast-path check: are there any multi-page DMA mappings? 
*/ 78.270 - if (!list_empty(&dma_map_head)) { 78.271 - spin_lock_irqsave(&dma_map_lock, flags); 78.272 - list_for_each_entry ( ent, &dma_map_head, list ) { 78.273 - if (DMA_MAP_MATCHES(ent, dma_addr)) { 78.274 - list_del(&ent->list); 78.275 - break; 78.276 - } 78.277 - } 78.278 - spin_unlock_irqrestore(&dma_map_lock, flags); 78.279 - if (&ent->list != &dma_map_head) { 78.280 - BUG_ON(dma_addr != ent->dma); 78.281 - BUG_ON(size != ent->size); 78.282 - if (direction != DMA_TO_DEVICE) 78.283 - memcpy(ent->host, ent->bounce, size); 78.284 - dma_free_coherent(dev, size, ent->bounce, ent->dma); 78.285 - kfree(ent); 78.286 - } 78.287 - } 78.288 -} 78.289 -EXPORT_SYMBOL(dma_unmap_single); 78.290 - 78.291 -void 78.292 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, 78.293 - enum dma_data_direction direction) 78.294 -{ 78.295 - struct dma_map_entry *ent; 78.296 - unsigned long flags, off; 78.297 - 78.298 - /* Fast-path check: are there any multi-page DMA mappings? */ 78.299 - if (!list_empty(&dma_map_head)) { 78.300 - spin_lock_irqsave(&dma_map_lock, flags); 78.301 - list_for_each_entry ( ent, &dma_map_head, list ) 78.302 - if (DMA_MAP_MATCHES(ent, dma_handle)) 78.303 - break; 78.304 - spin_unlock_irqrestore(&dma_map_lock, flags); 78.305 - if (&ent->list != &dma_map_head) { 78.306 - off = dma_handle - ent->dma; 78.307 - BUG_ON((off + size) > ent->size); 78.308 - /*if (direction != DMA_TO_DEVICE)*/ 78.309 - memcpy(ent->host+off, ent->bounce+off, size); 78.310 - } 78.311 - } 78.312 -} 78.313 -EXPORT_SYMBOL(dma_sync_single_for_cpu); 78.314 - 78.315 -void 78.316 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, 78.317 - enum dma_data_direction direction) 78.318 -{ 78.319 - struct dma_map_entry *ent; 78.320 - unsigned long flags, off; 78.321 - 78.322 - /* Fast-path check: are there any multi-page DMA mappings? */ 78.323 - if (!list_empty(&dma_map_head)) { 78.324 - spin_lock_irqsave(&dma_map_lock, flags); 78.325 - list_for_each_entry ( ent, &dma_map_head, list ) 78.326 - if (DMA_MAP_MATCHES(ent, dma_handle)) 78.327 - break; 78.328 - spin_unlock_irqrestore(&dma_map_lock, flags); 78.329 - if (&ent->list != &dma_map_head) { 78.330 - off = dma_handle - ent->dma; 78.331 - BUG_ON((off + size) > ent->size); 78.332 - /*if (direction != DMA_FROM_DEVICE)*/ 78.333 - memcpy(ent->bounce+off, ent->host+off, size); 78.334 - } 78.335 - } 78.336 - 78.337 - flush_write_buffers(); 78.338 -} 78.339 -EXPORT_SYMBOL(dma_sync_single_for_device);
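The file removed above implemented x86_64 DMA mapping with ad-hoc bounce buffers: dma_map_single() handed the bus address straight to the device when the buffer fit inside one page, and otherwise copied it through a coherent bounce allocation tracked on dma_map_head. A minimal, self-contained sketch of just that page-straddle decision (constants hard-coded for illustration; the equivalent test now lives in range_straddles_page_boundary() in the new i386 dma-mapping.h further down):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* The test the removed dma_map_single() keyed off: a buffer can be handed
 * to the device directly only if [ptr, ptr + size) stays within one page;
 * anything crossing a page boundary was copied through a bounce buffer
 * allocated with dma_alloc_coherent(). */
static int straddles_page_boundary(unsigned long ptr, size_t size)
{
    return ((ptr & ~PAGE_MASK) + size) > PAGE_SIZE;
}

int main(void)
{
    unsigned long near_end = 3 * PAGE_SIZE - 4;   /* 4 bytes below a page boundary */

    printf("4 bytes there: %s\n", straddles_page_boundary(near_end, 4) ? "bounce" : "direct");
    printf("8 bytes there: %s\n", straddles_page_boundary(near_end, 8) ? "bounce" : "direct");
    return 0;
}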
79.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Tue Aug 16 12:15:23 2005 +0800 79.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c Tue Aug 16 10:09:07 2005 -0800 79.3 @@ -61,6 +61,7 @@ void dma_free_coherent(struct device *hw 79.4 EXPORT_SYMBOL(dma_free_coherent); 79.5 #endif 79.6 79.7 +#if 0 79.8 int dma_supported(struct device *hwdev, u64 mask) 79.9 { 79.10 /* 79.11 @@ -76,6 +77,7 @@ int dma_supported(struct device *hwdev, 79.12 return 1; 79.13 } 79.14 EXPORT_SYMBOL(dma_supported); 79.15 +#endif 79.16 79.17 int dma_get_cache_alignment(void) 79.18 {
87.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 12:15:23 2005 +0800 87.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Tue Aug 16 10:09:07 2005 -0800 87.3 @@ -44,10 +44,6 @@ 87.4 #define Dprintk(x...) 87.5 #endif 87.6 87.7 -#ifdef CONFIG_GART_IOMMU 87.8 -extern int swiotlb; 87.9 -#endif 87.10 - 87.11 extern char _stext[]; 87.12 87.13 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 87.14 @@ -790,8 +786,6 @@ static inline int page_is_ram (unsigned 87.15 return 1; 87.16 } 87.17 87.18 -extern int swiotlb_force; 87.19 - 87.20 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, 87.21 kcore_vsyscall; 87.22 87.23 @@ -800,14 +794,9 @@ void __init mem_init(void) 87.24 int codesize, reservedpages, datasize, initsize; 87.25 int tmp; 87.26 87.27 -#ifdef CONFIG_SWIOTLB 87.28 - if (swiotlb_force) 87.29 - swiotlb = 1; 87.30 - if (!iommu_aperture && 87.31 - (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) 87.32 - swiotlb = 1; 87.33 - if (swiotlb) 87.34 - swiotlb_init(); 87.35 +#if defined(CONFIG_SWIOTLB) 87.36 + extern void swiotlb_init(void); 87.37 + swiotlb_init(); 87.38 #endif 87.39 87.40 /* How many end-of-memory variables you have, grandma! */
100.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 12:15:23 2005 +0800 100.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Tue Aug 16 10:09:07 2005 -0800 100.3 @@ -23,6 +23,9 @@ int __init xlblktap_init(void) 100.4 blkif_be_driver_status_t be_st; 100.5 100.6 printk(KERN_INFO "Initialising Xen block tap device\n"); 100.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT 100.8 + printk(KERN_INFO "Block tap is using grant tables.\n"); 100.9 +#endif 100.10 100.11 DPRINTK(" tap - Backend connection init:\n"); 100.12
101.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 12:15:23 2005 +0800 101.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Tue Aug 16 10:09:07 2005 -0800 101.3 @@ -85,6 +85,11 @@ typedef struct blkif_st { 101.4 spinlock_t blk_ring_lock; 101.5 atomic_t refcnt; 101.6 struct work_struct work; 101.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT 101.8 + u16 shmem_handle; 101.9 + memory_t shmem_vaddr; 101.10 + grant_ref_t shmem_ref; 101.11 +#endif 101.12 } blkif_t; 101.13 101.14 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
102.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 12:15:23 2005 +0800 102.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Tue Aug 16 10:09:07 2005 -0800 102.3 @@ -9,6 +9,7 @@ 102.4 */ 102.5 102.6 #include "blktap.h" 102.7 +#include <asm-xen/evtchn.h> 102.8 102.9 static char *blkif_state_name[] = { 102.10 [BLKIF_STATE_CLOSED] = "closed", 102.11 @@ -48,12 +49,21 @@ static void __blkif_disconnect_complete( 102.12 blkif_t *blkif = (blkif_t *)arg; 102.13 ctrl_msg_t cmsg; 102.14 blkif_be_disconnect_t disc; 102.15 +#ifdef CONFIG_XEN_BLKDEV_GRANT 102.16 + struct gnttab_unmap_grant_ref op; 102.17 +#endif 102.18 102.19 /* 102.20 * These can't be done in blkif_disconnect() because at that point there 102.21 * may be outstanding requests at the disc whose asynchronous responses 102.22 * must still be notified to the remote driver. 102.23 */ 102.24 +#ifdef CONFIG_XEN_BLKDEV_GRANT 102.25 + op.host_addr = blkif->shmem_vaddr; 102.26 + op.handle = blkif->shmem_handle; 102.27 + op.dev_bus_addr = 0; 102.28 + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); 102.29 +#endif 102.30 vfree(blkif->blk_ring.sring); 102.31 102.32 /* Construct the deferred response message. */ 102.33 @@ -177,8 +187,12 @@ void blkif_ptfe_connect(blkif_be_connect 102.34 unsigned int evtchn = connect->evtchn; 102.35 unsigned long shmem_frame = connect->shmem_frame; 102.36 struct vm_struct *vma; 102.37 +#ifdef CONFIG_XEN_BLKDEV_GRANT 102.38 + int ref = connect->shmem_ref; 102.39 +#else 102.40 pgprot_t prot; 102.41 int error; 102.42 +#endif 102.43 blkif_t *blkif; 102.44 blkif_sring_t *sring; 102.45 102.46 @@ -199,24 +213,46 @@ void blkif_ptfe_connect(blkif_be_connect 102.47 return; 102.48 } 102.49 102.50 - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); 102.51 +#ifndef CONFIG_XEN_BLKDEV_GRANT 102.52 + prot = __pgprot(_KERNPG_TABLE); 102.53 error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), 102.54 shmem_frame<<PAGE_SHIFT, PAGE_SIZE, 102.55 prot, domid); 102.56 if ( error != 0 ) 102.57 { 102.58 - WPRINTK("BE_CONNECT: error! (%d)\n", error); 102.59 if ( error == -ENOMEM ) 102.60 connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 102.61 - else if ( error == -EFAULT ) { 102.62 + else if ( error == -EFAULT ) 102.63 connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 102.64 - WPRINTK("BE_CONNECT: MAPPING error!\n"); 102.65 - } 102.66 else 102.67 connect->status = BLKIF_BE_STATUS_ERROR; 102.68 vfree(vma->addr); 102.69 return; 102.70 } 102.71 +#else 102.72 + { /* Map: Use the Grant table reference */ 102.73 + struct gnttab_map_grant_ref op; 102.74 + op.host_addr = VMALLOC_VMADDR(vma->addr); 102.75 + op.flags = GNTMAP_host_map; 102.76 + op.ref = ref; 102.77 + op.dom = domid; 102.78 + 102.79 + BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) ); 102.80 + 102.81 + handle = op.handle; 102.82 + 102.83 + if (op.handle < 0) { 102.84 + DPRINTK(" Grant table operation failure !\n"); 102.85 + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 102.86 + vfree(vma->addr); 102.87 + return; 102.88 + } 102.89 + 102.90 + blkif->shmem_ref = ref; 102.91 + blkif->shmem_handle = handle; 102.92 + blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr); 102.93 + } 102.94 +#endif 102.95 102.96 if ( blkif->status != DISCONNECTED ) 102.97 {
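With CONFIG_XEN_BLKDEV_GRANT the backend no longer remaps the frontend's shared-ring frame with direct_remap_area_pages(); it maps it through the grant table and must remember the returned handle so the frame's reference count can be dropped at disconnect. A hedged sketch of that map/unmap pairing, using only the hypercall and structures visible in the hunks above (header paths and error handling are approximations, not the tree's exact code):

#include <linux/kernel.h>
#include <linux/types.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/grant_table.h>

/* Map a granted frame from domain 'domid' at kernel address 'vaddr',
 * returning the handle that must be kept for the later unmap. */
static int map_ring_page(unsigned long vaddr, grant_ref_t ref,
                         domid_t domid, u16 *handle)
{
    struct gnttab_map_grant_ref op;

    op.host_addr = vaddr;
    op.flags     = GNTMAP_host_map;
    op.ref       = ref;
    op.dom       = domid;

    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1));
    if (op.handle < 0)
        return -1;              /* grant rejected by the hypervisor */

    *handle = op.handle;
    return 0;
}

/* Drop the mapping (and with it the reference on the remote frame). */
static void unmap_ring_page(unsigned long vaddr, u16 handle)
{
    struct gnttab_unmap_grant_ref op;

    op.host_addr    = vaddr;
    op.handle       = handle;
    op.dev_bus_addr = 0;

    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
}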
104.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 12:15:23 2005 +0800 104.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Tue Aug 16 10:09:07 2005 -0800 104.3 @@ -21,6 +21,9 @@ 104.4 #include <asm/pgalloc.h> 104.5 #include <asm/tlbflush.h> 104.6 #include <asm-xen/xen-public/io/blkif.h> /* for control ring. */ 104.7 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.8 +#include <asm-xen/xen-public/grant_table.h> 104.9 +#endif 104.10 104.11 #include "blktap.h" 104.12 104.13 @@ -43,6 +46,7 @@ static ctrl_front_ring_t blktap_uctrl_r 104.14 static int blktap_read_fe_ring(void); 104.15 static int blktap_read_be_ring(void); 104.16 104.17 + 104.18 /* -------[ mmap region ]--------------------------------------------- */ 104.19 /* 104.20 * We use a big chunk of address space to map in-flight requests into, 104.21 @@ -73,7 +77,28 @@ unsigned long user_vstart; /* start of 104.22 ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ 104.23 ((_seg) * PAGE_SIZE)) 104.24 104.25 +/* -------[ grant handles ]------------------------------------------- */ 104.26 104.27 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.28 +/* When using grant tables to map a frame for device access then the 104.29 + * handle returned must be used to unmap the frame. This is needed to 104.30 + * drop the ref count on the frame. 104.31 + */ 104.32 +struct grant_handle_pair 104.33 +{ 104.34 + u16 kernel; 104.35 + u16 user; 104.36 +}; 104.37 +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES]; 104.38 +#define pending_handle(_idx, _i) \ 104.39 + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) 104.40 +#define BLKTAP_INVALID_HANDLE(_g) \ 104.41 + (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF)) 104.42 +#define BLKTAP_INVALIDATE_HANDLE(_g) do { \ 104.43 + (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \ 104.44 + } while(0) 104.45 + 104.46 +#endif 104.47 104.48 104.49 /* -------[ blktap vm ops ]------------------------------------------- */ 104.50 @@ -348,9 +373,43 @@ static struct file_operations blktap_fop 104.51 104.52 /*-----[ Data to/from user space ]----------------------------------------*/ 104.53 104.54 - 104.55 static void fast_flush_area(int idx, int nr_pages) 104.56 { 104.57 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.58 + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 104.59 + unsigned int i, op = 0; 104.60 + struct grant_handle_pair *handle; 104.61 + unsigned long ptep; 104.62 + 104.63 + for (i=0; i<nr_pages; i++) 104.64 + { 104.65 + handle = &pending_handle(idx, i); 104.66 + if (!BLKTAP_INVALID_HANDLE(handle)) 104.67 + { 104.68 + 104.69 + unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i); 104.70 + unmap[op].dev_bus_addr = 0; 104.71 + unmap[op].handle = handle->kernel; 104.72 + op++; 104.73 + 104.74 + if (create_lookup_pte_addr(blktap_vma->vm_mm, 104.75 + MMAP_VADDR(user_vstart, idx, i), 104.76 + &ptep) !=0) { 104.77 + DPRINTK("Couldn't get a pte addr!\n"); 104.78 + return; 104.79 + } 104.80 + unmap[op].host_addr = ptep; 104.81 + unmap[op].dev_bus_addr = 0; 104.82 + unmap[op].handle = handle->user; 104.83 + op++; 104.84 + 104.85 + BLKTAP_INVALIDATE_HANDLE(handle); 104.86 + } 104.87 + } 104.88 + if ( unlikely(HYPERVISOR_grant_table_op( 104.89 + GNTTABOP_unmap_grant_ref, unmap, op))) 104.90 + BUG(); 104.91 +#else 104.92 multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 104.93 int i; 104.94 104.95 @@ -363,21 +422,22 @@ static void fast_flush_area(int idx, int 104.96 mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; 104.97 if 
( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) 104.98 BUG(); 104.99 +#endif 104.100 } 104.101 104.102 104.103 -extern int __direct_remap_area_pages(struct mm_struct *mm, 104.104 - unsigned long address, 104.105 - unsigned long size, 104.106 - mmu_update_t *v); 104.107 - 104.108 int blktap_write_fe_ring(blkif_request_t *req) 104.109 { 104.110 blkif_request_t *target; 104.111 - int i; 104.112 + int i, ret = 0; 104.113 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.114 + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; 104.115 + int op; 104.116 +#else 104.117 unsigned long remap_prot; 104.118 multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1]; 104.119 mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 104.120 +#endif 104.121 104.122 /* 104.123 * This is called to pass a request from the real frontend domain's 104.124 @@ -394,18 +454,109 @@ int blktap_write_fe_ring(blkif_request_t 104.125 return 0; 104.126 } 104.127 104.128 - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; 104.129 flush_cache_all(); /* a noop on intel... */ 104.130 104.131 target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt); 104.132 memcpy(target, req, sizeof(*req)); 104.133 104.134 /* Map the foreign pages directly in to the application */ 104.135 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.136 + op = 0; 104.137 + for (i=0; i<target->nr_segments; i++) { 104.138 + 104.139 + unsigned long uvaddr; 104.140 + unsigned long kvaddr; 104.141 + unsigned long ptep; 104.142 + 104.143 + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); 104.144 + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i); 104.145 + 104.146 + /* Map the remote page to kernel. */ 104.147 + map[op].host_addr = kvaddr; 104.148 + map[op].dom = ID_TO_DOM(req->id); 104.149 + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); 104.150 + map[op].flags = GNTMAP_host_map; 104.151 + /* This needs a bit more thought in terms of interposition: 104.152 + * If we want to be able to modify pages during write using 104.153 + * grant table mappings, the guest will either need to allow 104.154 + * it, or we'll need to incur a copy. */ 104.155 + if (req->operation == BLKIF_OP_WRITE) 104.156 + map[op].flags |= GNTMAP_readonly; 104.157 + op++; 104.158 + 104.159 + /* Now map it to user. */ 104.160 + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); 104.161 + if (ret) 104.162 + { 104.163 + DPRINTK("Couldn't get a pte addr!\n"); 104.164 + goto fail; 104.165 + } 104.166 + 104.167 + map[op].host_addr = ptep; 104.168 + map[op].dom = ID_TO_DOM(req->id); 104.169 + map[op].ref = blkif_gref_from_fas(target->frame_and_sects[i]); 104.170 + map[op].flags = GNTMAP_host_map | GNTMAP_application_map 104.171 + | GNTMAP_contains_pte; 104.172 + /* Above interposition comment applies here as well. 
*/ 104.173 + if (req->operation == BLKIF_OP_WRITE) 104.174 + map[op].flags |= GNTMAP_readonly; 104.175 + op++; 104.176 + } 104.177 + 104.178 + if ( unlikely(HYPERVISOR_grant_table_op( 104.179 + GNTTABOP_map_grant_ref, map, op))) 104.180 + BUG(); 104.181 + 104.182 + op = 0; 104.183 + for (i=0; i<(target->nr_segments*2); i+=2) { 104.184 + unsigned long uvaddr; 104.185 + unsigned long kvaddr; 104.186 + unsigned long offset; 104.187 + int cancel = 0; 104.188 + 104.189 + uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2); 104.190 + kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2); 104.191 + 104.192 + if ( unlikely(map[i].handle < 0) ) { 104.193 + DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle); 104.194 + ret = map[i].handle; 104.195 + cancel = 1; 104.196 + } 104.197 + 104.198 + if ( unlikely(map[i+1].handle < 0) ) { 104.199 + DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle); 104.200 + ret = map[i+1].handle; 104.201 + cancel = 1; 104.202 + } 104.203 + 104.204 + if (cancel) 104.205 + goto fail; 104.206 + 104.207 + /* Set the necessary mappings in p2m and in the VM_FOREIGN 104.208 + * vm_area_struct to allow user vaddr -> struct page lookups 104.209 + * to work. This is needed for direct IO to foreign pages. */ 104.210 + phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = 104.211 + FOREIGN_FRAME(map[i].dev_bus_addr); 104.212 + 104.213 + offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 104.214 + ((struct page **)blktap_vma->vm_private_data)[offset] = 104.215 + pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT); 104.216 + 104.217 + /* Save handles for unmapping later. */ 104.218 + pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle; 104.219 + pending_handle(ID_TO_IDX(req->id), i/2).user = map[i+1].handle; 104.220 + } 104.221 + 104.222 +#else 104.223 + 104.224 + remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; 104.225 + 104.226 for (i=0; i<target->nr_segments; i++) { 104.227 unsigned long buf; 104.228 unsigned long uvaddr; 104.229 unsigned long kvaddr; 104.230 unsigned long offset; 104.231 + unsigned long ptep; 104.232 104.233 buf = target->frame_and_sects[i] & PAGE_MASK; 104.234 uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i); 104.235 @@ -421,10 +572,14 @@ int blktap_write_fe_ring(blkif_request_t 104.236 phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = 104.237 FOREIGN_FRAME(buf >> PAGE_SHIFT); 104.238 104.239 - __direct_remap_area_pages(blktap_vma->vm_mm, 104.240 - uvaddr, 104.241 - PAGE_SIZE, 104.242 - &mmu[i]); 104.243 + ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep); 104.244 + if (ret) 104.245 + { 104.246 + DPRINTK("error getting pte\n"); 104.247 + goto fail; 104.248 + } 104.249 + 104.250 + mmu[i].ptr = ptep; 104.251 mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK) 104.252 | pgprot_val(blktap_vma->vm_page_prot); 104.253 104.254 @@ -448,16 +603,17 @@ int blktap_write_fe_ring(blkif_request_t 104.255 if ( unlikely(mcl[i].result != 0) ) 104.256 { 104.257 DPRINTK("invalid buffer -- could not remap it\n"); 104.258 - fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); 104.259 - return -1; 104.260 + ret = mcl[i].result; 104.261 + goto fail; 104.262 } 104.263 } 104.264 if ( unlikely(mcl[i].result != 0) ) 104.265 { 104.266 DPRINTK("direct remapping of pages to /dev/blktap failed.\n"); 104.267 - return -1; 104.268 + ret = mcl[i].result; 104.269 + goto fail; 104.270 } 104.271 - 104.272 +#endif /* CONFIG_XEN_BLKDEV_GRANT */ 104.273 104.274 /* Mark mapped pages as reserved: */ 104.275 for ( i = 0; i < 
target->nr_segments; i++ ) 104.276 @@ -472,6 +628,10 @@ int blktap_write_fe_ring(blkif_request_t 104.277 blktap_ufe_ring.req_prod_pvt++; 104.278 104.279 return 0; 104.280 + 104.281 + fail: 104.282 + fast_flush_area(ID_TO_IDX(req->id), target->nr_segments); 104.283 + return ret; 104.284 } 104.285 104.286 int blktap_write_be_ring(blkif_response_t *rsp) 104.287 @@ -538,11 +698,10 @@ static int blktap_read_fe_ring(void) 104.288 map[offset] = NULL; 104.289 } 104.290 104.291 - 104.292 + fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); 104.293 zap_page_range(blktap_vma, 104.294 MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), 104.295 ar->nr_pages << PAGE_SHIFT, NULL); 104.296 - fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages); 104.297 write_resp_to_fe_ring(blkif, resp_s); 104.298 blktap_ufe_ring.rsp_cons = i + 1; 104.299 kick_fe_domain(blkif); 104.300 @@ -616,11 +775,17 @@ static struct miscdevice blktap_miscdev 104.301 104.302 int blktap_init(void) 104.303 { 104.304 - int err; 104.305 + int err, i, j; 104.306 104.307 if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) 104.308 BUG(); 104.309 104.310 +#ifdef CONFIG_XEN_BLKDEV_GRANT 104.311 + for (i=0; i<MAX_PENDING_REQS ; i++) 104.312 + for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++) 104.313 + BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j)); 104.314 +#endif 104.315 + 104.316 err = misc_register(&blktap_miscdev); 104.317 if ( err != 0 ) 104.318 {
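The grant path above maps every segment twice, once at a kernel virtual address and once into the tap process's page table, so two grant handles must be retained per (request, segment) slot and released together in fast_flush_area(). A small, self-contained illustration of that bookkeeping convention and its 0xFFFF "nothing mapped" sentinel (MAX_REQS and SEGS_PER_REQ are made-up sizes, not the driver's):

#include <stdio.h>
#include <stdint.h>

#define MAX_REQS     2
#define SEGS_PER_REQ 4

struct grant_handle_pair { uint16_t kernel, user; };

static struct grant_handle_pair handles[MAX_REQS * SEGS_PER_REQ];

#define HANDLE(idx, seg)  (handles[(idx) * SEGS_PER_REQ + (seg)])
#define INVALIDATE(h)     do { (h)->kernel = 0xFFFF; (h)->user = 0xFFFF; } while (0)
#define IS_INVALID(h)     ((h)->kernel == 0xFFFF && (h)->user == 0xFFFF)

int main(void)
{
    /* Initialise every slot to the sentinel, as blktap_init() does above. */
    for (int i = 0; i < MAX_REQS; i++)
        for (int j = 0; j < SEGS_PER_REQ; j++)
            INVALIDATE(&HANDLE(i, j));

    /* Pretend segment 1 of request 0 was mapped and got two handles back. */
    HANDLE(0, 1).kernel = 7;
    HANDLE(0, 1).user   = 8;

    /* The flush path only issues unmaps for slots that hold real handles. */
    for (int j = 0; j < SEGS_PER_REQ; j++)
        printf("req 0 seg %d: %s\n", j,
               IS_INVALID(&HANDLE(0, j)) ? "skip" : "unmap");
    return 0;
}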
121.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 16 12:15:23 2005 +0800 121.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h Tue Aug 16 10:09:07 2005 -0800 121.3 @@ -1,11 +1,33 @@ 121.4 #ifndef _ASM_I386_DMA_MAPPING_H 121.5 #define _ASM_I386_DMA_MAPPING_H 121.6 121.7 +/* 121.8 + * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for 121.9 + * documentation. 121.10 + */ 121.11 + 121.12 +#include <linux/config.h> 121.13 #include <linux/mm.h> 121.14 - 121.15 #include <asm/cache.h> 121.16 #include <asm/io.h> 121.17 #include <asm/scatterlist.h> 121.18 +#include <asm-i386/swiotlb.h> 121.19 + 121.20 +static inline int 121.21 +address_needs_mapping(struct device *hwdev, dma_addr_t addr) 121.22 +{ 121.23 + dma_addr_t mask = 0xffffffff; 121.24 + /* If the device has a mask, use it, otherwise default to 32 bits */ 121.25 + if (hwdev && hwdev->dma_mask) 121.26 + mask = *hwdev->dma_mask; 121.27 + return (addr & ~mask) != 0; 121.28 +} 121.29 + 121.30 +static inline int 121.31 +range_straddles_page_boundary(void *p, size_t size) 121.32 +{ 121.33 + return ((((unsigned long)p & ~PAGE_MASK) + size) > PAGE_SIZE); 121.34 +} 121.35 121.36 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) 121.37 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) 121.38 @@ -24,46 +46,18 @@ extern void 121.39 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, 121.40 enum dma_data_direction direction); 121.41 121.42 -static inline int 121.43 -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 121.44 - enum dma_data_direction direction) 121.45 -{ 121.46 - int i; 121.47 - 121.48 - BUG_ON(direction == DMA_NONE); 121.49 +extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg, 121.50 + int nents, enum dma_data_direction direction); 121.51 +extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, 121.52 + int nents, enum dma_data_direction direction); 121.53 121.54 - for (i = 0; i < nents; i++ ) { 121.55 - BUG_ON(!sg[i].page); 121.56 - 121.57 - sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset; 121.58 - } 121.59 - 121.60 - flush_write_buffers(); 121.61 - return nents; 121.62 -} 121.63 - 121.64 -static inline dma_addr_t 121.65 +extern dma_addr_t 121.66 dma_map_page(struct device *dev, struct page *page, unsigned long offset, 121.67 - size_t size, enum dma_data_direction direction) 121.68 -{ 121.69 - BUG_ON(direction == DMA_NONE); 121.70 - return page_to_phys(page) + offset; 121.71 -} 121.72 + size_t size, enum dma_data_direction direction); 121.73 121.74 -static inline void 121.75 +extern void 121.76 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, 121.77 - enum dma_data_direction direction) 121.78 -{ 121.79 - BUG_ON(direction == DMA_NONE); 121.80 -} 121.81 - 121.82 - 121.83 -static inline void 121.84 -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, 121.85 - enum dma_data_direction direction) 121.86 -{ 121.87 - BUG_ON(direction == DMA_NONE); 121.88 -} 121.89 + enum dma_data_direction direction); 121.90 121.91 extern void 121.92 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, 121.93 @@ -93,34 +87,25 @@ static inline void 121.94 dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, 121.95 enum dma_data_direction direction) 121.96 { 121.97 + if (swiotlb) 121.98 + swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction); 121.99 + flush_write_buffers(); 121.100 } 121.101 
121.102 static inline void 121.103 dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, 121.104 enum dma_data_direction direction) 121.105 { 121.106 + if (swiotlb) 121.107 + swiotlb_sync_sg_for_device(dev,sg,nelems,direction); 121.108 flush_write_buffers(); 121.109 } 121.110 121.111 -static inline int 121.112 -dma_mapping_error(dma_addr_t dma_addr) 121.113 -{ 121.114 - return 0; 121.115 -} 121.116 +extern int 121.117 +dma_mapping_error(dma_addr_t dma_addr); 121.118 121.119 -static inline int 121.120 -dma_supported(struct device *dev, u64 mask) 121.121 -{ 121.122 - /* 121.123 - * we fall back to GFP_DMA when the mask isn't all 1s, 121.124 - * so we can't guarantee allocations that must be 121.125 - * within a tighter range than GFP_DMA.. 121.126 - */ 121.127 - if(mask < 0x00ffffff) 121.128 - return 0; 121.129 - 121.130 - return 1; 121.131 -} 121.132 +extern int 121.133 +dma_supported(struct device *dev, u64 mask); 121.134 121.135 static inline int 121.136 dma_set_mask(struct device *dev, u64 mask) 121.137 @@ -133,6 +118,7 @@ dma_set_mask(struct device *dev, u64 mas 121.138 return 0; 121.139 } 121.140 121.141 +#ifdef __i386__ 121.142 static inline int 121.143 dma_get_cache_alignment(void) 121.144 { 121.145 @@ -140,6 +126,9 @@ dma_get_cache_alignment(void) 121.146 * maximum possible, to be safe */ 121.147 return (1 << L1_CACHE_SHIFT_MAX); 121.148 } 121.149 +#else 121.150 +extern int dma_get_cache_alignment(void); 121.151 +#endif 121.152 121.153 #define dma_is_consistent(d) (1) 121.154
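The rewritten i386 dma-mapping.h adds address_needs_mapping(), which decides whether a bus address is reachable by a device by testing it against the device's DMA mask (defaulting to 32 bits when none is set). A tiny self-contained demonstration of that test:

#include <stdio.h>
#include <stdint.h>

/* Same check as address_needs_mapping() in the header above: an address
 * needs SWIOTLB help when bits outside the device's DMA mask are set. */
static int needs_mapping(uint64_t addr, uint64_t dma_mask)
{
    return (addr & ~dma_mask) != 0;
}

int main(void)
{
    uint64_t mask32 = 0xffffffffULL;              /* typical 32-bit-only device */

    printf("0x00001000  -> %d\n", needs_mapping(0x00001000ULL, mask32));  /* 0: reachable */
    printf("0x100000000 -> %d\n", needs_mapping(0x100000000ULL, mask32)); /* 1: above 4GB */
    return 0;
}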
124.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 124.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h Tue Aug 16 10:09:07 2005 -0800 124.3 @@ -0,0 +1,32 @@ 124.4 +#ifndef _ASM_KMAP_TYPES_H 124.5 +#define _ASM_KMAP_TYPES_H 124.6 + 124.7 +#include <linux/config.h> 124.8 + 124.9 +#ifdef CONFIG_DEBUG_HIGHMEM 124.10 +# define D(n) __KM_FENCE_##n , 124.11 +#else 124.12 +# define D(n) 124.13 +#endif 124.14 + 124.15 +enum km_type { 124.16 +D(0) KM_BOUNCE_READ, 124.17 +D(1) KM_SKB_SUNRPC_DATA, 124.18 +D(2) KM_SKB_DATA_SOFTIRQ, 124.19 +D(3) KM_USER0, 124.20 +D(4) KM_USER1, 124.21 +D(5) KM_BIO_SRC_IRQ, 124.22 +D(6) KM_BIO_DST_IRQ, 124.23 +D(7) KM_PTE0, 124.24 +D(8) KM_PTE1, 124.25 +D(9) KM_IRQ0, 124.26 +D(10) KM_IRQ1, 124.27 +D(11) KM_SOFTIRQ0, 124.28 +D(12) KM_SOFTIRQ1, 124.29 +D(13) KM_SWIOTLB, 124.30 +D(14) KM_TYPE_NR 124.31 +}; 124.32 + 124.33 +#undef D 124.34 + 124.35 +#endif
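This new copy of kmap_types.h exists to add a KM_SWIOTLB atomic-kmap slot for the software IOTLB's bounce copies. A hedged sketch of the kind of highmem copy such a slot is used for (illustrative only, not the tree's swiotlb code):

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy 'len' bytes out of a (possibly highmem) page into a bounce buffer,
 * using the dedicated KM_SWIOTLB atomic kmap slot defined above. */
static void bounce_copy_from_page(struct page *page, unsigned long offset,
                                  char *bounce, size_t len)
{
    char *vaddr = kmap_atomic(page, KM_SWIOTLB);
    memcpy(bounce, vaddr + offset, len);
    kunmap_atomic(vaddr, KM_SWIOTLB);
}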
128.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Tue Aug 16 12:15:23 2005 +0800 128.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h Tue Aug 16 10:09:07 2005 -0800 128.3 @@ -43,11 +43,8 @@ int pcibios_set_irq_routing(struct pci_d 128.4 128.5 struct pci_dev; 128.6 128.7 -/* The PCI address space does equal the physical memory 128.8 - * address space. The networking and block device layers use 128.9 - * this boolean for bounce buffer decisions. 128.10 - */ 128.11 -#define PCI_DMA_BUS_IS_PHYS (1) 128.12 +/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */ 128.13 +#define PCI_DMA_BUS_IS_PHYS (0) 128.14 128.15 /* pci_unmap_{page,single} is a nop so... */ 128.16 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
133.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 133.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h Tue Aug 16 10:09:07 2005 -0800 133.3 @@ -0,0 +1,22 @@ 133.4 +#ifndef _I386_SCATTERLIST_H 133.5 +#define _I386_SCATTERLIST_H 133.6 + 133.7 +struct scatterlist { 133.8 + struct page *page; 133.9 + unsigned int offset; 133.10 + unsigned int length; 133.11 + dma_addr_t dma_address; 133.12 + unsigned int dma_length; 133.13 +}; 133.14 + 133.15 +/* These macros should be used after a pci_map_sg call has been done 133.16 + * to get bus addresses of each of the SG entries and their lengths. 133.17 + * You should only work with the number of sg entries pci_map_sg 133.18 + * returns. 133.19 + */ 133.20 +#define sg_dma_address(sg) ((sg)->dma_address) 133.21 +#define sg_dma_len(sg) ((sg)->dma_length) 133.22 + 133.23 +#define ISA_DMA_THRESHOLD (0x00ffffff) 133.24 + 133.25 +#endif /* !(_I386_SCATTERLIST_H) */
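The comment in the new scatterlist.h spells out the contract: after mapping, drivers read addresses and lengths only through sg_dma_address()/sg_dma_len(), and only for as many entries as the mapping call returned. A hedged kernel-style sketch of a driver following that contract with the dma_map_sg() declared in the new i386 dma-mapping.h (program_descriptor() is a made-up stand-in for controller-specific code):

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <asm/scatterlist.h>

/* Placeholder for whatever writes one bus address/length pair into a
 * real controller's DMA descriptor ring. */
static void program_descriptor(dma_addr_t bus_addr, unsigned int len)
{
}

/* Map a scatterlist for device->memory-from-CPU traffic and program one
 * descriptor per entry the mapping actually produced. */
static int submit_sg(struct device *dev, struct scatterlist *sg, int nents)
{
    int i, count;

    count = dma_map_sg(dev, sg, nents, DMA_TO_DEVICE);
    if (count == 0)
        return -1;

    for (i = 0; i < count; i++)
        program_descriptor(sg_dma_address(&sg[i]), sg_dma_len(&sg[i]));

    return count;
}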
134.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 134.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h Tue Aug 16 10:09:07 2005 -0800 134.3 @@ -0,0 +1,42 @@ 134.4 +#ifndef _ASM_SWIOTLB_H 134.5 +#define _ASM_SWIOTLB_H 1 134.6 + 134.7 +#include <linux/config.h> 134.8 + 134.9 +/* SWIOTLB interface */ 134.10 + 134.11 +extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, 134.12 + int dir); 134.13 +extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, 134.14 + size_t size, int dir); 134.15 +extern void swiotlb_sync_single_for_cpu(struct device *hwdev, 134.16 + dma_addr_t dev_addr, 134.17 + size_t size, int dir); 134.18 +extern void swiotlb_sync_single_for_device(struct device *hwdev, 134.19 + dma_addr_t dev_addr, 134.20 + size_t size, int dir); 134.21 +extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, 134.22 + struct scatterlist *sg, int nelems, 134.23 + int dir); 134.24 +extern void swiotlb_sync_sg_for_device(struct device *hwdev, 134.25 + struct scatterlist *sg, int nelems, 134.26 + int dir); 134.27 +extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, 134.28 + int nents, int direction); 134.29 +extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, 134.30 + int nents, int direction); 134.31 +extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); 134.32 +extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page, 134.33 + unsigned long offset, size_t size, 134.34 + enum dma_data_direction direction); 134.35 +extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address, 134.36 + size_t size, enum dma_data_direction direction); 134.37 +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); 134.38 + 134.39 +#ifdef CONFIG_SWIOTLB 134.40 +extern int swiotlb; 134.41 +#else 134.42 +#define swiotlb 0 134.43 +#endif 134.44 + 134.45 +#endif
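swiotlb.h exposes the software IOTLB entry points plus a global 'swiotlb' flag that compiles to 0 when CONFIG_SWIOTLB is off. A hedged sketch of the dispatch pattern this enables, the same if-(swiotlb) shape the new dma_sync_sg_* helpers use above; the real single-buffer mapping lives in the shared i386 pci-dma.c, not here:

#include <linux/types.h>
#include <linux/device.h>
#include <asm/io.h>
#include <asm-i386/swiotlb.h>

/* Bounce through the software IOTLB when it is enabled, otherwise hand the
 * bus address of the buffer straight to the device. */
static dma_addr_t map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
    if (swiotlb)
        return swiotlb_map_single(hwdev, ptr, size, dir);
    return virt_to_bus(ptr);
}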
135.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Tue Aug 16 12:15:23 2005 +0800 135.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h Tue Aug 16 10:09:07 2005 -0800 135.3 @@ -1,89 +1,1 @@ 135.4 -#ifndef _X8664_DMA_MAPPING_H 135.5 -#define _X8664_DMA_MAPPING_H 1 135.6 - 135.7 -/* 135.8 - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for 135.9 - * documentation. 135.10 - */ 135.11 - 135.12 -#include <linux/config.h> 135.13 - 135.14 -#include <asm/scatterlist.h> 135.15 -#include <asm/io.h> 135.16 -#include <asm/swiotlb.h> 135.17 - 135.18 -extern dma_addr_t bad_dma_address; 135.19 -#define dma_mapping_error(x) \ 135.20 - (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address)) 135.21 - 135.22 -void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 135.23 - unsigned gfp); 135.24 -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, 135.25 - dma_addr_t dma_handle); 135.26 - 135.27 -extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, 135.28 - enum dma_data_direction direction); 135.29 -extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, 135.30 - enum dma_data_direction direction); 135.31 - 135.32 -#define dma_map_page(dev,page,offset,size,dir) \ 135.33 - dma_map_single((dev), page_address(page)+(offset), (size), (dir)) 135.34 - 135.35 -extern void 135.36 -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, 135.37 - enum dma_data_direction direction); 135.38 - 135.39 -extern void 135.40 -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, 135.41 - enum dma_data_direction direction); 135.42 - 135.43 -static inline void dma_sync_sg_for_cpu(struct device *hwdev, 135.44 - struct scatterlist *sg, 135.45 - int nelems, int direction) 135.46 -{ 135.47 - if (direction == DMA_NONE) 135.48 - out_of_line_bug(); 135.49 - 135.50 - if (swiotlb) 135.51 - return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction); 135.52 - 135.53 - flush_write_buffers(); 135.54 -} 135.55 - 135.56 -static inline void dma_sync_sg_for_device(struct device *hwdev, 135.57 - struct scatterlist *sg, 135.58 - int nelems, int direction) 135.59 -{ 135.60 - if (direction == DMA_NONE) 135.61 - out_of_line_bug(); 135.62 - 135.63 - if (swiotlb) 135.64 - return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction); 135.65 - 135.66 - flush_write_buffers(); 135.67 -} 135.68 - 135.69 -extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg, 135.70 - int nents, int direction); 135.71 -extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, 135.72 - int nents, int direction); 135.73 - 135.74 -#define dma_unmap_page dma_unmap_single 135.75 - 135.76 -extern int dma_supported(struct device *hwdev, u64 mask); 135.77 -extern int dma_get_cache_alignment(void); 135.78 -#define dma_is_consistent(h) 1 135.79 - 135.80 -static inline int dma_set_mask(struct device *dev, u64 mask) 135.81 -{ 135.82 - if (!dev->dma_mask || !dma_supported(dev, mask)) 135.83 - return -EIO; 135.84 - *dev->dma_mask = mask; 135.85 - return 0; 135.86 -} 135.87 - 135.88 -static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) 135.89 -{ 135.90 - flush_write_buffers(); 135.91 -} 135.92 -#endif 135.93 +#include <asm-i386/dma-mapping.h>
138.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Tue Aug 16 12:15:23 2005 +0800 138.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h Tue Aug 16 10:09:07 2005 -0800 138.3 @@ -79,7 +79,9 @@ extern int iommu_sac_force; 138.4 #else 138.5 /* No IOMMU */ 138.6 138.7 -#define PCI_DMA_BUS_IS_PHYS 1 138.8 +/* On Xen we use SWIOTLB instead of blk-specific bounce buffers. */ 138.9 +#define PCI_DMA_BUS_IS_PHYS (0) 138.10 + 138.11 #define pci_dac_dma_supported(pci_dev, mask) 1 138.12 138.13 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
145.1 --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 12:15:23 2005 +0800 145.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 10:09:07 2005 -0800 145.3 @@ -134,7 +134,8 @@ void xen_invlpg_mask(cpumask_t *mask, un 145.4 #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) 145.5 #endif /* linux < 2.6.0 */ 145.6 145.7 -void xen_contig_memory(unsigned long vstart, unsigned int order); 145.8 +void xen_create_contiguous_region(unsigned long vstart, unsigned int order); 145.9 +void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); 145.10 145.11 #ifdef CONFIG_XEN_PHYSDEV_ACCESS 145.12 /* Allocate a contiguous empty region of low memory. Return virtual start. */
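hypervisor.h replaces xen_contig_memory() with an explicit create/destroy pair for machine-contiguous regions. A hedged sketch of the allocation pattern this supports, modeled on what the deleted x86_64 pci-dma.c did for dma_alloc_coherent(); the GFP flags and the destroy-before-free ordering are assumptions, not taken from the tree:

#include <linux/mm.h>
#include <asm-xen/hypervisor.h>

/* Grab pseudo-physically contiguous pages, then ask the hypervisor to back
 * them with machine-contiguous frames via the renamed helper above. */
static void *alloc_machine_contiguous(size_t size, unsigned int *order_out)
{
    unsigned int order = get_order(size);
    unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

    if (vstart == 0)
        return NULL;

    xen_create_contiguous_region(vstart, order);
    *order_out = order;
    return (void *)vstart;
}

static void free_machine_contiguous(void *vaddr, unsigned int order)
{
    /* Presumably the contiguous machine frames are handed back before the
     * pages themselves are freed. */
    xen_destroy_contiguous_region((unsigned long)vaddr, order);
    free_pages((unsigned long)vaddr, order);
}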
158.1 --- a/tools/console/daemon/io.c Tue Aug 16 12:15:23 2005 +0800 158.2 +++ b/tools/console/daemon/io.c Tue Aug 16 10:09:07 2005 -0800 158.3 @@ -87,6 +87,7 @@ struct domain 158.4 { 158.5 int domid; 158.6 int tty_fd; 158.7 + bool is_dead; 158.8 struct buffer buffer; 158.9 struct domain *next; 158.10 }; 158.11 @@ -156,10 +157,12 @@ static struct domain *create_domain(int 158.12 158.13 dom->domid = domid; 158.14 dom->tty_fd = domain_create_tty(dom); 158.15 + dom->is_dead = false; 158.16 dom->buffer.data = 0; 158.17 dom->buffer.size = 0; 158.18 dom->buffer.capacity = 0; 158.19 dom->buffer.max_capacity = 0; 158.20 + dom->next = 0; 158.21 158.22 dolog(LOG_DEBUG, "New domain %d", domid); 158.23 158.24 @@ -206,6 +209,16 @@ static void remove_domain(struct domain 158.25 } 158.26 } 158.27 158.28 +static void remove_dead_domains(struct domain *dom) 158.29 +{ 158.30 + if (dom == NULL) return; 158.31 + remove_dead_domains(dom->next); 158.32 + 158.33 + if (dom->is_dead) { 158.34 + remove_domain(dom); 158.35 + } 158.36 +} 158.37 + 158.38 static void handle_tty_read(struct domain *dom) 158.39 { 158.40 ssize_t len; 158.41 @@ -224,7 +237,7 @@ static void handle_tty_read(struct domai 158.42 if (domain_is_valid(dom->domid)) { 158.43 dom->tty_fd = domain_create_tty(dom); 158.44 } else { 158.45 - remove_domain(dom); 158.46 + dom->is_dead = true; 158.47 } 158.48 } else if (domain_is_valid(dom->domid)) { 158.49 msg.u.control.msg.length = len; 158.50 @@ -235,7 +248,7 @@ static void handle_tty_read(struct domai 158.51 } 158.52 } else { 158.53 close(dom->tty_fd); 158.54 - remove_domain(dom); 158.55 + dom->is_dead = true; 158.56 } 158.57 } 158.58 158.59 @@ -250,7 +263,7 @@ static void handle_tty_write(struct doma 158.60 if (domain_is_valid(dom->domid)) { 158.61 dom->tty_fd = domain_create_tty(dom); 158.62 } else { 158.63 - remove_domain(dom); 158.64 + dom->is_dead = true; 158.65 } 158.66 } else { 158.67 buffer_advance(&dom->buffer, len); 158.68 @@ -316,6 +329,7 @@ void handle_io(void) 158.69 158.70 ret = select(max_fd + 1, &readfds, &writefds, 0, &tv); 158.71 if (tv.tv_sec == 1 && (++num_of_writes % 100) == 0) { 158.72 +#if 0 158.73 /* FIXME */ 158.74 /* This is a nasty hack. xcs does not handle the 158.75 control channels filling up well at all. We'll 158.76 @@ -325,6 +339,7 @@ void handle_io(void) 158.77 going away */ 158.78 tv.tv_usec = 1000; 158.79 select(0, 0, 0, 0, &tv); 158.80 +#endif 158.81 } 158.82 enum_domains(); 158.83 158.84 @@ -333,13 +348,15 @@ void handle_io(void) 158.85 } 158.86 158.87 for (d = dom_head; d; d = d->next) { 158.88 - if (FD_ISSET(d->tty_fd, &readfds)) { 158.89 + if (!d->is_dead && FD_ISSET(d->tty_fd, &readfds)) { 158.90 handle_tty_read(d); 158.91 } 158.92 158.93 - if (FD_ISSET(d->tty_fd, &writefds)) { 158.94 + if (!d->is_dead && FD_ISSET(d->tty_fd, &writefds)) { 158.95 handle_tty_write(d); 158.96 } 158.97 } 158.98 + 158.99 + remove_dead_domains(dom_head); 158.100 } while (ret > -1); 158.101 }
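The daemon previously called remove_domain() from inside handle_tty_read()/handle_tty_write(), i.e. while handle_io() was still walking the domain list; the hunks above defer that by marking the entry is_dead and sweeping the list afterwards. A small self-contained illustration of the mark-then-sweep pattern (simplified types, iterative unlink rather than the daemon's recursive remove_dead_domains()):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    int id;
    bool is_dead;
    struct node *next;
};

static struct node *head;

/* Run only after iteration has finished: unlink and free marked entries. */
static void sweep_dead(struct node **pp)
{
    while (*pp) {
        if ((*pp)->is_dead) {
            struct node *dead = *pp;
            *pp = dead->next;          /* unlink */
            free(dead);
        } else {
            pp = &(*pp)->next;
        }
    }
}

int main(void)
{
    for (int id = 3; id >= 1; id--) {
        struct node *n = malloc(sizeof(*n));
        n->id = id;
        n->is_dead = (id == 2);        /* handlers would set this, not free */
        n->next = head;
        head = n;
    }

    sweep_dead(&head);                 /* entry 2 was marked dead */

    for (struct node *n = head; n; n = n->next)
        printf("live domain %d\n", n->id);
    return 0;
}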
162.1 --- a/tools/debugger/pdb/Domain.ml Tue Aug 16 12:15:23 2005 +0800 162.2 +++ b/tools/debugger/pdb/Domain.ml Tue Aug 16 10:09:07 2005 -0800 162.3 @@ -36,6 +36,7 @@ let string_of_context ctx = 162.4 Printf.sprintf "{domain} domain: %d, vcpu: %d" 162.5 ctx.domain ctx.vcpu 162.6 162.7 +external read_register : context_t -> int -> int32 = "dom_read_register" 162.8 external read_registers : context_t -> registers = "dom_read_registers" 162.9 external write_register : context_t -> register -> int32 -> unit = 162.10 "dom_write_register"
163.1 --- a/tools/debugger/pdb/Domain.mli Tue Aug 16 12:15:23 2005 +0800 163.2 +++ b/tools/debugger/pdb/Domain.mli Tue Aug 16 10:09:07 2005 -0800 163.3 @@ -22,6 +22,7 @@ val get_vcpu : context_t -> int 163.4 163.5 val string_of_context : context_t -> string 163.6 163.7 +val read_register : context_t -> int -> int32 163.8 val read_registers : context_t -> registers 163.9 val write_register : context_t -> register -> int32 -> unit 163.10 val read_memory : context_t -> int32 -> int -> int list
164.1 --- a/tools/debugger/pdb/Makefile Tue Aug 16 12:15:23 2005 +0800 164.2 +++ b/tools/debugger/pdb/Makefile Tue Aug 16 10:09:07 2005 -0800 164.3 @@ -33,7 +33,8 @@ LIBDIRS += ../libxendebug 164.4 LIBS += unix str 164.5 164.6 # bc = byte-code, dc = debug byte-code 164.7 -all : patches dc 164.8 +# patches = patch linux domU source code 164.9 +all : dc 164.10 164.11 SOURCES += pdb_caml_xc.c 164.12 SOURCES += pdb_caml_domain.c pdb_caml_process.c
165.1 --- a/tools/debugger/pdb/PDB.ml Tue Aug 16 12:15:23 2005 +0800 165.2 +++ b/tools/debugger/pdb/PDB.ml Tue Aug 16 10:09:07 2005 -0800 165.3 @@ -219,6 +219,17 @@ let add_default_context sock = 165.4 165.5 (***************************************************************************) 165.6 165.7 +let read_register ctx register = (* register is int32 because of sscanf *) 165.8 + match ctx with 165.9 + | Void -> 0l (* default for startup *) 165.10 + | Domain d -> Domain.read_register d register 165.11 + | Process p -> 165.12 + begin 165.13 + Process.read_register p register; 165.14 + raise No_reply 165.15 + end 165.16 + | _ -> raise (Unimplemented "read registers") 165.17 + 165.18 let read_registers ctx = 165.19 match ctx with 165.20 | Void -> Intel.null_registers (* default for startup *) 165.21 @@ -278,15 +289,43 @@ let step ctx = 165.22 let insert_memory_breakpoint ctx addr len = 165.23 match ctx with 165.24 | Domain d -> Domain.insert_memory_breakpoint d addr len 165.25 - | Process p -> Process.insert_memory_breakpoint p addr len 165.26 + | Process p -> 165.27 + begin 165.28 + Process.insert_memory_breakpoint p addr len; 165.29 + raise No_reply 165.30 + end 165.31 | _ -> raise (Unimplemented "insert memory breakpoint") 165.32 165.33 let remove_memory_breakpoint ctx addr len = 165.34 match ctx with 165.35 | Domain d -> Domain.remove_memory_breakpoint d addr len 165.36 - | Process p -> Process.remove_memory_breakpoint p addr len 165.37 + | Process p -> 165.38 + begin 165.39 + Process.remove_memory_breakpoint p addr len; 165.40 + raise No_reply 165.41 + end 165.42 | _ -> raise (Unimplemented "remove memory breakpoint") 165.43 165.44 +let insert_watchpoint ctx kind addr len = 165.45 + match ctx with 165.46 +(* | Domain d -> Domain.insert_watchpoint d kind addr len TODO *) 165.47 + | Process p -> 165.48 + begin 165.49 + Process.insert_watchpoint p kind addr len; 165.50 + raise No_reply 165.51 + end 165.52 + | _ -> raise (Unimplemented "insert watchpoint") 165.53 + 165.54 +let remove_watchpoint ctx kind addr len = 165.55 + match ctx with 165.56 +(* | Domain d -> Domain.remove_watchpoint d kind addr len TODO *) 165.57 + | Process p -> 165.58 + begin 165.59 + Process.remove_watchpoint p kind addr len; 165.60 + raise No_reply 165.61 + end 165.62 + | _ -> raise (Unimplemented "remove watchpoint") 165.63 + 165.64 165.65 let pause ctx = 165.66 match ctx with
166.1 --- a/tools/debugger/pdb/Process.ml Tue Aug 16 12:15:23 2005 +0800 166.2 +++ b/tools/debugger/pdb/Process.ml Tue Aug 16 10:09:07 2005 -0800 166.3 @@ -54,6 +54,7 @@ let attach_debugger proc_ctx dom_ctx = 166.4 proc_ctx.ring <- Xen_domain.get_ring dom_ctx; 166.5 _attach_debugger proc_ctx 166.6 166.7 +external read_register : context_t -> int -> unit = "proc_read_register" 166.8 external read_registers : context_t -> unit = "proc_read_registers" 166.9 external write_register : context_t -> register -> int32 -> unit = 166.10 "proc_write_register" 166.11 @@ -69,6 +70,10 @@ external insert_memory_breakpoint : cont 166.12 "proc_insert_memory_breakpoint" 166.13 external remove_memory_breakpoint : context_t -> int32 -> int -> unit = 166.14 "proc_remove_memory_breakpoint" 166.15 +external insert_watchpoint : context_t -> int -> int32 -> int -> unit = 166.16 + "proc_insert_watchpoint" 166.17 +external remove_watchpoint : context_t -> int -> int32 -> int -> unit = 166.18 + "proc_remove_watchpoint" 166.19 166.20 let pause ctx = 166.21 pause_target ctx
167.1 --- a/tools/debugger/pdb/Process.mli Tue Aug 16 12:15:23 2005 +0800 167.2 +++ b/tools/debugger/pdb/Process.mli Tue Aug 16 10:09:07 2005 -0800 167.3 @@ -26,7 +26,7 @@ val attach_debugger : context_t -> Xen_d 167.4 val detach_debugger : context_t -> unit 167.5 val pause : context_t -> unit 167.6 167.7 - 167.8 +val read_register : context_t -> int -> unit 167.9 val read_registers : context_t -> unit 167.10 val write_register : context_t -> register -> int32 -> unit 167.11 val read_memory : context_t -> int32 -> int -> unit 167.12 @@ -37,3 +37,5 @@ val step : context_t -> unit 167.13 167.14 val insert_memory_breakpoint : context_t -> int32 -> int -> unit 167.15 val remove_memory_breakpoint : context_t -> int32 -> int -> unit 167.16 +val insert_watchpoint : context_t -> int -> int32 -> int -> unit 167.17 +val remove_watchpoint : context_t -> int -> int32 -> int -> unit
171.1 --- a/tools/debugger/pdb/debugger.ml Tue Aug 16 12:15:23 2005 +0800 171.2 +++ b/tools/debugger/pdb/debugger.ml Tue Aug 16 10:09:07 2005 -0800 171.3 @@ -53,10 +53,20 @@ let gdb_step ctx = 171.4 PDB.step ctx; 171.5 raise No_reply 171.6 171.7 +(** 171.8 + Read Register Command. 171.9 + return register as a 4-byte value. 171.10 + *) 171.11 +let gdb_read_register ctx command = 171.12 + let read_reg register = 171.13 + (Printf.sprintf "%08lx" (Util.flip_int32 (PDB.read_register ctx register))) 171.14 + in 171.15 + Scanf.sscanf command "p%x" read_reg 171.16 + 171.17 171.18 (** 171.19 Read Registers Command. 171.20 - returns 16 4-byte registers in a particular defined by gdb. 171.21 + returns 16 4-byte registers in a particular format defined by gdb. 171.22 *) 171.23 let gdb_read_registers ctx = 171.24 let regs = PDB.read_registers ctx in 171.25 @@ -100,7 +110,7 @@ let gdb_read_memory ctx command = 171.26 with 171.27 Failure s -> "E02" 171.28 in 171.29 - Scanf.sscanf command "m%lx,%d" read_mem 171.30 + Scanf.sscanf command "m%lx,%x" read_mem 171.31 171.32 171.33 171.34 @@ -218,16 +228,24 @@ let pdb_extensions command sock = 171.35 (** 171.36 Insert Breakpoint or Watchpoint Packet 171.37 *) 171.38 + 171.39 +let bwc_watch_write = 102 (* from pdb_module.h *) 171.40 +let bwc_watch_read = 103 171.41 +let bwc_watch_access = 104 171.42 + 171.43 let gdb_insert_bwcpoint ctx command = 171.44 let insert cmd addr length = 171.45 try 171.46 match cmd with 171.47 | 0 -> PDB.insert_memory_breakpoint ctx addr length; "OK" 171.48 + | 2 -> PDB.insert_watchpoint ctx bwc_watch_write addr length; "OK" 171.49 + | 3 -> PDB.insert_watchpoint ctx bwc_watch_read addr length; "OK" 171.50 + | 4 -> PDB.insert_watchpoint ctx bwc_watch_access addr length; "OK" 171.51 | _ -> "" 171.52 with 171.53 Failure s -> "E03" 171.54 in 171.55 - Scanf.sscanf command "Z%d,%lx,%d" insert 171.56 + Scanf.sscanf command "Z%d,%lx,%x" insert 171.57 171.58 (** 171.59 Remove Breakpoint or Watchpoint Packet 171.60 @@ -237,6 +255,9 @@ let gdb_remove_bwcpoint ctx command = 171.61 try 171.62 match cmd with 171.63 | 0 -> PDB.remove_memory_breakpoint ctx addr length; "OK" 171.64 + | 2 -> PDB.remove_watchpoint ctx bwc_watch_write addr length; "OK" 171.65 + | 3 -> PDB.remove_watchpoint ctx bwc_watch_read addr length; "OK" 171.66 + | 4 -> PDB.remove_watchpoint ctx bwc_watch_access addr length; "OK" 171.67 | _ -> "" 171.68 with 171.69 Failure s -> "E04" 171.70 @@ -260,6 +281,7 @@ let process_command command sock = 171.71 | 'k' -> gdb_kill () 171.72 | 'm' -> gdb_read_memory ctx command 171.73 | 'M' -> gdb_write_memory ctx command 171.74 + | 'p' -> gdb_read_register ctx command 171.75 | 'P' -> gdb_write_register ctx command 171.76 | 'q' -> gdb_query command 171.77 | 's' -> gdb_step ctx 171.78 @@ -270,7 +292,7 @@ let process_command command sock = 171.79 | 'Z' -> gdb_insert_bwcpoint ctx command 171.80 | _ -> 171.81 print_endline (Printf.sprintf "unknown gdb command [%s]" command); 171.82 - "E02" 171.83 + "" 171.84 with 171.85 Unimplemented s -> 171.86 print_endline (Printf.sprintf "loser. unimplemented command [%s][%s]"
173.1 --- a/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 12:15:23 2005 +0800 173.2 +++ b/tools/debugger/pdb/linux-2.6-module/debug.c Tue Aug 16 10:09:07 2005 -0800 173.3 @@ -9,33 +9,143 @@ 173.4 #include <asm-i386/kdebug.h> 173.5 #include <asm-xen/asm-i386/processor.h> 173.6 #include <asm-xen/asm-i386/ptrace.h> 173.7 +#include <asm-xen/asm-i386/tlbflush.h> 173.8 #include <asm-xen/xen-public/xen.h> 173.9 #include "pdb_module.h" 173.10 #include "pdb_debug.h" 173.11 173.12 -#define BWC_DEBUG 1 173.13 -#define BWC_INT3 3 173.14 + 173.15 +static int pdb_debug_fn (struct pt_regs *regs, long error_code, 173.16 + unsigned int condition); 173.17 +static int pdb_int3_fn (struct pt_regs *regs, long error_code); 173.18 +static int pdb_page_fault_fn (struct pt_regs *regs, long error_code, 173.19 + unsigned int condition); 173.20 + 173.21 +/***********************************************************************/ 173.22 + 173.23 typedef struct bwcpoint /* break/watch/catch point */ 173.24 { 173.25 struct list_head list; 173.26 memory_t address; 173.27 - u32 domain; 173.28 + int length; 173.29 + 173.30 + u8 type; /* BWC_??? */ 173.31 + u8 mode; /* for BWC_PAGE, the current protection mode */ 173.32 u32 process; 173.33 - u8 old_value; /* old value for software bkpt */ 173.34 - u8 type; /* BWC_??? */ 173.35 + u8 error; /* error occured when enabling: don't disable. */ 173.36 + 173.37 + /* original values */ 173.38 + u8 orig_bkpt; /* single byte breakpoint */ 173.39 + pte_t orig_pte; 173.40 + 173.41 + struct list_head watchpt_read_list; /* read watchpoints on this page */ 173.42 + struct list_head watchpt_write_list; /* write */ 173.43 + struct list_head watchpt_access_list; /* access */ 173.44 + struct list_head watchpt_disabled_list; /* disabled */ 173.45 + 173.46 + struct bwcpoint *parent; /* watchpoint: bwc_watch (the page) */ 173.47 + struct bwcpoint *watchpoint; /* bwc_watch_step: original watchpoint */ 173.48 } bwcpoint_t, *bwcpoint_p; 173.49 173.50 -static bwcpoint_t bwcpoint_list; 173.51 +static struct list_head bwcpoint_list = LIST_HEAD_INIT(bwcpoint_list); 173.52 + 173.53 +#define _pdb_bwcpoint_alloc(_var) \ 173.54 +{ \ 173.55 + if ( (_var = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL)) == NULL ) \ 173.56 + printk("error: unable to allocate memory %d\n", __LINE__); \ 173.57 + else { \ 173.58 + memset(_var, 0, sizeof(bwcpoint_t)); \ 173.59 + INIT_LIST_HEAD(&_var->watchpt_read_list); \ 173.60 + INIT_LIST_HEAD(&_var->watchpt_write_list); \ 173.61 + INIT_LIST_HEAD(&_var->watchpt_access_list); \ 173.62 + INIT_LIST_HEAD(&_var->watchpt_disabled_list); \ 173.63 + } \ 173.64 +} 173.65 + 173.66 +/***********************************************************************/ 173.67 + 173.68 +static void _pdb_bwc_print_list (struct list_head *, char *, int); 173.69 + 173.70 +static void 173.71 +_pdb_bwc_print (bwcpoint_p bwc, char *label, int level) 173.72 +{ 173.73 + printk("%s%03d 0x%08lx:0x%02x %c\n", label, bwc->type, 173.74 + bwc->address, bwc->length, bwc->error ? 
'e' : '-'); 173.75 + 173.76 + if ( !list_empty(&bwc->watchpt_read_list) ) 173.77 + _pdb_bwc_print_list(&bwc->watchpt_read_list, "r", level); 173.78 + if ( !list_empty(&bwc->watchpt_write_list) ) 173.79 + _pdb_bwc_print_list(&bwc->watchpt_write_list, "w", level); 173.80 + if ( !list_empty(&bwc->watchpt_access_list) ) 173.81 + _pdb_bwc_print_list(&bwc->watchpt_access_list, "a", level); 173.82 + if ( !list_empty(&bwc->watchpt_disabled_list) ) 173.83 + _pdb_bwc_print_list(&bwc->watchpt_disabled_list, "d", level); 173.84 +} 173.85 + 173.86 +static void 173.87 +_pdb_bwc_print_list (struct list_head *bwc_list, char *label, int level) 173.88 +{ 173.89 + struct list_head *ptr; 173.90 + int counter = 0; 173.91 + 173.92 + list_for_each(ptr, bwc_list) 173.93 + { 173.94 + bwcpoint_p bwc = list_entry(ptr, bwcpoint_t, list); 173.95 + printk(" %s[%02d]%s ", level > 0 ? " " : "", counter++, 173.96 + level > 0 ? "" : " "); 173.97 + _pdb_bwc_print(bwc, label, level+1); 173.98 + } 173.99 + 173.100 + if (counter == 0) 173.101 + { 173.102 + printk(" empty list\n"); 173.103 + } 173.104 +} 173.105 173.106 void 173.107 -pdb_initialize_bwcpoint (void) 173.108 +pdb_bwc_print_list (void) 173.109 { 173.110 - memset((void *) &bwcpoint_list, 0, sizeof(bwcpoint_t)); 173.111 - INIT_LIST_HEAD(&bwcpoint_list.list); 173.112 - 173.113 - return; 173.114 + _pdb_bwc_print_list(&bwcpoint_list, " ", 0); 173.115 } 173.116 173.117 +bwcpoint_p 173.118 +pdb_search_watchpoint (u32 process, memory_t address) 173.119 +{ 173.120 + bwcpoint_p bwc_watch = (bwcpoint_p) 0; 173.121 + bwcpoint_p bwc_entry = (bwcpoint_p) 0; 173.122 + struct list_head *ptr; 173.123 + 173.124 + list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */ 173.125 + { 173.126 + bwc_watch = list_entry(ptr, bwcpoint_t, list); 173.127 + if (bwc_watch->address == (address & PAGE_MASK)) break; 173.128 + } 173.129 + 173.130 + if ( !bwc_watch ) 173.131 + { 173.132 + return (bwcpoint_p) 0; 173.133 + } 173.134 + 173.135 +#define __pdb_search_watchpoint_list(__list) \ 173.136 + list_for_each(ptr, (__list)) \ 173.137 + { \ 173.138 + bwc_entry = list_entry(ptr, bwcpoint_t, list); \ 173.139 + if ( bwc_entry->process == process && \ 173.140 + bwc_entry->address <= address && \ 173.141 + bwc_entry->address + bwc_entry->length > address ) \ 173.142 + return bwc_entry; \ 173.143 + } 173.144 + 173.145 + __pdb_search_watchpoint_list(&bwc_watch->watchpt_read_list); 173.146 + __pdb_search_watchpoint_list(&bwc_watch->watchpt_write_list); 173.147 + __pdb_search_watchpoint_list(&bwc_watch->watchpt_access_list); 173.148 + 173.149 +#undef __pdb_search_watchpoint_list 173.150 + 173.151 + return (bwcpoint_p) 0; 173.152 +} 173.153 + 173.154 +/*************************************************************/ 173.155 173.156 int 173.157 pdb_suspend (struct task_struct *target) 173.158 @@ -137,6 +247,35 @@ static void 173.159 } 173.160 173.161 int 173.162 +pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op) 173.163 +{ 173.164 + int rc = 0; 173.165 + 173.166 + switch (op->reg) 173.167 + { 173.168 + case 0: op->value = _pdb_get_register(target, LINUX_EAX); break; 173.169 + case 1: op->value = _pdb_get_register(target, LINUX_ECX); break; 173.170 + case 2: op->value = _pdb_get_register(target, LINUX_EDX); break; 173.171 + case 3: op->value = _pdb_get_register(target, LINUX_EBX); break; 173.172 + case 4: op->value = _pdb_get_register(target, LINUX_ESP); break; 173.173 + case 5: op->value = _pdb_get_register(target, LINUX_EBP); break; 173.174 + case 6: op->value = _pdb_get_register(target, 
LINUX_ESI); break; 173.175 + case 7: op->value = _pdb_get_register(target, LINUX_EDI); break; 173.176 + case 8: op->value = _pdb_get_register(target, LINUX_EIP); break; 173.177 + case 9: op->value = _pdb_get_register(target, LINUX_EFL); break; 173.178 + 173.179 + case 10: op->value = _pdb_get_register(target, LINUX_CS); break; 173.180 + case 11: op->value = _pdb_get_register(target, LINUX_SS); break; 173.181 + case 12: op->value = _pdb_get_register(target, LINUX_DS); break; 173.182 + case 13: op->value = _pdb_get_register(target, LINUX_ES); break; 173.183 + case 14: op->value = _pdb_get_register(target, LINUX_FS); break; 173.184 + case 15: op->value = _pdb_get_register(target, LINUX_GS); break; 173.185 + } 173.186 + 173.187 + return rc; 173.188 +} 173.189 + 173.190 +int 173.191 pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op) 173.192 { 173.193 int rc = 0; 173.194 @@ -209,18 +348,14 @@ pdb_step (struct task_struct *target) 173.195 eflags |= X86_EFLAGS_TF; 173.196 _pdb_set_register(target, LINUX_EFL, eflags); 173.197 173.198 - bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); 173.199 - if ( bkpt == NULL ) 173.200 - { 173.201 - printk("error: unable to allocation memory\n"); 173.202 - return -1; 173.203 - } 173.204 + _pdb_bwcpoint_alloc(bkpt); 173.205 + if ( bkpt == NULL ) return -1; 173.206 173.207 bkpt->process = target->pid; 173.208 bkpt->address = 0; 173.209 bkpt->type = BWC_DEBUG; 173.210 173.211 - list_add(&bkpt->list, &bwcpoint_list.list); 173.212 + list_add_tail(&bkpt->list, &bwcpoint_list); 173.213 173.214 wake_up_process(target); 173.215 173.216 @@ -237,31 +372,27 @@ pdb_insert_memory_breakpoint (struct tas 173.217 173.218 printk("insert breakpoint %d:%lx len: %d\n", target->pid, address, length); 173.219 173.220 - bkpt = kmalloc(sizeof(bwcpoint_t), GFP_KERNEL); 173.221 - if ( bkpt == NULL ) 173.222 + if ( length != 1 ) 173.223 { 173.224 - printk("error: unable to allocation memory\n"); 173.225 + printk("error: breakpoint length should be 1\n"); 173.226 return -1; 173.227 } 173.228 173.229 - if ( length != 1 ) 173.230 - { 173.231 - printk("error: breakpoint length should be 1\n"); 173.232 - kfree(bkpt); 173.233 - return -1; 173.234 - } 173.235 + _pdb_bwcpoint_alloc(bkpt); 173.236 + if ( bkpt == NULL ) return -1; 173.237 173.238 bkpt->process = target->pid; 173.239 bkpt->address = address; 173.240 bkpt->type = BWC_INT3; 173.241 173.242 - pdb_access_memory(target, address, &bkpt->old_value, 1, 0); 173.243 - pdb_access_memory(target, address, &breakpoint_opcode, 1, 1); 173.244 + pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_READ); 173.245 + pdb_access_memory(target, address, &breakpoint_opcode, 1, PDB_MEM_WRITE); 173.246 173.247 - list_add(&bkpt->list, &bwcpoint_list.list); 173.248 + list_add_tail(&bkpt->list, &bwcpoint_list); 173.249 173.250 printk("breakpoint_set %d:%lx OLD: 0x%x\n", 173.251 - target->pid, address, bkpt->old_value); 173.252 + target->pid, address, bkpt->orig_bkpt); 173.253 + pdb_bwc_print_list(); 173.254 173.255 return rc; 173.256 } 173.257 @@ -276,7 +407,7 @@ pdb_remove_memory_breakpoint (struct tas 173.258 printk ("remove breakpoint %d:%lx\n", target->pid, address); 173.259 173.260 struct list_head *entry; 173.261 - list_for_each(entry, &bwcpoint_list.list) 173.262 + list_for_each(entry, &bwcpoint_list) 173.263 { 173.264 bkpt = list_entry(entry, bwcpoint_t, list); 173.265 if ( target->pid == bkpt->process && 173.266 @@ -285,17 +416,223 @@ pdb_remove_memory_breakpoint (struct tas 173.267 break; 173.268 } 173.269 173.270 - if (bkpt 
== &bwcpoint_list || bkpt == NULL) 173.271 + if (entry == &bwcpoint_list) 173.272 { 173.273 printk ("error: no breakpoint found\n"); 173.274 return -1; 173.275 } 173.276 173.277 + pdb_access_memory(target, address, &bkpt->orig_bkpt, 1, PDB_MEM_WRITE); 173.278 + 173.279 list_del(&bkpt->list); 173.280 + kfree(bkpt); 173.281 + 173.282 + pdb_bwc_print_list(); 173.283 + 173.284 + return rc; 173.285 +} 173.286 + 173.287 +#define PDB_PTE_UPDATE 1 173.288 +#define PDB_PTE_RESTORE 2 173.289 + 173.290 +int 173.291 +pdb_change_pte (struct task_struct *target, bwcpoint_p bwc, int mode) 173.292 +{ 173.293 + int rc = 0; 173.294 + pgd_t *pgd; 173.295 + pud_t *pud; 173.296 + pmd_t *pmd; 173.297 + pte_t *ptep; 173.298 + 173.299 + pgd = pgd_offset(target->mm, bwc->address); 173.300 + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return -1; 173.301 + 173.302 + pud = pud_offset(pgd, bwc->address); 173.303 + if (pud_none(*pud) || unlikely(pud_bad(*pud))) return -2; 173.304 + 173.305 + pmd = pmd_offset(pud, bwc->address); 173.306 + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return -3; 173.307 + 173.308 + ptep = pte_offset_map(pmd, bwc->address); 173.309 + if (!ptep) return -4; 173.310 + 173.311 + switch ( mode ) 173.312 + { 173.313 + case PDB_PTE_UPDATE: /* added or removed a watchpoint. update pte. */ 173.314 + { 173.315 + pte_t new_pte; 173.316 + 173.317 + if ( pte_val(bwc->parent->orig_pte) == 0 ) /* new watchpoint page */ 173.318 + { 173.319 + bwc->parent->orig_pte = *ptep; 173.320 + } 173.321 + 173.322 + new_pte = bwc->parent->orig_pte; 173.323 + 173.324 + if ( !list_empty(&bwc->parent->watchpt_read_list) || 173.325 + !list_empty(&bwc->parent->watchpt_access_list) ) 173.326 + { 173.327 + new_pte = pte_rdprotect(new_pte); 173.328 + } 173.329 + 173.330 + if ( !list_empty(&bwc->parent->watchpt_write_list) || 173.331 + !list_empty(&bwc->parent->watchpt_access_list) ) 173.332 + { 173.333 + new_pte = pte_wrprotect(new_pte); 173.334 + } 173.335 + 173.336 + if ( pte_val(new_pte) != pte_val(*ptep) ) 173.337 + { 173.338 + *ptep = new_pte; 173.339 + flush_tlb_mm(target->mm); 173.340 + } 173.341 + break; 173.342 + } 173.343 + case PDB_PTE_RESTORE : /* suspend watchpoint by restoring original pte */ 173.344 + { 173.345 + *ptep = bwc->parent->orig_pte; 173.346 + flush_tlb_mm(target->mm); 173.347 + break; 173.348 + } 173.349 + default : 173.350 + { 173.351 + printk("(linux) unknown mode %d %d\n", mode, __LINE__); 173.352 + break; 173.353 + } 173.354 + } 173.355 + 173.356 + pte_unmap(ptep); /* can i flush the tlb before pte_unmap? 
*/ 173.357 + 173.358 + return rc; 173.359 +} 173.360 + 173.361 +int 173.362 +pdb_insert_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt) 173.363 +{ 173.364 + int rc = 0; 173.365 + 173.366 + bwcpoint_p bwc_watch; 173.367 + bwcpoint_p bwc_entry; 173.368 + struct list_head *ptr; 173.369 + unsigned long page = watchpt->address & PAGE_MASK; 173.370 + struct list_head *watchpoint_list; 173.371 + 173.372 + printk("insert watchpoint: %d %x %x\n", 173.373 + watchpt->type, watchpt->address, watchpt->length); 173.374 + 173.375 + list_for_each(ptr, &bwcpoint_list) /* find existing bwc page entry */ 173.376 + { 173.377 + bwc_watch = list_entry(ptr, bwcpoint_t, list); 173.378 + 173.379 + if (bwc_watch->address == page) goto got_bwc_watch; 173.380 + } 173.381 173.382 - pdb_access_memory(target, address, &bkpt->old_value, 1, 1); 173.383 + _pdb_bwcpoint_alloc(bwc_watch); /* create new bwc:watch */ 173.384 + if ( bwc_watch == NULL ) return -1; 173.385 + 173.386 + bwc_watch->type = BWC_WATCH; 173.387 + bwc_watch->process = target->pid; 173.388 + bwc_watch->address = page; 173.389 + 173.390 + list_add_tail(&bwc_watch->list, &bwcpoint_list); 173.391 + 173.392 + got_bwc_watch: 173.393 + 173.394 + switch (watchpt->type) 173.395 + { 173.396 + case BWC_WATCH_READ: 173.397 + watchpoint_list = &bwc_watch->watchpt_read_list; break; 173.398 + case BWC_WATCH_WRITE: 173.399 + watchpoint_list = &bwc_watch->watchpt_write_list; break; 173.400 + case BWC_WATCH_ACCESS: 173.401 + watchpoint_list = &bwc_watch->watchpt_access_list; break; 173.402 + default: 173.403 + printk("unknown type %d\n", watchpt->type); return -2; 173.404 + } 173.405 + 173.406 + _pdb_bwcpoint_alloc(bwc_entry); /* create new bwc:entry */ 173.407 + if ( bwc_entry == NULL ) return -1; 173.408 + 173.409 + bwc_entry->process = target->pid; 173.410 + bwc_entry->address = watchpt->address; 173.411 + bwc_entry->length = watchpt->length; 173.412 + bwc_entry->type = watchpt->type; 173.413 + bwc_entry->parent = bwc_watch; 173.414 + 173.415 + list_add_tail(&bwc_entry->list, watchpoint_list); 173.416 + pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE); 173.417 + 173.418 + pdb_bwc_print_list(); 173.419 + 173.420 + return rc; 173.421 +} 173.422 + 173.423 +int 173.424 +pdb_remove_watchpoint (struct task_struct *target, pdb_op_watchpt_p watchpt) 173.425 +{ 173.426 + int rc = 0; 173.427 + bwcpoint_p bwc_watch = (bwcpoint_p) NULL; 173.428 + bwcpoint_p bwc_entry = (bwcpoint_p) NULL; 173.429 + unsigned long page = watchpt->address & PAGE_MASK; 173.430 + struct list_head *ptr; 173.431 + struct list_head *watchpoint_list; 173.432 + 173.433 + printk("remove watchpoint: %d %x %x\n", 173.434 + watchpt->type, watchpt->address, watchpt->length); 173.435 173.436 - kfree(bkpt); 173.437 + list_for_each(ptr, &bwcpoint_list) /* find bwc page entry */ 173.438 + { 173.439 + bwc_watch = list_entry(ptr, bwcpoint_t, list); 173.440 + if (bwc_watch->address == page) break; 173.441 + } 173.442 + 173.443 + if ( !bwc_watch ) 173.444 + { 173.445 + printk("(linux) delete watchpoint: can't find bwc page 0x%08x\n", 173.446 + watchpt->address); 173.447 + return -1; 173.448 + } 173.449 + 173.450 + switch (watchpt->type) 173.451 + { 173.452 + case BWC_WATCH_READ: 173.453 + watchpoint_list = &bwc_watch->watchpt_read_list; break; 173.454 + case BWC_WATCH_WRITE: 173.455 + watchpoint_list = &bwc_watch->watchpt_write_list; break; 173.456 + case BWC_WATCH_ACCESS: 173.457 + watchpoint_list = &bwc_watch->watchpt_access_list; break; 173.458 + default: 173.459 + printk("unknown type %d\n", 
watchpt->type); return -2; 173.460 + } 173.461 + 173.462 + list_for_each(ptr, watchpoint_list) /* find watchpoint */ 173.463 + { 173.464 + bwc_entry = list_entry(ptr, bwcpoint_t, list); 173.465 + if ( bwc_entry->address == watchpt->address && 173.466 + bwc_entry->length == watchpt->length ) break; 173.467 + } 173.468 + 173.469 + if ( !bwc_entry ) /* or ptr == watchpoint_list */ 173.470 + { 173.471 + printk("(linux) delete watchpoint: can't find watchpoint 0x%08x\n", 173.472 + watchpt->address); 173.473 + return -1; 173.474 + } 173.475 + 173.476 + list_del(&bwc_entry->list); 173.477 + pdb_change_pte(target, bwc_entry, PDB_PTE_UPDATE); 173.478 + kfree(bwc_entry); 173.479 + 173.480 + 173.481 + if ( list_empty(&bwc_watch->watchpt_read_list) && 173.482 + list_empty(&bwc_watch->watchpt_write_list) && 173.483 + list_empty(&bwc_watch->watchpt_access_list) ) 173.484 + { 173.485 + list_del(&bwc_watch->list); 173.486 + kfree(bwc_watch); 173.487 + } 173.488 + 173.489 + pdb_bwc_print_list(); 173.490 173.491 return rc; 173.492 } 173.493 @@ -312,16 +649,24 @@ pdb_exceptions_notify (struct notifier_b 173.494 switch (val) 173.495 { 173.496 case DIE_DEBUG: 173.497 - if (pdb_debug_fn(args->regs, args->trapnr, args->err)) 173.498 + if ( pdb_debug_fn(args->regs, args->trapnr, args->err) ) 173.499 return NOTIFY_STOP; 173.500 break; 173.501 case DIE_TRAP: 173.502 - if (args->trapnr == 3 && pdb_int3_fn(args->regs, args->err)) 173.503 + if ( args->trapnr == 3 && pdb_int3_fn(args->regs, args->err) ) 173.504 return NOTIFY_STOP; 173.505 break; 173.506 case DIE_INT3: /* without kprobes, we should never see DIE_INT3 */ 173.507 + if ( pdb_int3_fn(args->regs, args->err) ) 173.508 + return NOTIFY_STOP; 173.509 + break; 173.510 + case DIE_PAGE_FAULT: 173.511 + if ( pdb_page_fault_fn(args->regs, args->trapnr, args->err) ) 173.512 + return NOTIFY_STOP; 173.513 + break; 173.514 case DIE_GPF: 173.515 - case DIE_PAGE_FAULT: 173.516 + printk("---------------GPF\n"); 173.517 + break; 173.518 default: 173.519 break; 173.520 } 173.521 @@ -330,70 +675,110 @@ pdb_exceptions_notify (struct notifier_b 173.522 } 173.523 173.524 173.525 -int 173.526 +static int 173.527 pdb_debug_fn (struct pt_regs *regs, long error_code, 173.528 unsigned int condition) 173.529 { 173.530 pdb_response_t resp; 173.531 bwcpoint_p bkpt = NULL; 173.532 + struct list_head *entry; 173.533 173.534 - struct list_head *entry; 173.535 - list_for_each(entry, &bwcpoint_list.list) 173.536 + printk("pdb_debug_fn\n"); 173.537 + 173.538 + list_for_each(entry, &bwcpoint_list) 173.539 { 173.540 bkpt = list_entry(entry, bwcpoint_t, list); 173.541 if ( current->pid == bkpt->process && 173.542 - bkpt->type == BWC_DEBUG ) 173.543 + (bkpt->type == BWC_DEBUG || /* single step */ 173.544 + bkpt->type == BWC_WATCH_STEP)) /* single step over watchpoint */ 173.545 break; 173.546 } 173.547 173.548 - if (bkpt == &bwcpoint_list || bkpt == NULL) 173.549 + if (entry == &bwcpoint_list) 173.550 { 173.551 printk("not my debug 0x%x 0x%lx\n", current->pid, regs->eip); 173.552 return 0; 173.553 } 173.554 173.555 - list_del(&bkpt->list); 173.556 - 173.557 pdb_suspend(current); 173.558 173.559 - printk("(pdb) debug pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); 173.560 + printk("(pdb) %s pid: %d, eip: 0x%08lx\n", 173.561 + bkpt->type == BWC_DEBUG ? 
"debug" : "watch-step", 173.562 + current->pid, regs->eip); 173.563 173.564 regs->eflags &= ~X86_EFLAGS_TF; 173.565 set_tsk_thread_flag(current, TIF_SINGLESTEP); 173.566 173.567 - resp.operation = PDB_OPCODE_STEP; 173.568 + switch (bkpt->type) 173.569 + { 173.570 + case BWC_DEBUG: 173.571 + resp.operation = PDB_OPCODE_STEP; 173.572 + break; 173.573 + case BWC_WATCH_STEP: 173.574 + { 173.575 + struct list_head *watchpoint_list; 173.576 + bwcpoint_p watch_page = bkpt->watchpoint->parent; 173.577 + 173.578 + switch (bkpt->watchpoint->type) 173.579 + { 173.580 + case BWC_WATCH_READ: 173.581 + watchpoint_list = &watch_page->watchpt_read_list; break; 173.582 + case BWC_WATCH_WRITE: 173.583 + watchpoint_list = &watch_page->watchpt_write_list; break; 173.584 + case BWC_WATCH_ACCESS: 173.585 + watchpoint_list = &watch_page->watchpt_access_list; break; 173.586 + default: 173.587 + printk("unknown type %d\n", bkpt->watchpoint->type); return 0; 173.588 + } 173.589 + 173.590 + resp.operation = PDB_OPCODE_WATCHPOINT; 173.591 + list_del_init(&bkpt->watchpoint->list); 173.592 + list_add_tail(&bkpt->watchpoint->list, watchpoint_list); 173.593 + pdb_change_pte(current, bkpt->watchpoint, PDB_PTE_UPDATE); 173.594 + pdb_bwc_print_list(); 173.595 + break; 173.596 + } 173.597 + default: 173.598 + printk("unknown breakpoint type %d %d\n", __LINE__, bkpt->type); 173.599 + return 0; 173.600 + } 173.601 + 173.602 resp.process = current->pid; 173.603 resp.status = PDB_RESPONSE_OKAY; 173.604 173.605 pdb_send_response(&resp); 173.606 173.607 + list_del(&bkpt->list); 173.608 + kfree(bkpt); 173.609 + 173.610 return 1; 173.611 } 173.612 173.613 173.614 -int 173.615 +static int 173.616 pdb_int3_fn (struct pt_regs *regs, long error_code) 173.617 { 173.618 pdb_response_t resp; 173.619 bwcpoint_p bkpt = NULL; 173.620 + memory_t address = regs->eip - 1; 173.621 173.622 struct list_head *entry; 173.623 - list_for_each(entry, &bwcpoint_list.list) 173.624 + list_for_each(entry, &bwcpoint_list) 173.625 { 173.626 bkpt = list_entry(entry, bwcpoint_t, list); 173.627 if ( current->pid == bkpt->process && 173.628 - regs->eip == bkpt->address && 173.629 + address == bkpt->address && 173.630 bkpt->type == BWC_INT3 ) 173.631 break; 173.632 } 173.633 173.634 - if (bkpt == &bwcpoint_list || bkpt == NULL) 173.635 + if (entry == &bwcpoint_list) 173.636 { 173.637 - printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, regs->eip); 173.638 + printk("not my int3 bkpt 0x%x 0x%lx\n", current->pid, address); 173.639 return 0; 173.640 } 173.641 173.642 - printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, regs->eip); 173.643 + printk("(pdb) int3 pid: %d, eip: 0x%08lx\n", current->pid, address); 173.644 173.645 pdb_suspend(current); 173.646 173.647 @@ -406,6 +791,54 @@ pdb_int3_fn (struct pt_regs *regs, long 173.648 return 1; 173.649 } 173.650 173.651 +static int 173.652 +pdb_page_fault_fn (struct pt_regs *regs, long error_code, 173.653 + unsigned int condition) 173.654 +{ 173.655 + unsigned long cr2; 173.656 + unsigned long cr3; 173.657 + bwcpoint_p bwc; 173.658 + bwcpoint_p watchpt; 173.659 + bwcpoint_p bkpt; 173.660 + 173.661 + __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : ); 173.662 + __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : ); 173.663 + 173.664 + bwc = pdb_search_watchpoint(current->pid, cr2); 173.665 + if ( !bwc ) 173.666 + { 173.667 + return 0; /* not mine */ 173.668 + } 173.669 + 173.670 + printk("page_fault cr2:%08lx err:%lx eip:%08lx\n", 173.671 + cr2, error_code, regs->eip); 173.672 + 173.673 + /* disable the 
watchpoint */ 173.674 + watchpt = bwc->watchpoint; 173.675 + list_del_init(&bwc->list); 173.676 + list_add_tail(&bwc->list, &bwc->parent->watchpt_disabled_list); 173.677 + pdb_change_pte(current, bwc, PDB_PTE_RESTORE); 173.678 + 173.679 + /* single step the faulting instruction */ 173.680 + regs->eflags |= X86_EFLAGS_TF; 173.681 + 173.682 + /* create a bwcpoint entry so we know what to do once we regain control */ 173.683 + _pdb_bwcpoint_alloc(bkpt); 173.684 + if ( bkpt == NULL ) return -1; 173.685 + 173.686 + bkpt->process = current->pid; 173.687 + bkpt->address = 0; 173.688 + bkpt->type = BWC_WATCH_STEP; 173.689 + bkpt->watchpoint = bwc; 173.690 + 173.691 + /* add to head so we see it first the next time we break */ 173.692 + list_add(&bkpt->list, &bwcpoint_list); 173.693 + 173.694 + pdb_bwc_print_list(); 173.695 + return 1; 173.696 +} 173.697 + 173.698 + 173.699 /* 173.700 * Local variables: 173.701 * mode: C
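The debug.c changes above implement process watchpoints by protecting the PTE of the watched page: any touch of the page faults into pdb_page_fault_fn, which restores the original PTE, single-steps the faulting instruction, and then re-arms the protection from pdb_debug_fn via the BWC_WATCH_STEP entry. The stand-alone user-space sketch below illustrates the same protect/fault/unprotect idea with mprotect(2) and a SIGSEGV handler; it is illustrative only, is not part of this changeset, and omits the single-step and re-arm half of the cycle that the kernel module performs.

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static char *page;
    static long  pagesz;

    /* Fault handler: report the hit, then open the page up again so the
     * faulting write can complete when the handler returns.  (The pdb
     * module instead single-steps the instruction and re-protects.) */
    static void on_fault(int sig, siginfo_t *si, void *uc)
    {
        static const char msg[] = "watchpoint: write to watched page\n";
        write(2, msg, sizeof(msg) - 1);               /* async-signal-safe */
        mprotect(page, pagesz, PROT_READ | PROT_WRITE);
    }

    int main(void)
    {
        struct sigaction sa;

        pagesz = sysconf(_SC_PAGESIZE);
        page = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = on_fault;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, NULL);

        mprotect(page, pagesz, PROT_READ);   /* arm: clear write permission */
        page[0] = 42;                        /* faults once, then completes */
        printf("page[0] = %d\n", page[0]);
        return 0;
    }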
174.1 --- a/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 12:15:23 2005 +0800 174.2 +++ b/tools/debugger/pdb/linux-2.6-module/module.c Tue Aug 16 10:09:07 2005 -0800 174.3 @@ -98,6 +98,11 @@ pdb_process_request (pdb_request_t *requ 174.4 printk("(linux) detach 0x%x\n", request->process); 174.5 resp.status = PDB_RESPONSE_OKAY; 174.6 break; 174.7 + case PDB_OPCODE_RD_REG : 174.8 + resp.u.rd_reg.reg = request->u.rd_reg.reg; 174.9 + pdb_read_register(target, &resp.u.rd_reg); 174.10 + resp.status = PDB_RESPONSE_OKAY; 174.11 + break; 174.12 case PDB_OPCODE_RD_REGS : 174.13 pdb_read_registers(target, &resp.u.rd_regs); 174.14 resp.status = PDB_RESPONSE_OKAY; 174.15 @@ -108,14 +113,16 @@ pdb_process_request (pdb_request_t *requ 174.16 break; 174.17 case PDB_OPCODE_RD_MEM : 174.18 pdb_access_memory(target, request->u.rd_mem.address, 174.19 - &resp.u.rd_mem.data, request->u.rd_mem.length, 0); 174.20 + &resp.u.rd_mem.data, request->u.rd_mem.length, 174.21 + PDB_MEM_READ); 174.22 resp.u.rd_mem.address = request->u.rd_mem.address; 174.23 resp.u.rd_mem.length = request->u.rd_mem.length; 174.24 resp.status = PDB_RESPONSE_OKAY; 174.25 break; 174.26 case PDB_OPCODE_WR_MEM : 174.27 pdb_access_memory(target, request->u.wr_mem.address, 174.28 - &request->u.wr_mem.data, request->u.wr_mem.length, 1); 174.29 + &request->u.wr_mem.data, request->u.wr_mem.length, 174.30 + PDB_MEM_WRITE); 174.31 resp.status = PDB_RESPONSE_OKAY; 174.32 break; 174.33 case PDB_OPCODE_CONTINUE : 174.34 @@ -137,6 +144,14 @@ pdb_process_request (pdb_request_t *requ 174.35 request->u.bkpt.length); 174.36 resp.status = PDB_RESPONSE_OKAY; 174.37 break; 174.38 + case PDB_OPCODE_SET_WATCHPT : 174.39 + pdb_insert_watchpoint(target, &request->u.watchpt); 174.40 + resp.status = PDB_RESPONSE_OKAY; 174.41 + break; 174.42 + case PDB_OPCODE_CLR_WATCHPT : 174.43 + pdb_remove_watchpoint(target, &request->u.watchpt); 174.44 + resp.status = PDB_RESPONSE_OKAY; 174.45 + break; 174.46 default: 174.47 printk("(pdb) unknown request operation %d\n", request->operation); 174.48 resp.status = PDB_RESPONSE_ERROR; 174.49 @@ -249,8 +264,6 @@ pdb_initialize (void) 174.50 174.51 printk("----\npdb initialize %s %s\n", __DATE__, __TIME__); 174.52 174.53 - pdb_initialize_bwcpoint(); 174.54 - 174.55 /* 174.56 if ( xen_start_info.flags & SIF_INITDOMAIN ) 174.57 return 1;
175.1 --- a/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 12:15:23 2005 +0800 175.2 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_debug.h Tue Aug 16 10:09:07 2005 -0800 175.3 @@ -6,6 +6,7 @@ 175.4 void pdb_initialize_bwcpoint (void); 175.5 int pdb_suspend (struct task_struct *target); 175.6 int pdb_resume (struct task_struct *target); 175.7 +int pdb_read_register (struct task_struct *target, pdb_op_rd_reg_p op); 175.8 int pdb_read_registers (struct task_struct *target, pdb_op_rd_regs_p op); 175.9 int pdb_write_register (struct task_struct *target, pdb_op_wr_reg_p op); 175.10 int pdb_read_memory (struct task_struct *target, pdb_op_rd_mem_req_p req, 175.11 @@ -20,14 +21,14 @@ int pdb_insert_memory_breakpoint (struct 175.12 memory_t address, u32 length); 175.13 int pdb_remove_memory_breakpoint (struct task_struct *target, 175.14 memory_t address, u32 length); 175.15 +int pdb_insert_watchpoint (struct task_struct *target, 175.16 + pdb_op_watchpt_p watchpt); 175.17 +int pdb_remove_watchpoint (struct task_struct *target, 175.18 + pdb_op_watchpt_p watchpt); 175.19 175.20 int pdb_exceptions_notify (struct notifier_block *self, unsigned long val, 175.21 void *data); 175.22 175.23 -int pdb_debug_fn (struct pt_regs *regs, long error_code, 175.24 - unsigned int condition); 175.25 -int pdb_int3_fn (struct pt_regs *regs, long error_code); 175.26 - 175.27 /* module.c */ 175.28 void pdb_send_response (pdb_response_t *response); 175.29
176.1 --- a/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 12:15:23 2005 +0800 176.2 +++ b/tools/debugger/pdb/linux-2.6-module/pdb_module.h Tue Aug 16 10:09:07 2005 -0800 176.3 @@ -14,20 +14,27 @@ typedef struct pdb_op_attach 176.4 176.5 #define PDB_OPCODE_DETACH 3 176.6 176.7 -#define PDB_OPCODE_RD_REGS 4 176.8 +#define PDB_OPCODE_RD_REG 4 176.9 +typedef struct pdb_op_rd_reg 176.10 +{ 176.11 + u32 reg; 176.12 + u32 value; 176.13 +} pdb_op_rd_reg_t, *pdb_op_rd_reg_p; 176.14 + 176.15 +#define PDB_OPCODE_RD_REGS 5 176.16 typedef struct pdb_op_rd_regs 176.17 { 176.18 u32 reg[GDB_REGISTER_FRAME_SIZE]; 176.19 } pdb_op_rd_regs_t, *pdb_op_rd_regs_p; 176.20 176.21 -#define PDB_OPCODE_WR_REG 5 176.22 +#define PDB_OPCODE_WR_REG 6 176.23 typedef struct pdb_op_wr_reg 176.24 { 176.25 u32 reg; 176.26 u32 value; 176.27 } pdb_op_wr_reg_t, *pdb_op_wr_reg_p; 176.28 176.29 -#define PDB_OPCODE_RD_MEM 6 176.30 +#define PDB_OPCODE_RD_MEM 7 176.31 typedef struct pdb_op_rd_mem_req 176.32 { 176.33 u32 address; 176.34 @@ -41,7 +48,7 @@ typedef struct pdb_op_rd_mem_resp 176.35 u8 data[1024]; 176.36 } pdb_op_rd_mem_resp_t, *pdb_op_rd_mem_resp_p; 176.37 176.38 -#define PDB_OPCODE_WR_MEM 7 176.39 +#define PDB_OPCODE_WR_MEM 8 176.40 typedef struct pdb_op_wr_mem 176.41 { 176.42 u32 address; 176.43 @@ -49,17 +56,34 @@ typedef struct pdb_op_wr_mem 176.44 u8 data[1024]; /* arbitrary */ 176.45 } pdb_op_wr_mem_t, *pdb_op_wr_mem_p; 176.46 176.47 -#define PDB_OPCODE_CONTINUE 8 176.48 -#define PDB_OPCODE_STEP 9 176.49 +#define PDB_OPCODE_CONTINUE 9 176.50 +#define PDB_OPCODE_STEP 10 176.51 176.52 -#define PDB_OPCODE_SET_BKPT 10 176.53 -#define PDB_OPCODE_CLR_BKPT 11 176.54 +#define PDB_OPCODE_SET_BKPT 11 176.55 +#define PDB_OPCODE_CLR_BKPT 12 176.56 typedef struct pdb_op_bkpt 176.57 { 176.58 u32 address; 176.59 u32 length; 176.60 } pdb_op_bkpt_t, *pdb_op_bkpt_p; 176.61 176.62 +#define PDB_OPCODE_SET_WATCHPT 13 176.63 +#define PDB_OPCODE_CLR_WATCHPT 14 176.64 +#define PDB_OPCODE_WATCHPOINT 15 176.65 +typedef struct pdb_op_watchpt 176.66 +{ 176.67 +#define BWC_DEBUG 1 176.68 +#define BWC_INT3 3 176.69 +#define BWC_WATCH 100 /* pdb: watchpoint page */ 176.70 +#define BWC_WATCH_STEP 101 /* pdb: watchpoint single step */ 176.71 +#define BWC_WATCH_WRITE 102 176.72 +#define BWC_WATCH_READ 103 176.73 +#define BWC_WATCH_ACCESS 104 176.74 + u32 type; 176.75 + u32 address; 176.76 + u32 length; 176.77 +} pdb_op_watchpt_t, *pdb_op_watchpt_p; 176.78 + 176.79 176.80 typedef struct 176.81 { 176.82 @@ -68,10 +92,12 @@ typedef struct 176.83 union 176.84 { 176.85 pdb_op_attach_t attach; 176.86 + pdb_op_rd_reg_t rd_reg; 176.87 pdb_op_wr_reg_t wr_reg; 176.88 pdb_op_rd_mem_req_t rd_mem; 176.89 pdb_op_wr_mem_t wr_mem; 176.90 pdb_op_bkpt_t bkpt; 176.91 + pdb_op_watchpt_t watchpt; 176.92 } u; 176.93 } pdb_request_t, *pdb_request_p; 176.94 176.95 @@ -87,6 +113,7 @@ typedef struct { 176.96 s16 status; /* PDB_RESPONSE_??? */ 176.97 union 176.98 { 176.99 + pdb_op_rd_reg_t rd_reg; 176.100 pdb_op_rd_regs_t rd_regs; 176.101 pdb_op_rd_mem_resp_t rd_mem; 176.102 } u; 176.103 @@ -95,6 +122,11 @@ typedef struct { 176.104 176.105 DEFINE_RING_TYPES(pdb, pdb_request_t, pdb_response_t); 176.106 176.107 + 176.108 +/* from access_process_vm */ 176.109 +#define PDB_MEM_READ 0 176.110 +#define PDB_MEM_WRITE 1 176.111 + 176.112 #endif 176.113 176.114
178.1 --- a/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 12:15:23 2005 +0800 178.2 +++ b/tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch Tue Aug 16 10:09:07 2005 -0800 178.3 @@ -1,7 +1,15 @@ 178.4 diff -u linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 178.5 --- linux-2.6.12/arch/xen/i386/kernel/i386_ksyms.c 2005-07-31 22:36:50.000000000 +0100 178.6 +++ linux-2.6.12-pdb/arch/xen/i386/kernel/i386_ksyms.c 2005-08-01 10:57:31.000000000 +0100 178.7 -@@ -172,6 +172,7 @@ 178.8 +@@ -151,6 +151,7 @@ 178.9 + /* TLB flushing */ 178.10 + EXPORT_SYMBOL(flush_tlb_page); 178.11 + #endif 178.12 ++EXPORT_SYMBOL(flush_tlb_mm); 178.13 + 178.14 + #ifdef CONFIG_X86_IO_APIC 178.15 + EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); 178.16 +@@ -172,6 +173,7 @@ 178.17 EXPORT_SYMBOL_GPL(unset_nmi_callback); 178.18 178.19 EXPORT_SYMBOL(register_die_notifier);
183.1 --- a/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 12:15:23 2005 +0800 183.2 +++ b/tools/debugger/pdb/pdb_caml_domain.c Tue Aug 16 10:09:07 2005 -0800 183.3 @@ -43,6 +43,54 @@ typedef struct 183.4 /****************************************************************************/ 183.5 183.6 /* 183.7 + * dom_read_register : context_t -> int -> int32 183.8 + */ 183.9 +value 183.10 +dom_read_register (value context, value reg) 183.11 +{ 183.12 + CAMLparam2(context, reg); 183.13 + CAMLlocal1(result); 183.14 + 183.15 + int my_reg = Int_val(reg); 183.16 + cpu_user_regs_t *regs; 183.17 + context_t ctx; 183.18 + 183.19 + decode_context(&ctx, context); 183.20 + 183.21 + if ( xendebug_read_registers(xc_handle, ctx.domain, ctx.vcpu, ®s) ) 183.22 + { 183.23 + printf("(pdb) read registers error!\n"); fflush(stdout); 183.24 + failwith("read registers error"); 183.25 + } 183.26 + 183.27 + dump_regs(regs); 183.28 + 183.29 + result = caml_alloc_tuple(16); 183.30 + 183.31 + switch (my_reg) 183.32 + { 183.33 + case GDB_EAX: result = caml_copy_int32(regs->eax); break; 183.34 + case GDB_ECX: result = caml_copy_int32(regs->ecx); break; 183.35 + case GDB_EDX: result = caml_copy_int32(regs->edx); break; 183.36 + case GDB_EBX: result = caml_copy_int32(regs->ebx); break; 183.37 + case GDB_ESP: result = caml_copy_int32(regs->esp); break; 183.38 + case GDB_EBP: result = caml_copy_int32(regs->ebp); break; 183.39 + case GDB_ESI: result = caml_copy_int32(regs->esi); break; 183.40 + case GDB_EDI: result = caml_copy_int32(regs->edi); break; 183.41 + case GDB_EIP: result = caml_copy_int32(regs->eip); break; 183.42 + case GDB_EFL: result = caml_copy_int32(regs->eflags); break; 183.43 + case GDB_CS: result = caml_copy_int32(regs->cs); break; 183.44 + case GDB_SS: result = caml_copy_int32(regs->ss); break; 183.45 + case GDB_DS: result = caml_copy_int32(regs->ds); break; 183.46 + case GDB_ES: result = caml_copy_int32(regs->es); break; 183.47 + case GDB_FS: result = caml_copy_int32(regs->fs); break; 183.48 + case GDB_GS: result = caml_copy_int32(regs->gs); break; 183.49 + } 183.50 + 183.51 + CAMLreturn(result); 183.52 +} 183.53 + 183.54 +/* 183.55 * dom_read_registers : context_t -> int32 183.56 */ 183.57 value
184.1 --- a/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 12:15:23 2005 +0800 184.2 +++ b/tools/debugger/pdb/pdb_caml_process.c Tue Aug 16 10:09:07 2005 -0800 184.3 @@ -113,6 +113,12 @@ process_handle_response (value ring) 184.4 case PDB_OPCODE_DETACH : 184.5 break; 184.6 184.7 + case PDB_OPCODE_RD_REG : 184.8 + { 184.9 + sprintf(&msg[0], "%08x", _flip(resp->u.rd_reg.value)); 184.10 + break; 184.11 + } 184.12 + 184.13 case PDB_OPCODE_RD_REGS : 184.14 { 184.15 int loop; 184.16 @@ -161,16 +167,22 @@ process_handle_response (value ring) 184.17 } 184.18 184.19 case PDB_OPCODE_SET_BKPT : 184.20 - { 184.21 - break; 184.22 - } 184.23 case PDB_OPCODE_CLR_BKPT : 184.24 + case PDB_OPCODE_SET_WATCHPT : 184.25 + case PDB_OPCODE_CLR_WATCHPT : 184.26 { 184.27 break; 184.28 } 184.29 184.30 + case PDB_OPCODE_WATCHPOINT : 184.31 + { 184.32 + sprintf(msg, "S05"); 184.33 + break; 184.34 + } 184.35 + 184.36 default : 184.37 - printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE\n"); 184.38 + printf("(linux) UNKNOWN MESSAGE TYPE IN RESPONSE %d\n", 184.39 + resp->operation); 184.40 break; 184.41 } 184.42 184.43 @@ -261,6 +273,32 @@ proc_pause_target (value context) 184.44 184.45 184.46 /* 184.47 + * proc_read_register : context_t -> int -> unit 184.48 + */ 184.49 +value 184.50 +proc_read_register (value context, value reg) 184.51 +{ 184.52 + CAMLparam1(context); 184.53 + 184.54 + pdb_request_t req; 184.55 + context_t ctx; 184.56 + int my_reg = Int_val(reg); 184.57 + 184.58 + decode_context(&ctx, context); 184.59 + 184.60 + req.operation = PDB_OPCODE_RD_REG; 184.61 + req.process = ctx.process; 184.62 + req.u.rd_reg.reg = my_reg; 184.63 + req.u.rd_reg.value = 0; 184.64 + 184.65 + send_request (ctx.ring, ctx.evtchn, &req); 184.66 + 184.67 + CAMLreturn(Val_unit); 184.68 +} 184.69 + 184.70 + 184.71 + 184.72 +/* 184.73 * proc_read_registers : context_t -> unit 184.74 */ 184.75 value 184.76 @@ -443,7 +481,7 @@ proc_step_target (value context) 184.77 184.78 184.79 /* 184.80 - * proc_insert_memory_breakpoint : context_t -> int32 -> int list -> unit 184.81 + * proc_insert_memory_breakpoint : context_t -> int32 -> int -> unit 184.82 */ 184.83 value 184.84 proc_insert_memory_breakpoint (value context, value address, value length) 184.85 @@ -466,7 +504,7 @@ proc_insert_memory_breakpoint (value con 184.86 } 184.87 184.88 /* 184.89 - * proc_remove_memory_breakpoint : context_t -> int32 -> int list -> unit 184.90 + * proc_remove_memory_breakpoint : context_t -> int32 -> int -> unit 184.91 */ 184.92 value 184.93 proc_remove_memory_breakpoint (value context, value address, value length) 184.94 @@ -488,6 +526,54 @@ proc_remove_memory_breakpoint (value con 184.95 CAMLreturn(Val_unit); 184.96 } 184.97 184.98 +/* 184.99 + * proc_insert_watchpoint : context_t -> bwcpoint_t -> int32 -> int -> unit 184.100 + */ 184.101 +value 184.102 +proc_insert_watchpoint (value context, value kind, value address, value length) 184.103 +{ 184.104 + CAMLparam3(context, address, length); 184.105 + 184.106 + context_t ctx; 184.107 + pdb_request_t req; 184.108 + 184.109 + decode_context(&ctx, context); 184.110 + 184.111 + req.operation = PDB_OPCODE_SET_WATCHPT; 184.112 + req.process = ctx.process; 184.113 + req.u.watchpt.type = Int_val(kind); 184.114 + req.u.watchpt.address = (memory_t) Int32_val(address); 184.115 + req.u.watchpt.length = Int_val(length); 184.116 + 184.117 + send_request(ctx.ring, ctx.evtchn, &req); 184.118 + 184.119 + CAMLreturn(Val_unit); 184.120 +} 184.121 + 184.122 +/* 184.123 + * proc_remove_watchpoint : context_t -> bwcpoint_t -> 
int32 -> int -> unit 184.124 + */ 184.125 +value 184.126 +proc_remove_watchpoint (value context, value kind, value address, value length) 184.127 +{ 184.128 + CAMLparam3(context, address, length); 184.129 + 184.130 + context_t ctx; 184.131 + pdb_request_t req; 184.132 + 184.133 + decode_context(&ctx, context); 184.134 + 184.135 + req.operation = PDB_OPCODE_CLR_WATCHPT; 184.136 + req.process = ctx.process; 184.137 + req.u.watchpt.type = Int_val(kind); 184.138 + req.u.watchpt.address = (memory_t) Int32_val(address); 184.139 + req.u.watchpt.length = Int_val(length); 184.140 + 184.141 + send_request(ctx.ring, ctx.evtchn, &req); 184.142 + 184.143 + CAMLreturn(Val_unit); 184.144 +} 184.145 + 184.146 184.147 /* 184.148 * Local variables:
187.1 --- a/tools/debugger/pdb/readme Tue Aug 16 12:15:23 2005 +0800 187.2 +++ b/tools/debugger/pdb/readme Tue Aug 16 10:09:07 2005 -0800 187.3 @@ -1,9 +1,9 @@ 187.4 187.5 -PDB 0.3 187.6 +PDB 0.3.3 187.7 http://www.cl.cam.ac.uk/netos/pdb 187.8 187.9 Alex Ho 187.10 -June 2005 187.11 +August 2005 187.12 187.13 187.14 This is the latest incarnation of the pervasive debugger. 187.15 @@ -79,6 +79,11 @@ Usage 187.16 Process 187.17 187.18 PDB can also debug a process running in a Linux 2.6 domain. 187.19 + You will need to patch the Linux 2.6 domain U tree to export some 187.20 + additional symbols for the pdb module 187.21 + 187.22 + % make -C linux-2.6-patches 187.23 + 187.24 After running PDB in domain 0, insert the pdb module in dom u: 187.25 187.26 % insmod linux-2.6-module/pdb.ko 187.27 @@ -87,7 +92,14 @@ Process 187.28 187.29 (gdb) maint packet x context = process <domid> <pid> 187.30 187.31 + Read, write, and access watchpoint should also work for processes, 187.32 + use the "rwatch", "watch" and "awatch" gdb commands respectively. 187.33 + 187.34 + If you are having trouble with GDB 5.3 (i386-redhat-linux-gnu), 187.35 + try GDB 6.3 (configured with --target=i386-linux-gnu). 187.36 + 187.37 + 187.38 To Do 187.39 187.40 -- watchpoints 187.41 +- watchpoints for domains 187.42 - support for SMP
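As the readme addition above notes, process watchpoints are driven from gdb with the usual commands once the pdb module is loaded and a process context is selected. An illustrative session (the domain id, pid and variable name are made up) might look like:

    (gdb) maint packet x context = process 2 1234
    (gdb) watch counter
    (gdb) continue

rwatch and awatch set read and access watchpoints in the same way.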
190.1 --- a/tools/examples/network-bridge Tue Aug 16 12:15:23 2005 +0800 190.2 +++ b/tools/examples/network-bridge Tue Aug 16 10:09:07 2005 -0800 190.3 @@ -188,12 +188,13 @@ op_start () { 190.4 fi 190.5 fi 190.6 ip link set ${netdev} name p${netdev} 190.7 - ip link set veth0 name eth0 190.8 + ip link set veth0 name ${netdev} 190.9 ifconfig p${netdev} -arp down 190.10 ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff 190.11 ifconfig ${netdev} hw ether ${mac} 190.12 add_to_bridge ${bridge} vif0.0 190.13 add_to_bridge ${bridge} p${netdev} 190.14 + ip link set ${bridge} up 190.15 ip link set vif0.0 up 190.16 ip link set p${netdev} up 190.17 if ! ifup ${netdev} ; then
253.1 --- a/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 12:15:23 2005 +0800 253.2 +++ b/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 10:09:07 2005 -0800 253.3 @@ -583,7 +583,7 @@ class XendDomainInfo: 253.4 self.create_channel() 253.5 self.image.createImage() 253.6 self.exportToDB() 253.7 - if self.store_channel: 253.8 + if self.store_channel and self.store_mfn >= 0: 253.9 self.db.introduceDomain(self.id, 253.10 self.store_mfn, 253.11 self.store_channel) 253.12 @@ -915,8 +915,7 @@ class XendDomainInfo: 253.13 """ 253.14 self.configure_fields() 253.15 self.create_devices() 253.16 - if self.image.ostype != 'vmx': 253.17 - self.create_blkif() 253.18 + self.create_blkif() 253.19 253.20 def create_blkif(self): 253.21 """Create the block device interface (blkif) for the vm.