direct-io.hg

changeset 15181:45f939d0c724

[LINUX] gnttab: Add basic DMA tracking

This patch adds basic tracking of outstanding DMA requests on
the pages backing grant table entries that are marked PageForeign.

When a PageForeign struct page is about to be mapped for DMA,
we set its map count to 1 (a raw _mapcount value of 0, since the
field is biased by -1). This is then checked when we need to free
a grant table entry early, to ensure that we don't free an entry
whose page is currently in use for DMA.

So any entry whose page has been marked as in use for DMA will not
be freed early.

If the unmapping API took a struct page (one is available in the sg
case) then we could do this properly and drop the pin when the DMA
mapping is torn down; as it stands gnttab_dma_unmap_page() only gets
a bus address and is a no-op.
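
For illustration only, a minimal sketch of the pin/check idea described
above (the helper names here are hypothetical; the real code is in
gnttab_dma_map_page() and gnttab_copy_grant_page() below):

	#include <linux/mm.h>
	#include <asm/system.h>

	/*
	 * Illustrative sketch, not part of this patch.  _mapcount is
	 * biased by -1, so storing a raw 0 means "map count == 1" and
	 * page_mapped() returns true; that is the pin referred to above.
	 */
	static inline void foreign_page_dma_pin(struct page *page)
	{
		atomic_set(&page->_mapcount, 0);  /* map count = 1 (raw 0) */
		smp_mb();                         /* pairs with the check below */
	}

	static inline int foreign_page_dma_busy(struct page *page)
	{
		smp_mb();
		return page_mapped(page);  /* true while a DMA mapping is live */
	}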

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Wed May 30 10:46:13 2007 +0100 (2007-05-30)
parents 1f7a6456c330
children c4f62fe631e4
files linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c linux-2.6-xen-sparse/drivers/xen/core/gnttab.c linux-2.6-xen-sparse/include/xen/gnttab.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Wed May 30 10:45:44 2007 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/pci-dma-xen.c	Wed May 30 10:46:13 2007 +0100
     1.3 @@ -15,6 +15,7 @@
     1.4  #include <linux/version.h>
     1.5  #include <asm/io.h>
     1.6  #include <xen/balloon.h>
     1.7 +#include <xen/gnttab.h>
     1.8  #include <asm/swiotlb.h>
     1.9  #include <asm/tlbflush.h>
    1.10  #include <asm-i386/mach-xen/asm/swiotlb.h>
    1.11 @@ -90,7 +91,7 @@ dma_map_sg(struct device *hwdev, struct 
    1.12  	} else {
    1.13  		for (i = 0; i < nents; i++ ) {
    1.14  			sg[i].dma_address =
    1.15 -				page_to_bus(sg[i].page) + sg[i].offset;
    1.16 +				gnttab_dma_map_page(sg[i].page) + sg[i].offset;
    1.17  			sg[i].dma_length  = sg[i].length;
    1.18  			BUG_ON(!sg[i].page);
    1.19  			IOMMU_BUG_ON(address_needs_mapping(
    1.20 @@ -108,9 +109,15 @@ void
    1.21  dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
    1.22  	     enum dma_data_direction direction)
    1.23  {
    1.24 +	int i;
    1.25 +
    1.26  	BUG_ON(direction == DMA_NONE);
    1.27  	if (swiotlb)
    1.28  		swiotlb_unmap_sg(hwdev, sg, nents, direction);
    1.29 +	else {
    1.30 +		for (i = 0; i < nents; i++ )
    1.31 +			gnttab_dma_unmap_page(sg[i].dma_address);
    1.32 +	}
    1.33  }
    1.34  EXPORT_SYMBOL(dma_unmap_sg);
    1.35  
    1.36 @@ -127,7 +134,7 @@ dma_map_page(struct device *dev, struct 
    1.37  		dma_addr = swiotlb_map_page(
    1.38  			dev, page, offset, size, direction);
    1.39  	} else {
    1.40 -		dma_addr = page_to_bus(page) + offset;
    1.41 +		dma_addr = gnttab_dma_map_page(page) + offset;
    1.42  		IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
    1.43  	}
    1.44  
    1.45 @@ -142,6 +149,8 @@ dma_unmap_page(struct device *dev, dma_a
    1.46  	BUG_ON(direction == DMA_NONE);
    1.47  	if (swiotlb)
    1.48  		swiotlb_unmap_page(dev, dma_address, size, direction);
    1.49 +	else
    1.50 +		gnttab_dma_unmap_page(dma_address);
    1.51  }
    1.52  EXPORT_SYMBOL(dma_unmap_page);
    1.53  #endif /* CONFIG_HIGHMEM */
    1.54 @@ -326,7 +335,8 @@ dma_map_single(struct device *dev, void 
    1.55  	if (swiotlb) {
    1.56  		dma = swiotlb_map_single(dev, ptr, size, direction);
    1.57  	} else {
    1.58 -		dma = virt_to_bus(ptr);
    1.59 +		dma = gnttab_dma_map_page(virt_to_page(ptr)) +
    1.60 +		      offset_in_page(ptr);
    1.61  		IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
    1.62  		IOMMU_BUG_ON(address_needs_mapping(dev, dma));
    1.63  	}
    1.64 @@ -344,6 +354,8 @@ dma_unmap_single(struct device *dev, dma
    1.65  		BUG();
    1.66  	if (swiotlb)
    1.67  		swiotlb_unmap_single(dev, dma_addr, size, direction);
    1.68 +	else
    1.69 +		gnttab_dma_unmap_page(dma_addr);
    1.70  }
    1.71  EXPORT_SYMBOL(dma_unmap_single);
    1.72  
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed May 30 10:45:44 2007 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed May 30 10:46:13 2007 +0100
     2.3 @@ -25,6 +25,7 @@
     2.4  #include <asm/pci.h>
     2.5  #include <asm/dma.h>
     2.6  #include <asm/uaccess.h>
     2.7 +#include <xen/gnttab.h>
     2.8  #include <xen/interface/memory.h>
     2.9  
    2.10  int swiotlb;
    2.11 @@ -32,8 +33,6 @@ EXPORT_SYMBOL(swiotlb);
    2.12  
    2.13  #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
    2.14  
    2.15 -#define SG_ENT_PHYS_ADDRESS(sg)	(page_to_bus((sg)->page) + (sg)->offset)
    2.16 -
    2.17  /*
    2.18   * Maximum allowable number of contiguous slabs to map,
    2.19   * must be a power of 2.  What is the appropriate value ?
    2.20 @@ -468,7 +467,8 @@ swiotlb_full(struct device *dev, size_t 
    2.21  dma_addr_t
    2.22  swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
    2.23  {
    2.24 -	dma_addr_t dev_addr = virt_to_bus(ptr);
    2.25 +	dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
    2.26 +			      offset_in_page(ptr);
    2.27  	void *map;
    2.28  	struct phys_addr buffer;
    2.29  
    2.30 @@ -486,6 +486,7 @@ swiotlb_map_single(struct device *hwdev,
    2.31  	/*
    2.32  	 * Oh well, have to allocate and map a bounce buffer.
    2.33  	 */
    2.34 +	gnttab_dma_unmap_page(dev_addr);
    2.35  	buffer.page   = virt_to_page(ptr);
    2.36  	buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
    2.37  	map = map_single(hwdev, buffer, size, dir);
    2.38 @@ -513,6 +514,8 @@ swiotlb_unmap_single(struct device *hwde
    2.39  	BUG_ON(dir == DMA_NONE);
    2.40  	if (in_swiotlb_aperture(dev_addr))
    2.41  		unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
    2.42 +	else
    2.43 +		gnttab_dma_unmap_page(dev_addr);
    2.44  }
    2.45  
    2.46  /*
    2.47 @@ -571,8 +574,10 @@ swiotlb_map_sg(struct device *hwdev, str
    2.48  	BUG_ON(dir == DMA_NONE);
    2.49  
    2.50  	for (i = 0; i < nelems; i++, sg++) {
    2.51 -		dev_addr = SG_ENT_PHYS_ADDRESS(sg);
    2.52 +		dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;
    2.53 +
    2.54  		if (address_needs_mapping(hwdev, dev_addr)) {
    2.55 +			gnttab_dma_unmap_page(dev_addr);
    2.56  			buffer.page   = sg->page;
    2.57  			buffer.offset = sg->offset;
    2.58  			map = map_single(hwdev, buffer, sg->length, dir);
    2.59 @@ -605,10 +610,12 @@ swiotlb_unmap_sg(struct device *hwdev, s
    2.60  	BUG_ON(dir == DMA_NONE);
    2.61  
    2.62  	for (i = 0; i < nelems; i++, sg++)
    2.63 -		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
    2.64 +		if (in_swiotlb_aperture(sg->dma_address))
    2.65  			unmap_single(hwdev, 
    2.66  				     (void *)bus_to_virt(sg->dma_address),
    2.67  				     sg->dma_length, dir);
    2.68 +		else
    2.69 +			gnttab_dma_unmap_page(sg->dma_address);
    2.70  }
    2.71  
    2.72  /*
    2.73 @@ -627,7 +634,7 @@ swiotlb_sync_sg_for_cpu(struct device *h
    2.74  	BUG_ON(dir == DMA_NONE);
    2.75  
    2.76  	for (i = 0; i < nelems; i++, sg++)
    2.77 -		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
    2.78 +		if (in_swiotlb_aperture(sg->dma_address))
    2.79  			sync_single(hwdev,
    2.80  				    (void *)bus_to_virt(sg->dma_address),
    2.81  				    sg->dma_length, dir);
    2.82 @@ -642,7 +649,7 @@ swiotlb_sync_sg_for_device(struct device
    2.83  	BUG_ON(dir == DMA_NONE);
    2.84  
    2.85  	for (i = 0; i < nelems; i++, sg++)
    2.86 -		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
    2.87 +		if (in_swiotlb_aperture(sg->dma_address))
    2.88  			sync_single(hwdev,
    2.89  				    (void *)bus_to_virt(sg->dma_address),
    2.90  				    sg->dma_length, dir);
    2.91 @@ -659,8 +666,9 @@ swiotlb_map_page(struct device *hwdev, s
    2.92  	dma_addr_t dev_addr;
    2.93  	char *map;
    2.94  
    2.95 -	dev_addr = page_to_bus(page) + offset;
    2.96 +	dev_addr = gnttab_dma_map_page(page) + offset;
    2.97  	if (address_needs_mapping(hwdev, dev_addr)) {
    2.98 +		gnttab_dma_unmap_page(dev_addr);
    2.99  		buffer.page   = page;
   2.100  		buffer.offset = offset;
   2.101  		map = map_single(hwdev, buffer, size, direction);
   2.102 @@ -681,6 +689,8 @@ swiotlb_unmap_page(struct device *hwdev,
   2.103  	BUG_ON(direction == DMA_NONE);
   2.104  	if (in_swiotlb_aperture(dma_address))
   2.105  		unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
   2.106 +	else
   2.107 +		gnttab_dma_unmap_page(dma_address);
   2.108  }
   2.109  
   2.110  #endif
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed May 30 10:45:44 2007 +0100
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed May 30 10:46:13 2007 +0100
     3.3 @@ -490,6 +490,128 @@ static int gnttab_map(unsigned int start
     3.4  	return 0;
     3.5  }
     3.6  
     3.7 +static void gnttab_page_free(struct page *page)
     3.8 +{
     3.9 +	if (page->mapping) {
    3.10 +		put_page((struct page *)page->mapping);
    3.11 +		page->mapping = NULL;
    3.12 +	}
    3.13 +
    3.14 +	ClearPageForeign(page);
    3.15 +	gnttab_reset_grant_page(page);
    3.16 +	put_page(page);
    3.17 +}
    3.18 +
    3.19 +/*
    3.20 + * Must not be called with IRQs off.  This should only be used on the
    3.21 + * slow path.
    3.22 + *
    3.23 + * Copy a foreign granted page to local memory.
    3.24 + */
    3.25 +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
    3.26 +{
    3.27 +	struct gnttab_unmap_and_replace unmap;
    3.28 +	mmu_update_t mmu;
    3.29 +	struct page *page;
    3.30 +	struct page *new_page;
    3.31 +	void *new_addr;
    3.32 +	void *addr;
    3.33 +	paddr_t pfn;
    3.34 +	maddr_t mfn;
    3.35 +	maddr_t new_mfn;
    3.36 +	int err;
    3.37 +
    3.38 +	page = *pagep;
    3.39 +	if (!get_page_unless_zero(page))
    3.40 +		return -ENOENT;
    3.41 +
    3.42 +	err = -ENOMEM;
    3.43 +	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
    3.44 +	if (!new_page)
    3.45 +		goto out;
    3.46 +
    3.47 +	new_addr = page_address(new_page);
    3.48 +	addr = page_address(page);
    3.49 +	memcpy(new_addr, addr, PAGE_SIZE);
    3.50 +
    3.51 +	pfn = page_to_pfn(page);
    3.52 +	mfn = pfn_to_mfn(pfn);
    3.53 +	new_mfn = virt_to_mfn(new_addr);
    3.54 +
    3.55 +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
    3.56 +		set_phys_to_machine(pfn, new_mfn);
    3.57 +		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
    3.58 +
    3.59 +		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
    3.60 +		mmu.val = pfn;
    3.61 +		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
    3.62 +		BUG_ON(err);
    3.63 +	}
    3.64 +
    3.65 +	gnttab_set_replace_op(&unmap, (unsigned long)addr,
    3.66 +			      (unsigned long)new_addr, ref);
    3.67 +
    3.68 +	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
    3.69 +					&unmap, 1);
    3.70 +	BUG_ON(err);
    3.71 +	BUG_ON(unmap.status);
    3.72 +
    3.73 +	new_page->mapping = page->mapping;
    3.74 +	new_page->index = page->index;
    3.75 +	set_bit(PG_foreign, &new_page->flags);
    3.76 +	*pagep = new_page;
    3.77 +
    3.78 +	SetPageForeign(page, gnttab_page_free);
    3.79 +	page->mapping = NULL;
    3.80 +
    3.81 +	/*
    3.82 +	 * Ensure that there is a barrier between setting the p2m entry
    3.83 +	 * and checking the map count.  See gnttab_dma_map_page.
    3.84 +	 */
    3.85 +	smp_mb();
    3.86 +
    3.87 +	/* Has the page been DMA-mapped? */
    3.88 +	if (unlikely(page_mapped(page))) {
    3.89 +		err = -EBUSY;
    3.90 +		page->mapping = (void *)new_page;
    3.91 +	}
    3.92 +
    3.93 +out:
    3.94 +	put_page(page);
    3.95 +	return err;
    3.96 +
    3.97 +}
    3.98 +EXPORT_SYMBOL(gnttab_copy_grant_page);
    3.99 +
   3.100 +/*
   3.101 + * Keep track of foreign pages marked as PageForeign so that we don't
   3.102 + * return them to the remote domain prematurely.
   3.103 + *
   3.104 + * PageForeign pages are pinned down by increasing their mapcount.
   3.105 + *
   3.106 + * All other pages are simply returned as is.
   3.107 + */
   3.108 +maddr_t gnttab_dma_map_page(struct page *page)
   3.109 +{
   3.110 +	maddr_t mfn = pfn_to_mfn(page_to_pfn(page)), mfn2;
   3.111 +
   3.112 +	if (!PageForeign(page))
   3.113 +		return mfn << PAGE_SHIFT;
   3.114 +
   3.115 +	if (mfn_to_local_pfn(mfn) < max_mapnr)
   3.116 +		return mfn << PAGE_SHIFT;
   3.117 +
   3.118 +	atomic_set(&page->_mapcount, 0);
   3.119 +
   3.120 +	/* This barrier corresponds to the one in gnttab_copy_grant_page. */
   3.121 +	smp_mb();
   3.122 +
   3.123 +	/* Has this page been copied in the mean time? */
   3.124 +	mfn2 = pfn_to_mfn(page_to_pfn(page));
   3.125 +
   3.126 +	return mfn2 << PAGE_SHIFT;
   3.127 +}
   3.128 +
   3.129  int gnttab_resume(void)
   3.130  {
   3.131  	if (max_nr_grant_frames() < nr_grant_frames)
     4.1 --- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Wed May 30 10:45:44 2007 +0100
     4.2 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Wed May 30 10:46:13 2007 +0100
     4.3 @@ -39,6 +39,7 @@
     4.4  
     4.5  #include <asm/hypervisor.h>
     4.6  #include <asm/maddr.h> /* maddr_t */
     4.7 +#include <linux/mm.h>
     4.8  #include <xen/interface/grant_table.h>
     4.9  #include <xen/features.h>
    4.10  
    4.11 @@ -101,6 +102,19 @@ void gnttab_grant_foreign_access_ref(gra
    4.12  void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
    4.13  				       unsigned long pfn);
    4.14  
    4.15 +int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep);
    4.16 +maddr_t gnttab_dma_map_page(struct page *page);
    4.17 +
    4.18 +static inline void gnttab_dma_unmap_page(maddr_t mfn)
    4.19 +{
    4.20 +}
    4.21 +
    4.22 +static inline void gnttab_reset_grant_page(struct page *page)
    4.23 +{
    4.24 +	init_page_count(page);
    4.25 +	reset_page_mapcount(page);
    4.26 +}
    4.27 +
    4.28  int gnttab_suspend(void);
    4.29  int gnttab_resume(void);
    4.30
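
As the commit message notes, gnttab_dma_unmap_page() above is an empty
stub because the generic unmap hooks only receive a bus address, not a
struct page. Purely as a hypothetical sketch (not part of this
changeset), the "proper" unmap hinted at there might look like the
following, assuming a struct page were available as it is in the sg
case:

	#include <linux/mm.h>

	/*
	 * Hypothetical sketch only -- not part of this changeset.
	 * If the unmap path received the struct page (e.g. via sg->page
	 * in dma_unmap_sg), the pin taken in gnttab_dma_map_page() could
	 * be released by restoring the biased map count to -1, i.e. a
	 * map count of 0.
	 */
	static inline void gnttab_dma_unmap_page_proper(struct page *page)
	{
		if (PageForeign(page))
			reset_page_mapcount(page);  /* raw -1, map count 0 */
	}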