ia64/linux-2.6.18-xen.hg

view lib/swiotlb-xen.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 87c84f7dd850
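
As a rough illustration of the retry behaviour described in the changeset
message above, here is a hypothetical user-space sketch (not the actual
balloon driver code): it keeps whatever pages a partially successful
increase returns and simply retries later instead of recording a hard
limit. All names here (balloon_worker, try_increase_reservation,
target_pages, RETRY_INTERVAL_MS) are invented for the sketch.

/*
 * Hypothetical model of "retry increasing the reservation on a timer,
 * keeping partial allocations".  In the real driver the retry would be
 * driven by re-arming a kernel timer; here we just loop.
 */
#include <stdio.h>
#include <stdlib.h>

#define RETRY_INTERVAL_MS 1000

static unsigned long current_pages = 1000;
static unsigned long target_pages  = 1400;

/* Stand-in for the hypervisor call that populates new pages; it may
 * grant fewer pages than requested under memory pressure. */
static unsigned long try_increase_reservation(unsigned long nr_wanted)
{
        return (unsigned long)rand() % (nr_wanted + 1);
}

static void balloon_worker(void)
{
        while (current_pages < target_pages) {
                unsigned long wanted = target_pages - current_pages;
                unsigned long got = try_increase_reservation(wanted);

                /* Keep a partial allocation rather than handing it back. */
                current_pages += got;

                if (current_pages < target_pages)
                        /* Host may be under temporary pressure: do not give
                         * up, just retry after a delay. */
                        printf("got %lu/%lu pages, retrying in %d ms\n",
                               got, wanted, RETRY_INTERVAL_MS);
        }
        printf("reached target of %lu pages\n", target_pages);
}

int main(void)
{
        srand(42);
        balloon_worker();
        return 0;
}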
line source
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2005 Keir Fraser <keir@xensource.com>
 */

#include <linux/cache.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <asm/io.h>
#include <asm/pci.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#include <xen/gnttab.h>
#include <xen/interface/memory.h>
#include <asm-i386/mach-xen/asm/gnttab_dma.h>

int swiotlb;
EXPORT_SYMBOL(swiotlb);

#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))

/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2. What is the appropriate value?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE  128

/*
 * log of the size of each IO TLB slab. The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11

int swiotlb_force;

static char *iotlb_virt_start;
static unsigned long iotlb_nslabs;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static unsigned long iotlb_pfn_start, iotlb_pfn_end;

/* Does the given dma address reside within the swiotlb aperture? */
static inline int in_swiotlb_aperture(dma_addr_t dev_addr)
{
        unsigned long pfn = mfn_to_local_pfn(dev_addr >> PAGE_SHIFT);
        return (pfn_valid(pfn)
                && (pfn >= iotlb_pfn_start)
                && (pfn < iotlb_pfn_end));
}

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static struct phys_addr {
        struct page *page;
        unsigned int offset;
} *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static unsigned int dma_bits;
static unsigned int __initdata max_dma_bits = 32;
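
/*
 * "dma_bits=" boot parameter: upper bound on the address width tried when
 * exchanging the aperture for machine-contiguous memory (see the retry
 * loops below).
 */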
static int __init
setup_dma_bits(char *str)
{
        max_dma_bits = simple_strtoul(str, NULL, 0);
        return 0;
}
__setup("dma_bits=", setup_dma_bits);

static int __init
setup_io_tlb_npages(char *str)
{
        /* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */
        if (isdigit(*str)) {
                iotlb_nslabs = simple_strtoul(str, &str, 0) <<
                        (20 - IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        /*
         * NB. 'force' enables the swiotlb, but doesn't force its use for
         * every DMA like it does on native Linux. 'off' forcibly disables
         * use of the swiotlb.
         */
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        else if (!strcmp(str, "off"))
                swiotlb_force = -1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */

/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the PCI DMA API.
 */
void
swiotlb_init_with_default_size (size_t default_size)
{
        unsigned long i, bytes;
        int rc;

        if (!iotlb_nslabs) {
                iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
        }

        bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);

        /*
         * Get IO TLB memory from the low pages
         */
        iotlb_virt_start = alloc_bootmem_pages(bytes);
        if (!iotlb_virt_start)
                panic("Cannot allocate SWIOTLB buffer!\n");

        dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
        for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) {
                do {
                        rc = xen_create_contiguous_region(
                                (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT),
                                get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
                                dma_bits);
                } while (rc && dma_bits++ < max_dma_bits);
                if (rc) {
                        if (i == 0)
                                panic("No suitable physical memory available for SWIOTLB buffer!\n"
                                      "Use dom0_mem Xen boot parameter to reserve\n"
                                      "some DMA memory (e.g., dom0_mem=-128M).\n");
                        iotlb_nslabs = i;
                        i <<= IO_TLB_SHIFT;
                        free_bootmem(__pa(iotlb_virt_start + i), bytes - i);
                        bytes = i;
                        for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
                                unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1));

                                if (bits > dma_bits)
                                        dma_bits = bits;
                        }
                        break;
                }
        }

        /*
         * Allocate and initialize the free list array. This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
         */
        io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int));
        for (i = 0; i < iotlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(
                iotlb_nslabs * sizeof(*io_tlb_orig_addr));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem(io_tlb_overflow);
        if (!io_tlb_overflow_buffer)
                panic("Cannot allocate SWIOTLB overflow buffer!\n");

        do {
                rc = xen_create_contiguous_region(
                        (unsigned long)io_tlb_overflow_buffer,
                        get_order(io_tlb_overflow),
                        dma_bits);
        } while (rc && dma_bits++ < max_dma_bits);
        if (rc)
                panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");

        iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT;
        iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT);

        printk(KERN_INFO "Software IO TLB enabled:\n"
               " Aperture: %lu megabytes\n"
               " Kernel range: %p - %p\n"
               " Address size: %u bits\n",
               bytes >> 20,
               iotlb_virt_start, iotlb_virt_start + bytes,
               dma_bits);
}
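
/*
 * Decide at boot time whether a software IO TLB is needed and, if so, pick
 * a default aperture size before handing off to
 * swiotlb_init_with_default_size().
 */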
void
swiotlb_init(void)
{
        long ram_end;
        size_t defsz = 64 * (1 << 20); /* 64MB default size */

        if (swiotlb_force == 1) {
                swiotlb = 1;
        } else if ((swiotlb_force != -1) &&
                   is_running_on_xen() &&
                   is_initial_xendomain()) {
                /* Domain 0 always has a swiotlb. */
                ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
                if (ram_end <= 0x7ffff)
                        defsz = 2 * (1 << 20); /* 2MB on systems with <2GB RAM. */
                swiotlb = 1;
        }

        if (swiotlb)
                swiotlb_init_with_default_size(defsz);
        else
                printk(KERN_INFO "Software IO TLB disabled\n");
}

/*
 * We use __copy_to_user_inatomic to transfer to the host buffer because the
 * buffer may be mapped read-only (e.g., in the blkback driver) but lower-level
 * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an
 * unnecessary copy from the aperture to the host buffer, and a page fault.
 */
static void
__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
{
        if (PageHighMem(buffer.page)) {
                size_t len, bytes;
                char *dev, *host, *kmp;
                len = size;
                while (len != 0) {
                        unsigned long flags;

                        if (((bytes = len) + buffer.offset) > PAGE_SIZE)
                                bytes = PAGE_SIZE - buffer.offset;
                        local_irq_save(flags); /* protects KM_BOUNCE_READ */
                        kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
                        dev = dma_addr + size - len;
                        host = kmp + buffer.offset;
                        if (dir == DMA_FROM_DEVICE) {
                                if (__copy_to_user_inatomic(host, dev, bytes))
                                        /* inaccessible */;
                        } else
                                memcpy(dev, host, bytes);
                        kunmap_atomic(kmp, KM_BOUNCE_READ);
                        local_irq_restore(flags);
                        len -= bytes;
                        buffer.page++;
                        buffer.offset = 0;
                }
        } else {
                char *host = (char *)phys_to_virt(
                        page_to_pseudophys(buffer.page)) + buffer.offset;
                if (dir == DMA_FROM_DEVICE) {
                        if (__copy_to_user_inatomic(host, dma_addr, size))
                                /* inaccessible */;
                } else if (dir == DMA_TO_DEVICE)
                        memcpy(dma_addr, host, size);
        }
}

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        struct phys_addr slot_buf;
        int i;

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find a suitable number of IO TLB entries that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                wrap = index = ALIGN(io_tlb_index, stride);

                if (index >= iotlb_nslabs)
                        wrap = index = 0;

                do {
                        /*
                         * If we find a slot that indicates we have 'nslots'
                         * number of contiguous buffers, we allocate the
                         * buffers from that slot and mark the entries as '0'
                         * indicating unavailable.
                         */
                        if (io_tlb_list[index] >= nslots) {
                                int count = 0;

                                for (i = index; i < (int)(index + nslots); i++)
                                        io_tlb_list[i] = 0;
                                for (i = index - 1;
                                     (OFFSET(i, IO_TLB_SEGSIZE) !=
                                      IO_TLB_SEGSIZE - 1) && io_tlb_list[i];
                                     i--)
                                        io_tlb_list[i] = ++count;
                                dma_addr = iotlb_virt_start +
                                        (index << IO_TLB_SHIFT);

                                /*
                                 * Update the indices to avoid searching in
                                 * the next round.
                                 */
                                io_tlb_index =
                                        ((index + nslots) < iotlb_nslabs
                                         ? (index + nslots) : 0);

                                goto found;
                        }
                        index += stride;
                        if (index >= iotlb_nslabs)
                                index = 0;
                } while (index != wrap);

                spin_unlock_irqrestore(&io_tlb_lock, flags);
                return NULL;
        }
found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory. Then we sync the buffer if
         * needed.
         */
        slot_buf = buffer;
        for (i = 0; i < nslots; i++) {
                slot_buf.page += slot_buf.offset >> PAGE_SHIFT;
                slot_buf.offset &= PAGE_SIZE - 1;
                io_tlb_orig_addr[index+i] = slot_buf;
                slot_buf.offset += 1 << IO_TLB_SHIFT;
        }
        if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
                __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);

        return dma_addr;
}
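
/*
 * Translate a bounce-buffer kernel virtual address back to the original
 * page/offset recorded by map_single(), adjusting for the offset within
 * the IO TLB slot.
 */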
static struct phys_addr dma_addr_to_phys_addr(char *dma_addr)
{
        int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
        struct phys_addr buffer = io_tlb_orig_addr[index];
        buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
        buffer.page += buffer.offset >> PAGE_SHIFT;
        buffer.offset &= PAGE_SIZE - 1;
        return buffer;
}

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
        struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr);

        /*
         * First, sync the memory before unmapping the entry
         */
        if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
                __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1;
                     (OFFSET(i, IO_TLB_SEGSIZE) !=
                      IO_TLB_SEGSIZE - 1) && io_tlb_list[i];
                     i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr);
        BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
        __sync_single(buffer, dma_addr, size, dir);
}

static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for pci_dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
               "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");

        if (size > io_tlb_overflow && do_panic) {
                if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic("PCI-DMA: Memory would be corrupted\n");
                if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic("PCI-DMA: Random memory would be DMAed\n");
        }
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode. The
 * PCI address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
 */
dma_addr_t
swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
        dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
                              offset_in_page(ptr);
        void *map;
        struct phys_addr buffer;

        BUG_ON(dir == DMA_NONE);

        /*
         * If the pointer passed in happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!range_straddles_page_boundary(__pa(ptr), size) &&
            !address_needs_mapping(hwdev, dev_addr))
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        gnttab_dma_unmap_page(dev_addr);
        buffer.page = virt_to_page(ptr);
        buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
        map = map_single(hwdev, buffer, size, dir);
        if (!map) {
                swiotlb_full(hwdev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = virt_to_bus(map);
        return dev_addr;
}

/*
 * Unmap a single streaming mode DMA translation. The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_single call. All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
                     int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
        else
                gnttab_dma_unmap_page(dev_addr);
}

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the PCI dma mapping, you must
 * call this function before doing so. At the next point you give the PCI dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer.
 */
void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
}
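
/*
 * Counterpart of swiotlb_sync_single_for_cpu: called before ownership of
 * the buffer is handed back to the device.
 */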
void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
}

/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_single
 * interface. Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length. They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_single are the
 * same here.
 */
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
               int dir)
{
        struct phys_addr buffer;
        dma_addr_t dev_addr;
        char *map;
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++) {
                dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;

                if (range_straddles_page_boundary(page_to_pseudophys(sg->page)
                                                  + sg->offset, sg->length)
                    || address_needs_mapping(hwdev, dev_addr)) {
                        gnttab_dma_unmap_page(dev_addr);
                        buffer.page = sg->page;
                        buffer.offset = sg->offset;
                        map = map_single(hwdev, buffer, sg->length, dir);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg(hwdev, sg - i, i, dir);
                                sg[0].dma_length = 0;
                                return 0;
                        }
                        sg->dma_address = (dma_addr_t)virt_to_bus(map);
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}

/*
 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_single() above.
 */
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
                 int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (in_swiotlb_aperture(sg->dma_address))
                        unmap_single(hwdev,
                                     (void *)bus_to_virt(sg->dma_address),
                                     sg->dma_length, dir);
                else
                        gnttab_dma_unmap_page(sg->dma_address);
}

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (in_swiotlb_aperture(sg->dma_address))
                        sync_single(hwdev,
                                    (void *)bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir);
}
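
/*
 * Counterpart of swiotlb_sync_sg_for_cpu: called before ownership of the
 * buffers is handed back to the device.
 */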
void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (in_swiotlb_aperture(sg->dma_address))
                        sync_single(hwdev,
                                    (void *)bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir);
}

#ifdef CONFIG_HIGHMEM
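
/*
 * Highmem variant of swiotlb_map_single: takes a page/offset pair so the
 * buffer does not need a permanent kernel virtual mapping.
 */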
dma_addr_t
swiotlb_map_page(struct device *hwdev, struct page *page,
                 unsigned long offset, size_t size,
                 enum dma_data_direction direction)
{
        struct phys_addr buffer;
        dma_addr_t dev_addr;
        char *map;

        dev_addr = gnttab_dma_map_page(page) + offset;
        if (address_needs_mapping(hwdev, dev_addr)) {
                gnttab_dma_unmap_page(dev_addr);
                buffer.page = page;
                buffer.offset = offset;
                map = map_single(hwdev, buffer, size, direction);
                if (!map) {
                        swiotlb_full(hwdev, size, direction, 1);
                        map = io_tlb_overflow_buffer;
                }
                dev_addr = (dma_addr_t)virt_to_bus(map);
        }

        return dev_addr;
}

void
swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
                   size_t size, enum dma_data_direction direction)
{
        BUG_ON(direction == DMA_NONE);
        if (in_swiotlb_aperture(dma_address))
                unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
        else
                gnttab_dma_unmap_page(dma_address);
}

#endif
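
/*
 * A returned DMA address equal to the overflow buffer means map_single()
 * failed and the emergency buffer was handed out instead.
 */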
int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
        return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
}

/*
 * Return whether the given PCI device DMA address mask can be supported
 * properly. For example, if your device can only drive the low 24-bits
 * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported (struct device *hwdev, u64 mask)
{
        return (mask >= ((1UL << dma_bits) - 1));
}

EXPORT_SYMBOL(swiotlb_init);
EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_dma_supported);