ia64/linux-2.6.18-xen.hg

lib/swiotlb.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, producing temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.
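The change itself is to the balloon driver (not to the file shown below).
As a rough, hypothetical sketch of the retry behaviour described above --
the function and variable names are illustrative only, not the driver's
actual identifiers, and the one-second retry interval is an arbitrary
example:

/* Hypothetical sketch of the retry-on-timer behaviour described above. */
#include <linux/timer.h>
#include <linux/jiffies.h>

/* Illustrative state; the real driver keeps its own bookkeeping. */
static unsigned long example_current_pages, example_target_pages;
static struct timer_list example_balloon_timer;
static long example_increase_reservation(long nr_pages);   /* hypothetical */
static long example_decrease_reservation(long nr_pages);   /* hypothetical */

static void example_balloon_process(unsigned long unused)
{
        long credit = (long)example_target_pages - (long)example_current_pages;

        if (credit > 0) {
                /* Keep whatever pages the hypervisor did give us ... */
                long got = example_increase_reservation(credit);
                example_current_pages += got;
                /* ... and retry on a timer instead of recording a "hard
                 * limit" when we received fewer pages than we asked for. */
                if (got < credit)
                        mod_timer(&example_balloon_timer, jiffies + HZ);
        } else if (credit < 0) {
                /* Ballooning down already retried in the same manner. */
                if (example_decrease_reservation(-credit) < -credit)
                        mod_timer(&example_balloon_timer, jiffies + HZ);
        }
}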

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
/*
 * Dynamic DMA mapping support.
 *
 * This implementation is for IA-64 and EM64T platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *      David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * 03/05/07 davidm      Switch from PCI-DMA to generic device DMA API.
 * 00/12/13 davidm      Rename to swiotlb.c and add mark_clean() to avoid
 *                      unnecessary i-cache flushing.
 * 04/07/.. ak          Better overflow handling. Assorted fixes.
 * 05/09/10 linville    Add support for syncing ranges, support syncing for
 *                      DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
 */
#include <linux/cache.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>

#include <asm/io.h>
#include <asm/dma.h>
#include <asm/scatterlist.h>

#include <linux/init.h>
#include <linux/bootmem.h>

#define OFFSET(val,align) ((unsigned long)      \
        ( (val) & ( (align) - 1)))

#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2.  What is the appropriate value ?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE  128

/*
 * log of the size of each IO TLB slab.  The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11

#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))

/*
 * Minimum IO TLB size to bother booting with.  Systems with mainly
 * 64bit capable cards will only lightly use the swiotlb.  If we can't
 * allocate a contiguous 1MB, we're probably in trouble anyway.
 */
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
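For orientation, the sizes these constants imply; a minimal note that simply
restates the arithmetic of the definitions above (no new interfaces assumed):

/*
 * IO_TLB_SHIFT = 11        ->  each slab is 1 << 11 = 2 KB
 * IO_TLB_SEGSIZE = 128     ->  one segment is 128 * 2 KB = 256 KB, the
 *                              largest single bounce mapping
 * IO_TLB_MIN_SLABS         ->  (1 MB) >> 11 = 512 slabs minimum
 * default pool (see below) ->  64 MB >> 11 = 32768 slabs
 */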
/*
 * Enumeration for sync targets
 */
enum dma_sync_target {
        SYNC_FOR_CPU = 0,
        SYNC_FOR_DEVICE = 1,
};

int swiotlb_force;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static char *io_tlb_start, *io_tlb_end;

/*
 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static unsigned char **io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);
static int __init
setup_io_tlb_npages(char *str)
{
        if (isdigit(*str)) {
                io_tlb_nslabs = simple_strtoul(str, &str, 0);
                /* avoid tail segment of size < IO_TLB_SEGSIZE */
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }
        if (*str == ',')
                ++str;
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
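A usage note derived from the parser above (not from separate documentation),
with the slab size taken from IO_TLB_SHIFT:

/*
 * Example boot parameters:
 *
 *   swiotlb=65536        reserve 65536 slabs = 65536 * 2 KB = 128 MB
 *   swiotlb=65536,force  additionally set swiotlb_force, so even addresses
 *                        inside the device's DMA window are bounced
 *   swiotlb=force        keep the default size but force bouncing
 */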
/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the DMA API.
 */
void
swiotlb_init_with_default_size (size_t default_size)
{
        unsigned long i;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
        if (!io_tlb_start)
                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
        printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
               virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
}

void
swiotlb_init (void)
{
        swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */
}
/*
 * Systems with larger DMA zones (those that don't support ISA) can
 * initialize the swiotlb later using the slab allocator if needed.
 * This should be just like above, but with some error catching.
 */
int
swiotlb_late_init_with_default_size (size_t default_size)
{
        unsigned long i, req_nslabs = io_tlb_nslabs;
        unsigned int order;

        if (!io_tlb_nslabs) {
                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
        }

        /*
         * Get IO TLB memory from the low pages
         */
        order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
        io_tlb_nslabs = SLABS_PER_PAGE << order;

        while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
                io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
                                                        order);
                if (io_tlb_start)
                        break;
                order--;
        }

        if (!io_tlb_start)
                goto cleanup1;

        if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
                printk(KERN_WARNING "Warning: only able to allocate %ld MB "
                       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
                io_tlb_nslabs = SLABS_PER_PAGE << order;
        }
        io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
        memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));

        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
                                      get_order(io_tlb_nslabs * sizeof(int)));
        if (!io_tlb_list)
                goto cleanup2;

        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;

        io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
                                   get_order(io_tlb_nslabs * sizeof(char *)));
        if (!io_tlb_orig_addr)
                goto cleanup3;

        memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
                                                  get_order(io_tlb_overflow));
        if (!io_tlb_overflow_buffer)
                goto cleanup4;

        printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
               "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
               virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));

        return 0;

cleanup4:
        free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
                                                              sizeof(char *)));
        io_tlb_orig_addr = NULL;
cleanup3:
        free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
                                                         sizeof(int)));
        io_tlb_list = NULL;
        io_tlb_end = NULL;
cleanup2:
        free_pages((unsigned long)io_tlb_start, order);
        io_tlb_start = NULL;
cleanup1:
        io_tlb_nslabs = req_nslabs;
        return -ENOMEM;
}
static inline int
address_needs_mapping(struct device *hwdev, dma_addr_t addr)
{
        dma_addr_t mask = 0xffffffff;
        /* If the device has a mask, use it, otherwise default to 32 bits */
        if (hwdev && hwdev->dma_mask)
                mask = *hwdev->dma_mask;
        return (addr & ~mask) != 0;
}
/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, char *buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find suitable number of IO TLB entries size that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                wrap = index = ALIGN(io_tlb_index, stride);

                if (index >= io_tlb_nslabs)
                        wrap = index = 0;

                do {
                        /*
                         * If we find a slot that indicates we have 'nslots'
                         * number of contiguous buffers, we allocate the
                         * buffers from that slot and mark the entries as '0'
                         * indicating unavailable.
                         */
                        if (io_tlb_list[index] >= nslots) {
                                int count = 0;

                                for (i = index; i < (int) (index + nslots); i++)
                                        io_tlb_list[i] = 0;
                                for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                                        io_tlb_list[i] = ++count;
                                dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

                                /*
                                 * Update the indices to avoid searching in
                                 * the next round.
                                 */
                                io_tlb_index = ((index + nslots) < io_tlb_nslabs
                                                ? (index + nslots) : 0);

                                goto found;
                        }
                        index += stride;
                        if (index >= io_tlb_nslabs)
                                index = 0;
                } while (index != wrap);

                spin_unlock_irqrestore(&io_tlb_lock, flags);
                return NULL;
        }
found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
        io_tlb_orig_addr[index] = buffer;
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                memcpy(dma_addr, buffer, size);

        return dma_addr;
}
/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
                /*
                 * bounce... copy the data back into the original buffer and
                 * delete the bounce buffer.
                 */
                memcpy(buffer, dma_addr, size);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non zero)
                 */
                for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}
static void
sync_single(struct device *hwdev, char *dma_addr, size_t size,
            int dir, int target)
{
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        char *buffer = io_tlb_orig_addr[index];

        switch (target) {
        case SYNC_FOR_CPU:
                if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(buffer, dma_addr, size);
                else
                        BUG_ON(dir != DMA_TO_DEVICE);
                break;
        case SYNC_FOR_DEVICE:
                if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                        memcpy(dma_addr, buffer, size);
                else
                        BUG_ON(dir != DMA_FROM_DEVICE);
                break;
        default:
                BUG();
        }
}
void *
swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       dma_addr_t *dma_handle, gfp_t flags)
{
        unsigned long dev_addr;
        void *ret;
        int order = get_order(size);

        /*
         * XXX fix me: the DMA API should pass us an explicit DMA mask
         * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
         * bit range instead of a 16MB one).
         */
        flags |= GFP_DMA;

        ret = (void *)__get_free_pages(flags, order);
        if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
                /*
                 * The allocated memory isn't reachable by the device.
                 * Fall back on swiotlb_map_single().
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
        }
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
                 * to GFP_DMA memory; fall back on
                 * swiotlb_map_single(), which will grab memory from
                 * the lowest available address range.
                 */
                dma_addr_t handle;
                handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
                if (swiotlb_dma_mapping_error(handle))
                        return NULL;

                ret = phys_to_virt(handle);
        }

        memset(ret, 0, size);
        dev_addr = virt_to_phys(ret);

        /* Confirm address can be DMA'd by device */
        if (address_needs_mapping(hwdev, dev_addr)) {
                printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
                       (unsigned long long)*hwdev->dma_mask, dev_addr);
                panic("swiotlb_alloc_coherent: allocated memory is out of "
                      "range for device");
        }
        *dma_handle = dev_addr;
        return ret;
}
void
swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                      dma_addr_t dma_handle)
{
        if (!(vaddr >= (void *)io_tlb_start
              && vaddr < (void *)io_tlb_end))
                free_pages((unsigned long) vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
                swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
}
static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
               "device %s\n", size, dev ? dev->bus_id : "?");

        if (size > io_tlb_overflow && do_panic) {
                if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Memory would be corrupted\n");
                if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
                        panic("DMA: Random memory would be DMAed\n");
        }
}
/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * physical address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
 */
dma_addr_t
swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
        unsigned long dev_addr = virt_to_phys(ptr);
        void *map;

        BUG_ON(dir == DMA_NONE);
        /*
         * If the pointer passed in happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        map = map_single(hwdev, ptr, size, dir);
        if (!map) {
                swiotlb_full(hwdev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = virt_to_phys(map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (address_needs_mapping(hwdev, dev_addr))
                panic("map_single: bounce buffer is not DMA'ble");

        return dev_addr;
}
/*
 * Since DMA is i-cache coherent, any (complete) pages that were written via
 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
 * flush them when they get mapped into an executable vm-area.
 */
static void
mark_clean(void *addr, size_t size)
{
        unsigned long pg_addr, end;

        pg_addr = PAGE_ALIGN((unsigned long) addr);
        end = (unsigned long) addr + size;
        while (pg_addr + PAGE_SIZE <= end) {
                struct page *page = virt_to_page(pg_addr);
                set_bit(PG_arch_1, &page->flags);
                pg_addr += PAGE_SIZE;
        }
}
/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_single call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
                     int dir)
{
        char *dma_addr = phys_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                unmap_single(hwdev, dma_addr, size, dir);
        else if (dir == DMA_FROM_DEVICE)
                mark_clean(dma_addr, size);
}
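To show how the map/unmap pair above is typically reached, here is a minimal,
hypothetical driver-side sketch using the generic DMA API, which resolves to
the swiotlb_* functions on platforms using this file; the device pointer,
buffer and length are illustrative only:

/* Hypothetical driver-side sketch: stream a buffer to a device. */
#include <linux/dma-mapping.h>

static int example_do_dma(struct device *dev, void *buf, size_t len)
{
        dma_addr_t handle;

        /* May bounce 'buf' into the software IO TLB if it is unreachable. */
        handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (dma_mapping_error(handle))
                return -ENOMEM;         /* the overflow buffer was returned */

        /* ... program the device with 'handle' and wait for completion ... */

        /* Frees the bounce slots; for DMA_FROM_DEVICE it would also copy
         * the bounced data back into 'buf'. */
        dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
        return 0;
}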
/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
 * using the cpu, yet do not wish to teardown the dma mapping, you must
 * call this function before doing so.  At the next point you give the dma
 * address back to the card, you must first perform a
 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
 */
static inline void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
{
        char *dma_addr = phys_to_virt(dev_addr);

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, int dir)
{
        swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
}
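The ownership hand-off described in the comment above is easiest to see in a
short, hypothetical driver fragment, again via the generic DMA API wrappers;
the names and the long-lived mapping are illustrative assumptions:

/* Hypothetical: reuse one long-lived mapping across several transfers. */
static void example_poll_ring(struct device *dev, dma_addr_t ring_handle,
                              void *ring_virt, size_t ring_len)
{
        /* Give the CPU a coherent view (copies out of the bounce buffer). */
        dma_sync_single_for_cpu(dev, ring_handle, ring_len, DMA_FROM_DEVICE);

        /* ... inspect ring_virt on the CPU ... */

        /* Hand the buffer back to the device before it DMAs again. */
        dma_sync_single_for_device(dev, ring_handle, ring_len, DMA_FROM_DEVICE);
}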
/*
 * Same as above, but for a sub-range of the mapping.
 */
static inline void
swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
{
        char *dma_addr = phys_to_virt(dev_addr) + offset;

        BUG_ON(dir == DMA_NONE);
        if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
                sync_single(hwdev, dma_addr, size, dir, target);
        else if (dir == DMA_FROM_DEVICE)
                mark_clean(dma_addr, size);
}

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                                  unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_CPU);
}

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
                                     unsigned long offset, size_t size, int dir)
{
        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
                                  SYNC_FOR_DEVICE);
}
/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_single
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_single are the
 * same here.
 */
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
               int dir)
{
        void *addr;
        unsigned long dev_addr;
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                dev_addr = virt_to_phys(addr);
                if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
                        void *map = map_single(hwdev, addr, sg->length, dir);
                        sg->dma_address = virt_to_bus(map);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg(hwdev, sg - i, i, dir);
                                sg[0].dma_length = 0;
                                return 0;
                        }
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}
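A hypothetical caller of the scatter-gather path, again through the generic
DMA API; the two-element list and the field assignments follow the struct
scatterlist layout this file itself uses (page/offset/length and
dma_address/dma_length), and the function name is illustrative:

/* Hypothetical: map two discontiguous pages as one SG transfer. */
#include <linux/dma-mapping.h>
#include <asm/scatterlist.h>

static int example_map_two_pages(struct device *dev,
                                 struct page *p0, struct page *p1)
{
        struct scatterlist sgl[2];
        int i, mapped;

        memset(sgl, 0, sizeof(sgl));
        sgl[0].page = p0; sgl[0].offset = 0; sgl[0].length = PAGE_SIZE;
        sgl[1].page = p1; sgl[1].offset = 0; sgl[1].length = PAGE_SIZE;

        mapped = dma_map_sg(dev, sgl, 2, DMA_TO_DEVICE);
        if (mapped == 0)
                return -ENOMEM;         /* swiotlb_map_sg ran out of slots */

        for (i = 0; i < mapped; i++) {
                /* Program the device with each pair:
                 * sg_dma_address(&sgl[i]) / sg_dma_len(&sgl[i]). */
        }

        dma_unmap_sg(dev, sgl, 2, DMA_TO_DEVICE);
        return 0;
}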
/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_single() above.
 */
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
                 int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
                else if (dir == DMA_FROM_DEVICE)
                        mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
}
/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
static inline void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
                int nelems, int dir, int target)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        sync_single(hwdev, (void *) sg->dma_address,
                                    sg->dma_length, dir, target);
}

void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
}

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, int dir)
{
        swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
        return (dma_addr == virt_to_phys(io_tlb_overflow_buffer));
}
/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported (struct device *hwdev, u64 mask)
{
        return (virt_to_phys (io_tlb_end) - 1) <= mask;
}
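For completeness, a hypothetical probe-time fragment showing how this check is
normally reached: the driver proposes a mask with dma_set_mask(), which
consults the platform's dma_supported hook (swiotlb_dma_supported() on
platforms using this implementation); the 24-bit mask mirrors the example in
the comment above.

/* Hypothetical probe-time mask negotiation. */
static int example_set_mask(struct device *dev)
{
        /* A 24-bit-only bus master, as in the comment above. */
        if (dma_set_mask(dev, 0x00ffffffULL))
                return -EIO;    /* mask cannot be supported */
        return 0;
}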
EXPORT_SYMBOL(swiotlb_init);
EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_alloc_coherent);
EXPORT_SYMBOL(swiotlb_free_coherent);
EXPORT_SYMBOL(swiotlb_dma_supported);