ia64/linux-2.6.18-xen.hg

view arch/ia64/xen/swiotlb.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
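
As a rough illustration of the behaviour described above, the retry logic
amounts to the sketch below. All names in it (balloon_worker,
increase_reservation, current_pages, target_pages, balloon_timer) are
placeholders for this sketch rather than the driver's actual symbols.

    /* Sketch: keep whatever pages were granted and retry later; no hard limit. */
    static void balloon_worker(void *unused)
    {
        long wanted = target_pages - current_pages;

        if (wanted > 0) {
            long got = increase_reservation(wanted); /* may return < wanted */
            current_pages += got;                    /* keep partial successes */
            if (current_pages < target_pages)
                mod_timer(&balloon_timer, jiffies + HZ); /* retry, as when shrinking */
        }
    }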
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 6fcc6c1e87f6
line source
1 /*
2 * Dynamic DMA mapping support.
3 *
4 * This implementation is for IA-64 and EM64T platforms that do not support
5 * I/O TLBs (aka DMA address translation hardware).
6 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 *
11 * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
12 * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
13 * unnecessary i-cache flushing.
14 * 04/07/.. ak Better overflow handling. Assorted fixes.
15 * 05/09/10 linville Add support for syncing ranges, support syncing for
16 * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
17 */
19 #include <linux/cache.h>
20 #include <linux/dma-mapping.h>
21 #include <linux/mm.h>
22 #include <linux/module.h>
23 #include <linux/spinlock.h>
24 #include <linux/string.h>
25 #include <linux/types.h>
26 #include <linux/ctype.h>
28 #include <asm/io.h>
29 #include <asm/dma.h>
30 #include <asm/scatterlist.h>
32 #include <linux/init.h>
33 #include <linux/bootmem.h>
35 #ifdef CONFIG_XEN
36 #include <xen/gnttab.h>
37 #include <asm/gnttab_dma.h>
38 /*
39 * What DMA mask should Xen use to remap the bounce buffer pool? Most
40 * reports seem to indicate 30 bits is sufficient, except maybe for old
41 * sound cards that we probably don't care about anyway. If we need to,
42 * we could put in some smarts to try to lower, but hopefully it's not
43 * necessary.
44 */
45 #define DMA_BITS (30)
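/* A 30-bit mask restricts the remapped pool to the first 1GB (2^30 bytes) of machine address space. */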
46 #endif
48 #define OFFSET(val,align) ((unsigned long) \
49 ( (val) & ( (align) - 1)))
51 #define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
52 #define SG_ENT_PHYS_ADDRESS(SG) virt_to_bus(SG_ENT_VIRT_ADDRESS(SG))
54 /*
55 * Maximum allowable number of contiguous slabs to map,
56 * must be a power of 2. What is the appropriate value?
57 * The complexity of {map,unmap}_single is linearly dependent on this value.
58 */
59 #define IO_TLB_SEGSIZE 128
61 /*
62 * log of the size of each IO TLB slab. The number of slabs is command line
63 * controllable.
64 */
65 #define IO_TLB_SHIFT 11
67 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
69 /*
70 * Minimum IO TLB size to bother booting with. Systems with mainly
71 * 64-bit capable cards will only lightly use the swiotlb. If we can't
72 * allocate a contiguous 1MB, we're probably in trouble anyway.
73 */
74 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
76 /*
77 * Enumeration for sync targets
78 */
79 enum dma_sync_target {
80 SYNC_FOR_CPU = 0,
81 SYNC_FOR_DEVICE = 1,
82 };
84 int swiotlb_force;
86 /*
87 * Used to do a quick range check in swiotlb_unmap_single and
88 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
89 * API.
90 */
91 static char *io_tlb_start, *io_tlb_end;
93 /*
94 * The number of IO TLB blocks (in groups of IO_TLB_SEGSIZE) between io_tlb_start and
95 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
96 */
97 static unsigned long io_tlb_nslabs;
99 /*
100 * When the IOMMU overflows we return a fallback buffer. This sets the size.
101 */
102 static unsigned long io_tlb_overflow = 32*1024;
104 void *io_tlb_overflow_buffer;
106 /*
107 * This is a free list describing the number of free entries available from
108 * each index
109 */
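/*
 * Invariant maintained below: io_tlb_list[i] is the number of contiguous
 * free slots starting at index i, counted up to the next in-use slot or
 * the next IO_TLB_SEGSIZE boundary, whichever comes first; 0 means slot i
 * is currently allocated.
 */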
110 static unsigned int *io_tlb_list;
111 static unsigned int io_tlb_index;
113 /*
114 * We need to save away the original address corresponding to a mapped entry
115 * for the sync operations.
116 */
117 static unsigned char **io_tlb_orig_addr;
119 /*
120 * Protect the above data structures in the map and unmap calls
121 */
122 static DEFINE_SPINLOCK(io_tlb_lock);
124 static int __init
125 setup_io_tlb_npages(char *str)
126 {
127 if (isdigit(*str)) {
128 io_tlb_nslabs = simple_strtoul(str, &str, 0);
129 /* avoid tail segment of size < IO_TLB_SEGSIZE */
130 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
131 }
132 if (*str == ',')
133 ++str;
134 if (!strcmp(str, "force"))
135 swiotlb_force = 1;
136 return 1;
137 }
138 __setup("swiotlb=", setup_io_tlb_npages);
139 /* make io_tlb_overflow tunable too? */
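/*
 * Example: booting with "swiotlb=65536" asks for 65536 slabs of
 * 1 << IO_TLB_SHIFT (2KB) each, i.e. a 128MB bounce pool, and
 * "swiotlb=65536,force" additionally sets swiotlb_force so that even
 * DMA-reachable buffers are bounced.
 */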
141 /*
142 * Statically reserve bounce buffer space and initialize bounce buffer data
143 * structures for the software IO TLB used to implement the DMA API.
144 */
145 void
146 swiotlb_init_with_default_size (size_t default_size)
147 {
148 unsigned long i;
150 if (!io_tlb_nslabs) {
151 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
152 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
153 }
155 /*
156 * Get IO TLB memory from the low pages
157 */
158 io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
159 if (!io_tlb_start)
160 panic("Cannot allocate SWIOTLB buffer");
161 io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
163 #ifdef CONFIG_XEN
164 for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
165 if (xen_create_contiguous_region(
166 (unsigned long)io_tlb_start +
167 (i << IO_TLB_SHIFT),
168 get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
169 DMA_BITS))
170 panic("Failed to setup Xen contiguous region");
171 }
172 #endif
174 /*
175 * Allocate and initialize the free list array. This array is used
176 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
177 * between io_tlb_start and io_tlb_end.
178 */
179 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
180 for (i = 0; i < io_tlb_nslabs; i++)
181 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
182 io_tlb_index = 0;
183 io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
185 /*
186 * Get the overflow emergency buffer
187 */
188 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
189 #ifdef CONFIG_XEN
190 if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
191 get_order(io_tlb_overflow), DMA_BITS))
192 panic("Failed to setup Xen contiguous region for overflow");
193 #endif
194 printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
195 virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
196 }
198 void
199 swiotlb_init (void)
200 {
201 swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */
202 }
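/* With 2KB slabs, the 64MB default corresponds to 32768 slabs. */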
204 /*
205 * Systems with larger DMA zones (those that don't support ISA) can
206 * initialize the swiotlb later using the slab allocator if needed.
207 * This should be just like above, but with some error catching.
208 */
209 int
210 swiotlb_late_init_with_default_size (size_t default_size)
211 {
212 unsigned long i, req_nslabs = io_tlb_nslabs;
213 unsigned int order;
215 if (!io_tlb_nslabs) {
216 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
217 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
218 }
220 /*
221 * Get IO TLB memory from the low pages
222 */
223 order = get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
224 io_tlb_nslabs = SLABS_PER_PAGE << order;
226 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
227 io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
228 order);
229 if (io_tlb_start)
230 break;
231 order--;
232 }
234 if (!io_tlb_start)
235 goto cleanup1;
237 if (order != get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT))) {
238 printk(KERN_WARNING "Warning: only able to allocate %ld MB "
239 "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
240 io_tlb_nslabs = SLABS_PER_PAGE << order;
241 }
242 io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
243 memset(io_tlb_start, 0, io_tlb_nslabs * (1 << IO_TLB_SHIFT));
245 #ifdef CONFIG_XEN
246 for (i = 0 ; i < io_tlb_nslabs ; i += IO_TLB_SEGSIZE) {
247 if (xen_create_contiguous_region(
248 (unsigned long)io_tlb_start +
249 (i << IO_TLB_SHIFT),
250 get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
251 DMA_BITS))
252 panic("Failed to setup Xen contiguous region");
253 }
254 #endif
255 /*
256 * Allocate and initialize the free list array. This array is used
257 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
258 * between io_tlb_start and io_tlb_end.
259 */
260 io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
261 get_order(io_tlb_nslabs * sizeof(int)));
262 if (!io_tlb_list)
263 goto cleanup2;
265 for (i = 0; i < io_tlb_nslabs; i++)
266 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
267 io_tlb_index = 0;
269 io_tlb_orig_addr = (unsigned char **)__get_free_pages(GFP_KERNEL,
270 get_order(io_tlb_nslabs * sizeof(char *)));
271 if (!io_tlb_orig_addr)
272 goto cleanup3;
274 memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(char *));
276 /*
277 * Get the overflow emergency buffer
278 */
279 io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
280 get_order(io_tlb_overflow));
281 if (!io_tlb_overflow_buffer)
282 goto cleanup4;
284 #ifdef CONFIG_XEN
285 if (xen_create_contiguous_region((unsigned long)io_tlb_overflow_buffer,
286 get_order(io_tlb_overflow), DMA_BITS))
287 panic("Failed to setup Xen contiguous region for overflow");
288 #endif
289 printk(KERN_INFO "Placing %ldMB software IO TLB between 0x%lx - "
290 "0x%lx\n", (io_tlb_nslabs * (1 << IO_TLB_SHIFT)) >> 20,
291 virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
293 return 0;
295 cleanup4:
296 free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
297 sizeof(char *)));
298 io_tlb_orig_addr = NULL;
299 cleanup3:
300 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
301 sizeof(int)));
302 io_tlb_list = NULL;
303 io_tlb_end = NULL;
304 cleanup2:
305 free_pages((unsigned long)io_tlb_start, order);
306 io_tlb_start = NULL;
307 cleanup1:
308 io_tlb_nslabs = req_nslabs;
309 return -ENOMEM;
310 }
312 static inline int
313 address_needs_mapping(struct device *hwdev, dma_addr_t addr)
314 {
315 dma_addr_t mask = 0xffffffff;
316 /* If the device has a mask, use it, otherwise default to 32 bits */
317 if (hwdev && hwdev->dma_mask)
318 mask = *hwdev->dma_mask;
319 return (addr & ~mask) != 0;
320 }
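/*
 * Example: a device with a 30-bit DMA mask (*hwdev->dma_mask == 0x3fffffff)
 * and a buffer at bus address 0x40000000 gives (addr & ~mask) == 0x40000000,
 * which is non-zero, so the buffer is not directly reachable and must be
 * bounced.
 */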
322 /*
323 * Allocates bounce buffer and returns its kernel virtual address.
324 */
325 static void *
326 map_single(struct device *hwdev, char *buffer, size_t size, int dir)
327 {
328 unsigned long flags;
329 char *dma_addr;
330 unsigned int nslots, stride, index, wrap;
331 char *slot_buf;
332 int i;
334 /*
335 * For mappings greater than a page, we limit the stride (and
336 * hence alignment) to a page size.
337 */
338 nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
339 if (size > PAGE_SIZE)
340 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
341 else
342 stride = 1;
344 BUG_ON(!nslots);
346 /*
347 * Find suitable number of IO TLB entries size that will fit this
348 * request and allocate a buffer from that IO TLB pool.
349 */
350 spin_lock_irqsave(&io_tlb_lock, flags);
351 {
352 wrap = index = ALIGN(io_tlb_index, stride);
354 if (index >= io_tlb_nslabs)
355 wrap = index = 0;
357 do {
358 /*
359 * If we find a slot that indicates we have 'nslots'
360 * number of contiguous buffers, we allocate the
361 * buffers from that slot and mark the entries as '0'
362 * indicating unavailable.
363 */
364 if (io_tlb_list[index] >= nslots) {
365 int count = 0;
367 for (i = index; i < (int) (index + nslots); i++)
368 io_tlb_list[i] = 0;
369 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
370 io_tlb_list[i] = ++count;
371 dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
373 /*
374 * Update the indices to avoid searching in
375 * the next round.
376 */
377 io_tlb_index = ((index + nslots) < io_tlb_nslabs
378 ? (index + nslots) : 0);
380 goto found;
381 }
382 index += stride;
383 if (index >= io_tlb_nslabs)
384 index = 0;
385 } while (index != wrap);
387 spin_unlock_irqrestore(&io_tlb_lock, flags);
388 return NULL;
389 }
390 found:
391 spin_unlock_irqrestore(&io_tlb_lock, flags);
393 /*
394 * Save away the mapping from the original address to the DMA address.
395 * This is needed when we sync the memory. Then we sync the buffer if
396 * needed.
397 */
398 slot_buf = buffer;
399 for (i = 0; i < nslots; i++) {
400 io_tlb_orig_addr[index + i] = slot_buf;
401 slot_buf += 1 << IO_TLB_SHIFT;
402 }
403 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
404 memcpy(dma_addr, buffer, size);
406 return dma_addr;
407 }
409 /*
410 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
411 */
412 static void
413 unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
414 {
415 unsigned long flags;
416 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
417 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
418 char *buffer = io_tlb_orig_addr[index];
420 /*
421 * First, sync the memory before unmapping the entry
422 */
423 if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
424 /*
425 * bounce... copy the data back into the original buffer and
426 * delete the bounce buffer.
427 */
428 memcpy(buffer, dma_addr, size);
430 /*
431 * Return the buffer to the free list by setting the corresponding
432 * entries to indicate the number of contiguous entries available.
433 * While returning the entries to the free list, we merge the entries
434 * with slots below and above the pool being returned.
435 */
436 spin_lock_irqsave(&io_tlb_lock, flags);
437 {
438 count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
439 io_tlb_list[index + nslots] : 0);
440 /*
441 * Step 1: return the slots to the free list, merging the
442 * slots with the succeeding slots
443 */
444 for (i = index + nslots - 1; i >= index; i--)
445 io_tlb_list[i] = ++count;
446 /*
447 * Step 2: merge the returned slots with the preceding slots,
448 * if available (non-zero)
449 */
450 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
451 io_tlb_list[i] = ++count;
452 }
453 spin_unlock_irqrestore(&io_tlb_lock, flags);
454 }
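/*
 * Worked example: freeing nslots == 4 at index == 10 while slots 14..16
 * are already free (io_tlb_list[14] == 3) makes step 1 set
 * io_tlb_list[13..10] to 4, 5, 6, 7; step 2 then keeps incrementing the
 * count into any free slots below index 10 within the same
 * IO_TLB_SEGSIZE segment.
 */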
456 static void
457 sync_single(struct device *hwdev, char *dma_addr, size_t size,
458 int dir, int target)
459 {
460 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
461 char *buffer = io_tlb_orig_addr[index];
463 switch (target) {
464 case SYNC_FOR_CPU:
465 if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
466 memcpy(buffer, dma_addr, size);
467 else
468 BUG_ON(dir != DMA_TO_DEVICE);
469 break;
470 case SYNC_FOR_DEVICE:
471 if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
472 memcpy(dma_addr, buffer, size);
473 else
474 BUG_ON(dir != DMA_FROM_DEVICE);
475 break;
476 default:
477 BUG();
478 }
479 }
481 void *
482 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
483 dma_addr_t *dma_handle, gfp_t flags)
484 {
485 unsigned long dev_addr;
486 void *ret;
487 int order = get_order(size);
489 /*
490 * XXX fix me: the DMA API should pass us an explicit DMA mask
491 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
492 * bit range instead of a 16MB one).
493 */
494 flags |= GFP_DMA;
496 ret = (void *)__get_free_pages(flags, order);
497 #ifdef CONFIG_XEN
498 if (ret && is_running_on_xen()) {
499 if (xen_create_contiguous_region((unsigned long)ret, order,
500 fls64(hwdev->coherent_dma_mask))) {
501 free_pages((unsigned long)ret, order);
502 ret = NULL;
503 } else {
504 /*
505 * Short circuit the rest, xen_create_contiguous_region
506 * should fail if it didn't give us an address within
507 * the mask requested.
508 */
509 memset(ret, 0, size);
510 *dma_handle = virt_to_bus(ret);
511 return ret;
512 }
513 }
514 #endif
515 if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
516 /*
517 * The allocated memory isn't reachable by the device.
518 * Fall back on swiotlb_map_single().
519 */
520 free_pages((unsigned long) ret, order);
521 ret = NULL;
522 }
523 if (!ret) {
524 /*
525 * We are either out of memory or the device can't DMA
526 * to GFP_DMA memory; fall back on
527 * swiotlb_map_single(), which will grab memory from
528 * the lowest available address range.
529 */
530 dma_addr_t handle;
531 handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
532 if (swiotlb_dma_mapping_error(handle))
533 return NULL;
535 ret = bus_to_virt(handle);
536 }
538 memset(ret, 0, size);
539 dev_addr = virt_to_bus(ret);
541 /* Confirm address can be DMA'd by device */
542 if (address_needs_mapping(hwdev, dev_addr)) {
543 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
544 (unsigned long long)*hwdev->dma_mask, dev_addr);
545 panic("swiotlb_alloc_coherent: allocated memory is out of "
546 "range for device");
547 }
548 *dma_handle = dev_addr;
549 return ret;
550 }
552 void
553 swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
554 dma_addr_t dma_handle)
555 {
556 if (!(vaddr >= (void *)io_tlb_start
557 && vaddr < (void *)io_tlb_end)) {
558 #ifdef CONFIG_XEN
559 xen_destroy_contiguous_region((unsigned long)vaddr,
560 get_order(size));
561 #endif
562 free_pages((unsigned long) vaddr, get_order(size));
563 } else
564 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
565 swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
566 }
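/*
 * Illustrative use of the coherent API above (dev is assumed to be a valid
 * struct device with its DMA masks set):
 *
 *	dma_addr_t bus;
 *	void *cpu = swiotlb_alloc_coherent(dev, PAGE_SIZE, &bus, GFP_KERNEL);
 *	if (cpu) {
 *		... hand bus to the device, use cpu from the kernel ...
 *		swiotlb_free_coherent(dev, PAGE_SIZE, cpu, bus);
 *	}
 */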
568 static void
569 swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
570 {
571 /*
572 * Ran out of IOMMU space for this operation. This is very bad.
573 * Unfortunately the drivers cannot handle this operation properly
574 * unless they check for dma_mapping_error (most don't).
575 * When the mapping is small enough return a static buffer to limit
576 * the damage, or panic when the transfer is too big.
577 */
578 printk(KERN_ERR "DMA: Out of SW-IOMMU space for %lu bytes at "
579 "device %s\n", size, dev ? dev->bus_id : "?");
581 if (size > io_tlb_overflow && do_panic) {
582 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
583 panic("DMA: Memory would be corrupted\n");
584 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
585 panic("DMA: Random memory would be DMAed\n");
586 }
587 }
589 /*
590 * Map a single buffer of the indicated size for DMA in streaming mode. The
591 * physical address to use is returned.
592 *
593 * Once the device is given the dma address, the device owns this memory until
594 * either swiotlb_unmap_single or one of the swiotlb_sync_single_* calls is performed.
595 */
596 dma_addr_t
597 swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
598 {
599 unsigned long dev_addr = gnttab_dma_map_virt(ptr);
600 void *map;
602 BUG_ON(dir == DMA_NONE);
603 /*
604 * If the pointer passed in happens to be in the device's DMA window,
605 * we can safely return the device addr and not worry about bounce
606 * buffering it.
607 */
608 if (!range_straddles_page_boundary(__pa(ptr), size) &&
609 !address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
610 return dev_addr;
612 __gnttab_dma_unmap_page(virt_to_page(ptr));
613 /*
614 * Oh well, have to allocate and map a bounce buffer.
615 */
616 map = map_single(hwdev, ptr, size, dir);
617 if (!map) {
618 swiotlb_full(hwdev, size, dir, 1);
619 map = io_tlb_overflow_buffer;
620 }
622 dev_addr = virt_to_bus(map);
624 /*
625 * Ensure that the address returned is DMA'ble
626 */
627 if (address_needs_mapping(hwdev, dev_addr))
628 panic("map_single: bounce buffer is not DMA'ble");
630 return dev_addr;
631 }
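/*
 * Illustrative streaming-mode use of swiotlb_map_single() (dev, buf and len
 * are assumed to come from the caller):
 *
 *	dma_addr_t bus = swiotlb_map_single(dev, buf, len, DMA_TO_DEVICE);
 *	if (swiotlb_dma_mapping_error(bus))
 *		return -ENOMEM;	/* mapping fell back to the overflow buffer */
 *	... start the transfer and wait for completion ...
 *	swiotlb_unmap_single(dev, bus, len, DMA_TO_DEVICE);
 */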
633 /*
634 * Since DMA is i-cache coherent, any (complete) pages that were written via
635 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
636 * flush them when they get mapped into an executable vm-area.
637 */
638 static void
639 mark_clean(void *addr, size_t size)
640 {
641 unsigned long pg_addr, end;
643 #ifdef CONFIG_XEN
644 /* XXX: Bad things happen when starting domUs if this is enabled. */
645 if (is_running_on_xen())
646 return;
647 #endif
649 pg_addr = PAGE_ALIGN((unsigned long) addr);
650 end = (unsigned long) addr + size;
651 while (pg_addr + PAGE_SIZE <= end) {
652 struct page *page = virt_to_page(pg_addr);
653 set_bit(PG_arch_1, &page->flags);
654 pg_addr += PAGE_SIZE;
655 }
656 }
658 /*
659 * Unmap a single streaming mode DMA translation. The dma_addr and size must
660 * match what was provided for in a previous swiotlb_map_single call. All
661 * other usages are undefined.
662 *
663 * After this call, reads by the cpu to the buffer are guaranteed to see
664 * whatever the device wrote there.
665 */
666 void
667 swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
668 int dir)
669 {
670 char *dma_addr = bus_to_virt(dev_addr);
672 BUG_ON(dir == DMA_NONE);
673 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
674 unmap_single(hwdev, dma_addr, size, dir);
675 else {
676 __gnttab_dma_unmap_page(virt_to_page(dma_addr));
677 if (dir == DMA_FROM_DEVICE)
678 mark_clean(dma_addr, size);
679 }
680 }
682 /*
683 * Make physical memory consistent for a single streaming mode DMA translation
684 * after a transfer.
685 *
686 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
687 * using the cpu, yet do not wish to tear down the dma mapping, you must
688 * call this function before doing so. Before you next give the dma
689 * address back to the card, you must first perform a
690 * swiotlb_sync_single_for_device, and then the device again owns the buffer.
691 */
692 static inline void
693 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
694 size_t size, int dir, int target)
695 {
696 char *dma_addr = bus_to_virt(dev_addr);
698 BUG_ON(dir == DMA_NONE);
699 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
700 sync_single(hwdev, dma_addr, size, dir, target);
701 else if (dir == DMA_FROM_DEVICE)
702 mark_clean(dma_addr, size);
703 }
705 void
706 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
707 size_t size, int dir)
708 {
709 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
710 }
712 void
713 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
714 size_t size, int dir)
715 {
716 swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
717 }
719 /*
720 * Same as above, but for a sub-range of the mapping.
721 */
722 static inline void
723 swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
724 unsigned long offset, size_t size,
725 int dir, int target)
726 {
727 char *dma_addr = bus_to_virt(dev_addr) + offset;
729 BUG_ON(dir == DMA_NONE);
730 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
731 sync_single(hwdev, dma_addr, size, dir, target);
732 else if (dir == DMA_FROM_DEVICE)
733 mark_clean(dma_addr, size);
734 }
736 void
737 swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
738 unsigned long offset, size_t size, int dir)
739 {
740 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
741 SYNC_FOR_CPU);
742 }
744 void
745 swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
746 unsigned long offset, size_t size, int dir)
747 {
748 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
749 SYNC_FOR_DEVICE);
750 }
752 /*
753 * Map a set of buffers described by scatterlist in streaming mode for DMA.
754 * This is the scatter-gather version of the above swiotlb_map_single
755 * interface. Here the scatter gather list elements are each tagged with the
756 * appropriate dma address and length. They are obtained via
757 * sg_dma_{address,length}(SG).
758 *
759 * NOTE: An implementation may be able to use a smaller number of
760 * DMA address/length pairs than there are SG table elements.
761 * (for example via virtual mapping capabilities)
762 * The routine returns the number of addr/length pairs actually
763 * used, at most nents.
764 *
765 * Device ownership issues as mentioned above for swiotlb_map_single are the
766 * same here.
767 */
768 int
769 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
770 int dir)
771 {
772 void *addr;
773 unsigned long dev_addr;
774 int i;
776 BUG_ON(dir == DMA_NONE);
778 for (i = 0; i < nelems; i++, sg++) {
779 addr = SG_ENT_VIRT_ADDRESS(sg);
780 dev_addr = gnttab_dma_map_virt(addr);
781 if (swiotlb_force ||
782 range_straddles_page_boundary(page_to_pseudophys(sg->page)
783 + sg->offset, sg->length) ||
784 address_needs_mapping(hwdev, dev_addr)) {
785 void *map;
786 __gnttab_dma_unmap_page(sg->page);
787 map = map_single(hwdev, addr, sg->length, dir);
788 sg->dma_address = virt_to_bus(map);
789 if (!map) {
790 /* Don't panic here, we expect map_sg users
791 to do proper error handling. */
792 swiotlb_full(hwdev, sg->length, dir, 0);
793 swiotlb_unmap_sg(hwdev, sg - i, i, dir);
794 sg[0].dma_length = 0;
795 return 0;
796 }
797 } else
798 sg->dma_address = dev_addr;
799 sg->dma_length = sg->length;
800 }
801 return nelems;
802 }
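/*
 * Illustrative use: after filling sg[0..nents-1].page/.offset/.length, a
 * driver calls swiotlb_map_sg(hwdev, sg, nents, dir), programs the device
 * with sg_dma_address()/sg_dma_length() of each returned entry (a return
 * value of 0 means the mapping failed), and later undoes the mapping with
 * swiotlb_unmap_sg(hwdev, sg, nents, dir).
 */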
804 /*
805 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
806 * concerning calls here are the same as for swiotlb_unmap_single() above.
807 */
808 void
809 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
810 int dir)
811 {
812 int i;
814 BUG_ON(dir == DMA_NONE);
816 for (i = 0; i < nelems; i++, sg++)
817 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
818 unmap_single(hwdev, (void *) bus_to_virt(sg->dma_address), sg->dma_length, dir);
819 else {
820 __gnttab_dma_unmap_page(sg->page);
821 if (dir == DMA_FROM_DEVICE)
822 mark_clean(SG_ENT_VIRT_ADDRESS(sg),
823 sg->dma_length);
824 }
825 }
827 /*
828 * Make physical memory consistent for a set of streaming mode DMA translations
829 * after a transfer.
830 *
831 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
832 * and usage.
833 */
834 static inline void
835 swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
836 int nelems, int dir, int target)
837 {
838 int i;
840 BUG_ON(dir == DMA_NONE);
842 for (i = 0; i < nelems; i++, sg++)
843 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
844 sync_single(hwdev, (void *) sg->dma_address,
845 sg->dma_length, dir, target);
846 }
848 void
849 swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
850 int nelems, int dir)
851 {
852 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
853 }
855 void
856 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
857 int nelems, int dir)
858 {
859 swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
860 }
862 int
863 swiotlb_dma_mapping_error(dma_addr_t dma_addr)
864 {
865 return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
866 }
868 /*
869 * Return whether the given device DMA address mask can be supported
870 * properly. For example, if your device can only drive the low 24-bits
871 * during bus mastering, then you would pass 0x00ffffff as the mask to
872 * this function.
873 */
874 int
875 swiotlb_dma_supported (struct device *hwdev, u64 mask)
876 {
877 #ifdef CONFIG_XEN
878 return (virt_to_bus(io_tlb_end - 1)) <= mask;
879 #else
880 return (virt_to_bus(io_tlb_end) - 1) <= mask;
881 #endif
882 }
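/*
 * Under Xen, io_tlb_end points one byte past the bounce pool, and the
 * machine frame backing that address is unrelated to the (per-segment
 * contiguous) pool, so the mask check translates the last byte inside the
 * pool (io_tlb_end - 1) rather than translating io_tlb_end and then
 * subtracting.
 */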
884 EXPORT_SYMBOL(swiotlb_init);
885 EXPORT_SYMBOL(swiotlb_map_single);
886 EXPORT_SYMBOL(swiotlb_unmap_single);
887 EXPORT_SYMBOL(swiotlb_map_sg);
888 EXPORT_SYMBOL(swiotlb_unmap_sg);
889 EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
890 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
891 EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
892 EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
893 EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
894 EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
895 EXPORT_SYMBOL(swiotlb_dma_mapping_error);
896 EXPORT_SYMBOL(swiotlb_alloc_coherent);
897 EXPORT_SYMBOL(swiotlb_free_coherent);
898 EXPORT_SYMBOL(swiotlb_dma_supported);