ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c @ 6538:84ee014ebd41

Merge xen-vtx-unstable.hg
author adsharma@los-vmm.sc.intel.com
date Wed Aug 17 12:34:38 2005 -0800 (2005-08-17)
parents 23979fb12c49 3d187585c141
children 99914b54f7bf
line source
1 /*
2 * Dynamic DMA mapping support.
3 *
4 * This implementation is a fallback for platforms that do not support
5 * I/O TLBs (aka DMA address translation hardware).
6 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Copyright (C) 2005 Keir Fraser <keir@xensource.com>
11 */
13 #include <linux/cache.h>
14 #include <linux/mm.h>
15 #include <linux/module.h>
16 #include <linux/pci.h>
17 #include <linux/spinlock.h>
18 #include <linux/string.h>
19 #include <linux/types.h>
20 #include <linux/ctype.h>
21 #include <linux/init.h>
22 #include <linux/bootmem.h>
23 #include <linux/highmem.h>
24 #include <asm/io.h>
25 #include <asm/pci.h>
26 #include <asm/dma.h>
/* Bits of 'v' below the alignment boundary 'a' ('a' must be a power of 2). */
#define OFFSET(v, a)	((unsigned long)((v) & ((a) - 1)))

/* Physical address described by a scatterlist entry (page base + offset). */
#define SG_ENT_PHYS_ADDRESS(ent) \
	(page_to_phys((ent)->page) + (ent)->offset)

/*
 * Upper bound on the number of contiguous slabs one mapping may occupy.
 * Must be a power of 2. The cost of {map,unmap}_single grows linearly
 * with this value.
 */
#define IO_TLB_SEGSIZE	128

/*
 * log2 of the size in bytes of one IO TLB slab. The number of slabs is
 * controllable from the command line.
 */
#define IO_TLB_SHIFT	11
/* Set to 1 when the "swiotlb=" boot option carries the "force" keyword. */
int swiotlb_force;

/*
 * Bounds of the bounce-buffer pool. Used for a quick range check in the
 * unmap and sync entry points to tell whether an address was handed out
 * by this allocator.
 */
static char *io_tlb_start;
static char *io_tlb_end;

/*
 * Number of IO TLB slabs (each 1 << IO_TLB_SHIFT bytes) between
 * io_tlb_start and io_tlb_end. Adjustable on the command line via
 * setup_io_tlb_npages.
 */
static unsigned long io_tlb_nslabs;

/*
 * Size in bytes of the fallback buffer that is returned when the pool
 * has no room left for a mapping.
 */
static unsigned long io_tlb_overflow = 32 * 1024;

/* The fallback buffer itself. */
void *io_tlb_overflow_buffer;

/*
 * Free list: io_tlb_list[i] holds the number of contiguous free slabs
 * available starting at index i (0 means the slab is in use).
 */
static unsigned int *io_tlb_list;

/* Slab index at which the next search for free slabs starts. */
static unsigned int io_tlb_index;

/*
 * Original location (page + offset) of the buffer behind each mapped
 * entry; the sync operations need it to copy data back and forth.
 */
struct phys_addr {
	struct page *page;
	unsigned int offset;
};
static struct phys_addr *io_tlb_orig_addr;

/* Serialises access to the structures above in the map and unmap paths. */
static DEFINE_SPINLOCK(io_tlb_lock);
88 static int __init
89 setup_io_tlb_npages(char *str)
90 {
91 if (isdigit(*str)) {
92 io_tlb_nslabs = simple_strtoul(str, &str, 0) <<
93 (PAGE_SHIFT - IO_TLB_SHIFT);
94 /* avoid tail segment of size < IO_TLB_SEGSIZE */
95 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
96 }
97 if (*str == ',')
98 ++str;
99 /*
100 * NB. 'force' enables the swiotlb, but doesn't force its use for
101 * every DMA like it does on native Linux.
102 */
103 if (!strcmp(str, "force"))
104 swiotlb_force = 1;
105 return 1;
106 }
107 __setup("swiotlb=", setup_io_tlb_npages);
108 /* make io_tlb_overflow tunable too? */
110 /*
111 * Statically reserve bounce buffer space and initialize bounce buffer data
112 * structures for the software IO TLB used to implement the PCI DMA API.
113 */
114 void
115 swiotlb_init_with_default_size (size_t default_size)
116 {
117 unsigned long i;
119 if (!io_tlb_nslabs) {
120 io_tlb_nslabs = (default_size >> PAGE_SHIFT);
121 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
122 }
124 /*
125 * Get IO TLB memory from the low pages
126 */
127 io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
128 (1 << IO_TLB_SHIFT));
129 if (!io_tlb_start)
130 panic("Cannot allocate SWIOTLB buffer");
132 xen_create_contiguous_region(
133 (unsigned long)io_tlb_start,
134 get_order(io_tlb_nslabs * (1 << IO_TLB_SHIFT)));
136 io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
138 /*
139 * Allocate and initialize the free list array. This array is used
140 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
141 * between io_tlb_start and io_tlb_end.
142 */
143 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
144 for (i = 0; i < io_tlb_nslabs; i++)
145 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
146 io_tlb_index = 0;
147 io_tlb_orig_addr = alloc_bootmem(
148 io_tlb_nslabs * sizeof(*io_tlb_orig_addr));
150 /*
151 * Get the overflow emergency buffer
152 */
153 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
154 printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
155 virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end-1));
156 }
158 void
159 swiotlb_init(void)
160 {
161 /* The user can forcibly enable swiotlb. */
162 if (swiotlb_force)
163 swiotlb = 1;
165 /*
166 * Otherwise, enable for domain 0 if the machine has 'lots of memory',
167 * which we take to mean more than 2GB.
168 */
169 if (xen_start_info.flags & SIF_INITDOMAIN) {
170 dom0_op_t op;
171 op.cmd = DOM0_PHYSINFO;
172 if ((HYPERVISOR_dom0_op(&op) == 0) &&
173 (op.u.physinfo.total_pages > 0x7ffff))
174 swiotlb = 1;
175 }
177 if (swiotlb)
178 swiotlb_init_with_default_size(64 * (1<<20));
179 }
181 static void
182 __sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
183 {
184 if (PageHighMem(buffer.page)) {
185 size_t len, bytes;
186 char *dev, *host, *kmp;
187 len = size;
188 while (len != 0) {
189 if (((bytes = len) + buffer.offset) > PAGE_SIZE)
190 bytes = PAGE_SIZE - buffer.offset;
191 kmp = kmap_atomic(buffer.page, KM_SWIOTLB);
192 dev = dma_addr + size - len;
193 host = kmp + buffer.offset;
194 memcpy((dir == DMA_FROM_DEVICE) ? host : dev,
195 (dir == DMA_FROM_DEVICE) ? dev : host,
196 bytes);
197 kunmap_atomic(kmp, KM_SWIOTLB);
198 len -= bytes;
199 buffer.page++;
200 buffer.offset = 0;
201 }
202 } else {
203 char *host = (char *)phys_to_virt(
204 page_to_pseudophys(buffer.page)) + buffer.offset;
205 if (dir == DMA_FROM_DEVICE)
206 memcpy(host, dma_addr, size);
207 else if (dir == DMA_TO_DEVICE)
208 memcpy(dma_addr, host, size);
209 }
210 }
212 /*
213 * Allocates bounce buffer and returns its kernel virtual address.
214 */
215 static void *
216 map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
217 {
218 unsigned long flags;
219 char *dma_addr;
220 unsigned int nslots, stride, index, wrap;
221 int i;
223 /*
224 * For mappings greater than a page, we limit the stride (and
225 * hence alignment) to a page size.
226 */
227 nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
228 if (size > PAGE_SIZE)
229 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
230 else
231 stride = 1;
233 BUG_ON(!nslots);
235 /*
236 * Find suitable number of IO TLB entries size that will fit this
237 * request and allocate a buffer from that IO TLB pool.
238 */
239 spin_lock_irqsave(&io_tlb_lock, flags);
240 {
241 wrap = index = ALIGN(io_tlb_index, stride);
243 if (index >= io_tlb_nslabs)
244 wrap = index = 0;
246 do {
247 /*
248 * If we find a slot that indicates we have 'nslots'
249 * number of contiguous buffers, we allocate the
250 * buffers from that slot and mark the entries as '0'
251 * indicating unavailable.
252 */
253 if (io_tlb_list[index] >= nslots) {
254 int count = 0;
256 for (i = index; i < (int)(index + nslots); i++)
257 io_tlb_list[i] = 0;
258 for (i = index - 1;
259 (OFFSET(i, IO_TLB_SEGSIZE) !=
260 IO_TLB_SEGSIZE -1) && io_tlb_list[i];
261 i--)
262 io_tlb_list[i] = ++count;
263 dma_addr = io_tlb_start +
264 (index << IO_TLB_SHIFT);
266 /*
267 * Update the indices to avoid searching in
268 * the next round.
269 */
270 io_tlb_index =
271 ((index + nslots) < io_tlb_nslabs
272 ? (index + nslots) : 0);
274 goto found;
275 }
276 index += stride;
277 if (index >= io_tlb_nslabs)
278 index = 0;
279 } while (index != wrap);
281 spin_unlock_irqrestore(&io_tlb_lock, flags);
282 return NULL;
283 }
284 found:
285 spin_unlock_irqrestore(&io_tlb_lock, flags);
287 /*
288 * Save away the mapping from the original address to the DMA address.
289 * This is needed when we sync the memory. Then we sync the buffer if
290 * needed.
291 */
292 io_tlb_orig_addr[index] = buffer;
293 if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
294 __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
296 return dma_addr;
297 }
299 /*
300 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
301 */
302 static void
303 unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
304 {
305 unsigned long flags;
306 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
307 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
308 struct phys_addr buffer = io_tlb_orig_addr[index];
310 /*
311 * First, sync the memory before unmapping the entry
312 */
313 if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
314 __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
316 /*
317 * Return the buffer to the free list by setting the corresponding
318 * entries to indicate the number of contigous entries available.
319 * While returning the entries to the free list, we merge the entries
320 * with slots below and above the pool being returned.
321 */
322 spin_lock_irqsave(&io_tlb_lock, flags);
323 {
324 count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
325 io_tlb_list[index + nslots] : 0);
326 /*
327 * Step 1: return the slots to the free list, merging the
328 * slots with superceeding slots
329 */
330 for (i = index + nslots - 1; i >= index; i--)
331 io_tlb_list[i] = ++count;
332 /*
333 * Step 2: merge the returned slots with the preceding slots,
334 * if available (non zero)
335 */
336 for (i = index - 1;
337 (OFFSET(i, IO_TLB_SEGSIZE) !=
338 IO_TLB_SEGSIZE -1) && io_tlb_list[i];
339 i--)
340 io_tlb_list[i] = ++count;
341 }
342 spin_unlock_irqrestore(&io_tlb_lock, flags);
343 }
345 static void
346 sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
347 {
348 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
349 struct phys_addr buffer = io_tlb_orig_addr[index];
350 BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
351 __sync_single(buffer, dma_addr, size, dir);
352 }
354 static void
355 swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
356 {
357 /*
358 * Ran out of IOMMU space for this operation. This is very bad.
359 * Unfortunately the drivers cannot handle this operation properly.
360 * unless they check for pci_dma_mapping_error (most don't)
361 * When the mapping is small enough return a static buffer to limit
362 * the damage, or panic when the transfer is too big.
363 */
364 printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
365 "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
367 if (size > io_tlb_overflow && do_panic) {
368 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
369 panic("PCI-DMA: Memory would be corrupted\n");
370 if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
371 panic("PCI-DMA: Random memory would be DMAed\n");
372 }
373 }
375 /*
376 * Map a single buffer of the indicated size for DMA in streaming mode. The
377 * PCI address to use is returned.
378 *
379 * Once the device is given the dma address, the device owns this memory until
380 * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
381 */
382 dma_addr_t
383 swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
384 {
385 dma_addr_t dev_addr = virt_to_bus(ptr);
386 void *map;
387 struct phys_addr buffer;
389 BUG_ON(dir == DMA_NONE);
391 /*
392 * If the pointer passed in happens to be in the device's DMA window,
393 * we can safely return the device addr and not worry about bounce
394 * buffering it.
395 */
396 if (!range_straddles_page_boundary(ptr, size) &&
397 !address_needs_mapping(hwdev, dev_addr))
398 return dev_addr;
400 /*
401 * Oh well, have to allocate and map a bounce buffer.
402 */
403 buffer.page = virt_to_page(ptr);
404 buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
405 map = map_single(hwdev, buffer, size, dir);
406 if (!map) {
407 swiotlb_full(hwdev, size, dir, 1);
408 map = io_tlb_overflow_buffer;
409 }
411 dev_addr = virt_to_bus(map);
413 /*
414 * Ensure that the address returned is DMA'ble
415 */
416 if (address_needs_mapping(hwdev, dev_addr))
417 panic("map_single: bounce buffer is not DMA'ble");
419 return dev_addr;
420 }
422 /*
423 * Unmap a single streaming mode DMA translation. The dma_addr and size must
424 * match what was provided for in a previous swiotlb_map_single call. All
425 * other usages are undefined.
426 *
427 * After this call, reads by the cpu to the buffer are guaranteed to see
428 * whatever the device wrote there.
429 */
430 void
431 swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
432 int dir)
433 {
434 char *dma_addr = bus_to_virt(dev_addr);
436 BUG_ON(dir == DMA_NONE);
437 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
438 unmap_single(hwdev, dma_addr, size, dir);
439 }
441 /*
442 * Make physical memory consistent for a single streaming mode DMA translation
443 * after a transfer.
444 *
445 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
446 * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
447 * call this function before doing so. At the next point you give the PCI dma
448 * address back to the card, you must first perform a
449 * swiotlb_dma_sync_for_device, and then the device again owns the buffer
450 */
451 void
452 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
453 size_t size, int dir)
454 {
455 char *dma_addr = bus_to_virt(dev_addr);
457 BUG_ON(dir == DMA_NONE);
458 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
459 sync_single(hwdev, dma_addr, size, dir);
460 }
462 void
463 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
464 size_t size, int dir)
465 {
466 char *dma_addr = bus_to_virt(dev_addr);
468 BUG_ON(dir == DMA_NONE);
469 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
470 sync_single(hwdev, dma_addr, size, dir);
471 }
473 /*
474 * Map a set of buffers described by scatterlist in streaming mode for DMA.
475 * This is the scatter-gather version of the above swiotlb_map_single
476 * interface. Here the scatter gather list elements are each tagged with the
477 * appropriate dma address and length. They are obtained via
478 * sg_dma_{address,length}(SG).
479 *
480 * NOTE: An implementation may be able to use a smaller number of
481 * DMA address/length pairs than there are SG table elements.
482 * (for example via virtual mapping capabilities)
483 * The routine returns the number of addr/length pairs actually
484 * used, at most nents.
485 *
486 * Device ownership issues as mentioned above for swiotlb_map_single are the
487 * same here.
488 */
489 int
490 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
491 int dir)
492 {
493 struct phys_addr buffer;
494 dma_addr_t dev_addr;
495 char *map;
496 int i;
498 BUG_ON(dir == DMA_NONE);
500 for (i = 0; i < nelems; i++, sg++) {
501 dev_addr = SG_ENT_PHYS_ADDRESS(sg);
502 if (address_needs_mapping(hwdev, dev_addr)) {
503 buffer.page = sg->page;
504 buffer.offset = sg->offset;
505 map = map_single(hwdev, buffer, sg->length, dir);
506 if (!map) {
507 /* Don't panic here, we expect map_sg users
508 to do proper error handling. */
509 swiotlb_full(hwdev, sg->length, dir, 0);
510 swiotlb_unmap_sg(hwdev, sg - i, i, dir);
511 sg[0].dma_length = 0;
512 return 0;
513 }
514 sg->dma_address = (dma_addr_t)virt_to_bus(map);
515 } else
516 sg->dma_address = dev_addr;
517 sg->dma_length = sg->length;
518 }
519 return nelems;
520 }
522 /*
523 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
524 * concerning calls here are the same as for swiotlb_unmap_single() above.
525 */
526 void
527 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
528 int dir)
529 {
530 int i;
532 BUG_ON(dir == DMA_NONE);
534 for (i = 0; i < nelems; i++, sg++)
535 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
536 unmap_single(hwdev,
537 (void *)bus_to_virt(sg->dma_address),
538 sg->dma_length, dir);
539 }
541 /*
542 * Make physical memory consistent for a set of streaming mode DMA translations
543 * after a transfer.
544 *
545 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
546 * and usage.
547 */
548 void
549 swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
550 int nelems, int dir)
551 {
552 int i;
554 BUG_ON(dir == DMA_NONE);
556 for (i = 0; i < nelems; i++, sg++)
557 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
558 sync_single(hwdev,
559 (void *)bus_to_virt(sg->dma_address),
560 sg->dma_length, dir);
561 }
563 void
564 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
565 int nelems, int dir)
566 {
567 int i;
569 BUG_ON(dir == DMA_NONE);
571 for (i = 0; i < nelems; i++, sg++)
572 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
573 sync_single(hwdev,
574 (void *)bus_to_virt(sg->dma_address),
575 sg->dma_length, dir);
576 }
578 dma_addr_t
579 swiotlb_map_page(struct device *hwdev, struct page *page,
580 unsigned long offset, size_t size,
581 enum dma_data_direction direction)
582 {
583 struct phys_addr buffer;
584 dma_addr_t dev_addr;
585 char *map;
587 dev_addr = page_to_phys(page) + offset;
588 if (address_needs_mapping(hwdev, dev_addr)) {
589 buffer.page = page;
590 buffer.offset = offset;
591 map = map_single(hwdev, buffer, size, direction);
592 if (!map) {
593 swiotlb_full(hwdev, size, direction, 1);
594 map = io_tlb_overflow_buffer;
595 }
596 dev_addr = (dma_addr_t)virt_to_bus(map);
597 }
599 return dev_addr;
600 }
602 void
603 swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
604 size_t size, enum dma_data_direction direction)
605 {
606 char *dma_addr = bus_to_virt(dma_address);
608 BUG_ON(direction == DMA_NONE);
609 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
610 unmap_single(hwdev, dma_addr, size, direction);
611 }
613 int
614 swiotlb_dma_mapping_error(dma_addr_t dma_addr)
615 {
616 return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
617 }
619 /*
620 * Return whether the given PCI device DMA address mask can be supported
621 * properly. For example, if your device can only drive the low 24-bits
622 * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
623 * this function.
624 */
625 int
626 swiotlb_dma_supported (struct device *hwdev, u64 mask)
627 {
628 return (mask >= 0xffffffffUL);
629 }
/* Initialisation and capability queries. */
EXPORT_SYMBOL(swiotlb_init);
EXPORT_SYMBOL(swiotlb_dma_supported);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);

/* Single-buffer streaming mappings. */
EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);

/* Scatter-gather streaming mappings. */
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);

/* Page-based streaming mappings. */
EXPORT_SYMBOL(swiotlb_map_page);
EXPORT_SYMBOL(swiotlb_unmap_page);
645 /*
646 * Local variables:
647 * c-file-style: "linux"
648 * indent-tabs-mode: t
649 * c-indent-level: 8
650 * c-basic-offset: 8
651 * tab-width: 8
652 * End:
653 */