ia64/xen-unstable: linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c @ 6316:f7dfaa2af90c

author   cl349@firebug.cl.cam.ac.uk
date     Sun Aug 21 11:02:00 2005 +0000 (2005-08-21)
parents  1872e09bfba3
children 6721abf6b16d

/*
 * Dynamic DMA mapping support.
 *
 * This implementation is a fallback for platforms that do not support
 * I/O TLBs (aka DMA address translation hardware).
 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
 * Copyright (C) 2000, 2003 Hewlett-Packard Co
 *         David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2005 Keir Fraser <keir@xensource.com>
 */

#include <linux/cache.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <asm/io.h>
#include <asm/pci.h>
#include <asm/dma.h>

#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))

#define SG_ENT_PHYS_ADDRESS(sg) (page_to_phys((sg)->page) + (sg)->offset)

/*
 * Maximum allowable number of contiguous slabs to map,
 * must be a power of 2.  What is the appropriate value?
 * The complexity of {map,unmap}_single is linearly dependent on this value.
 */
#define IO_TLB_SEGSIZE 128

/*
 * log of the size of each IO TLB slab.  The number of slabs is command line
 * controllable.
 */
#define IO_TLB_SHIFT 11
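
/*
 * With IO_TLB_SHIFT == 11 each slab is 2KB, so the 64MB default aperture
 * set up by swiotlb_init() below corresponds to 32768 slabs.
 */
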
int swiotlb_force;
static char *iotlb_virt_start;
static unsigned long iotlb_nslabs;

/*
 * Used to do a quick range check in swiotlb_unmap_single and
 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
static dma_addr_t iotlb_bus_start, iotlb_bus_mask;

/* Does the given dma address reside within the swiotlb aperture? */
#define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask))
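
/*
 * The macro above tests that 'a' agrees with iotlb_bus_start in every bit
 * covered by iotlb_bus_mask (which is ~(aperture size - 1)), i.e. that 'a'
 * lies in the same naturally aligned, aperture-sized block of bus space as
 * the start of the aperture.  It is an exact "inside the aperture" test
 * only if the aperture's bus address is aligned to its power-of-two size.
 */
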
/*
 * When the IOMMU overflows we return a fallback buffer. This sets the size.
 */
static unsigned long io_tlb_overflow = 32*1024;

void *io_tlb_overflow_buffer;

/*
 * This is a free list describing the number of free entries available from
 * each index
 */
static unsigned int *io_tlb_list;
static unsigned int io_tlb_index;

/*
 * We need to save away the original address corresponding to a mapped entry
 * for the sync operations.
 */
static struct phys_addr {
        struct page *page;
        unsigned int offset;
} *io_tlb_orig_addr;

/*
 * Protect the above data structures in the map and unmap calls
 */
static DEFINE_SPINLOCK(io_tlb_lock);

static int __init
setup_io_tlb_npages(char *str)
{
        /* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */
        if (isdigit(*str)) {
                iotlb_nslabs = simple_strtoul(str, &str, 0) <<
                        (20 - IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
                /* Round up to power of two (xen_create_contiguous_region). */
                while (iotlb_nslabs & (iotlb_nslabs-1))
                        iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
        }
        if (*str == ',')
                ++str;
        /*
         * NB. 'force' enables the swiotlb, but doesn't force its use for
         * every DMA like it does on native Linux.
         */
        if (!strcmp(str, "force"))
                swiotlb_force = 1;
        return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
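
/*
 * Summary of the boot parameter parsed above (derived from the code):
 *
 *         swiotlb=<size in MB>[,force]        e.g. "swiotlb=32,force"
 *         swiotlb=force
 *
 * The size is rounded up to a whole number of IO_TLB_SEGSIZE slabs and then
 * to a power of two; the rounding loop repeatedly adds the lowest set bit
 * of iotlb_nslabs, which carries upwards until only a single bit remains.
 */
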
/*
 * Statically reserve bounce buffer space and initialize bounce buffer data
 * structures for the software IO TLB used to implement the PCI DMA API.
 */
void
swiotlb_init_with_default_size (size_t default_size)
{
        unsigned long i, bytes;

        if (!iotlb_nslabs) {
                iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
                /* Round up to power of two (xen_create_contiguous_region). */
                while (iotlb_nslabs & (iotlb_nslabs-1))
                        iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
        }

        bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);

        /*
         * Get IO TLB memory from the low pages
         */
        iotlb_virt_start = alloc_bootmem_low_pages(bytes);
        if (!iotlb_virt_start)
                panic("Cannot allocate SWIOTLB buffer!\n"
                      "Use dom0_mem Xen boot parameter to reserve\n"
                      "some DMA memory (e.g., dom0_mem=-128M).\n");

        xen_create_contiguous_region(
                (unsigned long)iotlb_virt_start, get_order(bytes));
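
        /*
         * Note: under Xen, pseudo-physically contiguous pages are not in
         * general machine-contiguous, so the bounce pool is exchanged for a
         * single machine-contiguous region of 2^get_order(bytes) pages.
         * This is why iotlb_nslabs is rounded up to a power of two above,
         * and it keeps virt_to_bus() linear across the whole aperture.
         */
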
        /*
         * Allocate and initialize the free list array.  This array is used
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
         */
        io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int));
        for (i = 0; i < iotlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
        io_tlb_orig_addr = alloc_bootmem(
                iotlb_nslabs * sizeof(*io_tlb_orig_addr));

        /*
         * Get the overflow emergency buffer
         */
        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);

        iotlb_bus_start = virt_to_bus(iotlb_virt_start);
        iotlb_bus_mask = ~(dma_addr_t)(bytes - 1);

        printk(KERN_INFO "Software IO TLB enabled: \n"
               " Aperture: %lu megabytes\n"
               " Bus range: 0x%016lx - 0x%016lx\n"
               " Kernel range: 0x%016lx - 0x%016lx\n",
               bytes >> 20,
               (unsigned long)iotlb_bus_start,
               (unsigned long)iotlb_bus_start + bytes,
               (unsigned long)iotlb_virt_start,
               (unsigned long)iotlb_virt_start + bytes);
}

void
swiotlb_init(void)
{
        /* The user can forcibly enable swiotlb. */
        if (swiotlb_force)
                swiotlb = 1;

        /*
         * Otherwise, enable for domain 0 if the machine has 'lots of memory',
         * which we take to mean more than 2GB.
         */
        if (xen_start_info.flags & SIF_INITDOMAIN) {
                dom0_op_t op;
                op.cmd = DOM0_PHYSINFO;
                if ((HYPERVISOR_dom0_op(&op) == 0) &&
                    (op.u.physinfo.total_pages > 0x7ffff))
                        swiotlb = 1;
        }

        if (swiotlb)
                swiotlb_init_with_default_size(64 * (1<<20));
}
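
/*
 * __sync_single() copies 'size' bytes between the caller's original buffer
 * (described by 'buffer', which may be in highmem and is therefore mapped a
 * page at a time with kmap_atomic) and the bounce buffer at 'dma_addr'.
 * The direction argument selects which way the data moves: DMA_FROM_DEVICE
 * copies bounce buffer -> original buffer, DMA_TO_DEVICE the reverse.
 */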
static void
__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
{
        if (PageHighMem(buffer.page)) {
                size_t len, bytes;
                char *dev, *host, *kmp;
                len = size;
                while (len != 0) {
                        if (((bytes = len) + buffer.offset) > PAGE_SIZE)
                                bytes = PAGE_SIZE - buffer.offset;
                        kmp = kmap_atomic(buffer.page, KM_SWIOTLB);
                        dev = dma_addr + size - len;
                        host = kmp + buffer.offset;
                        memcpy((dir == DMA_FROM_DEVICE) ? host : dev,
                               (dir == DMA_FROM_DEVICE) ? dev : host,
                               bytes);
                        kunmap_atomic(kmp, KM_SWIOTLB);
                        len -= bytes;
                        buffer.page++;
                        buffer.offset = 0;
                }
        } else {
                char *host = (char *)phys_to_virt(
                        page_to_pseudophys(buffer.page)) + buffer.offset;
                if (dir == DMA_FROM_DEVICE)
                        memcpy(host, dma_addr, size);
                else if (dir == DMA_TO_DEVICE)
                        memcpy(dma_addr, host, size);
        }
}

/*
 * Allocates bounce buffer and returns its kernel virtual address.
 */
static void *
map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
{
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;

        /*
         * For mappings greater than a page, we limit the stride (and
         * hence alignment) to a page size.
         */
        nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        if (size > PAGE_SIZE)
                stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
        else
                stride = 1;

        BUG_ON(!nslots);

        /*
         * Find a suitable number of IO TLB entries that will fit this
         * request and allocate a buffer from that IO TLB pool.
         */
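        /*
         * Free-list invariant relied upon below: io_tlb_list[i] holds the
         * number of contiguous free slots starting at slot i, counted up to
         * the next allocated slot or IO_TLB_SEGSIZE boundary (see the
         * initialisation in swiotlb_init_with_default_size()).  Checking
         * io_tlb_list[index] >= nslots is therefore enough to know that the
         * whole request fits without straddling a segment boundary.
         */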
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                wrap = index = ALIGN(io_tlb_index, stride);

                if (index >= iotlb_nslabs)
                        wrap = index = 0;

                do {
                        /*
                         * If we find a slot that indicates we have 'nslots'
                         * number of contiguous buffers, we allocate the
                         * buffers from that slot and mark the entries as '0'
                         * indicating unavailable.
                         */
                        if (io_tlb_list[index] >= nslots) {
                                int count = 0;

                                for (i = index; i < (int)(index + nslots); i++)
                                        io_tlb_list[i] = 0;
                                for (i = index - 1;
                                     (OFFSET(i, IO_TLB_SEGSIZE) !=
                                      IO_TLB_SEGSIZE - 1) && io_tlb_list[i];
                                     i--)
                                        io_tlb_list[i] = ++count;
                                dma_addr = iotlb_virt_start +
                                        (index << IO_TLB_SHIFT);

                                /*
                                 * Update the indices to avoid searching in
                                 * the next round.
                                 */
                                io_tlb_index =
                                        ((index + nslots) < iotlb_nslabs
                                         ? (index + nslots) : 0);

                                goto found;
                        }
                        index += stride;
                        if (index >= iotlb_nslabs)
                                index = 0;
                } while (index != wrap);

                spin_unlock_irqrestore(&io_tlb_lock, flags);
                return NULL;
        }
 found:
        spin_unlock_irqrestore(&io_tlb_lock, flags);

        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
        io_tlb_orig_addr[index] = buffer;
        if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
                __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);

        return dma_addr;
}

/*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
static void
unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
        struct phys_addr buffer = io_tlb_orig_addr[index];

        /*
         * First, sync the memory before unmapping the entry
         */
        if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
                __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);

        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
         * While returning the entries to the free list, we merge the entries
         * with slots below and above the pool being returned.
         */
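        /*
         * 'count' starts from the length of the free run immediately after
         * the block being freed (zero if that slot lies in the next
         * segment), and the two loops below walk backwards assigning
         * increasing counts, so that every free slot again records the
         * length of the free run that starts at it.
         */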
        spin_lock_irqsave(&io_tlb_lock, flags);
        {
                count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
                         io_tlb_list[index + nslots] : 0);
                /*
                 * Step 1: return the slots to the free list, merging the
                 * slots with succeeding slots
                 */
                for (i = index + nslots - 1; i >= index; i--)
                        io_tlb_list[i] = ++count;
                /*
                 * Step 2: merge the returned slots with the preceding slots,
                 * if available (non-zero)
                 */
                for (i = index - 1;
                     (OFFSET(i, IO_TLB_SEGSIZE) !=
                      IO_TLB_SEGSIZE - 1) && io_tlb_list[i];
                     i--)
                        io_tlb_list[i] = ++count;
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
}

static void
sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
        int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
        struct phys_addr buffer = io_tlb_orig_addr[index];
        BUG_ON((dir != DMA_FROM_DEVICE) && (dir != DMA_TO_DEVICE));
        __sync_single(buffer, dma_addr, size, dir);
}

static void
swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
{
        /*
         * Ran out of IOMMU space for this operation.  This is very bad.
         * Unfortunately the drivers cannot handle this operation properly
         * unless they check for pci_dma_mapping_error (most don't).
         * When the mapping is small enough return a static buffer to limit
         * the damage, or panic when the transfer is too big.
         */
        printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
               "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");

        if (size > io_tlb_overflow && do_panic) {
                if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic("PCI-DMA: Memory would be corrupted\n");
                if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
                        panic("PCI-DMA: Random memory would be DMAed\n");
        }
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * PCI address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either swiotlb_unmap_single or swiotlb_sync_single_for_cpu is performed.
 */
dma_addr_t
swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
{
        dma_addr_t dev_addr = virt_to_bus(ptr);
        void *map;
        struct phys_addr buffer;

        BUG_ON(dir == DMA_NONE);

        /*
         * If the pointer passed in happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!range_straddles_page_boundary(ptr, size) &&
            !address_needs_mapping(hwdev, dev_addr))
                return dev_addr;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        buffer.page = virt_to_page(ptr);
        buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
        map = map_single(hwdev, buffer, size, dir);
        if (!map) {
                swiotlb_full(hwdev, size, dir, 1);
                map = io_tlb_overflow_buffer;
        }

        dev_addr = virt_to_bus(map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (address_needs_mapping(hwdev, dev_addr))
                panic("map_single: bounce buffer is not DMA'ble");

        return dev_addr;
}
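
/*
 * Typical use of the streaming API above, as a hypothetical example (real
 * drivers normally go through the generic dma_* and pci_* wrappers rather
 * than calling the swiotlb functions directly):
 *
 *         dma_addr_t bus = swiotlb_map_single(dev, buf, len, DMA_TO_DEVICE);
 *         if (swiotlb_dma_mapping_error(bus))
 *                 return -ENOMEM;
 *         ... hand 'bus' to the device and wait for the transfer ...
 *         swiotlb_unmap_single(dev, bus, len, DMA_TO_DEVICE);
 */
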
/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided for in a previous swiotlb_map_single call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
void
swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
                     int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
}

/*
 * Make physical memory consistent for a single streaming mode DMA translation
 * after a transfer.
 *
 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
 * using the cpu, yet do not wish to tear down the PCI dma mapping, you must
 * call this function before doing so.  At the next point you give the PCI dma
 * address back to the card, you must first perform a
 * swiotlb_sync_single_for_device, and then the device again owns the buffer.
 */
void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
                            size_t size, int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
}

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
                               size_t size, int dir)
{
        BUG_ON(dir == DMA_NONE);
        if (in_swiotlb_aperture(dev_addr))
                sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
}
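
/*
 * Note that the _for_cpu and _for_device variants above share the same
 * body: each just repeats the bounce-buffer copy for the given direction,
 * provided the address lies within the swiotlb aperture.
 */
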
/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_single
 * interface.  Here the scatter gather list elements are each tagged with the
 * appropriate dma address and length.  They are obtained via
 * sg_dma_{address,length}(SG).
 *
 * NOTE: An implementation may be able to use a smaller number of
 *       DMA address/length pairs than there are SG table elements.
 *       (for example via virtual mapping capabilities)
 *       The routine returns the number of addr/length pairs actually
 *       used, at most nents.
 *
 * Device ownership issues as mentioned above for swiotlb_map_single are the
 * same here.
 */
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
               int dir)
{
        struct phys_addr buffer;
        dma_addr_t dev_addr;
        char *map;
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++) {
                dev_addr = SG_ENT_PHYS_ADDRESS(sg);
                if (address_needs_mapping(hwdev, dev_addr)) {
                        buffer.page = sg->page;
                        buffer.offset = sg->offset;
                        map = map_single(hwdev, buffer, sg->length, dir);
                        if (!map) {
                                /* Don't panic here, we expect map_sg users
                                   to do proper error handling. */
                                swiotlb_full(hwdev, sg->length, dir, 0);
                                swiotlb_unmap_sg(hwdev, sg - i, i, dir);
                                sg[0].dma_length = 0;
                                return 0;
                        }
                        sg->dma_address = (dma_addr_t)virt_to_bus(map);
                } else
                        sg->dma_address = dev_addr;
                sg->dma_length = sg->length;
        }
        return nelems;
}
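
/*
 * On bounce-pool exhaustion swiotlb_map_sg() reports the failure via
 * swiotlb_full(), unmaps the entries it had already mapped, sets
 * sg[0].dma_length to 0 and returns 0, so callers are expected to check
 * the return value rather than rely on a panic.
 */
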
/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for swiotlb_unmap_single() above.
 */
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
                 int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        unmap_single(hwdev,
                                     (void *)bus_to_virt(sg->dma_address),
                                     sg->dma_length, dir);
}

/*
 * Make physical memory consistent for a set of streaming mode DMA translations
 * after a transfer.
 *
 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
 * and usage.
 */
void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
                        int nelems, int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        sync_single(hwdev,
                                    (void *)bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir);
}

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                           int nelems, int dir)
{
        int i;

        BUG_ON(dir == DMA_NONE);

        for (i = 0; i < nelems; i++, sg++)
                if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
                        sync_single(hwdev,
                                    (void *)bus_to_virt(sg->dma_address),
                                    sg->dma_length, dir);
}

dma_addr_t
swiotlb_map_page(struct device *hwdev, struct page *page,
                 unsigned long offset, size_t size,
                 enum dma_data_direction direction)
{
        struct phys_addr buffer;
        dma_addr_t dev_addr;
        char *map;

        dev_addr = page_to_phys(page) + offset;
        if (address_needs_mapping(hwdev, dev_addr)) {
                buffer.page = page;
                buffer.offset = offset;
                map = map_single(hwdev, buffer, size, direction);
                if (!map) {
                        swiotlb_full(hwdev, size, direction, 1);
                        map = io_tlb_overflow_buffer;
                }
                dev_addr = (dma_addr_t)virt_to_bus(map);
        }

        return dev_addr;
}

void
swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
                   size_t size, enum dma_data_direction direction)
{
        BUG_ON(direction == DMA_NONE);
        if (in_swiotlb_aperture(dma_address))
                unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
}

int
swiotlb_dma_mapping_error(dma_addr_t dma_addr)
{
        return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
}

/*
 * Return whether the given PCI device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
int
swiotlb_dma_supported (struct device *hwdev, u64 mask)
{
        return (mask >= 0xffffffffUL);
}
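
/*
 * The bounce pool is intended to live in DMA-reachable low memory (see the
 * alloc_bootmem_low* calls above), so the test is simply that the device
 * can address the full 32-bit range: masks of 0xffffffff or wider are
 * supported, anything narrower is not.
 */
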
EXPORT_SYMBOL(swiotlb_init);
EXPORT_SYMBOL(swiotlb_map_single);
EXPORT_SYMBOL(swiotlb_unmap_single);
EXPORT_SYMBOL(swiotlb_map_sg);
EXPORT_SYMBOL(swiotlb_unmap_sg);
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
EXPORT_SYMBOL(swiotlb_map_page);
EXPORT_SYMBOL(swiotlb_unmap_page);
EXPORT_SYMBOL(swiotlb_dma_mapping_error);
EXPORT_SYMBOL(swiotlb_dma_supported);

/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */