ia64/linux-2.6.18-xen.hg

view arch/sparc64/kernel/pci_iommu.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 3e8752eb6d9c
children
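
The retry behaviour described above can be pictured with a short sketch.
This is an illustrative reconstruction, not the changeset's actual diff;
the names used below (balloon_process, increase_reservation,
decrease_reservation, current_target, bs.current_pages, balloon_timer)
are assumptions following the balloon driver's conventional structure.

#include <linux/timer.h>
#include <linux/jiffies.h>

/* Assumed context, declared elsewhere in the driver (illustrative only). */
extern struct balloon_stats { unsigned long current_pages; } bs;
extern unsigned long current_target(void);
extern int increase_reservation(unsigned long nr_pages);
extern int decrease_reservation(unsigned long nr_pages);
extern struct timer_list balloon_timer;

/* Illustrative sketch only: with no "hard limit" recorded, a short claim
 * from Xen simply re-arms a timer and the driver tries again later.
 * Pages obtained by a partial success are kept rather than returned.
 */
static void balloon_process(void *unused)
{
        int need_sleep = 0;
        long credit;

        do {
                credit = current_target() - bs.current_pages;
                if (credit > 0)
                        need_sleep = (increase_reservation(credit) != 0);
                if (credit < 0)
                        need_sleep = (decrease_reservation(-credit) != 0);
        } while (credit != 0 && !need_sleep);

        /* Fell short (e.g. the host is under temporary memory pressure):
         * retry on a timer instead of giving up.
         */
        if (current_target() != bs.current_pages)
                mod_timer(&balloon_timer, jiffies + HZ);
}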

File contents (arch/sparc64/kernel/pci_iommu.c):

/* $Id: pci_iommu.c,v 1.17 2001/12/17 07:05:09 davem Exp $
 * pci_iommu.c: UltraSparc PCI controller IOM/STC support.
 *
 * Copyright (C) 1999 David S. Miller (davem@redhat.com)
 * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
 */

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/delay.h>

#include <asm/pbm.h>

#include "iommu_common.h"

#define PCI_STC_CTXMATCH_ADDR(STC, CTX) \
        ((STC)->strbuf_ctxmatch_base + ((CTX) << 3))

/* Accessing IOMMU and Streaming Buffer registers.
 * REG parameter is a physical address. All registers
 * are 64-bits in size.
 */
#define pci_iommu_read(__reg) \
({      u64 __ret; \
        __asm__ __volatile__("ldxa [%1] %2, %0" \
                             : "=r" (__ret) \
                             : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
                             : "memory"); \
        __ret; \
})
#define pci_iommu_write(__reg, __val) \
        __asm__ __volatile__("stxa %0, [%1] %2" \
                             : /* no outputs */ \
                             : "r" (__val), "r" (__reg), \
                               "i" (ASI_PHYS_BYPASS_EC_E))

/* Must be invoked under the IOMMU lock. */
static void __iommu_flushall(struct pci_iommu *iommu)
{
        unsigned long tag;
        int entry;

        tag = iommu->iommu_flush + (0xa580UL - 0x0210UL);
        for (entry = 0; entry < 16; entry++) {
                pci_iommu_write(tag, 0);
                tag += 8;
        }

        /* Ensure completion of previous PIO writes. */
        (void) pci_iommu_read(iommu->write_complete_reg);
}

#define IOPTE_CONSISTENT(CTX) \
        (IOPTE_VALID | IOPTE_CACHE | \
         (((CTX) << 47) & IOPTE_CONTEXT))

#define IOPTE_STREAMING(CTX) \
        (IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)

/* Existing mappings are never marked invalid, instead they
 * are pointed to a dummy page.
 */
#define IOPTE_IS_DUMMY(iommu, iopte) \
        ((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa)

static inline void iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte)
{
        unsigned long val = iopte_val(*iopte);

        val &= ~IOPTE_PAGE;
        val |= iommu->dummy_page_pa;

        iopte_val(*iopte) = val;
}

/* Based largely upon the ppc64 iommu allocator. */
static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages)
{
        struct pci_iommu_arena *arena = &iommu->arena;
        unsigned long n, i, start, end, limit;
        int pass;

        limit = arena->limit;
        start = arena->hint;
        pass = 0;

again:
        n = find_next_zero_bit(arena->map, limit, start);
        end = n + npages;
        if (unlikely(end >= limit)) {
                if (likely(pass < 1)) {
                        limit = start;
                        start = 0;
                        __iommu_flushall(iommu);
                        pass++;
                        goto again;
                } else {
                        /* Scanned the whole thing, give up. */
                        return -1;
                }
        }

        for (i = n; i < end; i++) {
                if (test_bit(i, arena->map)) {
                        start = i + 1;
                        goto again;
                }
        }

        for (i = n; i < end; i++)
                __set_bit(i, arena->map);

        arena->hint = end;

        return n;
}

static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages)
{
        unsigned long i;

        for (i = base; i < (base + npages); i++)
                __clear_bit(i, arena->map);
}

void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask)
{
        unsigned long i, tsbbase, order, sz, num_tsb_entries;

        num_tsb_entries = tsbsize / sizeof(iopte_t);

        /* Setup initial software IOMMU state. */
        spin_lock_init(&iommu->lock);
        iommu->ctx_lowest_free = 1;
        iommu->page_table_map_base = dma_offset;
        iommu->dma_addr_mask = dma_addr_mask;

        /* Allocate and initialize the free area map. */
        sz = num_tsb_entries / 8;
        sz = (sz + 7UL) & ~7UL;
        iommu->arena.map = kzalloc(sz, GFP_KERNEL);
        if (!iommu->arena.map) {
                prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n");
                prom_halt();
        }
        iommu->arena.limit = num_tsb_entries;

        /* Allocate and initialize the dummy page which we
         * set inactive IO PTEs to point to.
         */
        iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0);
        if (!iommu->dummy_page) {
                prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n");
                prom_halt();
        }
        memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
        iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);

        /* Now allocate and setup the IOMMU page table itself. */
        order = get_order(tsbsize);
        tsbbase = __get_free_pages(GFP_KERNEL, order);
        if (!tsbbase) {
                prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n");
                prom_halt();
        }
        iommu->page_table = (iopte_t *)tsbbase;

        for (i = 0; i < num_tsb_entries; i++)
                iopte_make_dummy(iommu, &iommu->page_table[i]);
}

static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages)
{
        long entry;

        entry = pci_arena_alloc(iommu, npages);
        if (unlikely(entry < 0))
                return NULL;

        return iommu->page_table + entry;
}

static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages)
{
        pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
}

static int iommu_alloc_ctx(struct pci_iommu *iommu)
{
        int lowest = iommu->ctx_lowest_free;
        int sz = IOMMU_NUM_CTXS - lowest;
        int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);

        if (unlikely(n == sz)) {
                n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
                if (unlikely(n == lowest)) {
                        printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
                        n = 0;
                }
        }
        if (n)
                __set_bit(n, iommu->ctx_bitmap);

        return n;
}

static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx)
{
        if (likely(ctx)) {
                __clear_bit(ctx, iommu->ctx_bitmap);
                if (ctx < iommu->ctx_lowest_free)
                        iommu->ctx_lowest_free = ctx;
        }
}

/* Allocate and map kernel buffer of size SIZE using consistent mode
 * DMA for PCI device PDEV. Return non-NULL cpu-side address if
 * successful and set *DMA_ADDRP to the PCI side dma address.
 */
static void *pci_4u_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        iopte_t *iopte;
        unsigned long flags, order, first_page;
        void *ret;
        int npages;

        size = IO_PAGE_ALIGN(size);
        order = get_order(size);
        if (order >= 10)
                return NULL;

        first_page = __get_free_pages(gfp, order);
        if (first_page == 0UL)
                return NULL;
        memset((char *)first_page, 0, PAGE_SIZE << order);

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;

        spin_lock_irqsave(&iommu->lock, flags);
        iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (unlikely(iopte == NULL)) {
                free_pages(first_page, order);
                return NULL;
        }

        *dma_addrp = (iommu->page_table_map_base +
                      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
        ret = (void *) first_page;
        npages = size >> IO_PAGE_SHIFT;
        first_page = __pa(first_page);
        while (npages--) {
                iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
                                     IOPTE_WRITE |
                                     (first_page & IOPTE_PAGE));
                iopte++;
                first_page += IO_PAGE_SIZE;
        }

        return ret;
}

/* Free and unmap a consistent DMA translation. */
static void pci_4u_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        iopte_t *iopte;
        unsigned long flags, order, npages;

        npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        iopte = iommu->page_table +
                ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

        spin_lock_irqsave(&iommu->lock, flags);

        free_npages(iommu, dvma - iommu->page_table_map_base, npages);

        spin_unlock_irqrestore(&iommu->lock, flags);

        order = get_order(size);
        if (order < 10)
                free_pages((unsigned long)cpu, order);
}

/* Map a single buffer at PTR of SZ bytes for PCI DMA
 * in streaming mode.
 */
static dma_addr_t pci_4u_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        iopte_t *base;
        unsigned long flags, npages, oaddr;
        unsigned long i, base_paddr, ctx;
        u32 bus_addr, ret;
        unsigned long iopte_protection;

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        if (unlikely(direction == PCI_DMA_NONE))
                goto bad_no_ctx;

        oaddr = (unsigned long)ptr;
        npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;

        spin_lock_irqsave(&iommu->lock, flags);
        base = alloc_npages(iommu, npages);
        ctx = 0;
        if (iommu->iommu_ctxflush)
                ctx = iommu_alloc_ctx(iommu);
        spin_unlock_irqrestore(&iommu->lock, flags);

        if (unlikely(!base))
                goto bad;

        bus_addr = (iommu->page_table_map_base +
                    ((base - iommu->page_table) << IO_PAGE_SHIFT));
        ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
        base_paddr = __pa(oaddr & IO_PAGE_MASK);
        if (strbuf->strbuf_enabled)
                iopte_protection = IOPTE_STREAMING(ctx);
        else
                iopte_protection = IOPTE_CONSISTENT(ctx);
        if (direction != PCI_DMA_TODEVICE)
                iopte_protection |= IOPTE_WRITE;

        for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
                iopte_val(*base) = iopte_protection | base_paddr;

        return ret;

bad:
        iommu_free_ctx(iommu, ctx);
bad_no_ctx:
        if (printk_ratelimit())
                WARN_ON(1);
        return PCI_DMA_ERROR_CODE;
}

static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction)
{
        int limit;

        if (strbuf->strbuf_ctxflush &&
            iommu->iommu_ctxflush) {
                unsigned long matchreg, flushreg;
                u64 val;

                flushreg = strbuf->strbuf_ctxflush;
                matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);

                pci_iommu_write(flushreg, ctx);
                val = pci_iommu_read(matchreg);
                val &= 0xffff;
                if (!val)
                        goto do_flush_sync;

                while (val) {
                        if (val & 0x1)
                                pci_iommu_write(flushreg, ctx);
                        val >>= 1;
                }
                val = pci_iommu_read(matchreg);
                if (unlikely(val)) {
                        printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
                               "timeout matchreg[%lx] ctx[%lx]\n",
                               val, ctx);
                        goto do_page_flush;
                }
        } else {
                unsigned long i;

        do_page_flush:
                for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
                        pci_iommu_write(strbuf->strbuf_pflush, vaddr);
        }

do_flush_sync:
        /* If the device could not have possibly put dirty data into
         * the streaming cache, no flush-flag synchronization needs
         * to be performed.
         */
        if (direction == PCI_DMA_TODEVICE)
                return;

        PCI_STC_FLUSHFLAG_INIT(strbuf);
        pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
        (void) pci_iommu_read(iommu->write_complete_reg);

        limit = 100000;
        while (!PCI_STC_FLUSHFLAG_SET(strbuf)) {
                limit--;
                if (!limit)
                        break;
                udelay(1);
                rmb();
        }
        if (!limit)
                printk(KERN_WARNING "pci_strbuf_flush: flushflag timeout "
                       "vaddr[%08x] ctx[%lx] npages[%ld]\n",
                       vaddr, ctx, npages);
}

/* Unmap a single streaming mode DMA translation. */
static void pci_4u_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        iopte_t *base;
        unsigned long flags, npages, ctx, i;

        if (unlikely(direction == PCI_DMA_NONE)) {
                if (printk_ratelimit())
                        WARN_ON(1);
                return;
        }

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;
        base = iommu->page_table +
                ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
#ifdef DEBUG_PCI_IOMMU
        if (IOPTE_IS_DUMMY(iommu, base))
                printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n",
                       bus_addr, sz, __builtin_return_address(0));
#endif
        bus_addr &= IO_PAGE_MASK;

        spin_lock_irqsave(&iommu->lock, flags);

        /* Record the context, if any. */
        ctx = 0;
        if (iommu->iommu_ctxflush)
                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

        /* Step 1: Kick data out of streaming buffers if necessary. */
        if (strbuf->strbuf_enabled)
                pci_strbuf_flush(strbuf, iommu, bus_addr, ctx,
                                 npages, direction);

        /* Step 2: Clear out TSB entries. */
        for (i = 0; i < npages; i++)
                iopte_make_dummy(iommu, base + i);

        free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);

        iommu_free_ctx(iommu, ctx);

        spin_unlock_irqrestore(&iommu->lock, flags);
}

#define SG_ENT_PHYS_ADDRESS(SG) \
        (__pa(page_address((SG)->page)) + (SG)->offset)

static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
                           int nused, int nelems, unsigned long iopte_protection)
{
        struct scatterlist *dma_sg = sg;
        struct scatterlist *sg_end = sg + nelems;
        int i;

        for (i = 0; i < nused; i++) {
                unsigned long pteval = ~0UL;
                u32 dma_npages;

                dma_npages = ((dma_sg->dma_address & (IO_PAGE_SIZE - 1UL)) +
                              dma_sg->dma_length +
                              ((IO_PAGE_SIZE - 1UL))) >> IO_PAGE_SHIFT;
                do {
                        unsigned long offset;
                        signed int len;

                        /* If we are here, we know we have at least one
                         * more page to map. So walk forward until we
                         * hit a page crossing, and begin creating new
                         * mappings from that spot.
                         */
                        for (;;) {
                                unsigned long tmp;

                                tmp = SG_ENT_PHYS_ADDRESS(sg);
                                len = sg->length;
                                if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) {
                                        pteval = tmp & IO_PAGE_MASK;
                                        offset = tmp & (IO_PAGE_SIZE - 1UL);
                                        break;
                                }
                                if (((tmp ^ (tmp + len - 1UL)) >> IO_PAGE_SHIFT) != 0UL) {
                                        pteval = (tmp + IO_PAGE_SIZE) & IO_PAGE_MASK;
                                        offset = 0UL;
                                        len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
                                        break;
                                }
                                sg++;
                        }

                        pteval = iopte_protection | (pteval & IOPTE_PAGE);
                        while (len > 0) {
                                *iopte++ = __iopte(pteval);
                                pteval += IO_PAGE_SIZE;
                                len -= (IO_PAGE_SIZE - offset);
                                offset = 0;
                                dma_npages--;
                        }

                        pteval = (pteval & IOPTE_PAGE) + len;
                        sg++;

                        /* Skip over any tail mappings we've fully mapped,
                         * adjusting pteval along the way. Stop when we
                         * detect a page crossing event.
                         */
                        while (sg < sg_end &&
                               (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
                               (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
                               ((pteval ^
                                 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
                                pteval += sg->length;
                                sg++;
                        }
                        if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
                                pteval = ~0UL;
                } while (dma_npages != 0);
                dma_sg++;
        }
}

/* Map a set of buffers described by SGLIST with NELEMS array
 * elements in streaming mode for PCI DMA.
 * When making changes here, inspect the assembly output. I was having
 * a hard time keeping this routine out of using stack slots for
 * holding variables.
 */
static int pci_4u_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        unsigned long flags, ctx, npages, iopte_protection;
        iopte_t *base;
        u32 dma_base;
        struct scatterlist *sgtmp;
        int used;

        /* Fast path single entry scatterlists. */
        if (nelems == 1) {
                sglist->dma_address =
                        pci_4u_map_single(pdev,
                                          (page_address(sglist->page) + sglist->offset),
                                          sglist->length, direction);
                if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE))
                        return 0;
                sglist->dma_length = sglist->length;
                return 1;
        }

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        if (unlikely(direction == PCI_DMA_NONE))
                goto bad_no_ctx;

        /* Step 1: Prepare scatter list. */

        npages = prepare_sg(sglist, nelems);

        /* Step 2: Allocate a cluster and context, if necessary. */

        spin_lock_irqsave(&iommu->lock, flags);

        base = alloc_npages(iommu, npages);
        ctx = 0;
        if (iommu->iommu_ctxflush)
                ctx = iommu_alloc_ctx(iommu);

        spin_unlock_irqrestore(&iommu->lock, flags);

        if (base == NULL)
                goto bad;

        dma_base = iommu->page_table_map_base +
                ((base - iommu->page_table) << IO_PAGE_SHIFT);

        /* Step 3: Normalize DMA addresses. */
        used = nelems;

        sgtmp = sglist;
        while (used && sgtmp->dma_length) {
                sgtmp->dma_address += dma_base;
                sgtmp++;
                used--;
        }
        used = nelems - used;

        /* Step 4: Create the mappings. */
        if (strbuf->strbuf_enabled)
                iopte_protection = IOPTE_STREAMING(ctx);
        else
                iopte_protection = IOPTE_CONSISTENT(ctx);
        if (direction != PCI_DMA_TODEVICE)
                iopte_protection |= IOPTE_WRITE;

        fill_sg(base, sglist, used, nelems, iopte_protection);

#ifdef VERIFY_SG
        verify_sglist(sglist, nelems, base, npages);
#endif

        return used;

bad:
        iommu_free_ctx(iommu, ctx);
bad_no_ctx:
        if (printk_ratelimit())
                WARN_ON(1);
        return 0;
}

/* Unmap a set of streaming mode DMA translations. */
static void pci_4u_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        iopte_t *base;
        unsigned long flags, ctx, i, npages;
        u32 bus_addr;

        if (unlikely(direction == PCI_DMA_NONE)) {
                if (printk_ratelimit())
                        WARN_ON(1);
        }

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        bus_addr = sglist->dma_address & IO_PAGE_MASK;

        for (i = 1; i < nelems; i++)
                if (sglist[i].dma_length == 0)
                        break;
        i--;
        npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
                  bus_addr) >> IO_PAGE_SHIFT;

        base = iommu->page_table +
                ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

#ifdef DEBUG_PCI_IOMMU
        if (IOPTE_IS_DUMMY(iommu, base))
                printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n", sglist->dma_address, nelems, __builtin_return_address(0));
#endif

        spin_lock_irqsave(&iommu->lock, flags);

        /* Record the context, if any. */
        ctx = 0;
        if (iommu->iommu_ctxflush)
                ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;

        /* Step 1: Kick data out of streaming buffers if necessary. */
        if (strbuf->strbuf_enabled)
                pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

        /* Step 2: Clear out the TSB entries. */
        for (i = 0; i < npages; i++)
                iopte_make_dummy(iommu, base + i);

        free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);

        iommu_free_ctx(iommu, ctx);

        spin_unlock_irqrestore(&iommu->lock, flags);
}

/* Make physical memory consistent for a single
 * streaming mode DMA translation after a transfer.
 */
static void pci_4u_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        unsigned long flags, ctx, npages;

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        if (!strbuf->strbuf_enabled)
                return;

        spin_lock_irqsave(&iommu->lock, flags);

        npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;
        bus_addr &= IO_PAGE_MASK;

        /* Step 1: Record the context, if any. */
        ctx = 0;
        if (iommu->iommu_ctxflush &&
            strbuf->strbuf_ctxflush) {
                iopte_t *iopte;

                iopte = iommu->page_table +
                        ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
                ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
        }

        /* Step 2: Kick data out of streaming buffers. */
        pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

        spin_unlock_irqrestore(&iommu->lock, flags);
}

/* Make physical memory consistent for a set of streaming
 * mode DMA translations after a transfer.
 */
static void pci_4u_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
{
        struct pcidev_cookie *pcp;
        struct pci_iommu *iommu;
        struct pci_strbuf *strbuf;
        unsigned long flags, ctx, npages, i;
        u32 bus_addr;

        pcp = pdev->sysdata;
        iommu = pcp->pbm->iommu;
        strbuf = &pcp->pbm->stc;

        if (!strbuf->strbuf_enabled)
                return;

        spin_lock_irqsave(&iommu->lock, flags);

        /* Step 1: Record the context, if any. */
        ctx = 0;
        if (iommu->iommu_ctxflush &&
            strbuf->strbuf_ctxflush) {
                iopte_t *iopte;

                iopte = iommu->page_table +
                        ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
                ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
        }

        /* Step 2: Kick data out of streaming buffers. */
        bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
        for (i = 1; i < nelems; i++)
                if (!sglist[i].dma_length)
                        break;
        i--;
        npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
                  - bus_addr) >> IO_PAGE_SHIFT;
        pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);

        spin_unlock_irqrestore(&iommu->lock, flags);
}

struct pci_iommu_ops pci_sun4u_iommu_ops = {
        .alloc_consistent = pci_4u_alloc_consistent,
        .free_consistent = pci_4u_free_consistent,
        .map_single = pci_4u_map_single,
        .unmap_single = pci_4u_unmap_single,
        .map_sg = pci_4u_map_sg,
        .unmap_sg = pci_4u_unmap_sg,
        .dma_sync_single_for_cpu = pci_4u_dma_sync_single_for_cpu,
        .dma_sync_sg_for_cpu = pci_4u_dma_sync_sg_for_cpu,
};

static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
{
        struct pci_dev *ali_isa_bridge;
        u8 val;

        /* ALI sound chips generate 31-bits of DMA, a special register
         * determines what bit 31 is emitted as.
         */
        ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL,
                                        PCI_DEVICE_ID_AL_M1533,
                                        NULL);

        pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
        if (set_bit)
                val |= 0x01;
        else
                val &= ~0x01;
        pci_write_config_byte(ali_isa_bridge, 0x7e, val);
        pci_dev_put(ali_isa_bridge);
}

int pci_dma_supported(struct pci_dev *pdev, u64 device_mask)
{
        struct pcidev_cookie *pcp = pdev->sysdata;
        u64 dma_addr_mask;

        if (pdev == NULL) {
                dma_addr_mask = 0xffffffff;
        } else {
                struct pci_iommu *iommu = pcp->pbm->iommu;

                dma_addr_mask = iommu->dma_addr_mask;

                if (pdev->vendor == PCI_VENDOR_ID_AL &&
                    pdev->device == PCI_DEVICE_ID_AL_M5451 &&
                    device_mask == 0x7fffffff) {
                        ali_sound_dma_hack(pdev,
                                           (dma_addr_mask & 0x80000000) != 0);
                        return 1;
                }
        }

        if (device_mask >= (1UL << 32UL))
                return 0;

        return (device_mask & dma_addr_mask) == dma_addr_mask;
}
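
For reference, this is how a device driver of the same era would reach the
operations above: it calls the generic PCI DMA API, which sparc64 routes
through pci_sun4u_iommu_ops. The fragment below is a hedged sketch rather
than code from this tree; the device, buffer and function names (mydev_rx)
are hypothetical and error handling is reduced to the minimum.

#include <linux/pci.h>
#include <linux/errno.h>

/* Hypothetical receive path: map a driver buffer for device-to-memory DMA,
 * let the device fill it, then sync and unmap so the CPU sees the data.
 */
static int mydev_rx(struct pci_dev *pdev, void *buf, size_t len)
{
        dma_addr_t bus;

        /* Streaming mapping; on sun4u this ends up in pci_4u_map_single(). */
        bus = pci_map_single(pdev, buf, len, PCI_DMA_FROMDEVICE);
        if (pci_dma_mapping_error(bus))
                return -ENOMEM;

        /* ... program the device with 'bus' and wait for the DMA ... */

        /* Flush the streaming cache before the CPU reads the buffer
         * (pci_4u_dma_sync_single_for_cpu above).
         */
        pci_dma_sync_single_for_cpu(pdev, bus, len, PCI_DMA_FROMDEVICE);

        /* Tear down the translation (pci_4u_unmap_single above). */
        pci_unmap_single(pdev, bus, len, PCI_DMA_FROMDEVICE);
        return 0;
}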