ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/pci_iommu.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/pci_iommu.c
3 */
5 #include <linux/kernel.h>
6 #include <linux/mm.h>
7 #include <linux/pci.h>
8 #include <linux/slab.h>
9 #include <linux/bootmem.h>
11 #include <asm/io.h>
12 #include <asm/hwrpb.h>
14 #include "proto.h"
15 #include "pci_impl.h"
/* Set DEBUG_ALLOC > 0 to enable DBGA, > 1 to also enable DBGA2. */
#define DEBUG_ALLOC 0
#if DEBUG_ALLOC > 0
# define DBGA(args...)	printk(KERN_DEBUG args)
#else
# define DBGA(args...)
#endif
#if DEBUG_ALLOC > 1
# define DBGA2(args...)	printk(KERN_DEBUG args)
#else
# define DBGA2(args...)
#endif

/* Debug switches compiled into the #if tests below; DEBUG_FORCEDAC is
   defined but not referenced in the code visible here — presumably used
   by an out-of-view section, verify before removing. */
#define DEBUG_NODIRECT 0
#define DEBUG_FORCEDAC 0

/* ISA devices can only address the low 16MB (24-bit) of bus space. */
#define ISA_DMA_MASK 0x00ffffff
35 static inline unsigned long
36 mk_iommu_pte(unsigned long paddr)
37 {
38 return (paddr >> (PAGE_SHIFT-1)) | 1;
39 }
41 static inline long
42 calc_npages(long bytes)
43 {
44 return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
45 }
48 /* Return the minimum of MAX or the first power of two larger
49 than main memory. */
51 unsigned long
52 size_for_memory(unsigned long max)
53 {
54 unsigned long mem = max_low_pfn << PAGE_SHIFT;
55 if (mem < max)
56 max = 1UL << ceil_log2(mem);
57 return max;
58 }
60 struct pci_iommu_arena *
61 iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
62 unsigned long window_size, unsigned long align)
63 {
64 unsigned long mem_size;
65 struct pci_iommu_arena *arena;
67 mem_size = window_size / (PAGE_SIZE / sizeof(unsigned long));
69 /* Note that the TLB lookup logic uses bitwise concatenation,
70 not addition, so the required arena alignment is based on
71 the size of the window. Retain the align parameter so that
72 particular systems can over-align the arena. */
73 if (align < mem_size)
74 align = mem_size;
77 #ifdef CONFIG_DISCONTIGMEM
79 if (!NODE_DATA(nid) ||
80 (NULL == (arena = alloc_bootmem_node(NODE_DATA(nid),
81 sizeof(*arena))))) {
82 printk("%s: couldn't allocate arena from node %d\n"
83 " falling back to system-wide allocation\n",
84 __FUNCTION__, nid);
85 arena = alloc_bootmem(sizeof(*arena));
86 }
88 if (!NODE_DATA(nid) ||
89 (NULL == (arena->ptes = __alloc_bootmem_node(NODE_DATA(nid),
90 mem_size,
91 align,
92 0)))) {
93 printk("%s: couldn't allocate arena ptes from node %d\n"
94 " falling back to system-wide allocation\n",
95 __FUNCTION__, nid);
96 arena->ptes = __alloc_bootmem(mem_size, align, 0);
97 }
99 #else /* CONFIG_DISCONTIGMEM */
101 arena = alloc_bootmem(sizeof(*arena));
102 arena->ptes = __alloc_bootmem(mem_size, align, 0);
104 #endif /* CONFIG_DISCONTIGMEM */
106 spin_lock_init(&arena->lock);
107 arena->hose = hose;
108 arena->dma_base = base;
109 arena->size = window_size;
110 arena->next_entry = 0;
112 /* Align allocations to a multiple of a page size. Not needed
113 unless there are chip bugs. */
114 arena->align_entry = 1;
116 return arena;
117 }
119 struct pci_iommu_arena *
120 iommu_arena_new(struct pci_controller *hose, dma_addr_t base,
121 unsigned long window_size, unsigned long align)
122 {
123 return iommu_arena_new_node(0, hose, base, window_size, align);
124 }
126 /* Must be called with the arena lock held */
127 static long
128 iommu_arena_find_pages(struct pci_iommu_arena *arena, long n, long mask)
129 {
130 unsigned long *ptes;
131 long i, p, nent;
133 /* Search forward for the first mask-aligned sequence of N free ptes */
134 ptes = arena->ptes;
135 nent = arena->size >> PAGE_SHIFT;
136 p = (arena->next_entry + mask) & ~mask;
137 i = 0;
138 while (i < n && p+i < nent) {
139 if (ptes[p+i])
140 p = (p + i + 1 + mask) & ~mask, i = 0;
141 else
142 i = i + 1;
143 }
145 if (i < n) {
146 /* Reached the end. Flush the TLB and restart the
147 search from the beginning. */
148 alpha_mv.mv_pci_tbi(arena->hose, 0, -1);
150 p = 0, i = 0;
151 while (i < n && p+i < nent) {
152 if (ptes[p+i])
153 p = (p + i + 1 + mask) & ~mask, i = 0;
154 else
155 i = i + 1;
156 }
158 if (i < n)
159 return -1;
160 }
162 /* Success. It's the responsibility of the caller to mark them
163 in use before releasing the lock */
164 return p;
165 }
167 static long
168 iommu_arena_alloc(struct pci_iommu_arena *arena, long n, unsigned int align)
169 {
170 unsigned long flags;
171 unsigned long *ptes;
172 long i, p, mask;
174 spin_lock_irqsave(&arena->lock, flags);
176 /* Search for N empty ptes */
177 ptes = arena->ptes;
178 mask = max(align, arena->align_entry) - 1;
179 p = iommu_arena_find_pages(arena, n, mask);
180 if (p < 0) {
181 spin_unlock_irqrestore(&arena->lock, flags);
182 return -1;
183 }
185 /* Success. Mark them all in use, ie not zero and invalid
186 for the iommu tlb that could load them from under us.
187 The chip specific bits will fill this in with something
188 kosher when we return. */
189 for (i = 0; i < n; ++i)
190 ptes[p+i] = IOMMU_INVALID_PTE;
192 arena->next_entry = p + n;
193 spin_unlock_irqrestore(&arena->lock, flags);
195 return p;
196 }
198 static void
199 iommu_arena_free(struct pci_iommu_arena *arena, long ofs, long n)
200 {
201 unsigned long *p;
202 long i;
204 p = arena->ptes + ofs;
205 for (i = 0; i < n; ++i)
206 p[i] = 0;
207 }
/* Map a single buffer of the indicated size for PCI DMA in streaming
   mode.  The 32-bit PCI bus mastering address to use is returned.
   Once the device is given the dma address, the device owns this memory
   until either pci_unmap_single or pci_dma_sync_single is performed.

   Strategy, in order: direct-map window, DAC, scatter-gather arena.
   Returns 0 on failure (0 is never produced as a valid mapping here). */

static dma_addr_t
pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
		 int dac_allowed)
{
	/* NULL pdev denotes an ISA device: use the ISA hose and 24-bit mask. */
	struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose;
	dma_addr_t max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK;
	struct pci_iommu_arena *arena;
	long npages, dma_ofs, i;
	unsigned long paddr;
	dma_addr_t ret;
	unsigned int align = 0;

	paddr = __pa(cpu_addr);

#if !DEBUG_NODIRECT
	/* First check to see if we can use the direct map window. */
	if (paddr + size + __direct_map_base - 1 <= max_dma
	    && paddr + size <= __direct_map_size) {
		ret = paddr + __direct_map_base;

		DBGA2("pci_map_single: [%p,%lx] -> direct %lx from %p\n",
		      cpu_addr, size, ret, __builtin_return_address(0));

		return ret;
	}
#endif

	/* Next, use DAC if selected earlier. */
	if (dac_allowed) {
		ret = paddr + alpha_mv.pci_dac_offset;

		DBGA2("pci_map_single: [%p,%lx] -> DAC %lx from %p\n",
		      cpu_addr, size, ret, __builtin_return_address(0));

		return ret;
	}

	/* If the machine doesn't define a pci_tbi routine, we have to
	   assume it doesn't support sg mapping, and, since we tried to
	   use direct_map above, it now must be considered an error. */
	if (! alpha_mv.mv_pci_tbi) {
		static int been_here = 0; /* Only print the message once. */
		if (!been_here) {
			printk(KERN_WARNING "pci_map_single: no HW sg\n");
			been_here = 1;
		}
		return 0;
	}

	/* Fall back to the ISA arena if the PCI arena is absent or
	   lies beyond the device's addressable range. */
	arena = hose->sg_pci;
	if (!arena || arena->dma_base + arena->size - 1 > max_dma)
		arena = hose->sg_isa;

	npages = calc_npages((paddr & ~PAGE_MASK) + size);

	/* Force allocation to 64KB boundary for ISA bridges. */
	if (pdev && pdev == isa_bridge)
		align = 8;
	dma_ofs = iommu_arena_alloc(arena, npages, align);
	if (dma_ofs < 0) {
		printk(KERN_WARNING "pci_map_single failed: "
		       "could not allocate dma page tables\n");
		return 0;
	}

	/* One pte per page of the buffer. */
	paddr &= PAGE_MASK;
	for (i = 0; i < npages; ++i, paddr += PAGE_SIZE)
		arena->ptes[i + dma_ofs] = mk_iommu_pte(paddr);

	/* Bus address = arena base + slot offset + intra-page offset. */
	ret = arena->dma_base + dma_ofs * PAGE_SIZE;
	ret += (unsigned long)cpu_addr & ~PAGE_MASK;

	DBGA2("pci_map_single: [%p,%lx] np %ld -> sg %lx from %p\n",
	      cpu_addr, size, npages, ret, __builtin_return_address(0));

	return ret;
}
292 dma_addr_t
293 pci_map_single(struct pci_dev *pdev, void *cpu_addr, size_t size, int dir)
294 {
295 int dac_allowed;
297 if (dir == PCI_DMA_NONE)
298 BUG();
300 dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0;
301 return pci_map_single_1(pdev, cpu_addr, size, dac_allowed);
302 }
304 dma_addr_t
305 pci_map_page(struct pci_dev *pdev, struct page *page, unsigned long offset,
306 size_t size, int dir)
307 {
308 int dac_allowed;
310 if (dir == PCI_DMA_NONE)
311 BUG();
313 dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0;
314 return pci_map_single_1(pdev, (char *)page_address(page) + offset,
315 size, dac_allowed);
316 }
318 /* Unmap a single streaming mode DMA translation. The DMA_ADDR and
319 SIZE must match what was provided for in a previous pci_map_single
320 call. All other usages are undefined. After this call, reads by
321 the cpu to the buffer are guaranteed to see whatever the device
322 wrote there. */
324 void
325 pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
326 int direction)
327 {
328 unsigned long flags;
329 struct pci_controller *hose = pdev ? pdev->sysdata : pci_isa_hose;
330 struct pci_iommu_arena *arena;
331 long dma_ofs, npages;
333 if (direction == PCI_DMA_NONE)
334 BUG();
336 if (dma_addr >= __direct_map_base
337 && dma_addr < __direct_map_base + __direct_map_size) {
338 /* Nothing to do. */
340 DBGA2("pci_unmap_single: direct [%lx,%lx] from %p\n",
341 dma_addr, size, __builtin_return_address(0));
343 return;
344 }
346 if (dma_addr > 0xffffffff) {
347 DBGA2("pci64_unmap_single: DAC [%lx,%lx] from %p\n",
348 dma_addr, size, __builtin_return_address(0));
349 return;
350 }
352 arena = hose->sg_pci;
353 if (!arena || dma_addr < arena->dma_base)
354 arena = hose->sg_isa;
356 dma_ofs = (dma_addr - arena->dma_base) >> PAGE_SHIFT;
357 if (dma_ofs * PAGE_SIZE >= arena->size) {
358 printk(KERN_ERR "Bogus pci_unmap_single: dma_addr %lx "
359 " base %lx size %x\n", dma_addr, arena->dma_base,
360 arena->size);
361 return;
362 BUG();
363 }
365 npages = calc_npages((dma_addr & ~PAGE_MASK) + size);
367 spin_lock_irqsave(&arena->lock, flags);
369 iommu_arena_free(arena, dma_ofs, npages);
371 /* If we're freeing ptes above the `next_entry' pointer (they
372 may have snuck back into the TLB since the last wrap flush),
373 we need to flush the TLB before reallocating the latter. */
374 if (dma_ofs >= arena->next_entry)
375 alpha_mv.mv_pci_tbi(hose, dma_addr, dma_addr + size - 1);
377 spin_unlock_irqrestore(&arena->lock, flags);
379 DBGA2("pci_unmap_single: sg [%lx,%lx] np %ld from %p\n",
380 dma_addr, size, npages, __builtin_return_address(0));
381 }
383 void
384 pci_unmap_page(struct pci_dev *pdev, dma_addr_t dma_addr,
385 size_t size, int direction)
386 {
387 pci_unmap_single(pdev, dma_addr, size, direction);
388 }
390 /* Allocate and map kernel buffer using consistent mode DMA for PCI
391 device. Returns non-NULL cpu-view pointer to the buffer if
392 successful and sets *DMA_ADDRP to the pci side dma address as well,
393 else DMA_ADDRP is undefined. */
395 void *
396 pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
397 {
398 void *cpu_addr;
399 long order = get_order(size);
400 gfp_t gfp = GFP_ATOMIC;
402 try_again:
403 cpu_addr = (void *)__get_free_pages(gfp, order);
404 if (! cpu_addr) {
405 printk(KERN_INFO "pci_alloc_consistent: "
406 "get_free_pages failed from %p\n",
407 __builtin_return_address(0));
408 /* ??? Really atomic allocation? Otherwise we could play
409 with vmalloc and sg if we can't find contiguous memory. */
410 return NULL;
411 }
412 memset(cpu_addr, 0, size);
414 *dma_addrp = pci_map_single_1(pdev, cpu_addr, size, 0);
415 if (*dma_addrp == 0) {
416 free_pages((unsigned long)cpu_addr, order);
417 if (alpha_mv.mv_pci_tbi || (gfp & GFP_DMA))
418 return NULL;
419 /* The address doesn't fit required mask and we
420 do not have iommu. Try again with GFP_DMA. */
421 gfp |= GFP_DMA;
422 goto try_again;
423 }
425 DBGA2("pci_alloc_consistent: %lx -> [%p,%x] from %p\n",
426 size, cpu_addr, *dma_addrp, __builtin_return_address(0));
428 return cpu_addr;
429 }
431 /* Free and unmap a consistent DMA buffer. CPU_ADDR and DMA_ADDR must
432 be values that were returned from pci_alloc_consistent. SIZE must
433 be the same as what as passed into pci_alloc_consistent.
434 References to the memory and mappings associated with CPU_ADDR or
435 DMA_ADDR past this call are illegal. */
437 void
438 pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu_addr,
439 dma_addr_t dma_addr)
440 {
441 pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
442 free_pages((unsigned long)cpu_addr, get_order(size));
444 DBGA2("pci_free_consistent: [%x,%lx] from %p\n",
445 dma_addr, size, __builtin_return_address(0));
446 }
/* Classify the elements of the scatterlist.  Write dma_address
   of each element with:
	0 : Followers all physically adjacent.
	1 : Followers all virtually adjacent.
	-1 : Not leader, physically adjacent to previous.
	-2 : Not leader, virtually adjacent to previous.
   Write dma_length of each leader with the combined lengths of
   the mergeable followers.  */

#define SG_ENT_VIRT_ADDRESS(SG) (page_address((SG)->page) + (SG)->offset)
#define SG_ENT_PHYS_ADDRESS(SG) __pa(SG_ENT_VIRT_ADDRESS(SG))

static void
sg_classify(struct scatterlist *sg, struct scatterlist *end, int virt_ok)
{
	unsigned long next_paddr;
	struct scatterlist *leader;
	long leader_flag, leader_length;

	/* The first entry always starts a leader run. */
	leader = sg;
	leader_flag = 0;
	leader_length = leader->length;
	next_paddr = SG_ENT_PHYS_ADDRESS(leader) + leader_length;

	for (++sg; sg < end; ++sg) {
		unsigned long addr, len;
		addr = SG_ENT_PHYS_ADDRESS(sg);
		len = sg->length;

		if (next_paddr == addr) {
			/* Physically contiguous with the previous entry:
			   absorb into the current leader. */
			sg->dma_address = -1;
			leader_length += len;
		} else if (((next_paddr | addr) & ~PAGE_MASK) == 0 && virt_ok) {
			/* Both boundaries page-aligned, so the iommu can
			   stitch them virtually (only when VIRT_OK). */
			sg->dma_address = -2;
			leader_flag = 1;
			leader_length += len;
		} else {
			/* Not mergeable: close out the current leader and
			   start a new run here. */
			leader->dma_address = leader_flag;
			leader->dma_length = leader_length;
			leader = sg;
			leader_flag = 0;
			leader_length = len;
		}

		next_paddr = addr + len;
	}

	/* Close out the final leader. */
	leader->dma_address = leader_flag;
	leader->dma_length = leader_length;
}
/* Given a scatterlist leader, choose an allocation method and fill
   in the blanks.  Returns 0 if the segment was direct/DAC mapped,
   1 if arena-mapped, -1 on failure.  */

static int
sg_fill(struct scatterlist *leader, struct scatterlist *end,
	struct scatterlist *out, struct pci_iommu_arena *arena,
	dma_addr_t max_dma, int dac_allowed)
{
	unsigned long paddr = SG_ENT_PHYS_ADDRESS(leader);
	long size = leader->dma_length;
	struct scatterlist *sg;
	unsigned long *ptes;
	long npages, dma_ofs, i;

#if !DEBUG_NODIRECT
	/* If everything is physically contiguous, and the addresses
	   fall into the direct-map window, use it. */
	if (leader->dma_address == 0
	    && paddr + size + __direct_map_base - 1 <= max_dma
	    && paddr + size <= __direct_map_size) {
		out->dma_address = paddr + __direct_map_base;
		out->dma_length = size;

		DBGA(" sg_fill: [%p,%lx] -> direct %lx\n",
		     __va(paddr), size, out->dma_address);

		return 0;
	}
#endif

	/* If physically contiguous and DAC is available, use it. */
	if (leader->dma_address == 0 && dac_allowed) {
		out->dma_address = paddr + alpha_mv.pci_dac_offset;
		out->dma_length = size;

		DBGA(" sg_fill: [%p,%lx] -> DAC %lx\n",
		     __va(paddr), size, out->dma_address);

		return 0;
	}

	/* Otherwise, we'll use the iommu to make the pages virtually
	   contiguous. */

	paddr &= ~PAGE_MASK;
	npages = calc_npages(paddr + size);
	dma_ofs = iommu_arena_alloc(arena, npages, 0);
	if (dma_ofs < 0) {
		/* If we attempted a direct map above but failed, die. */
		if (leader->dma_address == 0)
			return -1;

		/* Otherwise, break up the remaining virtually contiguous
		   hunks into individual direct maps and retry.  Note the
		   recursion: after re-classification with virt_ok=0 the
		   leader can only be 0-flagged, so this recurses once. */
		sg_classify(leader, end, 0);
		return sg_fill(leader, end, out, arena, max_dma, dac_allowed);
	}

	out->dma_address = arena->dma_base + dma_ofs*PAGE_SIZE + paddr;
	out->dma_length = size;

	DBGA(" sg_fill: [%p,%lx] -> sg %lx np %ld\n",
	     __va(paddr), size, out->dma_address, npages);

	/* All virtually contiguous.  We need to find the length of each
	   physically contiguous subsegment to fill in the ptes. */
	ptes = &arena->ptes[dma_ofs];
	sg = leader;
	do {
#if DEBUG_ALLOC > 0
		struct scatterlist *last_sg = sg;
#endif

		size = sg->length;
		paddr = SG_ENT_PHYS_ADDRESS(sg);

		/* Swallow physically-adjacent (-1) followers into this
		   subsegment. */
		while (sg+1 < end && (int) sg[1].dma_address == -1) {
			size += sg[1].length;
			sg++;
		}

		npages = calc_npages((paddr & ~PAGE_MASK) + size);

		paddr &= PAGE_MASK;
		for (i = 0; i < npages; ++i, paddr += PAGE_SIZE)
			*ptes++ = mk_iommu_pte(paddr);

#if DEBUG_ALLOC > 0
		DBGA(" (%ld) [%p,%x] np %ld\n",
		     last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg),
		     last_sg->length, npages);
		while (++last_sg <= sg) {
			DBGA(" (%ld) [%p,%x] cont\n",
			     last_sg - leader, SG_ENT_VIRT_ADDRESS(last_sg),
			     last_sg->length);
		}
#endif
	} while (++sg < end && (int) sg->dma_address < 0);

	return 1;
}
/* Map a scatterlist for streaming DMA.  Adjacent entries are merged
   where possible (see sg_classify).  Returns the number of dma
   segments produced, or 0 on failure.  */
int
pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
	   int direction)
{
	struct scatterlist *start, *end, *out;
	struct pci_controller *hose;
	struct pci_iommu_arena *arena;
	dma_addr_t max_dma;
	int dac_allowed;

	if (direction == PCI_DMA_NONE)
		BUG();

	dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0;

	/* Fast path single entry scatterlists. */
	if (nents == 1) {
		sg->dma_length = sg->length;
		sg->dma_address
		  = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg),
				     sg->length, dac_allowed);
		return sg->dma_address != 0;
	}

	start = sg;
	end = sg + nents;

	/* First, prepare information about the entries. */
	sg_classify(sg, end, alpha_mv.mv_pci_tbi != 0);

	/* Second, figure out where we're going to map things. */
	if (alpha_mv.mv_pci_tbi) {
		hose = pdev ? pdev->sysdata : pci_isa_hose;
		max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK;
		arena = hose->sg_pci;
		if (!arena || arena->dma_base + arena->size - 1 > max_dma)
			arena = hose->sg_isa;
	} else {
		/* No HW sg support: sg_fill may only direct/DAC map. */
		max_dma = -1;
		arena = NULL;
		hose = NULL;
	}

	/* Third, iterate over the scatterlist leaders and allocate
	   dma space as needed. */
	for (out = sg; sg < end; ++sg) {
		/* Negative dma_address marks a non-leader (see sg_classify). */
		if ((int) sg->dma_address < 0)
			continue;
		if (sg_fill(sg, end, out, arena, max_dma, dac_allowed) < 0)
			goto error;
		out++;
	}

	/* Mark the end of the list for pci_unmap_sg. */
	if (out < end)
		out->dma_length = 0;

	if (out - start == 0)
		printk(KERN_WARNING "pci_map_sg failed: no entries?\n");
	DBGA("pci_map_sg: %ld entries\n", out - start);

	return out - start;

 error:
	printk(KERN_WARNING "pci_map_sg failed: "
	       "could not allocate dma page tables\n");

	/* Some allocation failed while mapping the scatterlist
	   entries.  Unmap them now. */
	if (out > start)
		pci_unmap_sg(pdev, start, out - start, direction);
	return 0;
}
/* Unmap a set of streaming mode DMA translations.  Again, cpu read
   rules concerning calls here are the same as for pci_unmap_single()
   above.  */

void
pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
	     int direction)
{
	unsigned long flags;
	struct pci_controller *hose;
	struct pci_iommu_arena *arena;
	struct scatterlist *end;
	dma_addr_t max_dma;
	dma_addr_t fbeg, fend;

	if (direction == PCI_DMA_NONE)
		BUG();

	/* Without HW sg support nothing was arena-mapped. */
	if (! alpha_mv.mv_pci_tbi)
		return;

	hose = pdev ? pdev->sysdata : pci_isa_hose;
	max_dma = pdev ? pdev->dma_mask : ISA_DMA_MASK;
	arena = hose->sg_pci;
	if (!arena || arena->dma_base + arena->size - 1 > max_dma)
		arena = hose->sg_isa;

	/* [fbeg,fend] accumulates the union of freed bus addresses
	   for the TLB-flush decision below. */
	fbeg = -1, fend = 0;

	spin_lock_irqsave(&arena->lock, flags);

	for (end = sg + nents; sg < end; ++sg) {
		dma64_addr_t addr;
		size_t size;
		long npages, ofs;
		dma_addr_t tend;

		addr = sg->dma_address;
		size = sg->dma_length;
		/* dma_length == 0 is the end marker set by pci_map_sg. */
		if (!size)
			break;

		if (addr > 0xffffffff) {
			/* It's a DAC address -- nothing to do. */
			DBGA(" (%ld) DAC [%lx,%lx]\n",
			     sg - end + nents, addr, size);
			continue;
		}

		if (addr >= __direct_map_base
		    && addr < __direct_map_base + __direct_map_size) {
			/* Nothing to do. */
			DBGA(" (%ld) direct [%lx,%lx]\n",
			     sg - end + nents, addr, size);
			continue;
		}

		DBGA(" (%ld) sg [%lx,%lx]\n",
		     sg - end + nents, addr, size);

		npages = calc_npages((addr & ~PAGE_MASK) + size);
		ofs = (addr - arena->dma_base) >> PAGE_SHIFT;
		iommu_arena_free(arena, ofs, npages);

		tend = addr + size - 1;
		if (fbeg > addr) fbeg = addr;
		if (fend < tend) fend = tend;
	}

	/* If we're freeing ptes above the `next_entry' pointer (they
	   may have snuck back into the TLB since the last wrap flush),
	   we need to flush the TLB before reallocating the latter. */
	if ((fend - arena->dma_base) >> PAGE_SHIFT >= arena->next_entry)
		alpha_mv.mv_pci_tbi(hose, fbeg, fend);

	spin_unlock_irqrestore(&arena->lock, flags);

	DBGA("pci_unmap_sg: %ld entries\n", nents - (end - sg));
}
757 /* Return whether the given PCI device DMA address mask can be
758 supported properly. */
760 int
761 pci_dma_supported(struct pci_dev *pdev, u64 mask)
762 {
763 struct pci_controller *hose;
764 struct pci_iommu_arena *arena;
766 /* If there exists a direct map, and the mask fits either
767 the entire direct mapped space or the total system memory as
768 shifted by the map base */
769 if (__direct_map_size != 0
770 && (__direct_map_base + __direct_map_size - 1 <= mask ||
771 __direct_map_base + (max_low_pfn << PAGE_SHIFT) - 1 <= mask))
772 return 1;
774 /* Check that we have a scatter-gather arena that fits. */
775 hose = pdev ? pdev->sysdata : pci_isa_hose;
776 arena = hose->sg_isa;
777 if (arena && arena->dma_base + arena->size - 1 <= mask)
778 return 1;
779 arena = hose->sg_pci;
780 if (arena && arena->dma_base + arena->size - 1 <= mask)
781 return 1;
783 /* As last resort try ZONE_DMA. */
784 if (!__direct_map_base && MAX_DMA_ADDRESS - IDENT_ADDR - 1 <= mask)
785 return 1;
787 return 0;
788 }
791 /*
792 * AGP GART extensions to the IOMMU
793 */
794 int
795 iommu_reserve(struct pci_iommu_arena *arena, long pg_count, long align_mask)
796 {
797 unsigned long flags;
798 unsigned long *ptes;
799 long i, p;
801 if (!arena) return -EINVAL;
803 spin_lock_irqsave(&arena->lock, flags);
805 /* Search for N empty ptes. */
806 ptes = arena->ptes;
807 p = iommu_arena_find_pages(arena, pg_count, align_mask);
808 if (p < 0) {
809 spin_unlock_irqrestore(&arena->lock, flags);
810 return -1;
811 }
813 /* Success. Mark them all reserved (ie not zero and invalid)
814 for the iommu tlb that could load them from under us.
815 They will be filled in with valid bits by _bind() */
816 for (i = 0; i < pg_count; ++i)
817 ptes[p+i] = IOMMU_RESERVED_PTE;
819 arena->next_entry = p + pg_count;
820 spin_unlock_irqrestore(&arena->lock, flags);
822 return p;
823 }
825 int
826 iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count)
827 {
828 unsigned long *ptes;
829 long i;
831 if (!arena) return -EINVAL;
833 ptes = arena->ptes;
835 /* Make sure they're all reserved first... */
836 for(i = pg_start; i < pg_start + pg_count; i++)
837 if (ptes[i] != IOMMU_RESERVED_PTE)
838 return -EBUSY;
840 iommu_arena_free(arena, pg_start, pg_count);
841 return 0;
842 }
844 int
845 iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
846 unsigned long *physaddrs)
847 {
848 unsigned long flags;
849 unsigned long *ptes;
850 long i, j;
852 if (!arena) return -EINVAL;
854 spin_lock_irqsave(&arena->lock, flags);
856 ptes = arena->ptes;
858 for(j = pg_start; j < pg_start + pg_count; j++) {
859 if (ptes[j] != IOMMU_RESERVED_PTE) {
860 spin_unlock_irqrestore(&arena->lock, flags);
861 return -EBUSY;
862 }
863 }
865 for(i = 0, j = pg_start; i < pg_count; i++, j++)
866 ptes[j] = mk_iommu_pte(physaddrs[i]);
868 spin_unlock_irqrestore(&arena->lock, flags);
870 return 0;
871 }
873 int
874 iommu_unbind(struct pci_iommu_arena *arena, long pg_start, long pg_count)
875 {
876 unsigned long *p;
877 long i;
879 if (!arena) return -EINVAL;
881 p = arena->ptes + pg_start;
882 for(i = 0; i < pg_count; i++)
883 p[i] = IOMMU_RESERVED_PTE;
885 return 0;
886 }
888 /* True if the machine supports DAC addressing, and DEV can
889 make use of it given MASK. */
891 int
892 pci_dac_dma_supported(struct pci_dev *dev, u64 mask)
893 {
894 dma64_addr_t dac_offset = alpha_mv.pci_dac_offset;
895 int ok = 1;
897 /* If this is not set, the machine doesn't support DAC at all. */
898 if (dac_offset == 0)
899 ok = 0;
901 /* The device has to be able to address our DAC bit. */
902 if ((dac_offset & dev->dma_mask) != dac_offset)
903 ok = 0;
905 /* If both conditions above are met, we are fine. */
906 DBGA("pci_dac_dma_supported %s from %p\n",
907 ok ? "yes" : "no", __builtin_return_address(0));
909 return ok;
910 }
912 dma64_addr_t
913 pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page,
914 unsigned long offset, int direction)
915 {
916 return (alpha_mv.pci_dac_offset
917 + __pa(page_address(page))
918 + (dma64_addr_t) offset);
919 }
921 struct page *
922 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
923 {
924 unsigned long paddr = (dma_addr & PAGE_MASK) - alpha_mv.pci_dac_offset;
925 return virt_to_page(__va(paddr));
926 }
928 unsigned long
929 pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
930 {
931 return (dma_addr & ~PAGE_MASK);
932 }
935 /* Helper for generic DMA-mapping functions. */
937 struct pci_dev *
938 alpha_gendev_to_pci(struct device *dev)
939 {
940 if (dev && dev->bus == &pci_bus_type)
941 return to_pci_dev(dev);
943 /* Assume that non-PCI devices asking for DMA are either ISA or EISA,
944 BUG() otherwise. */
945 BUG_ON(!isa_bridge);
947 /* Assume non-busmaster ISA DMA when dma_mask is not set (the ISA
948 bridge is bus master then). */
949 if (!dev || !dev->dma_mask || !*dev->dma_mask)
950 return isa_bridge;
952 /* For EISA bus masters, return isa_bridge (it might have smaller
953 dma_mask due to wiring limitations). */
954 if (*dev->dma_mask >= isa_bridge->dma_mask)
955 return isa_bridge;
957 /* This assumes ISA bus master with dma_mask 0xffffff. */
958 return NULL;
959 }
961 int
962 dma_set_mask(struct device *dev, u64 mask)
963 {
964 if (!dev->dma_mask ||
965 !pci_dma_supported(alpha_gendev_to_pci(dev), mask))
966 return -EIO;
968 *dev->dma_mask = mask;
970 return 0;
971 }