ia64/xen-unstable

view linux-2.6-xen-sparse/mm/highmem.c @ 7446:18eb059ae471

New network-bridge script and associated gubbins.

This is Kurt Garloff's reworked network-bridge script:

* we got rid of ifconfig
* it works for netdev != eth0
* arp on and off are symmetric, as are ifdown and ifup
* ifup will be passed the ifcfg config file name if needed
  (otherwise ifup may conclude that the veth0 hardware is
  NOT the same as the original ${netdev} and therefore not use
  the same config -- this happens on SUSE. Charles Coffing
  tracked this one down.)

Plus Kurt's avoid-dash patch:

The network setup scripts on SUSE have trouble with the bridge
name xen-br0; they don't expect the '-'.
Arguably those scripts should be fixed, but I assume there are
more scripts out there that may not like the dash, so I suggest
the following patch to rename xen-br0 to xenbr0.

Plus Charles Duffy's patch to support multiple bridges:

The attached patch allows the network-bridge script to be used to
generate multiple bridges corresponding to different physical
interfaces. It adds a new parameter, "vifnum", which both selects
the loopback interface to use and sets the defaults for the
physical interface and bridge name.

Thus, if one wishes to start xenbr0 on eth0 and xenbr1 on eth1, one
need only call:

network-bridge start ## vifnum is 0 by default
network-bridge start vifnum=1

...well, that and set loopback.nloopbacks=2 in the Dom0 kernel
parameters.
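
For example, with a typical GRUB entry the extra parameter goes on the
dom0 kernel's "module" line (the paths and root device below are
illustrative only):

kernel /boot/xen.gz
module /boot/vmlinuz-2.6-xen root=/dev/sda1 ro loopback.nloopbacks=2
module /boot/initrd-xen.img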

Plus renaming of virtnum to vifnum in Charles' patch, as requested by Ian Pratt.

Plus a fix to DevController to allocate vif IDs starting from 0 (i.e. vif2.0
is now domain 2's first vif, rather than vif2.1 as it was until recently).

Plus tidying up inside network-bridge using some helper variables.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Wed Oct 19 16:24:54 2005 +0100 (2005-10-19)
parents 06d84bf87159
children fd9b2c1bb577
line source
1 /*
2 * High memory handling common code and variables.
3 *
4 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
5 * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
6 *
7 *
8 * Redesigned the x86 32-bit VM architecture to deal with
9 * 64-bit physical space. With current x86 CPUs this
10 * means up to 64 Gigabytes physical RAM.
11 *
12 * Rewrote high memory support to move the page cache into
13 * high memory. Implemented permanent (schedulable) kmaps
14 * based on Linus' idea.
15 *
16 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
17 */
19 #include <linux/mm.h>
20 #include <linux/module.h>
21 #include <linux/swap.h>
22 #include <linux/bio.h>
23 #include <linux/pagemap.h>
24 #include <linux/mempool.h>
25 #include <linux/blkdev.h>
26 #include <linux/init.h>
27 #include <linux/hash.h>
28 #include <linux/highmem.h>
29 #include <asm/tlbflush.h>
31 static mempool_t *page_pool, *isa_page_pool;
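/*
 * Mempool callbacks shared by the bounce pools below.  The pool's
 * private 'data' pointer carries extra GFP flags (__GFP_DMA for the
 * ISA pool, none for the highmem pool) which are OR'd into the
 * caller's allocation mask.
 */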
33 static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
34 {
35 unsigned int gfp = gfp_mask | (unsigned int) (long) data;
37 return alloc_page(gfp);
38 }
40 static void page_pool_free(void *page, void *data)
41 {
42 __free_page(page);
43 }
45 /*
46 * Virtual_count is not a pure "count".
47 * 0 means that it is not mapped, and has not been mapped
48 * since a TLB flush - it is usable.
49 * 1 means that there are no users, but it has been mapped
50 * since the last TLB flush - so we can't use it.
51 * n means that there are (n-1) current users of it.
52 */
53 #ifdef CONFIG_HIGHMEM
54 static int pkmap_count[LAST_PKMAP];
55 static unsigned int last_pkmap_nr;
56 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
58 pte_t * pkmap_page_table;
60 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
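/*
 * Tasks sleep on pkmap_map_wait (see map_new_virtual) when every pkmap
 * slot is in use; kunmap_high() issues the matching wake_up() once a
 * slot's count has dropped back to 1 and it can be recycled.
 */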
62 static void flush_all_zero_pkmaps(void)
63 {
64 int i;
66 flush_cache_kmaps();
68 for (i = 0; i < LAST_PKMAP; i++) {
69 struct page *page;
71 /*
72 * zero means we don't have anything to do,
73 * >1 means that it is still in use. Only
74 * a count of 1 means that it is free but
75 * needs to be unmapped
76 */
77 if (pkmap_count[i] != 1)
78 continue;
79 pkmap_count[i] = 0;
81 /* sanity check */
82 if (pte_none(pkmap_page_table[i]))
83 BUG();
85 /*
86 * Don't need an atomic fetch-and-clear op here;
87 * no-one has the page mapped, and cannot get at
88 * its virtual address (and hence PTE) without first
89 * getting the kmap_lock (which is held here).
90 * So no dangers, even with speculative execution.
91 */
92 page = pte_page(pkmap_page_table[i]);
93 pte_clear(&init_mm, (unsigned long)page_address(page),
94 &pkmap_page_table[i]);
96 set_page_address(page, NULL);
97 }
98 flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
99 }
101 static inline unsigned long map_new_virtual(struct page *page)
102 {
103 unsigned long vaddr;
104 int count;
106 start:
107 count = LAST_PKMAP;
108 /* Find an empty entry */
109 for (;;) {
110 last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
111 if (!last_pkmap_nr) {
112 flush_all_zero_pkmaps();
113 count = LAST_PKMAP;
114 }
115 if (!pkmap_count[last_pkmap_nr])
116 break; /* Found a usable entry */
117 if (--count)
118 continue;
120 /*
121 * Sleep for somebody else to unmap their entries
122 */
123 {
124 DECLARE_WAITQUEUE(wait, current);
126 __set_current_state(TASK_UNINTERRUPTIBLE);
127 add_wait_queue(&pkmap_map_wait, &wait);
128 spin_unlock(&kmap_lock);
129 schedule();
130 remove_wait_queue(&pkmap_map_wait, &wait);
131 spin_lock(&kmap_lock);
133 /* Somebody else might have mapped it while we slept */
134 if (page_address(page))
135 return (unsigned long)page_address(page);
137 /* Re-start */
138 goto start;
139 }
140 }
141 vaddr = PKMAP_ADDR(last_pkmap_nr);
142 set_pte_at(&init_mm, vaddr,
143 &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
145 pkmap_count[last_pkmap_nr] = 1;
146 set_page_address(page, (void *)vaddr);
148 return vaddr;
149 }
151 void kmap_flush_unused(void)
152 {
153 spin_lock(&kmap_lock);
154 flush_all_zero_pkmaps();
155 spin_unlock(&kmap_lock);
156 }
158 EXPORT_SYMBOL(kmap_flush_unused);
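/*
 * Note: most callers do not use kmap_high()/kunmap_high() directly but
 * go through the kmap()/kunmap() wrappers, which return page_address()
 * for lowmem pages and only fall back to these slower paths for real
 * highmem pages.  A typical caller looks roughly like this (buf and
 * len are purely illustrative):
 *
 *	void *va = kmap(page);
 *	memcpy(va, buf, len);
 *	kunmap(page);
 */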
160 void fastcall *kmap_high(struct page *page)
161 {
162 unsigned long vaddr;
164 /*
165 * For highmem pages, we can't trust "virtual" until
166 * after we have the lock.
167 *
168 * We cannot call this from interrupts, as it may block
169 */
170 spin_lock(&kmap_lock);
171 vaddr = (unsigned long)page_address(page);
172 if (!vaddr)
173 vaddr = map_new_virtual(page);
174 pkmap_count[PKMAP_NR(vaddr)]++;
175 if (pkmap_count[PKMAP_NR(vaddr)] < 2)
176 BUG();
177 spin_unlock(&kmap_lock);
178 return (void*) vaddr;
179 }
181 EXPORT_SYMBOL(kmap_high);
183 void fastcall kunmap_high(struct page *page)
184 {
185 unsigned long vaddr;
186 unsigned long nr;
187 int need_wakeup;
189 spin_lock(&kmap_lock);
190 vaddr = (unsigned long)page_address(page);
191 if (!vaddr)
192 BUG();
193 nr = PKMAP_NR(vaddr);
195 /*
196 * A count must never go down to zero
197 * without a TLB flush!
198 */
199 need_wakeup = 0;
200 switch (--pkmap_count[nr]) {
201 case 0:
202 BUG();
203 case 1:
204 /*
205 * Avoid an unnecessary wake_up() function call.
206 * The common case is pkmap_count[] == 1, but
207 * no waiters.
208 * The tasks queued in the wait-queue are guarded
209 * by both the lock in the wait-queue-head and by
210 * the kmap_lock. As the kmap_lock is held here,
211 * no need for the wait-queue-head's lock. Simply
212 * test if the queue is empty.
213 */
214 need_wakeup = waitqueue_active(&pkmap_map_wait);
215 }
216 spin_unlock(&kmap_lock);
218 /* do wake-up, if needed, race-free outside of the spin lock */
219 if (need_wakeup)
220 wake_up(&pkmap_map_wait);
221 }
223 EXPORT_SYMBOL(kunmap_high);
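/*
 * Number of pages kept in reserve in each bounce pool.  The reserve
 * guarantees forward progress for block I/O whose pages the device
 * cannot address directly, even when the page allocator is under
 * pressure.
 */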
225 #define POOL_SIZE 64
227 static __init int init_emergency_pool(void)
228 {
229 struct sysinfo i;
230 si_meminfo(&i);
231 si_swapinfo(&i);
233 if (!i.totalhigh)
234 return 0;
236 page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
237 if (!page_pool)
238 BUG();
239 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
241 return 0;
242 }
244 __initcall(init_emergency_pool);
246 /*
247 * highmem version, map in to vec
248 */
249 static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
250 {
251 unsigned long flags;
252 unsigned char *vto;
254 local_irq_save(flags);
255 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
256 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
257 kunmap_atomic(vto, KM_BOUNCE_READ);
258 local_irq_restore(flags);
259 }
261 #else /* CONFIG_HIGHMEM */
263 #define bounce_copy_vec(to, vfrom) \
264 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
266 #endif
268 #define ISA_POOL_SIZE 16
270 /*
271 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
272 * as the max address, so check if the pool has already been created.
273 */
274 int init_emergency_isa_pool(void)
275 {
276 if (isa_page_pool)
277 return 0;
279 isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
280 if (!isa_page_pool)
281 BUG();
283 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
284 return 0;
285 }
287 /*
288 * Simple bounce buffer support for highmem pages. Depending on the
289 * queue gfp mask set, *to may or may not be a highmem page. kmap it
290 * always, it will do the Right Thing
291 */
292 static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
293 {
294 unsigned char *vfrom;
295 struct bio_vec *tovec, *fromvec;
296 int i;
298 __bio_for_each_segment(tovec, to, i, 0) {
299 fromvec = from->bi_io_vec + i;
301 /*
302 * not bounced
303 */
304 if (tovec->bv_page == fromvec->bv_page)
305 continue;
307 /*
308 * fromvec->bv_offset and fromvec->bv_len might have been
309 * modified by the block layer, so use the original copy,
310 * bounce_copy_vec already uses tovec->bv_len
311 */
312 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
314 flush_dcache_page(tovec->bv_page);
315 bounce_copy_vec(tovec, vfrom);
316 }
317 }
319 static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
320 {
321 struct bio *bio_orig = bio->bi_private;
322 struct bio_vec *bvec, *org_vec;
323 int i;
325 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
326 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
328 /*
329 * free up bounce indirect pages used
330 */
331 __bio_for_each_segment(bvec, bio, i, 0) {
332 org_vec = bio_orig->bi_io_vec + i;
333 if (bvec->bv_page == org_vec->bv_page)
334 continue;
336 mempool_free(bvec->bv_page, pool);
337 dec_page_state(nr_bounce);
338 }
340 bio_endio(bio_orig, bio_orig->bi_size, err);
341 bio_put(bio);
342 }
344 static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
345 {
346 if (bio->bi_size)
347 return 1;
349 bounce_end_io(bio, page_pool, err);
350 return 0;
351 }
353 static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
354 {
355 if (bio->bi_size)
356 return 1;
358 bounce_end_io(bio, isa_page_pool, err);
359 return 0;
360 }
362 static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
363 {
364 struct bio *bio_orig = bio->bi_private;
366 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
367 copy_to_high_bio_irq(bio_orig, bio);
369 bounce_end_io(bio, pool, err);
370 }
372 static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
373 {
374 if (bio->bi_size)
375 return 1;
377 __bounce_end_io_read(bio, page_pool, err);
378 return 0;
379 }
381 static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
382 {
383 if (bio->bi_size)
384 return 1;
386 __bounce_end_io_read(bio, isa_page_pool, err);
387 return 0;
388 }
390 static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
391 mempool_t *pool)
392 {
393 struct page *page;
394 struct bio *bio = NULL;
395 int i, rw = bio_data_dir(*bio_orig);
396 struct bio_vec *to, *from;
398 bio_for_each_segment(from, *bio_orig, i) {
399 page = from->bv_page;
401 /*
402 * is destination page below bounce pfn?
403 */
404 if (page_to_pfn(page) < q->bounce_pfn)
405 continue;
407 /*
408 * irk, bounce it
409 */
410 if (!bio)
411 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
413 to = bio->bi_io_vec + i;
415 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
416 to->bv_len = from->bv_len;
417 to->bv_offset = from->bv_offset;
418 inc_page_state(nr_bounce);
420 if (rw == WRITE) {
421 char *vto, *vfrom;
423 flush_dcache_page(from->bv_page);
424 vto = page_address(to->bv_page) + to->bv_offset;
425 vfrom = kmap(from->bv_page) + from->bv_offset;
426 memcpy(vto, vfrom, to->bv_len);
427 kunmap(from->bv_page);
428 }
429 }
431 /*
432 * no pages bounced
433 */
434 if (!bio)
435 return;
437 /*
438 * at least one page was bounced, fill in possible non-highmem
439 * pages
440 */
441 __bio_for_each_segment(from, *bio_orig, i, 0) {
442 to = bio_iovec_idx(bio, i);
443 if (!to->bv_page) {
444 to->bv_page = from->bv_page;
445 to->bv_len = from->bv_len;
446 to->bv_offset = from->bv_offset;
447 }
448 }
450 bio->bi_bdev = (*bio_orig)->bi_bdev;
451 bio->bi_flags |= (1 << BIO_BOUNCED);
452 bio->bi_sector = (*bio_orig)->bi_sector;
453 bio->bi_rw = (*bio_orig)->bi_rw;
455 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
456 bio->bi_idx = (*bio_orig)->bi_idx;
457 bio->bi_size = (*bio_orig)->bi_size;
459 if (pool == page_pool) {
460 bio->bi_end_io = bounce_end_io_write;
461 if (rw == READ)
462 bio->bi_end_io = bounce_end_io_read;
463 } else {
464 bio->bi_end_io = bounce_end_io_write_isa;
465 if (rw == READ)
466 bio->bi_end_io = bounce_end_io_read_isa;
467 }
469 bio->bi_private = *bio_orig;
470 *bio_orig = bio;
471 }
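/*
 * Called by the block layer before a bio is handed to the driver: if
 * any segment sits above q->bounce_pfn, *bio_orig is replaced with a
 * bounced clone, and the original bio is completed from the clone's
 * end_io handlers above.
 */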
473 void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
474 {
475 mempool_t *pool;
477 /*
478 * for non-isa bounce case, just check if the bounce pfn is equal
479 * to or bigger than the highest pfn in the system -- in that case,
480 * don't waste time iterating over bio segments
481 */
482 if (!(q->bounce_gfp & GFP_DMA)) {
483 if (q->bounce_pfn >= blk_max_pfn)
484 return;
485 pool = page_pool;
486 } else {
487 BUG_ON(!isa_page_pool);
488 pool = isa_page_pool;
489 }
491 /*
492 * slow path
493 */
494 __blk_queue_bounce(q, bio_orig, pool);
495 }
497 EXPORT_SYMBOL(blk_queue_bounce);
499 #if defined(HASHED_PAGE_VIRTUAL)
501 #define PA_HASH_ORDER 7
503 /*
504 * Describes one page->virtual association
505 */
506 struct page_address_map {
507 struct page *page;
508 void *virtual;
509 struct list_head list;
510 };
512 /*
513 * page_address_map freelist, allocated from page_address_maps.
514 */
515 static struct list_head page_address_pool; /* freelist */
516 static spinlock_t pool_lock; /* protects page_address_pool */
518 /*
519 * Hash table bucket
520 */
521 static struct page_address_slot {
522 struct list_head lh; /* List of page_address_maps */
523 spinlock_t lock; /* Protect this bucket's list */
524 } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
526 static struct page_address_slot *page_slot(struct page *page)
527 {
528 return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
529 }
531 void *page_address(struct page *page)
532 {
533 unsigned long flags;
534 void *ret;
535 struct page_address_slot *pas;
537 if (!PageHighMem(page))
538 return lowmem_page_address(page);
540 pas = page_slot(page);
541 ret = NULL;
542 spin_lock_irqsave(&pas->lock, flags);
543 if (!list_empty(&pas->lh)) {
544 struct page_address_map *pam;
546 list_for_each_entry(pam, &pas->lh, list) {
547 if (pam->page == page) {
548 ret = pam->virtual;
549 goto done;
550 }
551 }
552 }
553 done:
554 spin_unlock_irqrestore(&pas->lock, flags);
555 return ret;
556 }
558 EXPORT_SYMBOL(page_address);
560 void set_page_address(struct page *page, void *virtual)
561 {
562 unsigned long flags;
563 struct page_address_slot *pas;
564 struct page_address_map *pam;
566 BUG_ON(!PageHighMem(page));
568 pas = page_slot(page);
569 if (virtual) { /* Add */
570 BUG_ON(list_empty(&page_address_pool));
572 spin_lock_irqsave(&pool_lock, flags);
573 pam = list_entry(page_address_pool.next,
574 struct page_address_map, list);
575 list_del(&pam->list);
576 spin_unlock_irqrestore(&pool_lock, flags);
578 pam->page = page;
579 pam->virtual = virtual;
581 spin_lock_irqsave(&pas->lock, flags);
582 list_add_tail(&pam->list, &pas->lh);
583 spin_unlock_irqrestore(&pas->lock, flags);
584 } else { /* Remove */
585 spin_lock_irqsave(&pas->lock, flags);
586 list_for_each_entry(pam, &pas->lh, list) {
587 if (pam->page == page) {
588 list_del(&pam->list);
589 spin_unlock_irqrestore(&pas->lock, flags);
590 spin_lock_irqsave(&pool_lock, flags);
591 list_add_tail(&pam->list, &page_address_pool);
592 spin_unlock_irqrestore(&pool_lock, flags);
593 goto done;
594 }
595 }
596 spin_unlock_irqrestore(&pas->lock, flags);
597 }
598 done:
599 return;
600 }
602 static struct page_address_map page_address_maps[LAST_PKMAP];
604 void __init page_address_init(void)
605 {
606 int i;
608 INIT_LIST_HEAD(&page_address_pool);
609 for (i = 0; i < ARRAY_SIZE(page_address_maps); i++)
610 list_add(&page_address_maps[i].list, &page_address_pool);
611 for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
612 INIT_LIST_HEAD(&page_address_htable[i].lh);
613 spin_lock_init(&page_address_htable[i].lock);
614 }
615 spin_lock_init(&pool_lock);
616 }
618 #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */