ia64/linux-2.6.18-xen.hg

view drivers/md/bitmap.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3 *
4 * bitmap_create - sets up the bitmap structure
5 * bitmap_destroy - destroys the bitmap structure
6 *
7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8 * - added disk storage for bitmap
9 * - changes to allow various bitmap chunk sizes
10 */
12 /*
13 * Still to do:
14 *
15 * flush after percent set rather than just time based. (maybe both).
16 * wait if count gets too high, wake when it drops to half.
17 */
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/raid/md.h>
30 #include <linux/raid/bitmap.h>
/* debug macros */

#define DEBUG 0

#if DEBUG
/* these are for debugging purposes only! */

/* define one and only one of these */
#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */
#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/
#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */
#define INJECT_FAULTS_4 0 /* undef */
#define INJECT_FAULTS_5 0 /* undef */
#define INJECT_FAULTS_6 0

/* if these are defined, the driver will fail! debug only */
#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */
#define INJECT_FATAL_FAULT_2 0 /* undef */
#define INJECT_FATAL_FAULT_3 0 /* undef */
#endif

/*
 * DPRINTK is compiled out (expands to an empty statement).
 * Alternative kept for reference: #define DPRINTK PRINTK
 */
#define DPRINTK(x...) do { } while(0)

/* PRINTK emits at KERN_DEBUG only when DEBUG > 0; otherwise it expands to nothing */
#if !defined(PRINTK)
#  if DEBUG > 0
#    define PRINTK(x...) printk(KERN_DEBUG x)
#  else
#    define PRINTK(x...)
#  endif
#endif
64 static inline char * bmname(struct bitmap *bitmap)
65 {
66 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
67 }
70 /*
71 * just a placeholder - calls kmalloc for bitmap pages
72 */
73 static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
74 {
75 unsigned char *page;
77 #ifdef INJECT_FAULTS_1
78 page = NULL;
79 #else
80 page = kmalloc(PAGE_SIZE, GFP_NOIO);
81 #endif
82 if (!page)
83 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
84 else
85 PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
86 bmname(bitmap), page);
87 return page;
88 }
/*
 * Release a buffer obtained from bitmap_alloc_page().  Currently a thin
 * wrapper around kfree() plus a debug trace.
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);

	kfree(page);
}
99 /*
100 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
101 *
102 * 1) check to see if this page is allocated, if it's not then try to alloc
103 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
104 * page pointer directly as a counter
105 *
106 * if we find our page, we increment the page's refcount so that it stays
107 * allocated while we're using it
108 */
109 static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
110 {
111 unsigned char *mappage;
113 if (page >= bitmap->pages) {
114 printk(KERN_ALERT
115 "%s: invalid bitmap page request: %lu (> %lu)\n",
116 bmname(bitmap), page, bitmap->pages-1);
117 return -EINVAL;
118 }
121 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
122 return 0;
124 if (bitmap->bp[page].map) /* page is already allocated, just return */
125 return 0;
127 if (!create)
128 return -ENOENT;
130 spin_unlock_irq(&bitmap->lock);
132 /* this page has not been allocated yet */
134 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
135 PRINTK("%s: bitmap map page allocation failed, hijacking\n",
136 bmname(bitmap));
137 /* failed - set the hijacked flag so that we can use the
138 * pointer as a counter */
139 spin_lock_irq(&bitmap->lock);
140 if (!bitmap->bp[page].map)
141 bitmap->bp[page].hijacked = 1;
142 goto out;
143 }
145 /* got a page */
147 spin_lock_irq(&bitmap->lock);
149 /* recheck the page */
151 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
152 /* somebody beat us to getting the page */
153 bitmap_free_page(bitmap, mappage);
154 return 0;
155 }
157 /* no page was in place and we have one, so install it */
159 memset(mappage, 0, PAGE_SIZE);
160 bitmap->bp[page].map = mappage;
161 bitmap->missing_pages--;
162 out:
163 return 0;
164 }
167 /* if page is completely empty, put it back on the free list, or dealloc it */
168 /* if page was hijacked, unmark the flag so it might get alloced next time */
169 /* Note: lock should be held when calling this */
170 static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
171 {
172 char *ptr;
174 if (bitmap->bp[page].count) /* page is still busy */
175 return;
177 /* page is no longer in use, it can be released */
179 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
180 bitmap->bp[page].hijacked = 0;
181 bitmap->bp[page].map = NULL;
182 return;
183 }
185 /* normal case, free the page */
187 #if 0
188 /* actually ... let's not. We will probably need the page again exactly when
189 * memory is tight and we are flusing to disk
190 */
191 return;
192 #else
193 ptr = bitmap->bp[page].map;
194 bitmap->bp[page].map = NULL;
195 bitmap->missing_pages++;
196 bitmap_free_page(bitmap, ptr);
197 return;
198 #endif
199 }
202 /*
203 * bitmap file handling - read and write the bitmap file and its superblock
204 */
206 /* copy the pathname of a file to a buffer */
207 char *file_path(struct file *file, char *buf, int count)
208 {
209 struct dentry *d;
210 struct vfsmount *v;
212 if (!buf)
213 return NULL;
215 d = file->f_dentry;
216 v = file->f_vfsmnt;
218 buf = d_path(d, v, buf, count);
220 return IS_ERR(buf) ? NULL : buf;
221 }
223 /*
224 * basic page I/O operations
225 */
227 /* IO operations when bitmap is stored near all superblocks */
228 static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
229 {
230 /* choose a good rdev and read the page from there */
232 mdk_rdev_t *rdev;
233 struct list_head *tmp;
234 struct page *page = alloc_page(GFP_KERNEL);
235 sector_t target;
237 if (!page)
238 return ERR_PTR(-ENOMEM);
240 ITERATE_RDEV(mddev, rdev, tmp) {
241 if (! test_bit(In_sync, &rdev->flags)
242 || test_bit(Faulty, &rdev->flags))
243 continue;
245 target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
247 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
248 page->index = index;
249 attach_page_buffers(page, NULL); /* so that free_buffer will
250 * quietly no-op */
251 return page;
252 }
253 }
254 return ERR_PTR(-EIO);
256 }
258 static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
259 {
260 mdk_rdev_t *rdev;
261 struct list_head *tmp;
263 ITERATE_RDEV(mddev, rdev, tmp)
264 if (test_bit(In_sync, &rdev->flags)
265 && !test_bit(Faulty, &rdev->flags))
266 md_super_write(mddev, rdev,
267 (rdev->sb_offset<<1) + offset
268 + page->index * (PAGE_SIZE/512),
269 PAGE_SIZE,
270 page);
272 if (wait)
273 md_super_wait(mddev);
274 return 0;
275 }
277 /*
278 * write out a page to a file
279 */
280 static int write_page(struct bitmap *bitmap, struct page *page, int wait)
281 {
282 struct buffer_head *bh;
284 if (bitmap->file == NULL)
285 return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
287 bh = page_buffers(page);
289 while (bh && bh->b_blocknr) {
290 atomic_inc(&bitmap->pending_writes);
291 set_buffer_locked(bh);
292 set_buffer_mapped(bh);
293 submit_bh(WRITE, bh);
294 bh = bh->b_this_page;
295 }
297 if (wait) {
298 wait_event(bitmap->write_wait,
299 atomic_read(&bitmap->pending_writes)==0);
300 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
301 }
302 return 0;
303 }
305 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
306 {
307 struct bitmap *bitmap = bh->b_private;
308 unsigned long flags;
310 if (!uptodate) {
311 spin_lock_irqsave(&bitmap->lock, flags);
312 bitmap->flags |= BITMAP_WRITE_ERROR;
313 spin_unlock_irqrestore(&bitmap->lock, flags);
314 }
315 if (atomic_dec_and_test(&bitmap->pending_writes))
316 wake_up(&bitmap->write_wait);
317 }
/*
 * copied from buffer.c
 *
 * Clear the page's Private flag and private word, then drop one page
 * reference with page_cache_release().
 */
static void __clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);

	page_cache_release(page);
}
327 static void free_buffers(struct page *page)
328 {
329 struct buffer_head *bh = page_buffers(page);
331 while (bh) {
332 struct buffer_head *next = bh->b_this_page;
333 free_buffer_head(bh);
334 bh = next;
335 }
336 __clear_page_buffers(page);
337 put_page(page);
338 }
340 /* read a page from a file.
341 * We both read the page, and attach buffers to the page to record the
342 * address of each block (using bmap). These addresses will be used
343 * to write the block later, completely bypassing the filesystem.
344 * This usage is similar to how swap files are handled, and allows us
345 * to write to a file with no concerns of memory allocation failing.
346 */
347 static struct page *read_page(struct file *file, unsigned long index,
348 struct bitmap *bitmap,
349 unsigned long count)
350 {
351 struct page *page = NULL;
352 struct inode *inode = file->f_dentry->d_inode;
353 struct buffer_head *bh;
354 sector_t block;
356 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE,
357 (unsigned long long)index << PAGE_SHIFT);
359 page = alloc_page(GFP_KERNEL);
360 if (!page)
361 page = ERR_PTR(-ENOMEM);
362 if (IS_ERR(page))
363 goto out;
365 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
366 if (!bh) {
367 put_page(page);
368 page = ERR_PTR(-ENOMEM);
369 goto out;
370 }
371 attach_page_buffers(page, bh);
372 block = index << (PAGE_SHIFT - inode->i_blkbits);
373 while (bh) {
374 if (count == 0)
375 bh->b_blocknr = 0;
376 else {
377 bh->b_blocknr = bmap(inode, block);
378 if (bh->b_blocknr == 0) {
379 /* Cannot use this file! */
380 free_buffers(page);
381 page = ERR_PTR(-EINVAL);
382 goto out;
383 }
384 bh->b_bdev = inode->i_sb->s_bdev;
385 if (count < (1<<inode->i_blkbits))
386 count = 0;
387 else
388 count -= (1<<inode->i_blkbits);
390 bh->b_end_io = end_bitmap_write;
391 bh->b_private = bitmap;
392 atomic_inc(&bitmap->pending_writes);
393 set_buffer_locked(bh);
394 set_buffer_mapped(bh);
395 submit_bh(READ, bh);
396 }
397 block++;
398 bh = bh->b_this_page;
399 }
400 page->index = index;
402 wait_event(bitmap->write_wait,
403 atomic_read(&bitmap->pending_writes)==0);
404 if (bitmap->flags & BITMAP_WRITE_ERROR) {
405 free_buffers(page);
406 page = ERR_PTR(-EIO);
407 }
408 out:
409 if (IS_ERR(page))
410 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
411 (int)PAGE_SIZE,
412 (unsigned long long)index << PAGE_SHIFT,
413 PTR_ERR(page));
414 return page;
415 }
417 /*
418 * bitmap file superblock operations
419 */
421 /* update the event counter and sync the superblock to disk */
422 int bitmap_update_sb(struct bitmap *bitmap)
423 {
424 bitmap_super_t *sb;
425 unsigned long flags;
427 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
428 return 0;
429 spin_lock_irqsave(&bitmap->lock, flags);
430 if (!bitmap->sb_page) { /* no superblock */
431 spin_unlock_irqrestore(&bitmap->lock, flags);
432 return 0;
433 }
434 spin_unlock_irqrestore(&bitmap->lock, flags);
435 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
436 sb->events = cpu_to_le64(bitmap->mddev->events);
437 if (!bitmap->mddev->degraded)
438 sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
439 kunmap_atomic(sb, KM_USER0);
440 return write_page(bitmap, bitmap->sb_page, 1);
441 }
443 /* print out the bitmap file superblock */
444 void bitmap_print_sb(struct bitmap *bitmap)
445 {
446 bitmap_super_t *sb;
448 if (!bitmap || !bitmap->sb_page)
449 return;
450 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
451 printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
452 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
453 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
454 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n",
455 *(__u32 *)(sb->uuid+0),
456 *(__u32 *)(sb->uuid+4),
457 *(__u32 *)(sb->uuid+8),
458 *(__u32 *)(sb->uuid+12));
459 printk(KERN_DEBUG " events: %llu\n",
460 (unsigned long long) le64_to_cpu(sb->events));
461 printk(KERN_DEBUG "events cleared: %llu\n",
462 (unsigned long long) le64_to_cpu(sb->events_cleared));
463 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state));
464 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize));
465 printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
466 printk(KERN_DEBUG " sync size: %llu KB\n",
467 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
468 printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
469 kunmap_atomic(sb, KM_USER0);
470 }
472 /* read the superblock from the bitmap file and initialize some bitmap fields */
473 static int bitmap_read_sb(struct bitmap *bitmap)
474 {
475 char *reason = NULL;
476 bitmap_super_t *sb;
477 unsigned long chunksize, daemon_sleep, write_behind;
478 unsigned long long events;
479 int err = -EINVAL;
481 /* page 0 is the superblock, read it... */
482 if (bitmap->file)
483 bitmap->sb_page = read_page(bitmap->file, 0, bitmap, PAGE_SIZE);
484 else {
485 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
486 }
487 if (IS_ERR(bitmap->sb_page)) {
488 err = PTR_ERR(bitmap->sb_page);
489 bitmap->sb_page = NULL;
490 return err;
491 }
493 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
495 chunksize = le32_to_cpu(sb->chunksize);
496 daemon_sleep = le32_to_cpu(sb->daemon_sleep);
497 write_behind = le32_to_cpu(sb->write_behind);
499 /* verify that the bitmap-specific fields are valid */
500 if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
501 reason = "bad magic";
502 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
503 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
504 reason = "unrecognized superblock version";
505 else if (chunksize < PAGE_SIZE)
506 reason = "bitmap chunksize too small";
507 else if ((1 << ffz(~chunksize)) != chunksize)
508 reason = "bitmap chunksize not a power of 2";
509 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ)
510 reason = "daemon sleep period out of range";
511 else if (write_behind > COUNTER_MAX)
512 reason = "write-behind limit out of range (0 - 16383)";
513 if (reason) {
514 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
515 bmname(bitmap), reason);
516 goto out;
517 }
519 /* keep the array size field of the bitmap superblock up to date */
520 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
522 if (!bitmap->mddev->persistent)
523 goto success;
525 /*
526 * if we have a persistent array superblock, compare the
527 * bitmap's UUID and event counter to the mddev's
528 */
529 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
530 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
531 bmname(bitmap));
532 goto out;
533 }
534 events = le64_to_cpu(sb->events);
535 if (events < bitmap->mddev->events) {
536 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
537 "-- forcing full recovery\n", bmname(bitmap), events,
538 (unsigned long long) bitmap->mddev->events);
539 sb->state |= BITMAP_STALE;
540 }
541 success:
542 /* assign fields using values from superblock */
543 bitmap->chunksize = chunksize;
544 bitmap->daemon_sleep = daemon_sleep;
545 bitmap->daemon_lastrun = jiffies;
546 bitmap->max_write_behind = write_behind;
547 bitmap->flags |= sb->state;
548 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
549 bitmap->flags |= BITMAP_HOSTENDIAN;
550 bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
551 if (sb->state & BITMAP_STALE)
552 bitmap->events_cleared = bitmap->mddev->events;
553 err = 0;
554 out:
555 kunmap_atomic(sb, KM_USER0);
556 if (err)
557 bitmap_print_sb(bitmap);
558 return err;
559 }
/* operation selector for bitmap_mask_state() */
enum bitmap_mask_op {
	MASK_SET = 0,	/* OR the given bits into the superblock state */
	MASK_UNSET = 1	/* clear the given bits from the superblock state */
};
566 /* record the state of the bitmap in the superblock */
567 static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
568 enum bitmap_mask_op op)
569 {
570 bitmap_super_t *sb;
571 unsigned long flags;
573 spin_lock_irqsave(&bitmap->lock, flags);
574 if (!bitmap->sb_page) { /* can't set the state */
575 spin_unlock_irqrestore(&bitmap->lock, flags);
576 return;
577 }
578 spin_unlock_irqrestore(&bitmap->lock, flags);
579 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
580 switch (op) {
581 case MASK_SET: sb->state |= bits;
582 break;
583 case MASK_UNSET: sb->state &= ~bits;
584 break;
585 default: BUG();
586 }
587 kunmap_atomic(sb, KM_USER0);
588 }
590 /*
591 * general bitmap file operations
592 */
594 /* calculate the index of the page that contains this bit */
595 static inline unsigned long file_page_index(unsigned long chunk)
596 {
597 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
598 }
600 /* calculate the (bit) offset of this bit within a page */
601 static inline unsigned long file_page_offset(unsigned long chunk)
602 {
603 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
604 }
606 /*
607 * return a pointer to the page in the filemap that contains the given bit
608 *
609 * this lookup is complicated by the fact that the bitmap sb might be exactly
610 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
611 * 0 or page 1
612 */
613 static inline struct page *filemap_get_page(struct bitmap *bitmap,
614 unsigned long chunk)
615 {
616 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
617 }
620 static void bitmap_file_unmap(struct bitmap *bitmap)
621 {
622 struct page **map, *sb_page;
623 unsigned long *attr;
624 int pages;
625 unsigned long flags;
627 spin_lock_irqsave(&bitmap->lock, flags);
628 map = bitmap->filemap;
629 bitmap->filemap = NULL;
630 attr = bitmap->filemap_attr;
631 bitmap->filemap_attr = NULL;
632 pages = bitmap->file_pages;
633 bitmap->file_pages = 0;
634 sb_page = bitmap->sb_page;
635 bitmap->sb_page = NULL;
636 spin_unlock_irqrestore(&bitmap->lock, flags);
638 while (pages--)
639 if (map[pages]->index != 0) /* 0 is sb_page, release it below */
640 free_buffers(map[pages]);
641 kfree(map);
642 kfree(attr);
644 if (sb_page)
645 free_buffers(sb_page);
646 }
648 static void bitmap_file_put(struct bitmap *bitmap)
649 {
650 struct file *file;
651 unsigned long flags;
653 spin_lock_irqsave(&bitmap->lock, flags);
654 file = bitmap->file;
655 bitmap->file = NULL;
656 spin_unlock_irqrestore(&bitmap->lock, flags);
658 if (file)
659 wait_event(bitmap->write_wait,
660 atomic_read(&bitmap->pending_writes)==0);
661 bitmap_file_unmap(bitmap);
663 if (file) {
664 struct inode *inode = file->f_dentry->d_inode;
665 invalidate_inode_pages(inode->i_mapping);
666 fput(file);
667 }
668 }
671 /*
672 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
673 * then it is no longer reliable, so we stop using it and we mark the file
674 * as failed in the superblock
675 */
676 static void bitmap_file_kick(struct bitmap *bitmap)
677 {
678 char *path, *ptr = NULL;
680 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
681 bitmap_update_sb(bitmap);
683 if (bitmap->file) {
684 path = kmalloc(PAGE_SIZE, GFP_KERNEL);
685 if (path)
686 ptr = file_path(bitmap->file, path, PAGE_SIZE);
688 printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
689 bmname(bitmap), ptr ? ptr : "");
691 kfree(path);
692 }
694 bitmap_file_put(bitmap);
696 return;
697 }
/*
 * Per-page attribute bits kept in bitmap->filemap_attr; the enum value is
 * the bit offset within the page's group of attribute bits.
 */
enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0,		/* there are set bits that need to be synced */
	BITMAP_PAGE_CLEAN = 1,		/* there are bits that might need to be cleared */
	BITMAP_PAGE_NEEDWRITE = 2,	/* there are cleared bits that need to be synced */
};
705 static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
706 enum bitmap_page_attr attr)
707 {
708 __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
709 }
711 static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
712 enum bitmap_page_attr attr)
713 {
714 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
715 }
717 static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
718 enum bitmap_page_attr attr)
719 {
720 return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
721 }
723 /*
724 * bitmap_file_set_bit -- called before performing a write to the md device
725 * to set (and eventually sync) a particular bit in the bitmap file
726 *
727 * we set the bit immediately, then we record the page number so that
728 * when an unplug occurs, we can flush the dirty pages out to disk
729 */
730 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
731 {
732 unsigned long bit;
733 struct page *page;
734 void *kaddr;
735 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
737 if (!bitmap->filemap) {
738 return;
739 }
741 page = filemap_get_page(bitmap, chunk);
742 bit = file_page_offset(chunk);
744 /* set the bit */
745 kaddr = kmap_atomic(page, KM_USER0);
746 if (bitmap->flags & BITMAP_HOSTENDIAN)
747 set_bit(bit, kaddr);
748 else
749 ext2_set_bit(bit, kaddr);
750 kunmap_atomic(kaddr, KM_USER0);
751 PRINTK("set file bit %lu page %lu\n", bit, page->index);
753 /* record page number so it gets flushed to disk when unplug occurs */
754 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
756 }
758 /* this gets called when the md device is ready to unplug its underlying
759 * (slave) device queues -- before we let any writes go down, we need to
760 * sync the dirty pages of the bitmap file to disk */
761 int bitmap_unplug(struct bitmap *bitmap)
762 {
763 unsigned long i, flags;
764 int dirty, need_write;
765 struct page *page;
766 int wait = 0;
767 int err;
769 if (!bitmap)
770 return 0;
772 /* look at each page to see if there are any set bits that need to be
773 * flushed out to disk */
774 for (i = 0; i < bitmap->file_pages; i++) {
775 spin_lock_irqsave(&bitmap->lock, flags);
776 if (!bitmap->filemap) {
777 spin_unlock_irqrestore(&bitmap->lock, flags);
778 return 0;
779 }
780 page = bitmap->filemap[i];
781 dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
782 need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
783 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
784 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
785 if (dirty)
786 wait = 1;
787 spin_unlock_irqrestore(&bitmap->lock, flags);
789 if (dirty | need_write)
790 err = write_page(bitmap, page, 0);
791 }
792 if (wait) { /* if any writes were performed, we need to wait on them */
793 if (bitmap->file)
794 wait_event(bitmap->write_wait,
795 atomic_read(&bitmap->pending_writes)==0);
796 else
797 md_super_wait(bitmap->mddev);
798 }
799 if (bitmap->flags & BITMAP_WRITE_ERROR)
800 bitmap_file_kick(bitmap);
801 return 0;
802 }
804 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
805 /* * bitmap_init_from_disk -- called at bitmap_create time to initialize
806 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
807 * memory mapping of the bitmap file
808 * Special cases:
809 * if there's no bitmap file, or if the bitmap file had been
810 * previously kicked from the array, we mark all the bits as
811 * 1's in order to cause a full resync.
812 *
813 * We ignore all bits for sectors that end earlier than 'start'.
814 * This is used when reading an out-of-date bitmap...
815 */
816 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
817 {
818 unsigned long i, chunks, index, oldindex, bit;
819 struct page *page = NULL, *oldpage = NULL;
820 unsigned long num_pages, bit_cnt = 0;
821 struct file *file;
822 unsigned long bytes, offset;
823 int outofdate;
824 int ret = -ENOSPC;
825 void *paddr;
827 chunks = bitmap->chunks;
828 file = bitmap->file;
830 BUG_ON(!file && !bitmap->offset);
832 #ifdef INJECT_FAULTS_3
833 outofdate = 1;
834 #else
835 outofdate = bitmap->flags & BITMAP_STALE;
836 #endif
837 if (outofdate)
838 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
839 "recovery\n", bmname(bitmap));
841 bytes = (chunks + 7) / 8;
843 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
845 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
846 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
847 bmname(bitmap),
848 (unsigned long) i_size_read(file->f_mapping->host),
849 bytes + sizeof(bitmap_super_t));
850 goto out;
851 }
853 ret = -ENOMEM;
855 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
856 if (!bitmap->filemap)
857 goto out;
859 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
860 bitmap->filemap_attr = kzalloc(
861 (((num_pages*4/8)+sizeof(unsigned long)-1)
862 /sizeof(unsigned long))
863 *sizeof(unsigned long),
864 GFP_KERNEL);
865 if (!bitmap->filemap_attr)
866 goto out;
868 oldindex = ~0L;
870 for (i = 0; i < chunks; i++) {
871 int b;
872 index = file_page_index(i);
873 bit = file_page_offset(i);
874 if (index != oldindex) { /* this is a new page, read it in */
875 int count;
876 /* unmap the old page, we're done with it */
877 if (index == num_pages-1)
878 count = bytes - index * PAGE_SIZE;
879 else
880 count = PAGE_SIZE;
881 if (index == 0) {
882 /*
883 * if we're here then the superblock page
884 * contains some bits (PAGE_SIZE != sizeof sb)
885 * we've already read it in, so just use it
886 */
887 page = bitmap->sb_page;
888 offset = sizeof(bitmap_super_t);
889 } else if (file) {
890 page = read_page(file, index, bitmap, count);
891 offset = 0;
892 } else {
893 page = read_sb_page(bitmap->mddev, bitmap->offset, index);
894 offset = 0;
895 }
896 if (IS_ERR(page)) { /* read error */
897 ret = PTR_ERR(page);
898 goto out;
899 }
901 oldindex = index;
902 oldpage = page;
904 if (outofdate) {
905 /*
906 * if bitmap is out of date, dirty the
907 * whole page and write it out
908 */
909 paddr = kmap_atomic(page, KM_USER0);
910 memset(paddr + offset, 0xff,
911 PAGE_SIZE - offset);
912 kunmap_atomic(paddr, KM_USER0);
913 ret = write_page(bitmap, page, 1);
914 if (ret) {
915 /* release, page not in filemap yet */
916 put_page(page);
917 goto out;
918 }
919 }
921 bitmap->filemap[bitmap->file_pages++] = page;
922 }
923 paddr = kmap_atomic(page, KM_USER0);
924 if (bitmap->flags & BITMAP_HOSTENDIAN)
925 b = test_bit(bit, paddr);
926 else
927 b = ext2_test_bit(bit, paddr);
928 kunmap_atomic(paddr, KM_USER0);
929 if (b) {
930 /* if the disk bit is set, set the memory bit */
931 bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
932 ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
933 );
934 bit_cnt++;
935 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
936 }
937 }
939 /* everything went OK */
940 ret = 0;
941 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
943 if (bit_cnt) { /* Kick recovery if any bits were set */
944 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
945 md_wakeup_thread(bitmap->mddev->thread);
946 }
948 out:
949 printk(KERN_INFO "%s: bitmap initialized from disk: "
950 "read %lu/%lu pages, set %lu bits, status: %d\n",
951 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret);
953 return ret;
954 }
956 void bitmap_write_all(struct bitmap *bitmap)
957 {
958 /* We don't actually write all bitmap blocks here,
959 * just flag them as needing to be written
960 */
961 int i;
963 for (i=0; i < bitmap->file_pages; i++)
964 set_page_attr(bitmap, bitmap->filemap[i],
965 BITMAP_PAGE_NEEDWRITE);
966 }
969 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
970 {
971 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
972 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
973 bitmap->bp[page].count += inc;
974 /*
975 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
976 (unsigned long long)offset, inc, bitmap->bp[page].count);
977 */
978 bitmap_checkfree(bitmap, page);
979 }
980 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
981 sector_t offset, int *blocks,
982 int create);
984 /*
985 * bitmap daemon -- periodically wakes up to clean bits and flush pages
986 * out to disk
987 */
989 int bitmap_daemon_work(struct bitmap *bitmap)
990 {
991 unsigned long j;
992 unsigned long flags;
993 struct page *page = NULL, *lastpage = NULL;
994 int err = 0;
995 int blocks;
996 void *paddr;
998 if (bitmap == NULL)
999 return 0;
1000 if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
1001 return 0;
1002 bitmap->daemon_lastrun = jiffies;
1004 for (j = 0; j < bitmap->chunks; j++) {
1005 bitmap_counter_t *bmc;
1006 spin_lock_irqsave(&bitmap->lock, flags);
1007 if (!bitmap->filemap) {
1008 /* error or shutdown */
1009 spin_unlock_irqrestore(&bitmap->lock, flags);
1010 break;
1013 page = filemap_get_page(bitmap, j);
1015 if (page != lastpage) {
1016 /* skip this page unless it's marked as needing cleaning */
1017 if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) {
1018 int need_write = test_page_attr(bitmap, page,
1019 BITMAP_PAGE_NEEDWRITE);
1020 if (need_write)
1021 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
1023 spin_unlock_irqrestore(&bitmap->lock, flags);
1024 if (need_write) {
1025 switch (write_page(bitmap, page, 0)) {
1026 case 0:
1027 break;
1028 default:
1029 bitmap_file_kick(bitmap);
1032 continue;
1035 /* grab the new page, sync and release the old */
1036 if (lastpage != NULL) {
1037 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1038 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1039 spin_unlock_irqrestore(&bitmap->lock, flags);
1040 err = write_page(bitmap, lastpage, 0);
1041 } else {
1042 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1043 spin_unlock_irqrestore(&bitmap->lock, flags);
1045 if (err)
1046 bitmap_file_kick(bitmap);
1047 } else
1048 spin_unlock_irqrestore(&bitmap->lock, flags);
1049 lastpage = page;
1050 /*
1051 printk("bitmap clean at page %lu\n", j);
1052 */
1053 spin_lock_irqsave(&bitmap->lock, flags);
1054 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1056 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1057 &blocks, 0);
1058 if (bmc) {
1059 /*
1060 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
1061 */
1062 if (*bmc == 2) {
1063 *bmc=1; /* maybe clear the bit next time */
1064 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1065 } else if (*bmc == 1) {
1066 /* we can clear the bit */
1067 *bmc = 0;
1068 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1069 -1);
1071 /* clear the bit */
1072 paddr = kmap_atomic(page, KM_USER0);
1073 if (bitmap->flags & BITMAP_HOSTENDIAN)
1074 clear_bit(file_page_offset(j), paddr);
1075 else
1076 ext2_clear_bit(file_page_offset(j), paddr);
1077 kunmap_atomic(paddr, KM_USER0);
1080 spin_unlock_irqrestore(&bitmap->lock, flags);
1083 /* now sync the final page */
1084 if (lastpage != NULL) {
1085 spin_lock_irqsave(&bitmap->lock, flags);
1086 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1087 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1088 spin_unlock_irqrestore(&bitmap->lock, flags);
1089 err = write_page(bitmap, lastpage, 0);
1090 } else {
1091 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1092 spin_unlock_irqrestore(&bitmap->lock, flags);
1096 return err;
1099 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1100 sector_t offset, int *blocks,
1101 int create)
1103 /* If 'create', we might release the lock and reclaim it.
1104 * The lock must have been taken with interrupts enabled.
1105 * If !create, we don't release the lock.
1106 */
1107 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1108 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1109 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1110 sector_t csize;
1112 if (bitmap_checkpage(bitmap, page, create) < 0) {
1113 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1114 *blocks = csize - (offset & (csize- 1));
1115 return NULL;
1117 /* now locked ... */
1119 if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1120 /* should we use the first or second counter field
1121 * of the hijacked pointer? */
1122 int hi = (pageoff > PAGE_COUNTER_MASK);
1123 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
1124 PAGE_COUNTER_SHIFT - 1);
1125 *blocks = csize - (offset & (csize- 1));
1126 return &((bitmap_counter_t *)
1127 &bitmap->bp[page].map)[hi];
1128 } else { /* page is allocated */
1129 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1130 *blocks = csize - (offset & (csize- 1));
1131 return (bitmap_counter_t *)
1132 &(bitmap->bp[page].map[pageoff]);
1136 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1138 if (!bitmap) return 0;
1140 if (behind) {
1141 atomic_inc(&bitmap->behind_writes);
1142 PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n",
1143 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
1146 while (sectors) {
1147 int blocks;
1148 bitmap_counter_t *bmc;
1150 spin_lock_irq(&bitmap->lock);
1151 bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
1152 if (!bmc) {
1153 spin_unlock_irq(&bitmap->lock);
1154 return 0;
1157 switch(*bmc) {
1158 case 0:
1159 bitmap_file_set_bit(bitmap, offset);
1160 bitmap_count_page(bitmap,offset, 1);
1161 blk_plug_device(bitmap->mddev->queue);
1162 /* fall through */
1163 case 1:
1164 *bmc = 2;
1166 BUG_ON((*bmc & COUNTER_MAX) == COUNTER_MAX);
1167 (*bmc)++;
1169 spin_unlock_irq(&bitmap->lock);
1171 offset += blocks;
1172 if (sectors > blocks)
1173 sectors -= blocks;
1174 else sectors = 0;
1176 return 0;
1179 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1180 int success, int behind)
1182 if (!bitmap) return;
1183 if (behind) {
1184 atomic_dec(&bitmap->behind_writes);
1185 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
1186 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
1189 while (sectors) {
1190 int blocks;
1191 unsigned long flags;
1192 bitmap_counter_t *bmc;
1194 spin_lock_irqsave(&bitmap->lock, flags);
1195 bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
1196 if (!bmc) {
1197 spin_unlock_irqrestore(&bitmap->lock, flags);
1198 return;
1201 if (!success && ! (*bmc & NEEDED_MASK))
1202 *bmc |= NEEDED_MASK;
1204 (*bmc)--;
1205 if (*bmc <= 2) {
1206 set_page_attr(bitmap,
1207 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1208 BITMAP_PAGE_CLEAN);
1210 spin_unlock_irqrestore(&bitmap->lock, flags);
1211 offset += blocks;
1212 if (sectors > blocks)
1213 sectors -= blocks;
1214 else sectors = 0;
1218 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1219 int degraded)
1221 bitmap_counter_t *bmc;
1222 int rv;
1223 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1224 *blocks = 1024;
1225 return 1; /* always resync if no bitmap */
1227 spin_lock_irq(&bitmap->lock);
1228 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1229 rv = 0;
1230 if (bmc) {
1231 /* locked */
1232 if (RESYNC(*bmc))
1233 rv = 1;
1234 else if (NEEDED(*bmc)) {
1235 rv = 1;
1236 if (!degraded) { /* don't set/clear bits if degraded */
1237 *bmc |= RESYNC_MASK;
1238 *bmc &= ~NEEDED_MASK;
1242 spin_unlock_irq(&bitmap->lock);
1243 return rv;
1246 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
1248 bitmap_counter_t *bmc;
1249 unsigned long flags;
1250 /*
1251 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted);
1252 */ if (bitmap == NULL) {
1253 *blocks = 1024;
1254 return;
1256 spin_lock_irqsave(&bitmap->lock, flags);
1257 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1258 if (bmc == NULL)
1259 goto unlock;
1260 /* locked */
1261 /*
1262 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks);
1263 */
1264 if (RESYNC(*bmc)) {
1265 *bmc &= ~RESYNC_MASK;
1267 if (!NEEDED(*bmc) && aborted)
1268 *bmc |= NEEDED_MASK;
1269 else {
1270 if (*bmc <= 2) {
1271 set_page_attr(bitmap,
1272 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1273 BITMAP_PAGE_CLEAN);
1277 unlock:
1278 spin_unlock_irqrestore(&bitmap->lock, flags);
1281 void bitmap_close_sync(struct bitmap *bitmap)
1283 /* Sync has finished, and any bitmap chunks that weren't synced
1284 * properly have been aborted. It remains to us to clear the
1285 * RESYNC bit wherever it is still on
1286 */
1287 sector_t sector = 0;
1288 int blocks;
1289 if (!bitmap) return;
1290 while (sector < bitmap->mddev->resync_max_sectors) {
1291 bitmap_end_sync(bitmap, sector, &blocks, 0);
1292 /*
1293 if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n",
1294 (unsigned long long)sector, blocks);
1295 */ sector += blocks;
1299 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1301 /* For each chunk covered by any of these sectors, set the
1302 * counter to 1 and set resync_needed. They should all
1303 * be 0 at this point
1304 */
1306 int secs;
1307 bitmap_counter_t *bmc;
1308 spin_lock_irq(&bitmap->lock);
1309 bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
1310 if (!bmc) {
1311 spin_unlock_irq(&bitmap->lock);
1312 return;
1314 if (! *bmc) {
1315 struct page *page;
1316 *bmc = 1 | (needed?NEEDED_MASK:0);
1317 bitmap_count_page(bitmap, offset, 1);
1318 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
1319 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1321 spin_unlock_irq(&bitmap->lock);
1325 /*
1326 * flush out any pending updates
1327 */
1328 void bitmap_flush(mddev_t *mddev)
1330 struct bitmap *bitmap = mddev->bitmap;
1331 int sleep;
1333 if (!bitmap) /* there was no bitmap */
1334 return;
1336 /* run the daemon_work three time to ensure everything is flushed
1337 * that can be
1338 */
1339 sleep = bitmap->daemon_sleep;
1340 bitmap->daemon_sleep = 0;
1341 bitmap_daemon_work(bitmap);
1342 bitmap_daemon_work(bitmap);
1343 bitmap_daemon_work(bitmap);
1344 bitmap->daemon_sleep = sleep;
1345 bitmap_update_sb(bitmap);
1348 /*
1349 * free memory that was allocated
1350 */
1351 static void bitmap_free(struct bitmap *bitmap)
1353 unsigned long k, pages;
1354 struct bitmap_page *bp;
1356 if (!bitmap) /* there was no bitmap */
1357 return;
1359 /* release the bitmap file and kill the daemon */
1360 bitmap_file_put(bitmap);
1362 bp = bitmap->bp;
1363 pages = bitmap->pages;
1365 /* free all allocated memory */
1367 if (bp) /* deallocate the page memory */
1368 for (k = 0; k < pages; k++)
1369 if (bp[k].map && !bp[k].hijacked)
1370 kfree(bp[k].map);
1371 kfree(bp);
1372 kfree(bitmap);
1374 void bitmap_destroy(mddev_t *mddev)
1376 struct bitmap *bitmap = mddev->bitmap;
1378 if (!bitmap) /* there was no bitmap */
1379 return;
1381 mddev->bitmap = NULL; /* disconnect from the md device */
1382 if (mddev->thread)
1383 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1385 bitmap_free(bitmap);
1388 /*
1389 * initialize the bitmap structure
1390 * if this returns an error, bitmap_destroy must be called to do clean up
1391 */
1392 int bitmap_create(mddev_t *mddev)
1394 struct bitmap *bitmap;
1395 unsigned long blocks = mddev->resync_max_sectors;
1396 unsigned long chunks;
1397 unsigned long pages;
1398 struct file *file = mddev->bitmap_file;
1399 int err;
1400 sector_t start;
1402 BUG_ON(sizeof(bitmap_super_t) != 256);
1404 if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
1405 return 0;
1407 BUG_ON(file && mddev->bitmap_offset);
1409 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1410 if (!bitmap)
1411 return -ENOMEM;
1413 spin_lock_init(&bitmap->lock);
1414 atomic_set(&bitmap->pending_writes, 0);
1415 init_waitqueue_head(&bitmap->write_wait);
1417 bitmap->mddev = mddev;
1419 bitmap->file = file;
1420 bitmap->offset = mddev->bitmap_offset;
1421 if (file) {
1422 get_file(file);
1423 do_sync_file_range(file, 0, LLONG_MAX,
1424 SYNC_FILE_RANGE_WAIT_BEFORE |
1425 SYNC_FILE_RANGE_WRITE |
1426 SYNC_FILE_RANGE_WAIT_AFTER);
1428 /* read superblock from bitmap file (this sets bitmap->chunksize) */
1429 err = bitmap_read_sb(bitmap);
1430 if (err)
1431 goto error;
1433 bitmap->chunkshift = find_first_bit(&bitmap->chunksize,
1434 sizeof(bitmap->chunksize));
1436 /* now that chunksize and chunkshift are set, we can use these macros */
1437 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) /
1438 CHUNK_BLOCK_RATIO(bitmap);
1439 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
1441 BUG_ON(!pages);
1443 bitmap->chunks = chunks;
1444 bitmap->pages = pages;
1445 bitmap->missing_pages = pages;
1446 bitmap->counter_bits = COUNTER_BITS;
1448 bitmap->syncchunk = ~0UL;
1450 #ifdef INJECT_FATAL_FAULT_1
1451 bitmap->bp = NULL;
1452 #else
1453 bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
1454 #endif
1455 err = -ENOMEM;
1456 if (!bitmap->bp)
1457 goto error;
1459 /* now that we have some pages available, initialize the in-memory
1460 * bitmap from the on-disk bitmap */
1461 start = 0;
1462 if (mddev->degraded == 0
1463 || bitmap->events_cleared == mddev->events)
1464 /* no need to keep dirty bits to optimise a re-add of a missing device */
1465 start = mddev->recovery_cp;
1466 err = bitmap_init_from_disk(bitmap, start);
1468 if (err)
1469 goto error;
1471 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1472 pages, bmname(bitmap));
1474 mddev->bitmap = bitmap;
1476 mddev->thread->timeout = bitmap->daemon_sleep * HZ;
1478 return bitmap_update_sb(bitmap);
1480 error:
1481 bitmap_free(bitmap);
1482 return err;
1485 /* the bitmap API -- for raid personalities */
1486 EXPORT_SYMBOL(bitmap_startwrite);
1487 EXPORT_SYMBOL(bitmap_endwrite);
1488 EXPORT_SYMBOL(bitmap_start_sync);
1489 EXPORT_SYMBOL(bitmap_end_sync);
1490 EXPORT_SYMBOL(bitmap_unplug);
1491 EXPORT_SYMBOL(bitmap_close_sync);