ia64/linux-2.6.18-xen.hg

view drivers/md/dm-table.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer, in the same
manner as when decreasing the reservation (a sketch of this retry loop
follows the changeset metadata below).

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
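The retry strategy described in the changeset message can be summarised with a small, self-contained userspace sketch. This is an illustration only, not the patch itself: increase_reservation() is a hypothetical stand-in for the hypercall path that may grant fewer pages than requested, and sleep() stands in for the driver re-arming its kernel timer.

#include <stdio.h>
#include <unistd.h>

/* Hypothetical stand-in: ask the host for up to nr_pages and return how
 * many it actually granted (possibly fewer under memory pressure). */
static unsigned long increase_reservation(unsigned long nr_pages)
{
	return nr_pages < 16 ? nr_pages : 16;	/* simulate partial grants */
}

int main(void)
{
	unsigned long current_pages = 0, target_pages = 64;

	while (current_pages < target_pages) {
		unsigned long want = target_pages - current_pages;
		unsigned long got = increase_reservation(want);

		/* Keep whatever was granted, even on partial success. */
		current_pages += got;
		printf("ballooned up to %lu/%lu pages\n",
		       current_pages, target_pages);

		/* No "hard limit": if the request was not fully satisfied,
		 * simply try again later (the real driver re-arms a timer
		 * here rather than sleeping). */
		if (current_pages < target_pages)
			sleep(1);
	}
	return 0;
}

The two points from the message are visible here: partially granted pages are kept rather than returned, and failing to reach the target only defers the next attempt instead of recording a permanent limit.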
/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <asm/atomic.h>

#define DM_MSG_PREFIX "table"

#define MAX_DEPTH 16
#define NODE_SIZE L1_CACHE_BYTES
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
struct dm_table {
	struct mapped_device *md;
	atomic_t holders;

	/* btree table */
	unsigned int depth;
	unsigned int counts[MAX_DEPTH];	/* in nodes */
	sector_t *index[MAX_DEPTH];

	unsigned int num_targets;
	unsigned int num_allocated;
	sector_t *highs;
	struct dm_target *targets;

	/*
	 * Indicates the rw permissions for the new logical
	 * device. This should be a combination of FMODE_READ
	 * and FMODE_WRITE.
	 */
	int mode;

	/* a list of devices used by this table */
	struct list_head devices;

	/*
	 * These are optimistic limits taken from all the
	 * targets, some targets will need smaller limits.
	 */
	struct io_restrictions limits;

	/* events get handed up using this callback */
	void (*event_fn)(void *);
	void *event_context;
};
/*
 * Similar to ceiling(log_size(n))
 */
static unsigned int int_log(unsigned int n, unsigned int base)
{
	int result = 0;

	while (n > 1) {
		n = dm_div_up(n, base);
		result++;
	}

	return result;
}
/*
 * Returns the minimum that is _not_ zero, unless both are zero.
 */
#define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))
/*
 * Combine two io_restrictions, always taking the lower value.
 */
static void combine_restrictions_low(struct io_restrictions *lhs,
				     struct io_restrictions *rhs)
{
	lhs->max_sectors =
		min_not_zero(lhs->max_sectors, rhs->max_sectors);

	lhs->max_phys_segments =
		min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);

	lhs->max_hw_segments =
		min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);

	lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);

	lhs->max_segment_size =
		min_not_zero(lhs->max_segment_size, rhs->max_segment_size);

	lhs->seg_boundary_mask =
		min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);

	lhs->no_cluster |= rhs->no_cluster;
}
/*
 * Calculate the index of the child node of the n'th node's k'th key.
 */
static inline unsigned int get_child(unsigned int n, unsigned int k)
{
	return (n * CHILDREN_PER_NODE) + k;
}
/*
 * Return the n'th node of level l from table t.
 */
static inline sector_t *get_node(struct dm_table *t,
				 unsigned int l, unsigned int n)
{
	return t->index[l] + (n * KEYS_PER_NODE);
}
/*
 * Return the highest key that you could look up from the n'th
 * node on level l of the btree.
 */
static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
{
	for (; l < t->depth - 1; l++)
		n = get_child(n, CHILDREN_PER_NODE - 1);

	if (n >= t->counts[l])
		return (sector_t) - 1;

	return get_node(t, l, n)[KEYS_PER_NODE - 1];
}
/*
 * Fills in a level of the btree based on the highs of the level
 * below it.
 */
static int setup_btree_index(unsigned int l, struct dm_table *t)
{
	unsigned int n, k;
	sector_t *node;

	for (n = 0U; n < t->counts[l]; n++) {
		node = get_node(t, l, n);

		for (k = 0U; k < KEYS_PER_NODE; k++)
			node[k] = high(t, l + 1, get_child(n, k));
	}

	return 0;
}
void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
{
	unsigned long size;
	void *addr;

	/*
	 * Check that we're not going to overflow.
	 */
	if (nmemb > (ULONG_MAX / elem_size))
		return NULL;

	size = nmemb * elem_size;
	addr = vmalloc(size);
	if (addr)
		memset(addr, 0, size);

	return addr;
}
/*
 * highs and targets are managed as dynamic arrays during a
 * table load.
 */
static int alloc_targets(struct dm_table *t, unsigned int num)
{
	sector_t *n_highs;
	struct dm_target *n_targets;
	int n = t->num_targets;

	/*
	 * Allocate both the target array and offset array at once.
	 */
	n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) +
					  sizeof(sector_t));
	if (!n_highs)
		return -ENOMEM;

	n_targets = (struct dm_target *) (n_highs + num);

	if (n) {
		memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
		memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
	}

	memset(n_highs + n, -1, sizeof(*n_highs) * (num - n));
	vfree(t->highs);

	t->num_allocated = num;
	t->highs = n_highs;
	t->targets = n_targets;

	return 0;
}
int dm_table_create(struct dm_table **result, int mode,
		    unsigned num_targets, struct mapped_device *md)
{
	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);

	if (!t)
		return -ENOMEM;

	memset(t, 0, sizeof(*t));
	INIT_LIST_HEAD(&t->devices);
	atomic_set(&t->holders, 1);

	if (!num_targets)
		num_targets = KEYS_PER_NODE;

	num_targets = dm_round_up(num_targets, KEYS_PER_NODE);

	if (alloc_targets(t, num_targets)) {
		kfree(t);
		t = NULL;
		return -ENOMEM;
	}

	t->mode = mode;
	t->md = md;
	*result = t;
	return 0;
}
int dm_create_error_table(struct dm_table **result, struct mapped_device *md)
{
	struct dm_table *t;
	sector_t dev_size = 1;
	int r;

	/*
	 * Find current size of device.
	 * Default to 1 sector if inactive.
	 */
	t = dm_get_table(md);
	if (t) {
		dev_size = dm_table_get_size(t);
		dm_table_put(t);
	}

	r = dm_table_create(&t, FMODE_READ, 1, md);
	if (r)
		return r;

	r = dm_table_add_target(t, "error", 0, dev_size, NULL);
	if (r)
		goto out;

	r = dm_table_complete(t);
	if (r)
		goto out;

	*result = t;

out:
	if (r)
		dm_table_put(t);

	return r;
}
EXPORT_SYMBOL_GPL(dm_create_error_table);
static void free_devices(struct list_head *devices)
{
	struct list_head *tmp, *next;

	for (tmp = devices->next; tmp != devices; tmp = next) {
		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
		next = tmp->next;
		kfree(dd);
	}
}
static void table_destroy(struct dm_table *t)
{
	unsigned int i;

	/* free the indexes (see dm_table_complete) */
	if (t->depth >= 2)
		vfree(t->index[t->depth - 2]);

	/* free the targets */
	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *tgt = t->targets + i;

		if (tgt->type->dtr)
			tgt->type->dtr(tgt);

		dm_put_target_type(tgt->type);
	}

	vfree(t->highs);

	/* free the device list */
	if (t->devices.next != &t->devices) {
		DMWARN("devices still present during destroy: "
		       "dm_table_remove_device calls missing");

		free_devices(&t->devices);
	}

	kfree(t);
}
void dm_table_get(struct dm_table *t)
{
	atomic_inc(&t->holders);
}

void dm_table_put(struct dm_table *t)
{
	if (!t)
		return;

	if (atomic_dec_and_test(&t->holders))
		table_destroy(t);
}
/*
 * Checks to see if we need to extend highs or targets.
 */
static inline int check_space(struct dm_table *t)
{
	if (t->num_targets >= t->num_allocated)
		return alloc_targets(t, t->num_allocated * 2);

	return 0;
}
/*
 * Convert a device path to a dev_t.
 */
static int lookup_device(const char *path, dev_t *dev)
{
	int r;
	struct nameidata nd;
	struct inode *inode;

	if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd)))
		return r;

	inode = nd.dentry->d_inode;
	if (!inode) {
		r = -ENOENT;
		goto out;
	}

	if (!S_ISBLK(inode->i_mode)) {
		r = -ENOTBLK;
		goto out;
	}

	*dev = inode->i_rdev;

out:
	path_release(&nd);
	return r;
}
/*
 * See if we've already got a device in the list.
 */
static struct dm_dev *find_device(struct list_head *l, dev_t dev)
{
	struct dm_dev *dd;

	list_for_each_entry (dd, l, list)
		if (dd->bdev->bd_dev == dev)
			return dd;

	return NULL;
}
/*
 * Open a device so we can use it as a map destination.
 */
static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(d->bdev);

	bdev = open_by_devnum(dev, d->mode);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);
	r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
	if (r)
		blkdev_put(bdev);
	else
		d->bdev = bdev;
	return r;
}
/*
 * Close a device that we've been using.
 */
static void close_dev(struct dm_dev *d, struct mapped_device *md)
{
	if (!d->bdev)
		return;

	bd_release_from_disk(d->bdev, dm_disk(md));
	blkdev_put(d->bdev);
	d->bdev = NULL;
}
/*
 * If possible (i.e. blk_size[major] is set), this checks that an area
 * of a destination device is valid.
 */
static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
{
	sector_t dev_size;
	dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT;
	return ((start < dev_size) && (len <= (dev_size - start)));
}
/*
 * This upgrades the mode on an already open dm_dev, being careful to
 * leave things as they were if we fail to reopen the device.
 */
static int upgrade_mode(struct dm_dev *dd, int new_mode, struct mapped_device *md)
{
	int r;
	struct dm_dev dd_copy;
	dev_t dev = dd->bdev->bd_dev;

	dd_copy = *dd;

	dd->mode |= new_mode;
	dd->bdev = NULL;
	r = open_dev(dd, dev, md);
	if (!r)
		close_dev(&dd_copy, md);
	else
		*dd = dd_copy;

	return r;
}
/*
 * Add a device to the list, or just increment the usage count if
 * it's already present.
 */
static int __table_get_device(struct dm_table *t, struct dm_target *ti,
			      const char *path, sector_t start, sector_t len,
			      int mode, struct dm_dev **result)
{
	int r;
	dev_t dev;
	struct dm_dev *dd;
	unsigned int major, minor;

	BUG_ON(!t);

	if (sscanf(path, "%u:%u", &major, &minor) == 2) {
		/* Extract the major/minor numbers */
		dev = MKDEV(major, minor);
		if (MAJOR(dev) != major || MINOR(dev) != minor)
			return -EOVERFLOW;
	} else {
		/* convert the path to a device */
		if ((r = lookup_device(path, &dev)))
			return r;
	}

	dd = find_device(&t->devices, dev);
	if (!dd) {
		dd = kmalloc(sizeof(*dd), GFP_KERNEL);
		if (!dd)
			return -ENOMEM;

		dd->mode = mode;
		dd->bdev = NULL;

		if ((r = open_dev(dd, dev, t->md))) {
			kfree(dd);
			return r;
		}

		format_dev_t(dd->name, dev);

		atomic_set(&dd->count, 0);
		list_add(&dd->list, &t->devices);

	} else if (dd->mode != (mode | dd->mode)) {
		r = upgrade_mode(dd, mode, t->md);
		if (r)
			return r;
	}
	atomic_inc(&dd->count);

	if (!check_device_area(dd, start, len)) {
		DMWARN("device %s too small for target", path);
		dm_put_device(ti, dd);
		return -EINVAL;
	}

	*result = dd;

	return 0;
}
int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
		  sector_t len, int mode, struct dm_dev **result)
{
	int r = __table_get_device(ti->table, ti, path,
				   start, len, mode, result);
	if (!r) {
		request_queue_t *q = bdev_get_queue((*result)->bdev);
		struct io_restrictions *rs = &ti->limits;

		/*
		 * Combine the device limits low.
		 *
		 * FIXME: if we move an io_restriction struct
		 *        into q this would just be a call to
		 *        combine_restrictions_low()
		 */
		rs->max_sectors =
			min_not_zero(rs->max_sectors, q->max_sectors);

		/* FIXME: Device-Mapper on top of RAID-0 breaks because DM
		 * currently doesn't honor MD's merge_bvec_fn routine.
		 * In this case, we'll force DM to use PAGE_SIZE or
		 * smaller I/O, just to be safe. A better fix is in the
		 * works, but add this for the time being so it will at
		 * least operate correctly.
		 */
		if (q->merge_bvec_fn)
			rs->max_sectors =
				min_not_zero(rs->max_sectors,
					     (unsigned int) (PAGE_SIZE >> 9));

		rs->max_phys_segments =
			min_not_zero(rs->max_phys_segments,
				     q->max_phys_segments);

		rs->max_hw_segments =
			min_not_zero(rs->max_hw_segments, q->max_hw_segments);

		rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);

		rs->max_segment_size =
			min_not_zero(rs->max_segment_size, q->max_segment_size);

		rs->seg_boundary_mask =
			min_not_zero(rs->seg_boundary_mask,
				     q->seg_boundary_mask);

		rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
	}

	return r;
}
/*
 * Decrement a device's use count and remove it if necessary.
 */
void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
{
	if (atomic_dec_and_test(&dd->count)) {
		close_dev(dd, ti->table->md);
		list_del(&dd->list);
		kfree(dd);
	}
}
/*
 * Checks to see if the target joins onto the end of the table.
 */
static int adjoin(struct dm_table *table, struct dm_target *ti)
{
	struct dm_target *prev;

	if (!table->num_targets)
		return !ti->begin;

	prev = &table->targets[table->num_targets - 1];
	return (ti->begin == (prev->begin + prev->len));
}
/*
 * Used to dynamically allocate the arg array.
 */
static char **realloc_argv(unsigned *array_size, char **old_argv)
{
	char **argv;
	unsigned new_size;

	new_size = *array_size ? *array_size * 2 : 64;
	argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
	if (argv) {
		memcpy(argv, old_argv, *array_size * sizeof(*argv));
		*array_size = new_size;
	}

	kfree(old_argv);
	return argv;
}
/*
 * Destructively splits up the argument list to pass to ctr.
 */
int dm_split_args(int *argc, char ***argvp, char *input)
{
	char *start, *end = input, *out, **argv = NULL;
	unsigned array_size = 0;

	*argc = 0;

	if (!input) {
		*argvp = NULL;
		return 0;
	}

	argv = realloc_argv(&array_size, argv);
	if (!argv)
		return -ENOMEM;

	while (1) {
		start = end;

		/* Skip whitespace */
		while (*start && isspace(*start))
			start++;

		if (!*start)
			break;	/* success, we hit the end */

		/* 'out' is used to remove any back-quotes */
		end = out = start;
		while (*end) {
			/* Everything apart from '\0' can be quoted */
			if (*end == '\\' && *(end + 1)) {
				*out++ = *(end + 1);
				end += 2;
				continue;
			}

			if (isspace(*end))
				break;	/* end of token */

			*out++ = *end++;
		}

		/* have we already filled the array ? */
		if ((*argc + 1) > array_size) {
			argv = realloc_argv(&array_size, argv);
			if (!argv)
				return -ENOMEM;
		}

		/* we know this is whitespace */
		if (*end)
			end++;

		/* terminate the string and put it in the array */
		*out = '\0';
		argv[*argc] = start;
		(*argc)++;
	}

	*argvp = argv;
	return 0;
}
static void check_for_valid_limits(struct io_restrictions *rs)
{
	if (!rs->max_sectors)
		rs->max_sectors = SAFE_MAX_SECTORS;
	if (!rs->max_phys_segments)
		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
	if (!rs->max_hw_segments)
		rs->max_hw_segments = MAX_HW_SEGMENTS;
	if (!rs->hardsect_size)
		rs->hardsect_size = 1 << SECTOR_SHIFT;
	if (!rs->max_segment_size)
		rs->max_segment_size = MAX_SEGMENT_SIZE;
	if (!rs->seg_boundary_mask)
		rs->seg_boundary_mask = -1;
}
int dm_table_add_target(struct dm_table *t, const char *type,
			sector_t start, sector_t len, char *params)
{
	int r = -EINVAL, argc;
	char **argv;
	struct dm_target *tgt;

	if ((r = check_space(t)))
		return r;

	tgt = t->targets + t->num_targets;
	memset(tgt, 0, sizeof(*tgt));

	if (!len) {
		DMERR("%s: zero-length target", dm_device_name(t->md));
		return -EINVAL;
	}

	tgt->type = dm_get_target_type(type);
	if (!tgt->type) {
		DMERR("%s: %s: unknown target type", dm_device_name(t->md),
		      type);
		return -EINVAL;
	}

	tgt->table = t;
	tgt->begin = start;
	tgt->len = len;
	tgt->error = "Unknown error";

	/*
	 * Does this target adjoin the previous one ?
	 */
	if (!adjoin(t, tgt)) {
		tgt->error = "Gap in table";
		r = -EINVAL;
		goto bad;
	}

	r = dm_split_args(&argc, &argv, params);
	if (r) {
		tgt->error = "couldn't split parameters (insufficient memory)";
		goto bad;
	}

	r = tgt->type->ctr(tgt, argc, argv);
	kfree(argv);
	if (r)
		goto bad;

	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;

	/* FIXME: the plan is to combine high here and then have
	 * the merge fn apply the target level restrictions. */
	combine_restrictions_low(&t->limits, &tgt->limits);
	return 0;

bad:
	DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
	dm_put_target_type(tgt->type);
	return r;
}
static int setup_indexes(struct dm_table *t)
{
	int i;
	unsigned int total = 0;
	sector_t *indexes;

	/* allocate the space for *all* the indexes */
	for (i = t->depth - 2; i >= 0; i--) {
		t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
		total += t->counts[i];
	}

	indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
	if (!indexes)
		return -ENOMEM;

	/* set up internal nodes, bottom-up */
	for (i = t->depth - 2, total = 0; i >= 0; i--) {
		t->index[i] = indexes;
		indexes += (KEYS_PER_NODE * t->counts[i]);
		setup_btree_index(i, t);
	}

	return 0;
}
/*
 * Builds the btree to index the map.
 */
int dm_table_complete(struct dm_table *t)
{
	int r = 0;
	unsigned int leaf_nodes;

	check_for_valid_limits(&t->limits);

	/* how many indexes will the btree have ? */
	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);

	/* leaf layer has already been set up */
	t->counts[t->depth - 1] = leaf_nodes;
	t->index[t->depth - 1] = t->highs;

	if (t->depth >= 2)
		r = setup_indexes(t);

	return r;
}
static DEFINE_MUTEX(_event_lock);
void dm_table_event_callback(struct dm_table *t,
			     void (*fn)(void *), void *context)
{
	mutex_lock(&_event_lock);
	t->event_fn = fn;
	t->event_context = context;
	mutex_unlock(&_event_lock);
}

void dm_table_event(struct dm_table *t)
{
	/*
	 * You can no longer call dm_table_event() from interrupt
	 * context, use a bottom half instead.
	 */
	BUG_ON(in_interrupt());

	mutex_lock(&_event_lock);
	if (t->event_fn)
		t->event_fn(t->event_context);
	mutex_unlock(&_event_lock);
}
sector_t dm_table_get_size(struct dm_table *t)
{
	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
}

struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
{
	if (index >= t->num_targets)
		return NULL;

	return t->targets + index;
}

/*
 * Search the btree for the correct target.
 */
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
{
	unsigned int l, n = 0, k = 0;
	sector_t *node;

	for (l = 0; l < t->depth; l++) {
		n = get_child(n, k);
		node = get_node(t, l, n);

		for (k = 0; k < KEYS_PER_NODE; k++)
			if (node[k] >= sector)
				break;
	}

	return &t->targets[(KEYS_PER_NODE * n) + k];
}
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
{
	/*
	 * Make sure we obey the optimistic sub devices
	 * restrictions.
	 */
	blk_queue_max_sectors(q, t->limits.max_sectors);
	q->max_phys_segments = t->limits.max_phys_segments;
	q->max_hw_segments = t->limits.max_hw_segments;
	q->hardsect_size = t->limits.hardsect_size;
	q->max_segment_size = t->limits.max_segment_size;
	q->seg_boundary_mask = t->limits.seg_boundary_mask;
	if (t->limits.no_cluster)
		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
	else
		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);

}
unsigned int dm_table_get_num_targets(struct dm_table *t)
{
	return t->num_targets;
}

struct list_head *dm_table_get_devices(struct dm_table *t)
{
	return &t->devices;
}

int dm_table_get_mode(struct dm_table *t)
{
	return t->mode;
}
static void suspend_targets(struct dm_table *t, unsigned postsuspend)
{
	int i = t->num_targets;
	struct dm_target *ti = t->targets;

	while (i--) {
		if (postsuspend) {
			if (ti->type->postsuspend)
				ti->type->postsuspend(ti);
		} else if (ti->type->presuspend)
			ti->type->presuspend(ti);

		ti++;
	}
}

void dm_table_presuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	return suspend_targets(t, 0);
}

void dm_table_postsuspend_targets(struct dm_table *t)
{
	if (!t)
		return;

	return suspend_targets(t, 1);
}

void dm_table_resume_targets(struct dm_table *t)
{
	int i;

	for (i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = t->targets + i;

		if (ti->type->resume)
			ti->type->resume(ti);
	}
}
int dm_table_any_congested(struct dm_table *t, int bdi_bits)
{
	struct list_head *d, *devices;
	int r = 0;

	devices = dm_table_get_devices(t);
	for (d = devices->next; d != devices; d = d->next) {
		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
		request_queue_t *q = bdev_get_queue(dd->bdev);
		r |= bdi_congested(&q->backing_dev_info, bdi_bits);
	}

	return r;
}

void dm_table_unplug_all(struct dm_table *t)
{
	struct list_head *d, *devices = dm_table_get_devices(t);

	for (d = devices->next; d != devices; d = d->next) {
		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
		request_queue_t *q = bdev_get_queue(dd->bdev);

		if (q->unplug_fn)
			q->unplug_fn(q);
	}
}
int dm_table_flush_all(struct dm_table *t)
{
	struct list_head *d, *devices = dm_table_get_devices(t);
	int ret = 0;

	for (d = devices->next; d != devices; d = d->next) {
		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
		request_queue_t *q = bdev_get_queue(dd->bdev);
		int err;

		if (!q->issue_flush_fn)
			err = -EOPNOTSUPP;
		else
			err = q->issue_flush_fn(q, dd->bdev->bd_disk, NULL);

		if (!ret)
			ret = err;
	}

	return ret;
}

struct mapped_device *dm_table_get_md(struct dm_table *t)
{
	dm_get(t->md);

	return t->md;
}
EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);
EXPORT_SYMBOL(dm_table_event);
EXPORT_SYMBOL(dm_table_get_size);
EXPORT_SYMBOL(dm_table_get_mode);
EXPORT_SYMBOL(dm_table_get_md);
EXPORT_SYMBOL(dm_table_put);
EXPORT_SYMBOL(dm_table_get);
EXPORT_SYMBOL(dm_table_unplug_all);
EXPORT_SYMBOL(dm_table_flush_all);