ia64/linux-2.6.18-xen.hg

view drivers/md/dm-mpath.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, in which case it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, causing temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than return them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
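
As a rough sketch of the retry behaviour described above (illustrative
only, not the patch itself; current_target(), increase_reservation(),
current_pages and balloon_timer stand in for whatever the driver
actually uses):

    static void balloon_worker(unsigned long data)
    {
        /* How many pages are we still short of the target? */
        long wanted = current_target() - current_pages;
        long got;

        if (wanted <= 0)
            return;

        /* The hypervisor may grant fewer pages than we asked for. */
        got = increase_reservation(wanted);

        /* Keep any partial grant rather than handing it back to Xen... */
        current_pages += got;

        /* ...and retry on a timer instead of recording a "hard limit". */
        if (got < wanted)
            mod_timer(&balloon_timer, jiffies + HZ);
    }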
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * Copyright (C) 2003 Sistina Software Limited.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 */
8 #include "dm.h"
9 #include "dm-path-selector.h"
10 #include "dm-hw-handler.h"
11 #include "dm-bio-list.h"
12 #include "dm-bio-record.h"
14 #include <linux/ctype.h>
15 #include <linux/init.h>
16 #include <linux/mempool.h>
17 #include <linux/module.h>
18 #include <linux/pagemap.h>
19 #include <linux/slab.h>
20 #include <linux/time.h>
21 #include <linux/workqueue.h>
22 #include <asm/atomic.h>
24 #define DM_MSG_PREFIX "multipath"
25 #define MESG_STR(x) x, sizeof(x)
27 /* Path properties */
28 struct pgpath {
29 struct list_head list;
31 struct priority_group *pg; /* Owning PG */
32 unsigned fail_count; /* Cumulative failure count */
34 struct path path;
35 };
37 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
39 /*
40 * Paths are grouped into Priority Groups and numbered from 1 upwards.
41 * Each has a path selector which controls which path gets used.
42 */
43 struct priority_group {
44 struct list_head list;
46 struct multipath *m; /* Owning multipath instance */
47 struct path_selector ps;
49 unsigned pg_num; /* Reference number */
50 unsigned bypassed; /* Temporarily bypass this PG? */
52 unsigned nr_pgpaths; /* Number of paths in PG */
53 struct list_head pgpaths;
54 };
56 /* Multipath context */
57 struct multipath {
58 struct list_head list;
59 struct dm_target *ti;
61 spinlock_t lock;
63 struct hw_handler hw_handler;
64 unsigned nr_priority_groups;
65 struct list_head priority_groups;
66 unsigned pg_init_required; /* pg_init needs calling? */
67 unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
69 unsigned nr_valid_paths; /* Total number of usable paths */
70 struct pgpath *current_pgpath;
71 struct priority_group *current_pg;
72 struct priority_group *next_pg; /* Switch to this PG if set */
73 unsigned repeat_count; /* I/Os left before calling PS again */
75 unsigned queue_io; /* Must we queue all I/O? */
76 unsigned queue_if_no_path; /* Queue I/O if last path fails? */
77 unsigned saved_queue_if_no_path;/* Saved state during suspension */
79 struct work_struct process_queued_ios;
80 struct bio_list queued_ios;
81 unsigned queue_size;
83 struct work_struct trigger_event;
85 /*
86 * We must use a mempool of mpath_io structs so that we
87 * can resubmit bios on error.
88 */
89 mempool_t *mpio_pool;
90 };
92 /*
93 * Context information attached to each bio we process.
94 */
95 struct mpath_io {
96 struct pgpath *pgpath;
97 struct dm_bio_details details;
98 };
100 typedef int (*action_fn) (struct pgpath *pgpath);
102 #define MIN_IOS 256 /* Mempool size */
104 static kmem_cache_t *_mpio_cache;
106 struct workqueue_struct *kmultipathd;
107 static void process_queued_ios(void *data);
108 static void trigger_event(void *data);
111 /*-----------------------------------------------
112 * Allocation routines
113 *-----------------------------------------------*/
115 static struct pgpath *alloc_pgpath(void)
116 {
117 struct pgpath *pgpath = kmalloc(sizeof(*pgpath), GFP_KERNEL);
119 if (pgpath) {
120 memset(pgpath, 0, sizeof(*pgpath));
121 pgpath->path.is_active = 1;
122 }
124 return pgpath;
125 }
127 static inline void free_pgpath(struct pgpath *pgpath)
128 {
129 kfree(pgpath);
130 }
132 static struct priority_group *alloc_priority_group(void)
133 {
134 struct priority_group *pg;
136 pg = kmalloc(sizeof(*pg), GFP_KERNEL);
137 if (!pg)
138 return NULL;
140 memset(pg, 0, sizeof(*pg));
141 INIT_LIST_HEAD(&pg->pgpaths);
143 return pg;
144 }
146 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
147 {
148 struct pgpath *pgpath, *tmp;
150 list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
151 list_del(&pgpath->list);
152 dm_put_device(ti, pgpath->path.dev);
153 free_pgpath(pgpath);
154 }
155 }
157 static void free_priority_group(struct priority_group *pg,
158 struct dm_target *ti)
159 {
160 struct path_selector *ps = &pg->ps;
162 if (ps->type) {
163 ps->type->destroy(ps);
164 dm_put_path_selector(ps->type);
165 }
167 free_pgpaths(&pg->pgpaths, ti);
168 kfree(pg);
169 }
171 static struct multipath *alloc_multipath(void)
172 {
173 struct multipath *m;
175 m = kmalloc(sizeof(*m), GFP_KERNEL);
176 if (m) {
177 memset(m, 0, sizeof(*m));
178 INIT_LIST_HEAD(&m->priority_groups);
179 spin_lock_init(&m->lock);
180 m->queue_io = 1;
181 INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
182 INIT_WORK(&m->trigger_event, trigger_event, m);
183 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
184 if (!m->mpio_pool) {
185 kfree(m);
186 return NULL;
187 }
188 }
190 return m;
191 }
193 static void free_multipath(struct multipath *m)
194 {
195 struct priority_group *pg, *tmp;
196 struct hw_handler *hwh = &m->hw_handler;
198 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
199 list_del(&pg->list);
200 free_priority_group(pg, m->ti);
201 }
203 if (hwh->type) {
204 hwh->type->destroy(hwh);
205 dm_put_hw_handler(hwh->type);
206 }
208 mempool_destroy(m->mpio_pool);
209 kfree(m);
210 }
213 /*-----------------------------------------------
214 * Path selection
215 *-----------------------------------------------*/
217 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
218 {
219 struct hw_handler *hwh = &m->hw_handler;
221 m->current_pg = pgpath->pg;
223 /* Must we initialise the PG first, and queue I/O till it's ready? */
224 if (hwh->type && hwh->type->pg_init) {
225 m->pg_init_required = 1;
226 m->queue_io = 1;
227 } else {
228 m->pg_init_required = 0;
229 m->queue_io = 0;
230 }
231 }
233 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
234 {
235 struct path *path;
237 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
238 if (!path)
239 return -ENXIO;
241 m->current_pgpath = path_to_pgpath(path);
243 if (m->current_pg != pg)
244 __switch_pg(m, m->current_pgpath);
246 return 0;
247 }
249 static void __choose_pgpath(struct multipath *m)
250 {
251 struct priority_group *pg;
252 unsigned bypassed = 1;
254 if (!m->nr_valid_paths)
255 goto failed;
257 /* Were we instructed to switch PG? */
258 if (m->next_pg) {
259 pg = m->next_pg;
260 m->next_pg = NULL;
261 if (!__choose_path_in_pg(m, pg))
262 return;
263 }
265 /* Don't change PG until it has no remaining paths */
266 if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
267 return;
269 /*
270 * Loop through priority groups until we find a valid path.
271 * First time we skip PGs marked 'bypassed'.
272 * Second time we only try the ones we skipped.
273 */
274 do {
275 list_for_each_entry(pg, &m->priority_groups, list) {
276 if (pg->bypassed == bypassed)
277 continue;
278 if (!__choose_path_in_pg(m, pg))
279 return;
280 }
281 } while (bypassed--);
283 failed:
284 m->current_pgpath = NULL;
285 m->current_pg = NULL;
286 }
288 static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
289 unsigned was_queued)
290 {
291 int r = 1;
292 unsigned long flags;
293 struct pgpath *pgpath;
295 spin_lock_irqsave(&m->lock, flags);
297 /* Do we need to select a new pgpath? */
298 if (!m->current_pgpath ||
299 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
300 __choose_pgpath(m);
302 pgpath = m->current_pgpath;
304 if (was_queued)
305 m->queue_size--;
307 if ((pgpath && m->queue_io) ||
308 (!pgpath && m->queue_if_no_path)) {
309 /* Queue for the daemon to resubmit */
310 bio_list_add(&m->queued_ios, bio);
311 m->queue_size++;
312 if ((m->pg_init_required && !m->pg_init_in_progress) ||
313 !m->queue_io)
314 queue_work(kmultipathd, &m->process_queued_ios);
315 pgpath = NULL;
316 r = 0;
317 } else if (!pgpath)
318 r = -EIO; /* Failed */
319 else
320 bio->bi_bdev = pgpath->path.dev->bdev;
322 mpio->pgpath = pgpath;
324 spin_unlock_irqrestore(&m->lock, flags);
326 return r;
327 }
329 /*
330 * If we run out of usable paths, should we queue I/O or error it?
331 */
332 static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
333 unsigned save_old_value)
334 {
335 unsigned long flags;
337 spin_lock_irqsave(&m->lock, flags);
339 if (save_old_value)
340 m->saved_queue_if_no_path = m->queue_if_no_path;
341 else
342 m->saved_queue_if_no_path = queue_if_no_path;
343 m->queue_if_no_path = queue_if_no_path;
344 if (!m->queue_if_no_path && m->queue_size)
345 queue_work(kmultipathd, &m->process_queued_ios);
347 spin_unlock_irqrestore(&m->lock, flags);
349 return 0;
350 }
352 /*-----------------------------------------------------------------
353 * The multipath daemon is responsible for resubmitting queued ios.
354 *---------------------------------------------------------------*/
356 static void dispatch_queued_ios(struct multipath *m)
357 {
358 int r;
359 unsigned long flags;
360 struct bio *bio = NULL, *next;
361 struct mpath_io *mpio;
362 union map_info *info;
364 spin_lock_irqsave(&m->lock, flags);
365 bio = bio_list_get(&m->queued_ios);
366 spin_unlock_irqrestore(&m->lock, flags);
368 while (bio) {
369 next = bio->bi_next;
370 bio->bi_next = NULL;
372 info = dm_get_mapinfo(bio);
373 mpio = info->ptr;
375 r = map_io(m, bio, mpio, 1);
376 if (r < 0)
377 bio_endio(bio, bio->bi_size, r);
378 else if (r == 1)
379 generic_make_request(bio);
381 bio = next;
382 }
383 }
385 static void process_queued_ios(void *data)
386 {
387 struct multipath *m = (struct multipath *) data;
388 struct hw_handler *hwh = &m->hw_handler;
389 struct pgpath *pgpath = NULL;
390 unsigned init_required = 0, must_queue = 1;
391 unsigned long flags;
393 spin_lock_irqsave(&m->lock, flags);
395 if (!m->queue_size)
396 goto out;
398 if (!m->current_pgpath)
399 __choose_pgpath(m);
401 pgpath = m->current_pgpath;
403 if ((pgpath && !m->queue_io) ||
404 (!pgpath && !m->queue_if_no_path))
405 must_queue = 0;
407 if (m->pg_init_required && !m->pg_init_in_progress) {
408 m->pg_init_required = 0;
409 m->pg_init_in_progress = 1;
410 init_required = 1;
411 }
413 out:
414 spin_unlock_irqrestore(&m->lock, flags);
416 if (init_required)
417 hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path);
419 if (!must_queue)
420 dispatch_queued_ios(m);
421 }
423 /*
424 * An event is triggered whenever a path is taken out of use.
425 * Includes path failure and PG bypass.
426 */
427 static void trigger_event(void *data)
428 {
429 struct multipath *m = (struct multipath *) data;
431 dm_table_event(m->ti->table);
432 }
434 /*-----------------------------------------------------------------
435 * Constructor/argument parsing:
436 * <#multipath feature args> [<arg>]*
437 * <#hw_handler args> [hw_handler [<arg>]*]
438 * <#priority groups>
439 * <initial priority group>
440 * [<selector> <#selector args> [<arg>]*
441 * <#paths> <#per-path selector args>
442 * [<path> [<arg>]* ]+ ]+
443 *---------------------------------------------------------------*/
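/*
 * Purely illustrative example of a target argument string in the format
 * above (device numbers and repeat counts are made up): queue_if_no_path
 * enabled, no hardware handler, two round-robin priority groups with the
 * first group initial:
 *
 *   1 queue_if_no_path 0 2 1
 *   round-robin 0 2 1 8:16 1000 8:32 1000
 *   round-robin 0 1 1 8:48 1000
 */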
444 struct param {
445 unsigned min;
446 unsigned max;
447 char *error;
448 };
450 static int read_param(struct param *param, char *str, unsigned *v, char **error)
451 {
452 if (!str ||
453 (sscanf(str, "%u", v) != 1) ||
454 (*v < param->min) ||
455 (*v > param->max)) {
456 *error = param->error;
457 return -EINVAL;
458 }
460 return 0;
461 }
463 struct arg_set {
464 unsigned argc;
465 char **argv;
466 };
468 static char *shift(struct arg_set *as)
469 {
470 char *r;
472 if (as->argc) {
473 as->argc--;
474 r = *as->argv;
475 as->argv++;
476 return r;
477 }
479 return NULL;
480 }
482 static void consume(struct arg_set *as, unsigned n)
483 {
484 BUG_ON (as->argc < n);
485 as->argc -= n;
486 as->argv += n;
487 }
489 static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
490 struct dm_target *ti)
491 {
492 int r;
493 struct path_selector_type *pst;
494 unsigned ps_argc;
496 static struct param _params[] = {
497 {0, 1024, "invalid number of path selector args"},
498 };
500 pst = dm_get_path_selector(shift(as));
501 if (!pst) {
502 ti->error = "unknown path selector type";
503 return -EINVAL;
504 }
506 r = read_param(_params, shift(as), &ps_argc, &ti->error);
507 if (r)
508 return -EINVAL;
510 r = pst->create(&pg->ps, ps_argc, as->argv);
511 if (r) {
512 dm_put_path_selector(pst);
513 ti->error = "path selector constructor failed";
514 return r;
515 }
517 pg->ps.type = pst;
518 consume(as, ps_argc);
520 return 0;
521 }
523 static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
524 struct dm_target *ti)
525 {
526 int r;
527 struct pgpath *p;
529 /* we need at least a path arg */
530 if (as->argc < 1) {
531 ti->error = "no device given";
532 return NULL;
533 }
535 p = alloc_pgpath();
536 if (!p)
537 return NULL;
539 r = dm_get_device(ti, shift(as), ti->begin, ti->len,
540 dm_table_get_mode(ti->table), &p->path.dev);
541 if (r) {
542 ti->error = "error getting device";
543 goto bad;
544 }
546 r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
547 if (r) {
548 dm_put_device(ti, p->path.dev);
549 goto bad;
550 }
552 return p;
554 bad:
555 free_pgpath(p);
556 return NULL;
557 }
559 static struct priority_group *parse_priority_group(struct arg_set *as,
560 struct multipath *m,
561 struct dm_target *ti)
562 {
563 static struct param _params[] = {
564 {1, 1024, "invalid number of paths"},
565 {0, 1024, "invalid number of selector args"}
566 };
568 int r;
569 unsigned i, nr_selector_args, nr_params;
570 struct priority_group *pg;
572 if (as->argc < 2) {
573 as->argc = 0;
574 ti->error = "not enough priority group arguments";
575 return NULL;
576 }
578 pg = alloc_priority_group();
579 if (!pg) {
580 ti->error = "couldn't allocate priority group";
581 return NULL;
582 }
583 pg->m = m;
585 r = parse_path_selector(as, pg, ti);
586 if (r)
587 goto bad;
589 /*
590 * read the paths
591 */
592 r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
593 if (r)
594 goto bad;
596 r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
597 if (r)
598 goto bad;
600 nr_params = 1 + nr_selector_args;
601 for (i = 0; i < pg->nr_pgpaths; i++) {
602 struct pgpath *pgpath;
603 struct arg_set path_args;
605 if (as->argc < nr_params)
606 goto bad;
608 path_args.argc = nr_params;
609 path_args.argv = as->argv;
611 pgpath = parse_path(&path_args, &pg->ps, ti);
612 if (!pgpath)
613 goto bad;
615 pgpath->pg = pg;
616 list_add_tail(&pgpath->list, &pg->pgpaths);
617 consume(as, nr_params);
618 }
620 return pg;
622 bad:
623 free_priority_group(pg, ti);
624 return NULL;
625 }
627 static int parse_hw_handler(struct arg_set *as, struct multipath *m,
628 struct dm_target *ti)
629 {
630 int r;
631 struct hw_handler_type *hwht;
632 unsigned hw_argc;
634 static struct param _params[] = {
635 {0, 1024, "invalid number of hardware handler args"},
636 };
638 r = read_param(_params, shift(as), &hw_argc, &ti->error);
639 if (r)
640 return -EINVAL;
642 if (!hw_argc)
643 return 0;
645 hwht = dm_get_hw_handler(shift(as));
646 if (!hwht) {
647 ti->error = "unknown hardware handler type";
648 return -EINVAL;
649 }
651 r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
652 if (r) {
653 dm_put_hw_handler(hwht);
654 ti->error = "hardware handler constructor failed";
655 return r;
656 }
658 m->hw_handler.type = hwht;
659 consume(as, hw_argc - 1);
661 return 0;
662 }
664 static int parse_features(struct arg_set *as, struct multipath *m,
665 struct dm_target *ti)
666 {
667 int r;
668 unsigned argc;
670 static struct param _params[] = {
671 {0, 1, "invalid number of feature args"},
672 };
674 r = read_param(_params, shift(as), &argc, &ti->error);
675 if (r)
676 return -EINVAL;
678 if (!argc)
679 return 0;
681 if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
682 return queue_if_no_path(m, 1, 0);
683 else {
684 ti->error = "Unrecognised multipath feature request";
685 return -EINVAL;
686 }
687 }
689 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
690 char **argv)
691 {
692 /* target parameters */
693 static struct param _params[] = {
694 {1, 1024, "invalid number of priority groups"},
695 {1, 1024, "invalid initial priority group number"},
696 };
698 int r;
699 struct multipath *m;
700 struct arg_set as;
701 unsigned pg_count = 0;
702 unsigned next_pg_num;
704 as.argc = argc;
705 as.argv = argv;
707 m = alloc_multipath();
708 if (!m) {
709 ti->error = "can't allocate multipath";
710 return -EINVAL;
711 }
713 m->ti = ti;
715 r = parse_features(&as, m, ti);
716 if (r)
717 goto bad;
719 r = parse_hw_handler(&as, m, ti);
720 if (r)
721 goto bad;
723 r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
724 if (r)
725 goto bad;
727 r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
728 if (r)
729 goto bad;
731 /* parse the priority groups */
732 while (as.argc) {
733 struct priority_group *pg;
735 pg = parse_priority_group(&as, m, ti);
736 if (!pg) {
737 r = -EINVAL;
738 goto bad;
739 }
741 m->nr_valid_paths += pg->nr_pgpaths;
742 list_add_tail(&pg->list, &m->priority_groups);
743 pg_count++;
744 pg->pg_num = pg_count;
745 if (!--next_pg_num)
746 m->next_pg = pg;
747 }
749 if (pg_count != m->nr_priority_groups) {
750 ti->error = "priority group count mismatch";
751 r = -EINVAL;
752 goto bad;
753 }
755 ti->private = m;
757 return 0;
759 bad:
760 free_multipath(m);
761 return r;
762 }
764 static void multipath_dtr(struct dm_target *ti)
765 {
766 struct multipath *m = (struct multipath *) ti->private;
768 flush_workqueue(kmultipathd);
769 free_multipath(m);
770 }
772 /*
773 * Map bios, recording original fields for later in case we have to resubmit
774 */
775 static int multipath_map(struct dm_target *ti, struct bio *bio,
776 union map_info *map_context)
777 {
778 int r;
779 struct mpath_io *mpio;
780 struct multipath *m = (struct multipath *) ti->private;
782 if (bio_barrier(bio))
783 return -EOPNOTSUPP;
785 mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
786 dm_bio_record(&mpio->details, bio);
788 map_context->ptr = mpio;
789 bio->bi_rw |= (1 << BIO_RW_FAILFAST);
790 r = map_io(m, bio, mpio, 0);
791 if (r < 0)
792 mempool_free(mpio, m->mpio_pool);
794 return r;
795 }
797 /*
798 * Take a path out of use.
799 */
800 static int fail_path(struct pgpath *pgpath)
801 {
802 unsigned long flags;
803 struct multipath *m = pgpath->pg->m;
805 spin_lock_irqsave(&m->lock, flags);
807 if (!pgpath->path.is_active)
808 goto out;
810 DMWARN("Failing path %s.", pgpath->path.dev->name);
812 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
813 pgpath->path.is_active = 0;
814 pgpath->fail_count++;
816 m->nr_valid_paths--;
818 if (pgpath == m->current_pgpath)
819 m->current_pgpath = NULL;
821 queue_work(kmultipathd, &m->trigger_event);
823 out:
824 spin_unlock_irqrestore(&m->lock, flags);
826 return 0;
827 }
829 /*
830 * Reinstate a previously-failed path
831 */
832 static int reinstate_path(struct pgpath *pgpath)
833 {
834 int r = 0;
835 unsigned long flags;
836 struct multipath *m = pgpath->pg->m;
838 spin_lock_irqsave(&m->lock, flags);
840 if (pgpath->path.is_active)
841 goto out;
843 if (!pgpath->pg->ps.type) {
844 DMWARN("Reinstate path not supported by path selector %s",
845 pgpath->pg->ps.type->name);
846 r = -EINVAL;
847 goto out;
848 }
850 r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
851 if (r)
852 goto out;
854 pgpath->path.is_active = 1;
856 m->current_pgpath = NULL;
857 if (!m->nr_valid_paths++ && m->queue_size)
858 queue_work(kmultipathd, &m->process_queued_ios);
860 queue_work(kmultipathd, &m->trigger_event);
862 out:
863 spin_unlock_irqrestore(&m->lock, flags);
865 return r;
866 }
868 /*
869 * Fail or reinstate all paths that match the provided struct dm_dev.
870 */
871 static int action_dev(struct multipath *m, struct dm_dev *dev,
872 action_fn action)
873 {
874 int r = 0;
875 struct pgpath *pgpath;
876 struct priority_group *pg;
878 list_for_each_entry(pg, &m->priority_groups, list) {
879 list_for_each_entry(pgpath, &pg->pgpaths, list) {
880 if (pgpath->path.dev == dev)
881 r = action(pgpath);
882 }
883 }
885 return r;
886 }
888 /*
889 * Temporarily try to avoid having to use the specified PG
890 */
891 static void bypass_pg(struct multipath *m, struct priority_group *pg,
892 int bypassed)
893 {
894 unsigned long flags;
896 spin_lock_irqsave(&m->lock, flags);
898 pg->bypassed = bypassed;
899 m->current_pgpath = NULL;
900 m->current_pg = NULL;
902 spin_unlock_irqrestore(&m->lock, flags);
904 queue_work(kmultipathd, &m->trigger_event);
905 }
907 /*
908 * Switch to using the specified PG from the next I/O that gets mapped
909 */
910 static int switch_pg_num(struct multipath *m, const char *pgstr)
911 {
912 struct priority_group *pg;
913 unsigned pgnum;
914 unsigned long flags;
916 if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
917 (pgnum > m->nr_priority_groups)) {
918 DMWARN("invalid PG number supplied to switch_pg_num");
919 return -EINVAL;
920 }
922 spin_lock_irqsave(&m->lock, flags);
923 list_for_each_entry(pg, &m->priority_groups, list) {
924 pg->bypassed = 0;
925 if (--pgnum)
926 continue;
928 m->current_pgpath = NULL;
929 m->current_pg = NULL;
930 m->next_pg = pg;
931 }
932 spin_unlock_irqrestore(&m->lock, flags);
934 queue_work(kmultipathd, &m->trigger_event);
935 return 0;
936 }
938 /*
939 * Set/clear bypassed status of a PG.
940 * PGs are numbered upwards from 1 in the order they were declared.
941 */
942 static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
943 {
944 struct priority_group *pg;
945 unsigned pgnum;
947 if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
948 (pgnum > m->nr_priority_groups)) {
949 DMWARN("invalid PG number supplied to bypass_pg");
950 return -EINVAL;
951 }
953 list_for_each_entry(pg, &m->priority_groups, list) {
954 if (!--pgnum)
955 break;
956 }
958 bypass_pg(m, pg, bypassed);
959 return 0;
960 }
962 /*
963 * pg_init must call this when it has completed its initialisation
964 */
965 void dm_pg_init_complete(struct path *path, unsigned err_flags)
966 {
967 struct pgpath *pgpath = path_to_pgpath(path);
968 struct priority_group *pg = pgpath->pg;
969 struct multipath *m = pg->m;
970 unsigned long flags;
972 /* We insist on failing the path if the PG is already bypassed. */
973 if (err_flags && pg->bypassed)
974 err_flags |= MP_FAIL_PATH;
976 if (err_flags & MP_FAIL_PATH)
977 fail_path(pgpath);
979 if (err_flags & MP_BYPASS_PG)
980 bypass_pg(m, pg, 1);
982 spin_lock_irqsave(&m->lock, flags);
983 if (err_flags) {
984 m->current_pgpath = NULL;
985 m->current_pg = NULL;
986 } else if (!m->pg_init_required)
987 m->queue_io = 0;
989 m->pg_init_in_progress = 0;
990 queue_work(kmultipathd, &m->process_queued_ios);
991 spin_unlock_irqrestore(&m->lock, flags);
992 }
994 /*
995 * end_io handling
996 */
997 static int do_end_io(struct multipath *m, struct bio *bio,
998 int error, struct mpath_io *mpio)
999 {
1000 struct hw_handler *hwh = &m->hw_handler;
1001 unsigned err_flags = MP_FAIL_PATH; /* Default behavior */
1002 unsigned long flags;
1004 if (!error)
1005 return 0; /* I/O complete */
1007 if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
1008 return error;
1010 if (error == -EOPNOTSUPP)
1011 return error;
1013 spin_lock_irqsave(&m->lock, flags);
1014 if (!m->nr_valid_paths) {
1015 if (!m->queue_if_no_path) {
1016 spin_unlock_irqrestore(&m->lock, flags);
1017 return -EIO;
1018 } else {
1019 spin_unlock_irqrestore(&m->lock, flags);
1020 goto requeue;
1021 }
1022 }
1023 spin_unlock_irqrestore(&m->lock, flags);
1025 if (hwh->type && hwh->type->error)
1026 err_flags = hwh->type->error(hwh, bio);
1028 if (mpio->pgpath) {
1029 if (err_flags & MP_FAIL_PATH)
1030 fail_path(mpio->pgpath);
1032 if (err_flags & MP_BYPASS_PG)
1033 bypass_pg(m, mpio->pgpath->pg, 1);
1034 }
1036 if (err_flags & MP_ERROR_IO)
1037 return -EIO;
1039 requeue:
1040 dm_bio_restore(&mpio->details, bio);
1042 /* queue for the daemon to resubmit or fail */
1043 spin_lock_irqsave(&m->lock, flags);
1044 bio_list_add(&m->queued_ios, bio);
1045 m->queue_size++;
1046 if (!m->queue_io)
1047 queue_work(kmultipathd, &m->process_queued_ios);
1048 spin_unlock_irqrestore(&m->lock, flags);
1050 return 1; /* io not complete */
1051 }
1053 static int multipath_end_io(struct dm_target *ti, struct bio *bio,
1054 int error, union map_info *map_context)
1055 {
1056 struct multipath *m = (struct multipath *) ti->private;
1057 struct mpath_io *mpio = (struct mpath_io *) map_context->ptr;
1058 struct pgpath *pgpath = mpio->pgpath;
1059 struct path_selector *ps;
1060 int r;
1062 r = do_end_io(m, bio, error, mpio);
1063 if (pgpath) {
1064 ps = &pgpath->pg->ps;
1065 if (ps->type->end_io)
1066 ps->type->end_io(ps, &pgpath->path);
1067 }
1068 if (r <= 0)
1069 mempool_free(mpio, m->mpio_pool);
1071 return r;
1072 }
1074 /*
1075 * Suspend can't complete until all the I/O is processed so if
1076 * the last path fails we must error any remaining I/O.
1077 * Note that if the freeze_bdev fails while suspending, the
1078 * queue_if_no_path state is lost - userspace should reset it.
1079 */
1080 static void multipath_presuspend(struct dm_target *ti)
1081 {
1082 struct multipath *m = (struct multipath *) ti->private;
1084 queue_if_no_path(m, 0, 1);
1085 }
1087 /*
1088 * Restore the queue_if_no_path setting.
1089 */
1090 static void multipath_resume(struct dm_target *ti)
1091 {
1092 struct multipath *m = (struct multipath *) ti->private;
1093 unsigned long flags;
1095 spin_lock_irqsave(&m->lock, flags);
1096 m->queue_if_no_path = m->saved_queue_if_no_path;
1097 spin_unlock_irqrestore(&m->lock, flags);
1098 }
1100 /*
1101 * Info output has the following format:
1102 * num_multipath_feature_args [multipath_feature_args]*
1103 * num_handler_status_args [handler_status_args]*
1104 * num_groups init_group_number
1105 * [A|D|E num_ps_status_args [ps_status_args]*
1106 * num_paths num_selector_args
1107 * [path_dev A|F fail_count [selector_args]* ]+ ]+
1108 *
1109 * Table output has the following format (identical to the constructor string):
1110 * num_feature_args [features_args]*
1111 * num_handler_args hw_handler [hw_handler_args]*
1112 * num_groups init_group_number
1113 * [priority selector-name num_ps_args [ps_args]*
1114 * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1115 */
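/*
 * Illustrative STATUSTYPE_INFO output for a map with two groups and three
 * healthy paths, assuming a path selector that reports no extra status
 * args (device numbers are made up):
 *
 *   1 0 0 2 1 A 0 2 0 8:16 A 0 8:32 A 0 E 0 1 0 8:48 A 0
 */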
1116 static int multipath_status(struct dm_target *ti, status_type_t type,
1117 char *result, unsigned int maxlen)
1118 {
1119 int sz = 0;
1120 unsigned long flags;
1121 struct multipath *m = (struct multipath *) ti->private;
1122 struct hw_handler *hwh = &m->hw_handler;
1123 struct priority_group *pg;
1124 struct pgpath *p;
1125 unsigned pg_num;
1126 char state;
1128 spin_lock_irqsave(&m->lock, flags);
1130 /* Features */
1131 if (type == STATUSTYPE_INFO)
1132 DMEMIT("1 %u ", m->queue_size);
1133 else if (m->queue_if_no_path)
1134 DMEMIT("1 queue_if_no_path ");
1135 else
1136 DMEMIT("0 ");
1138 if (hwh->type && hwh->type->status)
1139 sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
1140 else if (!hwh->type || type == STATUSTYPE_INFO)
1141 DMEMIT("0 ");
1142 else
1143 DMEMIT("1 %s ", hwh->type->name);
1145 DMEMIT("%u ", m->nr_priority_groups);
1147 if (m->next_pg)
1148 pg_num = m->next_pg->pg_num;
1149 else if (m->current_pg)
1150 pg_num = m->current_pg->pg_num;
1151 else
1152 pg_num = 1;
1154 DMEMIT("%u ", pg_num);
1156 switch (type) {
1157 case STATUSTYPE_INFO:
1158 list_for_each_entry(pg, &m->priority_groups, list) {
1159 if (pg->bypassed)
1160 state = 'D'; /* Disabled */
1161 else if (pg == m->current_pg)
1162 state = 'A'; /* Currently Active */
1163 else
1164 state = 'E'; /* Enabled */
1166 DMEMIT("%c ", state);
1168 if (pg->ps.type->status)
1169 sz += pg->ps.type->status(&pg->ps, NULL, type,
1170 result + sz,
1171 maxlen - sz);
1172 else
1173 DMEMIT("0 ");
1175 DMEMIT("%u %u ", pg->nr_pgpaths,
1176 pg->ps.type->info_args);
1178 list_for_each_entry(p, &pg->pgpaths, list) {
1179 DMEMIT("%s %s %u ", p->path.dev->name,
1180 p->path.is_active ? "A" : "F",
1181 p->fail_count);
1182 if (pg->ps.type->status)
1183 sz += pg->ps.type->status(&pg->ps,
1184 &p->path, type, result + sz,
1185 maxlen - sz);
1186 }
1187 }
1188 break;
1190 case STATUSTYPE_TABLE:
1191 list_for_each_entry(pg, &m->priority_groups, list) {
1192 DMEMIT("%s ", pg->ps.type->name);
1194 if (pg->ps.type->status)
1195 sz += pg->ps.type->status(&pg->ps, NULL, type,
1196 result + sz,
1197 maxlen - sz);
1198 else
1199 DMEMIT("0 ");
1201 DMEMIT("%u %u ", pg->nr_pgpaths,
1202 pg->ps.type->table_args);
1204 list_for_each_entry(p, &pg->pgpaths, list) {
1205 DMEMIT("%s ", p->path.dev->name);
1206 if (pg->ps.type->status)
1207 sz += pg->ps.type->status(&pg->ps,
1208 &p->path, type, result + sz,
1209 maxlen - sz);
1210 }
1211 }
1212 break;
1213 }
1215 spin_unlock_irqrestore(&m->lock, flags);
1217 return 0;
1218 }
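/*
 * Message interface. For illustration only (map name, sector and device
 * numbers are made up), the messages handled below are typically sent
 * with "dmsetup message", e.g.:
 *
 *   dmsetup message mpath0 0 fail_path 8:16
 *   dmsetup message mpath0 0 disable_group 2
 *   dmsetup message mpath0 0 queue_if_no_path
 */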
1220 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1221 {
1222 int r;
1223 struct dm_dev *dev;
1224 struct multipath *m = (struct multipath *) ti->private;
1225 action_fn action;
1227 if (argc == 1) {
1228 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
1229 return queue_if_no_path(m, 1, 0);
1230 else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
1231 return queue_if_no_path(m, 0, 0);
1232 }
1234 if (argc != 2)
1235 goto error;
1237 if (!strnicmp(argv[0], MESG_STR("disable_group")))
1238 return bypass_pg_num(m, argv[1], 1);
1239 else if (!strnicmp(argv[0], MESG_STR("enable_group")))
1240 return bypass_pg_num(m, argv[1], 0);
1241 else if (!strnicmp(argv[0], MESG_STR("switch_group")))
1242 return switch_pg_num(m, argv[1]);
1243 else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1244 action = reinstate_path;
1245 else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1246 action = fail_path;
1247 else
1248 goto error;
1250 r = dm_get_device(ti, argv[1], ti->begin, ti->len,
1251 dm_table_get_mode(ti->table), &dev);
1252 if (r) {
1253 DMWARN("message: error getting device %s",
1254 argv[1]);
1255 return -EINVAL;
1256 }
1258 r = action_dev(m, dev, action);
1260 dm_put_device(ti, dev);
1262 return r;
1264 error:
1265 DMWARN("Unrecognised multipath message received.");
1266 return -EINVAL;
1267 }
1269 /*-----------------------------------------------------------------
1270 * Module setup
1271 *---------------------------------------------------------------*/
1272 static struct target_type multipath_target = {
1273 .name = "multipath",
1274 .version = {1, 0, 4},
1275 .module = THIS_MODULE,
1276 .ctr = multipath_ctr,
1277 .dtr = multipath_dtr,
1278 .map = multipath_map,
1279 .end_io = multipath_end_io,
1280 .presuspend = multipath_presuspend,
1281 .resume = multipath_resume,
1282 .status = multipath_status,
1283 .message = multipath_message,
1284 };
1286 static int __init dm_multipath_init(void)
1287 {
1288 int r;
1290 /* allocate a slab for the dm_ios */
1291 _mpio_cache = kmem_cache_create("dm_mpath", sizeof(struct mpath_io),
1292 0, 0, NULL, NULL);
1293 if (!_mpio_cache)
1294 return -ENOMEM;
1296 r = dm_register_target(&multipath_target);
1297 if (r < 0) {
1298 DMERR("%s: register failed %d", multipath_target.name, r);
1299 kmem_cache_destroy(_mpio_cache);
1300 return -EINVAL;
1301 }
1303 kmultipathd = create_workqueue("kmpathd");
1304 if (!kmultipathd) {
1305 DMERR("%s: failed to create workqueue kmpathd",
1306 multipath_target.name);
1307 dm_unregister_target(&multipath_target);
1308 kmem_cache_destroy(_mpio_cache);
1309 return -ENOMEM;
1310 }
1312 DMINFO("version %u.%u.%u loaded",
1313 multipath_target.version[0], multipath_target.version[1],
1314 multipath_target.version[2]);
1316 return r;
1317 }
1319 static void __exit dm_multipath_exit(void)
1320 {
1321 int r;
1323 destroy_workqueue(kmultipathd);
1325 r = dm_unregister_target(&multipath_target);
1326 if (r < 0)
1327 DMERR("%s: target unregister failed %d",
1328 multipath_target.name, r);
1329 kmem_cache_destroy(_mpio_cache);
1330 }
1332 EXPORT_SYMBOL_GPL(dm_pg_init_complete);
1334 module_init(dm_multipath_init);
1335 module_exit(dm_multipath_exit);
1337 MODULE_DESCRIPTION(DM_NAME " multipath target");
1338 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1339 MODULE_LICENSE("GPL");